1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return the index of the given mode in the multiply and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
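/* For example, MODE_INDEX (SImode) is 2, selecting the third entry of the
   five-element multiply and divide cost arrays in the processor_costs
   tables below.  */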
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
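/* Points to the cost table in use; override_options () below resets this to
   &size_cost when optimizing for size and otherwise to the cost table of the
   processor selected for tuning.  */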
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
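/* Each tuning flag below is a bitmask of the m_* processor masks above for
   which the corresponding heuristic is enabled; it is tested against the mask
   of the processor selected by -mtune, as in the
   (x86_accumulate_outgoing_args & TUNEMASK) test in override_options ().  */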
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation results, but
531 after P4 was made no performance benefit was observed with
532 branch hints; they also increase the code size. As a result,
533 icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just the
567 lower part of scalar values in the proper format, leaving the upper part
568 undefined. */
569 const int x86_sse_split_regs = m_ATHLON_K8;
570 const int x86_sse_typeless_stores = m_ATHLON_K8;
571 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
572 const int x86_use_ffreep = m_ATHLON_K8;
573 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
574 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
575 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
576 /* Some CPU cores are not able to predict more than 4 branch instructions in
577 the 16-byte window. */
578 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
579 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
580 const int x86_use_bt = m_ATHLON_K8;
581
582 /* If the average insn count for a single function invocation is
583 lower than this constant, emit fast (but longer) prologue and
584 epilogue code. */
585 #define FAST_PROLOGUE_INSN_COUNT 20
586
587 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
588 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
589 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
590 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
591
592 /* Array of the smallest class containing reg number REGNO, indexed by
593 REGNO. Used by REGNO_REG_CLASS in i386.h. */
594
595 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
596 {
597 /* ax, dx, cx, bx */
598 AREG, DREG, CREG, BREG,
599 /* si, di, bp, sp */
600 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
601 /* FP registers */
602 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
603 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
604 /* arg pointer */
605 NON_Q_REGS,
606 /* flags, fpsr, dirflag, frame */
607 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
610 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
611 MMX_REGS, MMX_REGS,
612 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
615 SSE_REGS, SSE_REGS,
616 };
617
618 /* The "default" register map used in 32bit mode. */
619
620 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
621 {
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
629 };
630
631 static int const x86_64_int_parameter_registers[6] =
632 {
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
635 };
636
637 static int const x86_64_int_return_registers[4] =
638 {
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
640 };
641
642 /* The "default" register map used in 64bit mode. */
643 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
644 {
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
652 };
653
654 /* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative
698 numbers.
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15)
707 */
708 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
709 {
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
717 };
718
719 /* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. */
721
722 rtx ix86_compare_op0 = NULL_RTX;
723 rtx ix86_compare_op1 = NULL_RTX;
724
725 #define MAX_386_STACK_LOCALS 3
726 /* Size of the register save area. */
727 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
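/* As a rough worked example: with the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, this is 6*8 + 8*16 = 176
   bytes, the size of the x86-64 va_arg register save area.  */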
728
729 /* Define the structure for the machine field in struct function. */
730
731 struct stack_local_entry GTY(())
732 {
733 unsigned short mode;
734 unsigned short n;
735 rtx rtl;
736 struct stack_local_entry *next;
737 };
738
739 /* Structure describing stack frame layout.
740 Stack grows downward:
741
742 [arguments]
743 <- ARG_POINTER
744 saved pc
745
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
748 [saved regs]
749
750 [padding1] \
751 )
752 [va_arg registers] (
753 > to_allocate <- FRAME_POINTER
754 [frame] (
755 )
756 [padding2] /
757 */
758 struct ix86_frame
759 {
760 int nregs;
761 int padding1;
762 int va_arg_size;
763 HOST_WIDE_INT frame;
764 int padding2;
765 int outgoing_arguments_size;
766 int red_zone_size;
767
768 HOST_WIDE_INT to_allocate;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset;
771 HOST_WIDE_INT hard_frame_pointer_offset;
772 HOST_WIDE_INT stack_pointer_offset;
773
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov;
777 };
778
779 /* Used to enable/disable debugging features. */
780 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
781 /* Code model option as passed by user. */
782 const char *ix86_cmodel_string;
783 /* Parsed value. */
784 enum cmodel ix86_cmodel;
785 /* Asm dialect. */
786 const char *ix86_asm_string;
787 enum asm_dialect ix86_asm_dialect = ASM_ATT;
788 /* TLS dialect. */
789 const char *ix86_tls_dialect_string;
790 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
791
792 /* Which unit we are generating floating point math for. */
793 enum fpmath_unit ix86_fpmath;
794
795 /* Which cpu we are scheduling for. */
796 enum processor_type ix86_tune;
797 /* Which instruction set architecture to use. */
798 enum processor_type ix86_arch;
799
800 /* Strings to hold which cpu and instruction set architecture to use. */
801 const char *ix86_tune_string; /* for -mtune=<xxx> */
802 const char *ix86_arch_string; /* for -march=<xxx> */
803 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
804
805 /* # of registers to use to pass arguments. */
806 const char *ix86_regparm_string;
807
808 /* True if the SSE prefetch instruction is not a NOP. */
809 int x86_prefetch_sse;
810
811 /* ix86_regparm_string as a number */
812 int ix86_regparm;
813
814 /* Alignment to use for loops and jumps: */
815
816 /* Power of two alignment for loops. */
817 const char *ix86_align_loops_string;
818
819 /* Power of two alignment for non-loop jumps. */
820 const char *ix86_align_jumps_string;
821
822 /* Power of two alignment for stack boundary in bytes. */
823 const char *ix86_preferred_stack_boundary_string;
824
825 /* Preferred alignment for stack boundary in bits. */
826 unsigned int ix86_preferred_stack_boundary;
827
828 /* Values 1-5: see jump.c */
829 int ix86_branch_cost;
830 const char *ix86_branch_cost_string;
831
832 /* Power of two alignment for functions. */
833 const char *ix86_align_funcs_string;
834
835 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
836 char internal_label_prefix[16];
837 int internal_label_prefix_len;
838 \f
839 static void output_pic_addr_const (FILE *, rtx, int);
840 static void put_condition_code (enum rtx_code, enum machine_mode,
841 int, int, FILE *);
842 static const char *get_some_local_dynamic_name (void);
843 static int get_some_local_dynamic_name_1 (rtx *, void *);
844 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
845 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 rtx *);
847 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 enum machine_mode);
850 static rtx get_thread_pointer (int);
851 static rtx legitimize_tls_address (rtx, enum tls_model, int);
852 static void get_pc_thunk_name (char [32], unsigned int);
853 static rtx gen_push (rtx);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
882 static bool ix86_vector_mode_supported_p (enum machine_mode);
883
884 static int ix86_address_cost (rtx);
885 static bool ix86_cannot_force_const_mem (rtx);
886 static rtx ix86_delegitimize_address (rtx);
887
888 struct builtin_description;
889 static rtx ix86_expand_sse_comi (const struct builtin_description *,
890 tree, rtx);
891 static rtx ix86_expand_sse_compare (const struct builtin_description *,
892 tree, rtx);
893 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
894 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
895 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
896 static rtx ix86_expand_store_builtin (enum insn_code, tree);
897 static rtx safe_vector_operand (rtx, enum machine_mode);
898 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
899 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
900 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
901 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
902 static int ix86_fp_comparison_cost (enum rtx_code code);
903 static unsigned int ix86_select_alt_pic_regnum (void);
904 static int ix86_save_reg (unsigned int, int);
905 static void ix86_compute_frame_layout (struct ix86_frame *);
906 static int ix86_comp_type_attributes (tree, tree);
907 static int ix86_function_regparm (tree, tree);
908 const struct attribute_spec ix86_attribute_table[];
909 static bool ix86_function_ok_for_sibcall (tree, tree);
910 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
911 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
912 static int ix86_value_regno (enum machine_mode);
913 static bool contains_128bit_aligned_vector_p (tree);
914 static rtx ix86_struct_value_rtx (tree, int);
915 static bool ix86_ms_bitfield_layout_p (tree);
916 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
917 static int extended_reg_mentioned_1 (rtx *, void *);
918 static bool ix86_rtx_costs (rtx, int, int, int *);
919 static int min_insn_size (rtx);
920 static tree ix86_md_asm_clobbers (tree clobbers);
921 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
922 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
923 tree, bool);
924
925 /* This function is only used on Solaris. */
926 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
927 ATTRIBUTE_UNUSED;
928
929 /* Register class used for passing a given 64-bit part of an argument.
930 These represent the classes documented by the PS ABI, with the exception
931 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
932 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
933
934 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
935 whenever possible (the upper half then contains only padding).
936 */
937 enum x86_64_reg_class
938 {
939 X86_64_NO_CLASS,
940 X86_64_INTEGER_CLASS,
941 X86_64_INTEGERSI_CLASS,
942 X86_64_SSE_CLASS,
943 X86_64_SSESF_CLASS,
944 X86_64_SSEDF_CLASS,
945 X86_64_SSEUP_CLASS,
946 X86_64_X87_CLASS,
947 X86_64_X87UP_CLASS,
948 X86_64_COMPLEX_X87_CLASS,
949 X86_64_MEMORY_CLASS
950 };
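/* For instance, under this classification a 16-byte struct containing a
   double followed by an int is split into two eightbytes: the first is
   classified SSEDF and passed in an SSE register, the second INTEGERSI and
   passed in an integer register.  */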
951 static const char * const x86_64_reg_class_name[] = {
952 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
953 "sseup", "x87", "x87up", "cplx87", "no"
954 };
955
956 #define MAX_CLASSES 4
957
958 /* Table of constants used by fldpi, fldln2, etc.... */
959 static REAL_VALUE_TYPE ext_80387_constants_table [5];
960 static bool ext_80387_constants_init = 0;
961 static void init_ext_80387_constants (void);
962 \f
963 /* Initialize the GCC target structure. */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
969 #endif
970
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
973
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
976
977 #undef TARGET_EXPAND_BUILTIN
978 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979
980 #undef TARGET_ASM_FUNCTION_EPILOGUE
981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982
983 #undef TARGET_ASM_OPEN_PAREN
984 #define TARGET_ASM_OPEN_PAREN ""
985 #undef TARGET_ASM_CLOSE_PAREN
986 #define TARGET_ASM_CLOSE_PAREN ""
987
988 #undef TARGET_ASM_ALIGNED_HI_OP
989 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
990 #undef TARGET_ASM_ALIGNED_SI_OP
991 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
992 #ifdef ASM_QUAD
993 #undef TARGET_ASM_ALIGNED_DI_OP
994 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
995 #endif
996
997 #undef TARGET_ASM_UNALIGNED_HI_OP
998 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
999 #undef TARGET_ASM_UNALIGNED_SI_OP
1000 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1001 #undef TARGET_ASM_UNALIGNED_DI_OP
1002 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1003
1004 #undef TARGET_SCHED_ADJUST_COST
1005 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1006 #undef TARGET_SCHED_ISSUE_RATE
1007 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1008 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1009 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1010 ia32_multipass_dfa_lookahead
1011
1012 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1013 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1014
1015 #ifdef HAVE_AS_TLS
1016 #undef TARGET_HAVE_TLS
1017 #define TARGET_HAVE_TLS true
1018 #endif
1019 #undef TARGET_CANNOT_FORCE_CONST_MEM
1020 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1021
1022 #undef TARGET_DELEGITIMIZE_ADDRESS
1023 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1024
1025 #undef TARGET_MS_BITFIELD_LAYOUT_P
1026 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1027
1028 #undef TARGET_ASM_OUTPUT_MI_THUNK
1029 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1030 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1031 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1032
1033 #undef TARGET_ASM_FILE_START
1034 #define TARGET_ASM_FILE_START x86_file_start
1035
1036 #undef TARGET_RTX_COSTS
1037 #define TARGET_RTX_COSTS ix86_rtx_costs
1038 #undef TARGET_ADDRESS_COST
1039 #define TARGET_ADDRESS_COST ix86_address_cost
1040
1041 #undef TARGET_FIXED_CONDITION_CODE_REGS
1042 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043 #undef TARGET_CC_MODES_COMPATIBLE
1044 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1045
1046 #undef TARGET_MACHINE_DEPENDENT_REORG
1047 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1048
1049 #undef TARGET_BUILD_BUILTIN_VA_LIST
1050 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1051
1052 #undef TARGET_MD_ASM_CLOBBERS
1053 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1054
1055 #undef TARGET_PROMOTE_PROTOTYPES
1056 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1057 #undef TARGET_STRUCT_VALUE_RTX
1058 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1059 #undef TARGET_SETUP_INCOMING_VARARGS
1060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1061 #undef TARGET_MUST_PASS_IN_STACK
1062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1063 #undef TARGET_PASS_BY_REFERENCE
1064 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1065
1066 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1068
1069 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1070 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1071
1072 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1073 #undef TARGET_INSERT_ATTRIBUTES
1074 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1075 #endif
1076
1077 struct gcc_target targetm = TARGET_INITIALIZER;
1078
1079 \f
1080 /* The svr4 ABI for the i386 says that records and unions are returned
1081 in memory. */
1082 #ifndef DEFAULT_PCC_STRUCT_RETURN
1083 #define DEFAULT_PCC_STRUCT_RETURN 1
1084 #endif
1085
1086 /* Sometimes certain combinations of command options do not make
1087 sense on a particular target machine. You can define a macro
1088 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1089 defined, is executed once just after all the command options have
1090 been parsed.
1091
1092 Don't use this macro to turn on various extra optimizations for
1093 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1094
1095 void
1096 override_options (void)
1097 {
1098 int i;
1099 int ix86_tune_defaulted = 0;
1100
1101 /* Comes from final.c -- no real reason to change it. */
1102 #define MAX_CODE_ALIGN 16
1103
1104 static struct ptt
1105 {
1106 const struct processor_costs *cost; /* Processor costs */
1107 const int target_enable; /* Target flags to enable. */
1108 const int target_disable; /* Target flags to disable. */
1109 const int align_loop; /* Default alignments. */
1110 const int align_loop_max_skip;
1111 const int align_jump;
1112 const int align_jump_max_skip;
1113 const int align_func;
1114 }
1115 const processor_target_table[PROCESSOR_max] =
1116 {
1117 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1118 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1119 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1120 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1121 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1122 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1123 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1124 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1125 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1126 };
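/* For illustration: the k6 row above requests 32-byte loop, jump and
   function alignment with a maximum skip of 7 bytes, while the pentium4
   and nocona rows request no extra alignment at all.  */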
1127
1128 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1129 static struct pta
1130 {
1131 const char *const name; /* processor name or nickname. */
1132 const enum processor_type processor;
1133 const enum pta_flags
1134 {
1135 PTA_SSE = 1,
1136 PTA_SSE2 = 2,
1137 PTA_SSE3 = 4,
1138 PTA_MMX = 8,
1139 PTA_PREFETCH_SSE = 16,
1140 PTA_3DNOW = 32,
1141 PTA_3DNOW_A = 64,
1142 PTA_64BIT = 128
1143 } flags;
1144 }
1145 const processor_alias_table[] =
1146 {
1147 {"i386", PROCESSOR_I386, 0},
1148 {"i486", PROCESSOR_I486, 0},
1149 {"i586", PROCESSOR_PENTIUM, 0},
1150 {"pentium", PROCESSOR_PENTIUM, 0},
1151 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1152 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1153 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1155 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1156 {"i686", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1159 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1162 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1163 | PTA_MMX | PTA_PREFETCH_SSE},
1164 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1165 | PTA_MMX | PTA_PREFETCH_SSE},
1166 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1167 | PTA_MMX | PTA_PREFETCH_SSE},
1168 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1169 | PTA_MMX | PTA_PREFETCH_SSE},
1170 {"k6", PROCESSOR_K6, PTA_MMX},
1171 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1173 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1174 | PTA_3DNOW_A},
1175 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1176 | PTA_3DNOW | PTA_3DNOW_A},
1177 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1178 | PTA_3DNOW_A | PTA_SSE},
1179 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1180 | PTA_3DNOW_A | PTA_SSE},
1181 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1182 | PTA_3DNOW_A | PTA_SSE},
1183 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1184 | PTA_SSE | PTA_SSE2 },
1185 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1186 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1187 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1188 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1189 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1190 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1192 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1193 };
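/* As an example of how this table is used: -march=athlon-xp matches the
   "athlon-xp" entry above, so ix86_arch becomes PROCESSOR_ATHLON and the
   loop below turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE
   unless the user explicitly disabled them.  */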
1194
1195 int const pta_size = ARRAY_SIZE (processor_alias_table);
1196
1197 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1198 SUBTARGET_OVERRIDE_OPTIONS;
1199 #endif
1200
1201 /* Set the default values for switches whose default depends on TARGET_64BIT
1202 in case they weren't overwritten by command line options. */
1203 if (TARGET_64BIT)
1204 {
1205 if (flag_omit_frame_pointer == 2)
1206 flag_omit_frame_pointer = 1;
1207 if (flag_asynchronous_unwind_tables == 2)
1208 flag_asynchronous_unwind_tables = 1;
1209 if (flag_pcc_struct_return == 2)
1210 flag_pcc_struct_return = 0;
1211 }
1212 else
1213 {
1214 if (flag_omit_frame_pointer == 2)
1215 flag_omit_frame_pointer = 0;
1216 if (flag_asynchronous_unwind_tables == 2)
1217 flag_asynchronous_unwind_tables = 0;
1218 if (flag_pcc_struct_return == 2)
1219 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1220 }
1221
1222 if (!ix86_tune_string && ix86_arch_string)
1223 ix86_tune_string = ix86_arch_string;
1224 if (!ix86_tune_string)
1225 {
1226 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1227 ix86_tune_defaulted = 1;
1228 }
1229 if (!ix86_arch_string)
1230 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1231
1232 if (ix86_cmodel_string != 0)
1233 {
1234 if (!strcmp (ix86_cmodel_string, "small"))
1235 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1236 else if (flag_pic)
1237 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1238 else if (!strcmp (ix86_cmodel_string, "32"))
1239 ix86_cmodel = CM_32;
1240 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1241 ix86_cmodel = CM_KERNEL;
1242 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1243 ix86_cmodel = CM_MEDIUM;
1244 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1245 ix86_cmodel = CM_LARGE;
1246 else
1247 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1248 }
1249 else
1250 {
1251 ix86_cmodel = CM_32;
1252 if (TARGET_64BIT)
1253 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1254 }
1255 if (ix86_asm_string != 0)
1256 {
1257 if (!strcmp (ix86_asm_string, "intel"))
1258 ix86_asm_dialect = ASM_INTEL;
1259 else if (!strcmp (ix86_asm_string, "att"))
1260 ix86_asm_dialect = ASM_ATT;
1261 else
1262 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1263 }
1264 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1265 error ("code model %qs not supported in the %s bit mode",
1266 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1267 if (ix86_cmodel == CM_LARGE)
1268 sorry ("code model %<large%> not supported yet");
1269 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1270 sorry ("%i-bit mode not compiled in",
1271 (target_flags & MASK_64BIT) ? 64 : 32);
1272
1273 for (i = 0; i < pta_size; i++)
1274 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1275 {
1276 ix86_arch = processor_alias_table[i].processor;
1277 /* Default cpu tuning to the architecture. */
1278 ix86_tune = ix86_arch;
1279 if (processor_alias_table[i].flags & PTA_MMX
1280 && !(target_flags_explicit & MASK_MMX))
1281 target_flags |= MASK_MMX;
1282 if (processor_alias_table[i].flags & PTA_3DNOW
1283 && !(target_flags_explicit & MASK_3DNOW))
1284 target_flags |= MASK_3DNOW;
1285 if (processor_alias_table[i].flags & PTA_3DNOW_A
1286 && !(target_flags_explicit & MASK_3DNOW_A))
1287 target_flags |= MASK_3DNOW_A;
1288 if (processor_alias_table[i].flags & PTA_SSE
1289 && !(target_flags_explicit & MASK_SSE))
1290 target_flags |= MASK_SSE;
1291 if (processor_alias_table[i].flags & PTA_SSE2
1292 && !(target_flags_explicit & MASK_SSE2))
1293 target_flags |= MASK_SSE2;
1294 if (processor_alias_table[i].flags & PTA_SSE3
1295 && !(target_flags_explicit & MASK_SSE3))
1296 target_flags |= MASK_SSE3;
1297 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1298 x86_prefetch_sse = true;
1299 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1300 error ("CPU you selected does not support x86-64 "
1301 "instruction set");
1302 break;
1303 }
1304
1305 if (i == pta_size)
1306 error ("bad value (%s) for -march= switch", ix86_arch_string);
1307
1308 for (i = 0; i < pta_size; i++)
1309 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1310 {
1311 ix86_tune = processor_alias_table[i].processor;
1312 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1313 {
1314 if (ix86_tune_defaulted)
1315 {
1316 ix86_tune_string = "x86-64";
1317 for (i = 0; i < pta_size; i++)
1318 if (! strcmp (ix86_tune_string,
1319 processor_alias_table[i].name))
1320 break;
1321 ix86_tune = processor_alias_table[i].processor;
1322 }
1323 else
1324 error ("CPU you selected does not support x86-64 "
1325 "instruction set");
1326 }
1327 /* Intel CPUs have always interpreted SSE prefetch instructions as
1328 NOPs; so, we can enable SSE prefetch instructions even when
1329 -mtune (rather than -march) points us to a processor that has them.
1330 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1331 higher processors. */
1332 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1333 x86_prefetch_sse = true;
1334 break;
1335 }
1336 if (i == pta_size)
1337 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1338
1339 if (optimize_size)
1340 ix86_cost = &size_cost;
1341 else
1342 ix86_cost = processor_target_table[ix86_tune].cost;
1343 target_flags |= processor_target_table[ix86_tune].target_enable;
1344 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1345
1346 /* Arrange to set up i386_stack_locals for all functions. */
1347 init_machine_status = ix86_init_machine_status;
1348
1349 /* Validate -mregparm= value. */
1350 if (ix86_regparm_string)
1351 {
1352 i = atoi (ix86_regparm_string);
1353 if (i < 0 || i > REGPARM_MAX)
1354 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1355 else
1356 ix86_regparm = i;
1357 }
1358 else
1359 if (TARGET_64BIT)
1360 ix86_regparm = REGPARM_MAX;
1361
1362 /* If the user has provided any of the -malign-* options,
1363 warn and use that value only if -falign-* is not set.
1364 Remove this code in GCC 3.2 or later. */
1365 if (ix86_align_loops_string)
1366 {
1367 warning ("-malign-loops is obsolete, use -falign-loops");
1368 if (align_loops == 0)
1369 {
1370 i = atoi (ix86_align_loops_string);
1371 if (i < 0 || i > MAX_CODE_ALIGN)
1372 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1373 else
1374 align_loops = 1 << i;
1375 }
1376 }
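/* Note that the -malign-* arguments are log2 values, so e.g.
   -malign-loops=4 requests 1 << 4 = 16-byte loop alignment.  */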
1377
1378 if (ix86_align_jumps_string)
1379 {
1380 warning ("-malign-jumps is obsolete, use -falign-jumps");
1381 if (align_jumps == 0)
1382 {
1383 i = atoi (ix86_align_jumps_string);
1384 if (i < 0 || i > MAX_CODE_ALIGN)
1385 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1386 else
1387 align_jumps = 1 << i;
1388 }
1389 }
1390
1391 if (ix86_align_funcs_string)
1392 {
1393 warning ("-malign-functions is obsolete, use -falign-functions");
1394 if (align_functions == 0)
1395 {
1396 i = atoi (ix86_align_funcs_string);
1397 if (i < 0 || i > MAX_CODE_ALIGN)
1398 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1399 else
1400 align_functions = 1 << i;
1401 }
1402 }
1403
1404 /* Default align_* from the processor table. */
1405 if (align_loops == 0)
1406 {
1407 align_loops = processor_target_table[ix86_tune].align_loop;
1408 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1409 }
1410 if (align_jumps == 0)
1411 {
1412 align_jumps = processor_target_table[ix86_tune].align_jump;
1413 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1414 }
1415 if (align_functions == 0)
1416 {
1417 align_functions = processor_target_table[ix86_tune].align_func;
1418 }
1419
1420 /* Validate -mpreferred-stack-boundary= value, or provide default.
1421 The default of 128 bits is for Pentium III's SSE __m128, but we
1422 don't want additional code to keep the stack aligned when
1423 optimizing for code size. */
1424 ix86_preferred_stack_boundary = (optimize_size
1425 ? TARGET_64BIT ? 128 : 32
1426 : 128);
1427 if (ix86_preferred_stack_boundary_string)
1428 {
1429 i = atoi (ix86_preferred_stack_boundary_string);
1430 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1431 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1432 TARGET_64BIT ? 4 : 2);
1433 else
1434 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1435 }
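/* -mpreferred-stack-boundary is likewise a log2 byte value:
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. the default 16-byte alignment mentioned above.  */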
1436
1437 /* Validate -mbranch-cost= value, or provide default. */
1438 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1439 if (ix86_branch_cost_string)
1440 {
1441 i = atoi (ix86_branch_cost_string);
1442 if (i < 0 || i > 5)
1443 error ("-mbranch-cost=%d is not between 0 and 5", i);
1444 else
1445 ix86_branch_cost = i;
1446 }
1447
1448 if (ix86_tls_dialect_string)
1449 {
1450 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1451 ix86_tls_dialect = TLS_DIALECT_GNU;
1452 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1453 ix86_tls_dialect = TLS_DIALECT_SUN;
1454 else
1455 error ("bad value (%s) for -mtls-dialect= switch",
1456 ix86_tls_dialect_string);
1457 }
1458
1459 /* Keep nonleaf frame pointers. */
1460 if (flag_omit_frame_pointer)
1461 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1462 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1463 flag_omit_frame_pointer = 1;
1464
1465 /* If we're doing fast math, we don't care about comparison order
1466 wrt NaNs. This lets us use a shorter comparison sequence. */
1467 if (flag_unsafe_math_optimizations)
1468 target_flags &= ~MASK_IEEE_FP;
1469
1470 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1471 since the insns won't need emulation. */
1472 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1473 target_flags &= ~MASK_NO_FANCY_MATH_387;
1474
1475 /* Likewise, if the target doesn't have a 387, or we've specified
1476 software floating point, don't use 387 inline intrinsics. */
1477 if (!TARGET_80387)
1478 target_flags |= MASK_NO_FANCY_MATH_387;
1479
1480 /* Turn on SSE2 builtins for -msse3. */
1481 if (TARGET_SSE3)
1482 target_flags |= MASK_SSE2;
1483
1484 /* Turn on SSE builtins for -msse2. */
1485 if (TARGET_SSE2)
1486 target_flags |= MASK_SSE;
1487
1488 /* Turn on MMX builtins for -msse. */
1489 if (TARGET_SSE)
1490 {
1491 target_flags |= MASK_MMX & ~target_flags_explicit;
1492 x86_prefetch_sse = true;
1493 }
1494
1495 /* Turn on MMX builtins for 3Dnow. */
1496 if (TARGET_3DNOW)
1497 target_flags |= MASK_MMX;
1498
1499 if (TARGET_64BIT)
1500 {
1501 if (TARGET_ALIGN_DOUBLE)
1502 error ("-malign-double makes no sense in the 64bit mode");
1503 if (TARGET_RTD)
1504 error ("-mrtd calling convention not supported in the 64bit mode");
1505
1506 /* Enable by default the SSE and MMX builtins. Do allow the user to
1507 explicitly disable any of these. In particular, disabling SSE and
1508 MMX for kernel code is extremely useful. */
1509 target_flags
1510 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1511 & ~target_flags_explicit);
1512
1513 if (TARGET_SSE)
1514 ix86_fpmath = FPMATH_SSE;
1515 }
1516 else
1517 {
1518 ix86_fpmath = FPMATH_387;
1519 /* The i386 ABI does not specify a red zone. It still makes sense to use one
1520 when the programmer takes care to keep the stack from being destroyed. */
1521 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1522 target_flags |= MASK_NO_RED_ZONE;
1523 }
1524
1525 if (ix86_fpmath_string != 0)
1526 {
1527 if (! strcmp (ix86_fpmath_string, "387"))
1528 ix86_fpmath = FPMATH_387;
1529 else if (! strcmp (ix86_fpmath_string, "sse"))
1530 {
1531 if (!TARGET_SSE)
1532 {
1533 warning ("SSE instruction set disabled, using 387 arithmetics");
1534 ix86_fpmath = FPMATH_387;
1535 }
1536 else
1537 ix86_fpmath = FPMATH_SSE;
1538 }
1539 else if (! strcmp (ix86_fpmath_string, "387,sse")
1540 || ! strcmp (ix86_fpmath_string, "sse,387"))
1541 {
1542 if (!TARGET_SSE)
1543 {
1544 warning ("SSE instruction set disabled, using 387 arithmetics");
1545 ix86_fpmath = FPMATH_387;
1546 }
1547 else if (!TARGET_80387)
1548 {
1549 warning ("387 instruction set disabled, using SSE arithmetics");
1550 ix86_fpmath = FPMATH_SSE;
1551 }
1552 else
1553 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1554 }
1555 else
1556 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1557 }
1558
1559 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1560 if (! (ix86_fpmath & FPMATH_387))
1561 target_flags |= MASK_NO_FANCY_MATH_387;
1562
1563 if ((x86_accumulate_outgoing_args & TUNEMASK)
1564 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1565 && !optimize_size)
1566 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1567
1568 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1569 {
1570 char *p;
1571 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1572 p = strchr (internal_label_prefix, 'X');
1573 internal_label_prefix_len = p - internal_label_prefix;
1574 *p = '\0';
1575 }
1576
1577 /* When the scheduling description is not available, disable the scheduler pass
1578 so it won't slow down the compilation and make x87 code slower. */
1579 if (!TARGET_SCHEDULE)
1580 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1581 }
1582 \f
1583 void
1584 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1585 {
1586 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1587 make the problem with not enough registers even worse. */
1588 #ifdef INSN_SCHEDULING
1589 if (level > 1)
1590 flag_schedule_insns = 0;
1591 #endif
1592
1593 /* The default values of these switches depend on TARGET_64BIT,
1594 which is not known at this moment. Mark these values with 2 and
1595 let the user override them. If there is no command line option
1596 specifying them, we will set the defaults in override_options. */
1597 if (optimize >= 1)
1598 flag_omit_frame_pointer = 2;
1599 flag_pcc_struct_return = 2;
1600 flag_asynchronous_unwind_tables = 2;
1601 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1602 SUBTARGET_OPTIMIZATION_OPTIONS;
1603 #endif
1604 }
1605 \f
1606 /* Table of valid machine attributes. */
1607 const struct attribute_spec ix86_attribute_table[] =
1608 {
1609 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1610 /* Stdcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
1612 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1613 /* Fastcall attribute says callee is responsible for popping arguments
1614 if they are not variable. */
1615 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1616 /* Cdecl attribute says the callee is a normal C declaration */
1617 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1618 /* Regparm attribute specifies how many integer arguments are to be
1619 passed in registers. */
1620 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1621 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1622 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1623 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1624 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1625 #endif
1626 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1627 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1628 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1629 SUBTARGET_ATTRIBUTE_TABLE,
1630 #endif
1631 { NULL, 0, 0, false, false, false, NULL }
1632 };
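/* For illustration, typical uses of the calling convention attributes
   handled below look like:

       int __attribute__ ((stdcall))  f (int a, int b);    callee pops 8 bytes
       int __attribute__ ((fastcall)) g (int a, int b);    a in %ecx, b in %edx
       int __attribute__ ((regparm (3))) h (int a, int b, int c);
                                            a in %eax, b in %edx, c in %ecx

   The handlers only validate the attributes; the calling convention
   effects are implemented by the argument passing code later in this
   file.  */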
1633
1634 /* Decide whether we can make a sibling call to a function. DECL is the
1635 declaration of the function being targeted by the call and EXP is the
1636 CALL_EXPR representing the call. */
1637
1638 static bool
1639 ix86_function_ok_for_sibcall (tree decl, tree exp)
1640 {
1641 /* If we are generating position-independent code, we cannot sibcall
1642 optimize any indirect call, or a direct call to a global function,
1643 as the PLT requires %ebx be live. */
1644 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1645 return false;
1646
1647 /* If we are returning floats on the 80387 register stack, we cannot
1648 make a sibcall from a function that doesn't return a float to a
1649 function that does or, conversely, from a function that does return
1650 a float to a function that doesn't; the necessary stack adjustment
1651 would not be executed. */
1652 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1653 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1654 return false;
1655
1656 /* If this call is indirect, we'll need to be able to use a call-clobbered
1657 register for the address of the target function. Make sure that all
1658 such registers are not used for passing parameters. */
1659 if (!decl && !TARGET_64BIT)
1660 {
1661 tree type;
1662
1663 /* We're looking at the CALL_EXPR, we need the type of the function. */
1664 type = TREE_OPERAND (exp, 0); /* pointer expression */
1665 type = TREE_TYPE (type); /* pointer type */
1666 type = TREE_TYPE (type); /* function type */
1667
1668 if (ix86_function_regparm (type, NULL) >= 3)
1669 {
1670 /* ??? Need to count the actual number of registers to be used,
1671 not the possible number of registers. Fix later. */
1672 return false;
1673 }
1674 }
1675
1676 /* Otherwise okay. That also includes certain types of indirect calls. */
1677 return true;
1678 }
1679
1680 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1681 arguments as in struct attribute_spec.handler. */
1682 static tree
1683 ix86_handle_cdecl_attribute (tree *node, tree name,
1684 tree args ATTRIBUTE_UNUSED,
1685 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1686 {
1687 if (TREE_CODE (*node) != FUNCTION_TYPE
1688 && TREE_CODE (*node) != METHOD_TYPE
1689 && TREE_CODE (*node) != FIELD_DECL
1690 && TREE_CODE (*node) != TYPE_DECL)
1691 {
1692 warning ("%qs attribute only applies to functions",
1693 IDENTIFIER_POINTER (name));
1694 *no_add_attrs = true;
1695 }
1696 else
1697 {
1698 if (is_attribute_p ("fastcall", name))
1699 {
1700 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1701 {
1702 error ("fastcall and stdcall attributes are not compatible");
1703 }
1704 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1705 {
1706 error ("fastcall and regparm attributes are not compatible");
1707 }
1708 }
1709 else if (is_attribute_p ("stdcall", name))
1710 {
1711 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1712 {
1713 error ("fastcall and stdcall attributes are not compatible");
1714 }
1715 }
1716 }
1717
1718 if (TARGET_64BIT)
1719 {
1720 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1721 *no_add_attrs = true;
1722 }
1723
1724 return NULL_TREE;
1725 }
1726
1727 /* Handle a "regparm" attribute;
1728 arguments as in struct attribute_spec.handler. */
1729 static tree
1730 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1731 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1732 {
1733 if (TREE_CODE (*node) != FUNCTION_TYPE
1734 && TREE_CODE (*node) != METHOD_TYPE
1735 && TREE_CODE (*node) != FIELD_DECL
1736 && TREE_CODE (*node) != TYPE_DECL)
1737 {
1738 warning ("%qs attribute only applies to functions",
1739 IDENTIFIER_POINTER (name));
1740 *no_add_attrs = true;
1741 }
1742 else
1743 {
1744 tree cst;
1745
1746 cst = TREE_VALUE (args);
1747 if (TREE_CODE (cst) != INTEGER_CST)
1748 {
1749 warning ("%qs attribute requires an integer constant argument",
1750 IDENTIFIER_POINTER (name));
1751 *no_add_attrs = true;
1752 }
1753 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1754 {
1755 warning ("argument to %qs attribute larger than %d",
1756 IDENTIFIER_POINTER (name), REGPARM_MAX);
1757 *no_add_attrs = true;
1758 }
1759
1760 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1761 {
1762 error ("fastcall and regparm attributes are not compatible");
1763 }
1764 }
1765
1766 return NULL_TREE;
1767 }
1768
1769 /* Return 0 if the attributes for two types are incompatible, 1 if they
1770 are compatible, and 2 if they are nearly compatible (which causes a
1771 warning to be generated). */
1772
1773 static int
1774 ix86_comp_type_attributes (tree type1, tree type2)
1775 {
1776 /* Check for mismatch of non-default calling convention. */
1777 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1778
1779 if (TREE_CODE (type1) != FUNCTION_TYPE)
1780 return 1;
1781
1782 /* Check for mismatched fastcall types */
1783 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1784 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1785 return 0;
1786
1787 /* Check for mismatched return types (cdecl vs stdcall). */
1788 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1789 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1790 return 0;
1791 if (ix86_function_regparm (type1, NULL)
1792 != ix86_function_regparm (type2, NULL))
1793 return 0;
1794 return 1;
1795 }
1796 \f
1797 /* Return the regparm value for a function with the indicated TYPE and DECL.
1798 DECL may be NULL when calling function indirectly
1799 or considering a libcall. */
1800
1801 static int
1802 ix86_function_regparm (tree type, tree decl)
1803 {
1804 tree attr;
1805 int regparm = ix86_regparm;
1806 bool user_convention = false;
1807
1808 if (!TARGET_64BIT)
1809 {
1810 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1811 if (attr)
1812 {
1813 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1814 user_convention = true;
1815 }
1816
1817 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1818 {
1819 regparm = 2;
1820 user_convention = true;
1821 }
1822
1823 /* Use register calling convention for local functions when possible. */
1824 if (!TARGET_64BIT && !user_convention && decl
1825 && flag_unit_at_a_time && !profile_flag)
1826 {
1827 struct cgraph_local_info *i = cgraph_local_info (decl);
1828 if (i && i->local)
1829 {
1830 /* We can't use regparm(3) for nested functions as these use
1831 static chain pointer in third argument. */
1832 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1833 regparm = 2;
1834 else
1835 regparm = 3;
1836 }
1837 }
1838 }
1839 return regparm;
1840 }
1841
1842 /* Return true if EAX is live at the start of the function. Used by
1843 ix86_expand_prologue to determine if we need special help before
1844 calling allocate_stack_worker. */
1845
1846 static bool
1847 ix86_eax_live_at_start_p (void)
1848 {
1849 /* Cheat. Don't bother working forward from ix86_function_regparm
1850 to the function type to whether an actual argument is located in
1851 eax. Instead just look at cfg info, which is still close enough
1852 to correct at this point. This gives false positives for broken
1853 functions that might use uninitialized data that happens to be
1854 allocated in eax, but who cares? */
1855 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1856 }
1857
1858 /* Value is the number of bytes of arguments automatically
1859 popped when returning from a subroutine call.
1860 FUNDECL is the declaration node of the function (as a tree),
1861 FUNTYPE is the data type of the function (as a tree),
1862 or for a library call it is an identifier node for the subroutine name.
1863 SIZE is the number of bytes of arguments passed on the stack.
1864
1865 On the 80386, the RTD insn may be used to pop them if the number
1866 of args is fixed, but if the number is variable then the caller
1867 must pop them all. RTD can't be used for library calls now
1868 because the library is compiled with the Unix compiler.
1869 Use of RTD is a selectable option, since it is incompatible with
1870 standard Unix calling sequences. If the option is not selected,
1871 the caller must always pop the args.
1872
1873 The attribute stdcall is equivalent to RTD on a per module basis. */
1874
1875 int
1876 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1877 {
1878 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1879
1880 /* Cdecl functions override -mrtd, and never pop the stack. */
1881 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1882
1883 /* Stdcall and fastcall functions will pop the stack if not
1884 variable args. */
1885 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1886 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1887 rtd = 1;
1888
1889 if (rtd
1890 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1891 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1892 == void_type_node)))
1893 return size;
1894 }
1895
1896 /* Lose any fake structure return argument if it is passed on the stack. */
1897 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1898 && !TARGET_64BIT
1899 && !KEEP_AGGREGATE_RETURN_POINTER)
1900 {
1901 int nregs = ix86_function_regparm (funtype, fundecl);
1902
1903 if (!nregs)
1904 return GET_MODE_SIZE (Pmode);
1905 }
1906
1907 return 0;
1908 }
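/* For example, for
       void __attribute__ ((stdcall)) f (int a, int b);
   this function returns 8 and the callee pops its arguments with
   "ret $8", whereas for a cdecl or varargs function it returns 0 and
   the caller pops.  */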
1909 \f
1910 /* Argument support functions. */
1911
1912 /* Return true when register may be used to pass function parameters. */
1913 bool
1914 ix86_function_arg_regno_p (int regno)
1915 {
1916 int i;
1917 if (!TARGET_64BIT)
1918 return (regno < REGPARM_MAX
1919 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1920 if (SSE_REGNO_P (regno) && TARGET_SSE)
1921 return true;
1922 /* RAX is used as hidden argument to va_arg functions. */
1923 if (!regno)
1924 return true;
1925 for (i = 0; i < REGPARM_MAX; i++)
1926 if (regno == x86_64_int_parameter_registers[i])
1927 return true;
1928 return false;
1929 }
1930
1931 /* Return true if we do not know how to pass TYPE solely in registers. */
1932
1933 static bool
1934 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1935 {
1936 if (must_pass_in_stack_var_size_or_pad (mode, type))
1937 return true;
1938
1939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1940 The layout_type routine is crafty and tries to trick us into passing
1941 currently unsupported vector types on the stack by using TImode. */
1942 return (!TARGET_64BIT && mode == TImode
1943 && type && TREE_CODE (type) != VECTOR_TYPE);
1944 }
1945
1946 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1947 for a call to a function whose data type is FNTYPE.
1948 For a library call, FNTYPE is 0. */
1949
1950 void
1951 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1952 tree fntype, /* tree ptr for function decl */
1953 rtx libname, /* SYMBOL_REF of library name or 0 */
1954 tree fndecl)
1955 {
1956 static CUMULATIVE_ARGS zero_cum;
1957 tree param, next_param;
1958
1959 if (TARGET_DEBUG_ARG)
1960 {
1961 fprintf (stderr, "\ninit_cumulative_args (");
1962 if (fntype)
1963 fprintf (stderr, "fntype code = %s, ret code = %s",
1964 tree_code_name[(int) TREE_CODE (fntype)],
1965 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1966 else
1967 fprintf (stderr, "no fntype");
1968
1969 if (libname)
1970 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1971 }
1972
1973 *cum = zero_cum;
1974
1975 /* Set up the number of registers to use for passing arguments. */
1976 if (fntype)
1977 cum->nregs = ix86_function_regparm (fntype, fndecl);
1978 else
1979 cum->nregs = ix86_regparm;
1980 if (TARGET_SSE)
1981 cum->sse_nregs = SSE_REGPARM_MAX;
1982 if (TARGET_MMX)
1983 cum->mmx_nregs = MMX_REGPARM_MAX;
1984 cum->warn_sse = true;
1985 cum->warn_mmx = true;
1986 cum->maybe_vaarg = false;
1987
1988 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1989 if (fntype && !TARGET_64BIT)
1990 {
1991 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1992 {
1993 cum->nregs = 2;
1994 cum->fastcall = 1;
1995 }
1996 }
1997
1998 /* Determine if this function has variable arguments. This is
1999 indicated by the last argument being 'void_type_node' if there
2000 are no variable arguments. If there are variable arguments, then
2001 we won't pass anything in registers in 32-bit mode. */
2002
2003 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2004 {
2005 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2006 param != 0; param = next_param)
2007 {
2008 next_param = TREE_CHAIN (param);
2009 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2010 {
2011 if (!TARGET_64BIT)
2012 {
2013 cum->nregs = 0;
2014 cum->sse_nregs = 0;
2015 cum->mmx_nregs = 0;
2016 cum->warn_sse = 0;
2017 cum->warn_mmx = 0;
2018 cum->fastcall = 0;
2019 }
2020 cum->maybe_vaarg = true;
2021 }
2022 }
2023 }
2024 if ((!fntype && !libname)
2025 || (fntype && !TYPE_ARG_TYPES (fntype)))
2026 cum->maybe_vaarg = 1;
2027
2028 if (TARGET_DEBUG_ARG)
2029 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2030
2031 return;
2032 }
2033
2034 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2035 But in the case of vector types, it is some vector mode.
2036
2037 When we have only some of our vector isa extensions enabled, then there
2038 are some modes for which vector_mode_supported_p is false. For these
2039 modes, the generic vector support in gcc will choose some non-vector mode
2040 in order to implement the type. By computing the natural mode, we'll
2041 select the proper ABI location for the operand and not depend on whatever
2042 the middle-end decides to do with these vector types. */
2043
2044 static enum machine_mode
2045 type_natural_mode (tree type)
2046 {
2047 enum machine_mode mode = TYPE_MODE (type);
2048
2049 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2050 {
2051 HOST_WIDE_INT size = int_size_in_bytes (type);
2052 if ((size == 8 || size == 16)
2053 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2054 && TYPE_VECTOR_SUBPARTS (type) > 1)
2055 {
2056 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2057
2058 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2059 mode = MIN_MODE_VECTOR_FLOAT;
2060 else
2061 mode = MIN_MODE_VECTOR_INT;
2062
2063 /* Get the mode which has this inner mode and number of units. */
2064 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2065 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2066 && GET_MODE_INNER (mode) == innermode)
2067 return mode;
2068
2069 abort ();
2070 }
2071 }
2072
2073 return mode;
2074 }
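/* For example, for a GCC vector type such as
       typedef int v2si __attribute__ ((vector_size (8)));
   TYPE_MODE may be a plain integer mode when MMX is disabled, but this
   function still returns V2SImode, so the argument gets its proper ABI
   slot regardless of which ISA extensions are enabled.  */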
2075
2076 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2077 this may not agree with the mode that the type system has chosen for the
2078 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2079 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2080
2081 static rtx
2082 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2083 unsigned int regno)
2084 {
2085 rtx tmp;
2086
2087 if (orig_mode != BLKmode)
2088 tmp = gen_rtx_REG (orig_mode, regno);
2089 else
2090 {
2091 tmp = gen_rtx_REG (mode, regno);
2092 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2093 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2094 }
2095
2096 return tmp;
2097 }
2098
2099 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2100 of this code is to classify each eightbyte of an incoming argument by register
2101 class and assign registers accordingly. */
2102
2103 /* Return the union class of CLASS1 and CLASS2.
2104 See the x86-64 PS ABI for details. */
2105
2106 static enum x86_64_reg_class
2107 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2108 {
2109 /* Rule #1: If both classes are equal, this is the resulting class. */
2110 if (class1 == class2)
2111 return class1;
2112
2113 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2114 the other class. */
2115 if (class1 == X86_64_NO_CLASS)
2116 return class2;
2117 if (class2 == X86_64_NO_CLASS)
2118 return class1;
2119
2120 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2121 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2122 return X86_64_MEMORY_CLASS;
2123
2124 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2125 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2126 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2127 return X86_64_INTEGERSI_CLASS;
2128 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2129 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2130 return X86_64_INTEGER_CLASS;
2131
2132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2133 MEMORY is used. */
2134 if (class1 == X86_64_X87_CLASS
2135 || class1 == X86_64_X87UP_CLASS
2136 || class1 == X86_64_COMPLEX_X87_CLASS
2137 || class2 == X86_64_X87_CLASS
2138 || class2 == X86_64_X87UP_CLASS
2139 || class2 == X86_64_COMPLEX_X87_CLASS)
2140 return X86_64_MEMORY_CLASS;
2141
2142 /* Rule #6: Otherwise class SSE is used. */
2143 return X86_64_SSE_CLASS;
2144 }
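/* For example, when classifying
       union { double d; long l; };
   the two members yield X86_64_SSEDF_CLASS and X86_64_INTEGER_CLASS for
   the same eightbyte; rule #4 merges them to X86_64_INTEGER_CLASS, so
   the union is passed in a general purpose register.  */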
2145
2146 /* Classify the argument of type TYPE and mode MODE.
2147 CLASSES will be filled by the register class used to pass each word
2148 of the operand. The number of words is returned. In case the parameter
2149 should be passed in memory, 0 is returned. As a special case for zero
2150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2151
2152 BIT_OFFSET is used internally for handling records and specifies the
2153 offset in bits modulo 256 to avoid overflow cases.
2154
2155 See the x86-64 PS ABI for details.
2156 */
2157
2158 static int
2159 classify_argument (enum machine_mode mode, tree type,
2160 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2161 {
2162 HOST_WIDE_INT bytes =
2163 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2164 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2165
2166 /* Variable sized entities are always passed/returned in memory. */
2167 if (bytes < 0)
2168 return 0;
2169
2170 if (mode != VOIDmode
2171 && targetm.calls.must_pass_in_stack (mode, type))
2172 return 0;
2173
2174 if (type && AGGREGATE_TYPE_P (type))
2175 {
2176 int i;
2177 tree field;
2178 enum x86_64_reg_class subclasses[MAX_CLASSES];
2179
2180 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2181 if (bytes > 16)
2182 return 0;
2183
2184 for (i = 0; i < words; i++)
2185 classes[i] = X86_64_NO_CLASS;
2186
2187 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2188 signal the memory class, so handle it as a special case. */
2189 if (!words)
2190 {
2191 classes[0] = X86_64_NO_CLASS;
2192 return 1;
2193 }
2194
2195 /* Classify each field of record and merge classes. */
2196 if (TREE_CODE (type) == RECORD_TYPE)
2197 {
2198 /* For C++ classes, first merge in the fields of the base classes. */
2199 if (TYPE_BINFO (type))
2200 {
2201 tree binfo, base_binfo;
2202 int basenum;
2203
2204 for (binfo = TYPE_BINFO (type), basenum = 0;
2205 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2206 {
2207 int num;
2208 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2209 tree type = BINFO_TYPE (base_binfo);
2210
2211 num = classify_argument (TYPE_MODE (type),
2212 type, subclasses,
2213 (offset + bit_offset) % 256);
2214 if (!num)
2215 return 0;
2216 for (i = 0; i < num; i++)
2217 {
2218 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2219 classes[i + pos] =
2220 merge_classes (subclasses[i], classes[i + pos]);
2221 }
2222 }
2223 }
2224 /* And now merge the fields of the structure. */
2225 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2226 {
2227 if (TREE_CODE (field) == FIELD_DECL)
2228 {
2229 int num;
2230
2231 /* Bitfields are always classified as integer. Handle them
2232 early, since later code would consider them to be
2233 misaligned integers. */
2234 if (DECL_BIT_FIELD (field))
2235 {
2236 for (i = int_bit_position (field) / 8 / 8;
2237 i < (int_bit_position (field)
2238 + tree_low_cst (DECL_SIZE (field), 0)
2239 + 63) / 8 / 8; i++)
2240 classes[i] =
2241 merge_classes (X86_64_INTEGER_CLASS,
2242 classes[i]);
2243 }
2244 else
2245 {
2246 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2247 TREE_TYPE (field), subclasses,
2248 (int_bit_position (field)
2249 + bit_offset) % 256);
2250 if (!num)
2251 return 0;
2252 for (i = 0; i < num; i++)
2253 {
2254 int pos =
2255 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2256 classes[i + pos] =
2257 merge_classes (subclasses[i], classes[i + pos]);
2258 }
2259 }
2260 }
2261 }
2262 }
2263 /* Arrays are handled as small records. */
2264 else if (TREE_CODE (type) == ARRAY_TYPE)
2265 {
2266 int num;
2267 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2268 TREE_TYPE (type), subclasses, bit_offset);
2269 if (!num)
2270 return 0;
2271
2272 /* The partial classes are now full classes. */
2273 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2274 subclasses[0] = X86_64_SSE_CLASS;
2275 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2276 subclasses[0] = X86_64_INTEGER_CLASS;
2277
2278 for (i = 0; i < words; i++)
2279 classes[i] = subclasses[i % num];
2280 }
2281 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2282 else if (TREE_CODE (type) == UNION_TYPE
2283 || TREE_CODE (type) == QUAL_UNION_TYPE)
2284 {
2285 /* For C++ classes, first merge in the fields of the base classes. */
2286 if (TYPE_BINFO (type))
2287 {
2288 tree binfo, base_binfo;
2289 int basenum;
2290
2291 for (binfo = TYPE_BINFO (type), basenum = 0;
2292 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2293 {
2294 int num;
2295 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2296 tree type = BINFO_TYPE (base_binfo);
2297
2298 num = classify_argument (TYPE_MODE (type),
2299 type, subclasses,
2300 (offset + (bit_offset % 64)) % 256);
2301 if (!num)
2302 return 0;
2303 for (i = 0; i < num; i++)
2304 {
2305 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2306 classes[i + pos] =
2307 merge_classes (subclasses[i], classes[i + pos]);
2308 }
2309 }
2310 }
2311 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2312 {
2313 if (TREE_CODE (field) == FIELD_DECL)
2314 {
2315 int num;
2316 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2317 TREE_TYPE (field), subclasses,
2318 bit_offset);
2319 if (!num)
2320 return 0;
2321 for (i = 0; i < num; i++)
2322 classes[i] = merge_classes (subclasses[i], classes[i]);
2323 }
2324 }
2325 }
2326 else
2327 abort ();
2328
2329 /* Final merger cleanup. */
2330 for (i = 0; i < words; i++)
2331 {
2332 /* If one class is MEMORY, everything should be passed in
2333 memory. */
2334 if (classes[i] == X86_64_MEMORY_CLASS)
2335 return 0;
2336
2337 /* The X86_64_SSEUP_CLASS should always be preceded by
2338 X86_64_SSE_CLASS. */
2339 if (classes[i] == X86_64_SSEUP_CLASS
2340 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2341 classes[i] = X86_64_SSE_CLASS;
2342
2343 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2344 if (classes[i] == X86_64_X87UP_CLASS
2345 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2346 classes[i] = X86_64_SSE_CLASS;
2347 }
2348 return words;
2349 }
2350
2351 /* Compute the alignment needed. We align all types to their natural boundaries,
2352 with the exception of XFmode, which is aligned to 128 bits below. */
2353 if (mode != VOIDmode && mode != BLKmode)
2354 {
2355 int mode_alignment = GET_MODE_BITSIZE (mode);
2356
2357 if (mode == XFmode)
2358 mode_alignment = 128;
2359 else if (mode == XCmode)
2360 mode_alignment = 256;
2361 if (COMPLEX_MODE_P (mode))
2362 mode_alignment /= 2;
2363 /* Misaligned fields are always returned in memory. */
2364 if (bit_offset % mode_alignment)
2365 return 0;
2366 }
2367
2368 /* For V1xx modes, just use the base mode. */
2369 if (VECTOR_MODE_P (mode)
2370 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2371 mode = GET_MODE_INNER (mode);
2372
2373 /* Classification of atomic types. */
2374 switch (mode)
2375 {
2376 case DImode:
2377 case SImode:
2378 case HImode:
2379 case QImode:
2380 case CSImode:
2381 case CHImode:
2382 case CQImode:
2383 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2384 classes[0] = X86_64_INTEGERSI_CLASS;
2385 else
2386 classes[0] = X86_64_INTEGER_CLASS;
2387 return 1;
2388 case CDImode:
2389 case TImode:
2390 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2391 return 2;
2392 case CTImode:
2393 return 0;
2394 case SFmode:
2395 if (!(bit_offset % 64))
2396 classes[0] = X86_64_SSESF_CLASS;
2397 else
2398 classes[0] = X86_64_SSE_CLASS;
2399 return 1;
2400 case DFmode:
2401 classes[0] = X86_64_SSEDF_CLASS;
2402 return 1;
2403 case XFmode:
2404 classes[0] = X86_64_X87_CLASS;
2405 classes[1] = X86_64_X87UP_CLASS;
2406 return 2;
2407 case TFmode:
2408 classes[0] = X86_64_SSE_CLASS;
2409 classes[1] = X86_64_SSEUP_CLASS;
2410 return 2;
2411 case SCmode:
2412 classes[0] = X86_64_SSE_CLASS;
2413 return 1;
2414 case DCmode:
2415 classes[0] = X86_64_SSEDF_CLASS;
2416 classes[1] = X86_64_SSEDF_CLASS;
2417 return 2;
2418 case XCmode:
2419 classes[0] = X86_64_COMPLEX_X87_CLASS;
2420 return 1;
2421 case TCmode:
2422 /* This mode is larger than 16 bytes. */
2423 return 0;
2424 case V4SFmode:
2425 case V4SImode:
2426 case V16QImode:
2427 case V8HImode:
2428 case V2DFmode:
2429 case V2DImode:
2430 classes[0] = X86_64_SSE_CLASS;
2431 classes[1] = X86_64_SSEUP_CLASS;
2432 return 2;
2433 case V2SFmode:
2434 case V2SImode:
2435 case V4HImode:
2436 case V8QImode:
2437 classes[0] = X86_64_SSE_CLASS;
2438 return 1;
2439 case BLKmode:
2440 case VOIDmode:
2441 return 0;
2442 default:
2443 if (VECTOR_MODE_P (mode))
2444 {
2445 if (bytes > 16)
2446 return 0;
2447 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2448 {
2449 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2450 classes[0] = X86_64_INTEGERSI_CLASS;
2451 else
2452 classes[0] = X86_64_INTEGER_CLASS;
2453 classes[1] = X86_64_INTEGER_CLASS;
2454 return 1 + (bytes > 8);
2455 }
2456 }
2457 abort ();
2458 }
2459 }
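/* For example, on x86-64
       struct { double x; long y; };
   occupies two eightbytes; classify_argument returns 2 with
   classes[0] == X86_64_SSEDF_CLASS and classes[1] == X86_64_INTEGER_CLASS,
   so the struct is passed partly in an SSE register and partly in an
   integer register.  */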
2460
2461 /* Examine the argument and return the number of registers required in each
2462 class. Return 0 iff the parameter should be passed in memory. */
2463 static int
2464 examine_argument (enum machine_mode mode, tree type, int in_return,
2465 int *int_nregs, int *sse_nregs)
2466 {
2467 enum x86_64_reg_class class[MAX_CLASSES];
2468 int n = classify_argument (mode, type, class, 0);
2469
2470 *int_nregs = 0;
2471 *sse_nregs = 0;
2472 if (!n)
2473 return 0;
2474 for (n--; n >= 0; n--)
2475 switch (class[n])
2476 {
2477 case X86_64_INTEGER_CLASS:
2478 case X86_64_INTEGERSI_CLASS:
2479 (*int_nregs)++;
2480 break;
2481 case X86_64_SSE_CLASS:
2482 case X86_64_SSESF_CLASS:
2483 case X86_64_SSEDF_CLASS:
2484 (*sse_nregs)++;
2485 break;
2486 case X86_64_NO_CLASS:
2487 case X86_64_SSEUP_CLASS:
2488 break;
2489 case X86_64_X87_CLASS:
2490 case X86_64_X87UP_CLASS:
2491 if (!in_return)
2492 return 0;
2493 break;
2494 case X86_64_COMPLEX_X87_CLASS:
2495 return in_return ? 2 : 0;
2496 case X86_64_MEMORY_CLASS:
2497 abort ();
2498 }
2499 return 1;
2500 }
2501
2502 /* Construct container for the argument used by GCC interface. See
2503 FUNCTION_ARG for the detailed description. */
2504
2505 static rtx
2506 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2507 tree type, int in_return, int nintregs, int nsseregs,
2508 const int *intreg, int sse_regno)
2509 {
2510 enum machine_mode tmpmode;
2511 int bytes =
2512 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2513 enum x86_64_reg_class class[MAX_CLASSES];
2514 int n;
2515 int i;
2516 int nexps = 0;
2517 int needed_sseregs, needed_intregs;
2518 rtx exp[MAX_CLASSES];
2519 rtx ret;
2520
2521 n = classify_argument (mode, type, class, 0);
2522 if (TARGET_DEBUG_ARG)
2523 {
2524 if (!n)
2525 fprintf (stderr, "Memory class\n");
2526 else
2527 {
2528 fprintf (stderr, "Classes:");
2529 for (i = 0; i < n; i++)
2530 {
2531 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2532 }
2533 fprintf (stderr, "\n");
2534 }
2535 }
2536 if (!n)
2537 return NULL;
2538 if (!examine_argument (mode, type, in_return, &needed_intregs,
2539 &needed_sseregs))
2540 return NULL;
2541 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2542 return NULL;
2543
2544 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2545 some less clueful developer tries to use floating-point anyway. */
2546 if (needed_sseregs && !TARGET_SSE)
2547 {
2548 static bool issued_error;
2549 if (!issued_error)
2550 {
2551 issued_error = true;
2552 if (in_return)
2553 error ("SSE register return with SSE disabled");
2554 else
2555 error ("SSE register argument with SSE disabled");
2556 }
2557 return NULL;
2558 }
2559
2560 /* First construct simple cases. Avoid SCmode, since we want to use
2561 a single register to pass this type. */
2562 if (n == 1 && mode != SCmode)
2563 switch (class[0])
2564 {
2565 case X86_64_INTEGER_CLASS:
2566 case X86_64_INTEGERSI_CLASS:
2567 return gen_rtx_REG (mode, intreg[0]);
2568 case X86_64_SSE_CLASS:
2569 case X86_64_SSESF_CLASS:
2570 case X86_64_SSEDF_CLASS:
2571 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2572 case X86_64_X87_CLASS:
2573 case X86_64_COMPLEX_X87_CLASS:
2574 return gen_rtx_REG (mode, FIRST_STACK_REG);
2575 case X86_64_NO_CLASS:
2576 /* Zero sized array, struct or class. */
2577 return NULL;
2578 default:
2579 abort ();
2580 }
2581 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2582 && mode != BLKmode)
2583 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2584 if (n == 2
2585 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2586 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2587 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2588 && class[1] == X86_64_INTEGER_CLASS
2589 && (mode == CDImode || mode == TImode || mode == TFmode)
2590 && intreg[0] + 1 == intreg[1])
2591 return gen_rtx_REG (mode, intreg[0]);
2592
2593 /* Otherwise figure out the entries of the PARALLEL. */
2594 for (i = 0; i < n; i++)
2595 {
2596 switch (class[i])
2597 {
2598 case X86_64_NO_CLASS:
2599 break;
2600 case X86_64_INTEGER_CLASS:
2601 case X86_64_INTEGERSI_CLASS:
2602 /* Merge TImodes on aligned occasions here too. */
2603 if (i * 8 + 8 > bytes)
2604 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2605 else if (class[i] == X86_64_INTEGERSI_CLASS)
2606 tmpmode = SImode;
2607 else
2608 tmpmode = DImode;
2609 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2610 if (tmpmode == BLKmode)
2611 tmpmode = DImode;
2612 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2613 gen_rtx_REG (tmpmode, *intreg),
2614 GEN_INT (i*8));
2615 intreg++;
2616 break;
2617 case X86_64_SSESF_CLASS:
2618 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2619 gen_rtx_REG (SFmode,
2620 SSE_REGNO (sse_regno)),
2621 GEN_INT (i*8));
2622 sse_regno++;
2623 break;
2624 case X86_64_SSEDF_CLASS:
2625 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2626 gen_rtx_REG (DFmode,
2627 SSE_REGNO (sse_regno)),
2628 GEN_INT (i*8));
2629 sse_regno++;
2630 break;
2631 case X86_64_SSE_CLASS:
2632 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2633 tmpmode = TImode;
2634 else
2635 tmpmode = DImode;
2636 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2637 gen_rtx_REG (tmpmode,
2638 SSE_REGNO (sse_regno)),
2639 GEN_INT (i*8));
2640 if (tmpmode == TImode)
2641 i++;
2642 sse_regno++;
2643 break;
2644 default:
2645 abort ();
2646 }
2647 }
2648 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2649 for (i = 0; i < nexps; i++)
2650 XVECEXP (ret, 0, i) = exp [i];
2651 return ret;
2652 }
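/* Continuing the example above, for struct { double x; long y; } this
   builds, roughly, a PARALLEL with two EXPR_LIST entries: the next free
   SSE register in DFmode at offset 0 and the next free integer register
   in DImode at offset 8.  */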
2653
2654 /* Update the data in CUM to advance over an argument
2655 of mode MODE and data type TYPE.
2656 (TYPE is null for libcalls where that information may not be available.) */
2657
2658 void
2659 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2660 tree type, int named)
2661 {
2662 int bytes =
2663 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2664 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2665
2666 if (TARGET_DEBUG_ARG)
2667 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words, cum->words, cum->nregs, cum->sse_nregs,
2670 GET_MODE_NAME (mode), named);
2671 if (TARGET_64BIT)
2672 {
2673 int int_nregs, sse_nregs;
2674 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2675 cum->words += words;
2676 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2677 {
2678 cum->nregs -= int_nregs;
2679 cum->sse_nregs -= sse_nregs;
2680 cum->regno += int_nregs;
2681 cum->sse_regno += sse_nregs;
2682 }
2683 else
2684 cum->words += words;
2685 }
2686 else
2687 {
2688 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2689 && (!type || !AGGREGATE_TYPE_P (type)))
2690 {
2691 cum->sse_words += words;
2692 cum->sse_nregs -= 1;
2693 cum->sse_regno += 1;
2694 if (cum->sse_nregs <= 0)
2695 {
2696 cum->sse_nregs = 0;
2697 cum->sse_regno = 0;
2698 }
2699 }
2700 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2701 && (!type || !AGGREGATE_TYPE_P (type)))
2702 {
2703 cum->mmx_words += words;
2704 cum->mmx_nregs -= 1;
2705 cum->mmx_regno += 1;
2706 if (cum->mmx_nregs <= 0)
2707 {
2708 cum->mmx_nregs = 0;
2709 cum->mmx_regno = 0;
2710 }
2711 }
2712 else
2713 {
2714 cum->words += words;
2715 cum->nregs -= words;
2716 cum->regno += words;
2717
2718 if (cum->nregs <= 0)
2719 {
2720 cum->nregs = 0;
2721 cum->regno = 0;
2722 }
2723 }
2724 }
2725 return;
2726 }
2727
2728 /* Define where to put the arguments to a function.
2729 Value is zero to push the argument on the stack,
2730 or a hard register in which to store the argument.
2731
2732 MODE is the argument's machine mode.
2733 TYPE is the data type of the argument (as a tree).
2734 This is null for libcalls where that information may
2735 not be available.
2736 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2737 the preceding args and about the function being called.
2738 NAMED is nonzero if this argument is a named parameter
2739 (otherwise it is an extra parameter matching an ellipsis). */
2740
2741 rtx
2742 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2743 tree type, int named)
2744 {
2745 enum machine_mode mode = orig_mode;
2746 rtx ret = NULL_RTX;
2747 int bytes =
2748 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2749 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2750 static bool warnedsse, warnedmmx;
2751
2752 /* To simplify the code below, represent vector types with a vector mode
2753 even if MMX/SSE are not active. */
2754 if (type && TREE_CODE (type) == VECTOR_TYPE)
2755 mode = type_natural_mode (type);
2756
2757 /* Handle a hidden AL argument containing number of registers for varargs
2758 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2759 any AL settings. */
2760 if (mode == VOIDmode)
2761 {
2762 if (TARGET_64BIT)
2763 return GEN_INT (cum->maybe_vaarg
2764 ? (cum->sse_nregs < 0
2765 ? SSE_REGPARM_MAX
2766 : cum->sse_regno)
2767 : -1);
2768 else
2769 return constm1_rtx;
2770 }
2771 if (TARGET_64BIT)
2772 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2773 cum->sse_nregs,
2774 &x86_64_int_parameter_registers [cum->regno],
2775 cum->sse_regno);
2776 else
2777 switch (mode)
2778 {
2779 /* For now, pass fp/complex values on the stack. */
2780 default:
2781 break;
2782
2783 case BLKmode:
2784 if (bytes < 0)
2785 break;
2786 /* FALLTHRU */
2787 case DImode:
2788 case SImode:
2789 case HImode:
2790 case QImode:
2791 if (words <= cum->nregs)
2792 {
2793 int regno = cum->regno;
2794
2795 /* Fastcall allocates the first two DWORD (SImode) or
2796 smaller arguments to ECX and EDX. */
2797 if (cum->fastcall)
2798 {
2799 if (mode == BLKmode || mode == DImode)
2800 break;
2801
2802 /* ECX, not EAX, is the first allocated register. */
2803 if (regno == 0)
2804 regno = 2;
2805 }
2806 ret = gen_rtx_REG (mode, regno);
2807 }
2808 break;
2809 case TImode:
2810 case V16QImode:
2811 case V8HImode:
2812 case V4SImode:
2813 case V2DImode:
2814 case V4SFmode:
2815 case V2DFmode:
2816 if (!type || !AGGREGATE_TYPE_P (type))
2817 {
2818 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2819 {
2820 warnedsse = true;
2821 warning ("SSE vector argument without SSE enabled "
2822 "changes the ABI");
2823 }
2824 if (cum->sse_nregs)
2825 ret = gen_reg_or_parallel (mode, orig_mode,
2826 cum->sse_regno + FIRST_SSE_REG);
2827 }
2828 break;
2829 case V8QImode:
2830 case V4HImode:
2831 case V2SImode:
2832 case V2SFmode:
2833 if (!type || !AGGREGATE_TYPE_P (type))
2834 {
2835 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2836 {
2837 warnedmmx = true;
2838 warning ("MMX vector argument without MMX enabled "
2839 "changes the ABI");
2840 }
2841 if (cum->mmx_nregs)
2842 ret = gen_reg_or_parallel (mode, orig_mode,
2843 cum->mmx_regno + FIRST_MMX_REG);
2844 }
2845 break;
2846 }
2847
2848 if (TARGET_DEBUG_ARG)
2849 {
2850 fprintf (stderr,
2851 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2852 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2853
2854 if (ret)
2855 print_simple_rtl (stderr, ret);
2856 else
2857 fprintf (stderr, ", stack");
2858
2859 fprintf (stderr, " )\n");
2860 }
2861
2862 return ret;
2863 }
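/* Note on the VOIDmode case above: for an x86-64 varargs call such as
       printf ("%f", x);
   the constant returned here ends up in %al and tells the callee an
   upper bound on the number of SSE registers holding arguments (1 in
   this example), which the varargs prologue uses to skip unneeded
   register saves.  */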
2864
2865 /* A C expression that indicates when an argument must be passed by
2866 reference. If nonzero for an argument, a copy of that argument is
2867 made in memory and a pointer to the argument is passed instead of
2868 the argument itself. The pointer is passed in whatever way is
2869 appropriate for passing a pointer to that type. */
2870
2871 static bool
2872 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2873 enum machine_mode mode ATTRIBUTE_UNUSED,
2874 tree type, bool named ATTRIBUTE_UNUSED)
2875 {
2876 if (!TARGET_64BIT)
2877 return 0;
2878
2879 if (type && int_size_in_bytes (type) == -1)
2880 {
2881 if (TARGET_DEBUG_ARG)
2882 fprintf (stderr, "function_arg_pass_by_reference\n");
2883 return 1;
2884 }
2885
2886 return 0;
2887 }
2888
2889 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2890 ABI. Only called if TARGET_SSE. */
2891 static bool
2892 contains_128bit_aligned_vector_p (tree type)
2893 {
2894 enum machine_mode mode = TYPE_MODE (type);
2895 if (SSE_REG_MODE_P (mode)
2896 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2897 return true;
2898 if (TYPE_ALIGN (type) < 128)
2899 return false;
2900
2901 if (AGGREGATE_TYPE_P (type))
2902 {
2903 /* Walk the aggregates recursively. */
2904 if (TREE_CODE (type) == RECORD_TYPE
2905 || TREE_CODE (type) == UNION_TYPE
2906 || TREE_CODE (type) == QUAL_UNION_TYPE)
2907 {
2908 tree field;
2909
2910 if (TYPE_BINFO (type))
2911 {
2912 tree binfo, base_binfo;
2913 int i;
2914
2915 for (binfo = TYPE_BINFO (type), i = 0;
2916 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2917 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2918 return true;
2919 }
2920 /* And now check the fields of the structure. */
2921 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2922 {
2923 if (TREE_CODE (field) == FIELD_DECL
2924 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2925 return true;
2926 }
2927 }
2928 /* Just in case some language passes arrays by value. */
2929 else if (TREE_CODE (type) == ARRAY_TYPE)
2930 {
2931 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2932 return true;
2933 }
2934 else
2935 abort ();
2936 }
2937 return false;
2938 }
2939
2940 /* Gives the alignment boundary, in bits, of an argument with the
2941 specified mode and type. */
2942
2943 int
2944 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2945 {
2946 int align;
2947 if (type)
2948 align = TYPE_ALIGN (type);
2949 else
2950 align = GET_MODE_ALIGNMENT (mode);
2951 if (align < PARM_BOUNDARY)
2952 align = PARM_BOUNDARY;
2953 if (!TARGET_64BIT)
2954 {
2955 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2956 make an exception for SSE modes since these require 128bit
2957 alignment.
2958
2959 The handling here differs from field_alignment. ICC aligns MMX
2960 arguments to 4 byte boundaries, while structure fields are aligned
2961 to 8 byte boundaries. */
2962 if (!TARGET_SSE)
2963 align = PARM_BOUNDARY;
2964 else if (!type)
2965 {
2966 if (!SSE_REG_MODE_P (mode))
2967 align = PARM_BOUNDARY;
2968 }
2969 else
2970 {
2971 if (!contains_128bit_aligned_vector_p (type))
2972 align = PARM_BOUNDARY;
2973 }
2974 }
2975 if (align > 128)
2976 align = 128;
2977 return align;
2978 }
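/* For example, on ia32 with SSE enabled an __m128 argument is aligned to
   128 bits on the stack, while a plain int or an MMX __m64 argument keeps
   the usual 32 bit PARM_BOUNDARY alignment.  */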
2979
2980 /* Return true if N is a possible register number of function value. */
2981 bool
2982 ix86_function_value_regno_p (int regno)
2983 {
2984 if (!TARGET_64BIT)
2985 {
2986 return ((regno) == 0
2987 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2988 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2989 }
2990 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2991 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2992 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2993 }
2994
2995 /* Define how to find the value returned by a function.
2996 VALTYPE is the data type of the value (as a tree).
2997 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2998 otherwise, FUNC is 0. */
2999 rtx
3000 ix86_function_value (tree valtype)
3001 {
3002 if (TARGET_64BIT)
3003 {
3004 rtx ret = construct_container (type_natural_mode (valtype),
3005 TYPE_MODE (valtype), valtype,
3006 1, REGPARM_MAX, SSE_REGPARM_MAX,
3007 x86_64_int_return_registers, 0);
3008 /* For zero sized structures, construct_container returns NULL, but we
3009 need to keep the rest of the compiler happy by returning a meaningful value. */
3010 if (!ret)
3011 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3012 return ret;
3013 }
3014 else
3015 return gen_rtx_REG (TYPE_MODE (valtype),
3016 ix86_value_regno (TYPE_MODE (valtype)));
3017 }
3018
3019 /* Return nonzero iff TYPE is returned in memory. */
3020 int
3021 ix86_return_in_memory (tree type)
3022 {
3023 int needed_intregs, needed_sseregs, size;
3024 enum machine_mode mode = TYPE_MODE (type);
3025
3026 if (TARGET_64BIT)
3027 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3028
3029 if (mode == BLKmode)
3030 return 1;
3031
3032 size = int_size_in_bytes (type);
3033
3034 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3035 return 0;
3036
3037 if (VECTOR_MODE_P (mode) || mode == TImode)
3038 {
3039 /* User-created vectors small enough to fit in EAX. */
3040 if (size < 8)
3041 return 0;
3042
3043 /* MMX/3dNow values are returned on the stack, since we've
3044 got to EMMS/FEMMS before returning. */
3045 if (size == 8)
3046 return 1;
3047
3048 /* SSE values are returned in XMM0, except when it doesn't exist. */
3049 if (size == 16)
3050 return (TARGET_SSE ? 0 : 1);
3051 }
3052
3053 if (mode == XFmode)
3054 return 0;
3055
3056 if (size > 12)
3057 return 1;
3058 return 0;
3059 }
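/* For example, on ia32 a 16 byte struct is returned in memory through a
   hidden pointer, an 8 byte __m64 result is returned in memory because of
   the EMMS issue above, and a 16 byte __m128 result is returned in %xmm0
   only when SSE is enabled.  */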
3060
3061 /* When returning SSE vector types, we have a choice of either
3062 (1) being ABI incompatible with a -march switch, or
3063 (2) generating an error.
3064 Given no good solution, I think the safest thing is one warning.
3065 The user won't be able to use -Werror, but....
3066
3067 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3068 called in response to actually generating a caller or callee that
3069 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3070 via aggregate_value_p for general type probing from tree-ssa. */
3071
3072 static rtx
3073 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3074 {
3075 static bool warned;
3076
3077 if (!TARGET_SSE && type && !warned)
3078 {
3079 /* Look at the return type of the function, not the function type. */
3080 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3081
3082 if (mode == TImode
3083 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3084 {
3085 warned = true;
3086 warning ("SSE vector return without SSE enabled changes the ABI");
3087 }
3088 }
3089
3090 return NULL;
3091 }
3092
3093 /* Define how to find the value returned by a library function
3094 assuming the value has mode MODE. */
3095 rtx
3096 ix86_libcall_value (enum machine_mode mode)
3097 {
3098 if (TARGET_64BIT)
3099 {
3100 switch (mode)
3101 {
3102 case SFmode:
3103 case SCmode:
3104 case DFmode:
3105 case DCmode:
3106 case TFmode:
3107 return gen_rtx_REG (mode, FIRST_SSE_REG);
3108 case XFmode:
3109 case XCmode:
3110 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3111 case TCmode:
3112 return NULL;
3113 default:
3114 return gen_rtx_REG (mode, 0);
3115 }
3116 }
3117 else
3118 return gen_rtx_REG (mode, ix86_value_regno (mode));
3119 }
3120
3121 /* Given a mode, return the register to use for a return value. */
3122
3123 static int
3124 ix86_value_regno (enum machine_mode mode)
3125 {
3126 /* Floating point return values in %st(0). */
3127 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3128 return FIRST_FLOAT_REG;
3129 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3130 we prevent this case when sse is not available. */
3131 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3132 return FIRST_SSE_REG;
3133 /* Everything else in %eax. */
3134 return 0;
3135 }
3136 \f
3137 /* Create the va_list data type. */
3138
3139 static tree
3140 ix86_build_builtin_va_list (void)
3141 {
3142 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3143
3144 /* For i386 we use plain pointer to argument area. */
3145 if (!TARGET_64BIT)
3146 return build_pointer_type (char_type_node);
3147
3148 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3149 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3150
3151 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3152 unsigned_type_node);
3153 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3154 unsigned_type_node);
3155 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3156 ptr_type_node);
3157 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3158 ptr_type_node);
3159
3160 DECL_FIELD_CONTEXT (f_gpr) = record;
3161 DECL_FIELD_CONTEXT (f_fpr) = record;
3162 DECL_FIELD_CONTEXT (f_ovf) = record;
3163 DECL_FIELD_CONTEXT (f_sav) = record;
3164
3165 TREE_CHAIN (record) = type_decl;
3166 TYPE_NAME (record) = type_decl;
3167 TYPE_FIELDS (record) = f_gpr;
3168 TREE_CHAIN (f_gpr) = f_fpr;
3169 TREE_CHAIN (f_fpr) = f_ovf;
3170 TREE_CHAIN (f_ovf) = f_sav;
3171
3172 layout_type (record);
3173
3174 /* The correct type is an array type of one element. */
3175 return build_array_type (record, build_index_type (size_zero_node));
3176 }
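/* The record built above corresponds, roughly, to the C declaration
   required by the x86-64 ABI:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */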
3177
3178 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3179
3180 static void
3181 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3182 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3183 int no_rtl)
3184 {
3185 CUMULATIVE_ARGS next_cum;
3186 rtx save_area = NULL_RTX, mem;
3187 rtx label;
3188 rtx label_ref;
3189 rtx tmp_reg;
3190 rtx nsse_reg;
3191 int set;
3192 tree fntype;
3193 int stdarg_p;
3194 int i;
3195
3196 if (!TARGET_64BIT)
3197 return;
3198
3199 /* Indicate to allocate space on the stack for varargs save area. */
3200 ix86_save_varrargs_registers = 1;
3201
3202 cfun->stack_alignment_needed = 128;
3203
3204 fntype = TREE_TYPE (current_function_decl);
3205 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3206 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3207 != void_type_node));
3208
3209 /* For varargs, we do not want to skip the dummy va_dcl argument.
3210 For stdargs, we do want to skip the last named argument. */
3211 next_cum = *cum;
3212 if (stdarg_p)
3213 function_arg_advance (&next_cum, mode, type, 1);
3214
3215 if (!no_rtl)
3216 save_area = frame_pointer_rtx;
3217
3218 set = get_varargs_alias_set ();
3219
3220 for (i = next_cum.regno; i < ix86_regparm; i++)
3221 {
3222 mem = gen_rtx_MEM (Pmode,
3223 plus_constant (save_area, i * UNITS_PER_WORD));
3224 set_mem_alias_set (mem, set);
3225 emit_move_insn (mem, gen_rtx_REG (Pmode,
3226 x86_64_int_parameter_registers[i]));
3227 }
3228
3229 if (next_cum.sse_nregs)
3230 {
3231 /* Now emit code to save SSE registers. The AX parameter contains the number
3232 of SSE parameter registers used to call this function. We use the
3233 sse_prologue_save insn template, which produces a computed jump across
3234 the SSE saves. We need some preparation work to get this working. */
3235
3236 label = gen_label_rtx ();
3237 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3238
3239 /* Compute the address to jump to:
3240 label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes). */
3241 tmp_reg = gen_reg_rtx (Pmode);
3242 nsse_reg = gen_reg_rtx (Pmode);
3243 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3244 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3245 gen_rtx_MULT (Pmode, nsse_reg,
3246 GEN_INT (4))));
3247 if (next_cum.sse_regno)
3248 emit_move_insn
3249 (nsse_reg,
3250 gen_rtx_CONST (DImode,
3251 gen_rtx_PLUS (DImode,
3252 label_ref,
3253 GEN_INT (next_cum.sse_regno * 4))));
3254 else
3255 emit_move_insn (nsse_reg, label_ref);
3256 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3257
3258 /* Compute the address of the memory block we save into. We always use a
3259 pointer pointing 127 bytes after the first byte to store to - this is
3260 needed to keep each save instruction within 4 bytes. */
3261 tmp_reg = gen_reg_rtx (Pmode);
3262 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3263 plus_constant (save_area,
3264 8 * REGPARM_MAX + 127)));
3265 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3266 set_mem_alias_set (mem, set);
3267 set_mem_align (mem, BITS_PER_WORD);
3268
3269 /* And finally do the dirty job! */
3270 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3271 GEN_INT (next_cum.sse_regno), label));
3272 }
3273
3274 }
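/* The register save area laid out above looks roughly like this:

       bytes   0 ..  47   the six integer argument registers
                          (%rdi, %rsi, %rdx, %rcx, %r8, %r9), 8 bytes each
       bytes  48 .. 175   the eight SSE argument registers
                          (%xmm0 .. %xmm7), 16 bytes each

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */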
3275
3276 /* Implement va_start. */
3277
3278 void
3279 ix86_va_start (tree valist, rtx nextarg)
3280 {
3281 HOST_WIDE_INT words, n_gpr, n_fpr;
3282 tree f_gpr, f_fpr, f_ovf, f_sav;
3283 tree gpr, fpr, ovf, sav, t;
3284
3285 /* Only 64bit target needs something special. */
3286 if (!TARGET_64BIT)
3287 {
3288 std_expand_builtin_va_start (valist, nextarg);
3289 return;
3290 }
3291
3292 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3293 f_fpr = TREE_CHAIN (f_gpr);
3294 f_ovf = TREE_CHAIN (f_fpr);
3295 f_sav = TREE_CHAIN (f_ovf);
3296
3297 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3298 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3299 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3300 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3301 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3302
3303 /* Count number of gp and fp argument registers used. */
3304 words = current_function_args_info.words;
3305 n_gpr = current_function_args_info.regno;
3306 n_fpr = current_function_args_info.sse_regno;
3307
3308 if (TARGET_DEBUG_ARG)
3309 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3310 (int) words, (int) n_gpr, (int) n_fpr);
3311
3312 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3313 build_int_cst (NULL_TREE, n_gpr * 8));
3314 TREE_SIDE_EFFECTS (t) = 1;
3315 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3316
3317 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3318 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3319 TREE_SIDE_EFFECTS (t) = 1;
3320 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3321
3322 /* Find the overflow area. */
3323 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3324 if (words != 0)
3325 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3326 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3327 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3328 TREE_SIDE_EFFECTS (t) = 1;
3329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3330
3331 /* Find the register save area.
3332 The prologue of the function saves it right above the stack frame. */
3333 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3334 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3335 TREE_SIDE_EFFECTS (t) = 1;
3336 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3337 }
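/* For reference, the va_list object manipulated above (and in
   ix86_gimplify_va_arg below) roughly corresponds to the x86-64 ABI
   structure

       typedef struct {
         unsigned int gp_offset;       f_gpr: bytes consumed in the gp save area
         unsigned int fp_offset;       f_fpr: bytes consumed in the sse save area
         void *overflow_arg_area;      f_ovf: next stack argument
         void *reg_save_area;          f_sav: base of the register save area
       } __va_list_tag;
*/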
3338
3339 /* Implement va_arg. */
3340
3341 tree
3342 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3343 {
3344 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3345 tree f_gpr, f_fpr, f_ovf, f_sav;
3346 tree gpr, fpr, ovf, sav, t;
3347 int size, rsize;
3348 tree lab_false, lab_over = NULL_TREE;
3349 tree addr, t2;
3350 rtx container;
3351 int indirect_p = 0;
3352 tree ptrtype;
3353 enum machine_mode nat_mode;
3354
3355 /* Only the 64bit target needs something special. */
3356 if (!TARGET_64BIT)
3357 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3358
3359 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3360 f_fpr = TREE_CHAIN (f_gpr);
3361 f_ovf = TREE_CHAIN (f_fpr);
3362 f_sav = TREE_CHAIN (f_ovf);
3363
3364 valist = build_va_arg_indirect_ref (valist);
3365 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3366 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3367 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3368 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3369
3370 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3371 if (indirect_p)
3372 type = build_pointer_type (type);
3373 size = int_size_in_bytes (type);
3374 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3375
3376 nat_mode = type_natural_mode (type);
3377 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3378 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3379
3380 /* Pull the value out of the saved registers. */
3381
3382 addr = create_tmp_var (ptr_type_node, "addr");
3383 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3384
3385 if (container)
3386 {
3387 int needed_intregs, needed_sseregs;
3388 bool need_temp;
3389 tree int_addr, sse_addr;
3390
3391 lab_false = create_artificial_label ();
3392 lab_over = create_artificial_label ();
3393
3394 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3395
3396 need_temp = (!REG_P (container)
3397 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3398 || TYPE_ALIGN (type) > 128));
3399
3400 /* In case we are passing a structure, verify that it is a consecutive block
3401 in the register save area. If not, we need to do moves. */
3402 if (!need_temp && !REG_P (container))
3403 {
3404 /* Verify that all registers are strictly consecutive */
3405 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3406 {
3407 int i;
3408
3409 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3410 {
3411 rtx slot = XVECEXP (container, 0, i);
3412 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3413 || INTVAL (XEXP (slot, 1)) != i * 16)
3414 need_temp = 1;
3415 }
3416 }
3417 else
3418 {
3419 int i;
3420
3421 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3422 {
3423 rtx slot = XVECEXP (container, 0, i);
3424 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3425 || INTVAL (XEXP (slot, 1)) != i * 8)
3426 need_temp = 1;
3427 }
3428 }
3429 }
3430 if (!need_temp)
3431 {
3432 int_addr = addr;
3433 sse_addr = addr;
3434 }
3435 else
3436 {
3437 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3438 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3439 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3440 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3441 }
3442
3443 /* First ensure that we fit completely in registers. */
3444 if (needed_intregs)
3445 {
3446 t = build_int_cst (TREE_TYPE (gpr),
3447 (REGPARM_MAX - needed_intregs + 1) * 8);
3448 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3449 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3450 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3451 gimplify_and_add (t, pre_p);
3452 }
3453 if (needed_sseregs)
3454 {
3455 t = build_int_cst (TREE_TYPE (fpr),
3456 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3457 + REGPARM_MAX * 8);
3458 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3459 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3460 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3461 gimplify_and_add (t, pre_p);
3462 }
3463
3464 /* Compute index to start of area used for integer regs. */
3465 if (needed_intregs)
3466 {
3467 /* int_addr = gpr + sav; */
3468 t = fold_convert (ptr_type_node, gpr);
3469 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3470 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3471 gimplify_and_add (t, pre_p);
3472 }
3473 if (needed_sseregs)
3474 {
3475 /* sse_addr = fpr + sav; */
3476 t = fold_convert (ptr_type_node, fpr);
3477 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3478 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3479 gimplify_and_add (t, pre_p);
3480 }
3481 if (need_temp)
3482 {
3483 int i;
3484 tree temp = create_tmp_var (type, "va_arg_tmp");
3485
3486 /* addr = &temp; */
3487 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3488 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3489 gimplify_and_add (t, pre_p);
3490
3491 for (i = 0; i < XVECLEN (container, 0); i++)
3492 {
3493 rtx slot = XVECEXP (container, 0, i);
3494 rtx reg = XEXP (slot, 0);
3495 enum machine_mode mode = GET_MODE (reg);
3496 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3497 tree addr_type = build_pointer_type (piece_type);
3498 tree src_addr, src;
3499 int src_offset;
3500 tree dest_addr, dest;
3501
3502 if (SSE_REGNO_P (REGNO (reg)))
3503 {
3504 src_addr = sse_addr;
3505 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3506 }
3507 else
3508 {
3509 src_addr = int_addr;
3510 src_offset = REGNO (reg) * 8;
3511 }
3512 src_addr = fold_convert (addr_type, src_addr);
3513 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3514 size_int (src_offset)));
3515 src = build_va_arg_indirect_ref (src_addr);
3516
3517 dest_addr = fold_convert (addr_type, addr);
3518 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3519 size_int (INTVAL (XEXP (slot, 1)))));
3520 dest = build_va_arg_indirect_ref (dest_addr);
3521
3522 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3523 gimplify_and_add (t, pre_p);
3524 }
3525 }
3526
3527 if (needed_intregs)
3528 {
3529 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3530 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3531 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3532 gimplify_and_add (t, pre_p);
3533 }
3534 if (needed_sseregs)
3535 {
3536 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3537 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3538 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3539 gimplify_and_add (t, pre_p);
3540 }
3541
3542 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3543 gimplify_and_add (t, pre_p);
3544
3545 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3546 append_to_statement_list (t, pre_p);
3547 }
3548
3549 /* ... otherwise out of the overflow area. */
3550
3551 /* Care for on-stack alignment if needed. */
3552 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3553 t = ovf;
3554 else
3555 {
3556 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3557 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3558 build_int_cst (TREE_TYPE (ovf), align - 1));
3559 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3560 build_int_cst (TREE_TYPE (t), -align));
3561 }
3562 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3563
3564 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3565 gimplify_and_add (t2, pre_p);
3566
3567 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3568 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3569 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3570 gimplify_and_add (t, pre_p);
3571
3572 if (container)
3573 {
3574 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3575 append_to_statement_list (t, pre_p);
3576 }
3577
3578 ptrtype = build_pointer_type (type);
3579 addr = fold_convert (ptrtype, addr);
3580
3581 if (indirect_p)
3582 addr = build_va_arg_indirect_ref (addr);
3583 return build_va_arg_indirect_ref (addr);
3584 }
3585 \f
3586 /* Return nonzero if OPNUM's MEM should be matched
3587 in movabs* patterns. */
3588
3589 int
3590 ix86_check_movabs (rtx insn, int opnum)
3591 {
3592 rtx set, mem;
3593
3594 set = PATTERN (insn);
3595 if (GET_CODE (set) == PARALLEL)
3596 set = XVECEXP (set, 0, 0);
3597 if (GET_CODE (set) != SET)
3598 abort ();
3599 mem = XEXP (set, opnum);
3600 while (GET_CODE (mem) == SUBREG)
3601 mem = SUBREG_REG (mem);
3602 if (GET_CODE (mem) != MEM)
3603 abort ();
3604 return (volatile_ok || !MEM_VOLATILE_P (mem));
3605 }
3606 \f
3607 /* Initialize the table of extra 80387 mathematical constants. */
3608
3609 static void
3610 init_ext_80387_constants (void)
3611 {
3612 static const char * cst[5] =
3613 {
3614 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3615 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3616 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3617 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3618 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3619 };
3620 int i;
3621
3622 for (i = 0; i < 5; i++)
3623 {
3624 real_from_string (&ext_80387_constants_table[i], cst[i]);
3625 /* Ensure each constant is rounded to XFmode precision. */
3626 real_convert (&ext_80387_constants_table[i],
3627 XFmode, &ext_80387_constants_table[i]);
3628 }
3629
3630 ext_80387_constants_init = 1;
3631 }
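/* The strings above are, in order, log10(2), ln(2), log2(e), log2(10)
   and pi, i.e. the values loaded by the fldlg2, fldln2, fldl2e, fldl2t
   and fldpi instructions handled below.  */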
3632
3633 /* Return true if the constant is something that can be loaded with
3634 a special instruction. */
3635
3636 int
3637 standard_80387_constant_p (rtx x)
3638 {
3639 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3640 return -1;
3641
3642 if (x == CONST0_RTX (GET_MODE (x)))
3643 return 1;
3644 if (x == CONST1_RTX (GET_MODE (x)))
3645 return 2;
3646
3647 /* For XFmode constants, try to find a special 80387 instruction when
3648 optimizing for size or on those CPUs that benefit from them. */
3649 if (GET_MODE (x) == XFmode
3650 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3651 {
3652 REAL_VALUE_TYPE r;
3653 int i;
3654
3655 if (! ext_80387_constants_init)
3656 init_ext_80387_constants ();
3657
3658 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3659 for (i = 0; i < 5; i++)
3660 if (real_identical (&r, &ext_80387_constants_table[i]))
3661 return i + 3;
3662 }
3663
3664 return 0;
3665 }
3666
3667 /* Return the opcode of the special instruction to be used to load
3668 the constant X. */
3669
3670 const char *
3671 standard_80387_constant_opcode (rtx x)
3672 {
3673 switch (standard_80387_constant_p (x))
3674 {
3675 case 1:
3676 return "fldz";
3677 case 2:
3678 return "fld1";
3679 case 3:
3680 return "fldlg2";
3681 case 4:
3682 return "fldln2";
3683 case 5:
3684 return "fldl2e";
3685 case 6:
3686 return "fldl2t";
3687 case 7:
3688 return "fldpi";
3689 }
3690 abort ();
3691 }
3692
3693 /* Return the CONST_DOUBLE representing the 80387 constant that is
3694 loaded by the specified special instruction. The argument IDX
3695 matches the return value from standard_80387_constant_p. */
3696
3697 rtx
3698 standard_80387_constant_rtx (int idx)
3699 {
3700 int i;
3701
3702 if (! ext_80387_constants_init)
3703 init_ext_80387_constants ();
3704
3705 switch (idx)
3706 {
3707 case 3:
3708 case 4:
3709 case 5:
3710 case 6:
3711 case 7:
3712 i = idx - 3;
3713 break;
3714
3715 default:
3716 abort ();
3717 }
3718
3719 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3720 XFmode);
3721 }
3722
3723 /* Return 1 if X is an FP constant that we can load into an SSE register
3724 without using memory. */
3725 int
3726 standard_sse_constant_p (rtx x)
3727 {
3728 if (x == const0_rtx)
3729 return 1;
3730 return (x == CONST0_RTX (GET_MODE (x)));
3731 }
3732
3733 /* Returns 1 if OP contains a symbol reference */
3734
3735 int
3736 symbolic_reference_mentioned_p (rtx op)
3737 {
3738 const char *fmt;
3739 int i;
3740
3741 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3742 return 1;
3743
3744 fmt = GET_RTX_FORMAT (GET_CODE (op));
3745 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3746 {
3747 if (fmt[i] == 'E')
3748 {
3749 int j;
3750
3751 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3752 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3753 return 1;
3754 }
3755
3756 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3757 return 1;
3758 }
3759
3760 return 0;
3761 }
3762
3763 /* Return 1 if it is appropriate to emit `ret' instructions in the
3764 body of a function. Do this only if the epilogue is simple, needing a
3765 couple of insns. Prior to reloading, we can't tell how many registers
3766 must be saved, so return 0 then. Return 0 if there is no frame
3767 marker to de-allocate. */
3768
3769 int
3770 ix86_can_use_return_insn_p (void)
3771 {
3772 struct ix86_frame frame;
3773
3774 if (! reload_completed || frame_pointer_needed)
3775 return 0;
3776
3777 /* Don't allow more than 32k bytes of pop, since that's all we can do
3778 with one instruction. */
3779 if (current_function_pops_args
3780 && current_function_args_size >= 32768)
3781 return 0;
3782
3783 ix86_compute_frame_layout (&frame);
3784 return frame.to_allocate == 0 && frame.nregs == 0;
3785 }
3786 \f
3787 /* Value should be nonzero if functions must have frame pointers.
3788 Zero means the frame pointer need not be set up (and parms may
3789 be accessed via the stack pointer) in functions that seem suitable. */
3790
3791 int
3792 ix86_frame_pointer_required (void)
3793 {
3794 /* If we accessed previous frames, then the generated code expects
3795 to be able to access the saved ebp value in our frame. */
3796 if (cfun->machine->accesses_prev_frame)
3797 return 1;
3798
3799 /* Several x86 OSes need a frame pointer for other reasons,
3800 usually pertaining to setjmp. */
3801 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3802 return 1;
3803
3804 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3805 the frame pointer by default. Turn it back on now if we've not
3806 got a leaf function. */
3807 if (TARGET_OMIT_LEAF_FRAME_POINTER
3808 && (!current_function_is_leaf))
3809 return 1;
3810
3811 if (current_function_profile)
3812 return 1;
3813
3814 return 0;
3815 }
3816
3817 /* Record that the current function accesses previous call frames. */
3818
3819 void
3820 ix86_setup_frame_addresses (void)
3821 {
3822 cfun->machine->accesses_prev_frame = 1;
3823 }
3824 \f
3825 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3826 # define USE_HIDDEN_LINKONCE 1
3827 #else
3828 # define USE_HIDDEN_LINKONCE 0
3829 #endif
3830
3831 static int pic_labels_used;
3832
3833 /* Fills in the label name that should be used for a pc thunk for
3834 the given register. */
3835
3836 static void
3837 get_pc_thunk_name (char name[32], unsigned int regno)
3838 {
3839 if (USE_HIDDEN_LINKONCE)
3840 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3841 else
3842 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3843 }
3844
3845
3846 /* This function emits, for -fpic, the pc thunks: small functions that load
3847 the chosen register with the return address of the caller and then return. */
3848
3849 void
3850 ix86_file_end (void)
3851 {
3852 rtx xops[2];
3853 int regno;
3854
3855 for (regno = 0; regno < 8; ++regno)
3856 {
3857 char name[32];
3858
3859 if (! ((pic_labels_used >> regno) & 1))
3860 continue;
3861
3862 get_pc_thunk_name (name, regno);
3863
3864 if (USE_HIDDEN_LINKONCE)
3865 {
3866 tree decl;
3867
3868 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3869 error_mark_node);
3870 TREE_PUBLIC (decl) = 1;
3871 TREE_STATIC (decl) = 1;
3872 DECL_ONE_ONLY (decl) = 1;
3873
3874 (*targetm.asm_out.unique_section) (decl, 0);
3875 named_section (decl, NULL, 0);
3876
3877 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3878 fputs ("\t.hidden\t", asm_out_file);
3879 assemble_name (asm_out_file, name);
3880 fputc ('\n', asm_out_file);
3881 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3882 }
3883 else
3884 {
3885 text_section ();
3886 ASM_OUTPUT_LABEL (asm_out_file, name);
3887 }
3888
3889 xops[0] = gen_rtx_REG (SImode, regno);
3890 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3892 output_asm_insn ("ret", xops);
3893 }
3894
3895 if (NEED_INDICATE_EXEC_STACK)
3896 file_end_indicate_exec_stack ();
3897 }
3898
3899 /* Emit code for the SET_GOT patterns. */
3900
3901 const char *
3902 output_set_got (rtx dest)
3903 {
3904 rtx xops[3];
3905
3906 xops[0] = dest;
3907 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3908
3909 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3910 {
3911 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3912
3913 if (!flag_pic)
3914 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3915 else
3916 output_asm_insn ("call\t%a2", xops);
3917
3918 #if TARGET_MACHO
3919 /* Output the "canonical" label name ("Lxx$pb") here too. This
3920 is what will be referred to by the Mach-O PIC subsystem. */
3921 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3922 #endif
3923 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3924 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3925
3926 if (flag_pic)
3927 output_asm_insn ("pop{l}\t%0", xops);
3928 }
3929 else
3930 {
3931 char name[32];
3932 get_pc_thunk_name (name, REGNO (dest));
3933 pic_labels_used |= 1 << REGNO (dest);
3934
3935 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3936 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3937 output_asm_insn ("call\t%X2", xops);
3938 }
3939
3940 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3941 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3942 else if (!TARGET_MACHO)
3943 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3944
3945 return "";
3946 }
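/* For the plain -fpic case the sequence emitted above is roughly

       call  1f
   1:  popl  %reg
       addl  $_GLOBAL_OFFSET_TABLE_+[.-1b], %reg

   while with TARGET_DEEP_BRANCH_PREDICTION the pop is replaced by a call
   to the __i686.get_pc_thunk.<reg> helper emitted by ix86_file_end.  */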
3947
3948 /* Generate a "push" pattern for input ARG. */
3949
3950 static rtx
3951 gen_push (rtx arg)
3952 {
3953 return gen_rtx_SET (VOIDmode,
3954 gen_rtx_MEM (Pmode,
3955 gen_rtx_PRE_DEC (Pmode,
3956 stack_pointer_rtx)),
3957 arg);
3958 }
3959
3960 /* Return >= 0 if there is an unused call-clobbered register available
3961 for the entire function. */
3962
3963 static unsigned int
3964 ix86_select_alt_pic_regnum (void)
3965 {
3966 if (current_function_is_leaf && !current_function_profile)
3967 {
3968 int i;
3969 for (i = 2; i >= 0; --i)
3970 if (!regs_ever_live[i])
3971 return i;
3972 }
3973
3974 return INVALID_REGNUM;
3975 }
3976
3977 /* Return 1 if we need to save REGNO. */
3978 static int
3979 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3980 {
3981 if (pic_offset_table_rtx
3982 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3983 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3984 || current_function_profile
3985 || current_function_calls_eh_return
3986 || current_function_uses_const_pool))
3987 {
3988 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3989 return 0;
3990 return 1;
3991 }
3992
3993 if (current_function_calls_eh_return && maybe_eh_return)
3994 {
3995 unsigned i;
3996 for (i = 0; ; i++)
3997 {
3998 unsigned test = EH_RETURN_DATA_REGNO (i);
3999 if (test == INVALID_REGNUM)
4000 break;
4001 if (test == regno)
4002 return 1;
4003 }
4004 }
4005
4006 return (regs_ever_live[regno]
4007 && !call_used_regs[regno]
4008 && !fixed_regs[regno]
4009 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4010 }
4011
4012 /* Return number of registers to be saved on the stack. */
4013
4014 static int
4015 ix86_nsaved_regs (void)
4016 {
4017 int nregs = 0;
4018 int regno;
4019
4020 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4021 if (ix86_save_reg (regno, true))
4022 nregs++;
4023 return nregs;
4024 }
4025
4026 /* Return the offset between two registers, one to be eliminated, and the other
4027 its replacement, at the start of a routine. */
4028
4029 HOST_WIDE_INT
4030 ix86_initial_elimination_offset (int from, int to)
4031 {
4032 struct ix86_frame frame;
4033 ix86_compute_frame_layout (&frame);
4034
4035 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4036 return frame.hard_frame_pointer_offset;
4037 else if (from == FRAME_POINTER_REGNUM
4038 && to == HARD_FRAME_POINTER_REGNUM)
4039 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4040 else
4041 {
4042 if (to != STACK_POINTER_REGNUM)
4043 abort ();
4044 else if (from == ARG_POINTER_REGNUM)
4045 return frame.stack_pointer_offset;
4046 else if (from != FRAME_POINTER_REGNUM)
4047 abort ();
4048 else
4049 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4050 }
4051 }
4052
4053 /* Fill the ix86_frame structure with information about the frame of the currently compiled function. */
4054
4055 static void
4056 ix86_compute_frame_layout (struct ix86_frame *frame)
4057 {
4058 HOST_WIDE_INT total_size;
4059 unsigned int stack_alignment_needed;
4060 HOST_WIDE_INT offset;
4061 unsigned int preferred_alignment;
4062 HOST_WIDE_INT size = get_frame_size ();
4063
4064 frame->nregs = ix86_nsaved_regs ();
4065 total_size = size;
4066
4067 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4068 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4069
4070 /* During reload iteration the number of registers saved can change.
4071 Recompute the value as needed. Do not recompute when the number of registers
4072 didn't change, as reload does multiple calls to the function and does not
4073 expect the decision to change within a single iteration. */
4074 if (!optimize_size
4075 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4076 {
4077 int count = frame->nregs;
4078
4079 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4080 /* The fast prologue uses move instead of push to save registers. This
4081 is significantly longer, but also executes faster, as modern hardware
4082 can execute the moves in parallel but can't do that for push/pop.
4083
4084 Be careful about choosing which prologue to emit: when the function takes
4085 many instructions to execute we may use the slow version, as well as when
4086 the function is known to be outside a hot spot (this is known with
4087 feedback only). Weight the size of the function by the number of registers
4088 to save, as it is cheap to use one or two push instructions but very
4089 slow to use many of them. */
4090 if (count)
4091 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4092 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4093 || (flag_branch_probabilities
4094 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4095 cfun->machine->use_fast_prologue_epilogue = false;
4096 else
4097 cfun->machine->use_fast_prologue_epilogue
4098 = !expensive_function_p (count);
4099 }
4100 if (TARGET_PROLOGUE_USING_MOVE
4101 && cfun->machine->use_fast_prologue_epilogue)
4102 frame->save_regs_using_mov = true;
4103 else
4104 frame->save_regs_using_mov = false;
4105
4106
4107 /* Skip return address and saved base pointer. */
4108 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4109
4110 frame->hard_frame_pointer_offset = offset;
4111
4112 /* Do some sanity checking of stack_alignment_needed and
4113 preferred_alignment, since the i386 port is the only one using these
4114 features, which may break easily. */
4115
4116 if (size && !stack_alignment_needed)
4117 abort ();
4118 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4119 abort ();
4120 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4121 abort ();
4122 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4123 abort ();
4124
4125 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4126 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4127
4128 /* Register save area */
4129 offset += frame->nregs * UNITS_PER_WORD;
4130
4131 /* Va-arg area */
4132 if (ix86_save_varrargs_registers)
4133 {
4134 offset += X86_64_VARARGS_SIZE;
4135 frame->va_arg_size = X86_64_VARARGS_SIZE;
4136 }
4137 else
4138 frame->va_arg_size = 0;
4139
4140 /* Align start of frame for local function. */
4141 frame->padding1 = ((offset + stack_alignment_needed - 1)
4142 & -stack_alignment_needed) - offset;
4143
4144 offset += frame->padding1;
4145
4146 /* Frame pointer points here. */
4147 frame->frame_pointer_offset = offset;
4148
4149 offset += size;
4150
4151 /* Add the outgoing arguments area. It can be skipped if we eliminated
4152 all the function calls as dead code.
4153 Skipping is however impossible when the function calls alloca: the alloca
4154 expander assumes that the last current_function_outgoing_args_size bytes
4155 of the stack frame are unused. */
4156 if (ACCUMULATE_OUTGOING_ARGS
4157 && (!current_function_is_leaf || current_function_calls_alloca))
4158 {
4159 offset += current_function_outgoing_args_size;
4160 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4161 }
4162 else
4163 frame->outgoing_arguments_size = 0;
4164
4165 /* Align stack boundary. Only needed if we're calling another function
4166 or using alloca. */
4167 if (!current_function_is_leaf || current_function_calls_alloca)
4168 frame->padding2 = ((offset + preferred_alignment - 1)
4169 & -preferred_alignment) - offset;
4170 else
4171 frame->padding2 = 0;
4172
4173 offset += frame->padding2;
4174
4175 /* We've reached end of stack frame. */
4176 frame->stack_pointer_offset = offset;
4177
4178 /* Size prologue needs to allocate. */
4179 frame->to_allocate =
4180 (size + frame->padding1 + frame->padding2
4181 + frame->outgoing_arguments_size + frame->va_arg_size);
4182
4183 if ((!frame->to_allocate && frame->nregs <= 1)
4184 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4185 frame->save_regs_using_mov = false;
4186
4187 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4188 && current_function_is_leaf)
4189 {
4190 frame->red_zone_size = frame->to_allocate;
4191 if (frame->save_regs_using_mov)
4192 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4193 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4194 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4195 }
4196 else
4197 frame->red_zone_size = 0;
4198 frame->to_allocate -= frame->red_zone_size;
4199 frame->stack_pointer_offset -= frame->red_zone_size;
4200 #if 0
4201 fprintf (stderr, "nregs: %i\n", frame->nregs);
4202 fprintf (stderr, "size: %i\n", size);
4203 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4204 fprintf (stderr, "padding1: %i\n", frame->padding1);
4205 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4206 fprintf (stderr, "padding2: %i\n", frame->padding2);
4207 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4208 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4209 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4210 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4211 frame->hard_frame_pointer_offset);
4212 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4213 #endif
4214 }
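/* Roughly, the frame computed above is laid out, from the top of the
   frame downwards, as

       return address
       saved %ebp (when frame_pointer_needed)   <- hard_frame_pointer_offset
       saved registers (frame->nregs words)
       va-arg register save area (64-bit varargs only)
       padding1
       local variables (get_frame_size ())      <- frame_pointer_offset
       outgoing argument area
       padding2                                 <- stack_pointer_offset

   and the red zone, when usable, is subtracted from to_allocate and
   stack_pointer_offset at the end.  */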
4215
4216 /* Emit code to save registers in the prologue. */
4217
4218 static void
4219 ix86_emit_save_regs (void)
4220 {
4221 int regno;
4222 rtx insn;
4223
4224 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4225 if (ix86_save_reg (regno, true))
4226 {
4227 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4228 RTX_FRAME_RELATED_P (insn) = 1;
4229 }
4230 }
4231
4232 /* Emit code to save registers using MOV insns. The first register
4233 is saved at POINTER + OFFSET. */
4234 static void
4235 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4236 {
4237 int regno;
4238 rtx insn;
4239
4240 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4241 if (ix86_save_reg (regno, true))
4242 {
4243 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4244 Pmode, offset),
4245 gen_rtx_REG (Pmode, regno));
4246 RTX_FRAME_RELATED_P (insn) = 1;
4247 offset += UNITS_PER_WORD;
4248 }
4249 }
4250
4251 /* Expand prologue or epilogue stack adjustment.
4252 The pattern exists to put a dependency on all ebp-based memory accesses.
4253 STYLE should be negative if instructions should be marked as frame related,
4254 zero if the %r11 register is live and cannot be freely used, and positive
4255 otherwise. */
4256
4257 static void
4258 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4259 {
4260 rtx insn;
4261
4262 if (! TARGET_64BIT)
4263 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4264 else if (x86_64_immediate_operand (offset, DImode))
4265 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4266 else
4267 {
4268 rtx r11;
4269 /* r11 is used by indirect sibcall return as well, set before the
4270 epilogue and used after the epilogue. ATM indirect sibcall
4271 shouldn't be used together with huge frame sizes in one
4272 function because of the frame_size check in sibcall.c. */
4273 if (style == 0)
4274 abort ();
4275 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4276 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4277 if (style < 0)
4278 RTX_FRAME_RELATED_P (insn) = 1;
4279 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4280 offset));
4281 }
4282 if (style < 0)
4283 RTX_FRAME_RELATED_P (insn) = 1;
4284 }
4285
4286 /* Expand the prologue into a bunch of separate insns. */
4287
4288 void
4289 ix86_expand_prologue (void)
4290 {
4291 rtx insn;
4292 bool pic_reg_used;
4293 struct ix86_frame frame;
4294 HOST_WIDE_INT allocate;
4295
4296 ix86_compute_frame_layout (&frame);
4297
4298 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4299 slower on all targets. Also sdb doesn't like it. */
4300
4301 if (frame_pointer_needed)
4302 {
4303 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4304 RTX_FRAME_RELATED_P (insn) = 1;
4305
4306 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4307 RTX_FRAME_RELATED_P (insn) = 1;
4308 }
4309
4310 allocate = frame.to_allocate;
4311
4312 if (!frame.save_regs_using_mov)
4313 ix86_emit_save_regs ();
4314 else
4315 allocate += frame.nregs * UNITS_PER_WORD;
4316
4317 /* When using the red zone we may start register saving before allocating
4318 the stack frame, saving one cycle of the prologue. */
4319 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4320 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4321 : stack_pointer_rtx,
4322 -frame.nregs * UNITS_PER_WORD);
4323
4324 if (allocate == 0)
4325 ;
4326 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4327 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4328 GEN_INT (-allocate), -1);
4329 else
4330 {
4331 /* Only valid for Win32. */
4332 rtx eax = gen_rtx_REG (SImode, 0);
4333 bool eax_live = ix86_eax_live_at_start_p ();
4334
4335 if (TARGET_64BIT)
4336 abort ();
4337
4338 if (eax_live)
4339 {
4340 emit_insn (gen_push (eax));
4341 allocate -= 4;
4342 }
4343
4344 insn = emit_move_insn (eax, GEN_INT (allocate));
4345 RTX_FRAME_RELATED_P (insn) = 1;
4346
4347 insn = emit_insn (gen_allocate_stack_worker (eax));
4348 RTX_FRAME_RELATED_P (insn) = 1;
4349
4350 if (eax_live)
4351 {
4352 rtx t;
4353 if (frame_pointer_needed)
4354 t = plus_constant (hard_frame_pointer_rtx,
4355 allocate
4356 - frame.to_allocate
4357 - frame.nregs * UNITS_PER_WORD);
4358 else
4359 t = plus_constant (stack_pointer_rtx, allocate);
4360 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4361 }
4362 }
4363
4364 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4365 {
4366 if (!frame_pointer_needed || !frame.to_allocate)
4367 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4368 else
4369 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4370 -frame.nregs * UNITS_PER_WORD);
4371 }
4372
4373 pic_reg_used = false;
4374 if (pic_offset_table_rtx
4375 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4376 || current_function_profile))
4377 {
4378 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4379
4380 if (alt_pic_reg_used != INVALID_REGNUM)
4381 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4382
4383 pic_reg_used = true;
4384 }
4385
4386 if (pic_reg_used)
4387 {
4388 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4389
4390 /* Even with accurate pre-reload life analysis, we can wind up
4391 deleting all references to the pic register after reload.
4392 Consider if cross-jumping unifies two sides of a branch
4393 controlled by a comparison vs the only read from a global.
4394 In which case, allow the set_got to be deleted, though we're
4395 too late to do anything about the ebx save in the prologue. */
4396 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4397 }
4398
4399 /* Prevent function calls from being scheduled before the call to mcount.
4400 In the pic_reg_used case, make sure that the got load isn't deleted. */
4401 if (current_function_profile)
4402 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4403 }
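/* A typical frame-pointer prologue produced above looks roughly like

       pushl %ebp
       movl  %esp, %ebp
       pushl <callee-saved regs>              (push-based save)
       subl  $to_allocate, %esp

   or, with the fast move-based prologue,

       pushl %ebp
       movl  %esp, %ebp
       subl  $(to_allocate + nregs words), %esp
       movl  <callee-saved regs>, <save slots>

   with the allocate_stack_worker path used only for large stack-probed
   allocations.  */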
4404
4405 /* Emit code to restore saved registers using MOV insns. First register
4406 is restored from POINTER + OFFSET. */
4407 static void
4408 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4409 int maybe_eh_return)
4410 {
4411 int regno;
4412 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4413
4414 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4415 if (ix86_save_reg (regno, maybe_eh_return))
4416 {
4417 /* Ensure that adjust_address won't be forced to produce a pointer
4418 out of the range allowed by the x86-64 instruction set. */
4419 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4420 {
4421 rtx r11;
4422
4423 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4424 emit_move_insn (r11, GEN_INT (offset));
4425 emit_insn (gen_adddi3 (r11, r11, pointer));
4426 base_address = gen_rtx_MEM (Pmode, r11);
4427 offset = 0;
4428 }
4429 emit_move_insn (gen_rtx_REG (Pmode, regno),
4430 adjust_address (base_address, Pmode, offset));
4431 offset += UNITS_PER_WORD;
4432 }
4433 }
4434
4435 /* Restore function stack, frame, and registers. */
4436
4437 void
4438 ix86_expand_epilogue (int style)
4439 {
4440 int regno;
4441 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4442 struct ix86_frame frame;
4443 HOST_WIDE_INT offset;
4444
4445 ix86_compute_frame_layout (&frame);
4446
4447 /* Calculate start of saved registers relative to ebp. Special care
4448 must be taken for the normal return case of a function using
4449 eh_return: the eax and edx registers are marked as saved, but not
4450 restored along this path. */
4451 offset = frame.nregs;
4452 if (current_function_calls_eh_return && style != 2)
4453 offset -= 2;
4454 offset *= -UNITS_PER_WORD;
4455
4456 /* If we're only restoring one register and sp is not valid then
4457 use a move instruction to restore the register, since it's
4458 less work than reloading sp and popping the register.
4459
4460 The default code results in a stack adjustment using an add/lea instruction,
4461 while this code results in a LEAVE instruction (or discrete equivalent),
4462 so it is profitable in some other cases as well, especially when there
4463 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4464 and there is exactly one register to pop. This heuristic may need some
4465 tuning in the future. */
4466 if ((!sp_valid && frame.nregs <= 1)
4467 || (TARGET_EPILOGUE_USING_MOVE
4468 && cfun->machine->use_fast_prologue_epilogue
4469 && (frame.nregs > 1 || frame.to_allocate))
4470 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4471 || (frame_pointer_needed && TARGET_USE_LEAVE
4472 && cfun->machine->use_fast_prologue_epilogue
4473 && frame.nregs == 1)
4474 || current_function_calls_eh_return)
4475 {
4476 /* Restore registers. We can use ebp or esp to address the memory
4477 locations. If both are available, default to ebp, since offsets
4478 are known to be small. The only exception is esp pointing directly to
4479 the end of the block of saved registers, where we may simplify the
4480 addressing mode. */
4481
4482 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4483 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4484 frame.to_allocate, style == 2);
4485 else
4486 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4487 offset, style == 2);
4488
4489 /* eh_return epilogues need %ecx added to the stack pointer. */
4490 if (style == 2)
4491 {
4492 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4493
4494 if (frame_pointer_needed)
4495 {
4496 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4497 tmp = plus_constant (tmp, UNITS_PER_WORD);
4498 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4499
4500 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4501 emit_move_insn (hard_frame_pointer_rtx, tmp);
4502
4503 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4504 const0_rtx, style);
4505 }
4506 else
4507 {
4508 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4509 tmp = plus_constant (tmp, (frame.to_allocate
4510 + frame.nregs * UNITS_PER_WORD));
4511 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4512 }
4513 }
4514 else if (!frame_pointer_needed)
4515 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4516 GEN_INT (frame.to_allocate
4517 + frame.nregs * UNITS_PER_WORD),
4518 style);
4519 /* If not an i386, mov & pop is faster than "leave". */
4520 else if (TARGET_USE_LEAVE || optimize_size
4521 || !cfun->machine->use_fast_prologue_epilogue)
4522 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4523 else
4524 {
4525 pro_epilogue_adjust_stack (stack_pointer_rtx,
4526 hard_frame_pointer_rtx,
4527 const0_rtx, style);
4528 if (TARGET_64BIT)
4529 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4530 else
4531 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4532 }
4533 }
4534 else
4535 {
4536 /* First step is to deallocate the stack frame so that we can
4537 pop the registers. */
4538 if (!sp_valid)
4539 {
4540 if (!frame_pointer_needed)
4541 abort ();
4542 pro_epilogue_adjust_stack (stack_pointer_rtx,
4543 hard_frame_pointer_rtx,
4544 GEN_INT (offset), style);
4545 }
4546 else if (frame.to_allocate)
4547 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4548 GEN_INT (frame.to_allocate), style);
4549
4550 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4551 if (ix86_save_reg (regno, false))
4552 {
4553 if (TARGET_64BIT)
4554 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4555 else
4556 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4557 }
4558 if (frame_pointer_needed)
4559 {
4560 /* Leave results in shorter dependency chains on CPUs that are
4561 able to grok it fast. */
4562 if (TARGET_USE_LEAVE)
4563 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4564 else if (TARGET_64BIT)
4565 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4566 else
4567 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4568 }
4569 }
4570
4571 /* Sibcall epilogues don't want a return instruction. */
4572 if (style == 0)
4573 return;
4574
4575 if (current_function_pops_args && current_function_args_size)
4576 {
4577 rtx popc = GEN_INT (current_function_pops_args);
4578
4579 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4580 return address, do an explicit add, and jump indirectly to the
4581 caller. */
4582
4583 if (current_function_pops_args >= 65536)
4584 {
4585 rtx ecx = gen_rtx_REG (SImode, 2);
4586
4587 /* There is no "pascal" calling convention in 64bit ABI. */
4588 if (TARGET_64BIT)
4589 abort ();
4590
4591 emit_insn (gen_popsi1 (ecx));
4592 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4593 emit_jump_insn (gen_return_indirect_internal (ecx));
4594 }
4595 else
4596 emit_jump_insn (gen_return_pop_internal (popc));
4597 }
4598 else
4599 emit_jump_insn (gen_return_internal ());
4600 }
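/* Typical epilogues produced above are, roughly,

       leave                                  (frame pointer + TARGET_USE_LEAVE)
       ret
   or
       addl  $to_allocate, %esp               (pop-based restore)
       popl  <callee-saved regs>
       ret                                    (or  ret $N  when popping args)
*/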
4601
4602 /* Reset from the function's potential modifications. */
4603
4604 static void
4605 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4606 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4607 {
4608 if (pic_offset_table_rtx)
4609 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4610 }
4611 \f
4612 /* Extract the parts of an RTL expression that is a valid memory address
4613 for an instruction. Return 0 if the structure of the address is
4614 grossly off. Return -1 if the address contains ASHIFT, so it is not
4615 strictly valid, but is still used for computing the length of an lea instruction. */
4616
4617 int
4618 ix86_decompose_address (rtx addr, struct ix86_address *out)
4619 {
4620 rtx base = NULL_RTX;
4621 rtx index = NULL_RTX;
4622 rtx disp = NULL_RTX;
4623 HOST_WIDE_INT scale = 1;
4624 rtx scale_rtx = NULL_RTX;
4625 int retval = 1;
4626 enum ix86_address_seg seg = SEG_DEFAULT;
4627
4628 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4629 base = addr;
4630 else if (GET_CODE (addr) == PLUS)
4631 {
4632 rtx addends[4], op;
4633 int n = 0, i;
4634
4635 op = addr;
4636 do
4637 {
4638 if (n >= 4)
4639 return 0;
4640 addends[n++] = XEXP (op, 1);
4641 op = XEXP (op, 0);
4642 }
4643 while (GET_CODE (op) == PLUS);
4644 if (n >= 4)
4645 return 0;
4646 addends[n] = op;
4647
4648 for (i = n; i >= 0; --i)
4649 {
4650 op = addends[i];
4651 switch (GET_CODE (op))
4652 {
4653 case MULT:
4654 if (index)
4655 return 0;
4656 index = XEXP (op, 0);
4657 scale_rtx = XEXP (op, 1);
4658 break;
4659
4660 case UNSPEC:
4661 if (XINT (op, 1) == UNSPEC_TP
4662 && TARGET_TLS_DIRECT_SEG_REFS
4663 && seg == SEG_DEFAULT)
4664 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4665 else
4666 return 0;
4667 break;
4668
4669 case REG:
4670 case SUBREG:
4671 if (!base)
4672 base = op;
4673 else if (!index)
4674 index = op;
4675 else
4676 return 0;
4677 break;
4678
4679 case CONST:
4680 case CONST_INT:
4681 case SYMBOL_REF:
4682 case LABEL_REF:
4683 if (disp)
4684 return 0;
4685 disp = op;
4686 break;
4687
4688 default:
4689 return 0;
4690 }
4691 }
4692 }
4693 else if (GET_CODE (addr) == MULT)
4694 {
4695 index = XEXP (addr, 0); /* index*scale */
4696 scale_rtx = XEXP (addr, 1);
4697 }
4698 else if (GET_CODE (addr) == ASHIFT)
4699 {
4700 rtx tmp;
4701
4702 /* We're called for lea too, which implements ashift on occasion. */
4703 index = XEXP (addr, 0);
4704 tmp = XEXP (addr, 1);
4705 if (GET_CODE (tmp) != CONST_INT)
4706 return 0;
4707 scale = INTVAL (tmp);
4708 if ((unsigned HOST_WIDE_INT) scale > 3)
4709 return 0;
4710 scale = 1 << scale;
4711 retval = -1;
4712 }
4713 else
4714 disp = addr; /* displacement */
4715
4716 /* Extract the integral value of scale. */
4717 if (scale_rtx)
4718 {
4719 if (GET_CODE (scale_rtx) != CONST_INT)
4720 return 0;
4721 scale = INTVAL (scale_rtx);
4722 }
4723
4724 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4725 if (base && index && scale == 1
4726 && (index == arg_pointer_rtx
4727 || index == frame_pointer_rtx
4728 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4729 {
4730 rtx tmp = base;
4731 base = index;
4732 index = tmp;
4733 }
4734
4735 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4736 if ((base == hard_frame_pointer_rtx
4737 || base == frame_pointer_rtx
4738 || base == arg_pointer_rtx) && !disp)
4739 disp = const0_rtx;
4740
4741 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4742 Avoid this by transforming to [%esi+0]. */
4743 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4744 && base && !index && !disp
4745 && REG_P (base)
4746 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4747 disp = const0_rtx;
4748
4749 /* Special case: encode reg+reg instead of reg*2. */
4750 if (!base && index && scale && scale == 2)
4751 base = index, scale = 1;
4752
4753 /* Special case: scaling cannot be encoded without base or displacement. */
4754 if (!base && !disp && index && scale != 1)
4755 disp = const0_rtx;
4756
4757 out->base = base;
4758 out->index = index;
4759 out->disp = disp;
4760 out->scale = scale;
4761 out->seg = seg;
4762
4763 return retval;
4764 }
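/* For example, an address such as

       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))

   decomposes into base = B, index = A, scale = 4, disp = 12, i.e. the
   x86 addressing form 12(B,A,4).  */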
4765 \f
4766 /* Return the cost of the memory address x.
4767 For i386, it is better to use a complex address than let gcc copy
4768 the address into a reg and make a new pseudo. But not if the address
4769 requires two regs - that would mean more pseudos with longer
4770 lifetimes. */
4771 static int
4772 ix86_address_cost (rtx x)
4773 {
4774 struct ix86_address parts;
4775 int cost = 1;
4776
4777 if (!ix86_decompose_address (x, &parts))
4778 abort ();
4779
4780 /* More complex memory references are better. */
4781 if (parts.disp && parts.disp != const0_rtx)
4782 cost--;
4783 if (parts.seg != SEG_DEFAULT)
4784 cost--;
4785
4786 /* Attempt to minimize number of registers in the address. */
4787 if ((parts.base
4788 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4789 || (parts.index
4790 && (!REG_P (parts.index)
4791 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4792 cost++;
4793
4794 if (parts.base
4795 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4796 && parts.index
4797 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4798 && parts.base != parts.index)
4799 cost++;
4800
4801 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4802 since its predecode logic can't detect the length of such instructions
4803 and decoding degenerates to the vector decoder. Increase the cost of such
4804 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4805 to split such addresses or even refuse them at all.
4806
4807 The following addressing modes are affected:
4808 [base+scale*index]
4809 [scale*index+disp]
4810 [base+index]
4811
4812 The first and last cases may be avoidable by explicitly coding the zero into
4813 the memory address, but I don't have an AMD-K6 machine handy to check this
4814 theory. */
4815
4816 if (TARGET_K6
4817 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4818 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4819 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4820 cost += 10;
4821
4822 return cost;
4823 }
4824 \f
4825 /* If X is a machine specific address (i.e. a symbol or label being
4826 referenced as a displacement from the GOT implemented using an
4827 UNSPEC), then return the base term. Otherwise return X. */
4828
4829 rtx
4830 ix86_find_base_term (rtx x)
4831 {
4832 rtx term;
4833
4834 if (TARGET_64BIT)
4835 {
4836 if (GET_CODE (x) != CONST)
4837 return x;
4838 term = XEXP (x, 0);
4839 if (GET_CODE (term) == PLUS
4840 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4841 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4842 term = XEXP (term, 0);
4843 if (GET_CODE (term) != UNSPEC
4844 || XINT (term, 1) != UNSPEC_GOTPCREL)
4845 return x;
4846
4847 term = XVECEXP (term, 0, 0);
4848
4849 if (GET_CODE (term) != SYMBOL_REF
4850 && GET_CODE (term) != LABEL_REF)
4851 return x;
4852
4853 return term;
4854 }
4855
4856 term = ix86_delegitimize_address (x);
4857
4858 if (GET_CODE (term) != SYMBOL_REF
4859 && GET_CODE (term) != LABEL_REF)
4860 return x;
4861
4862 return term;
4863 }
4864
4865 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4866 this is used to form addresses to local data when -fPIC is in
4867 use. */
4868
4869 static bool
4870 darwin_local_data_pic (rtx disp)
4871 {
4872 if (GET_CODE (disp) == MINUS)
4873 {
4874 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4875 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4876 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4877 {
4878 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4879 if (! strcmp (sym_name, "<pic base>"))
4880 return true;
4881 }
4882 }
4883
4884 return false;
4885 }
4886 \f
4887 /* Determine if a given RTX is a valid constant. We already know this
4888 satisfies CONSTANT_P. */
4889
4890 bool
4891 legitimate_constant_p (rtx x)
4892 {
4893 switch (GET_CODE (x))
4894 {
4895 case CONST:
4896 x = XEXP (x, 0);
4897
4898 if (GET_CODE (x) == PLUS)
4899 {
4900 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4901 return false;
4902 x = XEXP (x, 0);
4903 }
4904
4905 if (TARGET_MACHO && darwin_local_data_pic (x))
4906 return true;
4907
4908 /* Only some unspecs are valid as "constants". */
4909 if (GET_CODE (x) == UNSPEC)
4910 switch (XINT (x, 1))
4911 {
4912 case UNSPEC_TPOFF:
4913 case UNSPEC_NTPOFF:
4914 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4915 case UNSPEC_DTPOFF:
4916 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4917 default:
4918 return false;
4919 }
4920
4921 /* We must have drilled down to a symbol. */
4922 if (!symbolic_operand (x, Pmode))
4923 return false;
4924 /* FALLTHRU */
4925
4926 case SYMBOL_REF:
4927 /* TLS symbols are never valid. */
4928 if (tls_symbolic_operand (x, Pmode))
4929 return false;
4930 break;
4931
4932 default:
4933 break;
4934 }
4935
4936 /* Otherwise we handle everything else in the move patterns. */
4937 return true;
4938 }
4939
4940 /* Determine if it's legal to put X into the constant pool. This
4941 is not possible for the address of thread-local symbols, which
4942 is checked above. */
4943
4944 static bool
4945 ix86_cannot_force_const_mem (rtx x)
4946 {
4947 return !legitimate_constant_p (x);
4948 }
4949
4950 /* Determine if a given RTX is a valid constant address. */
4951
4952 bool
4953 constant_address_p (rtx x)
4954 {
4955 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4956 }
4957
4958 /* Nonzero if the constant value X is a legitimate general operand
4959 when generating PIC code. It is given that flag_pic is on and
4960 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4961
4962 bool
4963 legitimate_pic_operand_p (rtx x)
4964 {
4965 rtx inner;
4966
4967 switch (GET_CODE (x))
4968 {
4969 case CONST:
4970 inner = XEXP (x, 0);
4971
4972 /* Only some unspecs are valid as "constants". */
4973 if (GET_CODE (inner) == UNSPEC)
4974 switch (XINT (inner, 1))
4975 {
4976 case UNSPEC_TPOFF:
4977 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4978 default:
4979 return false;
4980 }
4981 /* FALLTHRU */
4982
4983 case SYMBOL_REF:
4984 case LABEL_REF:
4985 return legitimate_pic_address_disp_p (x);
4986
4987 default:
4988 return true;
4989 }
4990 }
4991
4992 /* Determine if a given CONST RTX is a valid memory displacement
4993 in PIC mode. */
4994
4995 int
4996 legitimate_pic_address_disp_p (rtx disp)
4997 {
4998 bool saw_plus;
4999
5000 /* In 64bit mode we can allow direct addresses of symbols and labels
5001 when they are not dynamic symbols. */
5002 if (TARGET_64BIT)
5003 {
5004 /* TLS references should always be enclosed in UNSPEC. */
5005 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5006 return 0;
5007 if (GET_CODE (disp) == SYMBOL_REF
5008 && ix86_cmodel == CM_SMALL_PIC
5009 && SYMBOL_REF_LOCAL_P (disp))
5010 return 1;
5011 if (GET_CODE (disp) == LABEL_REF)
5012 return 1;
5013 if (GET_CODE (disp) == CONST
5014 && GET_CODE (XEXP (disp, 0)) == PLUS)
5015 {
5016 rtx op0 = XEXP (XEXP (disp, 0), 0);
5017 rtx op1 = XEXP (XEXP (disp, 0), 1);
5018
5019 /* TLS references should always be enclosed in UNSPEC. */
5020 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5021 return 0;
5022 if (((GET_CODE (op0) == SYMBOL_REF
5023 && ix86_cmodel == CM_SMALL_PIC
5024 && SYMBOL_REF_LOCAL_P (op0))
5025 || GET_CODE (op0) == LABEL_REF)
5026 && GET_CODE (op1) == CONST_INT
5027 && INTVAL (op1) < 16*1024*1024
5028 && INTVAL (op1) >= -16*1024*1024)
5029 return 1;
5030 }
5031 }
5032 if (GET_CODE (disp) != CONST)
5033 return 0;
5034 disp = XEXP (disp, 0);
5035
5036 if (TARGET_64BIT)
5037 {
5038 /* It is not safe to allow PLUS expressions; this would limit the allowed
5039 distance of GOT tables. We should not need these anyway. */
5040 if (GET_CODE (disp) != UNSPEC
5041 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5042 return 0;
5043
5044 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5045 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5046 return 0;
5047 return 1;
5048 }
5049
5050 saw_plus = false;
5051 if (GET_CODE (disp) == PLUS)
5052 {
5053 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5054 return 0;
5055 disp = XEXP (disp, 0);
5056 saw_plus = true;
5057 }
5058
5059 if (TARGET_MACHO && darwin_local_data_pic (disp))
5060 return 1;
5061
5062 if (GET_CODE (disp) != UNSPEC)
5063 return 0;
5064
5065 switch (XINT (disp, 1))
5066 {
5067 case UNSPEC_GOT:
5068 if (saw_plus)
5069 return false;
5070 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5071 case UNSPEC_GOTOFF:
5072 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5073 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5074 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5075 return false;
5076 case UNSPEC_GOTTPOFF:
5077 case UNSPEC_GOTNTPOFF:
5078 case UNSPEC_INDNTPOFF:
5079 if (saw_plus)
5080 return false;
5081 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5082 case UNSPEC_NTPOFF:
5083 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5084 case UNSPEC_DTPOFF:
5085 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5086 }
5087
5088 return 0;
5089 }
5090
5091 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5092 memory address for an instruction. The MODE argument is the machine mode
5093 for the MEM expression that wants to use this address.
5094
5095 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5096 convert common non-canonical forms to canonical form so that they will
5097 be recognized. */
5098
5099 int
5100 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5101 {
5102 struct ix86_address parts;
5103 rtx base, index, disp;
5104 HOST_WIDE_INT scale;
5105 const char *reason = NULL;
5106 rtx reason_rtx = NULL_RTX;
5107
5108 if (TARGET_DEBUG_ADDR)
5109 {
5110 fprintf (stderr,
5111 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5112 GET_MODE_NAME (mode), strict);
5113 debug_rtx (addr);
5114 }
5115
5116 if (ix86_decompose_address (addr, &parts) <= 0)
5117 {
5118 reason = "decomposition failed";
5119 goto report_error;
5120 }
5121
5122 base = parts.base;
5123 index = parts.index;
5124 disp = parts.disp;
5125 scale = parts.scale;
5126
5127 /* Validate base register.
5128
5129 Don't allow SUBREGs here; they can lead to spill failures when the base
5130 is one word out of a two-word structure, which is represented internally
5131 as a DImode int. */
5132
5133 if (base)
5134 {
5135 reason_rtx = base;
5136
5137 if (GET_CODE (base) != REG)
5138 {
5139 reason = "base is not a register";
5140 goto report_error;
5141 }
5142
5143 if (GET_MODE (base) != Pmode)
5144 {
5145 reason = "base is not in Pmode";
5146 goto report_error;
5147 }
5148
5149 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5150 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5151 {
5152 reason = "base is not valid";
5153 goto report_error;
5154 }
5155 }
5156
5157 /* Validate index register.
5158
5159 Don't allow SUBREGs here; they can lead to spill failures when the index
5160 is one word out of a two-word structure, which is represented internally
5161 as a DImode int. */
5162
5163 if (index)
5164 {
5165 reason_rtx = index;
5166
5167 if (GET_CODE (index) != REG)
5168 {
5169 reason = "index is not a register";
5170 goto report_error;
5171 }
5172
5173 if (GET_MODE (index) != Pmode)
5174 {
5175 reason = "index is not in Pmode";
5176 goto report_error;
5177 }
5178
5179 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5180 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5181 {
5182 reason = "index is not valid";
5183 goto report_error;
5184 }
5185 }
5186
5187 /* Validate scale factor. */
5188 if (scale != 1)
5189 {
5190 reason_rtx = GEN_INT (scale);
5191 if (!index)
5192 {
5193 reason = "scale without index";
5194 goto report_error;
5195 }
5196
5197 if (scale != 2 && scale != 4 && scale != 8)
5198 {
5199 reason = "scale is not a valid multiplier";
5200 goto report_error;
5201 }
5202 }
5203
5204 /* Validate displacement. */
5205 if (disp)
5206 {
5207 reason_rtx = disp;
5208
5209 if (GET_CODE (disp) == CONST
5210 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5211 switch (XINT (XEXP (disp, 0), 1))
5212 {
5213 case UNSPEC_GOT:
5214 case UNSPEC_GOTOFF:
5215 case UNSPEC_GOTPCREL:
5216 if (!flag_pic)
5217 abort ();
5218 goto is_legitimate_pic;
5219
5220 case UNSPEC_GOTTPOFF:
5221 case UNSPEC_GOTNTPOFF:
5222 case UNSPEC_INDNTPOFF:
5223 case UNSPEC_NTPOFF:
5224 case UNSPEC_DTPOFF:
5225 break;
5226
5227 default:
5228 reason = "invalid address unspec";
5229 goto report_error;
5230 }
5231
5232 else if (flag_pic && (SYMBOLIC_CONST (disp)
5233 #if TARGET_MACHO
5234 && !machopic_operand_p (disp)
5235 #endif
5236 ))
5237 {
5238 is_legitimate_pic:
5239 if (TARGET_64BIT && (index || base))
5240 {
5241 /* foo@dtpoff(%rX) is ok. */
5242 if (GET_CODE (disp) != CONST
5243 || GET_CODE (XEXP (disp, 0)) != PLUS
5244 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5245 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5246 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5247 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5248 {
5249 reason = "non-constant pic memory reference";
5250 goto report_error;
5251 }
5252 }
5253 else if (! legitimate_pic_address_disp_p (disp))
5254 {
5255 reason = "displacement is an invalid pic construct";
5256 goto report_error;
5257 }
5258
5259 /* This code used to verify that a symbolic pic displacement
5260 includes the pic_offset_table_rtx register.
5261
5262 While this is a good idea, unfortunately these constructs may
5263 be created by the "adds using lea" optimization for incorrect
5264 code like:
5265
5266 int a;
5267 int foo(int i)
5268 {
5269 return *(&a+i);
5270 }
5271
5272 This code is nonsensical, but it results in addressing the
5273 GOT table with a pic_offset_table_rtx base. We can't
5274 just refuse it easily, since it gets matched by the
5275 "addsi3" pattern, which later gets split to an lea when the
5276 output register differs from the input. While this
5277 could be handled by a separate addsi pattern for this case
5278 that never results in an lea, disabling this test seems to be
5279 the easier and correct fix for the crash. */
5280 }
5281 else if (GET_CODE (disp) != LABEL_REF
5282 && GET_CODE (disp) != CONST_INT
5283 && (GET_CODE (disp) != CONST
5284 || !legitimate_constant_p (disp))
5285 && (GET_CODE (disp) != SYMBOL_REF
5286 || !legitimate_constant_p (disp)))
5287 {
5288 reason = "displacement is not constant";
5289 goto report_error;
5290 }
5291 else if (TARGET_64BIT
5292 && !x86_64_immediate_operand (disp, VOIDmode))
5293 {
5294 reason = "displacement is out of range";
5295 goto report_error;
5296 }
5297 }
5298
5299 /* Everything looks valid. */
5300 if (TARGET_DEBUG_ADDR)
5301 fprintf (stderr, "Success.\n");
5302 return TRUE;
5303
5304 report_error:
5305 if (TARGET_DEBUG_ADDR)
5306 {
5307 fprintf (stderr, "Error: %s\n", reason);
5308 debug_rtx (reason_rtx);
5309 }
5310 return FALSE;
5311 }
5312 \f
5313 /* Return a unique alias set for the GOT. */
5314
5315 static HOST_WIDE_INT
5316 ix86_GOT_alias_set (void)
5317 {
5318 static HOST_WIDE_INT set = -1;
5319 if (set == -1)
5320 set = new_alias_set ();
5321 return set;
5322 }
5323
5324 /* Return a legitimate reference for ORIG (an address) using the
5325 register REG. If REG is 0, a new pseudo is generated.
5326
5327 There are two types of references that must be handled:
5328
5329 1. Global data references must load the address from the GOT, via
5330 the PIC reg. An insn is emitted to do this load, and the reg is
5331 returned.
5332
5333 2. Static data references, constant pool addresses, and code labels
5334 compute the address as an offset from the GOT, whose base is in
5335 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5336 differentiate them from global data objects. The returned
5337 address is the PIC reg + an unspec constant.
5338
5339 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5340 reg also appears in the address. */
5341
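/* Roughly, for the common cases (a sketch based on the code below, not a
   specification): on !TARGET_64BIT a local symbol becomes

	(plus (reg pic) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   a global symbol becomes a load from its GOT slot

	(mem (plus (reg pic) (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))

   and on TARGET_64BIT a symbol becomes a RIP-relative GOT load

	(mem (const (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL)))

   with the result usually copied into REG.  */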
5342 static rtx
5343 legitimize_pic_address (rtx orig, rtx reg)
5344 {
5345 rtx addr = orig;
5346 rtx new = orig;
5347 rtx base;
5348
5349 #if TARGET_MACHO
5350 if (reg == 0)
5351 reg = gen_reg_rtx (Pmode);
5352 /* Use the generic Mach-O PIC machinery. */
5353 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5354 #endif
5355
5356 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5357 new = addr;
5358 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5359 {
5360 /* This symbol may be referenced via a displacement from the PIC
5361 base address (@GOTOFF). */
5362
5363 if (reload_in_progress)
5364 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5365 if (GET_CODE (addr) == CONST)
5366 addr = XEXP (addr, 0);
5367 if (GET_CODE (addr) == PLUS)
5368 {
5369 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5370 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5371 }
5372 else
5373 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5374 new = gen_rtx_CONST (Pmode, new);
5375 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5376
5377 if (reg != 0)
5378 {
5379 emit_move_insn (reg, new);
5380 new = reg;
5381 }
5382 }
5383 else if (GET_CODE (addr) == SYMBOL_REF)
5384 {
5385 if (TARGET_64BIT)
5386 {
5387 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5388 new = gen_rtx_CONST (Pmode, new);
5389 new = gen_const_mem (Pmode, new);
5390 set_mem_alias_set (new, ix86_GOT_alias_set ());
5391
5392 if (reg == 0)
5393 reg = gen_reg_rtx (Pmode);
5394 /* Use gen_movsi directly; otherwise the address is loaded
5395 into a register for CSE. We don't want to CSE these addresses;
5396 instead we CSE addresses from the GOT table, so skip this. */
5397 emit_insn (gen_movsi (reg, new));
5398 new = reg;
5399 }
5400 else
5401 {
5402 /* This symbol must be referenced via a load from the
5403 Global Offset Table (@GOT). */
5404
5405 if (reload_in_progress)
5406 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5407 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5408 new = gen_rtx_CONST (Pmode, new);
5409 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5410 new = gen_const_mem (Pmode, new);
5411 set_mem_alias_set (new, ix86_GOT_alias_set ());
5412
5413 if (reg == 0)
5414 reg = gen_reg_rtx (Pmode);
5415 emit_move_insn (reg, new);
5416 new = reg;
5417 }
5418 }
5419 else
5420 {
5421 if (GET_CODE (addr) == CONST)
5422 {
5423 addr = XEXP (addr, 0);
5424
5425 /* We must match stuff we generate before. Assume the only
5426 unspecs that can get here are ours. Not that we could do
5427 anything with them anyway.... */
5428 if (GET_CODE (addr) == UNSPEC
5429 || (GET_CODE (addr) == PLUS
5430 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5431 return orig;
5432 if (GET_CODE (addr) != PLUS)
5433 abort ();
5434 }
5435 if (GET_CODE (addr) == PLUS)
5436 {
5437 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5438
5439 /* Check first to see if this is a constant offset from a @GOTOFF
5440 symbol reference. */
5441 if (local_symbolic_operand (op0, Pmode)
5442 && GET_CODE (op1) == CONST_INT)
5443 {
5444 if (!TARGET_64BIT)
5445 {
5446 if (reload_in_progress)
5447 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5448 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5449 UNSPEC_GOTOFF);
5450 new = gen_rtx_PLUS (Pmode, new, op1);
5451 new = gen_rtx_CONST (Pmode, new);
5452 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5453
5454 if (reg != 0)
5455 {
5456 emit_move_insn (reg, new);
5457 new = reg;
5458 }
5459 }
5460 else
5461 {
5462 if (INTVAL (op1) < -16*1024*1024
5463 || INTVAL (op1) >= 16*1024*1024)
5464 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5465 }
5466 }
5467 else
5468 {
5469 base = legitimize_pic_address (XEXP (addr, 0), reg);
5470 new = legitimize_pic_address (XEXP (addr, 1),
5471 base == reg ? NULL_RTX : reg);
5472
5473 if (GET_CODE (new) == CONST_INT)
5474 new = plus_constant (base, INTVAL (new));
5475 else
5476 {
5477 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5478 {
5479 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5480 new = XEXP (new, 1);
5481 }
5482 new = gen_rtx_PLUS (Pmode, base, new);
5483 }
5484 }
5485 }
5486 }
5487 return new;
5488 }
5489 \f
5490 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5491
5492 static rtx
5493 get_thread_pointer (int to_reg)
5494 {
5495 rtx tp, reg, insn;
5496
5497 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5498 if (!to_reg)
5499 return tp;
5500
5501 reg = gen_reg_rtx (Pmode);
5502 insn = gen_rtx_SET (VOIDmode, reg, tp);
5503 insn = emit_insn (insn);
5504
5505 return reg;
5506 }
5507
5508 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5509 false if we expect this to be used for a memory address and true if
5510 we expect to load the address into a register. */
5511
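/* A rough reminder of the access sequences the cases below expand to (not
   a complete description): the dynamic models obtain the address at run
   time through the tls_global_dynamic / tls_local_dynamic_base call
   patterns; initial exec loads the thread-pointer-relative offset from
   the GOT (UNSPEC_GOTTPOFF and friends) and adds it to the thread
   pointer; local exec adds a link-time constant (UNSPEC_NTPOFF or
   UNSPEC_TPOFF) to the thread pointer.  */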
5512 static rtx
5513 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5514 {
5515 rtx dest, base, off, pic;
5516 int type;
5517
5518 switch (model)
5519 {
5520 case TLS_MODEL_GLOBAL_DYNAMIC:
5521 dest = gen_reg_rtx (Pmode);
5522 if (TARGET_64BIT)
5523 {
5524 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5525
5526 start_sequence ();
5527 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5528 insns = get_insns ();
5529 end_sequence ();
5530
5531 emit_libcall_block (insns, dest, rax, x);
5532 }
5533 else
5534 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5535 break;
5536
5537 case TLS_MODEL_LOCAL_DYNAMIC:
5538 base = gen_reg_rtx (Pmode);
5539 if (TARGET_64BIT)
5540 {
5541 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5542
5543 start_sequence ();
5544 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5545 insns = get_insns ();
5546 end_sequence ();
5547
5548 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5549 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5550 emit_libcall_block (insns, base, rax, note);
5551 }
5552 else
5553 emit_insn (gen_tls_local_dynamic_base_32 (base));
5554
5555 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5556 off = gen_rtx_CONST (Pmode, off);
5557
5558 return gen_rtx_PLUS (Pmode, base, off);
5559
5560 case TLS_MODEL_INITIAL_EXEC:
5561 if (TARGET_64BIT)
5562 {
5563 pic = NULL;
5564 type = UNSPEC_GOTNTPOFF;
5565 }
5566 else if (flag_pic)
5567 {
5568 if (reload_in_progress)
5569 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5570 pic = pic_offset_table_rtx;
5571 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5572 }
5573 else if (!TARGET_GNU_TLS)
5574 {
5575 pic = gen_reg_rtx (Pmode);
5576 emit_insn (gen_set_got (pic));
5577 type = UNSPEC_GOTTPOFF;
5578 }
5579 else
5580 {
5581 pic = NULL;
5582 type = UNSPEC_INDNTPOFF;
5583 }
5584
5585 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5586 off = gen_rtx_CONST (Pmode, off);
5587 if (pic)
5588 off = gen_rtx_PLUS (Pmode, pic, off);
5589 off = gen_const_mem (Pmode, off);
5590 set_mem_alias_set (off, ix86_GOT_alias_set ());
5591
5592 if (TARGET_64BIT || TARGET_GNU_TLS)
5593 {
5594 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5595 off = force_reg (Pmode, off);
5596 return gen_rtx_PLUS (Pmode, base, off);
5597 }
5598 else
5599 {
5600 base = get_thread_pointer (true);
5601 dest = gen_reg_rtx (Pmode);
5602 emit_insn (gen_subsi3 (dest, base, off));
5603 }
5604 break;
5605
5606 case TLS_MODEL_LOCAL_EXEC:
5607 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5608 (TARGET_64BIT || TARGET_GNU_TLS)
5609 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5610 off = gen_rtx_CONST (Pmode, off);
5611
5612 if (TARGET_64BIT || TARGET_GNU_TLS)
5613 {
5614 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5615 return gen_rtx_PLUS (Pmode, base, off);
5616 }
5617 else
5618 {
5619 base = get_thread_pointer (true);
5620 dest = gen_reg_rtx (Pmode);
5621 emit_insn (gen_subsi3 (dest, base, off));
5622 }
5623 break;
5624
5625 default:
5626 abort ();
5627 }
5628
5629 return dest;
5630 }
5631
5632 /* Try machine-dependent ways of modifying an illegitimate address
5633 to be legitimate. If we find one, return the new, valid address.
5634 This macro is used in only one place: `memory_address' in explow.c.
5635
5636 OLDX is the address as it was before break_out_memory_refs was called.
5637 In some cases it is useful to look at this to decide what needs to be done.
5638
5639 MODE and WIN are passed so that this macro can use
5640 GO_IF_LEGITIMATE_ADDRESS.
5641
5642 It is always safe for this macro to do nothing. It exists to recognize
5643 opportunities to optimize the output.
5644
5645 For the 80386, we handle X+REG by loading X into a register R and
5646 using R+REG. R will go in a general reg and indexing will be used.
5647 However, if REG is a broken-out memory address or multiplication,
5648 nothing needs to be done because REG can certainly go in a general reg.
5649
5650 When -fpic is used, special handling is needed for symbolic references.
5651 See comments by legitimize_pic_address in i386.c for details. */
5652
5653 rtx
5654 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5655 {
5656 int changed = 0;
5657 unsigned log;
5658
5659 if (TARGET_DEBUG_ADDR)
5660 {
5661 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5662 GET_MODE_NAME (mode));
5663 debug_rtx (x);
5664 }
5665
5666 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5667 if (log)
5668 return legitimize_tls_address (x, log, false);
5669 if (GET_CODE (x) == CONST
5670 && GET_CODE (XEXP (x, 0)) == PLUS
5671 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5672 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5673 {
5674 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5675 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5676 }
5677
5678 if (flag_pic && SYMBOLIC_CONST (x))
5679 return legitimize_pic_address (x, 0);
5680
5681 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5682 if (GET_CODE (x) == ASHIFT
5683 && GET_CODE (XEXP (x, 1)) == CONST_INT
5684 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5685 {
5686 changed = 1;
5687 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5688 GEN_INT (1 << log));
5689 }
5690
5691 if (GET_CODE (x) == PLUS)
5692 {
5693 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5694
5695 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5696 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5697 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5698 {
5699 changed = 1;
5700 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5701 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5702 GEN_INT (1 << log));
5703 }
5704
5705 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5706 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5707 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5708 {
5709 changed = 1;
5710 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5711 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5712 GEN_INT (1 << log));
5713 }
5714
5715 /* Put multiply first if it isn't already. */
5716 if (GET_CODE (XEXP (x, 1)) == MULT)
5717 {
5718 rtx tmp = XEXP (x, 0);
5719 XEXP (x, 0) = XEXP (x, 1);
5720 XEXP (x, 1) = tmp;
5721 changed = 1;
5722 }
5723
5724 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5725 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5726 created by virtual register instantiation, register elimination, and
5727 similar optimizations. */
5728 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5729 {
5730 changed = 1;
5731 x = gen_rtx_PLUS (Pmode,
5732 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5733 XEXP (XEXP (x, 1), 0)),
5734 XEXP (XEXP (x, 1), 1));
5735 }
5736
5737 /* Canonicalize
5738 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5739 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5740 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5741 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5742 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5743 && CONSTANT_P (XEXP (x, 1)))
5744 {
5745 rtx constant;
5746 rtx other = NULL_RTX;
5747
5748 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5749 {
5750 constant = XEXP (x, 1);
5751 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5752 }
5753 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5754 {
5755 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5756 other = XEXP (x, 1);
5757 }
5758 else
5759 constant = 0;
5760
5761 if (constant)
5762 {
5763 changed = 1;
5764 x = gen_rtx_PLUS (Pmode,
5765 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5766 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5767 plus_constant (other, INTVAL (constant)));
5768 }
5769 }
5770
5771 if (changed && legitimate_address_p (mode, x, FALSE))
5772 return x;
5773
5774 if (GET_CODE (XEXP (x, 0)) == MULT)
5775 {
5776 changed = 1;
5777 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5778 }
5779
5780 if (GET_CODE (XEXP (x, 1)) == MULT)
5781 {
5782 changed = 1;
5783 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5784 }
5785
5786 if (changed
5787 && GET_CODE (XEXP (x, 1)) == REG
5788 && GET_CODE (XEXP (x, 0)) == REG)
5789 return x;
5790
5791 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5792 {
5793 changed = 1;
5794 x = legitimize_pic_address (x, 0);
5795 }
5796
5797 if (changed && legitimate_address_p (mode, x, FALSE))
5798 return x;
5799
5800 if (GET_CODE (XEXP (x, 0)) == REG)
5801 {
5802 rtx temp = gen_reg_rtx (Pmode);
5803 rtx val = force_operand (XEXP (x, 1), temp);
5804 if (val != temp)
5805 emit_move_insn (temp, val);
5806
5807 XEXP (x, 1) = temp;
5808 return x;
5809 }
5810
5811 else if (GET_CODE (XEXP (x, 1)) == REG)
5812 {
5813 rtx temp = gen_reg_rtx (Pmode);
5814 rtx val = force_operand (XEXP (x, 0), temp);
5815 if (val != temp)
5816 emit_move_insn (temp, val);
5817
5818 XEXP (x, 0) = temp;
5819 return x;
5820 }
5821 }
5822
5823 return x;
5824 }
5825 \f
5826 /* Print an integer constant expression in assembler syntax. Addition
5827 and subtraction are the only arithmetic that may appear in these
5828 expressions. FILE is the stdio stream to write to, X is the rtx, and
5829 CODE is the operand print code from the output string. */
5830
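/* For example (judging from the cases below): a SYMBOL_REF printed with
   code 'P' that is not local gets an "@PLT" suffix, and an UNSPEC such as

	(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   is printed as "foo@GOTOFF".  */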
5831 static void
5832 output_pic_addr_const (FILE *file, rtx x, int code)
5833 {
5834 char buf[256];
5835
5836 switch (GET_CODE (x))
5837 {
5838 case PC:
5839 if (flag_pic)
5840 putc ('.', file);
5841 else
5842 abort ();
5843 break;
5844
5845 case SYMBOL_REF:
5846 /* Mark the decl as referenced so that cgraph will output the function. */
5847 if (SYMBOL_REF_DECL (x))
5848 mark_decl_referenced (SYMBOL_REF_DECL (x));
5849
5850 assemble_name (file, XSTR (x, 0));
5851 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5852 fputs ("@PLT", file);
5853 break;
5854
5855 case LABEL_REF:
5856 x = XEXP (x, 0);
5857 /* FALLTHRU */
5858 case CODE_LABEL:
5859 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5860 assemble_name (asm_out_file, buf);
5861 break;
5862
5863 case CONST_INT:
5864 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5865 break;
5866
5867 case CONST:
5868 /* This used to output parentheses around the expression,
5869 but that does not work on the 386 (either ATT or BSD assembler). */
5870 output_pic_addr_const (file, XEXP (x, 0), code);
5871 break;
5872
5873 case CONST_DOUBLE:
5874 if (GET_MODE (x) == VOIDmode)
5875 {
5876 /* We can use %d if the number is <32 bits and positive. */
5877 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5878 fprintf (file, "0x%lx%08lx",
5879 (unsigned long) CONST_DOUBLE_HIGH (x),
5880 (unsigned long) CONST_DOUBLE_LOW (x));
5881 else
5882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5883 }
5884 else
5885 /* We can't handle floating point constants;
5886 PRINT_OPERAND must handle them. */
5887 output_operand_lossage ("floating constant misused");
5888 break;
5889
5890 case PLUS:
5891 /* Some assemblers need integer constants to appear first. */
5892 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5893 {
5894 output_pic_addr_const (file, XEXP (x, 0), code);
5895 putc ('+', file);
5896 output_pic_addr_const (file, XEXP (x, 1), code);
5897 }
5898 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5899 {
5900 output_pic_addr_const (file, XEXP (x, 1), code);
5901 putc ('+', file);
5902 output_pic_addr_const (file, XEXP (x, 0), code);
5903 }
5904 else
5905 abort ();
5906 break;
5907
5908 case MINUS:
5909 if (!TARGET_MACHO)
5910 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5911 output_pic_addr_const (file, XEXP (x, 0), code);
5912 putc ('-', file);
5913 output_pic_addr_const (file, XEXP (x, 1), code);
5914 if (!TARGET_MACHO)
5915 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5916 break;
5917
5918 case UNSPEC:
5919 if (XVECLEN (x, 0) != 1)
5920 abort ();
5921 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5922 switch (XINT (x, 1))
5923 {
5924 case UNSPEC_GOT:
5925 fputs ("@GOT", file);
5926 break;
5927 case UNSPEC_GOTOFF:
5928 fputs ("@GOTOFF", file);
5929 break;
5930 case UNSPEC_GOTPCREL:
5931 fputs ("@GOTPCREL(%rip)", file);
5932 break;
5933 case UNSPEC_GOTTPOFF:
5934 /* FIXME: This might be @TPOFF in Sun ld too. */
5935 fputs ("@GOTTPOFF", file);
5936 break;
5937 case UNSPEC_TPOFF:
5938 fputs ("@TPOFF", file);
5939 break;
5940 case UNSPEC_NTPOFF:
5941 if (TARGET_64BIT)
5942 fputs ("@TPOFF", file);
5943 else
5944 fputs ("@NTPOFF", file);
5945 break;
5946 case UNSPEC_DTPOFF:
5947 fputs ("@DTPOFF", file);
5948 break;
5949 case UNSPEC_GOTNTPOFF:
5950 if (TARGET_64BIT)
5951 fputs ("@GOTTPOFF(%rip)", file);
5952 else
5953 fputs ("@GOTNTPOFF", file);
5954 break;
5955 case UNSPEC_INDNTPOFF:
5956 fputs ("@INDNTPOFF", file);
5957 break;
5958 default:
5959 output_operand_lossage ("invalid UNSPEC as operand");
5960 break;
5961 }
5962 break;
5963
5964 default:
5965 output_operand_lossage ("invalid expression as operand");
5966 }
5967 }
5968
5969 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5970 We need to emit DTP-relative relocations. */
5971
5972 void
5973 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5974 {
5975 fputs (ASM_LONG, file);
5976 output_addr_const (file, x);
5977 fputs ("@DTPOFF", file);
5978 switch (size)
5979 {
5980 case 4:
5981 break;
5982 case 8:
5983 fputs (", 0", file);
5984 break;
5985 default:
5986 abort ();
5987 }
5988 }
5989
5990 /* In the name of slightly smaller debug output, and to cater to
5991 general assembler lossage, recognize PIC+GOTOFF and turn it back
5992 into a direct symbol reference. */
5993
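/* For example (an illustrative summary of the code below):
   (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "foo"), and a 64-bit
   (mem (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))) is turned
   back into the referenced symbol_ref.  */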
5994 static rtx
5995 ix86_delegitimize_address (rtx orig_x)
5996 {
5997 rtx x = orig_x, y;
5998
5999 if (GET_CODE (x) == MEM)
6000 x = XEXP (x, 0);
6001
6002 if (TARGET_64BIT)
6003 {
6004 if (GET_CODE (x) != CONST
6005 || GET_CODE (XEXP (x, 0)) != UNSPEC
6006 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6007 || GET_CODE (orig_x) != MEM)
6008 return orig_x;
6009 return XVECEXP (XEXP (x, 0), 0, 0);
6010 }
6011
6012 if (GET_CODE (x) != PLUS
6013 || GET_CODE (XEXP (x, 1)) != CONST)
6014 return orig_x;
6015
6016 if (GET_CODE (XEXP (x, 0)) == REG
6017 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6018 /* %ebx + GOT/GOTOFF */
6019 y = NULL;
6020 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6021 {
6022 /* %ebx + %reg * scale + GOT/GOTOFF */
6023 y = XEXP (x, 0);
6024 if (GET_CODE (XEXP (y, 0)) == REG
6025 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6026 y = XEXP (y, 1);
6027 else if (GET_CODE (XEXP (y, 1)) == REG
6028 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6029 y = XEXP (y, 0);
6030 else
6031 return orig_x;
6032 if (GET_CODE (y) != REG
6033 && GET_CODE (y) != MULT
6034 && GET_CODE (y) != ASHIFT)
6035 return orig_x;
6036 }
6037 else
6038 return orig_x;
6039
6040 x = XEXP (XEXP (x, 1), 0);
6041 if (GET_CODE (x) == UNSPEC
6042 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6043 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6044 {
6045 if (y)
6046 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6047 return XVECEXP (x, 0, 0);
6048 }
6049
6050 if (GET_CODE (x) == PLUS
6051 && GET_CODE (XEXP (x, 0)) == UNSPEC
6052 && GET_CODE (XEXP (x, 1)) == CONST_INT
6053 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6054 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6055 && GET_CODE (orig_x) != MEM)))
6056 {
6057 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6058 if (y)
6059 return gen_rtx_PLUS (Pmode, y, x);
6060 return x;
6061 }
6062
6063 return orig_x;
6064 }
6065 \f
6066 static void
6067 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6068 int fp, FILE *file)
6069 {
6070 const char *suffix;
6071
6072 if (mode == CCFPmode || mode == CCFPUmode)
6073 {
6074 enum rtx_code second_code, bypass_code;
6075 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6076 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6077 abort ();
6078 code = ix86_fp_compare_code_to_integer (code);
6079 mode = CCmode;
6080 }
6081 if (reverse)
6082 code = reverse_condition (code);
6083
6084 switch (code)
6085 {
6086 case EQ:
6087 suffix = "e";
6088 break;
6089 case NE:
6090 suffix = "ne";
6091 break;
6092 case GT:
6093 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6094 abort ();
6095 suffix = "g";
6096 break;
6097 case GTU:
6098 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6099 Those same assemblers have the same but opposite lossage on cmov. */
6100 if (mode != CCmode)
6101 abort ();
6102 suffix = fp ? "nbe" : "a";
6103 break;
6104 case LT:
6105 if (mode == CCNOmode || mode == CCGOCmode)
6106 suffix = "s";
6107 else if (mode == CCmode || mode == CCGCmode)
6108 suffix = "l";
6109 else
6110 abort ();
6111 break;
6112 case LTU:
6113 if (mode != CCmode)
6114 abort ();
6115 suffix = "b";
6116 break;
6117 case GE:
6118 if (mode == CCNOmode || mode == CCGOCmode)
6119 suffix = "ns";
6120 else if (mode == CCmode || mode == CCGCmode)
6121 suffix = "ge";
6122 else
6123 abort ();
6124 break;
6125 case GEU:
6126 /* ??? As above. */
6127 if (mode != CCmode)
6128 abort ();
6129 suffix = fp ? "nb" : "ae";
6130 break;
6131 case LE:
6132 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6133 abort ();
6134 suffix = "le";
6135 break;
6136 case LEU:
6137 if (mode != CCmode)
6138 abort ();
6139 suffix = "be";
6140 break;
6141 case UNORDERED:
6142 suffix = fp ? "u" : "p";
6143 break;
6144 case ORDERED:
6145 suffix = fp ? "nu" : "np";
6146 break;
6147 default:
6148 abort ();
6149 }
6150 fputs (suffix, file);
6151 }
6152
6153 /* Print the name of register X to FILE based on its machine mode and number.
6154 If CODE is 'w', pretend the mode is HImode.
6155 If CODE is 'b', pretend the mode is QImode.
6156 If CODE is 'k', pretend the mode is SImode.
6157 If CODE is 'q', pretend the mode is DImode.
6158 If CODE is 'h', pretend the reg is the `high' byte register.
6159 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6160
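/* For illustration (AT&T syntax): for (reg:SI 0), i.e. the "ax" register,
   code 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax", 'q' prints
   "%rax" (64-bit only) and 'h' prints "%ah"; an extended register such as
   r8 printed with code 'k' comes out as "%r8d".  */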
6161 void
6162 print_reg (rtx x, int code, FILE *file)
6163 {
6164 if (REGNO (x) == ARG_POINTER_REGNUM
6165 || REGNO (x) == FRAME_POINTER_REGNUM
6166 || REGNO (x) == FLAGS_REG
6167 || REGNO (x) == FPSR_REG)
6168 abort ();
6169
6170 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6171 putc ('%', file);
6172
6173 if (code == 'w' || MMX_REG_P (x))
6174 code = 2;
6175 else if (code == 'b')
6176 code = 1;
6177 else if (code == 'k')
6178 code = 4;
6179 else if (code == 'q')
6180 code = 8;
6181 else if (code == 'y')
6182 code = 3;
6183 else if (code == 'h')
6184 code = 0;
6185 else
6186 code = GET_MODE_SIZE (GET_MODE (x));
6187
6188 /* Irritatingly, AMD extended registers use a different naming convention
6189 from the normal registers. */
6190 if (REX_INT_REG_P (x))
6191 {
6192 if (!TARGET_64BIT)
6193 abort ();
6194 switch (code)
6195 {
6196 case 0:
6197 error ("extended registers have no high halves");
6198 break;
6199 case 1:
6200 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6201 break;
6202 case 2:
6203 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6204 break;
6205 case 4:
6206 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6207 break;
6208 case 8:
6209 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6210 break;
6211 default:
6212 error ("unsupported operand size for extended register");
6213 break;
6214 }
6215 return;
6216 }
6217 switch (code)
6218 {
6219 case 3:
6220 if (STACK_TOP_P (x))
6221 {
6222 fputs ("st(0)", file);
6223 break;
6224 }
6225 /* FALLTHRU */
6226 case 8:
6227 case 4:
6228 case 12:
6229 if (! ANY_FP_REG_P (x))
6230 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6231 /* FALLTHRU */
6232 case 16:
6233 case 2:
6234 normal:
6235 fputs (hi_reg_name[REGNO (x)], file);
6236 break;
6237 case 1:
6238 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6239 goto normal;
6240 fputs (qi_reg_name[REGNO (x)], file);
6241 break;
6242 case 0:
6243 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6244 goto normal;
6245 fputs (qi_high_reg_name[REGNO (x)], file);
6246 break;
6247 default:
6248 abort ();
6249 }
6250 }
6251
6252 /* Locate some local-dynamic symbol still in use by this function
6253 so that we can print its name in some tls_local_dynamic_base
6254 pattern. */
6255
6256 static const char *
6257 get_some_local_dynamic_name (void)
6258 {
6259 rtx insn;
6260
6261 if (cfun->machine->some_ld_name)
6262 return cfun->machine->some_ld_name;
6263
6264 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6265 if (INSN_P (insn)
6266 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6267 return cfun->machine->some_ld_name;
6268
6269 abort ();
6270 }
6271
6272 static int
6273 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6274 {
6275 rtx x = *px;
6276
6277 if (GET_CODE (x) == SYMBOL_REF
6278 && local_dynamic_symbolic_operand (x, Pmode))
6279 {
6280 cfun->machine->some_ld_name = XSTR (x, 0);
6281 return 1;
6282 }
6283
6284 return 0;
6285 }
6286
6287 /* Meaning of CODE:
6288 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6289 C -- print opcode suffix for set/cmov insn.
6290 c -- like C, but print reversed condition
6291 F,f -- likewise, but for floating-point.
6292 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6293 otherwise nothing
6294 R -- print the prefix for register names.
6295 z -- print the opcode suffix for the size of the current operand.
6296 * -- print a star (in certain assembler syntax)
6297 A -- print an absolute memory reference.
6298 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6299 s -- print a shift double count, followed by the assembler's argument
6300 delimiter.
6301 b -- print the QImode name of the register for the indicated operand.
6302 %b0 would print %al if operands[0] is reg 0.
6303 w -- likewise, print the HImode name of the register.
6304 k -- likewise, print the SImode name of the register.
6305 q -- likewise, print the DImode name of the register.
6306 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6307 y -- print "st(0)" instead of "st" as a register.
6308 D -- print condition for SSE cmp instruction.
6309 P -- if PIC, print an @PLT suffix.
6310 X -- don't print any sort of PIC '@' suffix for a symbol.
6311 & -- print some in-use local-dynamic symbol name.
6312 */
6313
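/* A small illustration (the template below is made up for this comment):
   in an output template such as "mov%z0\t{%1, %0|%0, %1}", an SImode
   integer operand makes %z0 print 'l', giving "movl" in AT&T syntax,
   while %b1, %w1 and %k1 would print the QImode, HImode and SImode names
   of the register in operand 1.  */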
6314 void
6315 print_operand (FILE *file, rtx x, int code)
6316 {
6317 if (code)
6318 {
6319 switch (code)
6320 {
6321 case '*':
6322 if (ASSEMBLER_DIALECT == ASM_ATT)
6323 putc ('*', file);
6324 return;
6325
6326 case '&':
6327 assemble_name (file, get_some_local_dynamic_name ());
6328 return;
6329
6330 case 'A':
6331 if (ASSEMBLER_DIALECT == ASM_ATT)
6332 putc ('*', file);
6333 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6334 {
6335 /* Intel syntax. For absolute addresses, registers should not
6336 be surrounded by brackets. */
6337 if (GET_CODE (x) != REG)
6338 {
6339 putc ('[', file);
6340 PRINT_OPERAND (file, x, 0);
6341 putc (']', file);
6342 return;
6343 }
6344 }
6345 else
6346 abort ();
6347
6348 PRINT_OPERAND (file, x, 0);
6349 return;
6350
6351
6352 case 'L':
6353 if (ASSEMBLER_DIALECT == ASM_ATT)
6354 putc ('l', file);
6355 return;
6356
6357 case 'W':
6358 if (ASSEMBLER_DIALECT == ASM_ATT)
6359 putc ('w', file);
6360 return;
6361
6362 case 'B':
6363 if (ASSEMBLER_DIALECT == ASM_ATT)
6364 putc ('b', file);
6365 return;
6366
6367 case 'Q':
6368 if (ASSEMBLER_DIALECT == ASM_ATT)
6369 putc ('l', file);
6370 return;
6371
6372 case 'S':
6373 if (ASSEMBLER_DIALECT == ASM_ATT)
6374 putc ('s', file);
6375 return;
6376
6377 case 'T':
6378 if (ASSEMBLER_DIALECT == ASM_ATT)
6379 putc ('t', file);
6380 return;
6381
6382 case 'z':
6383 /* 387 opcodes don't get size suffixes if the operands are
6384 registers. */
6385 if (STACK_REG_P (x))
6386 return;
6387
6388 /* Likewise if using Intel opcodes. */
6389 if (ASSEMBLER_DIALECT == ASM_INTEL)
6390 return;
6391
6392 /* This is the size of op from size of operand. */
6393 switch (GET_MODE_SIZE (GET_MODE (x)))
6394 {
6395 case 2:
6396 #ifdef HAVE_GAS_FILDS_FISTS
6397 putc ('s', file);
6398 #endif
6399 return;
6400
6401 case 4:
6402 if (GET_MODE (x) == SFmode)
6403 {
6404 putc ('s', file);
6405 return;
6406 }
6407 else
6408 putc ('l', file);
6409 return;
6410
6411 case 12:
6412 case 16:
6413 putc ('t', file);
6414 return;
6415
6416 case 8:
6417 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6418 {
6419 #ifdef GAS_MNEMONICS
6420 putc ('q', file);
6421 #else
6422 putc ('l', file);
6423 putc ('l', file);
6424 #endif
6425 }
6426 else
6427 putc ('l', file);
6428 return;
6429
6430 default:
6431 abort ();
6432 }
6433
6434 case 'b':
6435 case 'w':
6436 case 'k':
6437 case 'q':
6438 case 'h':
6439 case 'y':
6440 case 'X':
6441 case 'P':
6442 break;
6443
6444 case 's':
6445 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6446 {
6447 PRINT_OPERAND (file, x, 0);
6448 putc (',', file);
6449 }
6450 return;
6451
6452 case 'D':
6453 /* A little bit of braindamage here. The SSE compare instructions
6454 use completely different names for the comparisons than the
6455 fp conditional moves do. */
6456 switch (GET_CODE (x))
6457 {
6458 case EQ:
6459 case UNEQ:
6460 fputs ("eq", file);
6461 break;
6462 case LT:
6463 case UNLT:
6464 fputs ("lt", file);
6465 break;
6466 case LE:
6467 case UNLE:
6468 fputs ("le", file);
6469 break;
6470 case UNORDERED:
6471 fputs ("unord", file);
6472 break;
6473 case NE:
6474 case LTGT:
6475 fputs ("neq", file);
6476 break;
6477 case UNGE:
6478 case GE:
6479 fputs ("nlt", file);
6480 break;
6481 case UNGT:
6482 case GT:
6483 fputs ("nle", file);
6484 break;
6485 case ORDERED:
6486 fputs ("ord", file);
6487 break;
6488 default:
6489 abort ();
6490 break;
6491 }
6492 return;
6493 case 'O':
6494 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6495 if (ASSEMBLER_DIALECT == ASM_ATT)
6496 {
6497 switch (GET_MODE (x))
6498 {
6499 case HImode: putc ('w', file); break;
6500 case SImode:
6501 case SFmode: putc ('l', file); break;
6502 case DImode:
6503 case DFmode: putc ('q', file); break;
6504 default: abort ();
6505 }
6506 putc ('.', file);
6507 }
6508 #endif
6509 return;
6510 case 'C':
6511 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6512 return;
6513 case 'F':
6514 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6515 if (ASSEMBLER_DIALECT == ASM_ATT)
6516 putc ('.', file);
6517 #endif
6518 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6519 return;
6520
6521 /* Like above, but reverse condition */
6522 case 'c':
6523 /* Check to see if argument to %c is really a constant
6524 and not a condition code which needs to be reversed. */
6525 if (!COMPARISON_P (x))
6526 {
6527 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6528 return;
6529 }
6530 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6531 return;
6532 case 'f':
6533 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6534 if (ASSEMBLER_DIALECT == ASM_ATT)
6535 putc ('.', file);
6536 #endif
6537 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6538 return;
6539 case '+':
6540 {
6541 rtx x;
6542
6543 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6544 return;
6545
6546 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6547 if (x)
6548 {
6549 int pred_val = INTVAL (XEXP (x, 0));
6550
6551 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6552 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6553 {
6554 int taken = pred_val > REG_BR_PROB_BASE / 2;
6555 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6556
6557 /* Emit hints only when the default branch prediction
6558 heuristics would fail. */
6559 if (taken != cputaken)
6560 {
6561 /* We use 3e (DS) prefix for taken branches and
6562 2e (CS) prefix for not taken branches. */
6563 if (taken)
6564 fputs ("ds ; ", file);
6565 else
6566 fputs ("cs ; ", file);
6567 }
6568 }
6569 }
6570 return;
6571 }
6572 default:
6573 output_operand_lossage ("invalid operand code '%c'", code);
6574 }
6575 }
6576
6577 if (GET_CODE (x) == REG)
6578 print_reg (x, code, file);
6579
6580 else if (GET_CODE (x) == MEM)
6581 {
6582 /* No `byte ptr' prefix for call instructions. */
6583 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6584 {
6585 const char * size;
6586 switch (GET_MODE_SIZE (GET_MODE (x)))
6587 {
6588 case 1: size = "BYTE"; break;
6589 case 2: size = "WORD"; break;
6590 case 4: size = "DWORD"; break;
6591 case 8: size = "QWORD"; break;
6592 case 12: size = "XWORD"; break;
6593 case 16: size = "XMMWORD"; break;
6594 default:
6595 abort ();
6596 }
6597
6598 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6599 if (code == 'b')
6600 size = "BYTE";
6601 else if (code == 'w')
6602 size = "WORD";
6603 else if (code == 'k')
6604 size = "DWORD";
6605
6606 fputs (size, file);
6607 fputs (" PTR ", file);
6608 }
6609
6610 x = XEXP (x, 0);
6611 /* Avoid (%rip) for call operands. */
6612 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6613 && GET_CODE (x) != CONST_INT)
6614 output_addr_const (file, x);
6615 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6616 output_operand_lossage ("invalid constraints for operand");
6617 else
6618 output_address (x);
6619 }
6620
6621 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6622 {
6623 REAL_VALUE_TYPE r;
6624 long l;
6625
6626 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6627 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6628
6629 if (ASSEMBLER_DIALECT == ASM_ATT)
6630 putc ('$', file);
6631 fprintf (file, "0x%08lx", l);
6632 }
6633
6634 /* These float cases don't actually occur as immediate operands. */
6635 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6636 {
6637 char dstr[30];
6638
6639 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6640 fprintf (file, "%s", dstr);
6641 }
6642
6643 else if (GET_CODE (x) == CONST_DOUBLE
6644 && GET_MODE (x) == XFmode)
6645 {
6646 char dstr[30];
6647
6648 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6649 fprintf (file, "%s", dstr);
6650 }
6651
6652 else
6653 {
6654 if (code != 'P')
6655 {
6656 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6657 {
6658 if (ASSEMBLER_DIALECT == ASM_ATT)
6659 putc ('$', file);
6660 }
6661 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6662 || GET_CODE (x) == LABEL_REF)
6663 {
6664 if (ASSEMBLER_DIALECT == ASM_ATT)
6665 putc ('$', file);
6666 else
6667 fputs ("OFFSET FLAT:", file);
6668 }
6669 }
6670 if (GET_CODE (x) == CONST_INT)
6671 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6672 else if (flag_pic)
6673 output_pic_addr_const (file, x, code);
6674 else
6675 output_addr_const (file, x);
6676 }
6677 }
6678 \f
6679 /* Print a memory operand whose address is ADDR. */
6680
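/* For example, an address with base %ebp, index %eax, scale 4 and
   displacement 8 is printed as "8(%ebp,%eax,4)" in AT&T syntax and
   roughly as "[ebp+8+eax*4]" in Intel syntax (see the two branches
   below).  */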
6681 void
6682 print_operand_address (FILE *file, rtx addr)
6683 {
6684 struct ix86_address parts;
6685 rtx base, index, disp;
6686 int scale;
6687
6688 if (! ix86_decompose_address (addr, &parts))
6689 abort ();
6690
6691 base = parts.base;
6692 index = parts.index;
6693 disp = parts.disp;
6694 scale = parts.scale;
6695
6696 switch (parts.seg)
6697 {
6698 case SEG_DEFAULT:
6699 break;
6700 case SEG_FS:
6701 case SEG_GS:
6702 if (USER_LABEL_PREFIX[0] == 0)
6703 putc ('%', file);
6704 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6705 break;
6706 default:
6707 abort ();
6708 }
6709
6710 if (!base && !index)
6711 {
6712 /* A displacement-only address requires special attention. */
6713
6714 if (GET_CODE (disp) == CONST_INT)
6715 {
6716 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6717 {
6718 if (USER_LABEL_PREFIX[0] == 0)
6719 putc ('%', file);
6720 fputs ("ds:", file);
6721 }
6722 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6723 }
6724 else if (flag_pic)
6725 output_pic_addr_const (file, disp, 0);
6726 else
6727 output_addr_const (file, disp);
6728
6729 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6730 if (TARGET_64BIT
6731 && ((GET_CODE (disp) == SYMBOL_REF
6732 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6733 || GET_CODE (disp) == LABEL_REF
6734 || (GET_CODE (disp) == CONST
6735 && GET_CODE (XEXP (disp, 0)) == PLUS
6736 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6737 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6738 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6739 fputs ("(%rip)", file);
6740 }
6741 else
6742 {
6743 if (ASSEMBLER_DIALECT == ASM_ATT)
6744 {
6745 if (disp)
6746 {
6747 if (flag_pic)
6748 output_pic_addr_const (file, disp, 0);
6749 else if (GET_CODE (disp) == LABEL_REF)
6750 output_asm_label (disp);
6751 else
6752 output_addr_const (file, disp);
6753 }
6754
6755 putc ('(', file);
6756 if (base)
6757 print_reg (base, 0, file);
6758 if (index)
6759 {
6760 putc (',', file);
6761 print_reg (index, 0, file);
6762 if (scale != 1)
6763 fprintf (file, ",%d", scale);
6764 }
6765 putc (')', file);
6766 }
6767 else
6768 {
6769 rtx offset = NULL_RTX;
6770
6771 if (disp)
6772 {
6773 /* Pull out the offset of a symbol; print any symbol itself. */
6774 if (GET_CODE (disp) == CONST
6775 && GET_CODE (XEXP (disp, 0)) == PLUS
6776 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6777 {
6778 offset = XEXP (XEXP (disp, 0), 1);
6779 disp = gen_rtx_CONST (VOIDmode,
6780 XEXP (XEXP (disp, 0), 0));
6781 }
6782
6783 if (flag_pic)
6784 output_pic_addr_const (file, disp, 0);
6785 else if (GET_CODE (disp) == LABEL_REF)
6786 output_asm_label (disp);
6787 else if (GET_CODE (disp) == CONST_INT)
6788 offset = disp;
6789 else
6790 output_addr_const (file, disp);
6791 }
6792
6793 putc ('[', file);
6794 if (base)
6795 {
6796 print_reg (base, 0, file);
6797 if (offset)
6798 {
6799 if (INTVAL (offset) >= 0)
6800 putc ('+', file);
6801 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6802 }
6803 }
6804 else if (offset)
6805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6806 else
6807 putc ('0', file);
6808
6809 if (index)
6810 {
6811 putc ('+', file);
6812 print_reg (index, 0, file);
6813 if (scale != 1)
6814 fprintf (file, "*%d", scale);
6815 }
6816 putc (']', file);
6817 }
6818 }
6819 }
6820
6821 bool
6822 output_addr_const_extra (FILE *file, rtx x)
6823 {
6824 rtx op;
6825
6826 if (GET_CODE (x) != UNSPEC)
6827 return false;
6828
6829 op = XVECEXP (x, 0, 0);
6830 switch (XINT (x, 1))
6831 {
6832 case UNSPEC_GOTTPOFF:
6833 output_addr_const (file, op);
6834 /* FIXME: This might be @TPOFF in Sun ld. */
6835 fputs ("@GOTTPOFF", file);
6836 break;
6837 case UNSPEC_TPOFF:
6838 output_addr_const (file, op);
6839 fputs ("@TPOFF", file);
6840 break;
6841 case UNSPEC_NTPOFF:
6842 output_addr_const (file, op);
6843 if (TARGET_64BIT)
6844 fputs ("@TPOFF", file);
6845 else
6846 fputs ("@NTPOFF", file);
6847 break;
6848 case UNSPEC_DTPOFF:
6849 output_addr_const (file, op);
6850 fputs ("@DTPOFF", file);
6851 break;
6852 case UNSPEC_GOTNTPOFF:
6853 output_addr_const (file, op);
6854 if (TARGET_64BIT)
6855 fputs ("@GOTTPOFF(%rip)", file);
6856 else
6857 fputs ("@GOTNTPOFF", file);
6858 break;
6859 case UNSPEC_INDNTPOFF:
6860 output_addr_const (file, op);
6861 fputs ("@INDNTPOFF", file);
6862 break;
6863
6864 default:
6865 return false;
6866 }
6867
6868 return true;
6869 }
6870 \f
6871 /* Split one or more DImode RTL references into pairs of SImode
6872 references. The RTL can be REG, offsettable MEM, integer constant, or
6873 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6874 split and "num" is its length. lo_half and hi_half are output arrays
6875 that parallel "operands". */
6876
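/* Roughly: for a DImode MEM at address A the halves are (mem:SI A) and
   (mem:SI (plus A (const_int 4))); for a DImode register or constant the
   halves are the corresponding SImode subregs / subwords.  */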
6877 void
6878 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6879 {
6880 while (num--)
6881 {
6882 rtx op = operands[num];
6883
6884 /* simplify_subreg refuses to split volatile memory addresses,
6885 but we still have to handle them. */
6886 if (GET_CODE (op) == MEM)
6887 {
6888 lo_half[num] = adjust_address (op, SImode, 0);
6889 hi_half[num] = adjust_address (op, SImode, 4);
6890 }
6891 else
6892 {
6893 lo_half[num] = simplify_gen_subreg (SImode, op,
6894 GET_MODE (op) == VOIDmode
6895 ? DImode : GET_MODE (op), 0);
6896 hi_half[num] = simplify_gen_subreg (SImode, op,
6897 GET_MODE (op) == VOIDmode
6898 ? DImode : GET_MODE (op), 4);
6899 }
6900 }
6901 }
6902 /* Split one or more TImode RTL references into pairs of DImode
6903 references. The RTL can be REG, offsettable MEM, integer constant, or
6904 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6905 split and "num" is its length. lo_half and hi_half are output arrays
6906 that parallel "operands". */
6907
6908 void
6909 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6910 {
6911 while (num--)
6912 {
6913 rtx op = operands[num];
6914
6915 /* simplify_subreg refuses to split volatile memory addresses, but we
6916 still have to handle them. */
6917 if (GET_CODE (op) == MEM)
6918 {
6919 lo_half[num] = adjust_address (op, DImode, 0);
6920 hi_half[num] = adjust_address (op, DImode, 8);
6921 }
6922 else
6923 {
6924 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6925 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6926 }
6927 }
6928 }
6929 \f
6930 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6931 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6932 is the expression of the binary operation. The output may either be
6933 emitted here, or returned to the caller, like all output_* functions.
6934
6935 There is no guarantee that the operands are the same mode, as they
6936 might be within FLOAT or FLOAT_EXTEND expressions. */
6937
6938 #ifndef SYSV386_COMPAT
6939 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6940 wants to fix the assemblers because that causes incompatibility
6941 with gcc. No-one wants to fix gcc because that causes
6942 incompatibility with assemblers... You can use the option of
6943 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6944 #define SYSV386_COMPAT 1
6945 #endif
6946
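/* For example (derived from the code below): an SFmode SSE add returns
   "addss\t{%2, %0|%0, %2}", while a 387 add with a memory operand 2
   returns "fadd%z2\t%2"; the more involved cases append a 'p' or 'r'
   suffix and swap operands as needed.  */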
6947 const char *
6948 output_387_binary_op (rtx insn, rtx *operands)
6949 {
6950 static char buf[30];
6951 const char *p;
6952 const char *ssep;
6953 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
6954
6955 #ifdef ENABLE_CHECKING
6956 /* Even if we do not want to check the inputs, this documents input
6957 constraints. Which helps in understanding the following code. */
6958 if (STACK_REG_P (operands[0])
6959 && ((REG_P (operands[1])
6960 && REGNO (operands[0]) == REGNO (operands[1])
6961 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6962 || (REG_P (operands[2])
6963 && REGNO (operands[0]) == REGNO (operands[2])
6964 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6965 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6966 ; /* ok */
6967 else if (!is_sse)
6968 abort ();
6969 #endif
6970
6971 switch (GET_CODE (operands[3]))
6972 {
6973 case PLUS:
6974 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6975 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6976 p = "fiadd";
6977 else
6978 p = "fadd";
6979 ssep = "add";
6980 break;
6981
6982 case MINUS:
6983 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6984 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6985 p = "fisub";
6986 else
6987 p = "fsub";
6988 ssep = "sub";
6989 break;
6990
6991 case MULT:
6992 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6993 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6994 p = "fimul";
6995 else
6996 p = "fmul";
6997 ssep = "mul";
6998 break;
6999
7000 case DIV:
7001 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7002 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7003 p = "fidiv";
7004 else
7005 p = "fdiv";
7006 ssep = "div";
7007 break;
7008
7009 default:
7010 abort ();
7011 }
7012
7013 if (is_sse)
7014 {
7015 strcpy (buf, ssep);
7016 if (GET_MODE (operands[0]) == SFmode)
7017 strcat (buf, "ss\t{%2, %0|%0, %2}");
7018 else
7019 strcat (buf, "sd\t{%2, %0|%0, %2}");
7020 return buf;
7021 }
7022 strcpy (buf, p);
7023
7024 switch (GET_CODE (operands[3]))
7025 {
7026 case MULT:
7027 case PLUS:
7028 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7029 {
7030 rtx temp = operands[2];
7031 operands[2] = operands[1];
7032 operands[1] = temp;
7033 }
7034
7035 /* We now know that operands[0] and operands[1] are the same register. */
7036
7037 if (GET_CODE (operands[2]) == MEM)
7038 {
7039 p = "%z2\t%2";
7040 break;
7041 }
7042
7043 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7044 {
7045 if (STACK_TOP_P (operands[0]))
7046 /* How is it that we are storing to a dead operand[2]?
7047 Well, presumably operands[1] is dead too. We can't
7048 store the result to st(0) as st(0) gets popped on this
7049 instruction. Instead store to operands[2] (which I
7050 think has to be st(1)). st(1) will be popped later.
7051 gcc <= 2.8.1 didn't have this check and generated
7052 assembly code that the Unixware assembler rejected. */
7053 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7054 else
7055 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7056 break;
7057 }
7058
7059 if (STACK_TOP_P (operands[0]))
7060 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7061 else
7062 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7063 break;
7064
7065 case MINUS:
7066 case DIV:
7067 if (GET_CODE (operands[1]) == MEM)
7068 {
7069 p = "r%z1\t%1";
7070 break;
7071 }
7072
7073 if (GET_CODE (operands[2]) == MEM)
7074 {
7075 p = "%z2\t%2";
7076 break;
7077 }
7078
7079 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7080 {
7081 #if SYSV386_COMPAT
7082 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7083 derived assemblers, confusingly reverse the direction of
7084 the operation for fsub{r} and fdiv{r} when the
7085 destination register is not st(0). The Intel assembler
7086 doesn't have this brain damage. Read !SYSV386_COMPAT to
7087 figure out what the hardware really does. */
7088 if (STACK_TOP_P (operands[0]))
7089 p = "{p\t%0, %2|rp\t%2, %0}";
7090 else
7091 p = "{rp\t%2, %0|p\t%0, %2}";
7092 #else
7093 if (STACK_TOP_P (operands[0]))
7094 /* As above for fmul/fadd, we can't store to st(0). */
7095 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7096 else
7097 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7098 #endif
7099 break;
7100 }
7101
7102 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7103 {
7104 #if SYSV386_COMPAT
7105 if (STACK_TOP_P (operands[0]))
7106 p = "{rp\t%0, %1|p\t%1, %0}";
7107 else
7108 p = "{p\t%1, %0|rp\t%0, %1}";
7109 #else
7110 if (STACK_TOP_P (operands[0]))
7111 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7112 else
7113 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7114 #endif
7115 break;
7116 }
7117
7118 if (STACK_TOP_P (operands[0]))
7119 {
7120 if (STACK_TOP_P (operands[1]))
7121 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7122 else
7123 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7124 break;
7125 }
7126 else if (STACK_TOP_P (operands[1]))
7127 {
7128 #if SYSV386_COMPAT
7129 p = "{\t%1, %0|r\t%0, %1}";
7130 #else
7131 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7132 #endif
7133 }
7134 else
7135 {
7136 #if SYSV386_COMPAT
7137 p = "{r\t%2, %0|\t%0, %2}";
7138 #else
7139 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7140 #endif
7141 }
7142 break;
7143
7144 default:
7145 abort ();
7146 }
7147
7148 strcat (buf, p);
7149 return buf;
7150 }
7151
7152 /* Output code to initialize control word copies used by trunc?f?i and
7153 rounding patterns. CURRENT_MODE is set to the current control word,
7154 while NEW_MODE is set to the new control word. */
7155
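/* Background note: the rounding-control field of the 387 control word is
   bits 11:10 (00 = to nearest, 01 = down, 10 = up, 11 = toward zero), and
   bit 5 (0x0020) masks the precision exception; hence the 0x0400, 0x0800,
   0x0c00 and 0x0020 constants used below.  */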
7156 void
7157 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7158 {
7159 rtx reg = gen_reg_rtx (HImode);
7160
7161 emit_insn (gen_x86_fnstcw_1 (current_mode));
7162 emit_move_insn (reg, current_mode);
7163
7164 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7165 && !TARGET_64BIT)
7166 {
7167 switch (mode)
7168 {
7169 case I387_CW_FLOOR:
7170 /* round down toward -oo */
7171 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7172 break;
7173
7174 case I387_CW_CEIL:
7175 /* round up toward +oo */
7176 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7177 break;
7178
7179 case I387_CW_TRUNC:
7180 /* round toward zero (truncate) */
7181 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7182 break;
7183
7184 case I387_CW_MASK_PM:
7185 /* mask precision exception for nearbyint() */
7186 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7187 break;
7188
7189 default:
7190 abort();
7191 }
7192 }
7193 else
7194 {
7195 switch (mode)
7196 {
7197 case I387_CW_FLOOR:
7198 /* round down toward -oo */
7199 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7200 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7201 break;
7202
7203 case I387_CW_CEIL:
7204 /* round up toward +oo */
7205 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7206 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7207 break;
7208
7209 case I387_CW_TRUNC:
7210 /* round toward zero (truncate) */
7211 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7212 break;
7213
7214 case I387_CW_MASK_PM:
7215 /* mask precision exception for nearbyint() */
7216 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7217 break;
7218
7219 default:
7220 abort();
7221 }
7222 }
7223
7224 emit_move_insn (new_mode, reg);
7225 }
7226
7227 /* Output code for INSN to convert a float to a signed int. OPERANDS
7228 are the insn operands. The output may be [HSD]Imode and the input
7229 operand may be [SDX]Fmode. */
7230
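/* The emitted sequence is roughly (see the templates below): an optional
   "fld %y1" to duplicate the operand when a popping store would otherwise
   lose it, "fldcw" to switch to the truncating control word in
   operands[3], "fist"/"fistp" to store the integer, then "fldcw" to
   restore the original control word from operands[2].  */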
7231 const char *
7232 output_fix_trunc (rtx insn, rtx *operands)
7233 {
7234 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7235 int dimode_p = GET_MODE (operands[0]) == DImode;
7236
7237 /* Jump through a hoop or two for DImode, since the hardware has no
7238 non-popping instruction. We used to do this a different way, but
7239 that was somewhat fragile and broke with post-reload splitters. */
7240 if (dimode_p && !stack_top_dies)
7241 output_asm_insn ("fld\t%y1", operands);
7242
7243 if (!STACK_TOP_P (operands[1]))
7244 abort ();
7245
7246 if (GET_CODE (operands[0]) != MEM)
7247 abort ();
7248
7249 output_asm_insn ("fldcw\t%3", operands);
7250 if (stack_top_dies || dimode_p)
7251 output_asm_insn ("fistp%z0\t%0", operands);
7252 else
7253 output_asm_insn ("fist%z0\t%0", operands);
7254 output_asm_insn ("fldcw\t%2", operands);
7255
7256 return "";
7257 }
7258
7259 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7260 should be used. UNORDERED_P is true when fucom should be used. */
7261
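/* For example (a sketch of the cases handled below): SSE operands in
   SFmode yield "ucomiss\t{%1, %0|%0, %1}" or "comiss\t{%1, %0|%0, %1}",
   a 387 compare of st(0) against zero uses "ftst\n\tfnstsw\t%0", and when
   both stack operands die a popping "fcompp"/"fucomip"-style form is
   used.  */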
7262 const char *
7263 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7264 {
7265 int stack_top_dies;
7266 rtx cmp_op0, cmp_op1;
7267 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7268
7269 if (eflags_p)
7270 {
7271 cmp_op0 = operands[0];
7272 cmp_op1 = operands[1];
7273 }
7274 else
7275 {
7276 cmp_op0 = operands[1];
7277 cmp_op1 = operands[2];
7278 }
7279
7280 if (is_sse)
7281 {
7282 if (GET_MODE (operands[0]) == SFmode)
7283 if (unordered_p)
7284 return "ucomiss\t{%1, %0|%0, %1}";
7285 else
7286 return "comiss\t{%1, %0|%0, %1}";
7287 else
7288 if (unordered_p)
7289 return "ucomisd\t{%1, %0|%0, %1}";
7290 else
7291 return "comisd\t{%1, %0|%0, %1}";
7292 }
7293
7294 if (! STACK_TOP_P (cmp_op0))
7295 abort ();
7296
7297 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7298
7299 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7300 {
7301 if (stack_top_dies)
7302 {
7303 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7304 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7305 }
7306 else
7307 return "ftst\n\tfnstsw\t%0";
7308 }
7309
7310 if (STACK_REG_P (cmp_op1)
7311 && stack_top_dies
7312 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7313 && REGNO (cmp_op1) != FIRST_STACK_REG)
7314 {
7315 /* If the top of the 387 stack dies, and the other operand
7316 is also a stack register that dies, then this must be an
7317 `fcompp' float compare. */
7318
7319 if (eflags_p)
7320 {
7321 /* There is no double popping fcomi variant. Fortunately,
7322 eflags is immune from the fstp's cc clobbering. */
7323 if (unordered_p)
7324 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7325 else
7326 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7327 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7328 }
7329 else
7330 {
7331 if (unordered_p)
7332 return "fucompp\n\tfnstsw\t%0";
7333 else
7334 return "fcompp\n\tfnstsw\t%0";
7335 }
7336 }
7337 else
7338 {
7339 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7340
7341 static const char * const alt[16] =
7342 {
7343 "fcom%z2\t%y2\n\tfnstsw\t%0",
7344 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7345 "fucom%z2\t%y2\n\tfnstsw\t%0",
7346 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7347
7348 "ficom%z2\t%y2\n\tfnstsw\t%0",
7349 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7350 NULL,
7351 NULL,
7352
7353 "fcomi\t{%y1, %0|%0, %y1}",
7354 "fcomip\t{%y1, %0|%0, %y1}",
7355 "fucomi\t{%y1, %0|%0, %y1}",
7356 "fucomip\t{%y1, %0|%0, %y1}",
7357
7358 NULL,
7359 NULL,
7360 NULL,
7361 NULL
7362 };
7363
7364 int mask;
7365 const char *ret;
7366
7367 mask = eflags_p << 3;
7368 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7369 mask |= unordered_p << 1;
7370 mask |= stack_top_dies;
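/* For instance, an fcomi-style compare (eflags_p), unordered, with a dying
   stack top gives mask 8 + 2 + 1 = 11, i.e. "fucomip".  */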
7371
7372 if (mask >= 16)
7373 abort ();
7374 ret = alt[mask];
7375 if (ret == NULL)
7376 abort ();
7377
7378 return ret;
7379 }
7380 }
7381
7382 void
7383 ix86_output_addr_vec_elt (FILE *file, int value)
7384 {
7385 const char *directive = ASM_LONG;
7386
7387 if (TARGET_64BIT)
7388 {
7389 #ifdef ASM_QUAD
7390 directive = ASM_QUAD;
7391 #else
7392 abort ();
7393 #endif
7394 }
7395
7396 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7397 }
7398
7399 void
7400 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7401 {
7402 if (TARGET_64BIT)
7403 fprintf (file, "%s%s%d-%s%d\n",
7404 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7405 else if (HAVE_AS_GOTOFF_IN_DATA)
7406 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7407 #if TARGET_MACHO
7408 else if (TARGET_MACHO)
7409 {
7410 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7411 machopic_output_function_base_name (file);
7412 fprintf (file, "\n");
7413 }
7414 #endif
7415 else
7416 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7417 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7418 }
7419 \f
7420 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7421 for the target. */
7422
7423 void
7424 ix86_expand_clear (rtx dest)
7425 {
7426 rtx tmp;
7427
7428 /* We play register width games, which are only valid after reload. */
7429 if (!reload_completed)
7430 abort ();
7431
7432 /* Avoid HImode and its attendant prefix byte. */
7433 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7434 dest = gen_rtx_REG (SImode, REGNO (dest));
7435
7436 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7437
7438 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7439 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7440 {
7441 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7442 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7443 }
7444
7445 emit_insn (tmp);
7446 }
7447
7448 /* X is an unchanging MEM. If it is a constant pool reference, return
7449 the constant pool rtx, else NULL. */
7450
7451 rtx
7452 maybe_get_pool_constant (rtx x)
7453 {
7454 x = ix86_delegitimize_address (XEXP (x, 0));
7455
7456 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7457 return get_pool_constant (x);
7458
7459 return NULL_RTX;
7460 }
7461
7462 void
7463 ix86_expand_move (enum machine_mode mode, rtx operands[])
7464 {
7465 int strict = (reload_in_progress || reload_completed);
7466 rtx op0, op1;
7467 enum tls_model model;
7468
7469 op0 = operands[0];
7470 op1 = operands[1];
7471
7472 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7473 if (model)
7474 {
7475 op1 = legitimize_tls_address (op1, model, true);
7476 op1 = force_operand (op1, op0);
7477 if (op1 == op0)
7478 return;
7479 }
7480
7481 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7482 {
7483 #if TARGET_MACHO
7484 if (MACHOPIC_PURE)
7485 {
7486 rtx temp = ((reload_in_progress
7487 || ((op0 && GET_CODE (op0) == REG)
7488 && mode == Pmode))
7489 ? op0 : gen_reg_rtx (Pmode));
7490 op1 = machopic_indirect_data_reference (op1, temp);
7491 op1 = machopic_legitimize_pic_address (op1, mode,
7492 temp == op1 ? 0 : temp);
7493 }
7494 else if (MACHOPIC_INDIRECT)
7495 op1 = machopic_indirect_data_reference (op1, 0);
7496 if (op0 == op1)
7497 return;
7498 #else
7499 if (GET_CODE (op0) == MEM)
7500 op1 = force_reg (Pmode, op1);
7501 else
7502 op1 = legitimize_address (op1, op1, Pmode);
7503 #endif /* TARGET_MACHO */
7504 }
7505 else
7506 {
7507 if (GET_CODE (op0) == MEM
7508 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7509 || !push_operand (op0, mode))
7510 && GET_CODE (op1) == MEM)
7511 op1 = force_reg (mode, op1);
7512
7513 if (push_operand (op0, mode)
7514 && ! general_no_elim_operand (op1, mode))
7515 op1 = copy_to_mode_reg (mode, op1);
7516
7517 /* Force large constants in 64-bit compilation into a register
7518 to get them CSEd. */
7519 if (TARGET_64BIT && mode == DImode
7520 && immediate_operand (op1, mode)
7521 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7522 && !register_operand (op0, mode)
7523 && optimize && !reload_completed && !reload_in_progress)
7524 op1 = copy_to_mode_reg (mode, op1);
7525
7526 if (FLOAT_MODE_P (mode))
7527 {
7528 /* If we are loading a floating point constant to a register,
7529 force the value to memory now, since we'll get better code
7530 out of the back end. */
7531
7532 if (strict)
7533 ;
7534 else if (GET_CODE (op1) == CONST_DOUBLE)
7535 {
7536 op1 = validize_mem (force_const_mem (mode, op1));
7537 if (!register_operand (op0, mode))
7538 {
7539 rtx temp = gen_reg_rtx (mode);
7540 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7541 emit_move_insn (op0, temp);
7542 return;
7543 }
7544 }
7545 }
7546 }
7547
7548 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7549 }
7550
7551 void
7552 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7553 {
7554 rtx op0 = operands[0], op1 = operands[1];
7555
7556 /* Force constants other than zero into memory. We do not know how
7557 the instructions used to build constants modify the upper 64 bits
7558 of the register; once we have that information, we may be able
7559 to handle some of them more efficiently. */
7560 if ((reload_in_progress | reload_completed) == 0
7561 && register_operand (op0, mode)
7562 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7563 op1 = validize_mem (force_const_mem (mode, op1));
7564
7565 /* Make operand1 a register if it isn't already. */
7566 if (!no_new_pseudos
7567 && !register_operand (op0, mode)
7568 && !register_operand (op1, mode))
7569 {
7570 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7571 return;
7572 }
7573
7574 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7575 }
7576
7577 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7578 straight to ix86_expand_vector_move. */
7579
7580 void
7581 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7582 {
7583 rtx op0, op1, m;
7584
7585 op0 = operands[0];
7586 op1 = operands[1];
7587
7588 if (MEM_P (op1))
7589 {
7590 /* If we're optimizing for size, movups is the smallest. */
7591 if (optimize_size)
7592 {
7593 op0 = gen_lowpart (V4SFmode, op0);
7594 op1 = gen_lowpart (V4SFmode, op1);
7595 emit_insn (gen_sse_movups (op0, op1));
7596 return;
7597 }
7598
7599 /* ??? If we have typed data, then it would appear that using
7600 movdqu is the only way to get unaligned data loaded with
7601 integer type. */
7602 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7603 {
7604 op0 = gen_lowpart (V16QImode, op0);
7605 op1 = gen_lowpart (V16QImode, op1);
7606 emit_insn (gen_sse2_movdqu (op0, op1));
7607 return;
7608 }
7609
7610 if (TARGET_SSE2 && mode == V2DFmode)
7611 {
7612 /* When SSE registers are split into halves, we can avoid
7613 writing to the top half twice. */
7614 if (TARGET_SSE_SPLIT_REGS)
7615 {
7616 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7617 m = adjust_address (op1, DFmode, 0);
7618 emit_insn (gen_sse2_loadlpd (op0, op0, m));
7619 m = adjust_address (op1, DFmode, 8);
7620 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7621 }
7622 else
7623 {
7624 /* ??? Not sure about the best option for the Intel chips.
7625 The following would seem to satisfy; the register is
7626 entirely cleared, breaking the dependency chain. We
7627 then store to the upper half, with a dependency depth
7628 of one. A rumor has it that Intel recommends two movsd
7629 followed by an unpacklpd, but this is unconfirmed. And
7630 given that the dependency depth of the unpacklpd would
7631 still be one, I'm not sure why this would be better. */
7632 m = adjust_address (op1, DFmode, 0);
7633 emit_insn (gen_sse2_loadsd (op0, m));
7634 m = adjust_address (op1, DFmode, 8);
7635 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7636 }
7637 }
7638 else
7639 {
7640 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7641 emit_move_insn (op0, CONST0_RTX (mode));
7642 else
7643 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7644
7645 op0 = gen_lowpart (V4SFmode, op0);
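/* movlps and movhps each transfer only 64 bits, so the unaligned 128-bit
   load is assembled from the two halves; zeroing or clobbering op0 first
   avoids a false dependency on its previous contents.  */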
7646 m = adjust_address (op1, V4SFmode, 0);
7647 emit_insn (gen_sse_movlps (op0, op0, m));
7648 m = adjust_address (op1, V4SFmode, 8);
7649 emit_insn (gen_sse_movhps (op0, op0, m));
7650 }
7651 }
7652 else if (MEM_P (op0))
7653 {
7654 /* If we're optimizing for size, movups is the smallest. */
7655 if (optimize_size)
7656 {
7657 op0 = gen_lowpart (V4SFmode, op0);
7658 op1 = gen_lowpart (V4SFmode, op1);
7659 emit_insn (gen_sse_movups (op0, op1));
7660 return;
7661 }
7662
7663 /* ??? Similar to above, only less clear because of quote
7664 typeless stores unquote. */
7665 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7666 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7667 {
7668 op0 = gen_lowpart (V16QImode, op0);
7669 op1 = gen_lowpart (V16QImode, op1);
7670 emit_insn (gen_sse2_movdqu (op0, op1));
7671 return;
7672 }
7673
7674 if (TARGET_SSE2 && mode == V2DFmode)
7675 {
7676 m = adjust_address (op0, DFmode, 0);
7677 emit_insn (gen_sse2_storelpd (m, op1));
7678 m = adjust_address (op0, DFmode, 8);
7679 emit_insn (gen_sse2_storehpd (m, op1));
7680 return;
7681 }
7682 else
7683 {
7684 op1 = gen_lowpart (V4SFmode, op1);
7685 m = adjust_address (op0, V4SFmode, 0);
7686 emit_insn (gen_sse_movlps (m, m, op1));
7687 m = adjust_address (op0, V4SFmode, 8);
7688 emit_insn (gen_sse_movhps (m, m, op1));
7689 return;
7690 }
7691 }
7692 else
7693 gcc_unreachable ();
7694 }
7695
7696
7697 /* Attempt to expand a binary operator. Make the expansion closer to the
7698 actual machine than just general_operand, which would allow 3 separate
7699 memory references (one output, two input) in a single insn. */
7700
7701 void
7702 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7703 rtx operands[])
7704 {
7705 int matching_memory;
7706 rtx src1, src2, dst, op, clob;
7707
7708 dst = operands[0];
7709 src1 = operands[1];
7710 src2 = operands[2];
7711
7712 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7713 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7714 && (rtx_equal_p (dst, src2)
7715 || immediate_operand (src1, mode)))
7716 {
7717 rtx temp = src1;
7718 src1 = src2;
7719 src2 = temp;
7720 }
7721
7722 /* If the destination is memory, and we do not have matching source
7723 operands, do things in registers. */
7724 matching_memory = 0;
7725 if (GET_CODE (dst) == MEM)
7726 {
7727 if (rtx_equal_p (dst, src1))
7728 matching_memory = 1;
7729 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7730 && rtx_equal_p (dst, src2))
7731 matching_memory = 2;
7732 else
7733 dst = gen_reg_rtx (mode);
7734 }
7735
7736 /* Both source operands cannot be in memory. */
7737 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7738 {
7739 if (matching_memory != 2)
7740 src2 = force_reg (mode, src2);
7741 else
7742 src1 = force_reg (mode, src1);
7743 }
7744
7745 /* If the operation is not commutative, source 1 cannot be a constant
7746 or non-matching memory. */
7747 if ((CONSTANT_P (src1)
7748 || (!matching_memory && GET_CODE (src1) == MEM))
7749 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7750 src1 = force_reg (mode, src1);
7751
7752 /* If optimizing, copy to regs to improve CSE. */
7753 if (optimize && ! no_new_pseudos)
7754 {
7755 if (GET_CODE (dst) == MEM)
7756 dst = gen_reg_rtx (mode);
7757 if (GET_CODE (src1) == MEM)
7758 src1 = force_reg (mode, src1);
7759 if (GET_CODE (src2) == MEM)
7760 src2 = force_reg (mode, src2);
7761 }
7762
7763 /* Emit the instruction. */
7764
7765 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7766 if (reload_in_progress)
7767 {
7768 /* Reload doesn't know about the flags register, and doesn't know that
7769 it doesn't want to clobber it. We can only do this with PLUS. */
7770 if (code != PLUS)
7771 abort ();
7772 emit_insn (op);
7773 }
7774 else
7775 {
7776 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7777 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7778 }
7779
7780 /* Fix up the destination if needed. */
7781 if (dst != operands[0])
7782 emit_move_insn (operands[0], dst);
7783 }
7784
7785 /* Return TRUE or FALSE depending on whether the binary operator meets the
7786 appropriate constraints. */
7787
7788 int
7789 ix86_binary_operator_ok (enum rtx_code code,
7790 enum machine_mode mode ATTRIBUTE_UNUSED,
7791 rtx operands[3])
7792 {
7793 /* Both source operands cannot be in memory. */
7794 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7795 return 0;
7796 /* If the operation is not commutative, source 1 cannot be a constant. */
7797 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7798 return 0;
7799 /* If the destination is memory, we must have a matching source operand. */
7800 if (GET_CODE (operands[0]) == MEM
7801 && ! (rtx_equal_p (operands[0], operands[1])
7802 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7803 && rtx_equal_p (operands[0], operands[2]))))
7804 return 0;
7805 /* If the operation is not commutative and source 1 is memory, we must
7806 have a matching destination. */
7807 if (GET_CODE (operands[1]) == MEM
7808 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7809 && ! rtx_equal_p (operands[0], operands[1]))
7810 return 0;
7811 return 1;
7812 }
7813
7814 /* Attempt to expand a unary operator. Make the expansion closer to the
7815 actual machine than just general_operand, which would allow 2 separate
7816 memory references (one output, one input) in a single insn. */
7817
7818 void
7819 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7820 rtx operands[])
7821 {
7822 int matching_memory;
7823 rtx src, dst, op, clob;
7824
7825 dst = operands[0];
7826 src = operands[1];
7827
7828 /* If the destination is memory, and we do not have matching source
7829 operands, do things in registers. */
7830 matching_memory = 0;
7831 if (MEM_P (dst))
7832 {
7833 if (rtx_equal_p (dst, src))
7834 matching_memory = 1;
7835 else
7836 dst = gen_reg_rtx (mode);
7837 }
7838
7839 /* When source operand is memory, destination must match. */
7840 if (MEM_P (src) && !matching_memory)
7841 src = force_reg (mode, src);
7842
7843 /* If optimizing, copy to regs to improve CSE. */
7844 if (optimize && ! no_new_pseudos)
7845 {
7846 if (GET_CODE (dst) == MEM)
7847 dst = gen_reg_rtx (mode);
7848 if (GET_CODE (src) == MEM)
7849 src = force_reg (mode, src);
7850 }
7851
7852 /* Emit the instruction. */
7853
7854 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7855 if (reload_in_progress || code == NOT)
7856 {
7857 /* Reload doesn't know about the flags register, and doesn't know that
7858 it doesn't want to clobber it. */
7859 if (code != NOT)
7860 abort ();
7861 emit_insn (op);
7862 }
7863 else
7864 {
7865 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7866 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7867 }
7868
7869 /* Fix up the destination if needed. */
7870 if (dst != operands[0])
7871 emit_move_insn (operands[0], dst);
7872 }
7873
7874 /* Return TRUE or FALSE depending on whether the unary operator meets the
7875 appropriate constraints. */
7876
7877 int
7878 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7879 enum machine_mode mode ATTRIBUTE_UNUSED,
7880 rtx operands[2] ATTRIBUTE_UNUSED)
7881 {
7882 /* If one of operands is memory, source and destination must match. */
7883 if ((GET_CODE (operands[0]) == MEM
7884 || GET_CODE (operands[1]) == MEM)
7885 && ! rtx_equal_p (operands[0], operands[1]))
7886 return FALSE;
7887 return TRUE;
7888 }
7889
7890 /* Generate code for floating point ABS or NEG. */
7891
7892 void
7893 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7894 rtx operands[])
7895 {
7896 rtx mask, set, use, clob, dst, src;
7897 bool matching_memory;
7898 bool use_sse = false;
7899
7900 if (TARGET_SSE_MATH)
7901 {
7902 if (mode == SFmode)
7903 use_sse = true;
7904 else if (mode == DFmode && TARGET_SSE2)
7905 use_sse = true;
7906 }
7907
7908 /* NEG and ABS performed with SSE use bitwise mask operations.
7909 Create the appropriate mask now. */
7910 if (use_sse)
7911 {
7912 HOST_WIDE_INT hi, lo;
7913 int shift = 63;
7914
7915 /* Find the sign bit, sign extended to 2*HWI. */
7916 if (mode == SFmode)
7917 lo = 0x80000000, hi = lo < 0;
7918 else if (HOST_BITS_PER_WIDE_INT >= 64)
7919 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
7920 else
7921 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
7922
7923 /* If we're looking for the absolute value, then we want
7924 the complement. */
7925 if (code == ABS)
7926 lo = ~lo, hi = ~hi;
7927
7928 /* Force this value into the low part of a fp vector constant. */
7929 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
7930 mask = gen_lowpart (mode, mask);
7931 if (mode == SFmode)
7932 mask = gen_rtx_CONST_VECTOR (V4SFmode,
7933 gen_rtvec (4, mask, CONST0_RTX (SFmode),
7934 CONST0_RTX (SFmode),
7935 CONST0_RTX (SFmode)));
7936 else
7937 mask = gen_rtx_CONST_VECTOR (V2DFmode,
7938 gen_rtvec (2, mask, CONST0_RTX (DFmode)));
7939 mask = force_reg (GET_MODE (mask), mask);
7940 }
7941 else
7942 {
7943 /* When not using SSE, we don't use the mask, but prefer to keep the
7944 same general form of the insn pattern to reduce duplication when
7945 it comes time to split. */
7946 mask = const0_rtx;
7947 }
7948
7949 dst = operands[0];
7950 src = operands[1];
7951
7952 /* If the destination is memory, and we don't have matching source
7953 operands, do things in registers. */
7954 matching_memory = false;
7955 if (MEM_P (dst))
7956 {
7957 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
7958 matching_memory = true;
7959 else
7960 dst = gen_reg_rtx (mode);
7961 }
7962 if (MEM_P (src) && !matching_memory)
7963 src = force_reg (mode, src);
7964
7965 set = gen_rtx_fmt_e (code, mode, src);
7966 set = gen_rtx_SET (VOIDmode, dst, set);
7967 use = gen_rtx_USE (VOIDmode, mask);
7968 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7969 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
7970
7971 if (dst != operands[0])
7972 emit_move_insn (operands[0], dst);
7973 }
7974
7975 /* Return TRUE or FALSE depending on whether the first SET in INSN
7976 has source and destination with matching CC modes and whether the
7977 CC mode is at least as constrained as REQ_MODE. */
7978
7979 int
7980 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7981 {
7982 rtx set;
7983 enum machine_mode set_mode;
7984
7985 set = PATTERN (insn);
7986 if (GET_CODE (set) == PARALLEL)
7987 set = XVECEXP (set, 0, 0);
7988 if (GET_CODE (set) != SET)
7989 abort ();
7990 if (GET_CODE (SET_SRC (set)) != COMPARE)
7991 abort ();
7992
7993 set_mode = GET_MODE (SET_DEST (set));
7994 switch (set_mode)
7995 {
7996 case CCNOmode:
7997 if (req_mode != CCNOmode
7998 && (req_mode != CCmode
7999 || XEXP (SET_SRC (set), 1) != const0_rtx))
8000 return 0;
8001 break;
8002 case CCmode:
8003 if (req_mode == CCGCmode)
8004 return 0;
8005 /* FALLTHRU */
8006 case CCGCmode:
8007 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8008 return 0;
8009 /* FALLTHRU */
8010 case CCGOCmode:
8011 if (req_mode == CCZmode)
8012 return 0;
8013 /* FALLTHRU */
8014 case CCZmode:
8015 break;
8016
8017 default:
8018 abort ();
8019 }
8020
8021 return (GET_MODE (SET_SRC (set)) == set_mode);
8022 }
8023
8024 /* Generate insn patterns to do an integer compare of OPERANDS. */
8025
8026 static rtx
8027 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8028 {
8029 enum machine_mode cmpmode;
8030 rtx tmp, flags;
8031
8032 cmpmode = SELECT_CC_MODE (code, op0, op1);
8033 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8034
8035 /* This is very simple, but making the interface the same as in the
8036 FP case makes the rest of the code easier. */
8037 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8038 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8039
8040 /* Return the test that should be put into the flags user, i.e.
8041 the bcc, scc, or cmov instruction. */
8042 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8043 }
8044
8045 /* Figure out whether to use ordered or unordered fp comparisons.
8046 Return the appropriate mode to use. */
8047
8048 enum machine_mode
8049 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8050 {
8051 /* ??? In order to make all comparisons reversible, we do all comparisons
8052 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8053 all forms of trapping and nontrapping comparisons, we can make inequality
8054 comparisons trapping again, since that results in better code when using
8055 FCOM based compares. */
8056 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8057 }
8058
8059 enum machine_mode
8060 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8061 {
8062 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8063 return ix86_fp_compare_mode (code);
8064 switch (code)
8065 {
8066 /* Only zero flag is needed. */
8067 case EQ: /* ZF=0 */
8068 case NE: /* ZF!=0 */
8069 return CCZmode;
8070 /* Codes needing carry flag. */
8071 case GEU: /* CF=0 */
8072 case GTU: /* CF=0 & ZF=0 */
8073 case LTU: /* CF=1 */
8074 case LEU: /* CF=1 | ZF=1 */
8075 return CCmode;
8076 /* Codes possibly doable only with sign flag when
8077 comparing against zero. */
8078 case GE: /* SF=OF or SF=0 */
8079 case LT: /* SF<>OF or SF=1 */
8080 if (op1 == const0_rtx)
8081 return CCGOCmode;
8082 else
8083 /* For other cases Carry flag is not required. */
8084 return CCGCmode;
8085 /* Codes doable only with the sign flag when comparing
8086 against zero, but we lack a jump instruction for that,
8087 so we need to use relational tests against the overflow
8088 flag, which thus needs to be zero. */
8089 case GT: /* ZF=0 & SF=OF */
8090 case LE: /* ZF=1 | SF<>OF */
8091 if (op1 == const0_rtx)
8092 return CCNOmode;
8093 else
8094 return CCGCmode;
8095 /* The strcmp pattern does (use flags), and combine may ask us for the
8096 proper mode. */
8097 case USE:
8098 return CCmode;
8099 default:
8100 abort ();
8101 }
8102 }
8103
8104 /* Return the fixed registers used for condition codes. */
8105
8106 static bool
8107 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8108 {
8109 *p1 = FLAGS_REG;
8110 *p2 = FPSR_REG;
8111 return true;
8112 }
8113
8114 /* If two condition code modes are compatible, return a condition code
8115 mode which is compatible with both. Otherwise, return
8116 VOIDmode. */
8117
8118 static enum machine_mode
8119 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8120 {
8121 if (m1 == m2)
8122 return m1;
8123
8124 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8125 return VOIDmode;
8126
8127 if ((m1 == CCGCmode && m2 == CCGOCmode)
8128 || (m1 == CCGOCmode && m2 == CCGCmode))
8129 return CCGCmode;
8130
8131 switch (m1)
8132 {
8133 default:
8134 abort ();
8135
8136 case CCmode:
8137 case CCGCmode:
8138 case CCGOCmode:
8139 case CCNOmode:
8140 case CCZmode:
8141 switch (m2)
8142 {
8143 default:
8144 return VOIDmode;
8145
8146 case CCmode:
8147 case CCGCmode:
8148 case CCGOCmode:
8149 case CCNOmode:
8150 case CCZmode:
8151 return CCmode;
8152 }
8153
8154 case CCFPmode:
8155 case CCFPUmode:
8156 /* These are only compatible with themselves, which we already
8157 checked above. */
8158 return VOIDmode;
8159 }
8160 }
8161
8162 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8163
8164 int
8165 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8166 {
8167 enum rtx_code swapped_code = swap_condition (code);
8168 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8169 || (ix86_fp_comparison_cost (swapped_code)
8170 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8171 }
8172
8173 /* Swap, force into registers, or otherwise massage the two operands
8174 to a fp comparison. The operands are updated in place; the new
8175 comparison code is returned. */
8176
8177 static enum rtx_code
8178 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8179 {
8180 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8181 rtx op0 = *pop0, op1 = *pop1;
8182 enum machine_mode op_mode = GET_MODE (op0);
8183 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
8184
8185 /* All of the unordered compare instructions only work on registers.
8186 The same is true of the fcomi compare instructions. The same is
8187 true of the XFmode compare instructions if not comparing with
8188 zero (ftst insn is used in this case). */
8189
8190 if (!is_sse
8191 && (fpcmp_mode == CCFPUmode
8192 || (op_mode == XFmode
8193 && ! (standard_80387_constant_p (op0) == 1
8194 || standard_80387_constant_p (op1) == 1))
8195 || ix86_use_fcomi_compare (code)))
8196 {
8197 op0 = force_reg (op_mode, op0);
8198 op1 = force_reg (op_mode, op1);
8199 }
8200 else
8201 {
8202 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8203 things around if they appear profitable, otherwise force op0
8204 into a register. */
8205
8206 if (standard_80387_constant_p (op0) == 0
8207 || (GET_CODE (op0) == MEM
8208 && ! (standard_80387_constant_p (op1) == 0
8209 || GET_CODE (op1) == MEM)))
8210 {
8211 rtx tmp;
8212 tmp = op0, op0 = op1, op1 = tmp;
8213 code = swap_condition (code);
8214 }
8215
8216 if (GET_CODE (op0) != REG)
8217 op0 = force_reg (op_mode, op0);
8218
8219 if (CONSTANT_P (op1))
8220 {
8221 int tmp = standard_80387_constant_p (op1);
8222 if (tmp == 0)
8223 op1 = validize_mem (force_const_mem (op_mode, op1));
8224 else if (tmp == 1)
8225 {
8226 if (TARGET_CMOVE)
8227 op1 = force_reg (op_mode, op1);
8228 }
8229 else
8230 op1 = force_reg (op_mode, op1);
8231 }
8232 }
8233
8234 /* Try to rearrange the comparison to make it cheaper. */
8235 if (ix86_fp_comparison_cost (code)
8236 > ix86_fp_comparison_cost (swap_condition (code))
8237 && (GET_CODE (op1) == REG || !no_new_pseudos))
8238 {
8239 rtx tmp;
8240 tmp = op0, op0 = op1, op1 = tmp;
8241 code = swap_condition (code);
8242 if (GET_CODE (op0) != REG)
8243 op0 = force_reg (op_mode, op0);
8244 }
8245
8246 *pop0 = op0;
8247 *pop1 = op1;
8248 return code;
8249 }
8250
8251 /* Convert the comparison codes we use to represent an FP comparison to the
8252 integer code that will result in a proper branch. Return UNKNOWN if no
8253 such code is available. */
8254
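/* This mapping works because fcomi/fucomi leave ZF and CF looking exactly
   like the result of an unsigned integer compare (see the flag table in
   ix86_fp_comparison_codes below), so GT maps to GTU, UNLT to LTU, and
   so on.  */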
8255 enum rtx_code
8256 ix86_fp_compare_code_to_integer (enum rtx_code code)
8257 {
8258 switch (code)
8259 {
8260 case GT:
8261 return GTU;
8262 case GE:
8263 return GEU;
8264 case ORDERED:
8265 case UNORDERED:
8266 return code;
8267 break;
8268 case UNEQ:
8269 return EQ;
8270 break;
8271 case UNLT:
8272 return LTU;
8273 break;
8274 case UNLE:
8275 return LEU;
8276 break;
8277 case LTGT:
8278 return NE;
8279 break;
8280 default:
8281 return UNKNOWN;
8282 }
8283 }
8284
8285 /* Split comparison code CODE into comparisons we can do using branch
8286 instructions. BYPASS_CODE is the comparison code for the branch that will
8287 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8288 is not required, its value is set to UNKNOWN.
8289 We never require more than two branches. */
8290
8291 void
8292 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8293 enum rtx_code *first_code,
8294 enum rtx_code *second_code)
8295 {
8296 *first_code = code;
8297 *bypass_code = UNKNOWN;
8298 *second_code = UNKNOWN;
8299
8300 /* The fcomi comparison sets flags as follows:
8301
8302 cmp ZF PF CF
8303 > 0 0 0
8304 < 0 0 1
8305 = 1 0 0
8306 un 1 1 1 */
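/* For example, LT cannot be tested with a single "jb": CF is also set for
   unordered operands, for which LT must be false, so under TARGET_IEEE_FP we
   first branch around the test on UNORDERED (PF=1) and only then test UNLT
   (CF=1).  NE instead needs a second branch: ZF=0 misses unordered operands
   (ZF=1 there), yet NE must be true for them, so an extra UNORDERED branch
   to the same target is emitted.  */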
8307
8308 switch (code)
8309 {
8310 case GT: /* GTU - CF=0 & ZF=0 */
8311 case GE: /* GEU - CF=0 */
8312 case ORDERED: /* PF=0 */
8313 case UNORDERED: /* PF=1 */
8314 case UNEQ: /* EQ - ZF=1 */
8315 case UNLT: /* LTU - CF=1 */
8316 case UNLE: /* LEU - CF=1 | ZF=1 */
8317 case LTGT: /* EQ - ZF=0 */
8318 break;
8319 case LT: /* LTU - CF=1 - fails on unordered */
8320 *first_code = UNLT;
8321 *bypass_code = UNORDERED;
8322 break;
8323 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8324 *first_code = UNLE;
8325 *bypass_code = UNORDERED;
8326 break;
8327 case EQ: /* EQ - ZF=1 - fails on unordered */
8328 *first_code = UNEQ;
8329 *bypass_code = UNORDERED;
8330 break;
8331 case NE: /* NE - ZF=0 - fails on unordered */
8332 *first_code = LTGT;
8333 *second_code = UNORDERED;
8334 break;
8335 case UNGE: /* GEU - CF=0 - fails on unordered */
8336 *first_code = GE;
8337 *second_code = UNORDERED;
8338 break;
8339 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8340 *first_code = GT;
8341 *second_code = UNORDERED;
8342 break;
8343 default:
8344 abort ();
8345 }
8346 if (!TARGET_IEEE_FP)
8347 {
8348 *second_code = UNKNOWN;
8349 *bypass_code = UNKNOWN;
8350 }
8351 }
8352
8353 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8354 All following functions use the number of instructions as the cost metric.
8355 In the future this should be tweaked to compute bytes for optimize_size and
8356 take into account the performance of various instructions on various CPUs. */
8357 static int
8358 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8359 {
8360 if (!TARGET_IEEE_FP)
8361 return 4;
8362 /* The cost of code output by ix86_expand_fp_compare. */
8363 switch (code)
8364 {
8365 case UNLE:
8366 case UNLT:
8367 case LTGT:
8368 case GT:
8369 case GE:
8370 case UNORDERED:
8371 case ORDERED:
8372 case UNEQ:
8373 return 4;
8374 break;
8375 case LT:
8376 case NE:
8377 case EQ:
8378 case UNGE:
8379 return 5;
8380 break;
8381 case LE:
8382 case UNGT:
8383 return 6;
8384 break;
8385 default:
8386 abort ();
8387 }
8388 }
8389
8390 /* Return cost of comparison done using fcomi operation.
8391 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8392 static int
8393 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8394 {
8395 enum rtx_code bypass_code, first_code, second_code;
8396 /* Return an arbitrarily high cost when the instruction is not supported - this
8397 prevents gcc from using it. */
8398 if (!TARGET_CMOVE)
8399 return 1024;
8400 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8401 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8402 }
8403
8404 /* Return cost of comparison done using sahf operation.
8405 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8406 static int
8407 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8408 {
8409 enum rtx_code bypass_code, first_code, second_code;
8410 /* Return an arbitrarily high cost when the instruction is not preferred - this
8411 prevents gcc from using it. */
8412 if (!TARGET_USE_SAHF && !optimize_size)
8413 return 1024;
8414 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8415 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8416 }
8417
8418 /* Compute cost of the comparison done using any method.
8419 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8420 static int
8421 ix86_fp_comparison_cost (enum rtx_code code)
8422 {
8423 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8424 int min;
8425
8426 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8427 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8428
8429 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8430 if (min > sahf_cost)
8431 min = sahf_cost;
8432 if (min > fcomi_cost)
8433 min = fcomi_cost;
8434 return min;
8435 }
8436
8437 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8438
8439 static rtx
8440 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8441 rtx *second_test, rtx *bypass_test)
8442 {
8443 enum machine_mode fpcmp_mode, intcmp_mode;
8444 rtx tmp, tmp2;
8445 int cost = ix86_fp_comparison_cost (code);
8446 enum rtx_code bypass_code, first_code, second_code;
8447
8448 fpcmp_mode = ix86_fp_compare_mode (code);
8449 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8450
8451 if (second_test)
8452 *second_test = NULL_RTX;
8453 if (bypass_test)
8454 *bypass_test = NULL_RTX;
8455
8456 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8457
8458 /* Do fcomi/sahf based test when profitable. */
8459 if ((bypass_code == UNKNOWN || bypass_test)
8460 && (second_code == UNKNOWN || second_test)
8461 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8462 {
8463 if (TARGET_CMOVE)
8464 {
8465 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8466 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8467 tmp);
8468 emit_insn (tmp);
8469 }
8470 else
8471 {
8472 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8473 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8474 if (!scratch)
8475 scratch = gen_reg_rtx (HImode);
8476 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8477 emit_insn (gen_x86_sahf_1 (scratch));
8478 }
8479
8480 /* The FP codes work out to act like unsigned. */
8481 intcmp_mode = fpcmp_mode;
8482 code = first_code;
8483 if (bypass_code != UNKNOWN)
8484 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8485 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8486 const0_rtx);
8487 if (second_code != UNKNOWN)
8488 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8489 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8490 const0_rtx);
8491 }
8492 else
8493 {
8494 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8495 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8496 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8497 if (!scratch)
8498 scratch = gen_reg_rtx (HImode);
8499 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8500
8501 /* In the unordered case, we have to check C2 for NaN's, which
8502 doesn't happen to work out to anything nice combination-wise.
8503 So do some bit twiddling on the value we've got in AH to come
8504 up with an appropriate set of condition codes. */
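/* After "fnstsw %ax" the relevant x87 condition bits land in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45 tests all three at once.
   fcom sets C0 for "<", C3 for "=", and all of C3/C2/C0 for unordered,
   which is what the masks below are matching against.  */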
8505
8506 intcmp_mode = CCNOmode;
8507 switch (code)
8508 {
8509 case GT:
8510 case UNGT:
8511 if (code == GT || !TARGET_IEEE_FP)
8512 {
8513 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8514 code = EQ;
8515 }
8516 else
8517 {
8518 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8519 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8520 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8521 intcmp_mode = CCmode;
8522 code = GEU;
8523 }
8524 break;
8525 case LT:
8526 case UNLT:
8527 if (code == LT && TARGET_IEEE_FP)
8528 {
8529 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8530 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8531 intcmp_mode = CCmode;
8532 code = EQ;
8533 }
8534 else
8535 {
8536 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8537 code = NE;
8538 }
8539 break;
8540 case GE:
8541 case UNGE:
8542 if (code == GE || !TARGET_IEEE_FP)
8543 {
8544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8545 code = EQ;
8546 }
8547 else
8548 {
8549 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8550 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8551 GEN_INT (0x01)));
8552 code = NE;
8553 }
8554 break;
8555 case LE:
8556 case UNLE:
8557 if (code == LE && TARGET_IEEE_FP)
8558 {
8559 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8560 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8561 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8562 intcmp_mode = CCmode;
8563 code = LTU;
8564 }
8565 else
8566 {
8567 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8568 code = NE;
8569 }
8570 break;
8571 case EQ:
8572 case UNEQ:
8573 if (code == EQ && TARGET_IEEE_FP)
8574 {
8575 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8576 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8577 intcmp_mode = CCmode;
8578 code = EQ;
8579 }
8580 else
8581 {
8582 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8583 code = NE;
8584 break;
8585 }
8586 break;
8587 case NE:
8588 case LTGT:
8589 if (code == NE && TARGET_IEEE_FP)
8590 {
8591 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8592 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8593 GEN_INT (0x40)));
8594 code = NE;
8595 }
8596 else
8597 {
8598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8599 code = EQ;
8600 }
8601 break;
8602
8603 case UNORDERED:
8604 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8605 code = NE;
8606 break;
8607 case ORDERED:
8608 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8609 code = EQ;
8610 break;
8611
8612 default:
8613 abort ();
8614 }
8615 }
8616
8617 /* Return the test that should be put into the flags user, i.e.
8618 the bcc, scc, or cmov instruction. */
8619 return gen_rtx_fmt_ee (code, VOIDmode,
8620 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8621 const0_rtx);
8622 }
8623
8624 rtx
8625 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8626 {
8627 rtx op0, op1, ret;
8628 op0 = ix86_compare_op0;
8629 op1 = ix86_compare_op1;
8630
8631 if (second_test)
8632 *second_test = NULL_RTX;
8633 if (bypass_test)
8634 *bypass_test = NULL_RTX;
8635
8636 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8637 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8638 second_test, bypass_test);
8639 else
8640 ret = ix86_expand_int_compare (code, op0, op1);
8641
8642 return ret;
8643 }
8644
8645 /* Return true if the CODE will result in nontrivial jump sequence. */
8646 bool
8647 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8648 {
8649 enum rtx_code bypass_code, first_code, second_code;
8650 if (!TARGET_CMOVE)
8651 return true;
8652 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8653 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8654 }
8655
8656 void
8657 ix86_expand_branch (enum rtx_code code, rtx label)
8658 {
8659 rtx tmp;
8660
8661 switch (GET_MODE (ix86_compare_op0))
8662 {
8663 case QImode:
8664 case HImode:
8665 case SImode:
8666 simple:
8667 tmp = ix86_expand_compare (code, NULL, NULL);
8668 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8669 gen_rtx_LABEL_REF (VOIDmode, label),
8670 pc_rtx);
8671 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8672 return;
8673
8674 case SFmode:
8675 case DFmode:
8676 case XFmode:
8677 {
8678 rtvec vec;
8679 int use_fcomi;
8680 enum rtx_code bypass_code, first_code, second_code;
8681
8682 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8683 &ix86_compare_op1);
8684
8685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8686
8687 /* Check whether we will use the natural sequence with one jump. If
8688 so, we can expand jump early. Otherwise delay expansion by
8689 creating compound insn to not confuse optimizers. */
8690 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8691 && TARGET_CMOVE)
8692 {
8693 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8694 gen_rtx_LABEL_REF (VOIDmode, label),
8695 pc_rtx, NULL_RTX, NULL_RTX);
8696 }
8697 else
8698 {
8699 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8700 ix86_compare_op0, ix86_compare_op1);
8701 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8702 gen_rtx_LABEL_REF (VOIDmode, label),
8703 pc_rtx);
8704 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8705
8706 use_fcomi = ix86_use_fcomi_compare (code);
8707 vec = rtvec_alloc (3 + !use_fcomi);
8708 RTVEC_ELT (vec, 0) = tmp;
8709 RTVEC_ELT (vec, 1)
8710 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8711 RTVEC_ELT (vec, 2)
8712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8713 if (! use_fcomi)
8714 RTVEC_ELT (vec, 3)
8715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8716
8717 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8718 }
8719 return;
8720 }
8721
8722 case DImode:
8723 if (TARGET_64BIT)
8724 goto simple;
8725 /* Expand DImode branch into multiple compare+branch. */
8726 {
8727 rtx lo[2], hi[2], label2;
8728 enum rtx_code code1, code2, code3;
8729
8730 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8731 {
8732 tmp = ix86_compare_op0;
8733 ix86_compare_op0 = ix86_compare_op1;
8734 ix86_compare_op1 = tmp;
8735 code = swap_condition (code);
8736 }
8737 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8738 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8739
8740 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8741 avoid two branches. This costs one extra insn, so disable when
8742 optimizing for size. */
8743
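/* That is, a 64-bit (a == b) is tested as
   ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, trading the second conditional
   branch for one extra ALU instruction.  */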
8744 if ((code == EQ || code == NE)
8745 && (!optimize_size
8746 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8747 {
8748 rtx xor0, xor1;
8749
8750 xor1 = hi[0];
8751 if (hi[1] != const0_rtx)
8752 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8753 NULL_RTX, 0, OPTAB_WIDEN);
8754
8755 xor0 = lo[0];
8756 if (lo[1] != const0_rtx)
8757 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8758 NULL_RTX, 0, OPTAB_WIDEN);
8759
8760 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8761 NULL_RTX, 0, OPTAB_WIDEN);
8762
8763 ix86_compare_op0 = tmp;
8764 ix86_compare_op1 = const0_rtx;
8765 ix86_expand_branch (code, label);
8766 return;
8767 }
8768
8769 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8770 op1 is a constant and the low word is zero, then we can just
8771 examine the high word. */
8772
8773 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8774 switch (code)
8775 {
8776 case LT: case LTU: case GE: case GEU:
8777 ix86_compare_op0 = hi[0];
8778 ix86_compare_op1 = hi[1];
8779 ix86_expand_branch (code, label);
8780 return;
8781 default:
8782 break;
8783 }
8784
8785 /* Otherwise, we need two or three jumps. */
8786
8787 label2 = gen_label_rtx ();
8788
8789 code1 = code;
8790 code2 = swap_condition (code);
8791 code3 = unsigned_condition (code);
8792
8793 switch (code)
8794 {
8795 case LT: case GT: case LTU: case GTU:
8796 break;
8797
8798 case LE: code1 = LT; code2 = GT; break;
8799 case GE: code1 = GT; code2 = LT; break;
8800 case LEU: code1 = LTU; code2 = GTU; break;
8801 case GEU: code1 = GTU; code2 = LTU; break;
8802
8803 case EQ: code1 = UNKNOWN; code2 = NE; break;
8804 case NE: code2 = UNKNOWN; break;
8805
8806 default:
8807 abort ();
8808 }
8809
8810 /*
8811 * a < b =>
8812 * if (hi(a) < hi(b)) goto true;
8813 * if (hi(a) > hi(b)) goto false;
8814 * if (lo(a) < lo(b)) goto true;
8815 * false:
8816 */
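/* For LE, for instance, this yields code1 = LT and code2 = GT on the high
   words and code3 = LEU on the low words: the high-word tests decide
   everything except ties, and the tie case falls through to an unsigned
   compare of the low words.  */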
8817
8818 ix86_compare_op0 = hi[0];
8819 ix86_compare_op1 = hi[1];
8820
8821 if (code1 != UNKNOWN)
8822 ix86_expand_branch (code1, label);
8823 if (code2 != UNKNOWN)
8824 ix86_expand_branch (code2, label2);
8825
8826 ix86_compare_op0 = lo[0];
8827 ix86_compare_op1 = lo[1];
8828 ix86_expand_branch (code3, label);
8829
8830 if (code2 != UNKNOWN)
8831 emit_label (label2);
8832 return;
8833 }
8834
8835 default:
8836 abort ();
8837 }
8838 }
8839
8840 /* Split branch based on floating point condition. */
8841 void
8842 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8843 rtx target1, rtx target2, rtx tmp, rtx pushed)
8844 {
8845 rtx second, bypass;
8846 rtx label = NULL_RTX;
8847 rtx condition;
8848 int bypass_probability = -1, second_probability = -1, probability = -1;
8849 rtx i;
8850
8851 if (target2 != pc_rtx)
8852 {
8853 rtx tmp = target2;
8854 code = reverse_condition_maybe_unordered (code);
8855 target2 = target1;
8856 target1 = tmp;
8857 }
8858
8859 condition = ix86_expand_fp_compare (code, op1, op2,
8860 tmp, &second, &bypass);
8861
8862 /* Remove pushed operand from stack. */
8863 if (pushed)
8864 ix86_free_from_memory (GET_MODE (pushed));
8865
8866 if (split_branch_probability >= 0)
8867 {
8868 /* Distribute the probabilities across the jumps.
8869 Assume that BYPASS and SECOND always test
8870 for UNORDERED. */
8871 probability = split_branch_probability;
8872
8873 /* A value of 1 is low enough that the probability does not need
8874 to be updated. Later we may run some experiments and see
8875 whether unordered values are more frequent in practice. */
8876 if (bypass)
8877 bypass_probability = 1;
8878 if (second)
8879 second_probability = 1;
8880 }
8881 if (bypass != NULL_RTX)
8882 {
8883 label = gen_label_rtx ();
8884 i = emit_jump_insn (gen_rtx_SET
8885 (VOIDmode, pc_rtx,
8886 gen_rtx_IF_THEN_ELSE (VOIDmode,
8887 bypass,
8888 gen_rtx_LABEL_REF (VOIDmode,
8889 label),
8890 pc_rtx)));
8891 if (bypass_probability >= 0)
8892 REG_NOTES (i)
8893 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8894 GEN_INT (bypass_probability),
8895 REG_NOTES (i));
8896 }
8897 i = emit_jump_insn (gen_rtx_SET
8898 (VOIDmode, pc_rtx,
8899 gen_rtx_IF_THEN_ELSE (VOIDmode,
8900 condition, target1, target2)));
8901 if (probability >= 0)
8902 REG_NOTES (i)
8903 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8904 GEN_INT (probability),
8905 REG_NOTES (i));
8906 if (second != NULL_RTX)
8907 {
8908 i = emit_jump_insn (gen_rtx_SET
8909 (VOIDmode, pc_rtx,
8910 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8911 target2)));
8912 if (second_probability >= 0)
8913 REG_NOTES (i)
8914 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8915 GEN_INT (second_probability),
8916 REG_NOTES (i));
8917 }
8918 if (label != NULL_RTX)
8919 emit_label (label);
8920 }
8921
8922 int
8923 ix86_expand_setcc (enum rtx_code code, rtx dest)
8924 {
8925 rtx ret, tmp, tmpreg, equiv;
8926 rtx second_test, bypass_test;
8927
8928 if (GET_MODE (ix86_compare_op0) == DImode
8929 && !TARGET_64BIT)
8930 return 0; /* FAIL */
8931
8932 if (GET_MODE (dest) != QImode)
8933 abort ();
8934
8935 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8936 PUT_MODE (ret, QImode);
8937
8938 tmp = dest;
8939 tmpreg = dest;
8940
8941 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8942 if (bypass_test || second_test)
8943 {
8944 rtx test = second_test;
8945 int bypass = 0;
8946 rtx tmp2 = gen_reg_rtx (QImode);
8947 if (bypass_test)
8948 {
8949 if (second_test)
8950 abort ();
8951 test = bypass_test;
8952 bypass = 1;
8953 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8954 }
8955 PUT_MODE (test, QImode);
8956 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8957
8958 if (bypass)
8959 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8960 else
8961 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8962 }
8963
8964 /* Attach a REG_EQUAL note describing the comparison result. */
8965 equiv = simplify_gen_relational (code, QImode,
8966 GET_MODE (ix86_compare_op0),
8967 ix86_compare_op0, ix86_compare_op1);
8968 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8969
8970 return 1; /* DONE */
8971 }
8972
8973 /* Expand a comparison setting or clearing the carry flag. Return true when
8974 successful and set *POP to the comparison operation. */
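/* The LTU/GEU comparisons produced here are consumed by sbb-based sequences
   such as the x86_mov[sd]icc_0_m1 patterns used in ix86_expand_int_movcc
   below, which turn the carry flag into a 0 / -1 mask; that is why only
   carry-flag-shaped tests are accepted.  */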
8975 static bool
8976 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8977 {
8978 enum machine_mode mode =
8979 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8980
8981 /* Do not handle DImode compares, which go through a special path. FP
8982 compares are handled below when possible. */
8983 if ((mode == DImode && !TARGET_64BIT))
8984 return false;
8985 if (FLOAT_MODE_P (mode))
8986 {
8987 rtx second_test = NULL, bypass_test = NULL;
8988 rtx compare_op, compare_seq;
8989
8990 /* Shortcut: following common codes never translate into carry flag compares. */
8991 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8992 || code == ORDERED || code == UNORDERED)
8993 return false;
8994
8995 /* These comparisons require zero flag; swap operands so they won't. */
8996 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8997 && !TARGET_IEEE_FP)
8998 {
8999 rtx tmp = op0;
9000 op0 = op1;
9001 op1 = tmp;
9002 code = swap_condition (code);
9003 }
9004
9005 /* Try to expand the comparison and verify that we end up with a carry flag
9006 based comparison. This fails to be true only when we decide to expand the
9007 comparison using arithmetic, which is not a common scenario. */
9008 start_sequence ();
9009 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9010 &second_test, &bypass_test);
9011 compare_seq = get_insns ();
9012 end_sequence ();
9013
9014 if (second_test || bypass_test)
9015 return false;
9016 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9017 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9018 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9019 else
9020 code = GET_CODE (compare_op);
9021 if (code != LTU && code != GEU)
9022 return false;
9023 emit_insn (compare_seq);
9024 *pop = compare_op;
9025 return true;
9026 }
9027 if (!INTEGRAL_MODE_P (mode))
9028 return false;
9029 switch (code)
9030 {
9031 case LTU:
9032 case GEU:
9033 break;
9034
9035 /* Convert a==0 into (unsigned)a<1. */
9036 case EQ:
9037 case NE:
9038 if (op1 != const0_rtx)
9039 return false;
9040 op1 = const1_rtx;
9041 code = (code == EQ ? LTU : GEU);
9042 break;
9043
9044 /* Convert a>b into b<a or a>=b+1. */
9045 case GTU:
9046 case LEU:
9047 if (GET_CODE (op1) == CONST_INT)
9048 {
9049 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9050 /* Bail out on overflow. We still can swap operands but that
9051 would force loading of the constant into register. */
9052 if (op1 == const0_rtx
9053 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9054 return false;
9055 code = (code == GTU ? GEU : LTU);
9056 }
9057 else
9058 {
9059 rtx tmp = op1;
9060 op1 = op0;
9061 op0 = tmp;
9062 code = (code == GTU ? LTU : GEU);
9063 }
9064 break;
9065
9066 /* Convert a>=0 into (unsigned)a<0x80000000. */
9067 case LT:
9068 case GE:
9069 if (mode == DImode || op1 != const0_rtx)
9070 return false;
9071 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9072 code = (code == LT ? GEU : LTU);
9073 break;
9074 case LE:
9075 case GT:
9076 if (mode == DImode || op1 != constm1_rtx)
9077 return false;
9078 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9079 code = (code == LE ? GEU : LTU);
9080 break;
9081
9082 default:
9083 return false;
9084 }
9085 /* Swapping operands may cause constant to appear as first operand. */
9086 if (!nonimmediate_operand (op0, VOIDmode))
9087 {
9088 if (no_new_pseudos)
9089 return false;
9090 op0 = force_reg (mode, op0);
9091 }
9092 ix86_compare_op0 = op0;
9093 ix86_compare_op1 = op1;
9094 *pop = ix86_expand_compare (code, NULL, NULL);
9095 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9096 abort ();
9097 return true;
9098 }
9099
9100 int
9101 ix86_expand_int_movcc (rtx operands[])
9102 {
9103 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9104 rtx compare_seq, compare_op;
9105 rtx second_test, bypass_test;
9106 enum machine_mode mode = GET_MODE (operands[0]);
9107 bool sign_bit_compare_p = false;
9108
9109 start_sequence ();
9110 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9111 compare_seq = get_insns ();
9112 end_sequence ();
9113
9114 compare_code = GET_CODE (compare_op);
9115
9116 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9117 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9118 sign_bit_compare_p = true;
9119
9120 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9121 HImode insns, we'd be swallowed in word prefix ops. */
9122
9123 if ((mode != HImode || TARGET_FAST_PREFIX)
9124 && (mode != DImode || TARGET_64BIT)
9125 && GET_CODE (operands[2]) == CONST_INT
9126 && GET_CODE (operands[3]) == CONST_INT)
9127 {
9128 rtx out = operands[0];
9129 HOST_WIDE_INT ct = INTVAL (operands[2]);
9130 HOST_WIDE_INT cf = INTVAL (operands[3]);
9131 HOST_WIDE_INT diff;
9132
9133 diff = ct - cf;
9134 /* Sign bit compares are better done using shifts than by using
9135 sbb. */
9136 if (sign_bit_compare_p
9137 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9138 ix86_compare_op1, &compare_op))
9139 {
9140 /* Detect overlap between destination and compare sources. */
9141 rtx tmp = out;
9142
9143 if (!sign_bit_compare_p)
9144 {
9145 bool fpcmp = false;
9146
9147 compare_code = GET_CODE (compare_op);
9148
9149 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9150 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9151 {
9152 fpcmp = true;
9153 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9154 }
9155
9156 /* To simplify rest of code, restrict to the GEU case. */
9157 if (compare_code == LTU)
9158 {
9159 HOST_WIDE_INT tmp = ct;
9160 ct = cf;
9161 cf = tmp;
9162 compare_code = reverse_condition (compare_code);
9163 code = reverse_condition (code);
9164 }
9165 else
9166 {
9167 if (fpcmp)
9168 PUT_CODE (compare_op,
9169 reverse_condition_maybe_unordered
9170 (GET_CODE (compare_op)));
9171 else
9172 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9173 }
9174 diff = ct - cf;
9175
9176 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9177 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9178 tmp = gen_reg_rtx (mode);
9179
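/* The x86_mov[sd]icc_0_m1 patterns expand to an sbb of a register with
   itself (hence the "_0_m1" in the name): the result is 0 when the carry
   flag is clear and -1 when it is set, and the constant arithmetic below
   reshapes that mask into ct or cf.  */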
9180 if (mode == DImode)
9181 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9182 else
9183 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9184 }
9185 else
9186 {
9187 if (code == GT || code == GE)
9188 code = reverse_condition (code);
9189 else
9190 {
9191 HOST_WIDE_INT tmp = ct;
9192 ct = cf;
9193 cf = tmp;
9194 diff = ct - cf;
9195 }
9196 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9197 ix86_compare_op1, VOIDmode, 0, -1);
9198 }
9199
9200 if (diff == 1)
9201 {
9202 /*
9203 * cmpl op0,op1
9204 * sbbl dest,dest
9205 * [addl dest, ct]
9206 *
9207 * Size 5 - 8.
9208 */
9209 if (ct)
9210 tmp = expand_simple_binop (mode, PLUS,
9211 tmp, GEN_INT (ct),
9212 copy_rtx (tmp), 1, OPTAB_DIRECT);
9213 }
9214 else if (cf == -1)
9215 {
9216 /*
9217 * cmpl op0,op1
9218 * sbbl dest,dest
9219 * orl $ct, dest
9220 *
9221 * Size 8.
9222 */
9223 tmp = expand_simple_binop (mode, IOR,
9224 tmp, GEN_INT (ct),
9225 copy_rtx (tmp), 1, OPTAB_DIRECT);
9226 }
9227 else if (diff == -1 && ct)
9228 {
9229 /*
9230 * cmpl op0,op1
9231 * sbbl dest,dest
9232 * notl dest
9233 * [addl dest, cf]
9234 *
9235 * Size 8 - 11.
9236 */
9237 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9238 if (cf)
9239 tmp = expand_simple_binop (mode, PLUS,
9240 copy_rtx (tmp), GEN_INT (cf),
9241 copy_rtx (tmp), 1, OPTAB_DIRECT);
9242 }
9243 else
9244 {
9245 /*
9246 * cmpl op0,op1
9247 * sbbl dest,dest
9248 * [notl dest]
9249 * andl cf - ct, dest
9250 * [addl dest, ct]
9251 *
9252 * Size 8 - 11.
9253 */
9254
9255 if (cf == 0)
9256 {
9257 cf = ct;
9258 ct = 0;
9259 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9260 }
9261
9262 tmp = expand_simple_binop (mode, AND,
9263 copy_rtx (tmp),
9264 gen_int_mode (cf - ct, mode),
9265 copy_rtx (tmp), 1, OPTAB_DIRECT);
9266 if (ct)
9267 tmp = expand_simple_binop (mode, PLUS,
9268 copy_rtx (tmp), GEN_INT (ct),
9269 copy_rtx (tmp), 1, OPTAB_DIRECT);
9270 }
9271
9272 if (!rtx_equal_p (tmp, out))
9273 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9274
9275 return 1; /* DONE */
9276 }
9277
9278 if (diff < 0)
9279 {
9280 HOST_WIDE_INT tmp;
9281 tmp = ct, ct = cf, cf = tmp;
9282 diff = -diff;
9283 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9284 {
9285 /* We may be reversing an unordered compare to a normal compare, which
9286 is not valid in general (we may convert a non-trapping condition
9287 to a trapping one); however, on i386 we currently emit all
9288 comparisons unordered. */
9289 compare_code = reverse_condition_maybe_unordered (compare_code);
9290 code = reverse_condition_maybe_unordered (code);
9291 }
9292 else
9293 {
9294 compare_code = reverse_condition (compare_code);
9295 code = reverse_condition (code);
9296 }
9297 }
9298
9299 compare_code = UNKNOWN;
9300 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9301 && GET_CODE (ix86_compare_op1) == CONST_INT)
9302 {
9303 if (ix86_compare_op1 == const0_rtx
9304 && (code == LT || code == GE))
9305 compare_code = code;
9306 else if (ix86_compare_op1 == constm1_rtx)
9307 {
9308 if (code == LE)
9309 compare_code = LT;
9310 else if (code == GT)
9311 compare_code = GE;
9312 }
9313 }
9314
9315 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9316 if (compare_code != UNKNOWN
9317 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9318 && (cf == -1 || ct == -1))
9319 {
9320 /* If the lea code below could be used, only optimize
9321 if it results in a two-insn sequence. */
9322
9323 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9324 || diff == 3 || diff == 5 || diff == 9)
9325 || (compare_code == LT && ct == -1)
9326 || (compare_code == GE && cf == -1))
9327 {
9328 /*
9329 * notl op1 (if necessary)
9330 * sarl $31, op1
9331 * orl cf, op1
9332 */
9333 if (ct != -1)
9334 {
9335 cf = ct;
9336 ct = -1;
9337 code = reverse_condition (code);
9338 }
9339
9340 out = emit_store_flag (out, code, ix86_compare_op0,
9341 ix86_compare_op1, VOIDmode, 0, -1);
9342
9343 out = expand_simple_binop (mode, IOR,
9344 out, GEN_INT (cf),
9345 out, 1, OPTAB_DIRECT);
9346 if (out != operands[0])
9347 emit_move_insn (operands[0], out);
9348
9349 return 1; /* DONE */
9350 }
9351 }
9352
9353
9354 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9355 || diff == 3 || diff == 5 || diff == 9)
9356 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9357 && (mode != DImode
9358 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9359 {
9360 /*
9361 * xorl dest,dest
9362 * cmpl op1,op2
9363 * setcc dest
9364 * lea cf(dest*(ct-cf)),dest
9365 *
9366 * Size 14.
9367 *
9368 * This also catches the degenerate setcc-only case.
9369 */
9370
9371 rtx tmp;
9372 int nops;
9373
9374 out = emit_store_flag (out, code, ix86_compare_op0,
9375 ix86_compare_op1, VOIDmode, 0, 1);
9376
9377 nops = 0;
9378 /* On x86_64 the lea instruction operates on Pmode, so we need
9379 to get the arithmetic done in the proper mode to match. */
9380 if (diff == 1)
9381 tmp = copy_rtx (out);
9382 else
9383 {
9384 rtx out1;
9385 out1 = copy_rtx (out);
9386 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9387 nops++;
9388 if (diff & 1)
9389 {
9390 tmp = gen_rtx_PLUS (mode, tmp, out1);
9391 nops++;
9392 }
9393 }
9394 if (cf != 0)
9395 {
9396 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9397 nops++;
9398 }
9399 if (!rtx_equal_p (tmp, out))
9400 {
9401 if (nops == 1)
9402 out = force_operand (tmp, copy_rtx (out));
9403 else
9404 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9405 }
9406 if (!rtx_equal_p (out, operands[0]))
9407 emit_move_insn (operands[0], copy_rtx (out));
9408
9409 return 1; /* DONE */
9410 }
9411
9412 /*
9413 * General case: Jumpful:
9414 * xorl dest,dest cmpl op1, op2
9415 * cmpl op1, op2 movl ct, dest
9416 * setcc dest jcc 1f
9417 * decl dest movl cf, dest
9418 * andl (cf-ct),dest 1:
9419 * addl ct,dest
9420 *
9421 * Size 20. Size 14.
9422 *
9423 * This is reasonably steep, but branch mispredict costs are
9424 * high on modern CPUs, so consider failing only if optimizing
9425 * for space.
9426 */
9427
9428 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9429 && BRANCH_COST >= 2)
9430 {
9431 if (cf == 0)
9432 {
9433 cf = ct;
9434 ct = 0;
9435 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9436 /* We may be reversing an unordered compare into a normal compare,
9437 which is not valid in general (we could convert a non-trapping
9438 condition into a trapping one); however, on i386 we currently
9439 emit all comparisons unordered. */
9440 code = reverse_condition_maybe_unordered (code);
9441 else
9442 {
9443 code = reverse_condition (code);
9444 if (compare_code != UNKNOWN)
9445 compare_code = reverse_condition (compare_code);
9446 }
9447 }
9448
9449 if (compare_code != UNKNOWN)
9450 {
9451 /* notl op1 (if needed)
9452 sarl $31, op1
9453 andl (cf-ct), op1
9454 addl ct, op1
9455
9456 For x < 0 (resp. x <= -1) there will be no notl,
9457 so if possible swap the constants to get rid of the
9458 complement.
9459 True/false will be -1/0 while code below (store flag
9460 followed by decrement) is 0/-1, so the constants need
9461 to be exchanged once more. */
9462
9463 if (compare_code == GE || !cf)
9464 {
9465 code = reverse_condition (code);
9466 compare_code = LT;
9467 }
9468 else
9469 {
9470 HOST_WIDE_INT tmp = cf;
9471 cf = ct;
9472 ct = tmp;
9473 }
9474
9475 out = emit_store_flag (out, code, ix86_compare_op0,
9476 ix86_compare_op1, VOIDmode, 0, -1);
9477 }
9478 else
9479 {
9480 out = emit_store_flag (out, code, ix86_compare_op0,
9481 ix86_compare_op1, VOIDmode, 0, 1);
9482
9483 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9484 copy_rtx (out), 1, OPTAB_DIRECT);
9485 }
9486
9487 out = expand_simple_binop (mode, AND, copy_rtx (out),
9488 gen_int_mode (cf - ct, mode),
9489 copy_rtx (out), 1, OPTAB_DIRECT);
9490 if (ct)
9491 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9492 copy_rtx (out), 1, OPTAB_DIRECT);
9493 if (!rtx_equal_p (out, operands[0]))
9494 emit_move_insn (operands[0], copy_rtx (out));
9495
9496 return 1; /* DONE */
9497 }
9498 }
9499
9500 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9501 {
9502 /* Try a few more things with specific constants and a variable. */
9503
9504 optab op;
9505 rtx var, orig_out, out, tmp;
9506
9507 if (BRANCH_COST <= 2)
9508 return 0; /* FAIL */
9509
9510 /* If one of the two operands is an interesting constant, load a
9511 constant with the above and mask it in with a logical operation. */
9512
9513 if (GET_CODE (operands[2]) == CONST_INT)
9514 {
9515 var = operands[3];
9516 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9517 operands[3] = constm1_rtx, op = and_optab;
9518 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9519 operands[3] = const0_rtx, op = ior_optab;
9520 else
9521 return 0; /* FAIL */
9522 }
9523 else if (GET_CODE (operands[3]) == CONST_INT)
9524 {
9525 var = operands[2];
9526 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9527 operands[2] = constm1_rtx, op = and_optab;
9528 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9529 operands[2] = const0_rtx, op = ior_optab;
9530 else
9531 return 0; /* FAIL */
9532 }
9533 else
9534 return 0; /* FAIL */
9535
9536 orig_out = operands[0];
9537 tmp = gen_reg_rtx (mode);
9538 operands[0] = tmp;
9539
9540 /* Recurse to get the constant loaded. */
9541 if (ix86_expand_int_movcc (operands) == 0)
9542 return 0; /* FAIL */
9543
9544 /* Mask in the interesting variable. */
9545 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9546 OPTAB_WIDEN);
9547 if (!rtx_equal_p (out, orig_out))
9548 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9549
9550 return 1; /* DONE */
9551 }
9552
9553 /*
9554 * For comparison with above,
9555 *
9556 * movl cf,dest
9557 * movl ct,tmp
9558 * cmpl op1,op2
9559 * cmovcc tmp,dest
9560 *
9561 * Size 15.
9562 */
9563
9564 if (! nonimmediate_operand (operands[2], mode))
9565 operands[2] = force_reg (mode, operands[2]);
9566 if (! nonimmediate_operand (operands[3], mode))
9567 operands[3] = force_reg (mode, operands[3]);
9568
9569 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9570 {
9571 rtx tmp = gen_reg_rtx (mode);
9572 emit_move_insn (tmp, operands[3]);
9573 operands[3] = tmp;
9574 }
9575 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9576 {
9577 rtx tmp = gen_reg_rtx (mode);
9578 emit_move_insn (tmp, operands[2]);
9579 operands[2] = tmp;
9580 }
9581
9582 if (! register_operand (operands[2], VOIDmode)
9583 && (mode == QImode
9584 || ! register_operand (operands[3], VOIDmode)))
9585 operands[2] = force_reg (mode, operands[2]);
9586
9587 if (mode == QImode
9588 && ! register_operand (operands[3], VOIDmode))
9589 operands[3] = force_reg (mode, operands[3]);
9590
9591 emit_insn (compare_seq);
9592 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9593 gen_rtx_IF_THEN_ELSE (mode,
9594 compare_op, operands[2],
9595 operands[3])));
9596 if (bypass_test)
9597 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9598 gen_rtx_IF_THEN_ELSE (mode,
9599 bypass_test,
9600 copy_rtx (operands[3]),
9601 copy_rtx (operands[0]))));
9602 if (second_test)
9603 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9604 gen_rtx_IF_THEN_ELSE (mode,
9605 second_test,
9606 copy_rtx (operands[2]),
9607 copy_rtx (operands[0]))));
9608
9609 return 1; /* DONE */
9610 }
9611
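/* A rough instance of the setcc/lea path in ix86_expand_int_movcc above:
   for something like

   int f (unsigned a, unsigned b) { return a < b ? 7 : 3; }

   we have ct = 7, cf = 3, diff = 4, and the expansion is approximately

   xorl dest,dest
   cmpl op0,op1
   setcc dest ; 0 or 1
   leal 3(,dest,4),dest ; 3 or 7

   Register allocation and the exact condition code depend on the ABI and
   on the canonicalizations done earlier in the function.  */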
9612 int
9613 ix86_expand_fp_movcc (rtx operands[])
9614 {
9615 enum rtx_code code;
9616 rtx tmp;
9617 rtx compare_op, second_test, bypass_test;
9618
9619 /* For SF/DFmode conditional moves based on comparisons
9620 in the same mode, we may want to use SSE min/max instructions. */
9621 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9622 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9623 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9624 /* SSE comparisons do not support the LTGT/UNEQ pair. */
9625 && (!TARGET_IEEE_FP
9626 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9627 /* We may be called from the post-reload splitter. */
9628 && (!REG_P (operands[0])
9629 || SSE_REG_P (operands[0])
9630 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9631 {
9632 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9633 code = GET_CODE (operands[1]);
9634
9635 /* See if we have a (cross) match between the comparison operands and
9636 the conditional move operands. */
9637 if (rtx_equal_p (operands[2], op1))
9638 {
9639 rtx tmp = op0;
9640 op0 = op1;
9641 op1 = tmp;
9642 code = reverse_condition_maybe_unordered (code);
9643 }
9644 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9645 {
9646 /* Check for min operation. */
9647 if (code == LT || code == UNLE)
9648 {
9649 if (code == UNLE)
9650 {
9651 rtx tmp = op0;
9652 op0 = op1;
9653 op1 = tmp;
9654 }
9655 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9656 if (memory_operand (op0, VOIDmode))
9657 op0 = force_reg (GET_MODE (operands[0]), op0);
9658 if (GET_MODE (operands[0]) == SFmode)
9659 emit_insn (gen_minsf3 (operands[0], op0, op1));
9660 else
9661 emit_insn (gen_mindf3 (operands[0], op0, op1));
9662 return 1;
9663 }
9664 /* Check for max operation. */
9665 if (code == GT || code == UNGE)
9666 {
9667 if (code == UNGE)
9668 {
9669 rtx tmp = op0;
9670 op0 = op1;
9671 op1 = tmp;
9672 }
9673 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9674 if (memory_operand (op0, VOIDmode))
9675 op0 = force_reg (GET_MODE (operands[0]), op0);
9676 if (GET_MODE (operands[0]) == SFmode)
9677 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9678 else
9679 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9680 return 1;
9681 }
9682 }
9683 /* Massage the condition into a sse_comparison_operator. When we are
9684 in non-IEEE mode, try to canonicalize the destination operand
9685 to be first in the comparison - this helps reload avoid extra
9686 moves. */
9687 if (!sse_comparison_operator (operands[1], VOIDmode)
9688 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9689 {
9690 rtx tmp = ix86_compare_op0;
9691 ix86_compare_op0 = ix86_compare_op1;
9692 ix86_compare_op1 = tmp;
9693 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9694 VOIDmode, ix86_compare_op0,
9695 ix86_compare_op1);
9696 }
9697 /* Similarly, try to make the result the first operand of the
9698 conditional move. The NE comparison is not supported on SSE, so
9699 try to avoid it. */
9700 if ((rtx_equal_p (operands[0], operands[3])
9701 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9702 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9703 {
9704 rtx tmp = operands[2];
9705 operands[2] = operands[3];
9706 operands[3] = tmp;
9707 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9708 (GET_CODE (operands[1])),
9709 VOIDmode, ix86_compare_op0,
9710 ix86_compare_op1);
9711 }
9712 if (GET_MODE (operands[0]) == SFmode)
9713 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9714 operands[2], operands[3],
9715 ix86_compare_op0, ix86_compare_op1));
9716 else
9717 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9718 operands[2], operands[3],
9719 ix86_compare_op0, ix86_compare_op1));
9720 return 1;
9721 }
9722
9723 /* The floating point conditional move instructions don't directly
9724 support conditions resulting from a signed integer comparison. */
9725
9726 code = GET_CODE (operands[1]);
9727 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9728
9732 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9733 {
9734 if (second_test != NULL || bypass_test != NULL)
9735 abort ();
9736 tmp = gen_reg_rtx (QImode);
9737 ix86_expand_setcc (code, tmp);
9738 code = NE;
9739 ix86_compare_op0 = tmp;
9740 ix86_compare_op1 = const0_rtx;
9741 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9742 }
9743 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9744 {
9745 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9746 emit_move_insn (tmp, operands[3]);
9747 operands[3] = tmp;
9748 }
9749 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9750 {
9751 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9752 emit_move_insn (tmp, operands[2]);
9753 operands[2] = tmp;
9754 }
9755
9756 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9757 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9758 compare_op,
9759 operands[2],
9760 operands[3])));
9761 if (bypass_test)
9762 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9763 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9764 bypass_test,
9765 operands[3],
9766 operands[0])));
9767 if (second_test)
9768 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9769 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9770 second_test,
9771 operands[2],
9772 operands[0])));
9773
9774 return 1;
9775 }
9776
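/* As an example of the SSE min/max case at the top of
   ix86_expand_fp_movcc above: with SSE math enabled, something like

   double dmin (double a, double b) { return a < b ? a : b; }

   can go through the mindf3 pattern (a single minsd), provided the
   comparison operands match the conditional move operands and the mode
   and IEEE checks at the top of that function are satisfied.  */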
9777 /* Expand conditional increment or decrement using adc/sbb instructions.
9778 The default case using setcc followed by the conditional move can be
9779 done by generic code. */
9780 int
9781 ix86_expand_int_addcc (rtx operands[])
9782 {
9783 enum rtx_code code = GET_CODE (operands[1]);
9784 rtx compare_op;
9785 rtx val = const0_rtx;
9786 bool fpcmp = false;
9787 enum machine_mode mode = GET_MODE (operands[0]);
9788
9789 if (operands[3] != const1_rtx
9790 && operands[3] != constm1_rtx)
9791 return 0;
9792 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9793 ix86_compare_op1, &compare_op))
9794 return 0;
9795 code = GET_CODE (compare_op);
9796
9797 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9798 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9799 {
9800 fpcmp = true;
9801 code = ix86_fp_compare_code_to_integer (code);
9802 }
9803
9804 if (code != LTU)
9805 {
9806 val = constm1_rtx;
9807 if (fpcmp)
9808 PUT_CODE (compare_op,
9809 reverse_condition_maybe_unordered
9810 (GET_CODE (compare_op)));
9811 else
9812 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9813 }
9814 PUT_MODE (compare_op, mode);
9815
9816 /* Construct either adc or sbb insn. */
9817 if ((code == LTU) == (operands[3] == constm1_rtx))
9818 {
9819 switch (GET_MODE (operands[0]))
9820 {
9821 case QImode:
9822 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9823 break;
9824 case HImode:
9825 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9826 break;
9827 case SImode:
9828 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9829 break;
9830 case DImode:
9831 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9832 break;
9833 default:
9834 abort ();
9835 }
9836 }
9837 else
9838 {
9839 switch (GET_MODE (operands[0]))
9840 {
9841 case QImode:
9842 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9843 break;
9844 case HImode:
9845 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9846 break;
9847 case SImode:
9848 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9849 break;
9850 case DImode:
9851 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9852 break;
9853 default:
9854 abort ();
9855 }
9856 }
9857 return 1; /* DONE */
9858 }
9859
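/* A concrete instance of the conditional increment handled by
   ix86_expand_int_addcc above: for unsigned operands, something like

   if (a < b) x++;

   can use the carry flag directly, roughly

   cmpl b,a ; carry set when a < b (unsigned)
   adcl $0,x ; x += carry

   with a matching sbb form constructed for the other combinations of
   constant and condition.  */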
9860
9861 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9862 works for floating point parameters and non-offsettable memories.
9863 For pushes, it returns just stack offsets; the values will be saved
9864 in the right order. At most three parts are generated. */
9865
9866 static int
9867 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9868 {
9869 int size;
9870
9871 if (!TARGET_64BIT)
9872 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9873 else
9874 size = (GET_MODE_SIZE (mode) + 4) / 8;
9875
9876 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9877 abort ();
9878 if (size < 2 || size > 3)
9879 abort ();
9880
9881 /* Optimize constant pool references to immediates. This is used by fp
9882 moves, which force all constants to memory to allow combining. */
9883 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9884 {
9885 rtx tmp = maybe_get_pool_constant (operand);
9886 if (tmp)
9887 operand = tmp;
9888 }
9889
9890 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9891 {
9892 /* The only non-offsettable memories we handle are pushes. */
9893 if (! push_operand (operand, VOIDmode))
9894 abort ();
9895
9896 operand = copy_rtx (operand);
9897 PUT_MODE (operand, Pmode);
9898 parts[0] = parts[1] = parts[2] = operand;
9899 }
9900 else if (!TARGET_64BIT)
9901 {
9902 if (mode == DImode)
9903 split_di (&operand, 1, &parts[0], &parts[1]);
9904 else
9905 {
9906 if (REG_P (operand))
9907 {
9908 if (!reload_completed)
9909 abort ();
9910 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9911 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9912 if (size == 3)
9913 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9914 }
9915 else if (offsettable_memref_p (operand))
9916 {
9917 operand = adjust_address (operand, SImode, 0);
9918 parts[0] = operand;
9919 parts[1] = adjust_address (operand, SImode, 4);
9920 if (size == 3)
9921 parts[2] = adjust_address (operand, SImode, 8);
9922 }
9923 else if (GET_CODE (operand) == CONST_DOUBLE)
9924 {
9925 REAL_VALUE_TYPE r;
9926 long l[4];
9927
9928 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9929 switch (mode)
9930 {
9931 case XFmode:
9932 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9933 parts[2] = gen_int_mode (l[2], SImode);
9934 break;
9935 case DFmode:
9936 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9937 break;
9938 default:
9939 abort ();
9940 }
9941 parts[1] = gen_int_mode (l[1], SImode);
9942 parts[0] = gen_int_mode (l[0], SImode);
9943 }
9944 else
9945 abort ();
9946 }
9947 }
9948 else
9949 {
9950 if (mode == TImode)
9951 split_ti (&operand, 1, &parts[0], &parts[1]);
9952 if (mode == XFmode || mode == TFmode)
9953 {
9954 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9955 if (REG_P (operand))
9956 {
9957 if (!reload_completed)
9958 abort ();
9959 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9960 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9961 }
9962 else if (offsettable_memref_p (operand))
9963 {
9964 operand = adjust_address (operand, DImode, 0);
9965 parts[0] = operand;
9966 parts[1] = adjust_address (operand, upper_mode, 8);
9967 }
9968 else if (GET_CODE (operand) == CONST_DOUBLE)
9969 {
9970 REAL_VALUE_TYPE r;
9971 long l[4];
9972
9973 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9974 real_to_target (l, &r, mode);
9975
9976 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9977 if (HOST_BITS_PER_WIDE_INT >= 64)
9978 parts[0]
9979 = gen_int_mode
9980 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9981 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9982 DImode);
9983 else
9984 parts[0] = immed_double_const (l[0], l[1], DImode);
9985
9986 if (upper_mode == SImode)
9987 parts[1] = gen_int_mode (l[2], SImode);
9988 else if (HOST_BITS_PER_WIDE_INT >= 64)
9989 parts[1]
9990 = gen_int_mode
9991 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9992 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9993 DImode);
9994 else
9995 parts[1] = immed_double_const (l[2], l[3], DImode);
9996 }
9997 else
9998 abort ();
9999 }
10000 }
10001
10002 return size;
10003 }
10004
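/* For example, on a 32-bit target ix86_split_to_parts above splits the
   DFmode constant 1.0 (bit pattern 0x3ff0000000000000) into the two
   SImode immediates 0x00000000 (low word) and 0x3ff00000 (high word);
   an XFmode operand yields three parts.  */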
10005 /* Emit insns to perform a move or push of DI, DF, and XF values.
10006 All required insns are emitted here. Operands 2-4 receive the
10007 destination parts in the correct order; operands 5-7 receive the
10008 corresponding source parts. */
10009
10010 void
10011 ix86_split_long_move (rtx operands[])
10012 {
10013 rtx part[2][3];
10014 int nparts;
10015 int push = 0;
10016 int collisions = 0;
10017 enum machine_mode mode = GET_MODE (operands[0]);
10018
10019 /* The DFmode expanders may ask us to move a double.
10020 For a 64-bit target this is a single move. By hiding that fact
10021 here we simplify the i386.md splitters. */
10022 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10023 {
10024 /* Optimize constant pool reference to immediates. This is used by
10025 fp moves, that force all constants to memory to allow combining. */
10026
10027 if (GET_CODE (operands[1]) == MEM
10028 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10029 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10030 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10031 if (push_operand (operands[0], VOIDmode))
10032 {
10033 operands[0] = copy_rtx (operands[0]);
10034 PUT_MODE (operands[0], Pmode);
10035 }
10036 else
10037 operands[0] = gen_lowpart (DImode, operands[0]);
10038 operands[1] = gen_lowpart (DImode, operands[1]);
10039 emit_move_insn (operands[0], operands[1]);
10040 return;
10041 }
10042
10043 /* The only non-offsettable memory we handle is a push. */
10044 if (push_operand (operands[0], VOIDmode))
10045 push = 1;
10046 else if (GET_CODE (operands[0]) == MEM
10047 && ! offsettable_memref_p (operands[0]))
10048 abort ();
10049
10050 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10051 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10052
10053 /* When emitting a push, take care of source operands on the stack. */
10054 if (push && GET_CODE (operands[1]) == MEM
10055 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10056 {
10057 if (nparts == 3)
10058 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10059 XEXP (part[1][2], 0));
10060 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10061 XEXP (part[1][1], 0));
10062 }
10063
10064 /* We need to do the copy in the right order in case an address register
10065 of the source overlaps the destination. */
10066 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10067 {
10068 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10069 collisions++;
10070 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10071 collisions++;
10072 if (nparts == 3
10073 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10074 collisions++;
10075
10076 /* Collision in the middle part can be handled by reordering. */
10077 if (collisions == 1 && nparts == 3
10078 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10079 {
10080 rtx tmp;
10081 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10082 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10083 }
10084
10085 /* If there are more collisions, we can't handle it by reordering.
10086 Do an lea to the last part and use only one colliding move. */
10087 else if (collisions > 1)
10088 {
10089 rtx base;
10090
10091 collisions = 1;
10092
10093 base = part[0][nparts - 1];
10094
10095 /* Handle the case when the last part isn't valid for lea.
10096 Happens in 64-bit mode storing the 12-byte XFmode. */
10097 if (GET_MODE (base) != Pmode)
10098 base = gen_rtx_REG (Pmode, REGNO (base));
10099
10100 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10101 part[1][0] = replace_equiv_address (part[1][0], base);
10102 part[1][1] = replace_equiv_address (part[1][1],
10103 plus_constant (base, UNITS_PER_WORD));
10104 if (nparts == 3)
10105 part[1][2] = replace_equiv_address (part[1][2],
10106 plus_constant (base, 8));
10107 }
10108 }
10109
10110 if (push)
10111 {
10112 if (!TARGET_64BIT)
10113 {
10114 if (nparts == 3)
10115 {
10116 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10117 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10118 emit_move_insn (part[0][2], part[1][2]);
10119 }
10120 }
10121 else
10122 {
10123 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10124 register, that is OK - we will just use the larger counterpart. We
10125 also retype the memory - this comes from an attempt to avoid a REX
10126 prefix when moving the second half of a TFmode value. */
10127 if (GET_MODE (part[1][1]) == SImode)
10128 {
10129 if (GET_CODE (part[1][1]) == MEM)
10130 part[1][1] = adjust_address (part[1][1], DImode, 0);
10131 else if (REG_P (part[1][1]))
10132 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10133 else
10134 abort ();
10135 if (GET_MODE (part[1][0]) == SImode)
10136 part[1][0] = part[1][1];
10137 }
10138 }
10139 emit_move_insn (part[0][1], part[1][1]);
10140 emit_move_insn (part[0][0], part[1][0]);
10141 return;
10142 }
10143
10144 /* Choose the correct order so we do not overwrite the source before it is copied. */
10145 if ((REG_P (part[0][0])
10146 && REG_P (part[1][1])
10147 && (REGNO (part[0][0]) == REGNO (part[1][1])
10148 || (nparts == 3
10149 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10150 || (collisions > 0
10151 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10152 {
10153 if (nparts == 3)
10154 {
10155 operands[2] = part[0][2];
10156 operands[3] = part[0][1];
10157 operands[4] = part[0][0];
10158 operands[5] = part[1][2];
10159 operands[6] = part[1][1];
10160 operands[7] = part[1][0];
10161 }
10162 else
10163 {
10164 operands[2] = part[0][1];
10165 operands[3] = part[0][0];
10166 operands[5] = part[1][1];
10167 operands[6] = part[1][0];
10168 }
10169 }
10170 else
10171 {
10172 if (nparts == 3)
10173 {
10174 operands[2] = part[0][0];
10175 operands[3] = part[0][1];
10176 operands[4] = part[0][2];
10177 operands[5] = part[1][0];
10178 operands[6] = part[1][1];
10179 operands[7] = part[1][2];
10180 }
10181 else
10182 {
10183 operands[2] = part[0][0];
10184 operands[3] = part[0][1];
10185 operands[5] = part[1][0];
10186 operands[6] = part[1][1];
10187 }
10188 }
10189
10190 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10191 if (optimize_size)
10192 {
10193 if (GET_CODE (operands[5]) == CONST_INT
10194 && operands[5] != const0_rtx
10195 && REG_P (operands[2]))
10196 {
10197 if (GET_CODE (operands[6]) == CONST_INT
10198 && INTVAL (operands[6]) == INTVAL (operands[5]))
10199 operands[6] = operands[2];
10200
10201 if (nparts == 3
10202 && GET_CODE (operands[7]) == CONST_INT
10203 && INTVAL (operands[7]) == INTVAL (operands[5]))
10204 operands[7] = operands[2];
10205 }
10206
10207 if (nparts == 3
10208 && GET_CODE (operands[6]) == CONST_INT
10209 && operands[6] != const0_rtx
10210 && REG_P (operands[3])
10211 && GET_CODE (operands[7]) == CONST_INT
10212 && INTVAL (operands[7]) == INTVAL (operands[6]))
10213 operands[7] = operands[3];
10214 }
10215
10216 emit_move_insn (operands[2], operands[5]);
10217 emit_move_insn (operands[3], operands[6]);
10218 if (nparts == 3)
10219 emit_move_insn (operands[4], operands[7]);
10220
10221 return;
10222 }
10223
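/* A concrete case of the ordering logic in ix86_split_long_move above:
   for a 32-bit DImode register-to-register move where the destination's
   low word is the same register as the source's high word, e.g.
   source = (%eax, %edx) and destination = (%edx, %ecx) as (low, high)
   pairs, the halves are emitted high part first

   movl %edx,%ecx
   movl %eax,%edx

   so the overlapping register is read before it is overwritten.  */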
10224 /* Helper function of ix86_split_ashldi used to generate an SImode
10225 left shift by a constant, either using a single shift or
10226 a sequence of add instructions. */
10227
10228 static void
10229 ix86_expand_ashlsi3_const (rtx operand, int count)
10230 {
10231 if (count == 1)
10232 emit_insn (gen_addsi3 (operand, operand, operand));
10233 else if (!optimize_size
10234 && count * ix86_cost->add <= ix86_cost->shift_const)
10235 {
10236 int i;
10237 for (i=0; i<count; i++)
10238 emit_insn (gen_addsi3 (operand, operand, operand));
10239 }
10240 else
10241 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10242 }
10243
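/* For instance, under a tuning where an add costs 1 and a constant shift
   costs 3 (and not optimizing for size), ix86_expand_ashlsi3_const above
   emits a shift by 2 or 3 as that many addl instructions, while a shift
   by 4 or more uses a single sall; a shift by 1 is always one addl.  */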
10244 void
10245 ix86_split_ashldi (rtx *operands, rtx scratch)
10246 {
10247 rtx low[2], high[2];
10248 int count;
10249
10250 if (GET_CODE (operands[2]) == CONST_INT)
10251 {
10252 split_di (operands, 2, low, high);
10253 count = INTVAL (operands[2]) & 63;
10254
10255 if (count >= 32)
10256 {
10257 emit_move_insn (high[0], low[1]);
10258 emit_move_insn (low[0], const0_rtx);
10259
10260 if (count > 32)
10261 ix86_expand_ashlsi3_const (high[0], count - 32);
10262 }
10263 else
10264 {
10265 if (!rtx_equal_p (operands[0], operands[1]))
10266 emit_move_insn (operands[0], operands[1]);
10267 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10268 ix86_expand_ashlsi3_const (low[0], count);
10269 }
10270 return;
10271 }
10272
10273 split_di (operands, 1, low, high);
10274
10275 if (operands[1] == const1_rtx)
10276 {
10277 /* Assuming we've chosen QImode-capable registers, 1LL << N
10278 can be done with two 32-bit shifts, no branches, no cmoves. */
10279 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10280 {
10281 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10282
10283 ix86_expand_clear (low[0]);
10284 ix86_expand_clear (high[0]);
10285 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10286
10287 d = gen_lowpart (QImode, low[0]);
10288 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10289 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10290 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10291
10292 d = gen_lowpart (QImode, high[0]);
10293 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10294 s = gen_rtx_NE (QImode, flags, const0_rtx);
10295 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10296 }
10297
10298 /* Otherwise, we can get the same results by manually performing
10299 a bit extract operation on bit 5, and then performing the two
10300 shifts. The two methods of getting 0/1 into low/high are exactly
10301 the same size. Avoiding the shift in the bit extract case helps
10302 pentium4 a bit; no one else seems to care much either way. */
10303 else
10304 {
10305 rtx x;
10306
10307 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10308 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10309 else
10310 x = gen_lowpart (SImode, operands[2]);
10311 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10312
10313 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10314 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10315 emit_move_insn (low[0], high[0]);
10316 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10317 }
10318
10319 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10320 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10321 return;
10322 }
10323
10324 if (operands[1] == constm1_rtx)
10325 {
10326 /* For -1LL << N, we can avoid the shld instruction, because we
10327 know that we're shifting 0...31 ones into a -1. */
10328 emit_move_insn (low[0], constm1_rtx);
10329 if (optimize_size)
10330 emit_move_insn (high[0], low[0]);
10331 else
10332 emit_move_insn (high[0], constm1_rtx);
10333 }
10334 else
10335 {
10336 if (!rtx_equal_p (operands[0], operands[1]))
10337 emit_move_insn (operands[0], operands[1]);
10338
10339 split_di (operands, 1, low, high);
10340 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10341 }
10342
10343 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10344
10345 if (TARGET_CMOVE && scratch)
10346 {
10347 ix86_expand_clear (scratch);
10348 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10349 }
10350 else
10351 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10352 }
10353
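/* A worked instance of the constant-count path in ix86_split_ashldi
   above, on a 32-bit target: for x << 40 (count >= 32) the split is
   roughly

   movl low(x),high(dest)
   movl $0,low(dest)
   sall $8,high(dest) ; 40 - 32

   Only the high result word needs a real shift; the low word is simply
   cleared.  */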
10354 void
10355 ix86_split_ashrdi (rtx *operands, rtx scratch)
10356 {
10357 rtx low[2], high[2];
10358 int count;
10359
10360 if (GET_CODE (operands[2]) == CONST_INT)
10361 {
10362 split_di (operands, 2, low, high);
10363 count = INTVAL (operands[2]) & 63;
10364
10365 if (count == 63)
10366 {
10367 emit_move_insn (high[0], high[1]);
10368 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10369 emit_move_insn (low[0], high[0]);
10370
10371 }
10372 else if (count >= 32)
10373 {
10374 emit_move_insn (low[0], high[1]);
10375 emit_move_insn (high[0], low[0]);
10376 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10377 if (count > 32)
10378 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10379 }
10380 else
10381 {
10382 if (!rtx_equal_p (operands[0], operands[1]))
10383 emit_move_insn (operands[0], operands[1]);
10384 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10385 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10386 }
10387 }
10388 else
10389 {
10390 if (!rtx_equal_p (operands[0], operands[1]))
10391 emit_move_insn (operands[0], operands[1]);
10392
10393 split_di (operands, 1, low, high);
10394
10395 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10396 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10397
10398 if (TARGET_CMOVE && scratch)
10399 {
10400 emit_move_insn (scratch, high[0]);
10401 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10402 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10403 scratch));
10404 }
10405 else
10406 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10407 }
10408 }
10409
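/* Similarly for ix86_split_ashrdi above: a constant arithmetic shift by
   63 needs only the sign bit, so for x >> 63 on a 32-bit target the
   split is roughly

   movl high(x),high(dest)
   sarl $31,high(dest) ; 0 or -1
   movl high(dest),low(dest)

   replicating the sign into both result words.  */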
10410 void
10411 ix86_split_lshrdi (rtx *operands, rtx scratch)
10412 {
10413 rtx low[2], high[2];
10414 int count;
10415
10416 if (GET_CODE (operands[2]) == CONST_INT)
10417 {
10418 split_di (operands, 2, low, high);
10419 count = INTVAL (operands[2]) & 63;
10420
10421 if (count >= 32)
10422 {
10423 emit_move_insn (low[0], high[1]);
10424 ix86_expand_clear (high[0]);
10425
10426 if (count > 32)
10427 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10428 }
10429 else
10430 {
10431 if (!rtx_equal_p (operands[0], operands[1]))
10432 emit_move_insn (operands[0], operands[1]);
10433 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10434 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10435 }
10436 }
10437 else
10438 {
10439 if (!rtx_equal_p (operands[0], operands[1]))
10440 emit_move_insn (operands[0], operands[1]);
10441
10442 split_di (operands, 1, low, high);
10443
10444 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10445 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10446
10447 /* Heh. By reversing the arguments, we can reuse this pattern. */
10448 if (TARGET_CMOVE && scratch)
10449 {
10450 ix86_expand_clear (scratch);
10451 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10452 scratch));
10453 }
10454 else
10455 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10456 }
10457 }
10458
10459 /* Helper function for the string operations below. Test whether
10460 (VARIABLE & VALUE) is zero; if so, jump to the returned label. */
10461 static rtx
10462 ix86_expand_aligntest (rtx variable, int value)
10463 {
10464 rtx label = gen_label_rtx ();
10465 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10466 if (GET_MODE (variable) == DImode)
10467 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10468 else
10469 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10470 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10471 1, label);
10472 return label;
10473 }
10474
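/* The usual calling pattern for ix86_expand_aligntest (see the string
   expanders below) is

   label = ix86_expand_aligntest (destreg, 1);
   ... emit a one-byte operation and adjust the count ...
   emit_label (label);
   LABEL_NUSES (label) = 1;

   so the fix-up code is skipped when the tested address bit is already
   zero.  */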
10475 /* Adjust COUNTREG by subtracting VALUE. */
10476 static void
10477 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10478 {
10479 if (GET_MODE (countreg) == DImode)
10480 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10481 else
10482 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10483 }
10484
10485 /* Zero extend the possibly-SImode EXP into a Pmode register. */
10486 rtx
10487 ix86_zero_extend_to_Pmode (rtx exp)
10488 {
10489 rtx r;
10490 if (GET_MODE (exp) == VOIDmode)
10491 return force_reg (Pmode, exp);
10492 if (GET_MODE (exp) == Pmode)
10493 return copy_to_mode_reg (Pmode, exp);
10494 r = gen_reg_rtx (Pmode);
10495 emit_insn (gen_zero_extendsidi2 (r, exp));
10496 return r;
10497 }
10498
10499 /* Expand a string move (memcpy) operation. Use i386 string operations
10500 when profitable. ix86_expand_clrmem contains similar code. */
10501 int
10502 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10503 {
10504 rtx srcreg, destreg, countreg, srcexp, destexp;
10505 enum machine_mode counter_mode;
10506 HOST_WIDE_INT align = 0;
10507 unsigned HOST_WIDE_INT count = 0;
10508
10509 if (GET_CODE (align_exp) == CONST_INT)
10510 align = INTVAL (align_exp);
10511
10512 /* Can't use any of this if the user has appropriated esi or edi. */
10513 if (global_regs[4] || global_regs[5])
10514 return 0;
10515
10516 /* This simple hack avoids all inlining code and simplifies code below. */
10517 if (!TARGET_ALIGN_STRINGOPS)
10518 align = 64;
10519
10520 if (GET_CODE (count_exp) == CONST_INT)
10521 {
10522 count = INTVAL (count_exp);
10523 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10524 return 0;
10525 }
10526
10527 /* Figure out the proper mode for the counter. For 32 bits it is always
10528 SImode; for 64 bits use SImode when possible, otherwise DImode.
10529 Set count to the number of bytes copied when known at compile time. */
10530 if (!TARGET_64BIT
10531 || GET_MODE (count_exp) == SImode
10532 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10533 counter_mode = SImode;
10534 else
10535 counter_mode = DImode;
10536
10537 if (counter_mode != SImode && counter_mode != DImode)
10538 abort ();
10539
10540 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10541 if (destreg != XEXP (dst, 0))
10542 dst = replace_equiv_address_nv (dst, destreg);
10543 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10544 if (srcreg != XEXP (src, 0))
10545 src = replace_equiv_address_nv (src, srcreg);
10546
10547 /* When optimizing for size, emit a simple rep ; movsb instruction for
10548 counts not divisible by 4. */
10549
10550 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10551 {
10552 emit_insn (gen_cld ());
10553 countreg = ix86_zero_extend_to_Pmode (count_exp);
10554 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10555 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10556 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10557 destexp, srcexp));
10558 }
10559
10560 /* For constant aligned (or small unaligned) copies, use rep movsl
10561 followed by code copying the rest. For PentiumPro, ensure 8-byte
10562 alignment to allow rep movsl acceleration. */
10563
10564 else if (count != 0
10565 && (align >= 8
10566 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10567 || optimize_size || count < (unsigned int) 64))
10568 {
10569 unsigned HOST_WIDE_INT offset = 0;
10570 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10571 rtx srcmem, dstmem;
10572
10573 emit_insn (gen_cld ());
10574 if (count & ~(size - 1))
10575 {
10576 countreg = copy_to_mode_reg (counter_mode,
10577 GEN_INT ((count >> (size == 4 ? 2 : 3))
10578 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10579 countreg = ix86_zero_extend_to_Pmode (countreg);
10580
10581 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10582 GEN_INT (size == 4 ? 2 : 3));
10583 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10584 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10585
10586 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10587 countreg, destexp, srcexp));
10588 offset = count & ~(size - 1);
10589 }
10590 if (size == 8 && (count & 0x04))
10591 {
10592 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10593 offset);
10594 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10595 offset);
10596 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10597 offset += 4;
10598 }
10599 if (count & 0x02)
10600 {
10601 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10602 offset);
10603 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10604 offset);
10605 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10606 offset += 2;
10607 }
10608 if (count & 0x01)
10609 {
10610 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10611 offset);
10612 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10613 offset);
10614 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10615 }
10616 }
10617 /* The generic code based on the glibc implementation:
10618 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10619 allowing accelerated copying there)
10620 - copy the data using rep movsl
10621 - copy the rest. */
10622 else
10623 {
10624 rtx countreg2;
10625 rtx label = NULL;
10626 rtx srcmem, dstmem;
10627 int desired_alignment = (TARGET_PENTIUMPRO
10628 && (count == 0 || count >= (unsigned int) 260)
10629 ? 8 : UNITS_PER_WORD);
10630 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10631 dst = change_address (dst, BLKmode, destreg);
10632 src = change_address (src, BLKmode, srcreg);
10633
10634 /* In case we don't know anything about the alignment, default to the
10635 library version, since it is usually equally fast and results in
10636 shorter code.
10637
10638 Also emit a call when we know that the count is large and the call
10639 overhead will not be important. */
10640 if (!TARGET_INLINE_ALL_STRINGOPS
10641 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10642 return 0;
10643
10644 if (TARGET_SINGLE_STRINGOP)
10645 emit_insn (gen_cld ());
10646
10647 countreg2 = gen_reg_rtx (Pmode);
10648 countreg = copy_to_mode_reg (counter_mode, count_exp);
10649
10650 /* We don't use loops to align the destination or to copy parts smaller
10651 than 4 bytes, because gcc is able to optimize such code better (in
10652 case the destination or the count really is aligned, gcc is often
10653 able to predict the branches) and also it is friendlier to
10654 hardware branch prediction.
10655
10656 Using loops is beneficial for the generic case, because we can
10657 handle small counts with the loops. Many CPUs (such as the Athlon)
10658 have large REP prefix setup costs.
10659
10660 This is quite costly. Maybe we can revisit this decision later or
10661 add some customizability to this code. */
10662
10663 if (count == 0 && align < desired_alignment)
10664 {
10665 label = gen_label_rtx ();
10666 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10667 LEU, 0, counter_mode, 1, label);
10668 }
10669 if (align <= 1)
10670 {
10671 rtx label = ix86_expand_aligntest (destreg, 1);
10672 srcmem = change_address (src, QImode, srcreg);
10673 dstmem = change_address (dst, QImode, destreg);
10674 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10675 ix86_adjust_counter (countreg, 1);
10676 emit_label (label);
10677 LABEL_NUSES (label) = 1;
10678 }
10679 if (align <= 2)
10680 {
10681 rtx label = ix86_expand_aligntest (destreg, 2);
10682 srcmem = change_address (src, HImode, srcreg);
10683 dstmem = change_address (dst, HImode, destreg);
10684 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10685 ix86_adjust_counter (countreg, 2);
10686 emit_label (label);
10687 LABEL_NUSES (label) = 1;
10688 }
10689 if (align <= 4 && desired_alignment > 4)
10690 {
10691 rtx label = ix86_expand_aligntest (destreg, 4);
10692 srcmem = change_address (src, SImode, srcreg);
10693 dstmem = change_address (dst, SImode, destreg);
10694 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10695 ix86_adjust_counter (countreg, 4);
10696 emit_label (label);
10697 LABEL_NUSES (label) = 1;
10698 }
10699
10700 if (label && desired_alignment > 4 && !TARGET_64BIT)
10701 {
10702 emit_label (label);
10703 LABEL_NUSES (label) = 1;
10704 label = NULL_RTX;
10705 }
10706 if (!TARGET_SINGLE_STRINGOP)
10707 emit_insn (gen_cld ());
10708 if (TARGET_64BIT)
10709 {
10710 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10711 GEN_INT (3)));
10712 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10713 }
10714 else
10715 {
10716 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10717 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10718 }
10719 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10720 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10721 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10722 countreg2, destexp, srcexp));
10723
10724 if (label)
10725 {
10726 emit_label (label);
10727 LABEL_NUSES (label) = 1;
10728 }
10729 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10730 {
10731 srcmem = change_address (src, SImode, srcreg);
10732 dstmem = change_address (dst, SImode, destreg);
10733 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10734 }
10735 if ((align <= 4 || count == 0) && TARGET_64BIT)
10736 {
10737 rtx label = ix86_expand_aligntest (countreg, 4);
10738 srcmem = change_address (src, SImode, srcreg);
10739 dstmem = change_address (dst, SImode, destreg);
10740 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10741 emit_label (label);
10742 LABEL_NUSES (label) = 1;
10743 }
10744 if (align > 2 && count != 0 && (count & 2))
10745 {
10746 srcmem = change_address (src, HImode, srcreg);
10747 dstmem = change_address (dst, HImode, destreg);
10748 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10749 }
10750 if (align <= 2 || count == 0)
10751 {
10752 rtx label = ix86_expand_aligntest (countreg, 2);
10753 srcmem = change_address (src, HImode, srcreg);
10754 dstmem = change_address (dst, HImode, destreg);
10755 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10756 emit_label (label);
10757 LABEL_NUSES (label) = 1;
10758 }
10759 if (align > 1 && count != 0 && (count & 1))
10760 {
10761 srcmem = change_address (src, QImode, srcreg);
10762 dstmem = change_address (dst, QImode, destreg);
10763 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10764 }
10765 if (align <= 1 || count == 0)
10766 {
10767 rtx label = ix86_expand_aligntest (countreg, 1);
10768 srcmem = change_address (src, QImode, srcreg);
10769 dstmem = change_address (dst, QImode, destreg);
10770 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10771 emit_label (label);
10772 LABEL_NUSES (label) = 1;
10773 }
10774 }
10775
10776 return 1;
10777 }
10778
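/* A rough example of the constant-count path in ix86_expand_movmem
   above: copying 10 bytes with known 4-byte alignment on a 32-bit
   target (and not tuning for PentiumPro) comes out approximately as

   cld
   movl $2,%ecx
   rep movsl ; 8 bytes
   movsw ; remaining 2 bytes

   with the source and destination addresses set up in %esi and %edi by
   the surrounding code.  */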
10779 /* Expand a string clear operation (bzero). Use i386 string operations
10780 when profitable. ix86_expand_movmem contains similar code. */
10781 int
10782 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10783 {
10784 rtx destreg, zeroreg, countreg, destexp;
10785 enum machine_mode counter_mode;
10786 HOST_WIDE_INT align = 0;
10787 unsigned HOST_WIDE_INT count = 0;
10788
10789 if (GET_CODE (align_exp) == CONST_INT)
10790 align = INTVAL (align_exp);
10791
10792 /* Can't use any of this if the user has appropriated esi. */
10793 if (global_regs[4])
10794 return 0;
10795
10796 /* This simple hack avoids all inlining code and simplifies code below. */
10797 if (!TARGET_ALIGN_STRINGOPS)
10798 align = 32;
10799
10800 if (GET_CODE (count_exp) == CONST_INT)
10801 {
10802 count = INTVAL (count_exp);
10803 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10804 return 0;
10805 }
10806 /* Figure out the proper mode for the counter. For 32 bits it is always
10807 SImode; for 64 bits use SImode when possible, otherwise DImode.
10808 Set count to the number of bytes cleared when known at compile time. */
10809 if (!TARGET_64BIT
10810 || GET_MODE (count_exp) == SImode
10811 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10812 counter_mode = SImode;
10813 else
10814 counter_mode = DImode;
10815
10816 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10817 if (destreg != XEXP (dst, 0))
10818 dst = replace_equiv_address_nv (dst, destreg);
10819
10820
10821 /* When optimizing for size, emit a simple rep ; stosb instruction for
10822 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10823 sequence is 7 bytes long, so if optimizing for size and the count is
10824 small enough that a few stosl, stosw and stosb instructions without
10825 rep are shorter, fall through into the next if. */
10826
10827 if ((!optimize || optimize_size)
10828 && (count == 0
10829 || ((count & 0x03)
10830 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10831 {
10832 emit_insn (gen_cld ());
10833
10834 countreg = ix86_zero_extend_to_Pmode (count_exp);
10835 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10836 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10837 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10838 }
10839 else if (count != 0
10840 && (align >= 8
10841 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10842 || optimize_size || count < (unsigned int) 64))
10843 {
10844 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10845 unsigned HOST_WIDE_INT offset = 0;
10846
10847 emit_insn (gen_cld ());
10848
10849 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10850 if (count & ~(size - 1))
10851 {
10852 unsigned HOST_WIDE_INT repcount;
10853 unsigned int max_nonrep;
10854
10855 repcount = count >> (size == 4 ? 2 : 3);
10856 if (!TARGET_64BIT)
10857 repcount &= 0x3fffffff;
10858
10859 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10860 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10861 bytes. In both cases the latter seems to be faster for small
10862 values of N. */
10863 max_nonrep = size == 4 ? 7 : 4;
10864 if (!optimize_size)
10865 switch (ix86_tune)
10866 {
10867 case PROCESSOR_PENTIUM4:
10868 case PROCESSOR_NOCONA:
10869 max_nonrep = 3;
10870 break;
10871 default:
10872 break;
10873 }
10874
10875 if (repcount <= max_nonrep)
10876 while (repcount-- > 0)
10877 {
10878 rtx mem = adjust_automodify_address_nv (dst,
10879 GET_MODE (zeroreg),
10880 destreg, offset);
10881 emit_insn (gen_strset (destreg, mem, zeroreg));
10882 offset += size;
10883 }
10884 else
10885 {
10886 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10887 countreg = ix86_zero_extend_to_Pmode (countreg);
10888 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10889 GEN_INT (size == 4 ? 2 : 3));
10890 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10891 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10892 destexp));
10893 offset = count & ~(size - 1);
10894 }
10895 }
10896 if (size == 8 && (count & 0x04))
10897 {
10898 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10899 offset);
10900 emit_insn (gen_strset (destreg, mem,
10901 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10902 offset += 4;
10903 }
10904 if (count & 0x02)
10905 {
10906 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10907 offset);
10908 emit_insn (gen_strset (destreg, mem,
10909 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10910 offset += 2;
10911 }
10912 if (count & 0x01)
10913 {
10914 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10915 offset);
10916 emit_insn (gen_strset (destreg, mem,
10917 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10918 }
10919 }
10920 else
10921 {
10922 rtx countreg2;
10923 rtx label = NULL;
10924 /* Compute desired alignment of the string operation. */
10925 int desired_alignment = (TARGET_PENTIUMPRO
10926 && (count == 0 || count >= (unsigned int) 260)
10927 ? 8 : UNITS_PER_WORD);
10928
10929 /* In case we don't know anything about the alignment, default to the
10930 library version, since it is usually equally fast and results in
10931 shorter code.
10932
10933 Also emit a call when we know that the count is large and the call
10934 overhead will not be important. */
10935 if (!TARGET_INLINE_ALL_STRINGOPS
10936 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10937 return 0;
10938
10939 if (TARGET_SINGLE_STRINGOP)
10940 emit_insn (gen_cld ());
10941
10942 countreg2 = gen_reg_rtx (Pmode);
10943 countreg = copy_to_mode_reg (counter_mode, count_exp);
10944 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10945 /* Get rid of MEM_OFFSET, it won't be accurate. */
10946 dst = change_address (dst, BLKmode, destreg);
10947
10948 if (count == 0 && align < desired_alignment)
10949 {
10950 label = gen_label_rtx ();
10951 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10952 LEU, 0, counter_mode, 1, label);
10953 }
10954 if (align <= 1)
10955 {
10956 rtx label = ix86_expand_aligntest (destreg, 1);
10957 emit_insn (gen_strset (destreg, dst,
10958 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10959 ix86_adjust_counter (countreg, 1);
10960 emit_label (label);
10961 LABEL_NUSES (label) = 1;
10962 }
10963 if (align <= 2)
10964 {
10965 rtx label = ix86_expand_aligntest (destreg, 2);
10966 emit_insn (gen_strset (destreg, dst,
10967 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10968 ix86_adjust_counter (countreg, 2);
10969 emit_label (label);
10970 LABEL_NUSES (label) = 1;
10971 }
10972 if (align <= 4 && desired_alignment > 4)
10973 {
10974 rtx label = ix86_expand_aligntest (destreg, 4);
10975 emit_insn (gen_strset (destreg, dst,
10976 (TARGET_64BIT
10977 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10978 : zeroreg)));
10979 ix86_adjust_counter (countreg, 4);
10980 emit_label (label);
10981 LABEL_NUSES (label) = 1;
10982 }
10983
10984 if (label && desired_alignment > 4 && !TARGET_64BIT)
10985 {
10986 emit_label (label);
10987 LABEL_NUSES (label) = 1;
10988 label = NULL_RTX;
10989 }
10990
10991 if (!TARGET_SINGLE_STRINGOP)
10992 emit_insn (gen_cld ());
10993 if (TARGET_64BIT)
10994 {
10995 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10996 GEN_INT (3)));
10997 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10998 }
10999 else
11000 {
11001 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11002 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11003 }
11004 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11005 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11006
11007 if (label)
11008 {
11009 emit_label (label);
11010 LABEL_NUSES (label) = 1;
11011 }
11012
11013 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11014 emit_insn (gen_strset (destreg, dst,
11015 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11016 if (TARGET_64BIT && (align <= 4 || count == 0))
11017 {
11018 rtx label = ix86_expand_aligntest (countreg, 4);
11019 emit_insn (gen_strset (destreg, dst,
11020 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11021 emit_label (label);
11022 LABEL_NUSES (label) = 1;
11023 }
11024 if (align > 2 && count != 0 && (count & 2))
11025 emit_insn (gen_strset (destreg, dst,
11026 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11027 if (align <= 2 || count == 0)
11028 {
11029 rtx label = ix86_expand_aligntest (countreg, 2);
11030 emit_insn (gen_strset (destreg, dst,
11031 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11032 emit_label (label);
11033 LABEL_NUSES (label) = 1;
11034 }
11035 if (align > 1 && count != 0 && (count & 1))
11036 emit_insn (gen_strset (destreg, dst,
11037 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11038 if (align <= 1 || count == 0)
11039 {
11040 rtx label = ix86_expand_aligntest (countreg, 1);
11041 emit_insn (gen_strset (destreg, dst,
11042 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11043 emit_label (label);
11044 LABEL_NUSES (label) = 1;
11045 }
11046 }
11047 return 1;
11048 }
11049
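/* A similar sketch for ix86_expand_clrmem above: clearing 16 bytes with
   known 4-byte alignment on a 32-bit target, not optimizing for size,
   gives a rep count of 4, which is at or below max_nonrep for the
   generic tunings, so the expansion is roughly

   cld
   xorl %eax,%eax
   stosl
   stosl
   stosl
   stosl

   rather than a movl $4, %ecx; rep stosl sequence.  */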
11050 /* Expand strlen. */
11051 int
11052 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11053 {
11054 rtx addr, scratch1, scratch2, scratch3, scratch4;
11055
11056 /* The generic case of the strlen expander is long. Avoid expanding it
11057 unless TARGET_INLINE_ALL_STRINGOPS. */
11058
11059 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11060 && !TARGET_INLINE_ALL_STRINGOPS
11061 && !optimize_size
11062 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11063 return 0;
11064
11065 addr = force_reg (Pmode, XEXP (src, 0));
11066 scratch1 = gen_reg_rtx (Pmode);
11067
11068 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11069 && !optimize_size)
11070 {
11071 /* Well it seems that some optimizer does not combine a call like
11072 foo(strlen(bar), strlen(bar));
11073 when the move and the subtraction are done here. It does calculate
11074 the length just once when these instructions are done inside of
11075 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11076 often used and I use one fewer register for the lifetime of
11077 output_strlen_unroll() this is better. */
11078
11079 emit_move_insn (out, addr);
11080
11081 ix86_expand_strlensi_unroll_1 (out, src, align);
11082
11083 /* strlensi_unroll_1 returns the address of the zero at the end of
11084 the string, like memchr(), so compute the length by subtracting
11085 the start address. */
11086 if (TARGET_64BIT)
11087 emit_insn (gen_subdi3 (out, out, addr));
11088 else
11089 emit_insn (gen_subsi3 (out, out, addr));
11090 }
11091 else
11092 {
11093 rtx unspec;
11094 scratch2 = gen_reg_rtx (Pmode);
11095 scratch3 = gen_reg_rtx (Pmode);
11096 scratch4 = force_reg (Pmode, constm1_rtx);
11097
11098 emit_move_insn (scratch3, addr);
11099 eoschar = force_reg (QImode, eoschar);
11100
11101 emit_insn (gen_cld ());
11102 src = replace_equiv_address_nv (src, scratch3);
11103
11104 /* If .md starts supporting :P, this can be done in .md. */
11105 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11106 scratch4), UNSPEC_SCAS);
11107 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
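/* The count register (scratch4) starts at -1 and the scan decrements it
once for every byte examined, including the terminating zero, so it ends
up holding -(length + 2). The complement plus -1 below therefore
computes ~count - 1 == length. */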
11108 if (TARGET_64BIT)
11109 {
11110 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11111 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11112 }
11113 else
11114 {
11115 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11116 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11117 }
11118 }
11119 return 1;
11120 }
11121
11122 /* Expand the appropriate insns for doing strlen if not just doing
11123 repnz; scasb
11124
11125 out = result, initialized with the start address
11126 align_rtx = alignment of the address.
11127 scratch = scratch register, initialized with the start address when
11128 not aligned, otherwise undefined
11129
11130 This is just the body. It needs the initializations mentioned above and
11131 some address computing at the end. These things are done in i386.md. */
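/* Overview of the expansion below: OUT is first advanced one byte at a
time, testing each byte for the terminator, until the address is 4-byte
aligned. The string is then scanned a word at a time, using
((x - 0x01010101) & ~x & 0x80808080) to detect a zero byte without
per-byte branches, and OUT is finally adjusted to point at the zero byte
within the last word. */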
11132
11133 static void
11134 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11135 {
11136 int align;
11137 rtx tmp;
11138 rtx align_2_label = NULL_RTX;
11139 rtx align_3_label = NULL_RTX;
11140 rtx align_4_label = gen_label_rtx ();
11141 rtx end_0_label = gen_label_rtx ();
11142 rtx mem;
11143 rtx tmpreg = gen_reg_rtx (SImode);
11144 rtx scratch = gen_reg_rtx (SImode);
11145 rtx cmp;
11146
11147 align = 0;
11148 if (GET_CODE (align_rtx) == CONST_INT)
11149 align = INTVAL (align_rtx);
11150
11151 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11152
11153 /* Is there a known alignment and is it less than 4? */
11154 if (align < 4)
11155 {
11156 rtx scratch1 = gen_reg_rtx (Pmode);
11157 emit_move_insn (scratch1, out);
11158 /* Is there a known alignment and is it not 2? */
11159 if (align != 2)
11160 {
11161 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11162 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11163
11164 /* Leave just the 3 lower bits. */
11165 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11166 NULL_RTX, 0, OPTAB_WIDEN);
11167
11168 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11169 Pmode, 1, align_4_label);
11170 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11171 Pmode, 1, align_2_label);
11172 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11173 Pmode, 1, align_3_label);
11174 }
11175 else
11176 {
11177 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11178 check whether it is aligned to a 4-byte boundary. */
11179
11180 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11181 NULL_RTX, 0, OPTAB_WIDEN);
11182
11183 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11184 Pmode, 1, align_4_label);
11185 }
11186
11187 mem = change_address (src, QImode, out);
11188
11189 /* Now compare the bytes. */
11190
11191 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11192 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11193 QImode, 1, end_0_label);
11194
11195 /* Increment the address. */
11196 if (TARGET_64BIT)
11197 emit_insn (gen_adddi3 (out, out, const1_rtx));
11198 else
11199 emit_insn (gen_addsi3 (out, out, const1_rtx));
11200
11201 /* Not needed with an alignment of 2 */
11202 if (align != 2)
11203 {
11204 emit_label (align_2_label);
11205
11206 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11207 end_0_label);
11208
11209 if (TARGET_64BIT)
11210 emit_insn (gen_adddi3 (out, out, const1_rtx));
11211 else
11212 emit_insn (gen_addsi3 (out, out, const1_rtx));
11213
11214 emit_label (align_3_label);
11215 }
11216
11217 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11218 end_0_label);
11219
11220 if (TARGET_64BIT)
11221 emit_insn (gen_adddi3 (out, out, const1_rtx));
11222 else
11223 emit_insn (gen_addsi3 (out, out, const1_rtx));
11224 }
11225
11226 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11227 align this loop: doing so only enlarges the code without making it
11228 any faster. */
11229 emit_label (align_4_label);
11230
11231 mem = change_address (src, SImode, out);
11232 emit_move_insn (scratch, mem);
11233 if (TARGET_64BIT)
11234 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11235 else
11236 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11237
11238 /* This formula yields a nonzero result iff one of the bytes is zero.
11239 This saves three branches inside the loop and many cycles. */
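/* tmpreg = (x - 0x01010101) & ~x & 0x80808080 is nonzero iff the word x
contains a zero byte, and the least significant 0x80 bit that is set
marks the first (lowest-addressed) zero byte. For example, for
x = 0x12345600 the result is 0x00000080, while for x = 0x12345678 it
is 0. */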
11240
11241 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11242 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11243 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11244 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11245 gen_int_mode (0x80808080, SImode)));
11246 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11247 align_4_label);
11248
11249 if (TARGET_CMOVE)
11250 {
11251 rtx reg = gen_reg_rtx (SImode);
11252 rtx reg2 = gen_reg_rtx (Pmode);
11253 emit_move_insn (reg, tmpreg);
11254 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11255
11256 /* If zero is not in the first two bytes, move two bytes forward. */
11257 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11258 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11259 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11260 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11261 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11262 reg,
11263 tmpreg)));
11264 /* Emit lea manually to avoid clobbering of flags. */
11265 emit_insn (gen_rtx_SET (SImode, reg2,
11266 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11267
11268 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11269 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11270 emit_insn (gen_rtx_SET (VOIDmode, out,
11271 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11272 reg2,
11273 out)));
11274
11275 }
11276 else
11277 {
11278 rtx end_2_label = gen_label_rtx ();
11279 /* Is zero in the first two bytes? */
11280
11281 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11282 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11283 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11284 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11285 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11286 pc_rtx);
11287 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11288 JUMP_LABEL (tmp) = end_2_label;
11289
11290 /* Not in the first two. Move two bytes forward. */
11291 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11292 if (TARGET_64BIT)
11293 emit_insn (gen_adddi3 (out, out, const2_rtx));
11294 else
11295 emit_insn (gen_addsi3 (out, out, const2_rtx));
11296
11297 emit_label (end_2_label);
11298
11299 }
11300
11301 /* Avoid a branch when fixing up the final byte offset. */
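/* At this point OUT is four bytes past the first of the two remaining
candidate bytes, and the low byte of tmpreg has bit 7 set iff that first
candidate is the zero byte. Adding the byte to itself copies bit 7 into
the carry flag, so the subtract-with-borrow below turns OUT into OUT - 4
(zero in the earlier byte) or OUT - 3 (zero in the later byte), i.e. the
address of the terminating zero. */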
11302 tmpreg = gen_lowpart (QImode, tmpreg);
11303 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11304 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11305 if (TARGET_64BIT)
11306 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11307 else
11308 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11309
11310 emit_label (end_0_label);
11311 }
11312
11313 void
11314 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11315 rtx callarg2 ATTRIBUTE_UNUSED,
11316 rtx pop, int sibcall)
11317 {
11318 rtx use = NULL, call;
11319
11320 if (pop == const0_rtx)
11321 pop = NULL;
11322 if (TARGET_64BIT && pop)
11323 abort ();
11324
11325 #if TARGET_MACHO
11326 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11327 fnaddr = machopic_indirect_call_target (fnaddr);
11328 #else
11329 /* Static functions and indirect calls don't need the pic register. */
11330 if (! TARGET_64BIT && flag_pic
11331 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11332 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11333 use_reg (&use, pic_offset_table_rtx);
11334
11335 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11336 {
11337 rtx al = gen_rtx_REG (QImode, 0);
11338 emit_move_insn (al, callarg2);
11339 use_reg (&use, al);
11340 }
11341 #endif /* TARGET_MACHO */
11342
11343 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11344 {
11345 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11346 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11347 }
11348 if (sibcall && TARGET_64BIT
11349 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11350 {
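/* %r11 is call-clobbered and is not used for argument passing, so it
can safely hold the target address for the sibling call. */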
11351 rtx addr;
11352 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11353 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11354 emit_move_insn (fnaddr, addr);
11355 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11356 }
11357
11358 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11359 if (retval)
11360 call = gen_rtx_SET (VOIDmode, retval, call);
11361 if (pop)
11362 {
11363 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11364 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11365 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11366 }
11367
11368 call = emit_call_insn (call);
11369 if (use)
11370 CALL_INSN_FUNCTION_USAGE (call) = use;
11371 }
11372
11373 \f
11374 /* Clear stack slot assignments remembered from previous functions.
11375 This is called from INIT_EXPANDERS once before RTL is emitted for each
11376 function. */
11377
11378 static struct machine_function *
11379 ix86_init_machine_status (void)
11380 {
11381 struct machine_function *f;
11382
11383 f = ggc_alloc_cleared (sizeof (struct machine_function));
11384 f->use_fast_prologue_epilogue_nregs = -1;
11385
11386 return f;
11387 }
11388
11389 /* Return a MEM corresponding to a stack slot with mode MODE.
11390 Allocate a new slot if necessary.
11391
11392 The RTL for a function can have several slots available: N is
11393 which slot to use. */
11394
11395 rtx
11396 assign_386_stack_local (enum machine_mode mode, int n)
11397 {
11398 struct stack_local_entry *s;
11399
11400 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11401 abort ();
11402
11403 for (s = ix86_stack_locals; s; s = s->next)
11404 if (s->mode == mode && s->n == n)
11405 return s->rtl;
11406
11407 s = (struct stack_local_entry *)
11408 ggc_alloc (sizeof (struct stack_local_entry));
11409 s->n = n;
11410 s->mode = mode;
11411 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11412
11413 s->next = ix86_stack_locals;
11414 ix86_stack_locals = s;
11415 return s->rtl;
11416 }
11417
11418 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11419
11420 static GTY(()) rtx ix86_tls_symbol;
11421 rtx
11422 ix86_tls_get_addr (void)
11423 {
11424
11425 if (!ix86_tls_symbol)
11426 {
11427 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11428 (TARGET_GNU_TLS && !TARGET_64BIT)
11429 ? "___tls_get_addr"
11430 : "__tls_get_addr");
11431 }
11432
11433 return ix86_tls_symbol;
11434 }
11435 \f
11436 /* Calculate the length of the memory address in the instruction
11437 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11438
11439 int
11440 memory_address_length (rtx addr)
11441 {
11442 struct ix86_address parts;
11443 rtx base, index, disp;
11444 int len;
11445
11446 if (GET_CODE (addr) == PRE_DEC
11447 || GET_CODE (addr) == POST_INC
11448 || GET_CODE (addr) == PRE_MODIFY
11449 || GET_CODE (addr) == POST_MODIFY)
11450 return 0;
11451
11452 if (! ix86_decompose_address (addr, &parts))
11453 abort ();
11454
11455 base = parts.base;
11456 index = parts.index;
11457 disp = parts.disp;
11458 len = 0;
11459
11460 /* Rule of thumb:
11461 - esp as the base always wants an index,
11462 - ebp as the base always wants a displacement. */
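/* For example, (%esp) cannot be encoded with a bare modrm byte and needs
a SIB byte, while (%ebp) with no displacement must be encoded with an
explicit 8-bit displacement of zero; either way costs one extra byte,
which is what the cases below account for. */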
11463
11464 /* Register Indirect. */
11465 if (base && !index && !disp)
11466 {
11467 /* esp (for its index) and ebp (for its displacement) need
11468 the two-byte modrm form. */
11469 if (addr == stack_pointer_rtx
11470 || addr == arg_pointer_rtx
11471 || addr == frame_pointer_rtx
11472 || addr == hard_frame_pointer_rtx)
11473 len = 1;
11474 }
11475
11476 /* Direct Addressing. */
11477 else if (disp && !base && !index)
11478 len = 4;
11479
11480 else
11481 {
11482 /* Find the length of the displacement constant. */
11483 if (disp)
11484 {
11485 if (GET_CODE (disp) == CONST_INT
11486 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11487 && base)
11488 len = 1;
11489 else
11490 len = 4;
11491 }
11492 /* ebp always wants a displacement. */
11493 else if (base == hard_frame_pointer_rtx)
11494 len = 1;
11495
11496 /* An index requires the two-byte modrm form.... */
11497 if (index
11498 /* ...like esp, which always wants an index. */
11499 || base == stack_pointer_rtx
11500 || base == arg_pointer_rtx
11501 || base == frame_pointer_rtx)
11502 len += 1;
11503 }
11504
11505 return len;
11506 }
11507
11508 /* Compute the default value for the "length_immediate" attribute. When
11509 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
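/* For example, "addl $123456, %eax" carries a 4-byte immediate, while an
immediate satisfying the 'K' constraint (a signed 8-bit value such as $4)
uses the sign-extended 8-bit form and is counted as 1 byte when SHORTFORM
is set. */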
11510 int
11511 ix86_attr_length_immediate_default (rtx insn, int shortform)
11512 {
11513 int len = 0;
11514 int i;
11515 extract_insn_cached (insn);
11516 for (i = recog_data.n_operands - 1; i >= 0; --i)
11517 if (CONSTANT_P (recog_data.operand[i]))
11518 {
11519 if (len)
11520 abort ();
11521 if (shortform
11522 && GET_CODE (recog_data.operand[i]) == CONST_INT
11523 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11524 len = 1;
11525 else
11526 {
11527 switch (get_attr_mode (insn))
11528 {
11529 case MODE_QI:
11530 len+=1;
11531 break;
11532 case MODE_HI:
11533 len+=2;
11534 break;
11535 case MODE_SI:
11536 len+=4;
11537 break;
11538 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11539 case MODE_DI:
11540 len+=4;
11541 break;
11542 default:
11543 fatal_insn ("unknown insn mode", insn);
11544 }
11545 }
11546 }
11547 return len;
11548 }
11549 /* Compute default value for "length_address" attribute. */
11550 int
11551 ix86_attr_length_address_default (rtx insn)
11552 {
11553 int i;
11554
11555 if (get_attr_type (insn) == TYPE_LEA)
11556 {
11557 rtx set = PATTERN (insn);
11558 if (GET_CODE (set) == SET)
11559 ;
11560 else if (GET_CODE (set) == PARALLEL
11561 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11562 set = XVECEXP (set, 0, 0);
11563 else
11564 {
11565 #ifdef ENABLE_CHECKING
11566 abort ();
11567 #endif
11568 return 0;
11569 }
11570
11571 return memory_address_length (SET_SRC (set));
11572 }
11573
11574 extract_insn_cached (insn);
11575 for (i = recog_data.n_operands - 1; i >= 0; --i)
11576 if (GET_CODE (recog_data.operand[i]) == MEM)
11577 {
11578 return memory_address_length (XEXP (recog_data.operand[i], 0));
11579 break;
11580 }
11581 return 0;
11582 }
11583 \f
11584 /* Return the maximum number of instructions a cpu can issue. */
11585
11586 static int
11587 ix86_issue_rate (void)
11588 {
11589 switch (ix86_tune)
11590 {
11591 case PROCESSOR_PENTIUM:
11592 case PROCESSOR_K6:
11593 return 2;
11594
11595 case PROCESSOR_PENTIUMPRO:
11596 case PROCESSOR_PENTIUM4:
11597 case PROCESSOR_ATHLON:
11598 case PROCESSOR_K8:
11599 case PROCESSOR_NOCONA:
11600 return 3;
11601
11602 default:
11603 return 1;
11604 }
11605 }
11606
11607 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11608 set by DEP_INSN and nothing else set by DEP_INSN. */
11609
11610 static int
11611 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11612 {
11613 rtx set, set2;
11614
11615 /* Simplify the test for uninteresting insns. */
11616 if (insn_type != TYPE_SETCC
11617 && insn_type != TYPE_ICMOV
11618 && insn_type != TYPE_FCMOV
11619 && insn_type != TYPE_IBR)
11620 return 0;
11621
11622 if ((set = single_set (dep_insn)) != 0)
11623 {
11624 set = SET_DEST (set);
11625 set2 = NULL_RTX;
11626 }
11627 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11628 && XVECLEN (PATTERN (dep_insn), 0) == 2
11629 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11630 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11631 {
11632 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11633 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11634 }
11635 else
11636 return 0;
11637
11638 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11639 return 0;
11640
11641 /* This test is true if the dependent insn reads the flags but
11642 not any other potentially set register. */
11643 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11644 return 0;
11645
11646 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11647 return 0;
11648
11649 return 1;
11650 }
11651
11652 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11653 address with operands set by DEP_INSN. */
11654
11655 static int
11656 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11657 {
11658 rtx addr;
11659
11660 if (insn_type == TYPE_LEA
11661 && TARGET_PENTIUM)
11662 {
11663 addr = PATTERN (insn);
11664 if (GET_CODE (addr) == SET)
11665 ;
11666 else if (GET_CODE (addr) == PARALLEL
11667 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11668 addr = XVECEXP (addr, 0, 0);
11669 else
11670 abort ();
11671 addr = SET_SRC (addr);
11672 }
11673 else
11674 {
11675 int i;
11676 extract_insn_cached (insn);
11677 for (i = recog_data.n_operands - 1; i >= 0; --i)
11678 if (GET_CODE (recog_data.operand[i]) == MEM)
11679 {
11680 addr = XEXP (recog_data.operand[i], 0);
11681 goto found;
11682 }
11683 return 0;
11684 found:;
11685 }
11686
11687 return modified_in_p (addr, dep_insn);
11688 }
11689
11690 static int
11691 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11692 {
11693 enum attr_type insn_type, dep_insn_type;
11694 enum attr_memory memory;
11695 rtx set, set2;
11696 int dep_insn_code_number;
11697
11698 /* Anti and output dependencies have zero cost on all CPUs. */
11699 if (REG_NOTE_KIND (link) != 0)
11700 return 0;
11701
11702 dep_insn_code_number = recog_memoized (dep_insn);
11703
11704 /* If we can't recognize the insns, we can't really do anything. */
11705 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11706 return cost;
11707
11708 insn_type = get_attr_type (insn);
11709 dep_insn_type = get_attr_type (dep_insn);
11710
11711 switch (ix86_tune)
11712 {
11713 case PROCESSOR_PENTIUM:
11714 /* Address Generation Interlock adds a cycle of latency. */
11715 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11716 cost += 1;
11717
11718 /* ??? Compares pair with jump/setcc. */
11719 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11720 cost = 0;
11721
11722 /* Floating point stores require the value to be ready one cycle earlier. */
11723 if (insn_type == TYPE_FMOV
11724 && get_attr_memory (insn) == MEMORY_STORE
11725 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11726 cost += 1;
11727 break;
11728
11729 case PROCESSOR_PENTIUMPRO:
11730 memory = get_attr_memory (insn);
11731
11732 /* INT->FP conversion is expensive. */
11733 if (get_attr_fp_int_src (dep_insn))
11734 cost += 5;
11735
11736 /* There is one cycle extra latency between an FP op and a store. */
11737 if (insn_type == TYPE_FMOV
11738 && (set = single_set (dep_insn)) != NULL_RTX
11739 && (set2 = single_set (insn)) != NULL_RTX
11740 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11741 && GET_CODE (SET_DEST (set2)) == MEM)
11742 cost += 1;
11743
11744 /* The reorder buffer can hide the latency of a load by executing it in
11745 parallel with the previous instruction, provided the previous
11746 instruction is not needed to compute the address. */
11747 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11748 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11749 {
11750 /* Claim that moves take one cycle, as the core can issue one load
11751 at a time and the next load can start a cycle later. */
11752 if (dep_insn_type == TYPE_IMOV
11753 || dep_insn_type == TYPE_FMOV)
11754 cost = 1;
11755 else if (cost > 1)
11756 cost--;
11757 }
11758 break;
11759
11760 case PROCESSOR_K6:
11761 memory = get_attr_memory (insn);
11762
11763 /* The esp dependency is resolved before the instruction is really
11764 finished. */
11765 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11766 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11767 return 1;
11768
11769 /* INT->FP conversion is expensive. */
11770 if (get_attr_fp_int_src (dep_insn))
11771 cost += 5;
11772
11773 /* The reorder buffer can hide the latency of a load by executing it in
11774 parallel with the previous instruction, provided the previous
11775 instruction is not needed to compute the address. */
11776 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11777 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11778 {
11779 /* Claim that moves take one cycle, as the core can issue one load
11780 at a time and the next load can start a cycle later. */
11781 if (dep_insn_type == TYPE_IMOV
11782 || dep_insn_type == TYPE_FMOV)
11783 cost = 1;
11784 else if (cost > 2)
11785 cost -= 2;
11786 else
11787 cost = 1;
11788 }
11789 break;
11790
11791 case PROCESSOR_ATHLON:
11792 case PROCESSOR_K8:
11793 memory = get_attr_memory (insn);
11794
11795 /* The reorder buffer can hide the latency of a load by executing it in
11796 parallel with the previous instruction, provided the previous
11797 instruction is not needed to compute the address. */
11798 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11799 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11800 {
11801 enum attr_unit unit = get_attr_unit (insn);
11802 int loadcost = 3;
11803
11804 /* Because of the difference between the length of integer and
11805 floating unit pipeline preparation stages, the memory operands
11806 for floating point are cheaper.
11807
11808 ??? For Athlon the difference is most probably 2. */
11809 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11810 loadcost = 3;
11811 else
11812 loadcost = TARGET_ATHLON ? 2 : 0;
11813
11814 if (cost >= loadcost)
11815 cost -= loadcost;
11816 else
11817 cost = 0;
11818 }
11819
11820 default:
11821 break;
11822 }
11823
11824 return cost;
11825 }
11826
11827 /* How many alternative schedules to try. This should be as wide as the
11828 scheduling freedom in the DFA, but no wider. Making this value too
11829 large results in extra work for the scheduler. */
11830
11831 static int
11832 ia32_multipass_dfa_lookahead (void)
11833 {
11834 if (ix86_tune == PROCESSOR_PENTIUM)
11835 return 2;
11836
11837 if (ix86_tune == PROCESSOR_PENTIUMPRO
11838 || ix86_tune == PROCESSOR_K6)
11839 return 1;
11840
11841 else
11842 return 0;
11843 }
11844
11845 \f
11846 /* Compute the alignment given to a constant that is being placed in memory.
11847 EXP is the constant and ALIGN is the alignment that the object would
11848 ordinarily have.
11849 The value of this function is used instead of that alignment to align
11850 the object. */
11851
11852 int
11853 ix86_constant_alignment (tree exp, int align)
11854 {
11855 if (TREE_CODE (exp) == REAL_CST)
11856 {
11857 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11858 return 64;
11859 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11860 return 128;
11861 }
11862 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11863 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11864 return BITS_PER_WORD;
11865
11866 return align;
11867 }
11868
11869 /* Compute the alignment for a static variable.
11870 TYPE is the data type, and ALIGN is the alignment that
11871 the object would ordinarily have. The value of this function is used
11872 instead of that alignment to align the object. */
11873
11874 int
11875 ix86_data_alignment (tree type, int align)
11876 {
11877 if (AGGREGATE_TYPE_P (type)
11878 && TYPE_SIZE (type)
11879 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11880 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11881 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11882 return 256;
11883
11884 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11885 to a 16-byte boundary. */
11886 if (TARGET_64BIT)
11887 {
11888 if (AGGREGATE_TYPE_P (type)
11889 && TYPE_SIZE (type)
11890 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11891 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11892 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11893 return 128;
11894 }
11895
11896 if (TREE_CODE (type) == ARRAY_TYPE)
11897 {
11898 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11899 return 64;
11900 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11901 return 128;
11902 }
11903 else if (TREE_CODE (type) == COMPLEX_TYPE)
11904 {
11905
11906 if (TYPE_MODE (type) == DCmode && align < 64)
11907 return 64;
11908 if (TYPE_MODE (type) == XCmode && align < 128)
11909 return 128;
11910 }
11911 else if ((TREE_CODE (type) == RECORD_TYPE
11912 || TREE_CODE (type) == UNION_TYPE
11913 || TREE_CODE (type) == QUAL_UNION_TYPE)
11914 && TYPE_FIELDS (type))
11915 {
11916 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11917 return 64;
11918 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11919 return 128;
11920 }
11921 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11922 || TREE_CODE (type) == INTEGER_TYPE)
11923 {
11924 if (TYPE_MODE (type) == DFmode && align < 64)
11925 return 64;
11926 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11927 return 128;
11928 }
11929
11930 return align;
11931 }
11932
11933 /* Compute the alignment for a local variable.
11934 TYPE is the data type, and ALIGN is the alignment that
11935 the object would ordinarily have. The value of this macro is used
11936 instead of that alignment to align the object. */
11937
11938 int
11939 ix86_local_alignment (tree type, int align)
11940 {
11941 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11942 to a 16-byte boundary. */
11943 if (TARGET_64BIT)
11944 {
11945 if (AGGREGATE_TYPE_P (type)
11946 && TYPE_SIZE (type)
11947 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11948 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11949 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11950 return 128;
11951 }
11952 if (TREE_CODE (type) == ARRAY_TYPE)
11953 {
11954 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11955 return 64;
11956 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11957 return 128;
11958 }
11959 else if (TREE_CODE (type) == COMPLEX_TYPE)
11960 {
11961 if (TYPE_MODE (type) == DCmode && align < 64)
11962 return 64;
11963 if (TYPE_MODE (type) == XCmode && align < 128)
11964 return 128;
11965 }
11966 else if ((TREE_CODE (type) == RECORD_TYPE
11967 || TREE_CODE (type) == UNION_TYPE
11968 || TREE_CODE (type) == QUAL_UNION_TYPE)
11969 && TYPE_FIELDS (type))
11970 {
11971 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11972 return 64;
11973 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11974 return 128;
11975 }
11976 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11977 || TREE_CODE (type) == INTEGER_TYPE)
11978 {
11979
11980 if (TYPE_MODE (type) == DFmode && align < 64)
11981 return 64;
11982 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11983 return 128;
11984 }
11985 return align;
11986 }
11987 \f
11988 /* Emit RTL insns to initialize the variable parts of a trampoline.
11989 FNADDR is an RTX for the address of the function's pure code.
11990 CXT is an RTX for the static chain value for the function. */
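/* The 32-bit trampoline consists of "movl $CXT, %ecx" (opcode 0xb9)
followed by "jmp FNADDR" (opcode 0xe9 with a relative displacement).
The 64-bit trampoline loads FNADDR into %r11 (with movl or movabs,
whichever is shorter), loads CXT into %r10 with movabs, and ends with
"jmp *%r11" (bytes 0x49 0xff 0xe3). */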
11991 void
11992 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11993 {
11994 if (!TARGET_64BIT)
11995 {
11996 /* Compute offset from the end of the jmp to the target function. */
11997 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11998 plus_constant (tramp, 10),
11999 NULL_RTX, 1, OPTAB_DIRECT);
12000 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12001 gen_int_mode (0xb9, QImode));
12002 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12003 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12004 gen_int_mode (0xe9, QImode));
12005 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12006 }
12007 else
12008 {
12009 int offset = 0;
12010 /* Try to load address using shorter movl instead of movabs.
12011 We may want to support movq for kernel mode, but the kernel does not use
12012 trampolines at the moment. */
12013 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12014 {
12015 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12016 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12017 gen_int_mode (0xbb41, HImode));
12018 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12019 gen_lowpart (SImode, fnaddr));
12020 offset += 6;
12021 }
12022 else
12023 {
12024 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12025 gen_int_mode (0xbb49, HImode));
12026 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12027 fnaddr);
12028 offset += 10;
12029 }
12030 /* Load static chain using movabs to r10. */
12031 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12032 gen_int_mode (0xba49, HImode));
12033 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12034 cxt);
12035 offset += 10;
12036 /* Jump to r11. */
12037 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12038 gen_int_mode (0xff49, HImode));
12039 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12040 gen_int_mode (0xe3, QImode));
12041 offset += 3;
12042 if (offset > TRAMPOLINE_SIZE)
12043 abort ();
12044 }
12045
12046 #ifdef ENABLE_EXECUTE_STACK
12047 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12048 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12049 #endif
12050 }
12051 \f
12052 #define def_builtin(MASK, NAME, TYPE, CODE) \
12053 do { \
12054 if ((MASK) & target_flags \
12055 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12056 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12057 NULL, NULL_TREE); \
12058 } while (0)
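/* A typical use would be
def_builtin (MASK_SSE, "__builtin_ia32_addps", v4sf_ftype_v4sf_v4sf,
IX86_BUILTIN_ADDPS);
(the type-node name here is illustrative); the builtin is registered only
when the ISA bits required by MASK are enabled in target_flags. */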
12059
12060 struct builtin_description
12061 {
12062 const unsigned int mask;
12063 const enum insn_code icode;
12064 const char *const name;
12065 const enum ix86_builtins code;
12066 const enum rtx_code comparison;
12067 const unsigned int flag;
12068 };
12069
12070 static const struct builtin_description bdesc_comi[] =
12071 {
12072 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12073 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12074 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12075 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12076 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12077 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12078 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12079 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12080 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12081 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12082 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12083 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12084 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12085 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12086 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12087 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12088 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12090 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12091 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12092 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12093 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12095 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12096 };
12097
12098 static const struct builtin_description bdesc_2arg[] =
12099 {
12100 /* SSE */
12101 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12102 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12103 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12104 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12105 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12106 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12107 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12108 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12109
12110 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12111 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12112 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12113 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12114 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12115 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12116 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12117 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12118 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12119 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12120 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12121 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12122 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12123 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12124 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12125 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12126 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12127 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12128 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12129 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12130
12131 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12132 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12133 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12134 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12135
12136 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12137 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12138 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12139 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12140
12141 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12142 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12143 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12144 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12145 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12146
12147 /* MMX */
12148 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12149 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12150 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12151 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12152 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12153 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12154 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12155 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12156
12157 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12158 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12159 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12160 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12161 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12162 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12163 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12164 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12165
12166 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12167 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12168 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12169
12170 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12171 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12172 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12173 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12174
12175 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12176 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12177
12178 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12179 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12180 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12181 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12182 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12183 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12184
12185 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12186 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12187 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12188 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12189
12190 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12191 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12192 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12193 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12194 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12195 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12196
12197 /* Special. */
12198 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12199 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12200 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12201
12202 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12203 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12204 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12205
12206 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12207 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12208 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12209 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12210 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12211 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12212
12213 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12214 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12215 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12216 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12217 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12218 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12219
12220 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12221 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12222 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12223 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12224
12225 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12226 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12227
12228 /* SSE2 */
12229 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12230 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12231 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12232 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12233 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12234 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12235 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12236 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12237
12238 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12239 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12240 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12241 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12242 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12243 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12244 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12245 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12246 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12247 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12248 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12249 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12250 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12251 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12252 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12253 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12254 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12255 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12256 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12257 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12258
12259 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12260 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12261 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12262 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12263
12264 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12265 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12266 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12267 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12268
12269 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12270 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12271 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12272
12273 /* SSE2 MMX */
12274 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12275 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12276 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12277 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12278 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12279 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12280 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12281 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12282
12283 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12284 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12285 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12286 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12287 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12288 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12289 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12290 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12291
12292 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12293 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12294
12295 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12296 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12297 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12298 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12299
12300 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12301 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12302
12303 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12304 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12305 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12306 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12307 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12308 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12309
12310 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12311 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12312 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12313 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12314
12315 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12316 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12317 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12318 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12319 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12320 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12321 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12322 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12323
12324 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12325 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12326 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12327
12328 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12329 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12330
12331 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12332 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12333
12334 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12335 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12336 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12337 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12338 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12339 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12340
12341 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12342 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12343 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12344 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12345 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12346 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12347
12348 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12349 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12350 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12351 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12352
12353 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12354
12355 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12356 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12357 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12358 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12359
12360 /* SSE3 MMX */
12361 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12362 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12363 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12364 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12365 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12366 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12367 };
12368
12369 static const struct builtin_description bdesc_1arg[] =
12370 {
12371 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12372 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12373
12374 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12375 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12376 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12377
12378 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12379 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12380 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12381 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12382 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12383 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12384
12385 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12386 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12387 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12388 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12389
12390 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12391
12392 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12393 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12394
12395 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12396 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12397 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12398 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12399 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12400
12401 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12402
12403 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12404 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12405 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12406 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12407
12408 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12409 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12410 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12411
12412 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12413
12414 /* SSE3 */
12415 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12416 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12417 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12418 };
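/* Illustrative note (a sketch, not compiled): each entry of these
   description tables pairs a target mask, an insn pattern, an optional
   builtin name and an IX86_BUILTIN code, e.g. the named two-operand
   entry

     { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps",
       IX86_BUILTIN_HADDPS, 0, 0 },

   The loop over bdesc_2arg in ix86_init_mmx_sse_builtins below derives
   a function type from the pattern's operand mode and calls
   def_builtin (d->mask, d->name, type, d->code); entries whose name is
   null are skipped by that loop and handled individually.  */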
12419
12420 void
12421 ix86_init_builtins (void)
12422 {
12423 if (TARGET_MMX)
12424 ix86_init_mmx_sse_builtins ();
12425 }
12426
12427 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12428 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
12429 builtins are defined.  */
12430 static void
12431 ix86_init_mmx_sse_builtins (void)
12432 {
12433 const struct builtin_description * d;
12434 size_t i;
12435
12436 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12437 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12438 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12439 tree V2DI_type_node
12440 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
12441 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12442 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12443 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12444 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12445 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12446 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12447
12448 tree pchar_type_node = build_pointer_type (char_type_node);
12449 tree pcchar_type_node = build_pointer_type (
12450 build_type_variant (char_type_node, 1, 0));
12451 tree pfloat_type_node = build_pointer_type (float_type_node);
12452 tree pcfloat_type_node = build_pointer_type (
12453 build_type_variant (float_type_node, 1, 0));
12454 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12455 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12456 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12457
12458 /* Comparisons. */
12459 tree int_ftype_v4sf_v4sf
12460 = build_function_type_list (integer_type_node,
12461 V4SF_type_node, V4SF_type_node, NULL_TREE);
12462 tree v4si_ftype_v4sf_v4sf
12463 = build_function_type_list (V4SI_type_node,
12464 V4SF_type_node, V4SF_type_node, NULL_TREE);
12465 /* MMX/SSE/integer conversions. */
12466 tree int_ftype_v4sf
12467 = build_function_type_list (integer_type_node,
12468 V4SF_type_node, NULL_TREE);
12469 tree int64_ftype_v4sf
12470 = build_function_type_list (long_long_integer_type_node,
12471 V4SF_type_node, NULL_TREE);
12472 tree int_ftype_v8qi
12473 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12474 tree v4sf_ftype_v4sf_int
12475 = build_function_type_list (V4SF_type_node,
12476 V4SF_type_node, integer_type_node, NULL_TREE);
12477 tree v4sf_ftype_v4sf_int64
12478 = build_function_type_list (V4SF_type_node,
12479 V4SF_type_node, long_long_integer_type_node,
12480 NULL_TREE);
12481 tree v4sf_ftype_v4sf_v2si
12482 = build_function_type_list (V4SF_type_node,
12483 V4SF_type_node, V2SI_type_node, NULL_TREE);
12484 tree int_ftype_v4hi_int
12485 = build_function_type_list (integer_type_node,
12486 V4HI_type_node, integer_type_node, NULL_TREE);
12487 tree v4hi_ftype_v4hi_int_int
12488 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12489 integer_type_node, integer_type_node,
12490 NULL_TREE);
12491 /* Miscellaneous. */
12492 tree v8qi_ftype_v4hi_v4hi
12493 = build_function_type_list (V8QI_type_node,
12494 V4HI_type_node, V4HI_type_node, NULL_TREE);
12495 tree v4hi_ftype_v2si_v2si
12496 = build_function_type_list (V4HI_type_node,
12497 V2SI_type_node, V2SI_type_node, NULL_TREE);
12498 tree v4sf_ftype_v4sf_v4sf_int
12499 = build_function_type_list (V4SF_type_node,
12500 V4SF_type_node, V4SF_type_node,
12501 integer_type_node, NULL_TREE);
12502 tree v2si_ftype_v4hi_v4hi
12503 = build_function_type_list (V2SI_type_node,
12504 V4HI_type_node, V4HI_type_node, NULL_TREE);
12505 tree v4hi_ftype_v4hi_int
12506 = build_function_type_list (V4HI_type_node,
12507 V4HI_type_node, integer_type_node, NULL_TREE);
12508 tree v4hi_ftype_v4hi_di
12509 = build_function_type_list (V4HI_type_node,
12510 V4HI_type_node, long_long_unsigned_type_node,
12511 NULL_TREE);
12512 tree v2si_ftype_v2si_di
12513 = build_function_type_list (V2SI_type_node,
12514 V2SI_type_node, long_long_unsigned_type_node,
12515 NULL_TREE);
12516 tree void_ftype_void
12517 = build_function_type (void_type_node, void_list_node);
12518 tree void_ftype_unsigned
12519 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12520 tree void_ftype_unsigned_unsigned
12521 = build_function_type_list (void_type_node, unsigned_type_node,
12522 unsigned_type_node, NULL_TREE);
12523 tree void_ftype_pcvoid_unsigned_unsigned
12524 = build_function_type_list (void_type_node, const_ptr_type_node,
12525 unsigned_type_node, unsigned_type_node,
12526 NULL_TREE);
12527 tree unsigned_ftype_void
12528 = build_function_type (unsigned_type_node, void_list_node);
12529 tree di_ftype_void
12530 = build_function_type (long_long_unsigned_type_node, void_list_node);
12531 tree v4sf_ftype_void
12532 = build_function_type (V4SF_type_node, void_list_node);
12533 tree v2si_ftype_v4sf
12534 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12535 /* Loads/stores. */
12536 tree void_ftype_v8qi_v8qi_pchar
12537 = build_function_type_list (void_type_node,
12538 V8QI_type_node, V8QI_type_node,
12539 pchar_type_node, NULL_TREE);
12540 tree v4sf_ftype_pcfloat
12541 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12542 /* @@@ the type is bogus */
12543 tree v4sf_ftype_v4sf_pv2si
12544 = build_function_type_list (V4SF_type_node,
12545 V4SF_type_node, pv2si_type_node, NULL_TREE);
12546 tree void_ftype_pv2si_v4sf
12547 = build_function_type_list (void_type_node,
12548 pv2si_type_node, V4SF_type_node, NULL_TREE);
12549 tree void_ftype_pfloat_v4sf
12550 = build_function_type_list (void_type_node,
12551 pfloat_type_node, V4SF_type_node, NULL_TREE);
12552 tree void_ftype_pdi_di
12553 = build_function_type_list (void_type_node,
12554 pdi_type_node, long_long_unsigned_type_node,
12555 NULL_TREE);
12556 tree void_ftype_pv2di_v2di
12557 = build_function_type_list (void_type_node,
12558 pv2di_type_node, V2DI_type_node, NULL_TREE);
12559 /* Normal vector unops. */
12560 tree v4sf_ftype_v4sf
12561 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12562
12563 /* Normal vector binops. */
12564 tree v4sf_ftype_v4sf_v4sf
12565 = build_function_type_list (V4SF_type_node,
12566 V4SF_type_node, V4SF_type_node, NULL_TREE);
12567 tree v8qi_ftype_v8qi_v8qi
12568 = build_function_type_list (V8QI_type_node,
12569 V8QI_type_node, V8QI_type_node, NULL_TREE);
12570 tree v4hi_ftype_v4hi_v4hi
12571 = build_function_type_list (V4HI_type_node,
12572 V4HI_type_node, V4HI_type_node, NULL_TREE);
12573 tree v2si_ftype_v2si_v2si
12574 = build_function_type_list (V2SI_type_node,
12575 V2SI_type_node, V2SI_type_node, NULL_TREE);
12576 tree di_ftype_di_di
12577 = build_function_type_list (long_long_unsigned_type_node,
12578 long_long_unsigned_type_node,
12579 long_long_unsigned_type_node, NULL_TREE);
12580
12581 tree v2si_ftype_v2sf
12582 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12583 tree v2sf_ftype_v2si
12584 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12585 tree v2si_ftype_v2si
12586 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12587 tree v2sf_ftype_v2sf
12588 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12589 tree v2sf_ftype_v2sf_v2sf
12590 = build_function_type_list (V2SF_type_node,
12591 V2SF_type_node, V2SF_type_node, NULL_TREE);
12592 tree v2si_ftype_v2sf_v2sf
12593 = build_function_type_list (V2SI_type_node,
12594 V2SF_type_node, V2SF_type_node, NULL_TREE);
12595 tree pint_type_node = build_pointer_type (integer_type_node);
12596 tree pcint_type_node = build_pointer_type (
12597 build_type_variant (integer_type_node, 1, 0));
12598 tree pdouble_type_node = build_pointer_type (double_type_node);
12599 tree pcdouble_type_node = build_pointer_type (
12600 build_type_variant (double_type_node, 1, 0));
12601 tree int_ftype_v2df_v2df
12602 = build_function_type_list (integer_type_node,
12603 V2DF_type_node, V2DF_type_node, NULL_TREE);
12604
12605 tree ti_ftype_void
12606 = build_function_type (intTI_type_node, void_list_node);
12607 tree v2di_ftype_void
12608 = build_function_type (V2DI_type_node, void_list_node);
12609 tree ti_ftype_ti_ti
12610 = build_function_type_list (intTI_type_node,
12611 intTI_type_node, intTI_type_node, NULL_TREE);
12612 tree void_ftype_pcvoid
12613 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12614 tree v2di_ftype_di
12615 = build_function_type_list (V2DI_type_node,
12616 long_long_unsigned_type_node, NULL_TREE);
12617 tree di_ftype_v2di
12618 = build_function_type_list (long_long_unsigned_type_node,
12619 V2DI_type_node, NULL_TREE);
12620 tree v4sf_ftype_v4si
12621 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12622 tree v4si_ftype_v4sf
12623 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12624 tree v2df_ftype_v4si
12625 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12626 tree v4si_ftype_v2df
12627 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12628 tree v2si_ftype_v2df
12629 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12630 tree v4sf_ftype_v2df
12631 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12632 tree v2df_ftype_v2si
12633 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12634 tree v2df_ftype_v4sf
12635 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12636 tree int_ftype_v2df
12637 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12638 tree int64_ftype_v2df
12639 = build_function_type_list (long_long_integer_type_node,
12640 V2DF_type_node, NULL_TREE);
12641 tree v2df_ftype_v2df_int
12642 = build_function_type_list (V2DF_type_node,
12643 V2DF_type_node, integer_type_node, NULL_TREE);
12644 tree v2df_ftype_v2df_int64
12645 = build_function_type_list (V2DF_type_node,
12646 V2DF_type_node, long_long_integer_type_node,
12647 NULL_TREE);
12648 tree v4sf_ftype_v4sf_v2df
12649 = build_function_type_list (V4SF_type_node,
12650 V4SF_type_node, V2DF_type_node, NULL_TREE);
12651 tree v2df_ftype_v2df_v4sf
12652 = build_function_type_list (V2DF_type_node,
12653 V2DF_type_node, V4SF_type_node, NULL_TREE);
12654 tree v2df_ftype_v2df_v2df_int
12655 = build_function_type_list (V2DF_type_node,
12656 V2DF_type_node, V2DF_type_node,
12657 integer_type_node,
12658 NULL_TREE);
12659 tree v2df_ftype_v2df_pcdouble
12660 = build_function_type_list (V2DF_type_node,
12661 V2DF_type_node, pcdouble_type_node, NULL_TREE);
12662 tree void_ftype_pdouble_v2df
12663 = build_function_type_list (void_type_node,
12664 pdouble_type_node, V2DF_type_node, NULL_TREE);
12665 tree void_ftype_pint_int
12666 = build_function_type_list (void_type_node,
12667 pint_type_node, integer_type_node, NULL_TREE);
12668 tree void_ftype_v16qi_v16qi_pchar
12669 = build_function_type_list (void_type_node,
12670 V16QI_type_node, V16QI_type_node,
12671 pchar_type_node, NULL_TREE);
12672 tree v2df_ftype_pcdouble
12673 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12674 tree v2df_ftype_v2df_v2df
12675 = build_function_type_list (V2DF_type_node,
12676 V2DF_type_node, V2DF_type_node, NULL_TREE);
12677 tree v16qi_ftype_v16qi_v16qi
12678 = build_function_type_list (V16QI_type_node,
12679 V16QI_type_node, V16QI_type_node, NULL_TREE);
12680 tree v8hi_ftype_v8hi_v8hi
12681 = build_function_type_list (V8HI_type_node,
12682 V8HI_type_node, V8HI_type_node, NULL_TREE);
12683 tree v4si_ftype_v4si_v4si
12684 = build_function_type_list (V4SI_type_node,
12685 V4SI_type_node, V4SI_type_node, NULL_TREE);
12686 tree v2di_ftype_v2di_v2di
12687 = build_function_type_list (V2DI_type_node,
12688 V2DI_type_node, V2DI_type_node, NULL_TREE);
12689 tree v2di_ftype_v2df_v2df
12690 = build_function_type_list (V2DI_type_node,
12691 V2DF_type_node, V2DF_type_node, NULL_TREE);
12692 tree v2df_ftype_v2df
12693 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12694 tree v2df_ftype_double
12695 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12696 tree v2df_ftype_double_double
12697 = build_function_type_list (V2DF_type_node,
12698 double_type_node, double_type_node, NULL_TREE);
12699 tree int_ftype_v8hi_int
12700 = build_function_type_list (integer_type_node,
12701 V8HI_type_node, integer_type_node, NULL_TREE);
12702 tree v8hi_ftype_v8hi_int_int
12703 = build_function_type_list (V8HI_type_node,
12704 V8HI_type_node, integer_type_node,
12705 integer_type_node, NULL_TREE);
12706 tree v2di_ftype_v2di_int
12707 = build_function_type_list (V2DI_type_node,
12708 V2DI_type_node, integer_type_node, NULL_TREE);
12709 tree v4si_ftype_v4si_int
12710 = build_function_type_list (V4SI_type_node,
12711 V4SI_type_node, integer_type_node, NULL_TREE);
12712 tree v8hi_ftype_v8hi_int
12713 = build_function_type_list (V8HI_type_node,
12714 V8HI_type_node, integer_type_node, NULL_TREE);
12715 tree v8hi_ftype_v8hi_v2di
12716 = build_function_type_list (V8HI_type_node,
12717 V8HI_type_node, V2DI_type_node, NULL_TREE);
12718 tree v4si_ftype_v4si_v2di
12719 = build_function_type_list (V4SI_type_node,
12720 V4SI_type_node, V2DI_type_node, NULL_TREE);
12721 tree v4si_ftype_v8hi_v8hi
12722 = build_function_type_list (V4SI_type_node,
12723 V8HI_type_node, V8HI_type_node, NULL_TREE);
12724 tree di_ftype_v8qi_v8qi
12725 = build_function_type_list (long_long_unsigned_type_node,
12726 V8QI_type_node, V8QI_type_node, NULL_TREE);
12727 tree di_ftype_v2si_v2si
12728 = build_function_type_list (long_long_unsigned_type_node,
12729 V2SI_type_node, V2SI_type_node, NULL_TREE);
12730 tree v2di_ftype_v16qi_v16qi
12731 = build_function_type_list (V2DI_type_node,
12732 V16QI_type_node, V16QI_type_node, NULL_TREE);
12733 tree v2di_ftype_v4si_v4si
12734 = build_function_type_list (V2DI_type_node,
12735 V4SI_type_node, V4SI_type_node, NULL_TREE);
12736 tree int_ftype_v16qi
12737 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12738 tree v16qi_ftype_pcchar
12739 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12740 tree void_ftype_pchar_v16qi
12741 = build_function_type_list (void_type_node,
12742 pchar_type_node, V16QI_type_node, NULL_TREE);
12743 tree v4si_ftype_pcint
12744 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12745 tree void_ftype_pcint_v4si
12746 = build_function_type_list (void_type_node,
12747 pcint_type_node, V4SI_type_node, NULL_TREE);
12748 tree v2di_ftype_v2di
12749 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12750
12751 tree float80_type;
12752 tree float128_type;
12753
12754 /* The __float80 type. */
12755 if (TYPE_MODE (long_double_type_node) == XFmode)
12756 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12757 "__float80");
12758 else
12759 {
12760 /* long double does not have XFmode here, so build a distinct 80-bit type for __float80.  */
12761 float80_type = make_node (REAL_TYPE);
12762 TYPE_PRECISION (float80_type) = 80;
12763 layout_type (float80_type);
12764 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12765 }
12766
12767 float128_type = make_node (REAL_TYPE);
12768 TYPE_PRECISION (float128_type) = 128;
12769 layout_type (float128_type);
12770 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12771
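/* Illustrative sketch (assumed usage, not part of this file): once the
   names are registered, C code for this target can declare objects of
   these types directly, for instance

     __float80  ext;
     __float128 quad;

   How much arithmetic is supported on them beyond declaration and
   layout depends on the rest of the backend, so this is only a naming
   illustration.  */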
12772 /* Add all builtins that are more or less simple operations on two
12773 operands. */
12774 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12775 {
12776 /* Use one of the operands; the target can have a different mode for
12777 mask-generating compares. */
12778 enum machine_mode mode;
12779 tree type;
12780
12781 if (d->name == 0)
12782 continue;
12783 mode = insn_data[d->icode].operand[1].mode;
12784
12785 switch (mode)
12786 {
12787 case V16QImode:
12788 type = v16qi_ftype_v16qi_v16qi;
12789 break;
12790 case V8HImode:
12791 type = v8hi_ftype_v8hi_v8hi;
12792 break;
12793 case V4SImode:
12794 type = v4si_ftype_v4si_v4si;
12795 break;
12796 case V2DImode:
12797 type = v2di_ftype_v2di_v2di;
12798 break;
12799 case V2DFmode:
12800 type = v2df_ftype_v2df_v2df;
12801 break;
12802 case TImode:
12803 type = ti_ftype_ti_ti;
12804 break;
12805 case V4SFmode:
12806 type = v4sf_ftype_v4sf_v4sf;
12807 break;
12808 case V8QImode:
12809 type = v8qi_ftype_v8qi_v8qi;
12810 break;
12811 case V4HImode:
12812 type = v4hi_ftype_v4hi_v4hi;
12813 break;
12814 case V2SImode:
12815 type = v2si_ftype_v2si_v2si;
12816 break;
12817 case DImode:
12818 type = di_ftype_di_di;
12819 break;
12820
12821 default:
12822 abort ();
12823 }
12824
12825 /* Override for comparisons. */
12826 if (d->icode == CODE_FOR_maskcmpv4sf3
12827 || d->icode == CODE_FOR_maskncmpv4sf3
12828 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12829 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12830 type = v4si_ftype_v4sf_v4sf;
12831
12832 if (d->icode == CODE_FOR_maskcmpv2df3
12833 || d->icode == CODE_FOR_maskncmpv2df3
12834 || d->icode == CODE_FOR_vmmaskcmpv2df3
12835 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12836 type = v2di_ftype_v2df_v2df;
12837
12838 def_builtin (d->mask, d->name, type, d->code);
12839 }
12840
12841 /* Add the remaining MMX insns with somewhat more complicated types. */
12842 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12843 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12844 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12845 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12846 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12847
12848 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12849 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12850 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12851
12852 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12853 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12854
12855 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12856 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12857
12858 /* comi/ucomi insns. */
12859 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12860 if (d->mask == MASK_SSE2)
12861 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12862 else
12863 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12864
12865 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12866 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12867 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12868
12869 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12870 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12871 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12872 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12873 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12874 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12875 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12876 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12877 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12878 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12879 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12880
12881 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12882 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12883
12884 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12885
12886 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12887 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12888 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12889 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12890 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12891 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12892
12893 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12894 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12895 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12896 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12897
12898 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12899 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12900 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12901 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12902
12903 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12904
12905 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12906
12907 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12908 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12909 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12910 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12911 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12912 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12913
12914 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12915
12916 /* Original 3DNow! */
12917 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12918 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12919 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12920 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12921 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12922 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12923 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12924 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12925 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12926 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12927 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12928 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12929 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12930 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12931 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12932 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12933 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12934 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12935 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12936 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12937
12938 /* 3DNow! extension as used in the Athlon CPU. */
12939 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12940 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12941 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12942 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12943 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12944 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12945
12946 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12947
12948 /* SSE2 */
12949 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12950 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12951
12952 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12953 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12954 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12955
12956 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12957 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12958 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12959 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12960 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12961 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12962
12963 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
12964 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
12965 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
12966 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
12967
12968 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12969 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12970 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12971 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12972 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12973
12974 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12975 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12976 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12977 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12978
12979 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12980 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12981
12982 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12983
12984 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12985 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12986
12987 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12988 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12989 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12990 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12991 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12992
12993 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12994
12995 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12996 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12997 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12998 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12999
13000 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13001 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13002 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13003
13004 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13005 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13006 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13007 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13008
13009 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13010 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13011 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13012 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13013 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13014 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13015 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13016
13017 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13018 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13019 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13020
13021 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13022 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13023 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13024 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13025 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13026 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13027 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13028
13029 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13030
13031 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13032 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13033
13034 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13035 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13036 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13037
13038 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13039 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13040 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13041
13042 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13043 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13044
13045 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13046 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13047 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13048 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13049
13050 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13051 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13052 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13053 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13054
13055 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13056 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13057
13058 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13059
13060 /* Prescott New Instructions. */
13061 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13062 void_ftype_pcvoid_unsigned_unsigned,
13063 IX86_BUILTIN_MONITOR);
13064 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13065 void_ftype_unsigned_unsigned,
13066 IX86_BUILTIN_MWAIT);
13067 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13068 v4sf_ftype_v4sf,
13069 IX86_BUILTIN_MOVSHDUP);
13070 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13071 v4sf_ftype_v4sf,
13072 IX86_BUILTIN_MOVSLDUP);
13073 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13074 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13075 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13076 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13077 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13078 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13079 }
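/* Illustrative sketch (assumed user-level code, not compiled here):
   with -msse the builtins defined above become callable from C,
   typically through the intrinsic headers that wrap them, e.g.

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     sqrt4 (v4sf a)
     {
       return __builtin_ia32_sqrtps (a);
     }

   __builtin_ia32_sqrtps is registered above with type v4sf_ftype_v4sf;
   the vector_size spelling is an assumption about the front end, not
   something this file defines.  */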
13080
13081 /* Errors in the source file can cause expand_expr to return const0_rtx
13082 where we expect a vector. To avoid crashing, use one of the vector
13083 clear instructions. */
13084 static rtx
13085 safe_vector_operand (rtx x, enum machine_mode mode)
13086 {
13087 if (x != const0_rtx)
13088 return x;
13089 x = gen_reg_rtx (mode);
13090
13091 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13092 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13093 : gen_rtx_SUBREG (DImode, x, 0)));
13094 else
13095 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13096 : gen_rtx_SUBREG (V4SFmode, x, 0),
13097 CONST0_RTX (V4SFmode)));
13098 return x;
13099 }
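/* Illustrative note: the expanders below use this in the pattern

     if (VECTOR_MODE_P (mode0))
       op0 = safe_vector_operand (op0, mode0);

   so an operand that collapsed to const0_rtx because of earlier source
   errors is replaced by a freshly cleared vector register instead of
   being handed to an insn predicate that expects a vector.  */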
13100
13101 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13102
13103 static rtx
13104 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13105 {
13106 rtx pat;
13107 tree arg0 = TREE_VALUE (arglist);
13108 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13109 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13110 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13111 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13112 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13113 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13114
13115 if (VECTOR_MODE_P (mode0))
13116 op0 = safe_vector_operand (op0, mode0);
13117 if (VECTOR_MODE_P (mode1))
13118 op1 = safe_vector_operand (op1, mode1);
13119
13120 if (! target
13121 || GET_MODE (target) != tmode
13122 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13123 target = gen_reg_rtx (tmode);
13124
13125 if (GET_MODE (op1) == SImode && mode1 == TImode)
13126 {
13127 rtx x = gen_reg_rtx (V4SImode);
13128 emit_insn (gen_sse2_loadd (x, op1));
13129 op1 = gen_lowpart (TImode, x);
13130 }
13131
13132 /* If the insn wants input operands in modes different from
13133 the result, abort.  */
13134 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13135 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13136 abort ();
13137
13138 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13139 op0 = copy_to_mode_reg (mode0, op0);
13140 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13141 op1 = copy_to_mode_reg (mode1, op1);
13142
13143 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13144 yet at most one of them may be a memory operand.  This is normally
13145 enforced by expanders, but we didn't bother to create one here.  */
13146 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13147 op0 = copy_to_mode_reg (mode0, op0);
13148
13149 pat = GEN_FCN (icode) (target, op0, op1);
13150 if (! pat)
13151 return 0;
13152 emit_insn (pat);
13153 return target;
13154 }
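/* Illustrative note: ix86_expand_builtin dispatches simple two-operand
   builtins here with nothing but the insn pattern, e.g.

     case IX86_BUILTIN_PFADD:
       return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

   so the operand modes, predicates and result register all come from
   insn_data for that pattern instead of being repeated per builtin.  */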
13155
13156 /* Subroutine of ix86_expand_builtin to take care of stores. */
13157
13158 static rtx
13159 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13160 {
13161 rtx pat;
13162 tree arg0 = TREE_VALUE (arglist);
13163 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13164 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13165 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13166 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13167 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13168
13169 if (VECTOR_MODE_P (mode1))
13170 op1 = safe_vector_operand (op1, mode1);
13171
13172 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13173 op1 = copy_to_mode_reg (mode1, op1);
13174
13175 pat = GEN_FCN (icode) (op0, op1);
13176 if (pat)
13177 emit_insn (pat);
13178 return 0;
13179 }
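/* Illustrative note: the store builtins take a pointer and a value,
   e.g. __builtin_ia32_storeups with type void_ftype_pfloat_v4sf above.
   The pointer argument is wrapped in a MEM of the pattern's operand 0
   mode and the value is forced into a register, so the emitted insn is
   a plain register-to-memory store and the builtin has no result.  */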
13180
13181 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13182
13183 static rtx
13184 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13185 rtx target, int do_load)
13186 {
13187 rtx pat;
13188 tree arg0 = TREE_VALUE (arglist);
13189 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13190 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13191 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13192
13193 if (! target
13194 || GET_MODE (target) != tmode
13195 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13196 target = gen_reg_rtx (tmode);
13197 if (do_load)
13198 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13199 else
13200 {
13201 if (VECTOR_MODE_P (mode0))
13202 op0 = safe_vector_operand (op0, mode0);
13203
13204 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13205 op0 = copy_to_mode_reg (mode0, op0);
13206 }
13207
13208 pat = GEN_FCN (icode) (target, op0);
13209 if (! pat)
13210 return 0;
13211 emit_insn (pat);
13212 return target;
13213 }
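/* Illustrative note: DO_LOAD selects the argument style.  Loads pass 1,
   e.g.

     case IX86_BUILTIN_LOADUPS:
       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist,
                                        target, 1);

   and have their single argument treated as an address wrapped in a
   MEM, while value unops such as IX86_BUILTIN_PF2ID pass 0 and feed
   the argument straight to the pattern.  */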
13214
13215 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13216 sqrtss, rsqrtss, rcpss. */
13217
13218 static rtx
13219 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13220 {
13221 rtx pat;
13222 tree arg0 = TREE_VALUE (arglist);
13223 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13224 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13225 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13226
13227 if (! target
13228 || GET_MODE (target) != tmode
13229 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13230 target = gen_reg_rtx (tmode);
13231
13232 if (VECTOR_MODE_P (mode0))
13233 op0 = safe_vector_operand (op0, mode0);
13234
13235 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13236 op0 = copy_to_mode_reg (mode0, op0);
13237
13238 op1 = op0;
13239 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13240 op1 = copy_to_mode_reg (mode0, op1);
13241
13242 pat = GEN_FCN (icode) (target, op0, op1);
13243 if (! pat)
13244 return 0;
13245 emit_insn (pat);
13246 return target;
13247 }
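/* Illustrative note: the vm* scalar patterns used with this helper,
   e.g.

     case IX86_BUILTIN_SQRTSS:
       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);

   take two vector inputs: one supplies the element being operated on,
   the other supplies the untouched upper elements of the result.  That
   is why the single builtin argument is passed as both op0 and op1
   above.  */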
13248
13249 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13250
13251 static rtx
13252 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13253 rtx target)
13254 {
13255 rtx pat;
13256 tree arg0 = TREE_VALUE (arglist);
13257 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13258 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13259 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13260 rtx op2;
13261 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13262 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13263 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13264 enum rtx_code comparison = d->comparison;
13265
13266 if (VECTOR_MODE_P (mode0))
13267 op0 = safe_vector_operand (op0, mode0);
13268 if (VECTOR_MODE_P (mode1))
13269 op1 = safe_vector_operand (op1, mode1);
13270
13271 /* Swap operands if we have a comparison that isn't available in
13272 hardware. */
13273 if (d->flag)
13274 {
13275 rtx tmp = gen_reg_rtx (mode1);
13276 emit_move_insn (tmp, op1);
13277 op1 = op0;
13278 op0 = tmp;
13279 }
13280
13281 if (! target
13282 || GET_MODE (target) != tmode
13283 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13284 target = gen_reg_rtx (tmode);
13285
13286 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13287 op0 = copy_to_mode_reg (mode0, op0);
13288 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13289 op1 = copy_to_mode_reg (mode1, op1);
13290
13291 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13292 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13293 if (! pat)
13294 return 0;
13295 emit_insn (pat);
13296 return target;
13297 }
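/* Illustrative note: description entries whose FLAG field is nonzero
   stand for comparisons with no direct hardware encoding.  For those
   the operands are swapped above so that the pattern's available
   condition applies: a greater-than builtin such as
   __builtin_ia32_cmpgtps (a, b) (name given for illustration; its
   entry lives in bdesc_2arg elsewhere in this file) is emitted as the
   corresponding less-than compare of (b, a).  */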
13298
13299 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13300
13301 static rtx
13302 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13303 rtx target)
13304 {
13305 rtx pat;
13306 tree arg0 = TREE_VALUE (arglist);
13307 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13308 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13309 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13310 rtx op2;
13311 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13312 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13313 enum rtx_code comparison = d->comparison;
13314
13315 if (VECTOR_MODE_P (mode0))
13316 op0 = safe_vector_operand (op0, mode0);
13317 if (VECTOR_MODE_P (mode1))
13318 op1 = safe_vector_operand (op1, mode1);
13319
13320 /* Swap operands if we have a comparison that isn't available in
13321 hardware. */
13322 if (d->flag)
13323 {
13324 rtx tmp = op1;
13325 op1 = op0;
13326 op0 = tmp;
13327 }
13328
13329 target = gen_reg_rtx (SImode);
13330 emit_move_insn (target, const0_rtx);
13331 target = gen_rtx_SUBREG (QImode, target, 0);
13332
13333 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13334 op0 = copy_to_mode_reg (mode0, op0);
13335 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13336 op1 = copy_to_mode_reg (mode1, op1);
13337
13338 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13339 pat = GEN_FCN (d->icode) (op0, op1);
13340 if (! pat)
13341 return 0;
13342 emit_insn (pat);
13343 emit_insn (gen_rtx_SET (VOIDmode,
13344 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13345 gen_rtx_fmt_ee (comparison, QImode,
13346 SET_DEST (pat),
13347 const0_rtx)));
13348
13349 return SUBREG_REG (target);
13350 }
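/* Illustrative note: the comi/ucomi builtins registered from
   bdesc_comi return a plain int.  The compare pattern emitted above
   only sets the flags; the following SET of the strict low part turns
   the requested condition into 0 or 1 in a zero-initialised SImode
   register, which is what the caller gets back, e.g. (illustrative
   spelling)

     int lt = __builtin_ia32_comilt (a, b);
   */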
13351
13352 /* Expand an expression EXP that calls a built-in function,
13353 with result going to TARGET if that's convenient
13354 (and in mode MODE if that's convenient).
13355 SUBTARGET may be used as the target for computing one of EXP's operands.
13356 IGNORE is nonzero if the value is to be ignored. */
13357
13358 rtx
13359 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13360 enum machine_mode mode ATTRIBUTE_UNUSED,
13361 int ignore ATTRIBUTE_UNUSED)
13362 {
13363 const struct builtin_description *d;
13364 size_t i;
13365 enum insn_code icode;
13366 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13367 tree arglist = TREE_OPERAND (exp, 1);
13368 tree arg0, arg1, arg2;
13369 rtx op0, op1, op2, pat;
13370 enum machine_mode tmode, mode0, mode1, mode2;
13371 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13372
13373 switch (fcode)
13374 {
13375 case IX86_BUILTIN_EMMS:
13376 emit_insn (gen_emms ());
13377 return 0;
13378
13379 case IX86_BUILTIN_SFENCE:
13380 emit_insn (gen_sfence ());
13381 return 0;
13382
13383 case IX86_BUILTIN_PEXTRW:
13384 case IX86_BUILTIN_PEXTRW128:
13385 icode = (fcode == IX86_BUILTIN_PEXTRW
13386 ? CODE_FOR_mmx_pextrw
13387 : CODE_FOR_sse2_pextrw);
13388 arg0 = TREE_VALUE (arglist);
13389 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13390 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13391 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13392 tmode = insn_data[icode].operand[0].mode;
13393 mode0 = insn_data[icode].operand[1].mode;
13394 mode1 = insn_data[icode].operand[2].mode;
13395
13396 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13397 op0 = copy_to_mode_reg (mode0, op0);
13398 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13399 {
13400 error ("selector must be an integer constant in the range 0..%i",
13401 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13402 return gen_reg_rtx (tmode);
13403 }
13404 if (target == 0
13405 || GET_MODE (target) != tmode
13406 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13407 target = gen_reg_rtx (tmode);
13408 pat = GEN_FCN (icode) (target, op0, op1);
13409 if (! pat)
13410 return 0;
13411 emit_insn (pat);
13412 return target;
13413
13414 case IX86_BUILTIN_PINSRW:
13415 case IX86_BUILTIN_PINSRW128:
13416 icode = (fcode == IX86_BUILTIN_PINSRW
13417 ? CODE_FOR_mmx_pinsrw
13418 : CODE_FOR_sse2_pinsrw);
13419 arg0 = TREE_VALUE (arglist);
13420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13421 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13422 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13423 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13424 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13425 tmode = insn_data[icode].operand[0].mode;
13426 mode0 = insn_data[icode].operand[1].mode;
13427 mode1 = insn_data[icode].operand[2].mode;
13428 mode2 = insn_data[icode].operand[3].mode;
13429
13430 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13431 op0 = copy_to_mode_reg (mode0, op0);
13432 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13433 op1 = copy_to_mode_reg (mode1, op1);
13434 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13435 {
13436 error ("selector must be an integer constant in the range 0..%i",
13437 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13438 return const0_rtx;
13439 }
13440 if (target == 0
13441 || GET_MODE (target) != tmode
13442 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13443 target = gen_reg_rtx (tmode);
13444 pat = GEN_FCN (icode) (target, op0, op1, op2);
13445 if (! pat)
13446 return 0;
13447 emit_insn (pat);
13448 return target;
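/* Illustrative note: for both pextrw and pinsrw the selector operand
   is encoded in the instruction, so it must be an integer constant at
   compile time, e.g. (illustrative spelling)

     int w = __builtin_ia32_pextrw (v, 2);

   Passing a variable selector reaches the range errors above and the
   expansion degrades to a dummy result.  */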
13449
13450 case IX86_BUILTIN_MASKMOVQ:
13451 case IX86_BUILTIN_MASKMOVDQU:
13452 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13453 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13454 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13455 : CODE_FOR_sse2_maskmovdqu));
13456 /* Note the arg order is different from the operand order. */
13457 arg1 = TREE_VALUE (arglist);
13458 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13459 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13460 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13461 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13462 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13463 mode0 = insn_data[icode].operand[0].mode;
13464 mode1 = insn_data[icode].operand[1].mode;
13465 mode2 = insn_data[icode].operand[2].mode;
13466
13467 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13468 op0 = copy_to_mode_reg (mode0, op0);
13469 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13470 op1 = copy_to_mode_reg (mode1, op1);
13471 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13472 op2 = copy_to_mode_reg (mode2, op2);
13473 pat = GEN_FCN (icode) (op0, op1, op2);
13474 if (! pat)
13475 return 0;
13476 emit_insn (pat);
13477 return 0;
13478
13479 case IX86_BUILTIN_SQRTSS:
13480 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13481 case IX86_BUILTIN_RSQRTSS:
13482 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13483 case IX86_BUILTIN_RCPSS:
13484 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13485
13486 case IX86_BUILTIN_LOADAPS:
13487 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13488
13489 case IX86_BUILTIN_LOADUPS:
13490 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13491
13492 case IX86_BUILTIN_STOREAPS:
13493 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13494
13495 case IX86_BUILTIN_STOREUPS:
13496 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13497
13498 case IX86_BUILTIN_LOADSS:
13499 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13500
13501 case IX86_BUILTIN_STORESS:
13502 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13503
13504 case IX86_BUILTIN_LOADHPS:
13505 case IX86_BUILTIN_LOADLPS:
13506 case IX86_BUILTIN_LOADHPD:
13507 case IX86_BUILTIN_LOADLPD:
13508 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13509 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13510 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
13511 : CODE_FOR_sse2_loadlpd);
13512 arg0 = TREE_VALUE (arglist);
13513 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13514 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13515 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13516 tmode = insn_data[icode].operand[0].mode;
13517 mode0 = insn_data[icode].operand[1].mode;
13518 mode1 = insn_data[icode].operand[2].mode;
13519
13520 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13521 op0 = copy_to_mode_reg (mode0, op0);
13522 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13523 if (target == 0
13524 || GET_MODE (target) != tmode
13525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13526 target = gen_reg_rtx (tmode);
13527 pat = GEN_FCN (icode) (target, op0, op1);
13528 if (! pat)
13529 return 0;
13530 emit_insn (pat);
13531 return target;
13532
13533 case IX86_BUILTIN_STOREHPS:
13534 case IX86_BUILTIN_STORELPS:
13535 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13536 : CODE_FOR_sse_movlps);
13537 arg0 = TREE_VALUE (arglist);
13538 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13539 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13540 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13541 mode0 = insn_data[icode].operand[1].mode;
13542 mode1 = insn_data[icode].operand[2].mode;
13543
13544 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13545 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13546 op1 = copy_to_mode_reg (mode1, op1);
13547
13548 pat = GEN_FCN (icode) (op0, op0, op1);
13549 if (! pat)
13550 return 0;
13551 emit_insn (pat);
13552 return const0_rtx;
13553
13554 case IX86_BUILTIN_STOREHPD:
13555 case IX86_BUILTIN_STORELPD:
13556 icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
13557 : CODE_FOR_sse2_storelpd);
13558 arg0 = TREE_VALUE (arglist);
13559 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13560 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13561 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13562 mode0 = insn_data[icode].operand[0].mode;
13563 mode1 = insn_data[icode].operand[1].mode;
13564
13565 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13566 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13567 op1 = copy_to_mode_reg (mode1, op1);
13568
13569 pat = GEN_FCN (icode) (op0, op1);
13570 if (! pat)
13571 return 0;
13572 emit_insn (pat);
13573 return const0_rtx;
13574
13575 case IX86_BUILTIN_MOVNTPS:
13576 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13577 case IX86_BUILTIN_MOVNTQ:
13578 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13579
13580 case IX86_BUILTIN_LDMXCSR:
13581 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13582 target = assign_386_stack_local (SImode, 0);
13583 emit_move_insn (target, op0);
13584 emit_insn (gen_ldmxcsr (target));
13585 return 0;
13586
13587 case IX86_BUILTIN_STMXCSR:
13588 target = assign_386_stack_local (SImode, 0);
13589 emit_insn (gen_stmxcsr (target));
13590 return copy_to_mode_reg (SImode, target);
13591
13592 case IX86_BUILTIN_SHUFPS:
13593 case IX86_BUILTIN_SHUFPD:
13594 icode = (fcode == IX86_BUILTIN_SHUFPS
13595 ? CODE_FOR_sse_shufps
13596 : CODE_FOR_sse2_shufpd);
13597 arg0 = TREE_VALUE (arglist);
13598 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13599 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13600 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13601 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13602 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13603 tmode = insn_data[icode].operand[0].mode;
13604 mode0 = insn_data[icode].operand[1].mode;
13605 mode1 = insn_data[icode].operand[2].mode;
13606 mode2 = insn_data[icode].operand[3].mode;
13607
13608 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13609 op0 = copy_to_mode_reg (mode0, op0);
13610 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13611 op1 = copy_to_mode_reg (mode1, op1);
13612 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13613 {
13614 /* @@@ better error message */
13615 error ("mask must be an immediate");
13616 return gen_reg_rtx (tmode);
13617 }
13618 if (target == 0
13619 || GET_MODE (target) != tmode
13620 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13621 target = gen_reg_rtx (tmode);
13622 pat = GEN_FCN (icode) (target, op0, op1, op2);
13623 if (! pat)
13624 return 0;
13625 emit_insn (pat);
13626 return target;
13627
13628 case IX86_BUILTIN_PSHUFW:
13629 case IX86_BUILTIN_PSHUFD:
13630 case IX86_BUILTIN_PSHUFHW:
13631 case IX86_BUILTIN_PSHUFLW:
13632 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13633 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13634 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13635 : CODE_FOR_mmx_pshufw);
13636 arg0 = TREE_VALUE (arglist);
13637 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13638 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13639 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13640 tmode = insn_data[icode].operand[0].mode;
13641 mode1 = insn_data[icode].operand[1].mode;
13642 mode2 = insn_data[icode].operand[2].mode;
13643
13644 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13645 op0 = copy_to_mode_reg (mode1, op0);
13646 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13647 {
13648 /* @@@ better error message */
13649 error ("mask must be an immediate");
13650 return const0_rtx;
13651 }
13652 if (target == 0
13653 || GET_MODE (target) != tmode
13654 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13655 target = gen_reg_rtx (tmode);
13656 pat = GEN_FCN (icode) (target, op0, op1);
13657 if (! pat)
13658 return 0;
13659 emit_insn (pat);
13660 return target;
13661
13662 case IX86_BUILTIN_PSLLDQI128:
13663 case IX86_BUILTIN_PSRLDQI128:
13664 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13665 : CODE_FOR_sse2_lshrti3);
13666 arg0 = TREE_VALUE (arglist);
13667 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13668 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13669 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13670 tmode = insn_data[icode].operand[0].mode;
13671 mode1 = insn_data[icode].operand[1].mode;
13672 mode2 = insn_data[icode].operand[2].mode;
13673
13674 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13675 {
13676 op0 = copy_to_reg (op0);
13677 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13678 }
13679 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13680 {
13681 error ("shift must be an immediate");
13682 return const0_rtx;
13683 }
13684 target = gen_reg_rtx (V2DImode);
13685 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13686 if (! pat)
13687 return 0;
13688 emit_insn (pat);
13689 return target;
13690
13691 case IX86_BUILTIN_FEMMS:
13692 emit_insn (gen_femms ());
13693 return NULL_RTX;
13694
13695 case IX86_BUILTIN_PAVGUSB:
13696 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13697
13698 case IX86_BUILTIN_PF2ID:
13699 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13700
13701 case IX86_BUILTIN_PFACC:
13702 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13703
13704 case IX86_BUILTIN_PFADD:
13705 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13706
13707 case IX86_BUILTIN_PFCMPEQ:
13708 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13709
13710 case IX86_BUILTIN_PFCMPGE:
13711 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13712
13713 case IX86_BUILTIN_PFCMPGT:
13714 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13715
13716 case IX86_BUILTIN_PFMAX:
13717 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13718
13719 case IX86_BUILTIN_PFMIN:
13720 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13721
13722 case IX86_BUILTIN_PFMUL:
13723 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13724
13725 case IX86_BUILTIN_PFRCP:
13726 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13727
13728 case IX86_BUILTIN_PFRCPIT1:
13729 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13730
13731 case IX86_BUILTIN_PFRCPIT2:
13732 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13733
13734 case IX86_BUILTIN_PFRSQIT1:
13735 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13736
13737 case IX86_BUILTIN_PFRSQRT:
13738 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13739
13740 case IX86_BUILTIN_PFSUB:
13741 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13742
13743 case IX86_BUILTIN_PFSUBR:
13744 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13745
13746 case IX86_BUILTIN_PI2FD:
13747 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13748
13749 case IX86_BUILTIN_PMULHRW:
13750 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13751
13752 case IX86_BUILTIN_PF2IW:
13753 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13754
13755 case IX86_BUILTIN_PFNACC:
13756 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13757
13758 case IX86_BUILTIN_PFPNACC:
13759 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13760
13761 case IX86_BUILTIN_PI2FW:
13762 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13763
13764 case IX86_BUILTIN_PSWAPDSI:
13765 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13766
13767 case IX86_BUILTIN_PSWAPDSF:
13768 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13769
13770 case IX86_BUILTIN_SSE_ZERO:
13771 target = gen_reg_rtx (V4SFmode);
13772 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13773 return target;
13774
13775 case IX86_BUILTIN_MMX_ZERO:
13776 target = gen_reg_rtx (DImode);
13777 emit_insn (gen_mmx_clrdi (target));
13778 return target;
13779
13780 case IX86_BUILTIN_CLRTI:
13781 target = gen_reg_rtx (V2DImode);
13782 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13783 return target;
13784
13785
13786 case IX86_BUILTIN_SQRTSD:
13787 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13788 case IX86_BUILTIN_LOADAPD:
13789 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13790 case IX86_BUILTIN_LOADUPD:
13791 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13792
13793 case IX86_BUILTIN_STOREAPD:
13794 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13795 case IX86_BUILTIN_STOREUPD:
13796 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13797
13798 case IX86_BUILTIN_LOADSD:
13799 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13800
13801 case IX86_BUILTIN_STORESD:
13802 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13803
13804 case IX86_BUILTIN_SETPD1:
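/* Descriptive note (added): broadcast one double by spilling it to a stack slot, loading it into the low half of an SSE register with loadsd, then duplicating it into both lanes with shufpd and a zero selector.  */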
13805 target = assign_386_stack_local (DFmode, 0);
13806 arg0 = TREE_VALUE (arglist);
13807 emit_move_insn (adjust_address (target, DFmode, 0),
13808 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13809 op0 = gen_reg_rtx (V2DFmode);
13810 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13811 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13812 return op0;
13813
13814 case IX86_BUILTIN_SETPD:
13815 target = assign_386_stack_local (V2DFmode, 0);
13816 arg0 = TREE_VALUE (arglist);
13817 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13818 emit_move_insn (adjust_address (target, DFmode, 0),
13819 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13820 emit_move_insn (adjust_address (target, DFmode, 8),
13821 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13822 op0 = gen_reg_rtx (V2DFmode);
13823 emit_insn (gen_sse2_movapd (op0, target));
13824 return op0;
13825
13826 case IX86_BUILTIN_LOADRPD:
13827 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13828 gen_reg_rtx (V2DFmode), 1);
13829 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13830 return target;
13831
13832 case IX86_BUILTIN_LOADPD1:
13833 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13834 gen_reg_rtx (V2DFmode), 1);
13835 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13836 return target;
13837
13838 case IX86_BUILTIN_STOREPD1:
13839 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13840 case IX86_BUILTIN_STORERPD:
13841 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13842
13843 case IX86_BUILTIN_CLRPD:
13844 target = gen_reg_rtx (V2DFmode);
13845 emit_insn (gen_sse_clrv2df (target));
13846 return target;
13847
13848 case IX86_BUILTIN_MFENCE:
13849 emit_insn (gen_sse2_mfence ());
13850 return 0;
13851 case IX86_BUILTIN_LFENCE:
13852 emit_insn (gen_sse2_lfence ());
13853 return 0;
13854
13855 case IX86_BUILTIN_CLFLUSH:
13856 arg0 = TREE_VALUE (arglist);
13857 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13858 icode = CODE_FOR_sse2_clflush;
13859 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13860 op0 = copy_to_mode_reg (Pmode, op0);
13861
13862 emit_insn (gen_sse2_clflush (op0));
13863 return 0;
13864
13865 case IX86_BUILTIN_MOVNTPD:
13866 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13867 case IX86_BUILTIN_MOVNTDQ:
13868 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13869 case IX86_BUILTIN_MOVNTI:
13870 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13871
13872 case IX86_BUILTIN_LOADDQA:
13873 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13874 case IX86_BUILTIN_LOADDQU:
13875 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13876 case IX86_BUILTIN_LOADD:
13877 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13878
13879 case IX86_BUILTIN_STOREDQA:
13880 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13881 case IX86_BUILTIN_STOREDQU:
13882 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13883 case IX86_BUILTIN_STORED:
13884 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13885
13886 case IX86_BUILTIN_MONITOR:
13887 arg0 = TREE_VALUE (arglist);
13888 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13889 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13890 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13891 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13892 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13893 if (!REG_P (op0))
13894 op0 = copy_to_mode_reg (SImode, op0);
13895 if (!REG_P (op1))
13896 op1 = copy_to_mode_reg (SImode, op1);
13897 if (!REG_P (op2))
13898 op2 = copy_to_mode_reg (SImode, op2);
13899 emit_insn (gen_monitor (op0, op1, op2));
13900 return 0;
13901
13902 case IX86_BUILTIN_MWAIT:
13903 arg0 = TREE_VALUE (arglist);
13904 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13905 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13906 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13907 if (!REG_P (op0))
13908 op0 = copy_to_mode_reg (SImode, op0);
13909 if (!REG_P (op1))
13910 op1 = copy_to_mode_reg (SImode, op1);
13911 emit_insn (gen_mwait (op0, op1));
13912 return 0;
13913
13914 case IX86_BUILTIN_LOADDDUP:
13915 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13916
13917 case IX86_BUILTIN_LDDQU:
13918 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13919 1);
13920
13921 default:
13922 break;
13923 }
13924
13925 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13926 if (d->code == fcode)
13927 {
13928 /* Compares are treated specially. */
13929 if (d->icode == CODE_FOR_maskcmpv4sf3
13930 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13931 || d->icode == CODE_FOR_maskncmpv4sf3
13932 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13933 || d->icode == CODE_FOR_maskcmpv2df3
13934 || d->icode == CODE_FOR_vmmaskcmpv2df3
13935 || d->icode == CODE_FOR_maskncmpv2df3
13936 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13937 return ix86_expand_sse_compare (d, arglist, target);
13938
13939 return ix86_expand_binop_builtin (d->icode, arglist, target);
13940 }
13941
13942 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13943 if (d->code == fcode)
13944 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13945
13946 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13947 if (d->code == fcode)
13948 return ix86_expand_sse_comi (d, arglist, target);
13949
13950 /* @@@ Should really do something sensible here. */
13951 return 0;
13952 }
13953
13954 /* Store OPERAND to memory after reload is completed.  This means
13955 that we can't easily use assign_stack_local.  */
13956 rtx
13957 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13958 {
13959 rtx result;
13960 if (!reload_completed)
13961 abort ();
13962 if (TARGET_RED_ZONE)
13963 {
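/* Descriptive note (added): the 64-bit ABI guarantees a red zone below the stack pointer, so the operand can be stored there without adjusting the stack pointer.  */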
13964 result = gen_rtx_MEM (mode,
13965 gen_rtx_PLUS (Pmode,
13966 stack_pointer_rtx,
13967 GEN_INT (-RED_ZONE_SIZE)));
13968 emit_move_insn (result, operand);
13969 }
13970 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13971 {
13972 switch (mode)
13973 {
13974 case HImode:
13975 case SImode:
13976 operand = gen_lowpart (DImode, operand);
13977 /* FALLTHRU */
13978 case DImode:
13979 emit_insn (
13980 gen_rtx_SET (VOIDmode,
13981 gen_rtx_MEM (DImode,
13982 gen_rtx_PRE_DEC (DImode,
13983 stack_pointer_rtx)),
13984 operand));
13985 break;
13986 default:
13987 abort ();
13988 }
13989 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13990 }
13991 else
13992 {
13993 switch (mode)
13994 {
13995 case DImode:
13996 {
13997 rtx operands[2];
13998 split_di (&operand, 1, operands, operands + 1);
13999 emit_insn (
14000 gen_rtx_SET (VOIDmode,
14001 gen_rtx_MEM (SImode,
14002 gen_rtx_PRE_DEC (Pmode,
14003 stack_pointer_rtx)),
14004 operands[1]));
14005 emit_insn (
14006 gen_rtx_SET (VOIDmode,
14007 gen_rtx_MEM (SImode,
14008 gen_rtx_PRE_DEC (Pmode,
14009 stack_pointer_rtx)),
14010 operands[0]));
14011 }
14012 break;
14013 case HImode:
14014 /* It is better to store HImodes as SImodes. */
14015 if (!TARGET_PARTIAL_REG_STALL)
14016 operand = gen_lowpart (SImode, operand);
14017 /* FALLTHRU */
14018 case SImode:
14019 emit_insn (
14020 gen_rtx_SET (VOIDmode,
14021 gen_rtx_MEM (GET_MODE (operand),
14022 gen_rtx_PRE_DEC (SImode,
14023 stack_pointer_rtx)),
14024 operand));
14025 break;
14026 default:
14027 abort ();
14028 }
14029 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14030 }
14031 return result;
14032 }
14033
14034 /* Free the operand from memory.  */
14035 void
14036 ix86_free_from_memory (enum machine_mode mode)
14037 {
14038 if (!TARGET_RED_ZONE)
14039 {
14040 int size;
14041
14042 if (mode == DImode || TARGET_64BIT)
14043 size = 8;
14044 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14045 size = 2;
14046 else
14047 size = 4;
14048 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14049 to a pop or add instruction if registers are available.  */
14050 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14051 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14052 GEN_INT (size))));
14053 }
14054 }
14055
14056 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14057 QImode must go into class Q_REGS.
14058 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14059 movdf to do mem-to-mem moves through integer regs. */
14060 enum reg_class
14061 ix86_preferred_reload_class (rtx x, enum reg_class class)
14062 {
14063 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14064 return NO_REGS;
14065 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14066 {
14067 /* SSE can't load any constant directly yet. */
14068 if (SSE_CLASS_P (class))
14069 return NO_REGS;
14070 /* Floats can load 0 and 1. */
14071 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14072 {
14073 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14074 if (MAYBE_SSE_CLASS_P (class))
14075 return (reg_class_subset_p (class, GENERAL_REGS)
14076 ? GENERAL_REGS : FLOAT_REGS);
14077 else
14078 return class;
14079 }
14080 /* General regs can load everything. */
14081 if (reg_class_subset_p (class, GENERAL_REGS))
14082 return GENERAL_REGS;
14083 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14084 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14085 return NO_REGS;
14086 }
14087 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14088 return NO_REGS;
14089 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14090 return Q_REGS;
14091 return class;
14092 }
14093
14094 /* If we are copying between general and FP registers, we need a memory
14095 location. The same is true for SSE and MMX registers.
14096
14097 The macro can't work reliably when one of the CLASSES is a class containing
14098 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14099 combining those units in a single alternative in the machine description.
14100 Ensure that this constraint holds to avoid unexpected surprises.
14101
14102 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14103 enforce these sanity checks. */
14104 int
14105 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14106 enum machine_mode mode, int strict)
14107 {
14108 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14109 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14110 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14111 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14112 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14113 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14114 {
14115 if (strict)
14116 abort ();
14117 else
14118 return 1;
14119 }
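/* Descriptive note (added): memory is needed for any x87 <-> non-x87 move.  SSE <-> integer/MMX moves also need it, unless the mode is SImode (or DImode in 64-bit mode) and either direct inter-unit moves are enabled or we are optimizing for size.  */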
14120 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14121 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14122 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14123 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14124 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14125 }
14126 /* Return the cost of moving data from a register in class CLASS1 to
14127 one in class CLASS2.
14128
14129 It is not required that the cost always equal 2 when FROM is the same as TO;
14130 on some machines it is expensive to move between registers if they are not
14131 general registers. */
14132 int
14133 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14134 enum reg_class class2)
14135 {
14136 /* In case we require secondary memory, compute the cost of the store followed
14137 by the load.  In order to avoid bad register allocation choices, this needs
14138 to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14139
14140 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14141 {
14142 int cost = 1;
14143
14144 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14145 MEMORY_MOVE_COST (mode, class1, 1));
14146 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14147 MEMORY_MOVE_COST (mode, class2, 1));
14148
14149 /* When copying from a general purpose register we may emit multiple
14150 stores followed by a single load, causing a memory size mismatch stall.
14151 Count this as an arbitrarily high cost of 20.  */
14152 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14153 cost += 20;
14154
14155 /* In the case of FP/MMX moves, the registers actually overlap, and we
14156 have to switch modes in order to treat them differently. */
14157 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14158 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14159 cost += 20;
14160
14161 return cost;
14162 }
14163
14164 /* Moves between SSE/MMX and integer unit are expensive. */
14165 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14166 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14167 return ix86_cost->mmxsse_to_integer;
14168 if (MAYBE_FLOAT_CLASS_P (class1))
14169 return ix86_cost->fp_move;
14170 if (MAYBE_SSE_CLASS_P (class1))
14171 return ix86_cost->sse_move;
14172 if (MAYBE_MMX_CLASS_P (class1))
14173 return ix86_cost->mmx_move;
14174 return 2;
14175 }
14176
14177 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14178 int
14179 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14180 {
14181 /* Flags and only flags can only hold CCmode values. */
14182 if (CC_REGNO_P (regno))
14183 return GET_MODE_CLASS (mode) == MODE_CC;
14184 if (GET_MODE_CLASS (mode) == MODE_CC
14185 || GET_MODE_CLASS (mode) == MODE_RANDOM
14186 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14187 return 0;
14188 if (FP_REGNO_P (regno))
14189 return VALID_FP_MODE_P (mode);
14190 if (SSE_REGNO_P (regno))
14191 {
14192 /* We implement the move patterns for all vector modes into and
14193 out of SSE registers, even when no operation instructions
14194 are available. */
14195 return (VALID_SSE_REG_MODE (mode)
14196 || VALID_SSE2_REG_MODE (mode)
14197 || VALID_MMX_REG_MODE (mode)
14198 || VALID_MMX_REG_MODE_3DNOW (mode));
14199 }
14200 if (MMX_REGNO_P (regno))
14201 {
14202 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14203 so if the register is available at all, then we can move data of
14204 the given mode into or out of it. */
14205 return (VALID_MMX_REG_MODE (mode)
14206 || VALID_MMX_REG_MODE_3DNOW (mode));
14207 }
14208 /* We handle both integers and floats in the general purpose registers.
14209 In the future we should be able to handle vector modes as well.  */
14210 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14211 return 0;
14212 /* Take care with QImode values - they can be in non-QI regs, but then
14213 they do cause partial register stalls.  */
14214 if (regno < 4 || mode != QImode || TARGET_64BIT)
14215 return 1;
14216 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14217 }
14218
14219 /* Return the cost of moving data of mode M between a
14220 register and memory. A value of 2 is the default; this cost is
14221 relative to those in `REGISTER_MOVE_COST'.
14222
14223 If moving between registers and memory is more expensive than
14224 between two registers, you should define this macro to express the
14225 relative cost.
14226
14227 Also model the increased cost of moving QImode registers in non-Q_REGS
14228 classes.  */
14230 int
14231 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14232 {
14233 if (FLOAT_CLASS_P (class))
14234 {
14235 int index;
14236 switch (mode)
14237 {
14238 case SFmode:
14239 index = 0;
14240 break;
14241 case DFmode:
14242 index = 1;
14243 break;
14244 case XFmode:
14245 index = 2;
14246 break;
14247 default:
14248 return 100;
14249 }
14250 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14251 }
14252 if (SSE_CLASS_P (class))
14253 {
14254 int index;
14255 switch (GET_MODE_SIZE (mode))
14256 {
14257 case 4:
14258 index = 0;
14259 break;
14260 case 8:
14261 index = 1;
14262 break;
14263 case 16:
14264 index = 2;
14265 break;
14266 default:
14267 return 100;
14268 }
14269 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14270 }
14271 if (MMX_CLASS_P (class))
14272 {
14273 int index;
14274 switch (GET_MODE_SIZE (mode))
14275 {
14276 case 4:
14277 index = 0;
14278 break;
14279 case 8:
14280 index = 1;
14281 break;
14282 default:
14283 return 100;
14284 }
14285 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14286 }
14287 switch (GET_MODE_SIZE (mode))
14288 {
14289 case 1:
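/* Descriptive note (added): byte loads into non-Q classes go through movzbl, and byte stores from them are penalized, since only the Q registers have byte parts in 32-bit mode.  */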
14290 if (in)
14291 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14292 : ix86_cost->movzbl_load);
14293 else
14294 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14295 : ix86_cost->int_store[0] + 4);
14296 break;
14297 case 2:
14298 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14299 default:
14300 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14301 if (mode == TFmode)
14302 mode = XFmode;
14303 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14304 * (((int) GET_MODE_SIZE (mode)
14305 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14306 }
14307 }
14308
14309 /* Compute a (partial) cost for rtx X. Return true if the complete
14310 cost has been computed, and false if subexpressions should be
14311 scanned. In either case, *TOTAL contains the cost result. */
14312
14313 static bool
14314 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14315 {
14316 enum machine_mode mode = GET_MODE (x);
14317
14318 switch (code)
14319 {
14320 case CONST_INT:
14321 case CONST:
14322 case LABEL_REF:
14323 case SYMBOL_REF:
14324 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14325 *total = 3;
14326 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14327 *total = 2;
14328 else if (flag_pic && SYMBOLIC_CONST (x)
14329 && (!TARGET_64BIT
14330 || (GET_CODE (x) != LABEL_REF
14331 && (GET_CODE (x) != SYMBOL_REF
14332 || !SYMBOL_REF_LOCAL_P (x)))))
14333 *total = 1;
14334 else
14335 *total = 0;
14336 return true;
14337
14338 case CONST_DOUBLE:
14339 if (mode == VOIDmode)
14340 *total = 0;
14341 else
14342 switch (standard_80387_constant_p (x))
14343 {
14344 case 1: /* 0.0 */
14345 *total = 1;
14346 break;
14347 default: /* Other constants */
14348 *total = 2;
14349 break;
14350 case 0:
14351 case -1:
14352 /* Start with (MEM (SYMBOL_REF)), since that's where
14353 it'll probably end up. Add a penalty for size. */
14354 *total = (COSTS_N_INSNS (1)
14355 + (flag_pic != 0 && !TARGET_64BIT)
14356 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14357 break;
14358 }
14359 return true;
14360
14361 case ZERO_EXTEND:
14362 /* The zero extension is often completely free on x86_64, so make
14363 it as cheap as possible.  */
14364 if (TARGET_64BIT && mode == DImode
14365 && GET_MODE (XEXP (x, 0)) == SImode)
14366 *total = 1;
14367 else if (TARGET_ZERO_EXTEND_WITH_AND)
14368 *total = COSTS_N_INSNS (ix86_cost->add);
14369 else
14370 *total = COSTS_N_INSNS (ix86_cost->movzx);
14371 return false;
14372
14373 case SIGN_EXTEND:
14374 *total = COSTS_N_INSNS (ix86_cost->movsx);
14375 return false;
14376
14377 case ASHIFT:
14378 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14379 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14380 {
14381 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14382 if (value == 1)
14383 {
14384 *total = COSTS_N_INSNS (ix86_cost->add);
14385 return false;
14386 }
14387 if ((value == 2 || value == 3)
14388 && ix86_cost->lea <= ix86_cost->shift_const)
14389 {
14390 *total = COSTS_N_INSNS (ix86_cost->lea);
14391 return false;
14392 }
14393 }
14394 /* FALLTHRU */
14395
14396 case ROTATE:
14397 case ASHIFTRT:
14398 case LSHIFTRT:
14399 case ROTATERT:
14400 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14401 {
14402 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14403 {
14404 if (INTVAL (XEXP (x, 1)) > 32)
14405 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14406 else
14407 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14408 }
14409 else
14410 {
14411 if (GET_CODE (XEXP (x, 1)) == AND)
14412 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14413 else
14414 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14415 }
14416 }
14417 else
14418 {
14419 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14420 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14421 else
14422 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14423 }
14424 return false;
14425
14426 case MULT:
14427 if (FLOAT_MODE_P (mode))
14428 {
14429 *total = COSTS_N_INSNS (ix86_cost->fmul);
14430 return false;
14431 }
14432 else
14433 {
14434 rtx op0 = XEXP (x, 0);
14435 rtx op1 = XEXP (x, 1);
14436 int nbits;
14437 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14438 {
14439 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14440 for (nbits = 0; value != 0; value &= value - 1)
14441 nbits++;
14442 }
14443 else
14444 /* This is arbitrary. */
14445 nbits = 7;
14446
14447 /* Compute costs correctly for widening multiplication. */
14448 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14449 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14450 == GET_MODE_SIZE (mode))
14451 {
14452 int is_mulwiden = 0;
14453 enum machine_mode inner_mode = GET_MODE (op0);
14454
14455 if (GET_CODE (op0) == GET_CODE (op1))
14456 is_mulwiden = 1, op1 = XEXP (op1, 0);
14457 else if (GET_CODE (op1) == CONST_INT)
14458 {
14459 if (GET_CODE (op0) == SIGN_EXTEND)
14460 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14461 == INTVAL (op1);
14462 else
14463 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14464 }
14465
14466 if (is_mulwiden)
14467 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14468 }
14469
14470 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14471 + nbits * ix86_cost->mult_bit)
14472 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14473
14474 return true;
14475 }
14476
14477 case DIV:
14478 case UDIV:
14479 case MOD:
14480 case UMOD:
14481 if (FLOAT_MODE_P (mode))
14482 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14483 else
14484 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14485 return false;
14486
14487 case PLUS:
14488 if (FLOAT_MODE_P (mode))
14489 *total = COSTS_N_INSNS (ix86_cost->fadd);
14490 else if (GET_MODE_CLASS (mode) == MODE_INT
14491 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14492 {
14493 if (GET_CODE (XEXP (x, 0)) == PLUS
14494 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14495 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14496 && CONSTANT_P (XEXP (x, 1)))
14497 {
14498 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14499 if (val == 2 || val == 4 || val == 8)
14500 {
14501 *total = COSTS_N_INSNS (ix86_cost->lea);
14502 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14503 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14504 outer_code);
14505 *total += rtx_cost (XEXP (x, 1), outer_code);
14506 return true;
14507 }
14508 }
14509 else if (GET_CODE (XEXP (x, 0)) == MULT
14510 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14511 {
14512 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14513 if (val == 2 || val == 4 || val == 8)
14514 {
14515 *total = COSTS_N_INSNS (ix86_cost->lea);
14516 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14517 *total += rtx_cost (XEXP (x, 1), outer_code);
14518 return true;
14519 }
14520 }
14521 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14522 {
14523 *total = COSTS_N_INSNS (ix86_cost->lea);
14524 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14525 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14526 *total += rtx_cost (XEXP (x, 1), outer_code);
14527 return true;
14528 }
14529 }
14530 /* FALLTHRU */
14531
14532 case MINUS:
14533 if (FLOAT_MODE_P (mode))
14534 {
14535 *total = COSTS_N_INSNS (ix86_cost->fadd);
14536 return false;
14537 }
14538 /* FALLTHRU */
14539
14540 case AND:
14541 case IOR:
14542 case XOR:
14543 if (!TARGET_64BIT && mode == DImode)
14544 {
14545 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14546 + (rtx_cost (XEXP (x, 0), outer_code)
14547 << (GET_MODE (XEXP (x, 0)) != DImode))
14548 + (rtx_cost (XEXP (x, 1), outer_code)
14549 << (GET_MODE (XEXP (x, 1)) != DImode)));
14550 return true;
14551 }
14552 /* FALLTHRU */
14553
14554 case NEG:
14555 if (FLOAT_MODE_P (mode))
14556 {
14557 *total = COSTS_N_INSNS (ix86_cost->fchs);
14558 return false;
14559 }
14560 /* FALLTHRU */
14561
14562 case NOT:
14563 if (!TARGET_64BIT && mode == DImode)
14564 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14565 else
14566 *total = COSTS_N_INSNS (ix86_cost->add);
14567 return false;
14568
14569 case COMPARE:
14570 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14571 && XEXP (XEXP (x, 0), 1) == const1_rtx
14572 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14573 && XEXP (x, 1) == const0_rtx)
14574 {
14575 /* This kind of construct is implemented using test[bwl].
14576 Treat it as if we had an AND. */
14577 *total = (COSTS_N_INSNS (ix86_cost->add)
14578 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14579 + rtx_cost (const1_rtx, outer_code));
14580 return true;
14581 }
14582 return false;
14583
14584 case FLOAT_EXTEND:
14585 if (!TARGET_SSE_MATH
14586 || mode == XFmode
14587 || (mode == DFmode && !TARGET_SSE2))
14588 *total = 0;
14589 return false;
14590
14591 case ABS:
14592 if (FLOAT_MODE_P (mode))
14593 *total = COSTS_N_INSNS (ix86_cost->fabs);
14594 return false;
14595
14596 case SQRT:
14597 if (FLOAT_MODE_P (mode))
14598 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14599 return false;
14600
14601 case UNSPEC:
14602 if (XINT (x, 1) == UNSPEC_TP)
14603 *total = 0;
14604 return false;
14605
14606 default:
14607 return false;
14608 }
14609 }
14610
14611 #if TARGET_MACHO
14612
14613 static int current_machopic_label_num;
14614
14615 /* Given a symbol name and its associated stub, write out the
14616 definition of the stub. */
14617
14618 void
14619 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14620 {
14621 unsigned int length;
14622 char *binder_name, *symbol_name, lazy_ptr_name[32];
14623 int label = ++current_machopic_label_num;
14624
14625 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14626 symb = (*targetm.strip_name_encoding) (symb);
14627
14628 length = strlen (stub);
14629 binder_name = alloca (length + 32);
14630 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14631
14632 length = strlen (symb);
14633 symbol_name = alloca (length + 32);
14634 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14635
14636 sprintf (lazy_ptr_name, "L%d$lz", label);
14637
14638 if (MACHOPIC_PURE)
14639 machopic_picsymbol_stub_section ();
14640 else
14641 machopic_symbol_stub_section ();
14642
14643 fprintf (file, "%s:\n", stub);
14644 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14645
14646 if (MACHOPIC_PURE)
14647 {
14648 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14649 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14650 fprintf (file, "\tjmp %%edx\n");
14651 }
14652 else
14653 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14654
14655 fprintf (file, "%s:\n", binder_name);
14656
14657 if (MACHOPIC_PURE)
14658 {
14659 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14660 fprintf (file, "\tpushl %%eax\n");
14661 }
14662 else
14663 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14664
14665 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14666
14667 machopic_lazy_symbol_ptr_section ();
14668 fprintf (file, "%s:\n", lazy_ptr_name);
14669 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14670 fprintf (file, "\t.long %s\n", binder_name);
14671 }
14672 #endif /* TARGET_MACHO */
14673
14674 /* Order the registers for the register allocator.  */
14675
14676 void
14677 x86_order_regs_for_local_alloc (void)
14678 {
14679 int pos = 0;
14680 int i;
14681
14682 /* First allocate the local general purpose registers. */
14683 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14684 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14685 reg_alloc_order [pos++] = i;
14686
14687 /* Global general purpose registers. */
14688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14689 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14690 reg_alloc_order [pos++] = i;
14691
14692 /* x87 registers come first in case we are doing FP math
14693 using them. */
14694 if (!TARGET_SSE_MATH)
14695 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14696 reg_alloc_order [pos++] = i;
14697
14698 /* SSE registers. */
14699 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14700 reg_alloc_order [pos++] = i;
14701 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14702 reg_alloc_order [pos++] = i;
14703
14704 /* x87 registers. */
14705 if (TARGET_SSE_MATH)
14706 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14707 reg_alloc_order [pos++] = i;
14708
14709 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14710 reg_alloc_order [pos++] = i;
14711
14712 /* Initialize the rest of array as we do not allocate some registers
14713 at all. */
14714 while (pos < FIRST_PSEUDO_REGISTER)
14715 reg_alloc_order [pos++] = 0;
14716 }
14717
14718 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14719 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14720 #endif
14721
14722 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14723 struct attribute_spec.handler. */
14724 static tree
14725 ix86_handle_struct_attribute (tree *node, tree name,
14726 tree args ATTRIBUTE_UNUSED,
14727 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14728 {
14729 tree *type = NULL;
14730 if (DECL_P (*node))
14731 {
14732 if (TREE_CODE (*node) == TYPE_DECL)
14733 type = &TREE_TYPE (*node);
14734 }
14735 else
14736 type = node;
14737
14738 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14739 || TREE_CODE (*type) == UNION_TYPE)))
14740 {
14741 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
14742 *no_add_attrs = true;
14743 }
14744
14745 else if ((is_attribute_p ("ms_struct", name)
14746 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14747 || ((is_attribute_p ("gcc_struct", name)
14748 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14749 {
14750 warning ("%qs incompatible attribute ignored",
14751 IDENTIFIER_POINTER (name));
14752 *no_add_attrs = true;
14753 }
14754
14755 return NULL_TREE;
14756 }
14757
14758 static bool
14759 ix86_ms_bitfield_layout_p (tree record_type)
14760 {
14761 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14762 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14763 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14764 }
14765
14766 /* Returns an expression indicating where the this parameter is
14767 located on entry to the FUNCTION. */
14768
14769 static rtx
14770 x86_this_parameter (tree function)
14771 {
14772 tree type = TREE_TYPE (function);
14773
14774 if (TARGET_64BIT)
14775 {
14776 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14777 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14778 }
14779
14780 if (ix86_function_regparm (type, function) > 0)
14781 {
14782 tree parm;
14783
14784 parm = TYPE_ARG_TYPES (type);
14785 /* Figure out whether or not the function has a variable number of
14786 arguments. */
14787 for (; parm; parm = TREE_CHAIN (parm))
14788 if (TREE_VALUE (parm) == void_type_node)
14789 break;
14790 /* If not, the this parameter is in the first argument. */
14791 if (parm)
14792 {
14793 int regno = 0;
14794 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14795 regno = 2;
14796 return gen_rtx_REG (SImode, regno);
14797 }
14798 }
14799
14800 if (aggregate_value_p (TREE_TYPE (type), type))
14801 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14802 else
14803 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14804 }
14805
14806 /* Determine whether x86_output_mi_thunk can succeed. */
14807
14808 static bool
14809 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14810 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14811 HOST_WIDE_INT vcall_offset, tree function)
14812 {
14813 /* 64-bit can handle anything. */
14814 if (TARGET_64BIT)
14815 return true;
14816
14817 /* For 32-bit, everything's fine if we have one free register. */
14818 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14819 return true;
14820
14821 /* Need a free register for vcall_offset. */
14822 if (vcall_offset)
14823 return false;
14824
14825 /* Need a free register for GOT references. */
14826 if (flag_pic && !(*targetm.binds_local_p) (function))
14827 return false;
14828
14829 /* Otherwise ok. */
14830 return true;
14831 }
14832
14833 /* Output the assembler code for a thunk function. THUNK_DECL is the
14834 declaration for the thunk function itself, FUNCTION is the decl for
14835 the target function. DELTA is an immediate constant offset to be
14836 added to THIS. If VCALL_OFFSET is nonzero, the word at
14837 *(*this + vcall_offset) should be added to THIS. */
14838
14839 static void
14840 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14841 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14842 HOST_WIDE_INT vcall_offset, tree function)
14843 {
14844 rtx xops[3];
14845 rtx this = x86_this_parameter (function);
14846 rtx this_reg, tmp;
14847
14848 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14849 pull it in now and let DELTA benefit. */
14850 if (REG_P (this))
14851 this_reg = this;
14852 else if (vcall_offset)
14853 {
14854 /* Put the this parameter into %eax. */
14855 xops[0] = this;
14856 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14857 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14858 }
14859 else
14860 this_reg = NULL_RTX;
14861
14862 /* Adjust the this parameter by a fixed constant. */
14863 if (delta)
14864 {
14865 xops[0] = GEN_INT (delta);
14866 xops[1] = this_reg ? this_reg : this;
14867 if (TARGET_64BIT)
14868 {
14869 if (!x86_64_general_operand (xops[0], DImode))
14870 {
14871 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14872 xops[1] = tmp;
14873 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14874 xops[0] = tmp;
14875 xops[1] = this;
14876 }
14877 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14878 }
14879 else
14880 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14881 }
14882
14883 /* Adjust the this parameter by a value stored in the vtable. */
14884 if (vcall_offset)
14885 {
14886 if (TARGET_64BIT)
14887 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14888 else
14889 {
14890 int tmp_regno = 2 /* ECX */;
14891 if (lookup_attribute ("fastcall",
14892 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14893 tmp_regno = 0 /* EAX */;
14894 tmp = gen_rtx_REG (SImode, tmp_regno);
14895 }
14896
14897 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14898 xops[1] = tmp;
14899 if (TARGET_64BIT)
14900 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14901 else
14902 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14903
14904 /* Adjust the this parameter. */
14905 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14906 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14907 {
14908 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14909 xops[0] = GEN_INT (vcall_offset);
14910 xops[1] = tmp2;
14911 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14912 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14913 }
14914 xops[1] = this_reg;
14915 if (TARGET_64BIT)
14916 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14917 else
14918 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14919 }
14920
14921 /* If necessary, drop THIS back to its stack slot. */
14922 if (this_reg && this_reg != this)
14923 {
14924 xops[0] = this_reg;
14925 xops[1] = this;
14926 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14927 }
14928
14929 xops[0] = XEXP (DECL_RTL (function), 0);
14930 if (TARGET_64BIT)
14931 {
14932 if (!flag_pic || (*targetm.binds_local_p) (function))
14933 output_asm_insn ("jmp\t%P0", xops);
14934 else
14935 {
14936 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14937 tmp = gen_rtx_CONST (Pmode, tmp);
14938 tmp = gen_rtx_MEM (QImode, tmp);
14939 xops[0] = tmp;
14940 output_asm_insn ("jmp\t%A0", xops);
14941 }
14942 }
14943 else
14944 {
14945 if (!flag_pic || (*targetm.binds_local_p) (function))
14946 output_asm_insn ("jmp\t%P0", xops);
14947 else
14948 #if TARGET_MACHO
14949 if (TARGET_MACHO)
14950 {
14951 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14952 tmp = (gen_rtx_SYMBOL_REF
14953 (Pmode,
14954 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14955 tmp = gen_rtx_MEM (QImode, tmp);
14956 xops[0] = tmp;
14957 output_asm_insn ("jmp\t%0", xops);
14958 }
14959 else
14960 #endif /* TARGET_MACHO */
14961 {
14962 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14963 output_set_got (tmp);
14964
14965 xops[1] = tmp;
14966 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14967 output_asm_insn ("jmp\t{*}%1", xops);
14968 }
14969 }
14970 }
14971
14972 static void
14973 x86_file_start (void)
14974 {
14975 default_file_start ();
14976 if (X86_FILE_START_VERSION_DIRECTIVE)
14977 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14978 if (X86_FILE_START_FLTUSED)
14979 fputs ("\t.global\t__fltused\n", asm_out_file);
14980 if (ix86_asm_dialect == ASM_INTEL)
14981 fputs ("\t.intel_syntax\n", asm_out_file);
14982 }
14983
14984 int
14985 x86_field_alignment (tree field, int computed)
14986 {
14987 enum machine_mode mode;
14988 tree type = TREE_TYPE (field);
14989
14990 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14991 return computed;
14992 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14993 ? get_inner_array_type (type) : type);
14994 if (mode == DFmode || mode == DCmode
14995 || GET_MODE_CLASS (mode) == MODE_INT
14996 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14997 return MIN (32, computed);
14998 return computed;
14999 }
15000
15001 /* Output assembler code to FILE to increment profiler label # LABELNO
15002 for profiling a function entry. */
15003 void
15004 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15005 {
15006 if (TARGET_64BIT)
15007 if (flag_pic)
15008 {
15009 #ifndef NO_PROFILE_COUNTERS
15010 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15011 #endif
15012 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15013 }
15014 else
15015 {
15016 #ifndef NO_PROFILE_COUNTERS
15017 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15018 #endif
15019 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15020 }
15021 else if (flag_pic)
15022 {
15023 #ifndef NO_PROFILE_COUNTERS
15024 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15025 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15026 #endif
15027 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15028 }
15029 else
15030 {
15031 #ifndef NO_PROFILE_COUNTERS
15032 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15033 PROFILE_COUNT_REGISTER);
15034 #endif
15035 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15036 }
15037 }
15038
15039 /* We don't have exact information about the insn sizes, but we may assume
15040 quite safely that we are informed about all 1 byte insns and memory
15041 address sizes. This is enough to eliminate unnecessary padding in
15042 99% of cases. */
15043
15044 static int
15045 min_insn_size (rtx insn)
15046 {
15047 int l = 0;
15048
15049 if (!INSN_P (insn) || !active_insn_p (insn))
15050 return 0;
15051
15052 /* Discard alignments we've emitted, and jump instructions.  */
15053 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15054 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15055 return 0;
15056 if (GET_CODE (insn) == JUMP_INSN
15057 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15058 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15059 return 0;
15060
15061 /* Important case - calls are always 5 bytes.
15062 It is common to have many calls in a row.  */
15063 if (GET_CODE (insn) == CALL_INSN
15064 && symbolic_reference_mentioned_p (PATTERN (insn))
15065 && !SIBLING_CALL_P (insn))
15066 return 5;
15067 if (get_attr_length (insn) <= 1)
15068 return 1;
15069
15070 /* For normal instructions we may rely on the sizes of addresses
15071 and the presence of a symbol to require 4 bytes of encoding.  This is
15072 not the case for jumps, where references are PC relative.  */
15073 if (GET_CODE (insn) != JUMP_INSN)
15074 {
15075 l = get_attr_length_address (insn);
15076 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15077 l = 4;
15078 }
15079 if (l)
15080 return 1+l;
15081 else
15082 return 2;
15083 }
15084
15085 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15086 window. */
15087
15088 static void
15089 ix86_avoid_jump_misspredicts (void)
15090 {
15091 rtx insn, start = get_insns ();
15092 int nbytes = 0, njumps = 0;
15093 int isjump = 0;
15094
15095 /* Look for all minimal intervals of instructions containing 4 jumps.
15096 The intervals are bounded by START and INSN.  NBYTES is the total
15097 size of the instructions in the interval, including INSN but not
15098 including START.  When NBYTES is smaller than 16 bytes, it is possible
15099 that the end of START and INSN end up in the same 16-byte window.
15100
15101 The smallest offset in the window at which INSN can start is the case
15102 where START ends at offset 0.  The offset of INSN is then
15103 NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
15104 maxskip of 17 - NBYTES + sizeof (INSN).  */
15105 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15106 {
15107
15108 nbytes += min_insn_size (insn);
15109 if (dump_file)
15110 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15111 INSN_UID (insn), min_insn_size (insn));
15112 if ((GET_CODE (insn) == JUMP_INSN
15113 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15114 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15115 || GET_CODE (insn) == CALL_INSN)
15116 njumps++;
15117 else
15118 continue;
15119
15120 while (njumps > 3)
15121 {
15122 start = NEXT_INSN (start);
15123 if ((GET_CODE (start) == JUMP_INSN
15124 && GET_CODE (PATTERN (start)) != ADDR_VEC
15125 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15126 || GET_CODE (start) == CALL_INSN)
15127 njumps--, isjump = 1;
15128 else
15129 isjump = 0;
15130 nbytes -= min_insn_size (start);
15131 }
15132 if (njumps < 0)
15133 abort ();
15134 if (dump_file)
15135 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15136 INSN_UID (start), INSN_UID (insn), nbytes);
15137
15138 if (njumps == 3 && isjump && nbytes < 16)
15139 {
15140 int padsize = 15 - nbytes + min_insn_size (insn);
15141
15142 if (dump_file)
15143 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15144 INSN_UID (insn), padsize);
15145 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15146 }
15147 }
15148 }
15149
15150 /* AMD Athlon works faster
15151 when RET is not the destination of a conditional jump or directly preceded
15152 by another jump instruction.  We avoid the penalty by inserting a NOP just
15153 before the RET instruction in such cases.  */
15154 static void
15155 ix86_pad_returns (void)
15156 {
15157 edge e;
15158 edge_iterator ei;
15159
15160 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15161 {
15162 basic_block bb = e->src;
15163 rtx ret = BB_END (bb);
15164 rtx prev;
15165 bool replace = false;
15166
15167 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15168 || !maybe_hot_bb_p (bb))
15169 continue;
15170 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15171 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15172 break;
15173 if (prev && GET_CODE (prev) == CODE_LABEL)
15174 {
15175 edge e;
15176 edge_iterator ei;
15177
15178 FOR_EACH_EDGE (e, ei, bb->preds)
15179 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15180 && !(e->flags & EDGE_FALLTHRU))
15181 replace = true;
15182 }
15183 if (!replace)
15184 {
15185 prev = prev_active_insn (ret);
15186 if (prev
15187 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15188 || GET_CODE (prev) == CALL_INSN))
15189 replace = true;
15190 /* Empty functions get a branch mispredict even when the jump destination
15191 is not visible to us.  */
15192 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15193 replace = true;
15194 }
15195 if (replace)
15196 {
15197 emit_insn_before (gen_return_internal_long (), ret);
15198 delete_insn (ret);
15199 }
15200 }
15201 }
15202
15203 /* Implement machine specific optimizations.  We implement padding of returns
15204 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
15205 static void
15206 ix86_reorg (void)
15207 {
15208 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15209 ix86_pad_returns ();
15210 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15211 ix86_avoid_jump_misspredicts ();
15212 }
15213
15214 /* Return nonzero when a QImode register that must be represented via a REX
15215 prefix is used.  */
15216 bool
15217 x86_extended_QIreg_mentioned_p (rtx insn)
15218 {
15219 int i;
15220 extract_insn_cached (insn);
15221 for (i = 0; i < recog_data.n_operands; i++)
15222 if (REG_P (recog_data.operand[i])
15223 && REGNO (recog_data.operand[i]) >= 4)
15224 return true;
15225 return false;
15226 }
15227
15228 /* Return nonzero when P points to a register encoded via a REX prefix.
15229 Called via for_each_rtx.  */
15230 static int
15231 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15232 {
15233 unsigned int regno;
15234 if (!REG_P (*p))
15235 return 0;
15236 regno = REGNO (*p);
15237 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15238 }
15239
15240 /* Return true when INSN mentions a register that must be encoded using a
15241 REX prefix.  */
15242 bool
15243 x86_extended_reg_mentioned_p (rtx insn)
15244 {
15245 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15246 }
15247
15248 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15249 optabs would emit if we didn't have TFmode patterns. */
15250
15251 void
15252 x86_emit_floatuns (rtx operands[2])
15253 {
15254 rtx neglab, donelab, i0, i1, f0, in, out;
15255 enum machine_mode mode, inmode;
15256
15257 inmode = GET_MODE (operands[1]);
15258 if (inmode != SImode
15259 && inmode != DImode)
15260 abort ();
15261
15262 out = operands[0];
15263 in = force_reg (inmode, operands[1]);
15264 mode = GET_MODE (out);
15265 neglab = gen_label_rtx ();
15266 donelab = gen_label_rtx ();
15267 i1 = gen_reg_rtx (Pmode);
15268 f0 = gen_reg_rtx (mode);
15269
15270 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15271
15272 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15273 emit_jump_insn (gen_jump (donelab));
15274 emit_barrier ();
15275
15276 emit_label (neglab);
15277
15278 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15279 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15280 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15281 expand_float (f0, i0, 0);
15282 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15283
15284 emit_label (donelab);
15285 }
15286
15287 /* Initialize vector TARGET via VALS. */
15288 void
15289 ix86_expand_vector_init (rtx target, rtx vals)
15290 {
15291 enum machine_mode mode = GET_MODE (target);
15292 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15293 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15294 int i;
15295
15296 for (i = n_elts - 1; i >= 0; i--)
15297 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15298 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15299 break;
15300
15301 /* A few special cases first...
15302 ... constants are best loaded from the constant pool.  */
15303 if (i < 0)
15304 {
15305 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15306 return;
15307 }
15308
15309 /* ... values where only the first field is non-constant are best loaded
15310 from the pool and overwritten via a move later.  */
15311 if (i == 0)
15312 {
15313 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15314 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15315
15316 switch (GET_MODE (target))
15317 {
15318 case V2DFmode:
15319 emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
15320 break;
15321
15322 case V4SFmode:
15323 {
15324 /* ??? We can represent this better. */
15325 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15326 GET_MODE_INNER (mode), 0);
15327 op = force_reg (mode, op);
15328 emit_insn (gen_sse_movss (target, target, op));
15329 }
15330 break;
15331
15332 default:
15333 break;
15334 }
15335 return;
15336 }
15337
15338 /* Otherwise, build the vector element by element with interleaves.  */
15339 switch (GET_MODE (target))
15340 {
15341 case V2DFmode:
15342 {
15343 rtx vecop0 =
15344 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15345 rtx vecop1 =
15346 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15347
15348 vecop0 = force_reg (V2DFmode, vecop0);
15349 vecop1 = force_reg (V2DFmode, vecop1);
15350 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15351 }
15352 break;
15353 case V4SFmode:
15354 {
15355 rtx vecop0 =
15356 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15357 rtx vecop1 =
15358 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15359 rtx vecop2 =
15360 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15361 rtx vecop3 =
15362 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15363 rtx tmp1 = gen_reg_rtx (V4SFmode);
15364 rtx tmp2 = gen_reg_rtx (V4SFmode);
15365
15366 vecop0 = force_reg (V4SFmode, vecop0);
15367 vecop1 = force_reg (V4SFmode, vecop1);
15368 vecop2 = force_reg (V4SFmode, vecop2);
15369 vecop3 = force_reg (V4SFmode, vecop3);
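/* Descriptive note (added): two levels of unpcklps: tmp1 = { e1, e3, ... }, tmp2 = { e0, e2, ... }, then interleaving tmp2 with tmp1 yields { e0, e1, e2, e3 }.  */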
15370 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15371 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15372 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15373 }
15374 break;
15375 default:
15376 abort ();
15377 }
15378 }
15379
15380 /* Implements target hook vector_mode_supported_p. */
15381 static bool
15382 ix86_vector_mode_supported_p (enum machine_mode mode)
15383 {
15384 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
15385 return true;
15386 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
15387 return true;
15388 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
15389 return true;
15390 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
15391 return true;
15392 return false;
15393 }
15394
15395 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15396
15397 We do this in the new i386 backend to maintain source compatibility
15398 with the old cc0-based compiler. */
15399
15400 static tree
15401 ix86_md_asm_clobbers (tree clobbers)
15402 {
15403 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15404 clobbers);
15405 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15406 clobbers);
15407 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15408 clobbers);
15409 return clobbers;
15410 }
15411
15412 /* Worker function for REVERSE_CONDITION. */
15413
15414 enum rtx_code
15415 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15416 {
15417 return (mode != CCFPmode && mode != CCFPUmode
15418 ? reverse_condition (code)
15419 : reverse_condition_maybe_unordered (code));
15420 }
15421
15422 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15423 to OPERANDS[0]. */
15424
15425 const char *
15426 output_387_reg_move (rtx insn, rtx *operands)
15427 {
15428 if (REG_P (operands[1])
15429 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15430 {
15431 if (REGNO (operands[0]) == FIRST_STACK_REG
15432 && TARGET_USE_FFREEP)
15433 return "ffreep\t%y0";
15434 return "fstp\t%y0";
15435 }
15436 if (STACK_TOP_P (operands[0]))
15437 return "fld%z1\t%y1";
15438 return "fst\t%y0";
15439 }
15440
15441 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
15442 the FP status register is set.  */
15443
15444 void
15445 ix86_emit_fp_unordered_jump (rtx label)
15446 {
15447 rtx reg = gen_reg_rtx (HImode);
15448 rtx temp;
15449
15450 emit_insn (gen_x86_fnstsw_1 (reg));
15451
15452 if (TARGET_USE_SAHF)
15453 {
15454 emit_insn (gen_x86_sahf_1 (reg));
15455
15456 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15457 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15458 }
15459 else
15460 {
15461 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
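/* Descriptive note (added): without sahf, test C2 directly.  C2 is bit 10 of the FPU status word, i.e. bit 2 (0x04) of the high byte that fnstsw left in the register.  */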
15462
15463 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15464 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15465 }
15466
15467 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15468 gen_rtx_LABEL_REF (VOIDmode, label),
15469 pc_rtx);
15470 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15471 emit_jump_insn (temp);
15472 }
15473
15474 /* Output code to perform a log1p XFmode calculation. */
15475
15476 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15477 {
15478 rtx label1 = gen_label_rtx ();
15479 rtx label2 = gen_label_rtx ();
15480
15481 rtx tmp = gen_reg_rtx (XFmode);
15482 rtx tmp2 = gen_reg_rtx (XFmode);
15483
15484 emit_insn (gen_absxf2 (tmp, op1));
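/* Descriptive note (added): fyl2xp1 is only accurate for |op1| < 1 - sqrt(2)/2 (about 0.2929); for larger values, branch to the fyl2x path on 1 + op1 below.  */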
15485 emit_insn (gen_cmpxf (tmp,
15486 CONST_DOUBLE_FROM_REAL_VALUE (
15487 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15488 XFmode)));
15489 emit_jump_insn (gen_bge (label1));
15490
15491 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15492 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15493 emit_jump (label2);
15494
15495 emit_label (label1);
15496 emit_move_insn (tmp, CONST1_RTX (XFmode));
15497 emit_insn (gen_addxf3 (tmp, op1, tmp));
15498 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15499 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15500
15501 emit_label (label2);
15502 }
15503
15504 /* Solaris named-section hook. Parameters are as for
15505 named_section_real. */
15506
15507 static void
15508 i386_solaris_elf_named_section (const char *name, unsigned int flags,
15509 tree decl)
15510 {
15511 /* With Binutils 2.15, the "@unwind" marker must be specified on
15512 every occurrence of the ".eh_frame" section, not just the first
15513 one. */
15514 if (TARGET_64BIT
15515 && strcmp (name, ".eh_frame") == 0)
15516 {
15517 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15518 flags & SECTION_WRITE ? "aw" : "a");
15519 return;
15520 }
15521 default_elf_asm_named_section (name, flags, decl);
15522 }
15523
15524 #include "gt-i386.h"