gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
53
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
61
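/* MODE_INDEX selects the slot in the five-entry per-mode cost arrays below:
   QImode, HImode, SImode and DImode map to slots 0-3 and everything else
   (e.g. TImode) to slot 4.  A typical lookup -- the field name here is
   illustrative, the real names live in struct processor_costs in i386.h --
   looks like

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   where ix86_cost points at the cost table selected for the -mtune target.  */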
62 /* Processor costs (relative to an add) */
63 static const
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
98 1, /* Branch cost */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
105 };
106
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
120 3, /* MOVE_RATIO */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
143 1, /* Branch cost */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
150 };
151
152 static const
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
164 3, /* MOVE_RATIO */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
187 1, /* Branch cost */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
194 };
195
196 static const
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
208 6, /* MOVE_RATIO */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
231 2, /* Branch cost */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
238 };
239
240 static const
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
275 2, /* Branch cost */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
282 };
283
284 static const
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
296 4, /* MOVE_RATIO */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
319 1, /* Branch cost */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
326 };
327
328 static const
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
340 9, /* MOVE_RATIO */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
363 2, /* Branch cost */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
370 };
371
372 static const
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
414 };
415
416 static const
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
458 };
459
460 static const
461 struct processor_costs nocona_cost = {
462 1, /* cost of an add instruction */
463 1, /* cost of a lea instruction */
464 1, /* variable shift costs */
465 1, /* constant shift costs */
466 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
467 0, /* cost of multiply per each bit set */
468 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
469 1, /* cost of movsx */
470 1, /* cost of movzx */
471 16, /* "large" insn */
472 9, /* MOVE_RATIO */
473 4, /* cost for loading QImode using movzbl */
474 {4, 4, 4}, /* cost of loading integer registers
475 in QImode, HImode and SImode.
476 Relative to reg-reg move (2). */
477 {4, 4, 4}, /* cost of storing integer registers */
478 3, /* cost of reg,reg fld/fst */
479 {12, 12, 12}, /* cost of loading fp registers
480 in SFmode, DFmode and XFmode */
481 {4, 4, 4}, /* cost of storing fp registers */
482 6, /* cost of moving MMX register */
483 {12, 12}, /* cost of loading MMX registers
484 in SImode and DImode */
485 {12, 12}, /* cost of storing MMX registers
486 in SImode and DImode */
487 6, /* cost of moving SSE register */
488 {12, 12, 12}, /* cost of loading SSE registers
489 in SImode, DImode and TImode */
490 {12, 12, 12}, /* cost of storing SSE registers
491 in SImode, DImode and TImode */
492 8, /* MMX or SSE register to integer */
493 128, /* size of prefetch block */
494 8, /* number of parallel prefetches */
495 1, /* Branch cost */
496 6, /* cost of FADD and FSUB insns. */
497 8, /* cost of FMUL instruction. */
498 40, /* cost of FDIV instruction. */
499 3, /* cost of FABS instruction. */
500 3, /* cost of FCHS instruction. */
501 44, /* cost of FSQRT instruction. */
502 };
503
504 const struct processor_costs *ix86_cost = &pentium_cost;
505
506 /* Processor feature/optimization bitmasks. */
507 #define m_386 (1<<PROCESSOR_I386)
508 #define m_486 (1<<PROCESSOR_I486)
509 #define m_PENT (1<<PROCESSOR_PENTIUM)
510 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
511 #define m_K6 (1<<PROCESSOR_K6)
512 #define m_ATHLON (1<<PROCESSOR_ATHLON)
513 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
514 #define m_K8 (1<<PROCESSOR_K8)
515 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
516 #define m_NOCONA (1<<PROCESSOR_NOCONA)
517
518 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
519 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
520 const int x86_zero_extend_with_and = m_486 | m_PENT;
521 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
522 const int x86_double_with_add = ~m_386;
523 const int x86_use_bit_test = m_386;
524 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
525 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
526 const int x86_3dnow_a = m_ATHLON_K8;
527 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_branch_hints = m_PENT4 | m_NOCONA;
529 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
530 const int x86_partial_reg_stall = m_PPRO;
531 const int x86_use_loop = m_K6;
532 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
533 const int x86_use_mov0 = m_K6;
534 const int x86_use_cltd = ~(m_PENT | m_K6);
535 const int x86_read_modify_write = ~m_PENT;
536 const int x86_read_modify = ~(m_PENT | m_PPRO);
537 const int x86_split_long_moves = m_PPRO;
538 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
539 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
540 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
541 const int x86_qimode_math = ~(0);
542 const int x86_promote_qi_regs = 0;
543 const int x86_himode_math = ~(m_PPRO);
544 const int x86_promote_hi_regs = m_PPRO;
545 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
546 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
547 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
548 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
550 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
551 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
552 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
553 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
554 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
555 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
556 const int x86_shift1 = ~m_486;
557 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
559 /* Set for machines where the type and dependencies are resolved on SSE register
560 parts instead of whole registers, so we may maintain just the lower part of
561 scalar values in the proper format, leaving the upper part undefined. */
562 const int x86_sse_partial_regs = m_ATHLON_K8;
563 /* Athlon optimizes the partial-register FPS special case, thus avoiding the
564 need for extra instructions beforehand. */
565 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
566 const int x86_sse_typeless_stores = m_ATHLON_K8;
567 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
568 const int x86_use_ffreep = m_ATHLON_K8;
569 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
570 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
571 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
572 /* Some CPU cores are not able to predict more than 4 branch instructions in
573 the 16 byte window. */
574 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
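/* Each of these feature masks is consumed by testing it against the bit for
   the current CPU: either the -march CPU, as in

     if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))

   or the -mtune CPU via TUNEMASK, as in the x86_accumulate_outgoing_args
   check in override_options below.  (i386.h normally wraps these tests in
   TARGET_* convenience macros; the exact macro names live there, not here.)  */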
575
576 /* In case the average insn count for a single function invocation is
577 lower than this constant, emit fast (but longer) prologue and
578 epilogue code. */
579 #define FAST_PROLOGUE_INSN_COUNT 20
580
581 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
582 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
583 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
584 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
585
586 /* Array of the smallest class containing reg number REGNO, indexed by
587 REGNO. Used by REGNO_REG_CLASS in i386.h. */
588
589 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
590 {
591 /* ax, dx, cx, bx */
592 AREG, DREG, CREG, BREG,
593 /* si, di, bp, sp */
594 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
595 /* FP registers */
596 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
597 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
598 /* arg pointer */
599 NON_Q_REGS,
600 /* flags, fpsr, dirflag, frame */
601 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
602 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
603 SSE_REGS, SSE_REGS,
604 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
605 MMX_REGS, MMX_REGS,
606 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
607 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
610 };
611
612 /* The "default" register map used in 32bit mode. */
613
614 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
615 {
616 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
617 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
618 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
619 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
620 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
621 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
622 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
623 };
624
625 static int const x86_64_int_parameter_registers[6] =
626 {
627 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
628 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
629 };
630
631 static int const x86_64_int_return_registers[4] =
632 {
633 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
634 };
635
636 /* The "default" register map used in 64bit mode. */
637 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
638 {
639 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
640 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
641 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
642 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
643 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
644 8,9,10,11,12,13,14,15, /* extended integer registers */
645 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
646 };
647
648 /* Define the register numbers to be used in Dwarf debugging information.
649 The SVR4 reference port C compiler uses the following register numbers
650 in its Dwarf output code:
651 0 for %eax (gcc regno = 0)
652 1 for %ecx (gcc regno = 2)
653 2 for %edx (gcc regno = 1)
654 3 for %ebx (gcc regno = 3)
655 4 for %esp (gcc regno = 7)
656 5 for %ebp (gcc regno = 6)
657 6 for %esi (gcc regno = 4)
658 7 for %edi (gcc regno = 5)
659 The following three DWARF register numbers are never generated by
660 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
661 believes these numbers have these meanings.
662 8 for %eip (no gcc equivalent)
663 9 for %eflags (gcc regno = 17)
664 10 for %trapno (no gcc equivalent)
665 It is not at all clear how we should number the FP stack registers
666 for the x86 architecture. If the version of SDB on x86/svr4 were
667 a bit less brain dead with respect to floating-point then we would
668 have a precedent to follow with respect to DWARF register numbers
669 for x86 FP registers, but the SDB on x86/svr4 is so completely
670 broken with respect to FP registers that it is hardly worth thinking
671 of it as something to strive for compatibility with.
672 The version of x86/svr4 SDB I have at the moment does (partially)
673 seem to believe that DWARF register number 11 is associated with
674 the x86 register %st(0), but that's about all. Higher DWARF
675 register numbers don't seem to be associated with anything in
676 particular, and even for DWARF regno 11, SDB only seems to under-
677 stand that it should say that a variable lives in %st(0) (when
678 asked via an `=' command) if we said it was in DWARF regno 11,
679 but SDB still prints garbage when asked for the value of the
680 variable in question (via a `/' command).
681 (Also note that the labels SDB prints for various FP stack regs
682 when doing an `x' command are all wrong.)
683 Note that these problems generally don't affect the native SVR4
684 C compiler because it doesn't allow the use of -O with -g and
685 because when it is *not* optimizing, it allocates a memory
686 location for each floating-point variable, and the memory
687 location is what gets described in the DWARF AT_location
688 attribute for the variable in question.
689 Regardless of the severe mental illness of the x86/svr4 SDB, we
690 do something sensible here and we use the following DWARF
691 register numbers. Note that these are all stack-top-relative
692 numbers.
693 11 for %st(0) (gcc regno = 8)
694 12 for %st(1) (gcc regno = 9)
695 13 for %st(2) (gcc regno = 10)
696 14 for %st(3) (gcc regno = 11)
697 15 for %st(4) (gcc regno = 12)
698 16 for %st(5) (gcc regno = 13)
699 17 for %st(6) (gcc regno = 14)
700 18 for %st(7) (gcc regno = 15)
701 */
702 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
703 {
704 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
705 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
706 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
707 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
708 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
709 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
710 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
711 };
712
713 /* Test and compare insns in i386.md store the information needed to
714 generate branch and scc insns here. */
715
716 rtx ix86_compare_op0 = NULL_RTX;
717 rtx ix86_compare_op1 = NULL_RTX;
718
719 #define MAX_386_STACK_LOCALS 3
720 /* Size of the register save area. */
721 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
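/* For the 64-bit ABI this is the size of the va_start register save area:
   assuming the usual values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8, it works out to 6 * 8 + 8 * 16 = 176 bytes.  */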
722
723 /* Define the structure for the machine field in struct function. */
724
725 struct stack_local_entry GTY(())
726 {
727 unsigned short mode;
728 unsigned short n;
729 rtx rtl;
730 struct stack_local_entry *next;
731 };
732
733 /* Structure describing stack frame layout.
734 Stack grows downward:
735
736 [arguments]
737 <- ARG_POINTER
738 saved pc
739
740 saved frame pointer if frame_pointer_needed
741 <- HARD_FRAME_POINTER
742 [saved regs]
743
744 [padding1] \
745 )
746 [va_arg registers] (
747 > to_allocate <- FRAME_POINTER
748 [frame] (
749 )
750 [padding2] /
751 */
752 struct ix86_frame
753 {
754 int nregs;
755 int padding1;
756 int va_arg_size;
757 HOST_WIDE_INT frame;
758 int padding2;
759 int outgoing_arguments_size;
760 int red_zone_size;
761
762 HOST_WIDE_INT to_allocate;
763 /* The offsets relative to ARG_POINTER. */
764 HOST_WIDE_INT frame_pointer_offset;
765 HOST_WIDE_INT hard_frame_pointer_offset;
766 HOST_WIDE_INT stack_pointer_offset;
767
768 /* When save_regs_using_mov is set, emit prologue using
769 move instead of push instructions. */
770 bool save_regs_using_mov;
771 };
772
773 /* Used to enable/disable debugging features. */
774 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
775 /* Code model option as passed by user. */
776 const char *ix86_cmodel_string;
777 /* Parsed value. */
778 enum cmodel ix86_cmodel;
779 /* Asm dialect. */
780 const char *ix86_asm_string;
781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
782 /* TLS dialect. */
783 const char *ix86_tls_dialect_string;
784 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
785
786 /* Which unit we are generating floating point math for. */
787 enum fpmath_unit ix86_fpmath;
788
789 /* Which CPU we are scheduling for. */
790 enum processor_type ix86_tune;
791 /* Which instruction set architecture to use. */
792 enum processor_type ix86_arch;
793
794 /* Strings to hold which cpu and instruction set architecture to use. */
795 const char *ix86_tune_string; /* for -mtune=<xxx> */
796 const char *ix86_arch_string; /* for -march=<xxx> */
797 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
798
799 /* # of registers to use to pass arguments. */
800 const char *ix86_regparm_string;
801
802 /* True if the SSE prefetch instruction is not a NOP. */
803 int x86_prefetch_sse;
804
805 /* ix86_regparm_string as a number */
806 int ix86_regparm;
807
808 /* Alignment to use for loops and jumps: */
809
810 /* Power of two alignment for loops. */
811 const char *ix86_align_loops_string;
812
813 /* Power of two alignment for non-loop jumps. */
814 const char *ix86_align_jumps_string;
815
816 /* Power of two alignment for stack boundary in bytes. */
817 const char *ix86_preferred_stack_boundary_string;
818
819 /* Preferred alignment for stack boundary in bits. */
820 int ix86_preferred_stack_boundary;
821
822 /* Values 1-5: see jump.c */
823 int ix86_branch_cost;
824 const char *ix86_branch_cost_string;
825
826 /* Power of two alignment for functions. */
827 const char *ix86_align_funcs_string;
828
829 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
830 static char internal_label_prefix[16];
831 static int internal_label_prefix_len;
832 \f
833 static int local_symbolic_operand (rtx, enum machine_mode);
834 static int tls_symbolic_operand_1 (rtx, enum tls_model);
835 static void output_pic_addr_const (FILE *, rtx, int);
836 static void put_condition_code (enum rtx_code, enum machine_mode,
837 int, int, FILE *);
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx *, void *);
840 static rtx maybe_get_pool_constant (rtx);
841 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
842 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
843 rtx *);
844 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
845 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
846 enum machine_mode);
847 static rtx get_thread_pointer (int);
848 static rtx legitimize_tls_address (rtx, enum tls_model, int);
849 static void get_pc_thunk_name (char [32], unsigned int);
850 static rtx gen_push (rtx);
851 static int memory_address_length (rtx addr);
852 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
853 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
854 static struct machine_function * ix86_init_machine_status (void);
855 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
856 static int ix86_nsaved_regs (void);
857 static void ix86_emit_save_regs (void);
858 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
859 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
860 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
861 static HOST_WIDE_INT ix86_GOT_alias_set (void);
862 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
863 static rtx ix86_expand_aligntest (rtx, int);
864 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
865 static int ix86_issue_rate (void);
866 static int ix86_adjust_cost (rtx, rtx, rtx, int);
867 static int ia32_use_dfa_pipeline_interface (void);
868 static int ia32_multipass_dfa_lookahead (void);
869 static void ix86_init_mmx_sse_builtins (void);
870 static rtx x86_this_parameter (tree);
871 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
872 HOST_WIDE_INT, tree);
873 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
874 static void x86_file_start (void);
875 static void ix86_reorg (void);
876 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
877 static tree ix86_build_builtin_va_list (void);
878 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
879 tree, int *, int);
880
881 struct ix86_address
882 {
883 rtx base, index, disp;
884 HOST_WIDE_INT scale;
885 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
886 };
887
888 static int ix86_decompose_address (rtx, struct ix86_address *);
889 static int ix86_address_cost (rtx);
890 static bool ix86_cannot_force_const_mem (rtx);
891 static rtx ix86_delegitimize_address (rtx);
892
893 struct builtin_description;
894 static rtx ix86_expand_sse_comi (const struct builtin_description *,
895 tree, rtx);
896 static rtx ix86_expand_sse_compare (const struct builtin_description *,
897 tree, rtx);
898 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
899 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
900 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_store_builtin (enum insn_code, tree);
902 static rtx safe_vector_operand (rtx, enum machine_mode);
903 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
904 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
905 enum rtx_code *, enum rtx_code *);
906 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
907 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
908 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
909 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
910 static int ix86_fp_comparison_cost (enum rtx_code code);
911 static unsigned int ix86_select_alt_pic_regnum (void);
912 static int ix86_save_reg (unsigned int, int);
913 static void ix86_compute_frame_layout (struct ix86_frame *);
914 static int ix86_comp_type_attributes (tree, tree);
915 static int ix86_function_regparm (tree, tree);
916 const struct attribute_spec ix86_attribute_table[];
917 static bool ix86_function_ok_for_sibcall (tree, tree);
918 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
919 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
920 static int ix86_value_regno (enum machine_mode);
921 static bool contains_128bit_aligned_vector_p (tree);
922 static bool ix86_ms_bitfield_layout_p (tree);
923 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
924 static int extended_reg_mentioned_1 (rtx *, void *);
925 static bool ix86_rtx_costs (rtx, int, int, int *);
926 static int min_insn_size (rtx);
927 static tree ix86_md_asm_clobbers (tree clobbers);
928
929 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
930 static void ix86_svr3_asm_out_constructor (rtx, int);
931 #endif
932
933 /* Register class used for passing a given 64-bit part of the argument.
934 These represent classes as documented by the psABI, with the exception
935 of the SSESF and SSEDF classes, which are basically the SSE class: gcc will
936 use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
937
938 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
939 whenever possible (the upper half does contain padding).
940 */
941 enum x86_64_reg_class
942 {
943 X86_64_NO_CLASS,
944 X86_64_INTEGER_CLASS,
945 X86_64_INTEGERSI_CLASS,
946 X86_64_SSE_CLASS,
947 X86_64_SSESF_CLASS,
948 X86_64_SSEDF_CLASS,
949 X86_64_SSEUP_CLASS,
950 X86_64_X87_CLASS,
951 X86_64_X87UP_CLASS,
952 X86_64_MEMORY_CLASS
953 };
954 static const char * const x86_64_reg_class_name[] =
955 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
956
957 #define MAX_CLASSES 4
958 static int classify_argument (enum machine_mode, tree,
959 enum x86_64_reg_class [MAX_CLASSES], int);
960 static int examine_argument (enum machine_mode, tree, int, int *, int *);
961 static rtx construct_container (enum machine_mode, tree, int, int, int,
962 const int *, int);
963 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
964 enum x86_64_reg_class);
965
966 /* Table of constants used by fldpi, fldln2, etc.... */
967 static REAL_VALUE_TYPE ext_80387_constants_table [5];
968 static bool ext_80387_constants_init = 0;
969 static void init_ext_80387_constants (void);
970 \f
971 /* Initialize the GCC target structure. */
972 #undef TARGET_ATTRIBUTE_TABLE
973 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
974 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
975 # undef TARGET_MERGE_DECL_ATTRIBUTES
976 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
977 #endif
978
979 #undef TARGET_COMP_TYPE_ATTRIBUTES
980 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
981
982 #undef TARGET_INIT_BUILTINS
983 #define TARGET_INIT_BUILTINS ix86_init_builtins
984
985 #undef TARGET_EXPAND_BUILTIN
986 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
987
988 #undef TARGET_ASM_FUNCTION_EPILOGUE
989 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
990
991 #undef TARGET_ASM_OPEN_PAREN
992 #define TARGET_ASM_OPEN_PAREN ""
993 #undef TARGET_ASM_CLOSE_PAREN
994 #define TARGET_ASM_CLOSE_PAREN ""
995
996 #undef TARGET_ASM_ALIGNED_HI_OP
997 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
998 #undef TARGET_ASM_ALIGNED_SI_OP
999 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1000 #ifdef ASM_QUAD
1001 #undef TARGET_ASM_ALIGNED_DI_OP
1002 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1003 #endif
1004
1005 #undef TARGET_ASM_UNALIGNED_HI_OP
1006 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1007 #undef TARGET_ASM_UNALIGNED_SI_OP
1008 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1009 #undef TARGET_ASM_UNALIGNED_DI_OP
1010 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1011
1012 #undef TARGET_SCHED_ADJUST_COST
1013 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1014 #undef TARGET_SCHED_ISSUE_RATE
1015 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1016 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1017 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
1018 ia32_use_dfa_pipeline_interface
1019 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1020 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1021 ia32_multipass_dfa_lookahead
1022
1023 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1024 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1025
1026 #ifdef HAVE_AS_TLS
1027 #undef TARGET_HAVE_TLS
1028 #define TARGET_HAVE_TLS true
1029 #endif
1030 #undef TARGET_CANNOT_FORCE_CONST_MEM
1031 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1032
1033 #undef TARGET_DELEGITIMIZE_ADDRESS
1034 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1035
1036 #undef TARGET_MS_BITFIELD_LAYOUT_P
1037 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1038
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1043
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1046
1047 #undef TARGET_RTX_COSTS
1048 #define TARGET_RTX_COSTS ix86_rtx_costs
1049 #undef TARGET_ADDRESS_COST
1050 #define TARGET_ADDRESS_COST ix86_address_cost
1051
1052 #undef TARGET_FIXED_CONDITION_CODE_REGS
1053 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1054 #undef TARGET_CC_MODES_COMPATIBLE
1055 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1056
1057 #undef TARGET_MACHINE_DEPENDENT_REORG
1058 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1059
1060 #undef TARGET_BUILD_BUILTIN_VA_LIST
1061 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1062
1063 #undef TARGET_MD_ASM_CLOBBERS
1064 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1065
1066 #undef TARGET_PROMOTE_PROTOTYPES
1067 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1068
1069 #undef TARGET_SETUP_INCOMING_VARARGS
1070 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1071
1072 struct gcc_target targetm = TARGET_INITIALIZER;
1073
1074 \f
1075 /* The svr4 ABI for the i386 says that records and unions are returned
1076 in memory. */
1077 #ifndef DEFAULT_PCC_STRUCT_RETURN
1078 #define DEFAULT_PCC_STRUCT_RETURN 1
1079 #endif
1080
1081 /* Sometimes certain combinations of command options do not make
1082 sense on a particular target machine. You can define a macro
1083 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1084 defined, is executed once just after all the command options have
1085 been parsed.
1086
1087 Don't use this macro to turn on various extra optimizations for
1088 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1089
1090 void
1091 override_options (void)
1092 {
1093 int i;
1094 /* Comes from final.c -- no real reason to change it. */
1095 #define MAX_CODE_ALIGN 16
1096
1097 static struct ptt
1098 {
1099 const struct processor_costs *cost; /* Processor costs */
1100 const int target_enable; /* Target flags to enable. */
1101 const int target_disable; /* Target flags to disable. */
1102 const int align_loop; /* Default alignments. */
1103 const int align_loop_max_skip;
1104 const int align_jump;
1105 const int align_jump_max_skip;
1106 const int align_func;
1107 }
1108 const processor_target_table[PROCESSOR_max] =
1109 {
1110 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1111 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1112 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1113 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1114 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1115 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1117 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1118 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1119 };
1120
1121 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1122 static struct pta
1123 {
1124 const char *const name; /* processor name or nickname. */
1125 const enum processor_type processor;
1126 const enum pta_flags
1127 {
1128 PTA_SSE = 1,
1129 PTA_SSE2 = 2,
1130 PTA_SSE3 = 4,
1131 PTA_MMX = 8,
1132 PTA_PREFETCH_SSE = 16,
1133 PTA_3DNOW = 32,
1134 PTA_3DNOW_A = 64,
1135 PTA_64BIT = 128
1136 } flags;
1137 }
1138 const processor_alias_table[] =
1139 {
1140 {"i386", PROCESSOR_I386, 0},
1141 {"i486", PROCESSOR_I486, 0},
1142 {"i586", PROCESSOR_PENTIUM, 0},
1143 {"pentium", PROCESSOR_PENTIUM, 0},
1144 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1145 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1146 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1147 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1148 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1149 {"i686", PROCESSOR_PENTIUMPRO, 0},
1150 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1151 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1152 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1153 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1154 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1155 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1156 | PTA_MMX | PTA_PREFETCH_SSE},
1157 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1158 | PTA_MMX | PTA_PREFETCH_SSE},
1159 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1160 | PTA_MMX | PTA_PREFETCH_SSE},
1161 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1162 | PTA_MMX | PTA_PREFETCH_SSE},
1163 {"k6", PROCESSOR_K6, PTA_MMX},
1164 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1165 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1166 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1167 | PTA_3DNOW_A},
1168 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1169 | PTA_3DNOW | PTA_3DNOW_A},
1170 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1171 | PTA_3DNOW_A | PTA_SSE},
1172 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1173 | PTA_3DNOW_A | PTA_SSE},
1174 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1175 | PTA_3DNOW_A | PTA_SSE},
1176 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1177 | PTA_SSE | PTA_SSE2 },
1178 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1179 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1180 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1181 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1182 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1183 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1184 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1186 };
1187
1188 int const pta_size = ARRAY_SIZE (processor_alias_table);
1189
1190 /* Set the default values for switches whose default depends on TARGET_64BIT
1191 in case they weren't overwritten by command line options. */
1192 if (TARGET_64BIT)
1193 {
1194 if (flag_omit_frame_pointer == 2)
1195 flag_omit_frame_pointer = 1;
1196 if (flag_asynchronous_unwind_tables == 2)
1197 flag_asynchronous_unwind_tables = 1;
1198 if (flag_pcc_struct_return == 2)
1199 flag_pcc_struct_return = 0;
1200 }
1201 else
1202 {
1203 if (flag_omit_frame_pointer == 2)
1204 flag_omit_frame_pointer = 0;
1205 if (flag_asynchronous_unwind_tables == 2)
1206 flag_asynchronous_unwind_tables = 0;
1207 if (flag_pcc_struct_return == 2)
1208 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1209 }
1210
1211 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1212 SUBTARGET_OVERRIDE_OPTIONS;
1213 #endif
1214
1215 if (!ix86_tune_string && ix86_arch_string)
1216 ix86_tune_string = ix86_arch_string;
1217 if (!ix86_tune_string)
1218 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1219 if (!ix86_arch_string)
1220 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1221
1222 if (ix86_cmodel_string != 0)
1223 {
1224 if (!strcmp (ix86_cmodel_string, "small"))
1225 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1226 else if (flag_pic)
1227 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1228 else if (!strcmp (ix86_cmodel_string, "32"))
1229 ix86_cmodel = CM_32;
1230 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1231 ix86_cmodel = CM_KERNEL;
1232 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1233 ix86_cmodel = CM_MEDIUM;
1234 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1235 ix86_cmodel = CM_LARGE;
1236 else
1237 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1238 }
1239 else
1240 {
1241 ix86_cmodel = CM_32;
1242 if (TARGET_64BIT)
1243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1244 }
1245 if (ix86_asm_string != 0)
1246 {
1247 if (!strcmp (ix86_asm_string, "intel"))
1248 ix86_asm_dialect = ASM_INTEL;
1249 else if (!strcmp (ix86_asm_string, "att"))
1250 ix86_asm_dialect = ASM_ATT;
1251 else
1252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1253 }
1254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1255 error ("code model `%s' not supported in the %s bit mode",
1256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1257 if (ix86_cmodel == CM_LARGE)
1258 sorry ("code model `large' not supported yet");
1259 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1260 sorry ("%i-bit mode not compiled in",
1261 (target_flags & MASK_64BIT) ? 64 : 32);
1262
1263 for (i = 0; i < pta_size; i++)
1264 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1265 {
1266 ix86_arch = processor_alias_table[i].processor;
1267 /* Default cpu tuning to the architecture. */
1268 ix86_tune = ix86_arch;
1269 if (processor_alias_table[i].flags & PTA_MMX
1270 && !(target_flags_explicit & MASK_MMX))
1271 target_flags |= MASK_MMX;
1272 if (processor_alias_table[i].flags & PTA_3DNOW
1273 && !(target_flags_explicit & MASK_3DNOW))
1274 target_flags |= MASK_3DNOW;
1275 if (processor_alias_table[i].flags & PTA_3DNOW_A
1276 && !(target_flags_explicit & MASK_3DNOW_A))
1277 target_flags |= MASK_3DNOW_A;
1278 if (processor_alias_table[i].flags & PTA_SSE
1279 && !(target_flags_explicit & MASK_SSE))
1280 target_flags |= MASK_SSE;
1281 if (processor_alias_table[i].flags & PTA_SSE2
1282 && !(target_flags_explicit & MASK_SSE2))
1283 target_flags |= MASK_SSE2;
1284 if (processor_alias_table[i].flags & PTA_SSE3
1285 && !(target_flags_explicit & MASK_SSE3))
1286 target_flags |= MASK_SSE3;
1287 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1288 x86_prefetch_sse = true;
1289 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1290 error ("CPU you selected does not support x86-64 instruction set");
1291 break;
1292 }
1293
1294 if (i == pta_size)
1295 error ("bad value (%s) for -march= switch", ix86_arch_string);
1296
1297 for (i = 0; i < pta_size; i++)
1298 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1299 {
1300 ix86_tune = processor_alias_table[i].processor;
1301 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1302 error ("CPU you selected does not support x86-64 instruction set");
1303 break;
1304 }
1305 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1306 x86_prefetch_sse = true;
1307 if (i == pta_size)
1308 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1309
1310 if (optimize_size)
1311 ix86_cost = &size_cost;
1312 else
1313 ix86_cost = processor_target_table[ix86_tune].cost;
1314 target_flags |= processor_target_table[ix86_tune].target_enable;
1315 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1316
1317 /* Arrange to set up i386_stack_locals for all functions. */
1318 init_machine_status = ix86_init_machine_status;
1319
1320 /* Validate -mregparm= value. */
1321 if (ix86_regparm_string)
1322 {
1323 i = atoi (ix86_regparm_string);
1324 if (i < 0 || i > REGPARM_MAX)
1325 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1326 else
1327 ix86_regparm = i;
1328 }
1329 else
1330 if (TARGET_64BIT)
1331 ix86_regparm = REGPARM_MAX;
1332
1333 /* If the user has provided any of the -malign-* options,
1334 warn and use that value only if -falign-* is not set.
1335 Remove this code in GCC 3.2 or later. */
1336 if (ix86_align_loops_string)
1337 {
1338 warning ("-malign-loops is obsolete, use -falign-loops");
1339 if (align_loops == 0)
1340 {
1341 i = atoi (ix86_align_loops_string);
1342 if (i < 0 || i > MAX_CODE_ALIGN)
1343 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1344 else
1345 align_loops = 1 << i;
1346 }
1347 }
1348
1349 if (ix86_align_jumps_string)
1350 {
1351 warning ("-malign-jumps is obsolete, use -falign-jumps");
1352 if (align_jumps == 0)
1353 {
1354 i = atoi (ix86_align_jumps_string);
1355 if (i < 0 || i > MAX_CODE_ALIGN)
1356 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1357 else
1358 align_jumps = 1 << i;
1359 }
1360 }
1361
1362 if (ix86_align_funcs_string)
1363 {
1364 warning ("-malign-functions is obsolete, use -falign-functions");
1365 if (align_functions == 0)
1366 {
1367 i = atoi (ix86_align_funcs_string);
1368 if (i < 0 || i > MAX_CODE_ALIGN)
1369 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1370 else
1371 align_functions = 1 << i;
1372 }
1373 }
1374
1375 /* Default align_* from the processor table. */
1376 if (align_loops == 0)
1377 {
1378 align_loops = processor_target_table[ix86_tune].align_loop;
1379 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1380 }
1381 if (align_jumps == 0)
1382 {
1383 align_jumps = processor_target_table[ix86_tune].align_jump;
1384 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1385 }
1386 if (align_functions == 0)
1387 {
1388 align_functions = processor_target_table[ix86_tune].align_func;
1389 }
1390
1391 /* Validate -mpreferred-stack-boundary= value, or provide default.
1392 The default of 128 bits is for Pentium III's SSE __m128, but we
1393 don't want additional code to keep the stack aligned when
1394 optimizing for code size. */
1395 ix86_preferred_stack_boundary = (optimize_size
1396 ? TARGET_64BIT ? 128 : 32
1397 : 128);
1398 if (ix86_preferred_stack_boundary_string)
1399 {
1400 i = atoi (ix86_preferred_stack_boundary_string);
1401 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1402 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1403 TARGET_64BIT ? 4 : 2);
1404 else
1405 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1406 }
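   /* For example, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
      = 128 bits, i.e. the default 16-byte alignment; the minimum accepted
      value is 2 (4-byte alignment) in 32-bit mode and 4 in 64-bit mode.  */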
1407
1408 /* Validate -mbranch-cost= value, or provide default. */
1409 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1410 if (ix86_branch_cost_string)
1411 {
1412 i = atoi (ix86_branch_cost_string);
1413 if (i < 0 || i > 5)
1414 error ("-mbranch-cost=%d is not between 0 and 5", i);
1415 else
1416 ix86_branch_cost = i;
1417 }
1418
1419 if (ix86_tls_dialect_string)
1420 {
1421 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1422 ix86_tls_dialect = TLS_DIALECT_GNU;
1423 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1424 ix86_tls_dialect = TLS_DIALECT_SUN;
1425 else
1426 error ("bad value (%s) for -mtls-dialect= switch",
1427 ix86_tls_dialect_string);
1428 }
1429
1430 /* Keep nonleaf frame pointers. */
1431 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1432 flag_omit_frame_pointer = 1;
1433
1434 /* If we're doing fast math, we don't care about comparison order
1435 wrt NaNs. This lets us use a shorter comparison sequence. */
1436 if (flag_unsafe_math_optimizations)
1437 target_flags &= ~MASK_IEEE_FP;
1438
1439 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1440 since the insns won't need emulation. */
1441 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1442 target_flags &= ~MASK_NO_FANCY_MATH_387;
1443
1444 /* Turn on SSE2 builtins for -msse3. */
1445 if (TARGET_SSE3)
1446 target_flags |= MASK_SSE2;
1447
1448 /* Turn on SSE builtins for -msse2. */
1449 if (TARGET_SSE2)
1450 target_flags |= MASK_SSE;
1451
1452 if (TARGET_64BIT)
1453 {
1454 if (TARGET_ALIGN_DOUBLE)
1455 error ("-malign-double makes no sense in the 64bit mode");
1456 if (TARGET_RTD)
1457 error ("-mrtd calling convention not supported in the 64bit mode");
1458 /* Enable by default the SSE and MMX builtins. */
1459 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1460 ix86_fpmath = FPMATH_SSE;
1461 }
1462 else
1463 {
1464 ix86_fpmath = FPMATH_387;
1465 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1466 when the programmer takes care to keep the stack from being destroyed. */
1467 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1468 target_flags |= MASK_NO_RED_ZONE;
1469 }
1470
1471 if (ix86_fpmath_string != 0)
1472 {
1473 if (! strcmp (ix86_fpmath_string, "387"))
1474 ix86_fpmath = FPMATH_387;
1475 else if (! strcmp (ix86_fpmath_string, "sse"))
1476 {
1477 if (!TARGET_SSE)
1478 {
1479 warning ("SSE instruction set disabled, using 387 arithmetics");
1480 ix86_fpmath = FPMATH_387;
1481 }
1482 else
1483 ix86_fpmath = FPMATH_SSE;
1484 }
1485 else if (! strcmp (ix86_fpmath_string, "387,sse")
1486 || ! strcmp (ix86_fpmath_string, "sse,387"))
1487 {
1488 if (!TARGET_SSE)
1489 {
1490 warning ("SSE instruction set disabled, using 387 arithmetics");
1491 ix86_fpmath = FPMATH_387;
1492 }
1493 else if (!TARGET_80387)
1494 {
1495 warning ("387 instruction set disabled, using SSE arithmetics");
1496 ix86_fpmath = FPMATH_SSE;
1497 }
1498 else
1499 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1500 }
1501 else
1502 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1503 }
1504
1505 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1506 on by -msse. */
1507 if (TARGET_SSE)
1508 {
1509 target_flags |= MASK_MMX;
1510 x86_prefetch_sse = true;
1511 }
1512
1513 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1514 if (TARGET_3DNOW)
1515 {
1516 target_flags |= MASK_MMX;
1517 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1518 extensions it adds. */
1519 if (x86_3dnow_a & (1 << ix86_arch))
1520 target_flags |= MASK_3DNOW_A;
1521 }
1522 if ((x86_accumulate_outgoing_args & TUNEMASK)
1523 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1524 && !optimize_size)
1525 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1526
1527 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1528 {
1529 char *p;
1530 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1531 p = strchr (internal_label_prefix, 'X');
1532 internal_label_prefix_len = p - internal_label_prefix;
1533 *p = '\0';
1534 }
1535 }
1536 \f
1537 void
1538 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1539 {
1540 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1541 make the problem with not enough registers even worse. */
1542 #ifdef INSN_SCHEDULING
1543 if (level > 1)
1544 flag_schedule_insns = 0;
1545 #endif
1546
1547 /* The default values of these switches depend on TARGET_64BIT,
1548 which is not known at this point. Mark these values with 2 and
1549 let the user override them. If there is no command line option
1550 specifying them, we will set the defaults in override_options. */
1551 if (optimize >= 1)
1552 flag_omit_frame_pointer = 2;
1553 flag_pcc_struct_return = 2;
1554 flag_asynchronous_unwind_tables = 2;
1555 }
1556 \f
1557 /* Table of valid machine attributes. */
1558 const struct attribute_spec ix86_attribute_table[] =
1559 {
1560 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1561 /* Stdcall attribute says callee is responsible for popping arguments
1562 if they are not variable. */
1563 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1564 /* Fastcall attribute says callee is responsible for popping arguments
1565 if they are not variable. */
1566 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1567 /* Cdecl attribute says the callee is a normal C declaration */
1568 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1569 /* Regparm attribute specifies how many integer arguments are to be
1570 passed in registers. */
1571 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1572 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1573 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1574 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1575 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1576 #endif
1577 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1578 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1579 { NULL, 0, 0, false, false, false, NULL }
1580 };
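/* A brief sketch of how the entries above are used (the declarations are
   user-level code, not part of GCC): source such as

       int __attribute__ ((stdcall)) f (int a, int b);
       struct __attribute__ ((ms_struct)) s { char c; int i; };

   is routed through the handler in the matching row; min_len/max_len say
   how many attribute arguments are accepted (regparm requires exactly
   one), and the three booleans say whether the attribute applies to a
   decl, a type, or specifically a function type.  */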
1581
1582 /* Decide whether we can make a sibling call to a function. DECL is the
1583 declaration of the function being targeted by the call and EXP is the
1584 CALL_EXPR representing the call. */
1585
1586 static bool
1587 ix86_function_ok_for_sibcall (tree decl, tree exp)
1588 {
1589 /* If we are generating position-independent code, we cannot sibcall
1590 optimize any indirect call, or a direct call to a global function,
1591 as the PLT requires %ebx be live. */
1592 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1593 return false;
1594
1595 /* If we are returning floats on the 80387 register stack, we cannot
1596 make a sibcall from a function that doesn't return a float to a
1597 function that does or, conversely, from a function that does return
1598 a float to a function that doesn't; the necessary stack adjustment
1599 would not be executed. */
1600 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1601 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1602 return false;
1603
1604 /* If this call is indirect, we'll need to be able to use a call-clobbered
1605 register for the address of the target function. Make sure that all
1606 such registers are not used for passing parameters. */
1607 if (!decl && !TARGET_64BIT)
1608 {
1609 tree type;
1610
1611 /* We're looking at the CALL_EXPR, we need the type of the function. */
1612 type = TREE_OPERAND (exp, 0); /* pointer expression */
1613 type = TREE_TYPE (type); /* pointer type */
1614 type = TREE_TYPE (type); /* function type */
1615
1616 if (ix86_function_regparm (type, NULL) >= 3)
1617 {
1618 /* ??? Need to count the actual number of registers to be used,
1619 not the possible number of registers. Fix later. */
1620 return false;
1621 }
1622 }
1623
1624 /* Otherwise okay. That also includes certain types of indirect calls. */
1625 return true;
1626 }
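/* An illustrative example of the PIC restriction above (user-level code,
   not part of this file): when compiling 32-bit PIC code,

       extern int bar (int);
       int foo (int x) { return bar (x); }

   the call to the global function bar must go through the PLT, which
   expects %ebx to hold the GOT pointer, so foo cannot simply jump to bar
   and the sibcall optimization is refused.  */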
1627
1628 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1629 arguments as in struct attribute_spec.handler. */
1630 static tree
1631 ix86_handle_cdecl_attribute (tree *node, tree name,
1632 tree args ATTRIBUTE_UNUSED,
1633 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1634 {
1635 if (TREE_CODE (*node) != FUNCTION_TYPE
1636 && TREE_CODE (*node) != METHOD_TYPE
1637 && TREE_CODE (*node) != FIELD_DECL
1638 && TREE_CODE (*node) != TYPE_DECL)
1639 {
1640 warning ("`%s' attribute only applies to functions",
1641 IDENTIFIER_POINTER (name));
1642 *no_add_attrs = true;
1643 }
1644 else
1645 {
1646 if (is_attribute_p ("fastcall", name))
1647 {
1648 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1649 {
1650 error ("fastcall and stdcall attributes are not compatible");
1651 }
1652 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1653 {
1654 error ("fastcall and regparm attributes are not compatible");
1655 }
1656 }
1657 else if (is_attribute_p ("stdcall", name))
1658 {
1659 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1660 {
1661 error ("fastcall and stdcall attributes are not compatible");
1662 }
1663 }
1664 }
1665
1666 if (TARGET_64BIT)
1667 {
1668 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1669 *no_add_attrs = true;
1670 }
1671
1672 return NULL_TREE;
1673 }
1674
1675 /* Handle a "regparm" attribute;
1676 arguments as in struct attribute_spec.handler. */
1677 static tree
1678 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1679 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1680 {
1681 if (TREE_CODE (*node) != FUNCTION_TYPE
1682 && TREE_CODE (*node) != METHOD_TYPE
1683 && TREE_CODE (*node) != FIELD_DECL
1684 && TREE_CODE (*node) != TYPE_DECL)
1685 {
1686 warning ("`%s' attribute only applies to functions",
1687 IDENTIFIER_POINTER (name));
1688 *no_add_attrs = true;
1689 }
1690 else
1691 {
1692 tree cst;
1693
1694 cst = TREE_VALUE (args);
1695 if (TREE_CODE (cst) != INTEGER_CST)
1696 {
1697 warning ("`%s' attribute requires an integer constant argument",
1698 IDENTIFIER_POINTER (name));
1699 *no_add_attrs = true;
1700 }
1701 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1702 {
1703 warning ("argument to `%s' attribute larger than %d",
1704 IDENTIFIER_POINTER (name), REGPARM_MAX);
1705 *no_add_attrs = true;
1706 }
1707
1708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1709 {
1710 error ("fastcall and regparm attributes are not compatible");
1711 }
1712 }
1713
1714 return NULL_TREE;
1715 }
1716
1717 /* Return 0 if the attributes for two types are incompatible, 1 if they
1718 are compatible, and 2 if they are nearly compatible (which causes a
1719 warning to be generated). */
1720
1721 static int
1722 ix86_comp_type_attributes (tree type1, tree type2)
1723 {
1724 /* Check for mismatch of non-default calling convention. */
1725 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1726
1727 if (TREE_CODE (type1) != FUNCTION_TYPE)
1728 return 1;
1729
1730 /* Check for mismatched fastcall types */
1731 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1732 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1733 return 0;
1734
1735 /* Check for mismatched return types (cdecl vs stdcall). */
1736 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1737 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1738 return 0;
1739 if (ix86_function_regparm (type1, NULL)
1740 != ix86_function_regparm (type2, NULL))
1741 return 0;
1742 return 1;
1743 }
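/* For example (a sketch of user-level code, not part of GCC): given

       void (*p1) (int);
       void (__attribute__ ((stdcall)) *p2) (int);

   exactly one of the two pointed-to function types carries the stdcall
   attribute, so the lookup_attribute tests above differ and this hook
   returns 0, making the types incompatible.  */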
1744 \f
1745 /* Return the regparm value for a function with the indicated TYPE and DECL.
1746 DECL may be NULL when calling a function indirectly
1747 or considering a libcall. */
1748
1749 static int
1750 ix86_function_regparm (tree type, tree decl)
1751 {
1752 tree attr;
1753 int regparm = ix86_regparm;
1754 bool user_convention = false;
1755
1756 if (!TARGET_64BIT)
1757 {
1758 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1759 if (attr)
1760 {
1761 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1762 user_convention = true;
1763 }
1764
1765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1766 {
1767 regparm = 2;
1768 user_convention = true;
1769 }
1770
1771 /* Use register calling convention for local functions when possible. */
1772 if (!TARGET_64BIT && !user_convention && decl
1773 && flag_unit_at_a_time && !profile_flag)
1774 {
1775 struct cgraph_local_info *i = cgraph_local_info (decl);
1776 if (i && i->local)
1777 {
1778 /* We can't use regparm(3) for nested functions as these use
1779 static chain pointer in third argument. */
1780 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1781 regparm = 2;
1782 else
1783 regparm = 3;
1784 }
1785 }
1786 }
1787 return regparm;
1788 }
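/* Worked examples of the value computed above, for 32-bit code (a sketch,
   not part of this file):

       void f (int a, int b);
       void __attribute__ ((regparm (3))) g (int a, int b);
       void __attribute__ ((fastcall)) h (int a, int b);

   f uses ix86_regparm (0 unless -mregparm=N is given), g yields 3, and h
   yields 2 since fastcall passes its first two integer arguments in ECX
   and EDX.  In addition, a function local to the compilation unit (per
   cgraph, with unit-at-a-time and no profiling) may be promoted to
   regparm 3, or 2 if it needs the static chain pointer.  */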
1789
1790 /* Return true if EAX is live at the start of the function. Used by
1791 ix86_expand_prologue to determine if we need special help before
1792 calling allocate_stack_worker. */
1793
1794 static bool
1795 ix86_eax_live_at_start_p (void)
1796 {
1797 /* Cheat. Don't bother working forward from ix86_function_regparm
1798 to the function type to whether an actual argument is located in
1799 eax. Instead just look at cfg info, which is still close enough
1800 to correct at this point. This gives false positives for broken
1801 functions that might use uninitialized data that happens to be
1802 allocated in eax, but who cares? */
1803 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1804 }
1805
1806 /* Value is the number of bytes of arguments automatically
1807 popped when returning from a subroutine call.
1808 FUNDECL is the declaration node of the function (as a tree),
1809 FUNTYPE is the data type of the function (as a tree),
1810 or for a library call it is an identifier node for the subroutine name.
1811 SIZE is the number of bytes of arguments passed on the stack.
1812
1813 On the 80386, the RTD insn may be used to pop them if the number
1814 of args is fixed, but if the number is variable then the caller
1815 must pop them all. RTD can't be used for library calls now
1816 because the library is compiled with the Unix compiler.
1817 Use of RTD is a selectable option, since it is incompatible with
1818 standard Unix calling sequences. If the option is not selected,
1819 the caller must always pop the args.
1820
1821 The attribute stdcall is equivalent to RTD on a per module basis. */
1822
1823 int
1824 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1825 {
1826 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1827
1828 /* Cdecl functions override -mrtd, and never pop the stack. */
1829 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1830
1831 /* Stdcall and fastcall functions will pop the stack if not
1832 variable args. */
1833 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1834 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1835 rtd = 1;
1836
1837 if (rtd
1838 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1839 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1840 == void_type_node)))
1841 return size;
1842 }
1843
1844 /* Lose any fake structure return argument if it is passed on the stack. */
1845 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1846 && !TARGET_64BIT)
1847 {
1848 int nregs = ix86_function_regparm (funtype, fundecl);
1849
1850 if (!nregs)
1851 return GET_MODE_SIZE (Pmode);
1852 }
1853
1854 return 0;
1855 }
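/* For instance, a 32-bit stdcall function with a fixed argument list,

       int __attribute__ ((stdcall)) f (int a, int b);

   pops its 8 bytes of arguments itself (the callee returns with
   "ret $8"), so this hook returns SIZE == 8; a cdecl function returns 0
   and leaves the popping to the caller.  */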
1856 \f
1857 /* Argument support functions. */
1858
1859 /* Return true when register may be used to pass function parameters. */
1860 bool
1861 ix86_function_arg_regno_p (int regno)
1862 {
1863 int i;
1864 if (!TARGET_64BIT)
1865 return (regno < REGPARM_MAX
1866 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1867 if (SSE_REGNO_P (regno) && TARGET_SSE)
1868 return true;
1869 /* RAX is used as hidden argument to va_arg functions. */
1870 if (!regno)
1871 return true;
1872 for (i = 0; i < REGPARM_MAX; i++)
1873 if (regno == x86_64_int_parameter_registers[i])
1874 return true;
1875 return false;
1876 }
1877
1878 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1879 for a call to a function whose data type is FNTYPE.
1880 For a library call, FNTYPE is 0. */
1881
1882 void
1883 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1884 tree fntype, /* tree ptr for function decl */
1885 rtx libname, /* SYMBOL_REF of library name or 0 */
1886 tree fndecl)
1887 {
1888 static CUMULATIVE_ARGS zero_cum;
1889 tree param, next_param;
1890
1891 if (TARGET_DEBUG_ARG)
1892 {
1893 fprintf (stderr, "\ninit_cumulative_args (");
1894 if (fntype)
1895 fprintf (stderr, "fntype code = %s, ret code = %s",
1896 tree_code_name[(int) TREE_CODE (fntype)],
1897 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1898 else
1899 fprintf (stderr, "no fntype");
1900
1901 if (libname)
1902 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1903 }
1904
1905 *cum = zero_cum;
1906
1907 /* Set up the number of registers to use for passing arguments. */
1908 if (fntype)
1909 cum->nregs = ix86_function_regparm (fntype, fndecl);
1910 else
1911 cum->nregs = ix86_regparm;
1912 cum->sse_nregs = SSE_REGPARM_MAX;
1913 cum->mmx_nregs = MMX_REGPARM_MAX;
1914 cum->warn_sse = true;
1915 cum->warn_mmx = true;
1916 cum->maybe_vaarg = false;
1917
1918 /* Use ecx and edx registers if function has fastcall attribute */
1919 if (fntype && !TARGET_64BIT)
1920 {
1921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1922 {
1923 cum->nregs = 2;
1924 cum->fastcall = 1;
1925 }
1926 }
1927
1928
1929 /* Determine if this function has variable arguments. The absence of
1930 variable arguments is indicated by the last argument being
1931 'void_type_node'. If there are variable arguments, then
1932 we won't pass anything in registers. */
1933
1934 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1935 {
1936 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1937 param != 0; param = next_param)
1938 {
1939 next_param = TREE_CHAIN (param);
1940 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1941 {
1942 if (!TARGET_64BIT)
1943 {
1944 cum->nregs = 0;
1945 cum->sse_nregs = 0;
1946 cum->mmx_nregs = 0;
1947 cum->warn_sse = 0;
1948 cum->warn_mmx = 0;
1949 cum->fastcall = 0;
1950 }
1951 cum->maybe_vaarg = true;
1952 }
1953 }
1954 }
1955 if ((!fntype && !libname)
1956 || (fntype && !TYPE_ARG_TYPES (fntype)))
1957 cum->maybe_vaarg = 1;
1958
1959 if (TARGET_DEBUG_ARG)
1960 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1961
1962 return;
1963 }
1964
1965 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1966 of this code is to classify each 8bytes of incoming argument by the register
1967 class and assign registers accordingly. */
1968
1969 /* Return the union class of CLASS1 and CLASS2.
1970 See the x86-64 PS ABI for details. */
1971
1972 static enum x86_64_reg_class
1973 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1974 {
1975 /* Rule #1: If both classes are equal, this is the resulting class. */
1976 if (class1 == class2)
1977 return class1;
1978
1979 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1980 the other class. */
1981 if (class1 == X86_64_NO_CLASS)
1982 return class2;
1983 if (class2 == X86_64_NO_CLASS)
1984 return class1;
1985
1986 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1987 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1988 return X86_64_MEMORY_CLASS;
1989
1990 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1991 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1992 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1993 return X86_64_INTEGERSI_CLASS;
1994 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1995 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1996 return X86_64_INTEGER_CLASS;
1997
1998 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1999 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2000 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2001 return X86_64_MEMORY_CLASS;
2002
2003 /* Rule #6: Otherwise class SSE is used. */
2004 return X86_64_SSE_CLASS;
2005 }
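/* A small worked example of the rules above: for

       struct s { int i; float f; };

   both fields fall into the same eightbyte; the int contributes
   X86_64_INTEGERSI_CLASS and the float X86_64_SSESF_CLASS, and rule #4
   merges them to X86_64_INTEGERSI_CLASS, so the whole struct travels in
   a single general purpose register.  */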
2006
2007 /* Classify the argument of type TYPE and mode MODE.
2008 CLASSES will be filled by the register class used to pass each word
2009 of the operand. The number of words is returned. In case the parameter
2010 should be passed in memory, 0 is returned. As a special case for zero
2011 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2012
2013 BIT_OFFSET is used internally for handling records and specifies the
2014 offset in bits, modulo 256, to avoid overflow cases.
2015
2016 See the x86-64 PS ABI for details.
2017 */
2018
2019 static int
2020 classify_argument (enum machine_mode mode, tree type,
2021 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2022 {
2023 HOST_WIDE_INT bytes =
2024 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2025 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2026
2027 /* Variable sized entities are always passed/returned in memory. */
2028 if (bytes < 0)
2029 return 0;
2030
2031 if (mode != VOIDmode
2032 && MUST_PASS_IN_STACK (mode, type))
2033 return 0;
2034
2035 if (type && AGGREGATE_TYPE_P (type))
2036 {
2037 int i;
2038 tree field;
2039 enum x86_64_reg_class subclasses[MAX_CLASSES];
2040
2041 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2042 if (bytes > 16)
2043 return 0;
2044
2045 for (i = 0; i < words; i++)
2046 classes[i] = X86_64_NO_CLASS;
2047
2048 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2049 signal the memory class, so handle this as a special case. */
2050 if (!words)
2051 {
2052 classes[0] = X86_64_NO_CLASS;
2053 return 1;
2054 }
2055
2056 /* Classify each field of record and merge classes. */
2057 if (TREE_CODE (type) == RECORD_TYPE)
2058 {
2059 /* For classes first merge in the field of the subclasses. */
2060 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2061 {
2062 tree bases = TYPE_BINFO_BASETYPES (type);
2063 int n_bases = TREE_VEC_LENGTH (bases);
2064 int i;
2065
2066 for (i = 0; i < n_bases; ++i)
2067 {
2068 tree binfo = TREE_VEC_ELT (bases, i);
2069 int num;
2070 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2071 tree type = BINFO_TYPE (binfo);
2072
2073 num = classify_argument (TYPE_MODE (type),
2074 type, subclasses,
2075 (offset + bit_offset) % 256);
2076 if (!num)
2077 return 0;
2078 for (i = 0; i < num; i++)
2079 {
2080 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2081 classes[i + pos] =
2082 merge_classes (subclasses[i], classes[i + pos]);
2083 }
2084 }
2085 }
2086 /* And now merge the fields of structure. */
2087 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2088 {
2089 if (TREE_CODE (field) == FIELD_DECL)
2090 {
2091 int num;
2092
2093 /* Bitfields are always classified as integer. Handle them
2094 early, since later code would consider them to be
2095 misaligned integers. */
2096 if (DECL_BIT_FIELD (field))
2097 {
2098 for (i = int_bit_position (field) / 8 / 8;
2099 i < (int_bit_position (field)
2100 + tree_low_cst (DECL_SIZE (field), 0)
2101 + 63) / 8 / 8; i++)
2102 classes[i] =
2103 merge_classes (X86_64_INTEGER_CLASS,
2104 classes[i]);
2105 }
2106 else
2107 {
2108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2109 TREE_TYPE (field), subclasses,
2110 (int_bit_position (field)
2111 + bit_offset) % 256);
2112 if (!num)
2113 return 0;
2114 for (i = 0; i < num; i++)
2115 {
2116 int pos =
2117 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2118 classes[i + pos] =
2119 merge_classes (subclasses[i], classes[i + pos]);
2120 }
2121 }
2122 }
2123 }
2124 }
2125 /* Arrays are handled as small records. */
2126 else if (TREE_CODE (type) == ARRAY_TYPE)
2127 {
2128 int num;
2129 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2130 TREE_TYPE (type), subclasses, bit_offset);
2131 if (!num)
2132 return 0;
2133
2134 /* The partial classes are now full classes. */
2135 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2136 subclasses[0] = X86_64_SSE_CLASS;
2137 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2138 subclasses[0] = X86_64_INTEGER_CLASS;
2139
2140 for (i = 0; i < words; i++)
2141 classes[i] = subclasses[i % num];
2142 }
2143 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2144 else if (TREE_CODE (type) == UNION_TYPE
2145 || TREE_CODE (type) == QUAL_UNION_TYPE)
2146 {
2147 /* For classes first merge in the field of the subclasses. */
2148 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2149 {
2150 tree bases = TYPE_BINFO_BASETYPES (type);
2151 int n_bases = TREE_VEC_LENGTH (bases);
2152 int i;
2153
2154 for (i = 0; i < n_bases; ++i)
2155 {
2156 tree binfo = TREE_VEC_ELT (bases, i);
2157 int num;
2158 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2159 tree type = BINFO_TYPE (binfo);
2160
2161 num = classify_argument (TYPE_MODE (type),
2162 type, subclasses,
2163 (offset + (bit_offset % 64)) % 256);
2164 if (!num)
2165 return 0;
2166 for (i = 0; i < num; i++)
2167 {
2168 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2169 classes[i + pos] =
2170 merge_classes (subclasses[i], classes[i + pos]);
2171 }
2172 }
2173 }
2174 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2175 {
2176 if (TREE_CODE (field) == FIELD_DECL)
2177 {
2178 int num;
2179 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2180 TREE_TYPE (field), subclasses,
2181 bit_offset);
2182 if (!num)
2183 return 0;
2184 for (i = 0; i < num; i++)
2185 classes[i] = merge_classes (subclasses[i], classes[i]);
2186 }
2187 }
2188 }
2189 else if (TREE_CODE (type) == SET_TYPE)
2190 {
2191 if (bytes <= 4)
2192 {
2193 classes[0] = X86_64_INTEGERSI_CLASS;
2194 return 1;
2195 }
2196 else if (bytes <= 8)
2197 {
2198 classes[0] = X86_64_INTEGER_CLASS;
2199 return 1;
2200 }
2201 else if (bytes <= 12)
2202 {
2203 classes[0] = X86_64_INTEGER_CLASS;
2204 classes[1] = X86_64_INTEGERSI_CLASS;
2205 return 2;
2206 }
2207 else
2208 {
2209 classes[0] = X86_64_INTEGER_CLASS;
2210 classes[1] = X86_64_INTEGER_CLASS;
2211 return 2;
2212 }
2213 }
2214 else
2215 abort ();
2216
2217 /* Final merger cleanup. */
2218 for (i = 0; i < words; i++)
2219 {
2220 /* If one class is MEMORY, everything should be passed in
2221 memory. */
2222 if (classes[i] == X86_64_MEMORY_CLASS)
2223 return 0;
2224
2225 /* The X86_64_SSEUP_CLASS should be always preceded by
2226 X86_64_SSE_CLASS. */
2227 if (classes[i] == X86_64_SSEUP_CLASS
2228 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2229 classes[i] = X86_64_SSE_CLASS;
2230
2231 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2232 if (classes[i] == X86_64_X87UP_CLASS
2233 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2234 classes[i] = X86_64_SSE_CLASS;
2235 }
2236 return words;
2237 }
2238
2239 /* Compute alignment needed. We align all types to natural boundaries with
2240 the exception of XFmode, which is aligned to 128 bits. */
2241 if (mode != VOIDmode && mode != BLKmode)
2242 {
2243 int mode_alignment = GET_MODE_BITSIZE (mode);
2244
2245 if (mode == XFmode)
2246 mode_alignment = 128;
2247 else if (mode == XCmode)
2248 mode_alignment = 256;
2249 /* Misaligned fields are always returned in memory. */
2250 if (bit_offset % mode_alignment)
2251 return 0;
2252 }
2253
2254 /* Classification of atomic types. */
2255 switch (mode)
2256 {
2257 case DImode:
2258 case SImode:
2259 case HImode:
2260 case QImode:
2261 case CSImode:
2262 case CHImode:
2263 case CQImode:
2264 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2265 classes[0] = X86_64_INTEGERSI_CLASS;
2266 else
2267 classes[0] = X86_64_INTEGER_CLASS;
2268 return 1;
2269 case CDImode:
2270 case TImode:
2271 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2272 return 2;
2273 case CTImode:
2274 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2275 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2276 return 4;
2277 case SFmode:
2278 if (!(bit_offset % 64))
2279 classes[0] = X86_64_SSESF_CLASS;
2280 else
2281 classes[0] = X86_64_SSE_CLASS;
2282 return 1;
2283 case DFmode:
2284 classes[0] = X86_64_SSEDF_CLASS;
2285 return 1;
2286 case XFmode:
2287 classes[0] = X86_64_X87_CLASS;
2288 classes[1] = X86_64_X87UP_CLASS;
2289 return 2;
2290 case TFmode:
2291 case TCmode:
2292 return 0;
2293 case XCmode:
2294 classes[0] = X86_64_X87_CLASS;
2295 classes[1] = X86_64_X87UP_CLASS;
2296 classes[2] = X86_64_X87_CLASS;
2297 classes[3] = X86_64_X87UP_CLASS;
2298 return 4;
2299 case DCmode:
2300 classes[0] = X86_64_SSEDF_CLASS;
2301 classes[1] = X86_64_SSEDF_CLASS;
2302 return 2;
2303 case SCmode:
2304 classes[0] = X86_64_SSE_CLASS;
2305 return 1;
2306 case V4SFmode:
2307 case V4SImode:
2308 case V16QImode:
2309 case V8HImode:
2310 case V2DFmode:
2311 case V2DImode:
2312 classes[0] = X86_64_SSE_CLASS;
2313 classes[1] = X86_64_SSEUP_CLASS;
2314 return 2;
2315 case V2SFmode:
2316 case V2SImode:
2317 case V4HImode:
2318 case V8QImode:
2319 return 0;
2320 case BLKmode:
2321 case VOIDmode:
2322 return 0;
2323 default:
2324 abort ();
2325 }
2326 }
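/* A sketch of how classification plays out for a simple aggregate:

       struct p { double d; long l; };    (16 bytes, two eightbytes)

   The first eightbyte (d) is classified X86_64_SSEDF_CLASS and the
   second (l) X86_64_INTEGER_CLASS, so classify_argument returns 2 and
   the struct is later split between an SSE register and a general
   purpose register.  Anything larger than 16 bytes, or anything whose
   merged class is MEMORY, is passed on the stack (return value 0).  */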
2327
2328 /* Examine the argument and set the number of registers required in each
2329 class. Return 0 iff the parameter should be passed in memory. */
2330 static int
2331 examine_argument (enum machine_mode mode, tree type, int in_return,
2332 int *int_nregs, int *sse_nregs)
2333 {
2334 enum x86_64_reg_class class[MAX_CLASSES];
2335 int n = classify_argument (mode, type, class, 0);
2336
2337 *int_nregs = 0;
2338 *sse_nregs = 0;
2339 if (!n)
2340 return 0;
2341 for (n--; n >= 0; n--)
2342 switch (class[n])
2343 {
2344 case X86_64_INTEGER_CLASS:
2345 case X86_64_INTEGERSI_CLASS:
2346 (*int_nregs)++;
2347 break;
2348 case X86_64_SSE_CLASS:
2349 case X86_64_SSESF_CLASS:
2350 case X86_64_SSEDF_CLASS:
2351 (*sse_nregs)++;
2352 break;
2353 case X86_64_NO_CLASS:
2354 case X86_64_SSEUP_CLASS:
2355 break;
2356 case X86_64_X87_CLASS:
2357 case X86_64_X87UP_CLASS:
2358 if (!in_return)
2359 return 0;
2360 break;
2361 case X86_64_MEMORY_CLASS:
2362 abort ();
2363 }
2364 return 1;
2365 }
2366 /* Construct container for the argument used by GCC interface. See
2367 FUNCTION_ARG for the detailed description. */
2368 static rtx
2369 construct_container (enum machine_mode mode, tree type, int in_return,
2370 int nintregs, int nsseregs, const int * intreg,
2371 int sse_regno)
2372 {
2373 enum machine_mode tmpmode;
2374 int bytes =
2375 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2376 enum x86_64_reg_class class[MAX_CLASSES];
2377 int n;
2378 int i;
2379 int nexps = 0;
2380 int needed_sseregs, needed_intregs;
2381 rtx exp[MAX_CLASSES];
2382 rtx ret;
2383
2384 n = classify_argument (mode, type, class, 0);
2385 if (TARGET_DEBUG_ARG)
2386 {
2387 if (!n)
2388 fprintf (stderr, "Memory class\n");
2389 else
2390 {
2391 fprintf (stderr, "Classes:");
2392 for (i = 0; i < n; i++)
2393 {
2394 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2395 }
2396 fprintf (stderr, "\n");
2397 }
2398 }
2399 if (!n)
2400 return NULL;
2401 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2402 return NULL;
2403 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2404 return NULL;
2405
2406 /* First construct simple cases. Avoid SCmode, since we want to use a
2407 single register to pass this type. */
2408 if (n == 1 && mode != SCmode)
2409 switch (class[0])
2410 {
2411 case X86_64_INTEGER_CLASS:
2412 case X86_64_INTEGERSI_CLASS:
2413 return gen_rtx_REG (mode, intreg[0]);
2414 case X86_64_SSE_CLASS:
2415 case X86_64_SSESF_CLASS:
2416 case X86_64_SSEDF_CLASS:
2417 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2418 case X86_64_X87_CLASS:
2419 return gen_rtx_REG (mode, FIRST_STACK_REG);
2420 case X86_64_NO_CLASS:
2421 /* Zero sized array, struct or class. */
2422 return NULL;
2423 default:
2424 abort ();
2425 }
2426 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2427 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2428 if (n == 2
2429 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2430 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2431 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2432 && class[1] == X86_64_INTEGER_CLASS
2433 && (mode == CDImode || mode == TImode || mode == TFmode)
2434 && intreg[0] + 1 == intreg[1])
2435 return gen_rtx_REG (mode, intreg[0]);
2436 if (n == 4
2437 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2438 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2439 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2440
2441 /* Otherwise figure out the entries of the PARALLEL. */
2442 for (i = 0; i < n; i++)
2443 {
2444 switch (class[i])
2445 {
2446 case X86_64_NO_CLASS:
2447 break;
2448 case X86_64_INTEGER_CLASS:
2449 case X86_64_INTEGERSI_CLASS:
2450 /* Merge TImodes on aligned occasions here too. */
2451 if (i * 8 + 8 > bytes)
2452 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2453 else if (class[i] == X86_64_INTEGERSI_CLASS)
2454 tmpmode = SImode;
2455 else
2456 tmpmode = DImode;
2457 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2458 if (tmpmode == BLKmode)
2459 tmpmode = DImode;
2460 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2461 gen_rtx_REG (tmpmode, *intreg),
2462 GEN_INT (i*8));
2463 intreg++;
2464 break;
2465 case X86_64_SSESF_CLASS:
2466 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2467 gen_rtx_REG (SFmode,
2468 SSE_REGNO (sse_regno)),
2469 GEN_INT (i*8));
2470 sse_regno++;
2471 break;
2472 case X86_64_SSEDF_CLASS:
2473 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2474 gen_rtx_REG (DFmode,
2475 SSE_REGNO (sse_regno)),
2476 GEN_INT (i*8));
2477 sse_regno++;
2478 break;
2479 case X86_64_SSE_CLASS:
2480 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2481 tmpmode = TImode;
2482 else
2483 tmpmode = DImode;
2484 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2485 gen_rtx_REG (tmpmode,
2486 SSE_REGNO (sse_regno)),
2487 GEN_INT (i*8));
2488 if (tmpmode == TImode)
2489 i++;
2490 sse_regno++;
2491 break;
2492 default:
2493 abort ();
2494 }
2495 }
2496 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2497 for (i = 0; i < nexps; i++)
2498 XVECEXP (ret, 0, i) = exp [i];
2499 return ret;
2500 }
2501
2502 /* Update the data in CUM to advance over an argument
2503 of mode MODE and data type TYPE.
2504 (TYPE is null for libcalls where that information may not be available.) */
2505
2506 void
2507 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2508 enum machine_mode mode, /* current arg mode */
2509 tree type, /* type of the argument or 0 if lib support */
2510 int named) /* whether or not the argument was named */
2511 {
2512 int bytes =
2513 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2514 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2515
2516 if (TARGET_DEBUG_ARG)
2517 fprintf (stderr,
2518 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2519 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2520 if (TARGET_64BIT)
2521 {
2522 int int_nregs, sse_nregs;
2523 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2524 cum->words += words;
2525 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2526 {
2527 cum->nregs -= int_nregs;
2528 cum->sse_nregs -= sse_nregs;
2529 cum->regno += int_nregs;
2530 cum->sse_regno += sse_nregs;
2531 }
2532 else
2533 cum->words += words;
2534 }
2535 else
2536 {
2537 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2538 && (!type || !AGGREGATE_TYPE_P (type)))
2539 {
2540 cum->sse_words += words;
2541 cum->sse_nregs -= 1;
2542 cum->sse_regno += 1;
2543 if (cum->sse_nregs <= 0)
2544 {
2545 cum->sse_nregs = 0;
2546 cum->sse_regno = 0;
2547 }
2548 }
2549 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2550 && (!type || !AGGREGATE_TYPE_P (type)))
2551 {
2552 cum->mmx_words += words;
2553 cum->mmx_nregs -= 1;
2554 cum->mmx_regno += 1;
2555 if (cum->mmx_nregs <= 0)
2556 {
2557 cum->mmx_nregs = 0;
2558 cum->mmx_regno = 0;
2559 }
2560 }
2561 else
2562 {
2563 cum->words += words;
2564 cum->nregs -= words;
2565 cum->regno += words;
2566
2567 if (cum->nregs <= 0)
2568 {
2569 cum->nregs = 0;
2570 cum->regno = 0;
2571 }
2572 }
2573 }
2574 return;
2575 }
2576
2577 /* Define where to put the arguments to a function.
2578 Value is zero to push the argument on the stack,
2579 or a hard register in which to store the argument.
2580
2581 MODE is the argument's machine mode.
2582 TYPE is the data type of the argument (as a tree).
2583 This is null for libcalls where that information may
2584 not be available.
2585 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2586 the preceding args and about the function being called.
2587 NAMED is nonzero if this argument is a named parameter
2588 (otherwise it is an extra parameter matching an ellipsis). */
2589
2590 rtx
2591 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2592 enum machine_mode mode, /* current arg mode */
2593 tree type, /* type of the argument or 0 if lib support */
2594 int named) /* != 0 for normal args, == 0 for ... args */
2595 {
2596 rtx ret = NULL_RTX;
2597 int bytes =
2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2600 static bool warnedsse, warnedmmx;
2601
2602 /* Handle a hidden AL argument containing number of registers for varargs
2603 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2604 any AL settings. */
2605 if (mode == VOIDmode)
2606 {
2607 if (TARGET_64BIT)
2608 return GEN_INT (cum->maybe_vaarg
2609 ? (cum->sse_nregs < 0
2610 ? SSE_REGPARM_MAX
2611 : cum->sse_regno)
2612 : -1);
2613 else
2614 return constm1_rtx;
2615 }
2616 if (TARGET_64BIT)
2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618 &x86_64_int_parameter_registers [cum->regno],
2619 cum->sse_regno);
2620 else
2621 switch (mode)
2622 {
2623 /* For now, pass fp/complex values on the stack. */
2624 default:
2625 break;
2626
2627 case BLKmode:
2628 if (bytes < 0)
2629 break;
2630 /* FALLTHRU */
2631 case DImode:
2632 case SImode:
2633 case HImode:
2634 case QImode:
2635 if (words <= cum->nregs)
2636 {
2637 int regno = cum->regno;
2638
2639 /* Fastcall allocates the first two DWORD (SImode) or
2640 smaller arguments to ECX and EDX. */
2641 if (cum->fastcall)
2642 {
2643 if (mode == BLKmode || mode == DImode)
2644 break;
2645
2646 /* ECX not EAX is the first allocated register. */
2647 if (regno == 0)
2648 regno = 2;
2649 }
2650 ret = gen_rtx_REG (mode, regno);
2651 }
2652 break;
2653 case TImode:
2654 case V16QImode:
2655 case V8HImode:
2656 case V4SImode:
2657 case V2DImode:
2658 case V4SFmode:
2659 case V2DFmode:
2660 if (!type || !AGGREGATE_TYPE_P (type))
2661 {
2662 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2663 {
2664 warnedsse = true;
2665 warning ("SSE vector argument without SSE enabled "
2666 "changes the ABI");
2667 }
2668 if (cum->sse_nregs)
2669 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2670 }
2671 break;
2672 case V8QImode:
2673 case V4HImode:
2674 case V2SImode:
2675 case V2SFmode:
2676 if (!type || !AGGREGATE_TYPE_P (type))
2677 {
2678 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2679 {
2680 warnedmmx = true;
2681 warning ("MMX vector argument without MMX enabled "
2682 "changes the ABI");
2683 }
2684 if (cum->mmx_nregs)
2685 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2686 }
2687 break;
2688 }
2689
2690 if (TARGET_DEBUG_ARG)
2691 {
2692 fprintf (stderr,
2693 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2694 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2695
2696 if (ret)
2697 print_simple_rtl (stderr, ret);
2698 else
2699 fprintf (stderr, ", stack");
2700
2701 fprintf (stderr, " )\n");
2702 }
2703
2704 return ret;
2705 }
2706
2707 /* A C expression that indicates when an argument must be passed by
2708 reference. If nonzero for an argument, a copy of that argument is
2709 made in memory and a pointer to the argument is passed instead of
2710 the argument itself. The pointer is passed in whatever way is
2711 appropriate for passing a pointer to that type. */
2712
2713 int
2714 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2715 enum machine_mode mode ATTRIBUTE_UNUSED,
2716 tree type, int named ATTRIBUTE_UNUSED)
2717 {
2718 if (!TARGET_64BIT)
2719 return 0;
2720
2721 if (type && int_size_in_bytes (type) == -1)
2722 {
2723 if (TARGET_DEBUG_ARG)
2724 fprintf (stderr, "function_arg_pass_by_reference\n");
2725 return 1;
2726 }
2727
2728 return 0;
2729 }
2730
2731 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2732 passing ABI. */
2733 static bool
2734 contains_128bit_aligned_vector_p (tree type)
2735 {
2736 enum machine_mode mode = TYPE_MODE (type);
2737 if (SSE_REG_MODE_P (mode)
2738 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2739 return true;
2740 if (TYPE_ALIGN (type) < 128)
2741 return false;
2742
2743 if (AGGREGATE_TYPE_P (type))
2744 {
2745 /* Walk the aggregates recursively. */
2746 if (TREE_CODE (type) == RECORD_TYPE
2747 || TREE_CODE (type) == UNION_TYPE
2748 || TREE_CODE (type) == QUAL_UNION_TYPE)
2749 {
2750 tree field;
2751
2752 if (TYPE_BINFO (type) != NULL
2753 && TYPE_BINFO_BASETYPES (type) != NULL)
2754 {
2755 tree bases = TYPE_BINFO_BASETYPES (type);
2756 int n_bases = TREE_VEC_LENGTH (bases);
2757 int i;
2758
2759 for (i = 0; i < n_bases; ++i)
2760 {
2761 tree binfo = TREE_VEC_ELT (bases, i);
2762 tree type = BINFO_TYPE (binfo);
2763
2764 if (contains_128bit_aligned_vector_p (type))
2765 return true;
2766 }
2767 }
2768 /* And now merge the fields of structure. */
2769 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2770 {
2771 if (TREE_CODE (field) == FIELD_DECL
2772 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2773 return true;
2774 }
2775 }
2776 /* Just for use if some languages pass arrays by value. */
2777 else if (TREE_CODE (type) == ARRAY_TYPE)
2778 {
2779 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2780 return true;
2781 }
2782 else
2783 abort ();
2784 }
2785 return false;
2786 }
2787
2788 /* Gives the alignment boundary, in bits, of an argument with the
2789 specified mode and type. */
2790
2791 int
2792 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2793 {
2794 int align;
2795 if (type)
2796 align = TYPE_ALIGN (type);
2797 else
2798 align = GET_MODE_ALIGNMENT (mode);
2799 if (align < PARM_BOUNDARY)
2800 align = PARM_BOUNDARY;
2801 if (!TARGET_64BIT)
2802 {
2803 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2804 make an exception for SSE modes since these require 128bit
2805 alignment.
2806
2807 The handling here differs from field_alignment. ICC aligns MMX
2808 arguments to 4 byte boundaries, while structure fields are aligned
2809 to 8 byte boundaries. */
2810 if (!type)
2811 {
2812 if (!SSE_REG_MODE_P (mode))
2813 align = PARM_BOUNDARY;
2814 }
2815 else
2816 {
2817 if (!contains_128bit_aligned_vector_p (type))
2818 align = PARM_BOUNDARY;
2819 }
2820 }
2821 if (align > 128)
2822 align = 128;
2823 return align;
2824 }
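/* Examples of the boundary computed above for 32-bit code: a plain
   double argument stays at PARM_BOUNDARY (32 bits, per the i386 ABI),
   an __m128 argument gets 128 bits because its mode is an SSE mode, and
   a struct containing an __m128 member gets 128 bits through
   contains_128bit_aligned_vector_p.  The result is always capped at
   128 bits.  */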
2825
2826 /* Return true if N is a possible register number of function value. */
2827 bool
2828 ix86_function_value_regno_p (int regno)
2829 {
2830 if (!TARGET_64BIT)
2831 {
2832 return ((regno) == 0
2833 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2834 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2835 }
2836 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2837 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2838 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2839 }
2840
2841 /* Define how to find the value returned by a function.
2842 VALTYPE is the data type of the value (as a tree).
2843 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2844 otherwise, FUNC is 0. */
2845 rtx
2846 ix86_function_value (tree valtype)
2847 {
2848 if (TARGET_64BIT)
2849 {
2850 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2851 REGPARM_MAX, SSE_REGPARM_MAX,
2852 x86_64_int_return_registers, 0);
2853 /* For zero sized structures, construct_container returns NULL, but we need
2854 to keep the rest of the compiler happy by returning a meaningful value. */
2855 if (!ret)
2856 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2857 return ret;
2858 }
2859 else
2860 return gen_rtx_REG (TYPE_MODE (valtype),
2861 ix86_value_regno (TYPE_MODE (valtype)));
2862 }
2863
2864 /* Return false iff type is returned in memory. */
2865 int
2866 ix86_return_in_memory (tree type)
2867 {
2868 int needed_intregs, needed_sseregs, size;
2869 enum machine_mode mode = TYPE_MODE (type);
2870
2871 if (TARGET_64BIT)
2872 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2873
2874 if (mode == BLKmode)
2875 return 1;
2876
2877 size = int_size_in_bytes (type);
2878
2879 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2880 return 0;
2881
2882 if (VECTOR_MODE_P (mode) || mode == TImode)
2883 {
2884 /* User-created vectors small enough to fit in EAX. */
2885 if (size < 8)
2886 return 0;
2887
2888 /* MMX/3dNow values are returned on the stack, since we've
2889 got to EMMS/FEMMS before returning. */
2890 if (size == 8)
2891 return 1;
2892
2893 /* SSE values are returned in XMM0. */
2894 /* ??? Except when it doesn't exist? We have a choice of
2895 either (1) being abi incompatible with a -march switch,
2896 or (2) generating an error here. Given no good solution,
2897 I think the safest thing is one warning. The user won't
2898 be able to use -Werror, but.... */
2899 if (size == 16)
2900 {
2901 static bool warned;
2902
2903 if (TARGET_SSE)
2904 return 0;
2905
2906 if (!warned)
2907 {
2908 warned = true;
2909 warning ("SSE vector return without SSE enabled "
2910 "changes the ABI");
2911 }
2912 return 1;
2913 }
2914 }
2915
2916 if (mode == XFmode)
2917 return 0;
2918
2919 if (size > 12)
2920 return 1;
2921 return 0;
2922 }
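/* Concrete 32-bit examples of the vector rules above: an __m64 result
   (8 bytes) is returned in memory, because the caller must be able to
   execute EMMS/FEMMS before using the value; an __m128 result (16 bytes)
   is returned in XMM0 when SSE is enabled, and otherwise falls back to
   memory after the one-time ABI warning.  */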
2923
2924 /* Define how to find the value returned by a library function
2925 assuming the value has mode MODE. */
2926 rtx
2927 ix86_libcall_value (enum machine_mode mode)
2928 {
2929 if (TARGET_64BIT)
2930 {
2931 switch (mode)
2932 {
2933 case SFmode:
2934 case SCmode:
2935 case DFmode:
2936 case DCmode:
2937 return gen_rtx_REG (mode, FIRST_SSE_REG);
2938 case XFmode:
2939 case XCmode:
2940 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2941 case TFmode:
2942 case TCmode:
2943 return NULL;
2944 default:
2945 return gen_rtx_REG (mode, 0);
2946 }
2947 }
2948 else
2949 return gen_rtx_REG (mode, ix86_value_regno (mode));
2950 }
2951
2952 /* Given a mode, return the register to use for a return value. */
2953
2954 static int
2955 ix86_value_regno (enum machine_mode mode)
2956 {
2957 /* Floating point return values in %st(0). */
2958 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2959 return FIRST_FLOAT_REG;
2960 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2961 we prevent this case when sse is not available. */
2962 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2963 return FIRST_SSE_REG;
2964 /* Everything else in %eax. */
2965 return 0;
2966 }
2967 \f
2968 /* Create the va_list data type. */
2969
2970 static tree
2971 ix86_build_builtin_va_list (void)
2972 {
2973 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2974
2975 /* For i386 we use plain pointer to argument area. */
2976 if (!TARGET_64BIT)
2977 return build_pointer_type (char_type_node);
2978
2979 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2980 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2981
2982 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2983 unsigned_type_node);
2984 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2985 unsigned_type_node);
2986 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2987 ptr_type_node);
2988 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2989 ptr_type_node);
2990
2991 DECL_FIELD_CONTEXT (f_gpr) = record;
2992 DECL_FIELD_CONTEXT (f_fpr) = record;
2993 DECL_FIELD_CONTEXT (f_ovf) = record;
2994 DECL_FIELD_CONTEXT (f_sav) = record;
2995
2996 TREE_CHAIN (record) = type_decl;
2997 TYPE_NAME (record) = type_decl;
2998 TYPE_FIELDS (record) = f_gpr;
2999 TREE_CHAIN (f_gpr) = f_fpr;
3000 TREE_CHAIN (f_fpr) = f_ovf;
3001 TREE_CHAIN (f_ovf) = f_sav;
3002
3003 layout_type (record);
3004
3005 /* The correct type is an array type of one element. */
3006 return build_array_type (record, build_index_type (size_zero_node));
3007 }
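/* The record built above is the va_list type mandated by the x86-64
   psABI; spelled as user-level C it is equivalent to:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   gp_offset and fp_offset index into reg_save_area for integer and SSE
   registers respectively, overflow_arg_area points at the next
   stack-passed argument, and reg_save_area points at the block saved by
   the prologue (see ix86_setup_incoming_varargs below).  */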
3008
3009 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3010
3011 static void
3012 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3013 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3014 int no_rtl)
3015 {
3016 CUMULATIVE_ARGS next_cum;
3017 rtx save_area = NULL_RTX, mem;
3018 rtx label;
3019 rtx label_ref;
3020 rtx tmp_reg;
3021 rtx nsse_reg;
3022 int set;
3023 tree fntype;
3024 int stdarg_p;
3025 int i;
3026
3027 if (!TARGET_64BIT)
3028 return;
3029
3030 /* Indicate to allocate space on the stack for varargs save area. */
3031 ix86_save_varrargs_registers = 1;
3032
3033 cfun->stack_alignment_needed = 128;
3034
3035 fntype = TREE_TYPE (current_function_decl);
3036 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3037 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3038 != void_type_node));
3039
3040 /* For varargs, we do not want to skip the dummy va_dcl argument.
3041 For stdargs, we do want to skip the last named argument. */
3042 next_cum = *cum;
3043 if (stdarg_p)
3044 function_arg_advance (&next_cum, mode, type, 1);
3045
3046 if (!no_rtl)
3047 save_area = frame_pointer_rtx;
3048
3049 set = get_varargs_alias_set ();
3050
3051 for (i = next_cum.regno; i < ix86_regparm; i++)
3052 {
3053 mem = gen_rtx_MEM (Pmode,
3054 plus_constant (save_area, i * UNITS_PER_WORD));
3055 set_mem_alias_set (mem, set);
3056 emit_move_insn (mem, gen_rtx_REG (Pmode,
3057 x86_64_int_parameter_registers[i]));
3058 }
3059
3060 if (next_cum.sse_nregs)
3061 {
3062 /* Now emit code to save SSE registers. The AX parameter contains the number
3063 of SSE parameter registers used to call this function. We use the
3064 sse_prologue_save insn template, which produces a computed jump across
3065 the SSE saves. We need some preparation work to get this working. */
3066
3067 label = gen_label_rtx ();
3068 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3069
3070 /* Compute address to jump to :
3071 label - 5*eax + nnamed_sse_arguments*5 */
3072 tmp_reg = gen_reg_rtx (Pmode);
3073 nsse_reg = gen_reg_rtx (Pmode);
3074 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3075 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3076 gen_rtx_MULT (Pmode, nsse_reg,
3077 GEN_INT (4))));
3078 if (next_cum.sse_regno)
3079 emit_move_insn
3080 (nsse_reg,
3081 gen_rtx_CONST (DImode,
3082 gen_rtx_PLUS (DImode,
3083 label_ref,
3084 GEN_INT (next_cum.sse_regno * 4))));
3085 else
3086 emit_move_insn (nsse_reg, label_ref);
3087 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3088
3089 /* Compute the address of the memory block we save into. We always use a
3090 pointer pointing 127 bytes after the first byte to store; this keeps each
3091 displacement within a signed byte, limiting the instruction size to 4 bytes. */
3092 tmp_reg = gen_reg_rtx (Pmode);
3093 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3094 plus_constant (save_area,
3095 8 * REGPARM_MAX + 127)));
3096 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3097 set_mem_alias_set (mem, set);
3098 set_mem_align (mem, BITS_PER_WORD);
3099
3100 /* And finally do the dirty job! */
3101 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3102 GEN_INT (next_cum.sse_regno), label));
3103 }
3104
3105 }
3106
3107 /* Implement va_start. */
3108
3109 void
3110 ix86_va_start (tree valist, rtx nextarg)
3111 {
3112 HOST_WIDE_INT words, n_gpr, n_fpr;
3113 tree f_gpr, f_fpr, f_ovf, f_sav;
3114 tree gpr, fpr, ovf, sav, t;
3115
3116 /* Only 64bit target needs something special. */
3117 if (!TARGET_64BIT)
3118 {
3119 std_expand_builtin_va_start (valist, nextarg);
3120 return;
3121 }
3122
3123 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3124 f_fpr = TREE_CHAIN (f_gpr);
3125 f_ovf = TREE_CHAIN (f_fpr);
3126 f_sav = TREE_CHAIN (f_ovf);
3127
3128 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3129 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3130 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3131 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3132 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3133
3134 /* Count number of gp and fp argument registers used. */
3135 words = current_function_args_info.words;
3136 n_gpr = current_function_args_info.regno;
3137 n_fpr = current_function_args_info.sse_regno;
3138
3139 if (TARGET_DEBUG_ARG)
3140 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3141 (int) words, (int) n_gpr, (int) n_fpr);
3142
3143 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3144 build_int_2 (n_gpr * 8, 0));
3145 TREE_SIDE_EFFECTS (t) = 1;
3146 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3147
3148 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3149 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3150 TREE_SIDE_EFFECTS (t) = 1;
3151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3152
3153 /* Find the overflow area. */
3154 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3155 if (words != 0)
3156 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3157 build_int_2 (words * UNITS_PER_WORD, 0));
3158 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3159 TREE_SIDE_EFFECTS (t) = 1;
3160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3161
3162 /* Find the register save area.
3163 The function prologue saves it right above the stack frame. */
3164 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3165 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3166 TREE_SIDE_EFFECTS (t) = 1;
3167 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3168 }
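/* A worked example of the values stored above: in a variadic function
   whose named arguments consumed two integer registers and one SSE
   register before the "...", va_start yields

       gp_offset = 2 * 8           = 16
       fp_offset = 1 * 16 + 8 * 6  = 64   (REGPARM_MAX is 6 on x86-64)

   so the next integer va_arg reads reg_save_area + 16 and the next
   floating point va_arg reads reg_save_area + 64.  */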
3169
3170 /* Implement va_arg. */
3171 rtx
3172 ix86_va_arg (tree valist, tree type)
3173 {
3174 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3175 tree f_gpr, f_fpr, f_ovf, f_sav;
3176 tree gpr, fpr, ovf, sav, t;
3177 int size, rsize;
3178 rtx lab_false, lab_over = NULL_RTX;
3179 rtx addr_rtx, r;
3180 rtx container;
3181 int indirect_p = 0;
3182
3183 /* Only 64bit target needs something special. */
3184 if (!TARGET_64BIT)
3185 {
3186 return std_expand_builtin_va_arg (valist, type);
3187 }
3188
3189 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3190 f_fpr = TREE_CHAIN (f_gpr);
3191 f_ovf = TREE_CHAIN (f_fpr);
3192 f_sav = TREE_CHAIN (f_ovf);
3193
3194 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3195 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3196 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3197 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3198 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3199
3200 size = int_size_in_bytes (type);
3201 if (size == -1)
3202 {
3203 /* Passed by reference. */
3204 indirect_p = 1;
3205 type = build_pointer_type (type);
3206 size = int_size_in_bytes (type);
3207 }
3208 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3209
3210 container = construct_container (TYPE_MODE (type), type, 0,
3211 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3212 /*
3213 * Pull the value out of the saved registers ...
3214 */
3215
3216 addr_rtx = gen_reg_rtx (Pmode);
3217
3218 if (container)
3219 {
3220 rtx int_addr_rtx, sse_addr_rtx;
3221 int needed_intregs, needed_sseregs;
3222 int need_temp;
3223
3224 lab_over = gen_label_rtx ();
3225 lab_false = gen_label_rtx ();
3226
3227 examine_argument (TYPE_MODE (type), type, 0,
3228 &needed_intregs, &needed_sseregs);
3229
3230
3231 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3232 || TYPE_ALIGN (type) > 128);
3233
3234 /* If we are passing a structure, verify that it forms a consecutive block
3235 in the register save area. If not, we need to do moves. */
3236 if (!need_temp && !REG_P (container))
3237 {
3238 /* Verify that all registers are strictly consecutive */
3239 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3240 {
3241 int i;
3242
3243 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3244 {
3245 rtx slot = XVECEXP (container, 0, i);
3246 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3247 || INTVAL (XEXP (slot, 1)) != i * 16)
3248 need_temp = 1;
3249 }
3250 }
3251 else
3252 {
3253 int i;
3254
3255 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3256 {
3257 rtx slot = XVECEXP (container, 0, i);
3258 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3259 || INTVAL (XEXP (slot, 1)) != i * 8)
3260 need_temp = 1;
3261 }
3262 }
3263 }
3264 if (!need_temp)
3265 {
3266 int_addr_rtx = addr_rtx;
3267 sse_addr_rtx = addr_rtx;
3268 }
3269 else
3270 {
3271 int_addr_rtx = gen_reg_rtx (Pmode);
3272 sse_addr_rtx = gen_reg_rtx (Pmode);
3273 }
3274 /* First ensure that we fit completely in registers. */
3275 if (needed_intregs)
3276 {
3277 emit_cmp_and_jump_insns (expand_expr
3278 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3279 GEN_INT ((REGPARM_MAX - needed_intregs +
3280 1) * 8), GE, const1_rtx, SImode,
3281 1, lab_false);
3282 }
3283 if (needed_sseregs)
3284 {
3285 emit_cmp_and_jump_insns (expand_expr
3286 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3287 GEN_INT ((SSE_REGPARM_MAX -
3288 needed_sseregs + 1) * 16 +
3289 REGPARM_MAX * 8), GE, const1_rtx,
3290 SImode, 1, lab_false);
3291 }
3292
3293 /* Compute index to start of area used for integer regs. */
3294 if (needed_intregs)
3295 {
3296 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3297 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3298 if (r != int_addr_rtx)
3299 emit_move_insn (int_addr_rtx, r);
3300 }
3301 if (needed_sseregs)
3302 {
3303 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3304 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3305 if (r != sse_addr_rtx)
3306 emit_move_insn (sse_addr_rtx, r);
3307 }
3308 if (need_temp)
3309 {
3310 int i;
3311 rtx mem;
3312 rtx x;
3313
3314 /* Never use the memory itself, as it has the wrong alias set. */
3315 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3316 mem = gen_rtx_MEM (BLKmode, x);
3317 force_operand (x, addr_rtx);
3318 set_mem_alias_set (mem, get_varargs_alias_set ());
3319 set_mem_align (mem, BITS_PER_UNIT);
3320
3321 for (i = 0; i < XVECLEN (container, 0); i++)
3322 {
3323 rtx slot = XVECEXP (container, 0, i);
3324 rtx reg = XEXP (slot, 0);
3325 enum machine_mode mode = GET_MODE (reg);
3326 rtx src_addr;
3327 rtx src_mem;
3328 int src_offset;
3329 rtx dest_mem;
3330
3331 if (SSE_REGNO_P (REGNO (reg)))
3332 {
3333 src_addr = sse_addr_rtx;
3334 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3335 }
3336 else
3337 {
3338 src_addr = int_addr_rtx;
3339 src_offset = REGNO (reg) * 8;
3340 }
3341 src_mem = gen_rtx_MEM (mode, src_addr);
3342 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3343 src_mem = adjust_address (src_mem, mode, src_offset);
3344 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3345 emit_move_insn (dest_mem, src_mem);
3346 }
3347 }
3348
3349 if (needed_intregs)
3350 {
3351 t =
3352 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3353 build_int_2 (needed_intregs * 8, 0));
3354 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3355 TREE_SIDE_EFFECTS (t) = 1;
3356 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3357 }
3358 if (needed_sseregs)
3359 {
3360 t =
3361 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3362 build_int_2 (needed_sseregs * 16, 0));
3363 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3364 TREE_SIDE_EFFECTS (t) = 1;
3365 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3366 }
3367
3368 emit_jump_insn (gen_jump (lab_over));
3369 emit_barrier ();
3370 emit_label (lab_false);
3371 }
3372
3373 /* ... otherwise out of the overflow area. */
3374
3375 /* Care for on-stack alignment if needed. */
3376 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3377 t = ovf;
3378 else
3379 {
3380 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3381 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3382 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3383 }
3384 t = save_expr (t);
3385
3386 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3387 if (r != addr_rtx)
3388 emit_move_insn (addr_rtx, r);
3389
3390 t =
3391 build (PLUS_EXPR, TREE_TYPE (t), t,
3392 build_int_2 (rsize * UNITS_PER_WORD, 0));
3393 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3394 TREE_SIDE_EFFECTS (t) = 1;
3395 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3396
3397 if (container)
3398 emit_label (lab_over);
3399
3400 if (indirect_p)
3401 {
3402 r = gen_rtx_MEM (Pmode, addr_rtx);
3403 set_mem_alias_set (r, get_varargs_alias_set ());
3404 emit_move_insn (addr_rtx, r);
3405 }
3406
3407 return addr_rtx;
3408 }
3409 \f
3410 /* Return nonzero if OP is either an i387 or an SSE fp register. */
3411 int
3412 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3413 {
3414 return ANY_FP_REG_P (op);
3415 }
3416
3417 /* Return nonzero if OP is an i387 fp register. */
3418 int
3419 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3420 {
3421 return FP_REG_P (op);
3422 }
3423
3424 /* Return nonzero if OP is a non-fp register_operand. */
3425 int
3426 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3427 {
3428 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3429 }
3430
3431 /* Return nonzero if OP is a register operand other than an
3432 i387 fp register. */
3433 int
3434 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3435 {
3436 return register_operand (op, mode) && !FP_REG_P (op);
3437 }
3438
3439 /* Return nonzero if OP is a general operand representable on x86_64. */
3440
3441 int
3442 x86_64_general_operand (rtx op, enum machine_mode mode)
3443 {
3444 if (!TARGET_64BIT)
3445 return general_operand (op, mode);
3446 if (nonimmediate_operand (op, mode))
3447 return 1;
3448 return x86_64_sign_extended_value (op);
3449 }
3450
3451 /* Return nonzero if OP is a general operand representable on x86_64
3452 as either a sign-extended or a zero-extended constant. */
3453
3454 int
3455 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3456 {
3457 if (!TARGET_64BIT)
3458 return general_operand (op, mode);
3459 if (nonimmediate_operand (op, mode))
3460 return 1;
3461 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3462 }
3463
3464 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3465
3466 int
3467 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3468 {
3469 if (!TARGET_64BIT)
3470 return nonmemory_operand (op, mode);
3471 if (register_operand (op, mode))
3472 return 1;
3473 return x86_64_sign_extended_value (op);
3474 }
3475
3476 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3477
3478 int
3479 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3480 {
3481 if (!TARGET_64BIT || !flag_pic)
3482 return nonmemory_operand (op, mode);
3483 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3484 return 1;
3485 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3486 return 1;
3487 return 0;
3488 }
3489
3490 /* Return nonzero if OPNUM's MEM should be matched
3491 in movabs* patterns. */
3492
3493 int
3494 ix86_check_movabs (rtx insn, int opnum)
3495 {
3496 rtx set, mem;
3497
3498 set = PATTERN (insn);
3499 if (GET_CODE (set) == PARALLEL)
3500 set = XVECEXP (set, 0, 0);
3501 if (GET_CODE (set) != SET)
3502 abort ();
3503 mem = XEXP (set, opnum);
3504 while (GET_CODE (mem) == SUBREG)
3505 mem = SUBREG_REG (mem);
3506 if (GET_CODE (mem) != MEM)
3507 abort ();
3508 return (volatile_ok || !MEM_VOLATILE_P (mem));
3509 }
3510
3511 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign-extended or a zero-extended constant. */
3512
3513 int
3514 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3515 {
3516 if (!TARGET_64BIT)
3517 return nonmemory_operand (op, mode);
3518 if (register_operand (op, mode))
3519 return 1;
3520 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3521 }
3522
3523 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3524
3525 int
3526 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3527 {
3528 if (!TARGET_64BIT)
3529 return immediate_operand (op, mode);
3530 return x86_64_sign_extended_value (op);
3531 }
3532
3533 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended 32-bit constant. */
3534
3535 int
3536 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3537 {
3538 return x86_64_zero_extended_value (op);
3539 }
3540
3541 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3542 for shift & compare patterns, as shifting by 0 does not change flags),
3543 else return zero. */
3544
3545 int
3546 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3547 {
3548 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3549 }
3550
3551 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3552 reference and a constant. */
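/* For illustration, forms accepted below include

     (symbol_ref "foo")
     (label_ref ...)
     (const (plus (symbol_ref "foo") (const_int 4)))
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
     (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) (const_int 4)))

   while a bare CONST_INT, or a sum whose second operand is not a CONST_INT,
   is rejected.  */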
3553
3554 int
3555 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3556 {
3557 switch (GET_CODE (op))
3558 {
3559 case SYMBOL_REF:
3560 case LABEL_REF:
3561 return 1;
3562
3563 case CONST:
3564 op = XEXP (op, 0);
3565 if (GET_CODE (op) == SYMBOL_REF
3566 || GET_CODE (op) == LABEL_REF
3567 || (GET_CODE (op) == UNSPEC
3568 && (XINT (op, 1) == UNSPEC_GOT
3569 || XINT (op, 1) == UNSPEC_GOTOFF
3570 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3571 return 1;
3572 if (GET_CODE (op) != PLUS
3573 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3574 return 0;
3575
3576 op = XEXP (op, 0);
3577 if (GET_CODE (op) == SYMBOL_REF
3578 || GET_CODE (op) == LABEL_REF)
3579 return 1;
3580 /* Only @GOTOFF gets offsets. */
3581 if (GET_CODE (op) != UNSPEC
3582 || XINT (op, 1) != UNSPEC_GOTOFF)
3583 return 0;
3584
3585 op = XVECEXP (op, 0, 0);
3586 if (GET_CODE (op) == SYMBOL_REF
3587 || GET_CODE (op) == LABEL_REF)
3588 return 1;
3589 return 0;
3590
3591 default:
3592 return 0;
3593 }
3594 }
3595
3596 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3597
3598 int
3599 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3600 {
3601 if (GET_CODE (op) != CONST)
3602 return 0;
3603 op = XEXP (op, 0);
3604 if (TARGET_64BIT)
3605 {
3606 if (GET_CODE (op) == UNSPEC
3607 && XINT (op, 1) == UNSPEC_GOTPCREL)
3608 return 1;
3609 if (GET_CODE (op) == PLUS
3610 && GET_CODE (XEXP (op, 0)) == UNSPEC
3611 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3612 return 1;
3613 }
3614 else
3615 {
3616 if (GET_CODE (op) == UNSPEC)
3617 return 1;
3618 if (GET_CODE (op) != PLUS
3619 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3620 return 0;
3621 op = XEXP (op, 0);
3622 if (GET_CODE (op) == UNSPEC)
3623 return 1;
3624 }
3625 return 0;
3626 }
3627
3628 /* Return true if OP is a symbolic operand that resolves locally. */
3629
3630 static int
3631 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3632 {
3633 if (GET_CODE (op) == CONST
3634 && GET_CODE (XEXP (op, 0)) == PLUS
3635 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3636 op = XEXP (XEXP (op, 0), 0);
3637
3638 if (GET_CODE (op) == LABEL_REF)
3639 return 1;
3640
3641 if (GET_CODE (op) != SYMBOL_REF)
3642 return 0;
3643
3644 if (SYMBOL_REF_LOCAL_P (op))
3645 return 1;
3646
3647 /* There is, however, a not insubstantial body of code in the rest of
3648 the compiler that assumes it can just stick the results of
3649 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and be done with it. */
3650 /* ??? This is a hack. Should update the body of the compiler to
3651 always create a DECL and invoke targetm.encode_section_info. */
3652 if (strncmp (XSTR (op, 0), internal_label_prefix,
3653 internal_label_prefix_len) == 0)
3654 return 1;
3655
3656 return 0;
3657 }
3658
3659 /* Test for various thread-local symbols. */
3660
3661 int
3662 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3663 {
3664 if (GET_CODE (op) != SYMBOL_REF)
3665 return 0;
3666 return SYMBOL_REF_TLS_MODEL (op);
3667 }
3668
3669 static inline int
3670 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3671 {
3672 if (GET_CODE (op) != SYMBOL_REF)
3673 return 0;
3674 return SYMBOL_REF_TLS_MODEL (op) == kind;
3675 }
3676
3677 int
3678 global_dynamic_symbolic_operand (rtx op,
3679 enum machine_mode mode ATTRIBUTE_UNUSED)
3680 {
3681 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3682 }
3683
3684 int
3685 local_dynamic_symbolic_operand (rtx op,
3686 enum machine_mode mode ATTRIBUTE_UNUSED)
3687 {
3688 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3689 }
3690
3691 int
3692 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3693 {
3694 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3695 }
3696
3697 int
3698 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3699 {
3700 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3701 }
3702
3703 /* Test for a valid operand for a call instruction. Don't allow the
3704 arg pointer register or virtual regs since they may decay into
3705 reg + const, which the patterns can't handle. */
3706
3707 int
3708 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3709 {
3710 /* Disallow indirect through a virtual register. This leads to
3711 compiler aborts when trying to eliminate them. */
3712 if (GET_CODE (op) == REG
3713 && (op == arg_pointer_rtx
3714 || op == frame_pointer_rtx
3715 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3716 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3717 return 0;
3718
3719 /* Disallow `call 1234'. Due to varying assembler lameness this
3720 gets either rejected or translated to `call .+1234'. */
3721 if (GET_CODE (op) == CONST_INT)
3722 return 0;
3723
3724 /* Explicitly allow SYMBOL_REF even if pic. */
3725 if (GET_CODE (op) == SYMBOL_REF)
3726 return 1;
3727
3728 /* Otherwise we can allow any general_operand in the address. */
3729 return general_operand (op, Pmode);
3730 }
3731
3732 /* Test for a valid operand for a call instruction. Don't allow the
3733 arg pointer register or virtual regs since they may decay into
3734 reg + const, which the patterns can't handle. */
3735
3736 int
3737 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3738 {
3739 /* Disallow indirect through a virtual register. This leads to
3740 compiler aborts when trying to eliminate them. */
3741 if (GET_CODE (op) == REG
3742 && (op == arg_pointer_rtx
3743 || op == frame_pointer_rtx
3744 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3745 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3746 return 0;
3747
3748 /* Explicitly allow SYMBOL_REF even if pic. */
3749 if (GET_CODE (op) == SYMBOL_REF)
3750 return 1;
3751
3752 /* Otherwise we can only allow register operands. */
3753 return register_operand (op, Pmode);
3754 }
3755
3756 int
3757 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3758 {
3759 if (GET_CODE (op) == CONST
3760 && GET_CODE (XEXP (op, 0)) == PLUS
3761 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3762 op = XEXP (XEXP (op, 0), 0);
3763 return GET_CODE (op) == SYMBOL_REF;
3764 }
3765
3766 /* Match exactly zero and one. */
3767
3768 int
3769 const0_operand (rtx op, enum machine_mode mode)
3770 {
3771 return op == CONST0_RTX (mode);
3772 }
3773
3774 int
3775 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3776 {
3777 return op == const1_rtx;
3778 }
3779
3780 /* Match 2, 4, or 8. Used for leal multiplicands. */
3781
3782 int
3783 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3784 {
3785 return (GET_CODE (op) == CONST_INT
3786 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3787 }
3788
3789 int
3790 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3791 {
3792 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3793 }
3794
3795 int
3796 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3797 {
3798 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3799 }
3800
3801 int
3802 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3803 {
3804 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3805 }
3806
3807 int
3808 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3809 {
3810 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3811 }
3812
3813
3814 /* True if this is a constant appropriate for an increment or decrement. */
3815
3816 int
3817 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3818 {
3819 /* On Pentium 4, the inc and dec operations cause an extra dependency on the
3820 flags register, since the carry flag is not set. */
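  /* For example, in

	inc	%eax
	adc	%edx, %ecx

     the adc has to merge CF, which inc leaves untouched, with the flags inc
     does write, so using "add $1" instead avoids the extra dependency.  */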
3821 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3822 return 0;
3823 return op == const1_rtx || op == constm1_rtx;
3824 }
3825
3826 /* Return nonzero if OP is acceptable as operand of DImode shift
3827 expander. */
3828
3829 int
3830 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3831 {
3832 if (TARGET_64BIT)
3833 return nonimmediate_operand (op, mode);
3834 else
3835 return register_operand (op, mode);
3836 }
3837
3838 /* Return false if this is the stack pointer, or any other fake
3839 register eliminable to the stack pointer. Otherwise, this is
3840 a register operand.
3841
3842 This is used to prevent esp from being used as an index reg,
3843 which would only happen in pathological cases. */
3844
3845 int
3846 reg_no_sp_operand (rtx op, enum machine_mode mode)
3847 {
3848 rtx t = op;
3849 if (GET_CODE (t) == SUBREG)
3850 t = SUBREG_REG (t);
3851 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3852 return 0;
3853
3854 return register_operand (op, mode);
3855 }
3856
3857 int
3858 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3859 {
3860 return MMX_REG_P (op);
3861 }
3862
3863 /* Return false if this is any eliminable register. Otherwise
3864 general_operand. */
3865
3866 int
3867 general_no_elim_operand (rtx op, enum machine_mode mode)
3868 {
3869 rtx t = op;
3870 if (GET_CODE (t) == SUBREG)
3871 t = SUBREG_REG (t);
3872 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3873 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3874 || t == virtual_stack_dynamic_rtx)
3875 return 0;
3876 if (REG_P (t)
3877 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3878 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3879 return 0;
3880
3881 return general_operand (op, mode);
3882 }
3883
3884 /* Return false if this is any eliminable register. Otherwise
3885 register_operand or const_int. */
3886
3887 int
3888 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3889 {
3890 rtx t = op;
3891 if (GET_CODE (t) == SUBREG)
3892 t = SUBREG_REG (t);
3893 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3894 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3895 || t == virtual_stack_dynamic_rtx)
3896 return 0;
3897
3898 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3899 }
3900
3901 /* Return false if this is any eliminable register or stack register,
3902 otherwise work like register_operand. */
3903
3904 int
3905 index_register_operand (rtx op, enum machine_mode mode)
3906 {
3907 rtx t = op;
3908 if (GET_CODE (t) == SUBREG)
3909 t = SUBREG_REG (t);
3910 if (!REG_P (t))
3911 return 0;
3912 if (t == arg_pointer_rtx
3913 || t == frame_pointer_rtx
3914 || t == virtual_incoming_args_rtx
3915 || t == virtual_stack_vars_rtx
3916 || t == virtual_stack_dynamic_rtx
3917 || REGNO (t) == STACK_POINTER_REGNUM)
3918 return 0;
3919
3920 return general_operand (op, mode);
3921 }
3922
3923 /* Return true if op is a Q_REGS class register. */
3924
3925 int
3926 q_regs_operand (rtx op, enum machine_mode mode)
3927 {
3928 if (mode != VOIDmode && GET_MODE (op) != mode)
3929 return 0;
3930 if (GET_CODE (op) == SUBREG)
3931 op = SUBREG_REG (op);
3932 return ANY_QI_REG_P (op);
3933 }
3934
3935 /* Return true if op is the flags register. */
3936
3937 int
3938 flags_reg_operand (rtx op, enum machine_mode mode)
3939 {
3940 if (mode != VOIDmode && GET_MODE (op) != mode)
3941 return 0;
3942 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3943 }
3944
3945 /* Return true if op is a NON_Q_REGS class register. */
3946
3947 int
3948 non_q_regs_operand (rtx op, enum machine_mode mode)
3949 {
3950 if (mode != VOIDmode && GET_MODE (op) != mode)
3951 return 0;
3952 if (GET_CODE (op) == SUBREG)
3953 op = SUBREG_REG (op);
3954 return NON_QI_REG_P (op);
3955 }
3956
3957 int
3958 zero_extended_scalar_load_operand (rtx op,
3959 enum machine_mode mode ATTRIBUTE_UNUSED)
3960 {
3961 unsigned n_elts;
3962 if (GET_CODE (op) != MEM)
3963 return 0;
3964 op = maybe_get_pool_constant (op);
3965 if (!op)
3966 return 0;
3967 if (GET_CODE (op) != CONST_VECTOR)
3968 return 0;
3969 n_elts =
3970 (GET_MODE_SIZE (GET_MODE (op)) /
3971 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3972 for (n_elts--; n_elts > 0; n_elts--)
3973 {
3974 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3975 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3976 return 0;
3977 }
3978 return 1;
3979 }
3980
3981 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3982 int
3983 vector_move_operand (rtx op, enum machine_mode mode)
3984 {
3985 if (nonimmediate_operand (op, mode))
3986 return 1;
3987 if (GET_MODE (op) != mode && mode != VOIDmode)
3988 return 0;
3989 return (op == CONST0_RTX (GET_MODE (op)));
3990 }
3991
3992 /* Return true if op is a valid address and does not contain
3993 a segment override. */
3994
3995 int
3996 no_seg_address_operand (rtx op, enum machine_mode mode)
3997 {
3998 struct ix86_address parts;
3999
4000 if (! address_operand (op, mode))
4001 return 0;
4002
4003 if (! ix86_decompose_address (op, &parts))
4004 abort ();
4005
4006 return parts.seg == SEG_DEFAULT;
4007 }
4008
4009 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4010 insns. */
4011 int
4012 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4013 {
4014 enum rtx_code code = GET_CODE (op);
4015 switch (code)
4016 {
4017 /* Operations supported directly. */
4018 case EQ:
4019 case LT:
4020 case LE:
4021 case UNORDERED:
4022 case NE:
4023 case UNGE:
4024 case UNGT:
4025 case ORDERED:
4026 return 1;
4027 /* These are equivalent to ones above in non-IEEE comparisons. */
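      /* For example, GE and UNGE differ only when an operand is a NaN, so
	 without IEEE-conforming FP the unordered form above can stand in
	 for the ordered one.  */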
4028 case UNEQ:
4029 case UNLT:
4030 case UNLE:
4031 case LTGT:
4032 case GE:
4033 case GT:
4034 return !TARGET_IEEE_FP;
4035 default:
4036 return 0;
4037 }
4038 }
4039 /* Return 1 if OP is a valid comparison operator in valid mode. */
4040 int
4041 ix86_comparison_operator (rtx op, enum machine_mode mode)
4042 {
4043 enum machine_mode inmode;
4044 enum rtx_code code = GET_CODE (op);
4045 if (mode != VOIDmode && GET_MODE (op) != mode)
4046 return 0;
4047 if (!COMPARISON_P (op))
4048 return 0;
4049 inmode = GET_MODE (XEXP (op, 0));
4050
4051 if (inmode == CCFPmode || inmode == CCFPUmode)
4052 {
4053 enum rtx_code second_code, bypass_code;
4054 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4055 return (bypass_code == NIL && second_code == NIL);
4056 }
4057 switch (code)
4058 {
4059 case EQ: case NE:
4060 return 1;
4061 case LT: case GE:
4062 if (inmode == CCmode || inmode == CCGCmode
4063 || inmode == CCGOCmode || inmode == CCNOmode)
4064 return 1;
4065 return 0;
4066 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4067 if (inmode == CCmode)
4068 return 1;
4069 return 0;
4070 case GT: case LE:
4071 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4072 return 1;
4073 return 0;
4074 default:
4075 return 0;
4076 }
4077 }
4078
4079 /* Return 1 if OP is a valid comparison operator testing carry flag
4080 to be set. */
4081 int
4082 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4083 {
4084 enum machine_mode inmode;
4085 enum rtx_code code = GET_CODE (op);
4086
4087 if (mode != VOIDmode && GET_MODE (op) != mode)
4088 return 0;
4089 if (!COMPARISON_P (op))
4090 return 0;
4091 inmode = GET_MODE (XEXP (op, 0));
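  /* The operand must compare the flags register against zero; hard register
     17 is the flags register in this port's numbering.  */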
4092 if (GET_CODE (XEXP (op, 0)) != REG
4093 || REGNO (XEXP (op, 0)) != 17
4094 || XEXP (op, 1) != const0_rtx)
4095 return 0;
4096
4097 if (inmode == CCFPmode || inmode == CCFPUmode)
4098 {
4099 enum rtx_code second_code, bypass_code;
4100
4101 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4102 if (bypass_code != NIL || second_code != NIL)
4103 return 0;
4104 code = ix86_fp_compare_code_to_integer (code);
4105 }
4106 else if (inmode != CCmode)
4107 return 0;
4108 return code == LTU;
4109 }
4110
4111 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4112
4113 int
4114 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4115 {
4116 enum machine_mode inmode;
4117 enum rtx_code code = GET_CODE (op);
4118
4119 if (mode != VOIDmode && GET_MODE (op) != mode)
4120 return 0;
4121 if (!COMPARISON_P (op))
4122 return 0;
4123 inmode = GET_MODE (XEXP (op, 0));
4124 if (inmode == CCFPmode || inmode == CCFPUmode)
4125 {
4126 enum rtx_code second_code, bypass_code;
4127
4128 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4129 if (bypass_code != NIL || second_code != NIL)
4130 return 0;
4131 code = ix86_fp_compare_code_to_integer (code);
4132 }
4133 /* The i387 supports only a limited set of condition codes. */
4134 switch (code)
4135 {
4136 case LTU: case GTU: case LEU: case GEU:
4137 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4138 return 1;
4139 return 0;
4140 case ORDERED: case UNORDERED:
4141 case EQ: case NE:
4142 return 1;
4143 default:
4144 return 0;
4145 }
4146 }
4147
4148 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4149
4150 int
4151 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4152 {
4153 switch (GET_CODE (op))
4154 {
4155 case MULT:
4156 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4157 but the 386 and 486 do HImode multiplies faster. */
4158 return ix86_tune > PROCESSOR_I486;
4159 case PLUS:
4160 case AND:
4161 case IOR:
4162 case XOR:
4163 case ASHIFT:
4164 return 1;
4165 default:
4166 return 0;
4167 }
4168 }
4169
4170 /* Nearly general operand, but accept any const_double, since we wish
4171 to be able to drop them into memory rather than have them get pulled
4172 into registers. */
4173
4174 int
4175 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4176 {
4177 if (mode != VOIDmode && mode != GET_MODE (op))
4178 return 0;
4179 if (GET_CODE (op) == CONST_DOUBLE)
4180 return 1;
4181 return general_operand (op, mode);
4182 }
4183
4184 /* Match an SI or HImode register for a zero_extract. */
4185
4186 int
4187 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4188 {
4189 int regno;
4190 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4191 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4192 return 0;
4193
4194 if (!register_operand (op, VOIDmode))
4195 return 0;
4196
4197 /* Be careful to accept only registers having upper parts. */
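  /* Hard registers 0-3 are ax, dx, cx and bx in this port's numbering, the
     only ones with an addressable high byte; anything above
     LAST_VIRTUAL_REGISTER is still a pseudo that reload can place in one of
     them later.  */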
4198 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4199 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4200 }
4201
4202 /* Return 1 if this is a valid binary floating-point operation.
4203 OP is the expression matched, and MODE is its mode. */
4204
4205 int
4206 binary_fp_operator (rtx op, enum machine_mode mode)
4207 {
4208 if (mode != VOIDmode && mode != GET_MODE (op))
4209 return 0;
4210
4211 switch (GET_CODE (op))
4212 {
4213 case PLUS:
4214 case MINUS:
4215 case MULT:
4216 case DIV:
4217 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4218
4219 default:
4220 return 0;
4221 }
4222 }
4223
4224 int
4225 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4226 {
4227 return GET_CODE (op) == MULT;
4228 }
4229
4230 int
4231 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4232 {
4233 return GET_CODE (op) == DIV;
4234 }
4235
4236 int
4237 arith_or_logical_operator (rtx op, enum machine_mode mode)
4238 {
4239 return ((mode == VOIDmode || GET_MODE (op) == mode)
4240 && ARITHMETIC_P (op));
4241 }
4242
4243 /* Returns 1 if OP is a memory operand with a displacement. */
4244
4245 int
4246 memory_displacement_operand (rtx op, enum machine_mode mode)
4247 {
4248 struct ix86_address parts;
4249
4250 if (! memory_operand (op, mode))
4251 return 0;
4252
4253 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4254 abort ();
4255
4256 return parts.disp != NULL_RTX;
4257 }
4258
4259 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4260 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4261
4262 ??? It seems likely that this will only work because cmpsi is an
4263 expander, and no actual insns use this. */
4264
4265 int
4266 cmpsi_operand (rtx op, enum machine_mode mode)
4267 {
4268 if (nonimmediate_operand (op, mode))
4269 return 1;
4270
4271 if (GET_CODE (op) == AND
4272 && GET_MODE (op) == SImode
4273 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4274 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4275 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4276 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4277 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4278 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4279 return 1;
4280
4281 return 0;
4282 }
4283
4284 /* Returns 1 if OP is a memory operand that cannot be represented by the
4285 modRM array. */
4286
4287 int
4288 long_memory_operand (rtx op, enum machine_mode mode)
4289 {
4290 if (! memory_operand (op, mode))
4291 return 0;
4292
4293 return memory_address_length (op) != 0;
4294 }
4295
4296 /* Return nonzero if the rtx is known to be aligned. */
4297
4298 int
4299 aligned_operand (rtx op, enum machine_mode mode)
4300 {
4301 struct ix86_address parts;
4302
4303 if (!general_operand (op, mode))
4304 return 0;
4305
4306 /* Registers and immediate operands are always "aligned". */
4307 if (GET_CODE (op) != MEM)
4308 return 1;
4309
4310 /* Don't even try to do any aligned optimizations with volatiles. */
4311 if (MEM_VOLATILE_P (op))
4312 return 0;
4313
4314 op = XEXP (op, 0);
4315
4316 /* Pushes and pops are only valid on the stack pointer. */
4317 if (GET_CODE (op) == PRE_DEC
4318 || GET_CODE (op) == POST_INC)
4319 return 1;
4320
4321 /* Decode the address. */
4322 if (! ix86_decompose_address (op, &parts))
4323 abort ();
4324
4325 /* Look for some component that isn't known to be aligned. */
4326 if (parts.index)
4327 {
4328 if (parts.scale < 4
4329 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4330 return 0;
4331 }
4332 if (parts.base)
4333 {
4334 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4335 return 0;
4336 }
4337 if (parts.disp)
4338 {
4339 if (GET_CODE (parts.disp) != CONST_INT
4340 || (INTVAL (parts.disp) & 3) != 0)
4341 return 0;
4342 }
4343
4344 /* Didn't find one -- this must be an aligned address. */
4345 return 1;
4346 }
4347 \f
4348 /* Initialize the table of extra 80387 mathematical constants. */
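/* The strings below are the decimal expansions of, in order, log10(2),
   ln(2), log2(e), log2(10) and pi -- the values pushed by the fldlg2,
   fldln2, fldl2e, fldl2t and fldpi instructions respectively.  */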
4349
4350 static void
4351 init_ext_80387_constants (void)
4352 {
4353 static const char * cst[5] =
4354 {
4355 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4356 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4357 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4358 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4359 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4360 };
4361 int i;
4362
4363 for (i = 0; i < 5; i++)
4364 {
4365 real_from_string (&ext_80387_constants_table[i], cst[i]);
4366 /* Ensure each constant is rounded to XFmode precision. */
4367 real_convert (&ext_80387_constants_table[i],
4368 XFmode, &ext_80387_constants_table[i]);
4369 }
4370
4371 ext_80387_constants_init = 1;
4372 }
4373
4374 /* Return true if the constant is something that can be loaded with
4375 a special instruction. */
4376
4377 int
4378 standard_80387_constant_p (rtx x)
4379 {
4380 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4381 return -1;
4382
4383 if (x == CONST0_RTX (GET_MODE (x)))
4384 return 1;
4385 if (x == CONST1_RTX (GET_MODE (x)))
4386 return 2;
4387
4388 /* For XFmode constants, try to find a special 80387 instruction when
4389 optimizing for size or on those CPUs that benefit from them. */
4390 if (GET_MODE (x) == XFmode
4391 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4392 {
4393 REAL_VALUE_TYPE r;
4394 int i;
4395
4396 if (! ext_80387_constants_init)
4397 init_ext_80387_constants ();
4398
4399 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4400 for (i = 0; i < 5; i++)
4401 if (real_identical (&r, &ext_80387_constants_table[i]))
4402 return i + 3;
4403 }
4404
4405 return 0;
4406 }
4407
4408 /* Return the opcode of the special instruction to be used to load
4409 the constant X. */
4410
4411 const char *
4412 standard_80387_constant_opcode (rtx x)
4413 {
4414 switch (standard_80387_constant_p (x))
4415 {
4416 case 1:
4417 return "fldz";
4418 case 2:
4419 return "fld1";
4420 case 3:
4421 return "fldlg2";
4422 case 4:
4423 return "fldln2";
4424 case 5:
4425 return "fldl2e";
4426 case 6:
4427 return "fldl2t";
4428 case 7:
4429 return "fldpi";
4430 }
4431 abort ();
4432 }
4433
4434 /* Return the CONST_DOUBLE representing the 80387 constant that is
4435 loaded by the specified special instruction. The argument IDX
4436 matches the return value from standard_80387_constant_p. */
4437
4438 rtx
4439 standard_80387_constant_rtx (int idx)
4440 {
4441 int i;
4442
4443 if (! ext_80387_constants_init)
4444 init_ext_80387_constants ();
4445
4446 switch (idx)
4447 {
4448 case 3:
4449 case 4:
4450 case 5:
4451 case 6:
4452 case 7:
4453 i = idx - 3;
4454 break;
4455
4456 default:
4457 abort ();
4458 }
4459
4460 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4461 XFmode);
4462 }
4463
4464 /* Return 1 if X is an FP constant we can load into an SSE register
4465 without using memory. */
4466 int
4467 standard_sse_constant_p (rtx x)
4468 {
4469 if (x == const0_rtx)
4470 return 1;
4471 return (x == CONST0_RTX (GET_MODE (x)));
4472 }
4473
4474 /* Returns 1 if OP contains a symbol reference */
4475
4476 int
4477 symbolic_reference_mentioned_p (rtx op)
4478 {
4479 const char *fmt;
4480 int i;
4481
4482 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4483 return 1;
4484
4485 fmt = GET_RTX_FORMAT (GET_CODE (op));
4486 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4487 {
4488 if (fmt[i] == 'E')
4489 {
4490 int j;
4491
4492 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4493 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4494 return 1;
4495 }
4496
4497 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4498 return 1;
4499 }
4500
4501 return 0;
4502 }
4503
4504 /* Return 1 if it is appropriate to emit `ret' instructions in the
4505 body of a function. Do this only if the epilogue is simple, needing a
4506 couple of insns. Prior to reloading, we can't tell how many registers
4507 must be saved, so return 0 then. Return 0 if there is no frame
4508 marker to de-allocate.
4509
4510 If NON_SAVING_SETJMP is defined and true, then it is not possible
4511 for the epilogue to be simple, so return 0. This is a special case
4512 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4513 until final, but jump_optimize may need to know sooner if a
4514 `return' is OK. */
4515
4516 int
4517 ix86_can_use_return_insn_p (void)
4518 {
4519 struct ix86_frame frame;
4520
4521 #ifdef NON_SAVING_SETJMP
4522 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4523 return 0;
4524 #endif
4525
4526 if (! reload_completed || frame_pointer_needed)
4527 return 0;
4528
4529 /* Don't allow more than 32K bytes of popped args, since that's all we
4530 can do with one instruction. */
4531 if (current_function_pops_args
4532 && current_function_args_size >= 32768)
4533 return 0;
4534
4535 ix86_compute_frame_layout (&frame);
4536 return frame.to_allocate == 0 && frame.nregs == 0;
4537 }
4538 \f
4539 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
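/* For instance, -1 and 0x7fffffff both fit the sign-extended field, while
   0x80000000 fits only the zero-extended field handled by
   x86_64_zero_extended_value below, and 0x100000000 fits neither.  */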
4540 int
4541 x86_64_sign_extended_value (rtx value)
4542 {
4543 switch (GET_CODE (value))
4544 {
4545 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4546 to be at least 32 and thus all acceptable constants are
4547 represented as CONST_INTs. */
4548 case CONST_INT:
4549 if (HOST_BITS_PER_WIDE_INT == 32)
4550 return 1;
4551 else
4552 {
4553 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4554 return trunc_int_for_mode (val, SImode) == val;
4555 }
4556 break;
4557
4558 /* For certain code models, the symbolic references are known to fit.
4559 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4560 library. Don't count TLS SYMBOL_REFs here, since they should fit
4561 only inside an UNSPEC, handled below. */
4562 case SYMBOL_REF:
4563 /* TLS symbols are not constant. */
4564 if (tls_symbolic_operand (value, Pmode))
4565 return false;
4566 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4567
4568 /* For certain code models, the code is near as well. */
4569 case LABEL_REF:
4570 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4571 || ix86_cmodel == CM_KERNEL);
4572
4573 /* We also may accept the offsetted memory references in certain special
4574 cases. */
4575 case CONST:
4576 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4577 switch (XINT (XEXP (value, 0), 1))
4578 {
4579 case UNSPEC_GOTPCREL:
4580 case UNSPEC_DTPOFF:
4581 case UNSPEC_GOTNTPOFF:
4582 case UNSPEC_NTPOFF:
4583 return 1;
4584 default:
4585 break;
4586 }
4587 if (GET_CODE (XEXP (value, 0)) == PLUS)
4588 {
4589 rtx op1 = XEXP (XEXP (value, 0), 0);
4590 rtx op2 = XEXP (XEXP (value, 0), 1);
4591 HOST_WIDE_INT offset;
4592
4593 if (ix86_cmodel == CM_LARGE)
4594 return 0;
4595 if (GET_CODE (op2) != CONST_INT)
4596 return 0;
4597 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4598 switch (GET_CODE (op1))
4599 {
4600 case SYMBOL_REF:
4601 /* For CM_SMALL assume that the last object ends at least 16MB
4602 before the 31-bit boundary. We may also accept pretty
4603 large negative constants knowing that all objects are
4604 in the positive half of the address space. */
4605 if (ix86_cmodel == CM_SMALL
4606 && offset < 16*1024*1024
4607 && trunc_int_for_mode (offset, SImode) == offset)
4608 return 1;
4609 /* For CM_KERNEL we know that all objects reside in the
4610 negative half of the 32-bit address space. We may not
4611 accept negative offsets, since they may push an address
4612 just outside it, but we may accept pretty large positive ones. */
4613 if (ix86_cmodel == CM_KERNEL
4614 && offset > 0
4615 && trunc_int_for_mode (offset, SImode) == offset)
4616 return 1;
4617 break;
4618 case LABEL_REF:
4619 /* These conditions are similar to SYMBOL_REF ones, just the
4620 constraints for code models differ. */
4621 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4622 && offset < 16*1024*1024
4623 && trunc_int_for_mode (offset, SImode) == offset)
4624 return 1;
4625 if (ix86_cmodel == CM_KERNEL
4626 && offset > 0
4627 && trunc_int_for_mode (offset, SImode) == offset)
4628 return 1;
4629 break;
4630 case UNSPEC:
4631 switch (XINT (op1, 1))
4632 {
4633 case UNSPEC_DTPOFF:
4634 case UNSPEC_NTPOFF:
4635 if (offset > 0
4636 && trunc_int_for_mode (offset, SImode) == offset)
4637 return 1;
4638 }
4639 break;
4640 default:
4641 return 0;
4642 }
4643 }
4644 return 0;
4645 default:
4646 return 0;
4647 }
4648 }
4649
4650 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4651 int
4652 x86_64_zero_extended_value (rtx value)
4653 {
4654 switch (GET_CODE (value))
4655 {
4656 case CONST_DOUBLE:
4657 if (HOST_BITS_PER_WIDE_INT == 32)
4658 return (GET_MODE (value) == VOIDmode
4659 && !CONST_DOUBLE_HIGH (value));
4660 else
4661 return 0;
4662 case CONST_INT:
4663 if (HOST_BITS_PER_WIDE_INT == 32)
4664 return INTVAL (value) >= 0;
4665 else
4666 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4667 break;
4668
4669 /* For certain code models, the symbolic references are known to fit. */
4670 case SYMBOL_REF:
4671 /* TLS symbols are not constant. */
4672 if (tls_symbolic_operand (value, Pmode))
4673 return false;
4674 return ix86_cmodel == CM_SMALL;
4675
4676 /* For certain code models, the code is near as well. */
4677 case LABEL_REF:
4678 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4679
4680 /* We also may accept the offsetted memory references in certain special
4681 cases. */
4682 case CONST:
4683 if (GET_CODE (XEXP (value, 0)) == PLUS)
4684 {
4685 rtx op1 = XEXP (XEXP (value, 0), 0);
4686 rtx op2 = XEXP (XEXP (value, 0), 1);
4687
4688 if (ix86_cmodel == CM_LARGE)
4689 return 0;
4690 switch (GET_CODE (op1))
4691 {
4692 case SYMBOL_REF:
4693 return 0;
4694 /* For small code model we may accept pretty large positive
4695 offsets, since one bit is available for free. Negative
4696 offsets are limited by the size of NULL pointer area
4697 specified by the ABI. */
4698 if (ix86_cmodel == CM_SMALL
4699 && GET_CODE (op2) == CONST_INT
4700 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4701 && (trunc_int_for_mode (INTVAL (op2), SImode)
4702 == INTVAL (op2)))
4703 return 1;
4704 /* ??? For the kernel, we may accept adjustment of
4705 -0x10000000, since we know that it will just convert
4706 negative address space to positive, but perhaps this
4707 is not worthwhile. */
4708 break;
4709 case LABEL_REF:
4710 /* These conditions are similar to SYMBOL_REF ones, just the
4711 constraints for code models differ. */
4712 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4713 && GET_CODE (op2) == CONST_INT
4714 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4715 && (trunc_int_for_mode (INTVAL (op2), SImode)
4716 == INTVAL (op2)))
4717 return 1;
4718 break;
4719 default:
4720 return 0;
4721 }
4722 }
4723 return 0;
4724 default:
4725 return 0;
4726 }
4727 }
4728
4729 /* Value should be nonzero if functions must have frame pointers.
4730 Zero means the frame pointer need not be set up (and parms may
4731 be accessed via the stack pointer) in functions that seem suitable. */
4732
4733 int
4734 ix86_frame_pointer_required (void)
4735 {
4736 /* If we accessed previous frames, then the generated code expects
4737 to be able to access the saved ebp value in our frame. */
4738 if (cfun->machine->accesses_prev_frame)
4739 return 1;
4740
4741 /* Several x86 OSes need a frame pointer for other reasons,
4742 usually pertaining to setjmp. */
4743 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4744 return 1;
4745
4746 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4747 the frame pointer by default. Turn it back on now if we've not
4748 got a leaf function. */
4749 if (TARGET_OMIT_LEAF_FRAME_POINTER
4750 && (!current_function_is_leaf))
4751 return 1;
4752
4753 if (current_function_profile)
4754 return 1;
4755
4756 return 0;
4757 }
4758
4759 /* Record that the current function accesses previous call frames. */
4760
4761 void
4762 ix86_setup_frame_addresses (void)
4763 {
4764 cfun->machine->accesses_prev_frame = 1;
4765 }
4766 \f
4767 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4768 # define USE_HIDDEN_LINKONCE 1
4769 #else
4770 # define USE_HIDDEN_LINKONCE 0
4771 #endif
4772
4773 static int pic_labels_used;
4774
4775 /* Fills in the label name that should be used for a pc thunk for
4776 the given register. */
4777
4778 static void
4779 get_pc_thunk_name (char name[32], unsigned int regno)
4780 {
4781 if (USE_HIDDEN_LINKONCE)
4782 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4783 else
4784 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4785 }
4786
4787
4788 /* This function emits the pc thunks used by -fpic: each one loads its
4789 register with the return address of the caller and then returns. */
4790
4791 void
4792 ix86_file_end (void)
4793 {
4794 rtx xops[2];
4795 int regno;
4796
4797 for (regno = 0; regno < 8; ++regno)
4798 {
4799 char name[32];
4800
4801 if (! ((pic_labels_used >> regno) & 1))
4802 continue;
4803
4804 get_pc_thunk_name (name, regno);
4805
4806 if (USE_HIDDEN_LINKONCE)
4807 {
4808 tree decl;
4809
4810 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4811 error_mark_node);
4812 TREE_PUBLIC (decl) = 1;
4813 TREE_STATIC (decl) = 1;
4814 DECL_ONE_ONLY (decl) = 1;
4815
4816 (*targetm.asm_out.unique_section) (decl, 0);
4817 named_section (decl, NULL, 0);
4818
4819 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4820 fputs ("\t.hidden\t", asm_out_file);
4821 assemble_name (asm_out_file, name);
4822 fputc ('\n', asm_out_file);
4823 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4824 }
4825 else
4826 {
4827 text_section ();
4828 ASM_OUTPUT_LABEL (asm_out_file, name);
4829 }
4830
4831 xops[0] = gen_rtx_REG (SImode, regno);
4832 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4833 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4834 output_asm_insn ("ret", xops);
4835 }
4836
4837 if (NEED_INDICATE_EXEC_STACK)
4838 file_end_indicate_exec_stack ();
4839 }
4840
4841 /* Emit code for the SET_GOT patterns. */
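/* Depending on TARGET_DEEP_BRANCH_PREDICTION, the asm templates below
   expand to roughly either

	call	.L2
   .L2:	popl	%reg
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %reg

   or a call to the per-register thunk emitted by ix86_file_end above:

	call	__i686.get_pc_thunk.reg
	addl	$_GLOBAL_OFFSET_TABLE_, %reg

   (AT&T syntax shown; the exact form depends on the assembler dialect).  */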
4842
4843 const char *
4844 output_set_got (rtx dest)
4845 {
4846 rtx xops[3];
4847
4848 xops[0] = dest;
4849 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4850
4851 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4852 {
4853 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4854
4855 if (!flag_pic)
4856 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4857 else
4858 output_asm_insn ("call\t%a2", xops);
4859
4860 #if TARGET_MACHO
4861 /* Output the "canonical" label name ("Lxx$pb") here too. This
4862 is what will be referred to by the Mach-O PIC subsystem. */
4863 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4864 #endif
4865 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4866 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4867
4868 if (flag_pic)
4869 output_asm_insn ("pop{l}\t%0", xops);
4870 }
4871 else
4872 {
4873 char name[32];
4874 get_pc_thunk_name (name, REGNO (dest));
4875 pic_labels_used |= 1 << REGNO (dest);
4876
4877 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4878 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4879 output_asm_insn ("call\t%X2", xops);
4880 }
4881
4882 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4883 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4884 else if (!TARGET_MACHO)
4885 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4886
4887 return "";
4888 }
4889
4890 /* Generate a "push" pattern for input ARG. */
4891
4892 static rtx
4893 gen_push (rtx arg)
4894 {
4895 return gen_rtx_SET (VOIDmode,
4896 gen_rtx_MEM (Pmode,
4897 gen_rtx_PRE_DEC (Pmode,
4898 stack_pointer_rtx)),
4899 arg);
4900 }
4901
4902 /* Return the regno of an unused call-clobbered register available for
4903 the entire function, or INVALID_REGNUM if there is none. */
4904
4905 static unsigned int
4906 ix86_select_alt_pic_regnum (void)
4907 {
4908 if (current_function_is_leaf && !current_function_profile)
4909 {
4910 int i;
4911 for (i = 2; i >= 0; --i)
4912 if (!regs_ever_live[i])
4913 return i;
4914 }
4915
4916 return INVALID_REGNUM;
4917 }
4918
4919 /* Return 1 if we need to save REGNO. */
4920 static int
4921 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4922 {
4923 if (pic_offset_table_rtx
4924 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4925 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4926 || current_function_profile
4927 || current_function_calls_eh_return
4928 || current_function_uses_const_pool))
4929 {
4930 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4931 return 0;
4932 return 1;
4933 }
4934
4935 if (current_function_calls_eh_return && maybe_eh_return)
4936 {
4937 unsigned i;
4938 for (i = 0; ; i++)
4939 {
4940 unsigned test = EH_RETURN_DATA_REGNO (i);
4941 if (test == INVALID_REGNUM)
4942 break;
4943 if (test == regno)
4944 return 1;
4945 }
4946 }
4947
4948 return (regs_ever_live[regno]
4949 && !call_used_regs[regno]
4950 && !fixed_regs[regno]
4951 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4952 }
4953
4954 /* Return number of registers to be saved on the stack. */
4955
4956 static int
4957 ix86_nsaved_regs (void)
4958 {
4959 int nregs = 0;
4960 int regno;
4961
4962 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4963 if (ix86_save_reg (regno, true))
4964 nregs++;
4965 return nregs;
4966 }
4967
4968 /* Return the offset between two registers, one to be eliminated, and the other
4969 its replacement, at the start of a routine. */
4970
4971 HOST_WIDE_INT
4972 ix86_initial_elimination_offset (int from, int to)
4973 {
4974 struct ix86_frame frame;
4975 ix86_compute_frame_layout (&frame);
4976
4977 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4978 return frame.hard_frame_pointer_offset;
4979 else if (from == FRAME_POINTER_REGNUM
4980 && to == HARD_FRAME_POINTER_REGNUM)
4981 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4982 else
4983 {
4984 if (to != STACK_POINTER_REGNUM)
4985 abort ();
4986 else if (from == ARG_POINTER_REGNUM)
4987 return frame.stack_pointer_offset;
4988 else if (from != FRAME_POINTER_REGNUM)
4989 abort ();
4990 else
4991 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4992 }
4993 }
4994
4995 /* Fill the structure ix86_frame that describes the frame of the currently computed function. */
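/* From higher to lower addresses, the layout computed here is roughly

	incoming return address
	saved frame pointer		(only if frame_pointer_needed)
	register save area		(nregs * UNITS_PER_WORD)
	va-arg register save area	(X86_64_VARARGS_SIZE, if used)
	padding1			(aligns the local frame)
	local variables			(get_frame_size ())
	outgoing arguments		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(aligns to the preferred boundary)

   with the x86-64 red zone, when usable, carved out of to_allocate at the
   end.  */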
4996
4997 static void
4998 ix86_compute_frame_layout (struct ix86_frame *frame)
4999 {
5000 HOST_WIDE_INT total_size;
5001 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5002 HOST_WIDE_INT offset;
5003 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5004 HOST_WIDE_INT size = get_frame_size ();
5005
5006 frame->nregs = ix86_nsaved_regs ();
5007 total_size = size;
5008
5009 /* During reload iterations the number of registers saved can change.
5010 Recompute the value as needed. Do not recompute when the number of
5011 registers didn't change, as reload makes multiple calls to this function
5012 and does not expect the decision to change within a single iteration. */
5013 if (!optimize_size
5014 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5015 {
5016 int count = frame->nregs;
5017
5018 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5019 /* The fast prologue uses move instead of push to save registers. This
5020 is significantly longer, but also executes faster as modern hardware
5021 can execute the moves in parallel, but can't do that for push/pop.
5022
5023 Be careful about choosing which prologue to emit: when the function takes
5024 many instructions to execute we may use the slow version, as we do when
5025 the function is known to be outside a hot spot (this is known with
5026 feedback only). Weight the size of the function by the number of registers
5027 to save, as it is cheap to use one or two push instructions but very
5028 slow to use many of them. */
5029 if (count)
5030 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5031 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5032 || (flag_branch_probabilities
5033 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5034 cfun->machine->use_fast_prologue_epilogue = false;
5035 else
5036 cfun->machine->use_fast_prologue_epilogue
5037 = !expensive_function_p (count);
5038 }
5039 if (TARGET_PROLOGUE_USING_MOVE
5040 && cfun->machine->use_fast_prologue_epilogue)
5041 frame->save_regs_using_mov = true;
5042 else
5043 frame->save_regs_using_mov = false;
5044
5045
5046 /* Skip return address and saved base pointer. */
5047 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5048
5049 frame->hard_frame_pointer_offset = offset;
5050
5051 /* Do some sanity checking of stack_alignment_needed and
5052 preferred_alignment, since the i386 port is the only one using these
5053 features and they may break easily. */
5054
5055 if (size && !stack_alignment_needed)
5056 abort ();
5057 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5058 abort ();
5059 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5060 abort ();
5061 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5062 abort ();
5063
5064 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5065 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5066
5067 /* Register save area */
5068 offset += frame->nregs * UNITS_PER_WORD;
5069
5070 /* Va-arg area */
5071 if (ix86_save_varrargs_registers)
5072 {
5073 offset += X86_64_VARARGS_SIZE;
5074 frame->va_arg_size = X86_64_VARARGS_SIZE;
5075 }
5076 else
5077 frame->va_arg_size = 0;
5078
5079 /* Align start of frame for local function. */
5080 frame->padding1 = ((offset + stack_alignment_needed - 1)
5081 & -stack_alignment_needed) - offset;
5082
5083 offset += frame->padding1;
5084
5085 /* Frame pointer points here. */
5086 frame->frame_pointer_offset = offset;
5087
5088 offset += size;
5089
5090 /* Add the outgoing arguments area. It can be skipped if we eliminated
5091 all the function calls as dead code.
5092 Skipping is however impossible when the function calls alloca, as the
5093 alloca expander assumes that the last current_function_outgoing_args_size
5094 bytes of the stack frame are unused. */
5095 if (ACCUMULATE_OUTGOING_ARGS
5096 && (!current_function_is_leaf || current_function_calls_alloca))
5097 {
5098 offset += current_function_outgoing_args_size;
5099 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5100 }
5101 else
5102 frame->outgoing_arguments_size = 0;
5103
5104 /* Align stack boundary. Only needed if we're calling another function
5105 or using alloca. */
5106 if (!current_function_is_leaf || current_function_calls_alloca)
5107 frame->padding2 = ((offset + preferred_alignment - 1)
5108 & -preferred_alignment) - offset;
5109 else
5110 frame->padding2 = 0;
5111
5112 offset += frame->padding2;
5113
5114 /* We've reached end of stack frame. */
5115 frame->stack_pointer_offset = offset;
5116
5117 /* Size the prologue needs to allocate. */
5118 frame->to_allocate =
5119 (size + frame->padding1 + frame->padding2
5120 + frame->outgoing_arguments_size + frame->va_arg_size);
5121
5122 if ((!frame->to_allocate && frame->nregs <= 1)
5123 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5124 frame->save_regs_using_mov = false;
5125
5126 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5127 && current_function_is_leaf)
5128 {
5129 frame->red_zone_size = frame->to_allocate;
5130 if (frame->save_regs_using_mov)
5131 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5132 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5133 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5134 }
5135 else
5136 frame->red_zone_size = 0;
5137 frame->to_allocate -= frame->red_zone_size;
5138 frame->stack_pointer_offset -= frame->red_zone_size;
5139 #if 0
5140 fprintf (stderr, "nregs: %i\n", frame->nregs);
5141 fprintf (stderr, "size: %i\n", size);
5142 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5143 fprintf (stderr, "padding1: %i\n", frame->padding1);
5144 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5145 fprintf (stderr, "padding2: %i\n", frame->padding2);
5146 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5147 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5148 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5149 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5150 frame->hard_frame_pointer_offset);
5151 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5152 #endif
5153 }
5154
5155 /* Emit code to save registers in the prologue. */
5156
5157 static void
5158 ix86_emit_save_regs (void)
5159 {
5160 int regno;
5161 rtx insn;
5162
5163 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5164 if (ix86_save_reg (regno, true))
5165 {
5166 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5167 RTX_FRAME_RELATED_P (insn) = 1;
5168 }
5169 }
5170
5171 /* Emit code to save registers using MOV insns. The first register
5172 is saved at POINTER + OFFSET. */
5173 static void
5174 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5175 {
5176 int regno;
5177 rtx insn;
5178
5179 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5180 if (ix86_save_reg (regno, true))
5181 {
5182 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5183 Pmode, offset),
5184 gen_rtx_REG (Pmode, regno));
5185 RTX_FRAME_RELATED_P (insn) = 1;
5186 offset += UNITS_PER_WORD;
5187 }
5188 }
5189
5190 /* Expand prologue or epilogue stack adjustment.
5191 The pattern exists to put a dependency on all ebp-based memory accesses.
5192 STYLE should be negative if instructions should be marked as frame related,
5193 zero if the %r11 register is live and cannot be freely used, and positive
5194 otherwise. */
5195
5196 static void
5197 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5198 {
5199 rtx insn;
5200
5201 if (! TARGET_64BIT)
5202 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5203 else if (x86_64_immediate_operand (offset, DImode))
5204 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5205 else
5206 {
5207 rtx r11;
5208 /* r11 is used by indirect sibcall return as well, set before the
5209 epilogue and used after the epilogue. ATM indirect sibcall
5210 shouldn't be used together with huge frame sizes in one
5211 function because of the frame_size check in sibcall.c. */
5212 if (style == 0)
5213 abort ();
5214 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5215 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5216 if (style < 0)
5217 RTX_FRAME_RELATED_P (insn) = 1;
5218 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5219 offset));
5220 }
5221 if (style < 0)
5222 RTX_FRAME_RELATED_P (insn) = 1;
5223 }
5224
5225 /* Expand the prologue into a bunch of separate insns. */
5226
5227 void
5228 ix86_expand_prologue (void)
5229 {
5230 rtx insn;
5231 bool pic_reg_used;
5232 struct ix86_frame frame;
5233 HOST_WIDE_INT allocate;
5234
5235 ix86_compute_frame_layout (&frame);
5236
5237 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5238 slower on all targets. Also sdb doesn't like it. */
5239
5240 if (frame_pointer_needed)
5241 {
5242 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5243 RTX_FRAME_RELATED_P (insn) = 1;
5244
5245 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5246 RTX_FRAME_RELATED_P (insn) = 1;
5247 }
5248
5249 allocate = frame.to_allocate;
5250
5251 if (!frame.save_regs_using_mov)
5252 ix86_emit_save_regs ();
5253 else
5254 allocate += frame.nregs * UNITS_PER_WORD;
5255
5256 /* When using the red zone we may start saving registers before allocating
5257 the stack frame, saving one cycle of the prologue. */
5258 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5259 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5260 : stack_pointer_rtx,
5261 -frame.nregs * UNITS_PER_WORD);
5262
5263 if (allocate == 0)
5264 ;
5265 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5266 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5267 GEN_INT (-allocate), -1);
5268 else
5269 {
5270 /* Only valid for Win32. */
5271 rtx eax = gen_rtx_REG (SImode, 0);
5272 bool eax_live = ix86_eax_live_at_start_p ();
5273
5274 if (TARGET_64BIT)
5275 abort ();
5276
5277 if (eax_live)
5278 {
5279 emit_insn (gen_push (eax));
5280 allocate -= 4;
5281 }
5282
5283 insn = emit_move_insn (eax, GEN_INT (allocate));
5284 RTX_FRAME_RELATED_P (insn) = 1;
5285
5286 insn = emit_insn (gen_allocate_stack_worker (eax));
5287 RTX_FRAME_RELATED_P (insn) = 1;
5288
5289 if (eax_live)
5290 {
5291 rtx t = plus_constant (stack_pointer_rtx, allocate);
5292 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5293 }
5294 }
5295
5296 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5297 {
5298 if (!frame_pointer_needed || !frame.to_allocate)
5299 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5300 else
5301 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5302 -frame.nregs * UNITS_PER_WORD);
5303 }
5304
5305 pic_reg_used = false;
5306 if (pic_offset_table_rtx
5307 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5308 || current_function_profile))
5309 {
5310 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5311
5312 if (alt_pic_reg_used != INVALID_REGNUM)
5313 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5314
5315 pic_reg_used = true;
5316 }
5317
5318 if (pic_reg_used)
5319 {
5320 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5321
5322 /* Even with accurate pre-reload life analysis, we can wind up
5323 deleting all references to the pic register after reload.
5324 Consider if cross-jumping unifies two sides of a branch
5325 controlled by a comparison vs the only read from a global.
5326 In which case, allow the set_got to be deleted, though we're
5327 too late to do anything about the ebx save in the prologue. */
5328 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5329 }
5330
5331 /* Prevent function calls from being scheduled before the call to mcount.
5332 In the pic_reg_used case, make sure that the got load isn't deleted. */
5333 if (current_function_profile)
5334 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5335 }
5336
5337 /* Emit code to restore saved registers using MOV insns. First register
5338 is restored from POINTER + OFFSET. */
5339 static void
5340 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5341 int maybe_eh_return)
5342 {
5343 int regno;
5344 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5345
5346 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5347 if (ix86_save_reg (regno, maybe_eh_return))
5348 {
5349 /* Ensure that adjust_address won't be forced to produce a pointer
5350 out of the range allowed by the x86-64 instruction set. */
5351 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5352 {
5353 rtx r11;
5354
5355 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5356 emit_move_insn (r11, GEN_INT (offset));
5357 emit_insn (gen_adddi3 (r11, r11, pointer));
5358 base_address = gen_rtx_MEM (Pmode, r11);
5359 offset = 0;
5360 }
5361 emit_move_insn (gen_rtx_REG (Pmode, regno),
5362 adjust_address (base_address, Pmode, offset));
5363 offset += UNITS_PER_WORD;
5364 }
5365 }
5366
5367 /* Restore function stack, frame, and registers. */
5368
5369 void
5370 ix86_expand_epilogue (int style)
5371 {
5372 int regno;
5373 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5374 struct ix86_frame frame;
5375 HOST_WIDE_INT offset;
5376
5377 ix86_compute_frame_layout (&frame);
5378
5379 /* Calculate start of saved registers relative to ebp. Special care
5380 must be taken for the normal return case of a function using
5381 eh_return: the eax and edx registers are marked as saved, but not
5382 restored along this path. */
5383 offset = frame.nregs;
5384 if (current_function_calls_eh_return && style != 2)
5385 offset -= 2;
5386 offset *= -UNITS_PER_WORD;
5387
5388 /* If we're only restoring one register and sp is not valid then
5389 use a move instruction to restore the register, since it's
5390 less work than reloading sp and popping the register.
5391
5392 The default code results in a stack adjustment using an add/lea instruction,
5393 while this code results in a LEAVE instruction (or its discrete equivalent),
5394 so it is profitable in some other cases as well, especially when there
5395 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5396 is set and there is exactly one register to pop. This heuristic may need
5397 some tuning in the future. */
5398 if ((!sp_valid && frame.nregs <= 1)
5399 || (TARGET_EPILOGUE_USING_MOVE
5400 && cfun->machine->use_fast_prologue_epilogue
5401 && (frame.nregs > 1 || frame.to_allocate))
5402 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5403 || (frame_pointer_needed && TARGET_USE_LEAVE
5404 && cfun->machine->use_fast_prologue_epilogue
5405 && frame.nregs == 1)
5406 || current_function_calls_eh_return)
5407 {
5408 /* Restore registers. We can use ebp or esp to address the memory
5409 locations. If both are available, default to ebp, since offsets
5410 are known to be small. The only exception is esp pointing directly to
5411 the end of the block of saved registers, where we may simplify the
5412 addressing mode. */
5413
5414 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5415 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5416 frame.to_allocate, style == 2);
5417 else
5418 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5419 offset, style == 2);
5420
5421 /* eh_return epilogues need %ecx added to the stack pointer. */
5422 if (style == 2)
5423 {
5424 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5425
5426 if (frame_pointer_needed)
5427 {
5428 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5429 tmp = plus_constant (tmp, UNITS_PER_WORD);
5430 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5431
5432 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5433 emit_move_insn (hard_frame_pointer_rtx, tmp);
5434
5435 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5436 const0_rtx, style);
5437 }
5438 else
5439 {
5440 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5441 tmp = plus_constant (tmp, (frame.to_allocate
5442 + frame.nregs * UNITS_PER_WORD));
5443 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5444 }
5445 }
5446 else if (!frame_pointer_needed)
5447 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5448 GEN_INT (frame.to_allocate
5449 + frame.nregs * UNITS_PER_WORD),
5450 style);
5451 /* If not an i386, mov & pop is faster than "leave". */
5452 else if (TARGET_USE_LEAVE || optimize_size
5453 || !cfun->machine->use_fast_prologue_epilogue)
5454 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5455 else
5456 {
5457 pro_epilogue_adjust_stack (stack_pointer_rtx,
5458 hard_frame_pointer_rtx,
5459 const0_rtx, style);
5460 if (TARGET_64BIT)
5461 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5462 else
5463 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5464 }
5465 }
5466 else
5467 {
5468 /* First step is to deallocate the stack frame so that we can
5469 pop the registers. */
5470 if (!sp_valid)
5471 {
5472 if (!frame_pointer_needed)
5473 abort ();
5474 pro_epilogue_adjust_stack (stack_pointer_rtx,
5475 hard_frame_pointer_rtx,
5476 GEN_INT (offset), style);
5477 }
5478 else if (frame.to_allocate)
5479 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5480 GEN_INT (frame.to_allocate), style);
5481
5482 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5483 if (ix86_save_reg (regno, false))
5484 {
5485 if (TARGET_64BIT)
5486 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5487 else
5488 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5489 }
5490 if (frame_pointer_needed)
5491 {
5492 /* Leave results in shorter dependency chains on CPUs that are
5493 able to grok it fast. */
5494 if (TARGET_USE_LEAVE)
5495 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5496 else if (TARGET_64BIT)
5497 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5498 else
5499 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5500 }
5501 }
5502
5503 /* Sibcall epilogues don't want a return instruction. */
5504 if (style == 0)
5505 return;
5506
5507 if (current_function_pops_args && current_function_args_size)
5508 {
5509 rtx popc = GEN_INT (current_function_pops_args);
5510
5511 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5512 return address, do an explicit add, and jump indirectly to the
5513 caller. */
5514
5515 if (current_function_pops_args >= 65536)
5516 {
5517 rtx ecx = gen_rtx_REG (SImode, 2);
5518
5519 /* There is no "pascal" calling convention in the 64bit ABI. */
5520 if (TARGET_64BIT)
5521 abort ();
5522
5523 emit_insn (gen_popsi1 (ecx));
5524 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5525 emit_jump_insn (gen_return_indirect_internal (ecx));
5526 }
5527 else
5528 emit_jump_insn (gen_return_pop_internal (popc));
5529 }
5530 else
5531 emit_jump_insn (gen_return_internal ());
5532 }
5533
5534 /* Undo any modification the function may have made to the PIC register. */
5535
5536 static void
5537 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5538 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5539 {
5540 if (pic_offset_table_rtx)
5541 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5542 }
5543 \f
5544 /* Extract the parts of an RTL expression that is a valid memory address
5545 for an instruction. Return 0 if the structure of the address is
5546 grossly off. Return -1 if the address contains ASHIFT, so it is not
5547 strictly valid but is still used for computing the length of the lea insn. */
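/* For illustration (a sketch added here, not part of the original
   commentary): the AT&T-syntax address 12(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg:SI ax) (const_int 4)) (reg:SI bx)) (const_int 12)),
   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 12,
   and the function returns 1.  */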
5548
5549 static int
5550 ix86_decompose_address (rtx addr, struct ix86_address *out)
5551 {
5552 rtx base = NULL_RTX;
5553 rtx index = NULL_RTX;
5554 rtx disp = NULL_RTX;
5555 HOST_WIDE_INT scale = 1;
5556 rtx scale_rtx = NULL_RTX;
5557 int retval = 1;
5558 enum ix86_address_seg seg = SEG_DEFAULT;
5559
5560 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5561 base = addr;
5562 else if (GET_CODE (addr) == PLUS)
5563 {
5564 rtx addends[4], op;
5565 int n = 0, i;
5566
5567 op = addr;
5568 do
5569 {
5570 if (n >= 4)
5571 return 0;
5572 addends[n++] = XEXP (op, 1);
5573 op = XEXP (op, 0);
5574 }
5575 while (GET_CODE (op) == PLUS);
5576 if (n >= 4)
5577 return 0;
5578 addends[n] = op;
5579
5580 for (i = n; i >= 0; --i)
5581 {
5582 op = addends[i];
5583 switch (GET_CODE (op))
5584 {
5585 case MULT:
5586 if (index)
5587 return 0;
5588 index = XEXP (op, 0);
5589 scale_rtx = XEXP (op, 1);
5590 break;
5591
5592 case UNSPEC:
5593 if (XINT (op, 1) == UNSPEC_TP
5594 && TARGET_TLS_DIRECT_SEG_REFS
5595 && seg == SEG_DEFAULT)
5596 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5597 else
5598 return 0;
5599 break;
5600
5601 case REG:
5602 case SUBREG:
5603 if (!base)
5604 base = op;
5605 else if (!index)
5606 index = op;
5607 else
5608 return 0;
5609 break;
5610
5611 case CONST:
5612 case CONST_INT:
5613 case SYMBOL_REF:
5614 case LABEL_REF:
5615 if (disp)
5616 return 0;
5617 disp = op;
5618 break;
5619
5620 default:
5621 return 0;
5622 }
5623 }
5624 }
5625 else if (GET_CODE (addr) == MULT)
5626 {
5627 index = XEXP (addr, 0); /* index*scale */
5628 scale_rtx = XEXP (addr, 1);
5629 }
5630 else if (GET_CODE (addr) == ASHIFT)
5631 {
5632 rtx tmp;
5633
5634 /* We're called for lea too, which implements ashift on occasion. */
5635 index = XEXP (addr, 0);
5636 tmp = XEXP (addr, 1);
5637 if (GET_CODE (tmp) != CONST_INT)
5638 return 0;
5639 scale = INTVAL (tmp);
5640 if ((unsigned HOST_WIDE_INT) scale > 3)
5641 return 0;
5642 scale = 1 << scale;
5643 retval = -1;
5644 }
5645 else
5646 disp = addr; /* displacement */
5647
5648 /* Extract the integral value of scale. */
5649 if (scale_rtx)
5650 {
5651 if (GET_CODE (scale_rtx) != CONST_INT)
5652 return 0;
5653 scale = INTVAL (scale_rtx);
5654 }
5655
5656 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5657 if (base && index && scale == 1
5658 && (index == arg_pointer_rtx
5659 || index == frame_pointer_rtx
5660 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5661 {
5662 rtx tmp = base;
5663 base = index;
5664 index = tmp;
5665 }
5666
5667 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5668 if ((base == hard_frame_pointer_rtx
5669 || base == frame_pointer_rtx
5670 || base == arg_pointer_rtx) && !disp)
5671 disp = const0_rtx;
5672
5673 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5674 Avoid this by transforming to [%esi+0]. */
5675 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5676 && base && !index && !disp
5677 && REG_P (base)
5678 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5679 disp = const0_rtx;
5680
5681 /* Special case: encode reg+reg instead of reg*2. */
5682 if (!base && index && scale && scale == 2)
5683 base = index, scale = 1;
5684
5685 /* Special case: scaling cannot be encoded without base or displacement. */
5686 if (!base && !disp && index && scale != 1)
5687 disp = const0_rtx;
5688
5689 out->base = base;
5690 out->index = index;
5691 out->disp = disp;
5692 out->scale = scale;
5693 out->seg = seg;
5694
5695 return retval;
5696 }
5697 \f
5698 /* Return cost of the memory address x.
5699 For i386, it is better to use a complex address than let gcc copy
5700 the address into a reg and make a new pseudo. But not if the address
5701 requires two regs - that would mean more pseudos with longer
5702 lifetimes. */
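/* Rough examples of the resulting costs (illustrative only): a plain
   register address such as (%eax) keeps the base cost of 1, adding a
   nonzero displacement as in 4(%eax) lowers it, and an address built from
   two distinct pseudo registers is charged extra for each pseudo, so
   addresses that tie up fewer registers are preferred.  */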
5703 static int
5704 ix86_address_cost (rtx x)
5705 {
5706 struct ix86_address parts;
5707 int cost = 1;
5708
5709 if (!ix86_decompose_address (x, &parts))
5710 abort ();
5711
5712 /* More complex memory references are better. */
5713 if (parts.disp && parts.disp != const0_rtx)
5714 cost--;
5715 if (parts.seg != SEG_DEFAULT)
5716 cost--;
5717
5718 /* Attempt to minimize number of registers in the address. */
5719 if ((parts.base
5720 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5721 || (parts.index
5722 && (!REG_P (parts.index)
5723 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5724 cost++;
5725
5726 if (parts.base
5727 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5728 && parts.index
5729 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5730 && parts.base != parts.index)
5731 cost++;
5732
5733 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5734 since its predecode logic can't detect the length of such instructions
5735 and they degenerate to vector decoding. Increase the cost of such
5736 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5737 to split such addresses or even refuse them altogether.
5738
5739 The following addressing modes are affected:
5740 [base+scale*index]
5741 [scale*index+disp]
5742 [base+index]
5743
5744 The first and last cases may be avoidable by explicitly coding a zero
5745 displacement in the memory address, but I don't have an AMD-K6 machine
5746 handy to check this theory. */
5747
5748 if (TARGET_K6
5749 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5750 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5751 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5752 cost += 10;
5753
5754 return cost;
5755 }
5756 \f
5757 /* If X is a machine specific address (i.e. a symbol or label being
5758 referenced as a displacement from the GOT implemented using an
5759 UNSPEC), then return the base term. Otherwise return X. */
5760
5761 rtx
5762 ix86_find_base_term (rtx x)
5763 {
5764 rtx term;
5765
5766 if (TARGET_64BIT)
5767 {
5768 if (GET_CODE (x) != CONST)
5769 return x;
5770 term = XEXP (x, 0);
5771 if (GET_CODE (term) == PLUS
5772 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5773 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5774 term = XEXP (term, 0);
5775 if (GET_CODE (term) != UNSPEC
5776 || XINT (term, 1) != UNSPEC_GOTPCREL)
5777 return x;
5778
5779 term = XVECEXP (term, 0, 0);
5780
5781 if (GET_CODE (term) != SYMBOL_REF
5782 && GET_CODE (term) != LABEL_REF)
5783 return x;
5784
5785 return term;
5786 }
5787
5788 term = ix86_delegitimize_address (x);
5789
5790 if (GET_CODE (term) != SYMBOL_REF
5791 && GET_CODE (term) != LABEL_REF)
5792 return x;
5793
5794 return term;
5795 }
5796 \f
5797 /* Determine if a given RTX is a valid constant. We already know this
5798 satisfies CONSTANT_P. */
5799
5800 bool
5801 legitimate_constant_p (rtx x)
5802 {
5803 rtx inner;
5804
5805 switch (GET_CODE (x))
5806 {
5807 case SYMBOL_REF:
5808 /* TLS symbols are not constant. */
5809 if (tls_symbolic_operand (x, Pmode))
5810 return false;
5811 break;
5812
5813 case CONST:
5814 inner = XEXP (x, 0);
5815
5816 /* Offsets of TLS symbols are never valid.
5817 Discourage CSE from creating them. */
5818 if (GET_CODE (inner) == PLUS
5819 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5820 return false;
5821
5822 if (GET_CODE (inner) == PLUS)
5823 {
5824 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5825 return false;
5826 inner = XEXP (inner, 0);
5827 }
5828
5829 /* Only some unspecs are valid as "constants". */
5830 if (GET_CODE (inner) == UNSPEC)
5831 switch (XINT (inner, 1))
5832 {
5833 case UNSPEC_TPOFF:
5834 case UNSPEC_NTPOFF:
5835 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5836 case UNSPEC_DTPOFF:
5837 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5838 default:
5839 return false;
5840 }
5841 break;
5842
5843 default:
5844 break;
5845 }
5846
5847 /* Otherwise we handle everything else in the move patterns. */
5848 return true;
5849 }
5850
5851 /* Determine if it's legal to put X into the constant pool. This
5852 is not possible for the address of thread-local symbols, which
5853 is checked above. */
5854
5855 static bool
5856 ix86_cannot_force_const_mem (rtx x)
5857 {
5858 return !legitimate_constant_p (x);
5859 }
5860
5861 /* Determine if a given RTX is a valid constant address. */
5862
5863 bool
5864 constant_address_p (rtx x)
5865 {
5866 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5867 }
5868
5869 /* Nonzero if the constant value X is a legitimate general operand
5870 when generating PIC code. It is given that flag_pic is on and
5871 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5872
5873 bool
5874 legitimate_pic_operand_p (rtx x)
5875 {
5876 rtx inner;
5877
5878 switch (GET_CODE (x))
5879 {
5880 case CONST:
5881 inner = XEXP (x, 0);
5882
5883 /* Only some unspecs are valid as "constants". */
5884 if (GET_CODE (inner) == UNSPEC)
5885 switch (XINT (inner, 1))
5886 {
5887 case UNSPEC_TPOFF:
5888 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5889 default:
5890 return false;
5891 }
5892 /* FALLTHRU */
5893
5894 case SYMBOL_REF:
5895 case LABEL_REF:
5896 return legitimate_pic_address_disp_p (x);
5897
5898 default:
5899 return true;
5900 }
5901 }
5902
5903 /* Determine if a given CONST RTX is a valid memory displacement
5904 in PIC mode. */
5905
5906 int
5907 legitimate_pic_address_disp_p (rtx disp)
5908 {
5909 bool saw_plus;
5910
5911 /* In 64bit mode we can allow direct addresses of symbols and labels
5912 when they are not dynamic symbols. */
5913 if (TARGET_64BIT)
5914 {
5915 /* TLS references should always be enclosed in UNSPEC. */
5916 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5917 return 0;
5918 if (GET_CODE (disp) == SYMBOL_REF
5919 && ix86_cmodel == CM_SMALL_PIC
5920 && SYMBOL_REF_LOCAL_P (disp))
5921 return 1;
5922 if (GET_CODE (disp) == LABEL_REF)
5923 return 1;
5924 if (GET_CODE (disp) == CONST
5925 && GET_CODE (XEXP (disp, 0)) == PLUS)
5926 {
5927 rtx op0 = XEXP (XEXP (disp, 0), 0);
5928 rtx op1 = XEXP (XEXP (disp, 0), 1);
5929
5930 /* TLS references should always be enclosed in UNSPEC. */
5931 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5932 return 0;
5933 if (((GET_CODE (op0) == SYMBOL_REF
5934 && ix86_cmodel == CM_SMALL_PIC
5935 && SYMBOL_REF_LOCAL_P (op0))
5936 || GET_CODE (op0) == LABEL_REF)
5937 && GET_CODE (op1) == CONST_INT
5938 && INTVAL (op1) < 16*1024*1024
5939 && INTVAL (op1) >= -16*1024*1024)
5940 return 1;
5941 }
5942 }
5943 if (GET_CODE (disp) != CONST)
5944 return 0;
5945 disp = XEXP (disp, 0);
5946
5947 if (TARGET_64BIT)
5948 {
5949 /* It is not safe to allow PLUS expressions here. This limits the allowed
5950 distance of GOT references; we should not need these anyway. */
5951 if (GET_CODE (disp) != UNSPEC
5952 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5953 return 0;
5954
5955 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5956 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5957 return 0;
5958 return 1;
5959 }
5960
5961 saw_plus = false;
5962 if (GET_CODE (disp) == PLUS)
5963 {
5964 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5965 return 0;
5966 disp = XEXP (disp, 0);
5967 saw_plus = true;
5968 }
5969
5970 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5971 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5972 {
5973 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5974 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5975 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5976 {
5977 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5978 if (! strcmp (sym_name, "<pic base>"))
5979 return 1;
5980 }
5981 }
5982
5983 if (GET_CODE (disp) != UNSPEC)
5984 return 0;
5985
5986 switch (XINT (disp, 1))
5987 {
5988 case UNSPEC_GOT:
5989 if (saw_plus)
5990 return false;
5991 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5992 case UNSPEC_GOTOFF:
5993 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5994 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5995 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5996 return false;
5997 case UNSPEC_GOTTPOFF:
5998 case UNSPEC_GOTNTPOFF:
5999 case UNSPEC_INDNTPOFF:
6000 if (saw_plus)
6001 return false;
6002 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6003 case UNSPEC_NTPOFF:
6004 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6005 case UNSPEC_DTPOFF:
6006 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6007 }
6008
6009 return 0;
6010 }
6011
6012 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6013 memory address for an instruction. The MODE argument is the machine mode
6014 for the MEM expression that wants to use this address.
6015
6016 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6017 convert common non-canonical forms to canonical form so that they will
6018 be recognized. */
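/* As an illustration (not from the original comment): an address such as
   (plus (reg) (ashift (reg) (const_int 2))) is rejected here because
   ix86_decompose_address does not accept ASHIFT inside a PLUS, while its
   canonical form (plus (mult (reg) (const_int 4)) (reg)) is accepted,
   assuming the registers themselves are valid for base and index use.  */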
6019
6020 int
6021 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6022 {
6023 struct ix86_address parts;
6024 rtx base, index, disp;
6025 HOST_WIDE_INT scale;
6026 const char *reason = NULL;
6027 rtx reason_rtx = NULL_RTX;
6028
6029 if (TARGET_DEBUG_ADDR)
6030 {
6031 fprintf (stderr,
6032 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6033 GET_MODE_NAME (mode), strict);
6034 debug_rtx (addr);
6035 }
6036
6037 if (ix86_decompose_address (addr, &parts) <= 0)
6038 {
6039 reason = "decomposition failed";
6040 goto report_error;
6041 }
6042
6043 base = parts.base;
6044 index = parts.index;
6045 disp = parts.disp;
6046 scale = parts.scale;
6047
6048 /* Validate base register.
6049
6050 Don't allow SUBREGs here; they can lead to spill failures when the base
6051 is one word out of a two word structure, which is represented internally
6052 as a DImode int. */
6053
6054 if (base)
6055 {
6056 reason_rtx = base;
6057
6058 if (GET_CODE (base) != REG)
6059 {
6060 reason = "base is not a register";
6061 goto report_error;
6062 }
6063
6064 if (GET_MODE (base) != Pmode)
6065 {
6066 reason = "base is not in Pmode";
6067 goto report_error;
6068 }
6069
6070 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6071 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6072 {
6073 reason = "base is not valid";
6074 goto report_error;
6075 }
6076 }
6077
6078 /* Validate index register.
6079
6080 Don't allow SUBREGs here; they can lead to spill failures when the index
6081 is one word out of a two word structure, which is represented internally
6082 as a DImode int. */
6083
6084 if (index)
6085 {
6086 reason_rtx = index;
6087
6088 if (GET_CODE (index) != REG)
6089 {
6090 reason = "index is not a register";
6091 goto report_error;
6092 }
6093
6094 if (GET_MODE (index) != Pmode)
6095 {
6096 reason = "index is not in Pmode";
6097 goto report_error;
6098 }
6099
6100 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6101 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6102 {
6103 reason = "index is not valid";
6104 goto report_error;
6105 }
6106 }
6107
6108 /* Validate scale factor. */
6109 if (scale != 1)
6110 {
6111 reason_rtx = GEN_INT (scale);
6112 if (!index)
6113 {
6114 reason = "scale without index";
6115 goto report_error;
6116 }
6117
6118 if (scale != 2 && scale != 4 && scale != 8)
6119 {
6120 reason = "scale is not a valid multiplier";
6121 goto report_error;
6122 }
6123 }
6124
6125 /* Validate displacement. */
6126 if (disp)
6127 {
6128 reason_rtx = disp;
6129
6130 if (GET_CODE (disp) == CONST
6131 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6132 switch (XINT (XEXP (disp, 0), 1))
6133 {
6134 case UNSPEC_GOT:
6135 case UNSPEC_GOTOFF:
6136 case UNSPEC_GOTPCREL:
6137 if (!flag_pic)
6138 abort ();
6139 goto is_legitimate_pic;
6140
6141 case UNSPEC_GOTTPOFF:
6142 case UNSPEC_GOTNTPOFF:
6143 case UNSPEC_INDNTPOFF:
6144 case UNSPEC_NTPOFF:
6145 case UNSPEC_DTPOFF:
6146 break;
6147
6148 default:
6149 reason = "invalid address unspec";
6150 goto report_error;
6151 }
6152
6153 else if (flag_pic && (SYMBOLIC_CONST (disp)
6154 #if TARGET_MACHO
6155 && !machopic_operand_p (disp)
6156 #endif
6157 ))
6158 {
6159 is_legitimate_pic:
6160 if (TARGET_64BIT && (index || base))
6161 {
6162 /* foo@dtpoff(%rX) is ok. */
6163 if (GET_CODE (disp) != CONST
6164 || GET_CODE (XEXP (disp, 0)) != PLUS
6165 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6166 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6167 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6168 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6169 {
6170 reason = "non-constant pic memory reference";
6171 goto report_error;
6172 }
6173 }
6174 else if (! legitimate_pic_address_disp_p (disp))
6175 {
6176 reason = "displacement is an invalid pic construct";
6177 goto report_error;
6178 }
6179
6180 /* This code used to verify that a symbolic pic displacement
6181 includes the pic_offset_table_rtx register.
6182
6183 While this is a good idea, unfortunately these constructs may
6184 be created by the "adds using lea" optimization for incorrect
6185 code like:
6186
6187 int a;
6188 int foo(int i)
6189 {
6190 return *(&a+i);
6191 }
6192
6193 This code is nonsensical, but results in addressing the
6194 GOT table with a pic_offset_table_rtx base. We can't
6195 just refuse it easily, since it gets matched by the
6196 "addsi3" pattern, which later gets split to an lea when
6197 the output register differs from the input. While this
6198 could be handled by a separate addsi pattern for this case
6199 that never results in an lea, disabling this test seems to
6200 be the easier and correct fix for the crash. */
6201 }
6202 else if (GET_CODE (disp) != LABEL_REF
6203 && GET_CODE (disp) != CONST_INT
6204 && (GET_CODE (disp) != CONST
6205 || !legitimate_constant_p (disp))
6206 && (GET_CODE (disp) != SYMBOL_REF
6207 || !legitimate_constant_p (disp)))
6208 {
6209 reason = "displacement is not constant";
6210 goto report_error;
6211 }
6212 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6213 {
6214 reason = "displacement is out of range";
6215 goto report_error;
6216 }
6217 }
6218
6219 /* Everything looks valid. */
6220 if (TARGET_DEBUG_ADDR)
6221 fprintf (stderr, "Success.\n");
6222 return TRUE;
6223
6224 report_error:
6225 if (TARGET_DEBUG_ADDR)
6226 {
6227 fprintf (stderr, "Error: %s\n", reason);
6228 debug_rtx (reason_rtx);
6229 }
6230 return FALSE;
6231 }
6232 \f
6233 /* Return a unique alias set for the GOT. */
6234
6235 static HOST_WIDE_INT
6236 ix86_GOT_alias_set (void)
6237 {
6238 static HOST_WIDE_INT set = -1;
6239 if (set == -1)
6240 set = new_alias_set ();
6241 return set;
6242 }
6243
6244 /* Return a legitimate reference for ORIG (an address) using the
6245 register REG. If REG is 0, a new pseudo is generated.
6246
6247 There are two types of references that must be handled:
6248
6249 1. Global data references must load the address from the GOT, via
6250 the PIC reg. An insn is emitted to do this load, and the reg is
6251 returned.
6252
6253 2. Static data references, constant pool addresses, and code labels
6254 compute the address as an offset from the GOT, whose base is in
6255 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6256 differentiate them from global data objects. The returned
6257 address is the PIC reg + an unspec constant.
6258
6259 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6260 reg also appears in the address. */
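/* As a rough sketch of the results (added for illustration): in 32-bit PIC
   code a local symbol "foo" becomes
   (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF))),
   while a global symbol is loaded from the GOT,
   (mem (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOT)))).  */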
6261
6262 rtx
6263 legitimize_pic_address (rtx orig, rtx reg)
6264 {
6265 rtx addr = orig;
6266 rtx new = orig;
6267 rtx base;
6268
6269 #if TARGET_MACHO
6270 if (reg == 0)
6271 reg = gen_reg_rtx (Pmode);
6272 /* Use the generic Mach-O PIC machinery. */
6273 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6274 #endif
6275
6276 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6277 new = addr;
6278 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6279 {
6280 /* This symbol may be referenced via a displacement from the PIC
6281 base address (@GOTOFF). */
6282
6283 if (reload_in_progress)
6284 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6285 if (GET_CODE (addr) == CONST)
6286 addr = XEXP (addr, 0);
6287 if (GET_CODE (addr) == PLUS)
6288 {
6289 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6290 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6291 }
6292 else
6293 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6294 new = gen_rtx_CONST (Pmode, new);
6295 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6296
6297 if (reg != 0)
6298 {
6299 emit_move_insn (reg, new);
6300 new = reg;
6301 }
6302 }
6303 else if (GET_CODE (addr) == SYMBOL_REF)
6304 {
6305 if (TARGET_64BIT)
6306 {
6307 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6308 new = gen_rtx_CONST (Pmode, new);
6309 new = gen_rtx_MEM (Pmode, new);
6310 RTX_UNCHANGING_P (new) = 1;
6311 set_mem_alias_set (new, ix86_GOT_alias_set ());
6312
6313 if (reg == 0)
6314 reg = gen_reg_rtx (Pmode);
6315 /* Use gen_movsi directly, otherwise the address is loaded
6316 into a register for CSE. We don't want to CSE these addresses;
6317 instead we CSE addresses from the GOT table, so skip this. */
6318 emit_insn (gen_movsi (reg, new));
6319 new = reg;
6320 }
6321 else
6322 {
6323 /* This symbol must be referenced via a load from the
6324 Global Offset Table (@GOT). */
6325
6326 if (reload_in_progress)
6327 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6328 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6329 new = gen_rtx_CONST (Pmode, new);
6330 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6331 new = gen_rtx_MEM (Pmode, new);
6332 RTX_UNCHANGING_P (new) = 1;
6333 set_mem_alias_set (new, ix86_GOT_alias_set ());
6334
6335 if (reg == 0)
6336 reg = gen_reg_rtx (Pmode);
6337 emit_move_insn (reg, new);
6338 new = reg;
6339 }
6340 }
6341 else
6342 {
6343 if (GET_CODE (addr) == CONST)
6344 {
6345 addr = XEXP (addr, 0);
6346
6347 /* We must match stuff we generated before. Assume the only
6348 unspecs that can get here are ours. Not that we could do
6349 anything with them anyway.... */
6350 if (GET_CODE (addr) == UNSPEC
6351 || (GET_CODE (addr) == PLUS
6352 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6353 return orig;
6354 if (GET_CODE (addr) != PLUS)
6355 abort ();
6356 }
6357 if (GET_CODE (addr) == PLUS)
6358 {
6359 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6360
6361 /* Check first to see if this is a constant offset from a @GOTOFF
6362 symbol reference. */
6363 if (local_symbolic_operand (op0, Pmode)
6364 && GET_CODE (op1) == CONST_INT)
6365 {
6366 if (!TARGET_64BIT)
6367 {
6368 if (reload_in_progress)
6369 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6370 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6371 UNSPEC_GOTOFF);
6372 new = gen_rtx_PLUS (Pmode, new, op1);
6373 new = gen_rtx_CONST (Pmode, new);
6374 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6375
6376 if (reg != 0)
6377 {
6378 emit_move_insn (reg, new);
6379 new = reg;
6380 }
6381 }
6382 else
6383 {
6384 if (INTVAL (op1) < -16*1024*1024
6385 || INTVAL (op1) >= 16*1024*1024)
6386 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6387 }
6388 }
6389 else
6390 {
6391 base = legitimize_pic_address (XEXP (addr, 0), reg);
6392 new = legitimize_pic_address (XEXP (addr, 1),
6393 base == reg ? NULL_RTX : reg);
6394
6395 if (GET_CODE (new) == CONST_INT)
6396 new = plus_constant (base, INTVAL (new));
6397 else
6398 {
6399 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6400 {
6401 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6402 new = XEXP (new, 1);
6403 }
6404 new = gen_rtx_PLUS (Pmode, base, new);
6405 }
6406 }
6407 }
6408 }
6409 return new;
6410 }
6411 \f
6412 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6413
6414 static rtx
6415 get_thread_pointer (int to_reg)
6416 {
6417 rtx tp, reg, insn;
6418
6419 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6420 if (!to_reg)
6421 return tp;
6422
6423 reg = gen_reg_rtx (Pmode);
6424 insn = gen_rtx_SET (VOIDmode, reg, tp);
6425 insn = emit_insn (insn);
6426
6427 return reg;
6428 }
6429
6430 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6431 false if we expect this to be used for a memory address and true if
6432 we expect to load the address into a register. */
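/* Sketch of the results (illustrative, assuming TARGET_GNU_TLS): the local
   exec model yields (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF))),
   the initial exec model first loads the offset from the GOT and then adds
   the thread pointer, and the dynamic models emit calls to the tls_get_addr
   machinery through the patterns used below.  */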
6433
6434 static rtx
6435 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6436 {
6437 rtx dest, base, off, pic;
6438 int type;
6439
6440 switch (model)
6441 {
6442 case TLS_MODEL_GLOBAL_DYNAMIC:
6443 dest = gen_reg_rtx (Pmode);
6444 if (TARGET_64BIT)
6445 {
6446 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6447
6448 start_sequence ();
6449 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6450 insns = get_insns ();
6451 end_sequence ();
6452
6453 emit_libcall_block (insns, dest, rax, x);
6454 }
6455 else
6456 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6457 break;
6458
6459 case TLS_MODEL_LOCAL_DYNAMIC:
6460 base = gen_reg_rtx (Pmode);
6461 if (TARGET_64BIT)
6462 {
6463 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6464
6465 start_sequence ();
6466 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6467 insns = get_insns ();
6468 end_sequence ();
6469
6470 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6471 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6472 emit_libcall_block (insns, base, rax, note);
6473 }
6474 else
6475 emit_insn (gen_tls_local_dynamic_base_32 (base));
6476
6477 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6478 off = gen_rtx_CONST (Pmode, off);
6479
6480 return gen_rtx_PLUS (Pmode, base, off);
6481
6482 case TLS_MODEL_INITIAL_EXEC:
6483 if (TARGET_64BIT)
6484 {
6485 pic = NULL;
6486 type = UNSPEC_GOTNTPOFF;
6487 }
6488 else if (flag_pic)
6489 {
6490 if (reload_in_progress)
6491 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6492 pic = pic_offset_table_rtx;
6493 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6494 }
6495 else if (!TARGET_GNU_TLS)
6496 {
6497 pic = gen_reg_rtx (Pmode);
6498 emit_insn (gen_set_got (pic));
6499 type = UNSPEC_GOTTPOFF;
6500 }
6501 else
6502 {
6503 pic = NULL;
6504 type = UNSPEC_INDNTPOFF;
6505 }
6506
6507 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6508 off = gen_rtx_CONST (Pmode, off);
6509 if (pic)
6510 off = gen_rtx_PLUS (Pmode, pic, off);
6511 off = gen_rtx_MEM (Pmode, off);
6512 RTX_UNCHANGING_P (off) = 1;
6513 set_mem_alias_set (off, ix86_GOT_alias_set ());
6514
6515 if (TARGET_64BIT || TARGET_GNU_TLS)
6516 {
6517 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6518 off = force_reg (Pmode, off);
6519 return gen_rtx_PLUS (Pmode, base, off);
6520 }
6521 else
6522 {
6523 base = get_thread_pointer (true);
6524 dest = gen_reg_rtx (Pmode);
6525 emit_insn (gen_subsi3 (dest, base, off));
6526 }
6527 break;
6528
6529 case TLS_MODEL_LOCAL_EXEC:
6530 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6531 (TARGET_64BIT || TARGET_GNU_TLS)
6532 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6533 off = gen_rtx_CONST (Pmode, off);
6534
6535 if (TARGET_64BIT || TARGET_GNU_TLS)
6536 {
6537 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6538 return gen_rtx_PLUS (Pmode, base, off);
6539 }
6540 else
6541 {
6542 base = get_thread_pointer (true);
6543 dest = gen_reg_rtx (Pmode);
6544 emit_insn (gen_subsi3 (dest, base, off));
6545 }
6546 break;
6547
6548 default:
6549 abort ();
6550 }
6551
6552 return dest;
6553 }
6554
6555 /* Try machine-dependent ways of modifying an illegitimate address
6556 to be legitimate. If we find one, return the new, valid address.
6557 This macro is used in only one place: `memory_address' in explow.c.
6558
6559 OLDX is the address as it was before break_out_memory_refs was called.
6560 In some cases it is useful to look at this to decide what needs to be done.
6561
6562 MODE and WIN are passed so that this macro can use
6563 GO_IF_LEGITIMATE_ADDRESS.
6564
6565 It is always safe for this macro to do nothing. It exists to recognize
6566 opportunities to optimize the output.
6567
6568 For the 80386, we handle X+REG by loading X into a register R and
6569 using R+REG. R will go in a general reg and indexing will be used.
6570 However, if REG is a broken-out memory address or multiplication,
6571 nothing needs to be done because REG can certainly go in a general reg.
6572
6573 When -fpic is used, special handling is needed for symbolic references.
6574 See comments by legitimize_pic_address in i386.c for details. */
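/* One concrete case handled below (given for illustration):
   (plus (ashift (reg) (const_int 2)) (reg)) is rewritten as
   (plus (mult (reg) (const_int 4)) (reg)), which matches the scaled-index
   addressing forms accepted by GO_IF_LEGITIMATE_ADDRESS.  */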
6575
6576 rtx
6577 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6578 {
6579 int changed = 0;
6580 unsigned log;
6581
6582 if (TARGET_DEBUG_ADDR)
6583 {
6584 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6585 GET_MODE_NAME (mode));
6586 debug_rtx (x);
6587 }
6588
6589 log = tls_symbolic_operand (x, mode);
6590 if (log)
6591 return legitimize_tls_address (x, log, false);
6592
6593 if (flag_pic && SYMBOLIC_CONST (x))
6594 return legitimize_pic_address (x, 0);
6595
6596 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6597 if (GET_CODE (x) == ASHIFT
6598 && GET_CODE (XEXP (x, 1)) == CONST_INT
6599 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6600 {
6601 changed = 1;
6602 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6603 GEN_INT (1 << log));
6604 }
6605
6606 if (GET_CODE (x) == PLUS)
6607 {
6608 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6609
6610 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6611 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6612 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6613 {
6614 changed = 1;
6615 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6616 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6617 GEN_INT (1 << log));
6618 }
6619
6620 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6621 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6622 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6623 {
6624 changed = 1;
6625 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6626 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6627 GEN_INT (1 << log));
6628 }
6629
6630 /* Put multiply first if it isn't already. */
6631 if (GET_CODE (XEXP (x, 1)) == MULT)
6632 {
6633 rtx tmp = XEXP (x, 0);
6634 XEXP (x, 0) = XEXP (x, 1);
6635 XEXP (x, 1) = tmp;
6636 changed = 1;
6637 }
6638
6639 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6640 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6641 created by virtual register instantiation, register elimination, and
6642 similar optimizations. */
6643 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6644 {
6645 changed = 1;
6646 x = gen_rtx_PLUS (Pmode,
6647 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6648 XEXP (XEXP (x, 1), 0)),
6649 XEXP (XEXP (x, 1), 1));
6650 }
6651
6652 /* Canonicalize
6653 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6654 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6655 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6656 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6657 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6658 && CONSTANT_P (XEXP (x, 1)))
6659 {
6660 rtx constant;
6661 rtx other = NULL_RTX;
6662
6663 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6664 {
6665 constant = XEXP (x, 1);
6666 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6667 }
6668 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6669 {
6670 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6671 other = XEXP (x, 1);
6672 }
6673 else
6674 constant = 0;
6675
6676 if (constant)
6677 {
6678 changed = 1;
6679 x = gen_rtx_PLUS (Pmode,
6680 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6681 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6682 plus_constant (other, INTVAL (constant)));
6683 }
6684 }
6685
6686 if (changed && legitimate_address_p (mode, x, FALSE))
6687 return x;
6688
6689 if (GET_CODE (XEXP (x, 0)) == MULT)
6690 {
6691 changed = 1;
6692 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6693 }
6694
6695 if (GET_CODE (XEXP (x, 1)) == MULT)
6696 {
6697 changed = 1;
6698 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6699 }
6700
6701 if (changed
6702 && GET_CODE (XEXP (x, 1)) == REG
6703 && GET_CODE (XEXP (x, 0)) == REG)
6704 return x;
6705
6706 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6707 {
6708 changed = 1;
6709 x = legitimize_pic_address (x, 0);
6710 }
6711
6712 if (changed && legitimate_address_p (mode, x, FALSE))
6713 return x;
6714
6715 if (GET_CODE (XEXP (x, 0)) == REG)
6716 {
6717 rtx temp = gen_reg_rtx (Pmode);
6718 rtx val = force_operand (XEXP (x, 1), temp);
6719 if (val != temp)
6720 emit_move_insn (temp, val);
6721
6722 XEXP (x, 1) = temp;
6723 return x;
6724 }
6725
6726 else if (GET_CODE (XEXP (x, 1)) == REG)
6727 {
6728 rtx temp = gen_reg_rtx (Pmode);
6729 rtx val = force_operand (XEXP (x, 0), temp);
6730 if (val != temp)
6731 emit_move_insn (temp, val);
6732
6733 XEXP (x, 0) = temp;
6734 return x;
6735 }
6736 }
6737
6738 return x;
6739 }
6740 \f
6741 /* Print an integer constant expression in assembler syntax. Addition
6742 and subtraction are the only arithmetic that may appear in these
6743 expressions. FILE is the stdio stream to write to, X is the rtx, and
6744 CODE is the operand print code from the output string. */
6745
6746 static void
6747 output_pic_addr_const (FILE *file, rtx x, int code)
6748 {
6749 char buf[256];
6750
6751 switch (GET_CODE (x))
6752 {
6753 case PC:
6754 if (flag_pic)
6755 putc ('.', file);
6756 else
6757 abort ();
6758 break;
6759
6760 case SYMBOL_REF:
6761 assemble_name (file, XSTR (x, 0));
6762 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6763 fputs ("@PLT", file);
6764 break;
6765
6766 case LABEL_REF:
6767 x = XEXP (x, 0);
6768 /* FALLTHRU */
6769 case CODE_LABEL:
6770 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6771 assemble_name (asm_out_file, buf);
6772 break;
6773
6774 case CONST_INT:
6775 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6776 break;
6777
6778 case CONST:
6779 /* This used to output parentheses around the expression,
6780 but that does not work on the 386 (either ATT or BSD assembler). */
6781 output_pic_addr_const (file, XEXP (x, 0), code);
6782 break;
6783
6784 case CONST_DOUBLE:
6785 if (GET_MODE (x) == VOIDmode)
6786 {
6787 /* We can use %d if the number is <32 bits and positive. */
6788 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6789 fprintf (file, "0x%lx%08lx",
6790 (unsigned long) CONST_DOUBLE_HIGH (x),
6791 (unsigned long) CONST_DOUBLE_LOW (x));
6792 else
6793 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6794 }
6795 else
6796 /* We can't handle floating point constants;
6797 PRINT_OPERAND must handle them. */
6798 output_operand_lossage ("floating constant misused");
6799 break;
6800
6801 case PLUS:
6802 /* Some assemblers need integer constants to appear first. */
6803 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6804 {
6805 output_pic_addr_const (file, XEXP (x, 0), code);
6806 putc ('+', file);
6807 output_pic_addr_const (file, XEXP (x, 1), code);
6808 }
6809 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6810 {
6811 output_pic_addr_const (file, XEXP (x, 1), code);
6812 putc ('+', file);
6813 output_pic_addr_const (file, XEXP (x, 0), code);
6814 }
6815 else
6816 abort ();
6817 break;
6818
6819 case MINUS:
6820 if (!TARGET_MACHO)
6821 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6822 output_pic_addr_const (file, XEXP (x, 0), code);
6823 putc ('-', file);
6824 output_pic_addr_const (file, XEXP (x, 1), code);
6825 if (!TARGET_MACHO)
6826 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6827 break;
6828
6829 case UNSPEC:
6830 if (XVECLEN (x, 0) != 1)
6831 abort ();
6832 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6833 switch (XINT (x, 1))
6834 {
6835 case UNSPEC_GOT:
6836 fputs ("@GOT", file);
6837 break;
6838 case UNSPEC_GOTOFF:
6839 fputs ("@GOTOFF", file);
6840 break;
6841 case UNSPEC_GOTPCREL:
6842 fputs ("@GOTPCREL(%rip)", file);
6843 break;
6844 case UNSPEC_GOTTPOFF:
6845 /* FIXME: This might be @TPOFF in Sun ld too. */
6846 fputs ("@GOTTPOFF", file);
6847 break;
6848 case UNSPEC_TPOFF:
6849 fputs ("@TPOFF", file);
6850 break;
6851 case UNSPEC_NTPOFF:
6852 if (TARGET_64BIT)
6853 fputs ("@TPOFF", file);
6854 else
6855 fputs ("@NTPOFF", file);
6856 break;
6857 case UNSPEC_DTPOFF:
6858 fputs ("@DTPOFF", file);
6859 break;
6860 case UNSPEC_GOTNTPOFF:
6861 if (TARGET_64BIT)
6862 fputs ("@GOTTPOFF(%rip)", file);
6863 else
6864 fputs ("@GOTNTPOFF", file);
6865 break;
6866 case UNSPEC_INDNTPOFF:
6867 fputs ("@INDNTPOFF", file);
6868 break;
6869 default:
6870 output_operand_lossage ("invalid UNSPEC as operand");
6871 break;
6872 }
6873 break;
6874
6875 default:
6876 output_operand_lossage ("invalid expression as operand");
6877 }
6878 }
6879
6880 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6881 We need to handle our special PIC relocations. */
6882
6883 void
6884 i386_dwarf_output_addr_const (FILE *file, rtx x)
6885 {
6886 #ifdef ASM_QUAD
6887 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6888 #else
6889 if (TARGET_64BIT)
6890 abort ();
6891 fprintf (file, "%s", ASM_LONG);
6892 #endif
6893 if (flag_pic)
6894 output_pic_addr_const (file, x, '\0');
6895 else
6896 output_addr_const (file, x);
6897 fputc ('\n', file);
6898 }
6899
6900 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6901 We need to emit DTP-relative relocations. */
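/* For SIZE == 4 this emits something like ".long foo@DTPOFF" (the exact
   directive depends on ASM_LONG); for SIZE == 8 a ", 0" is appended to
   zero-fill the upper half.  */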
6902
6903 void
6904 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6905 {
6906 fputs (ASM_LONG, file);
6907 output_addr_const (file, x);
6908 fputs ("@DTPOFF", file);
6909 switch (size)
6910 {
6911 case 4:
6912 break;
6913 case 8:
6914 fputs (", 0", file);
6915 break;
6916 default:
6917 abort ();
6918 }
6919 }
6920
6921 /* In the name of slightly smaller debug output, and to cater to
6922 general assembler lossage, recognize PIC+GOTOFF and turn it back
6923 into a direct symbol reference. */
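/* For instance (a sketch): (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into the bare symbol_ref "foo", and a GOT load of the form
   (mem (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOT)))) likewise yields "foo".  */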
6924
6925 static rtx
6926 ix86_delegitimize_address (rtx orig_x)
6927 {
6928 rtx x = orig_x, y;
6929
6930 if (GET_CODE (x) == MEM)
6931 x = XEXP (x, 0);
6932
6933 if (TARGET_64BIT)
6934 {
6935 if (GET_CODE (x) != CONST
6936 || GET_CODE (XEXP (x, 0)) != UNSPEC
6937 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6938 || GET_CODE (orig_x) != MEM)
6939 return orig_x;
6940 return XVECEXP (XEXP (x, 0), 0, 0);
6941 }
6942
6943 if (GET_CODE (x) != PLUS
6944 || GET_CODE (XEXP (x, 1)) != CONST)
6945 return orig_x;
6946
6947 if (GET_CODE (XEXP (x, 0)) == REG
6948 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6949 /* %ebx + GOT/GOTOFF */
6950 y = NULL;
6951 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6952 {
6953 /* %ebx + %reg * scale + GOT/GOTOFF */
6954 y = XEXP (x, 0);
6955 if (GET_CODE (XEXP (y, 0)) == REG
6956 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6957 y = XEXP (y, 1);
6958 else if (GET_CODE (XEXP (y, 1)) == REG
6959 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6960 y = XEXP (y, 0);
6961 else
6962 return orig_x;
6963 if (GET_CODE (y) != REG
6964 && GET_CODE (y) != MULT
6965 && GET_CODE (y) != ASHIFT)
6966 return orig_x;
6967 }
6968 else
6969 return orig_x;
6970
6971 x = XEXP (XEXP (x, 1), 0);
6972 if (GET_CODE (x) == UNSPEC
6973 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6974 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6975 {
6976 if (y)
6977 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6978 return XVECEXP (x, 0, 0);
6979 }
6980
6981 if (GET_CODE (x) == PLUS
6982 && GET_CODE (XEXP (x, 0)) == UNSPEC
6983 && GET_CODE (XEXP (x, 1)) == CONST_INT
6984 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6985 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6986 && GET_CODE (orig_x) != MEM)))
6987 {
6988 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6989 if (y)
6990 return gen_rtx_PLUS (Pmode, y, x);
6991 return x;
6992 }
6993
6994 return orig_x;
6995 }
6996 \f
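/* Output to FILE the condition suffix ("e", "ne", "g", "a", ...) for the
   comparison CODE in mode MODE.  REVERSE reverses the condition first; FP
   selects the fcmov-style spellings for the unsigned and unordered cases.  */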
6997 static void
6998 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6999 int fp, FILE *file)
7000 {
7001 const char *suffix;
7002
7003 if (mode == CCFPmode || mode == CCFPUmode)
7004 {
7005 enum rtx_code second_code, bypass_code;
7006 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7007 if (bypass_code != NIL || second_code != NIL)
7008 abort ();
7009 code = ix86_fp_compare_code_to_integer (code);
7010 mode = CCmode;
7011 }
7012 if (reverse)
7013 code = reverse_condition (code);
7014
7015 switch (code)
7016 {
7017 case EQ:
7018 suffix = "e";
7019 break;
7020 case NE:
7021 suffix = "ne";
7022 break;
7023 case GT:
7024 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7025 abort ();
7026 suffix = "g";
7027 break;
7028 case GTU:
7029 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7030 Those same assemblers have the same but opposite lossage on cmov. */
7031 if (mode != CCmode)
7032 abort ();
7033 suffix = fp ? "nbe" : "a";
7034 break;
7035 case LT:
7036 if (mode == CCNOmode || mode == CCGOCmode)
7037 suffix = "s";
7038 else if (mode == CCmode || mode == CCGCmode)
7039 suffix = "l";
7040 else
7041 abort ();
7042 break;
7043 case LTU:
7044 if (mode != CCmode)
7045 abort ();
7046 suffix = "b";
7047 break;
7048 case GE:
7049 if (mode == CCNOmode || mode == CCGOCmode)
7050 suffix = "ns";
7051 else if (mode == CCmode || mode == CCGCmode)
7052 suffix = "ge";
7053 else
7054 abort ();
7055 break;
7056 case GEU:
7057 /* ??? As above. */
7058 if (mode != CCmode)
7059 abort ();
7060 suffix = fp ? "nb" : "ae";
7061 break;
7062 case LE:
7063 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7064 abort ();
7065 suffix = "le";
7066 break;
7067 case LEU:
7068 if (mode != CCmode)
7069 abort ();
7070 suffix = "be";
7071 break;
7072 case UNORDERED:
7073 suffix = fp ? "u" : "p";
7074 break;
7075 case ORDERED:
7076 suffix = fp ? "nu" : "np";
7077 break;
7078 default:
7079 abort ();
7080 }
7081 fputs (suffix, file);
7082 }
7083
7084 /* Print the name of register X to FILE based on its machine mode and number.
7085 If CODE is 'w', pretend the mode is HImode.
7086 If CODE is 'b', pretend the mode is QImode.
7087 If CODE is 'k', pretend the mode is SImode.
7088 If CODE is 'q', pretend the mode is DImode.
7089 If CODE is 'h', pretend the reg is the `high' byte register.
7090 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
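/* For example (illustrative): for hard register 0, CODE 'b' prints %al,
   'w' prints %ax, 'k' prints %eax and, on 64-bit targets, 'q' prints %rax;
   the '%' prefix itself is only emitted for the AT&T dialect or when there
   is no user label prefix.  */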
7091
7092 void
7093 print_reg (rtx x, int code, FILE *file)
7094 {
7095 if (REGNO (x) == ARG_POINTER_REGNUM
7096 || REGNO (x) == FRAME_POINTER_REGNUM
7097 || REGNO (x) == FLAGS_REG
7098 || REGNO (x) == FPSR_REG)
7099 abort ();
7100
7101 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7102 putc ('%', file);
7103
7104 if (code == 'w' || MMX_REG_P (x))
7105 code = 2;
7106 else if (code == 'b')
7107 code = 1;
7108 else if (code == 'k')
7109 code = 4;
7110 else if (code == 'q')
7111 code = 8;
7112 else if (code == 'y')
7113 code = 3;
7114 else if (code == 'h')
7115 code = 0;
7116 else
7117 code = GET_MODE_SIZE (GET_MODE (x));
7118
7119 /* Irritatingly, AMD extended registers use a different naming convention
7120 from the normal registers. */
7121 if (REX_INT_REG_P (x))
7122 {
7123 if (!TARGET_64BIT)
7124 abort ();
7125 switch (code)
7126 {
7127 case 0:
7128 error ("extended registers have no high halves");
7129 break;
7130 case 1:
7131 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7132 break;
7133 case 2:
7134 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7135 break;
7136 case 4:
7137 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7138 break;
7139 case 8:
7140 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7141 break;
7142 default:
7143 error ("unsupported operand size for extended register");
7144 break;
7145 }
7146 return;
7147 }
7148 switch (code)
7149 {
7150 case 3:
7151 if (STACK_TOP_P (x))
7152 {
7153 fputs ("st(0)", file);
7154 break;
7155 }
7156 /* FALLTHRU */
7157 case 8:
7158 case 4:
7159 case 12:
7160 if (! ANY_FP_REG_P (x))
7161 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7162 /* FALLTHRU */
7163 case 16:
7164 case 2:
7165 normal:
7166 fputs (hi_reg_name[REGNO (x)], file);
7167 break;
7168 case 1:
7169 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7170 goto normal;
7171 fputs (qi_reg_name[REGNO (x)], file);
7172 break;
7173 case 0:
7174 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7175 goto normal;
7176 fputs (qi_high_reg_name[REGNO (x)], file);
7177 break;
7178 default:
7179 abort ();
7180 }
7181 }
7182
7183 /* Locate some local-dynamic symbol still in use by this function
7184 so that we can print its name in some tls_local_dynamic_base
7185 pattern. */
7186
7187 static const char *
7188 get_some_local_dynamic_name (void)
7189 {
7190 rtx insn;
7191
7192 if (cfun->machine->some_ld_name)
7193 return cfun->machine->some_ld_name;
7194
7195 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7196 if (INSN_P (insn)
7197 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7198 return cfun->machine->some_ld_name;
7199
7200 abort ();
7201 }
7202
7203 static int
7204 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7205 {
7206 rtx x = *px;
7207
7208 if (GET_CODE (x) == SYMBOL_REF
7209 && local_dynamic_symbolic_operand (x, Pmode))
7210 {
7211 cfun->machine->some_ld_name = XSTR (x, 0);
7212 return 1;
7213 }
7214
7215 return 0;
7216 }
7217
7218 /* Meaning of CODE:
7219 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7220 C -- print opcode suffix for set/cmov insn.
7221 c -- like C, but print reversed condition
7222 F,f -- likewise, but for floating-point.
7223 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7224 otherwise nothing
7225 R -- print the prefix for register names.
7226 z -- print the opcode suffix for the size of the current operand.
7227 * -- print a star (in certain assembler syntax)
7228 A -- print an absolute memory reference.
7229 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7230 s -- print a shift double count, followed by the assembler's argument
7231 delimiter.
7232 b -- print the QImode name of the register for the indicated operand.
7233 %b0 would print %al if operands[0] is reg 0.
7234 w -- likewise, print the HImode name of the register.
7235 k -- likewise, print the SImode name of the register.
7236 q -- likewise, print the DImode name of the register.
7237 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7238 y -- print "st(0)" instead of "st" as a register.
7239 D -- print condition for SSE cmp instruction.
7240 P -- if PIC, print an @PLT suffix.
7241 X -- don't print any sort of PIC '@' suffix for a symbol.
7242 & -- print some in-use local-dynamic symbol name.
7243 */
7244
7245 void
7246 print_operand (FILE *file, rtx x, int code)
7247 {
7248 if (code)
7249 {
7250 switch (code)
7251 {
7252 case '*':
7253 if (ASSEMBLER_DIALECT == ASM_ATT)
7254 putc ('*', file);
7255 return;
7256
7257 case '&':
7258 assemble_name (file, get_some_local_dynamic_name ());
7259 return;
7260
7261 case 'A':
7262 if (ASSEMBLER_DIALECT == ASM_ATT)
7263 putc ('*', file);
7264 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7265 {
7266 /* Intel syntax. For absolute addresses, registers should not
7267 be surrounded by braces. */
7268 if (GET_CODE (x) != REG)
7269 {
7270 putc ('[', file);
7271 PRINT_OPERAND (file, x, 0);
7272 putc (']', file);
7273 return;
7274 }
7275 }
7276 else
7277 abort ();
7278
7279 PRINT_OPERAND (file, x, 0);
7280 return;
7281
7282
7283 case 'L':
7284 if (ASSEMBLER_DIALECT == ASM_ATT)
7285 putc ('l', file);
7286 return;
7287
7288 case 'W':
7289 if (ASSEMBLER_DIALECT == ASM_ATT)
7290 putc ('w', file);
7291 return;
7292
7293 case 'B':
7294 if (ASSEMBLER_DIALECT == ASM_ATT)
7295 putc ('b', file);
7296 return;
7297
7298 case 'Q':
7299 if (ASSEMBLER_DIALECT == ASM_ATT)
7300 putc ('l', file);
7301 return;
7302
7303 case 'S':
7304 if (ASSEMBLER_DIALECT == ASM_ATT)
7305 putc ('s', file);
7306 return;
7307
7308 case 'T':
7309 if (ASSEMBLER_DIALECT == ASM_ATT)
7310 putc ('t', file);
7311 return;
7312
7313 case 'z':
7314 /* 387 opcodes don't get size suffixes if the operands are
7315 registers. */
7316 if (STACK_REG_P (x))
7317 return;
7318
7319 /* Likewise if using Intel opcodes. */
7320 if (ASSEMBLER_DIALECT == ASM_INTEL)
7321 return;
7322
7323 /* This is the size of op from size of operand. */
7324 switch (GET_MODE_SIZE (GET_MODE (x)))
7325 {
7326 case 2:
7327 #ifdef HAVE_GAS_FILDS_FISTS
7328 putc ('s', file);
7329 #endif
7330 return;
7331
7332 case 4:
7333 if (GET_MODE (x) == SFmode)
7334 {
7335 putc ('s', file);
7336 return;
7337 }
7338 else
7339 putc ('l', file);
7340 return;
7341
7342 case 12:
7343 case 16:
7344 putc ('t', file);
7345 return;
7346
7347 case 8:
7348 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7349 {
7350 #ifdef GAS_MNEMONICS
7351 putc ('q', file);
7352 #else
7353 putc ('l', file);
7354 putc ('l', file);
7355 #endif
7356 }
7357 else
7358 putc ('l', file);
7359 return;
7360
7361 default:
7362 abort ();
7363 }
7364
7365 case 'b':
7366 case 'w':
7367 case 'k':
7368 case 'q':
7369 case 'h':
7370 case 'y':
7371 case 'X':
7372 case 'P':
7373 break;
7374
7375 case 's':
7376 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7377 {
7378 PRINT_OPERAND (file, x, 0);
7379 putc (',', file);
7380 }
7381 return;
7382
7383 case 'D':
7384 /* Little bit of braindamage here. The SSE compare instructions
7385 use completely different names for the comparisons than the
7386 fp conditional moves do. */
7387 switch (GET_CODE (x))
7388 {
7389 case EQ:
7390 case UNEQ:
7391 fputs ("eq", file);
7392 break;
7393 case LT:
7394 case UNLT:
7395 fputs ("lt", file);
7396 break;
7397 case LE:
7398 case UNLE:
7399 fputs ("le", file);
7400 break;
7401 case UNORDERED:
7402 fputs ("unord", file);
7403 break;
7404 case NE:
7405 case LTGT:
7406 fputs ("neq", file);
7407 break;
7408 case UNGE:
7409 case GE:
7410 fputs ("nlt", file);
7411 break;
7412 case UNGT:
7413 case GT:
7414 fputs ("nle", file);
7415 break;
7416 case ORDERED:
7417 fputs ("ord", file);
7418 break;
7419 default:
7420 abort ();
7421 break;
7422 }
7423 return;
7424 case 'O':
7425 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7426 if (ASSEMBLER_DIALECT == ASM_ATT)
7427 {
7428 switch (GET_MODE (x))
7429 {
7430 case HImode: putc ('w', file); break;
7431 case SImode:
7432 case SFmode: putc ('l', file); break;
7433 case DImode:
7434 case DFmode: putc ('q', file); break;
7435 default: abort ();
7436 }
7437 putc ('.', file);
7438 }
7439 #endif
7440 return;
7441 case 'C':
7442 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7443 return;
7444 case 'F':
7445 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7446 if (ASSEMBLER_DIALECT == ASM_ATT)
7447 putc ('.', file);
7448 #endif
7449 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7450 return;
7451
7452 /* Like above, but reverse condition */
7453 case 'c':
7454 /* Check to see if argument to %c is really a constant
7455 and not a condition code which needs to be reversed. */
7456 if (!COMPARISON_P (x))
7457 {
7458 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7459 return;
7460 }
7461 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7462 return;
7463 case 'f':
7464 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7465 if (ASSEMBLER_DIALECT == ASM_ATT)
7466 putc ('.', file);
7467 #endif
7468 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7469 return;
7470 case '+':
7471 {
7472 rtx x;
7473
7474 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7475 return;
7476
7477 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7478 if (x)
7479 {
7480 int pred_val = INTVAL (XEXP (x, 0));
7481
7482 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7483 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7484 {
7485 int taken = pred_val > REG_BR_PROB_BASE / 2;
7486 int cputaken = final_forward_branch_p (current_output_insn) == 0;
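/* cputaken reflects what the CPU's static predictor is presumed to do:
   backward branches (i.e. not forward ones) are assumed to be taken.  */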
7487
7488 /* Emit hints only in the case the default branch prediction
7489 heuristics would fail. */
7490 if (taken != cputaken)
7491 {
7492 /* We use 3e (DS) prefix for taken branches and
7493 2e (CS) prefix for not taken branches. */
7494 if (taken)
7495 fputs ("ds ; ", file);
7496 else
7497 fputs ("cs ; ", file);
7498 }
7499 }
7500 }
7501 return;
7502 }
7503 default:
7504 output_operand_lossage ("invalid operand code `%c'", code);
7505 }
7506 }
7507
7508 if (GET_CODE (x) == REG)
7509 print_reg (x, code, file);
7510
7511 else if (GET_CODE (x) == MEM)
7512 {
7513 /* No `byte ptr' prefix for call instructions. */
7514 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7515 {
7516 const char * size;
7517 switch (GET_MODE_SIZE (GET_MODE (x)))
7518 {
7519 case 1: size = "BYTE"; break;
7520 case 2: size = "WORD"; break;
7521 case 4: size = "DWORD"; break;
7522 case 8: size = "QWORD"; break;
7523 case 12: size = "XWORD"; break;
7524 case 16: size = "XMMWORD"; break;
7525 default:
7526 abort ();
7527 }
7528
7529 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7530 if (code == 'b')
7531 size = "BYTE";
7532 else if (code == 'w')
7533 size = "WORD";
7534 else if (code == 'k')
7535 size = "DWORD";
7536
7537 fputs (size, file);
7538 fputs (" PTR ", file);
7539 }
7540
7541 x = XEXP (x, 0);
7542 /* Avoid (%rip) for call operands. */
7543 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7544 && GET_CODE (x) != CONST_INT)
7545 output_addr_const (file, x);
7546 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7547 output_operand_lossage ("invalid constraints for operand");
7548 else
7549 output_address (x);
7550 }
7551
7552 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7553 {
7554 REAL_VALUE_TYPE r;
7555 long l;
7556
7557 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7558 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7559
7560 if (ASSEMBLER_DIALECT == ASM_ATT)
7561 putc ('$', file);
7562 fprintf (file, "0x%08lx", l);
7563 }
7564
7565 /* These float cases don't actually occur as immediate operands. */
7566 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7567 {
7568 char dstr[30];
7569
7570 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7571 fprintf (file, "%s", dstr);
7572 }
7573
7574 else if (GET_CODE (x) == CONST_DOUBLE
7575 && GET_MODE (x) == XFmode)
7576 {
7577 char dstr[30];
7578
7579 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7580 fprintf (file, "%s", dstr);
7581 }
7582
7583 else
7584 {
7585 if (code != 'P')
7586 {
7587 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7588 {
7589 if (ASSEMBLER_DIALECT == ASM_ATT)
7590 putc ('$', file);
7591 }
7592 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7593 || GET_CODE (x) == LABEL_REF)
7594 {
7595 if (ASSEMBLER_DIALECT == ASM_ATT)
7596 putc ('$', file);
7597 else
7598 fputs ("OFFSET FLAT:", file);
7599 }
7600 }
7601 if (GET_CODE (x) == CONST_INT)
7602 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7603 else if (flag_pic)
7604 output_pic_addr_const (file, x, code);
7605 else
7606 output_addr_const (file, x);
7607 }
7608 }
7609 \f
7610 /* Print a memory operand whose address is ADDR. */
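/* Summarizing the code below: in AT&T syntax the address is printed as
   disp(base,index,scale); in Intel syntax it comes out as
   [base+disp+index*scale], possibly preceded by a segment override such
   as fs: or gs:.  */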
7611
7612 void
7613 print_operand_address (FILE *file, rtx addr)
7614 {
7615 struct ix86_address parts;
7616 rtx base, index, disp;
7617 int scale;
7618
7619 if (! ix86_decompose_address (addr, &parts))
7620 abort ();
7621
7622 base = parts.base;
7623 index = parts.index;
7624 disp = parts.disp;
7625 scale = parts.scale;
7626
7627 switch (parts.seg)
7628 {
7629 case SEG_DEFAULT:
7630 break;
7631 case SEG_FS:
7632 case SEG_GS:
7633 if (USER_LABEL_PREFIX[0] == 0)
7634 putc ('%', file);
7635 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7636 break;
7637 default:
7638 abort ();
7639 }
7640
7641 if (!base && !index)
7642 {
7643 /* An address consisting of a displacement only requires special attention. */
7644
7645 if (GET_CODE (disp) == CONST_INT)
7646 {
7647 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7648 {
7649 if (USER_LABEL_PREFIX[0] == 0)
7650 putc ('%', file);
7651 fputs ("ds:", file);
7652 }
7653 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7654 }
7655 else if (flag_pic)
7656 output_pic_addr_const (file, disp, 0);
7657 else
7658 output_addr_const (file, disp);
7659
7660 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
7661 if (TARGET_64BIT
7662 && ((GET_CODE (disp) == SYMBOL_REF
7663 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7664 || GET_CODE (disp) == LABEL_REF
7665 || (GET_CODE (disp) == CONST
7666 && GET_CODE (XEXP (disp, 0)) == PLUS
7667 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7668 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7669 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7670 fputs ("(%rip)", file);
7671 }
7672 else
7673 {
7674 if (ASSEMBLER_DIALECT == ASM_ATT)
7675 {
7676 if (disp)
7677 {
7678 if (flag_pic)
7679 output_pic_addr_const (file, disp, 0);
7680 else if (GET_CODE (disp) == LABEL_REF)
7681 output_asm_label (disp);
7682 else
7683 output_addr_const (file, disp);
7684 }
7685
7686 putc ('(', file);
7687 if (base)
7688 print_reg (base, 0, file);
7689 if (index)
7690 {
7691 putc (',', file);
7692 print_reg (index, 0, file);
7693 if (scale != 1)
7694 fprintf (file, ",%d", scale);
7695 }
7696 putc (')', file);
7697 }
7698 else
7699 {
7700 rtx offset = NULL_RTX;
7701
7702 if (disp)
7703 {
7704 /* Pull out the offset of a symbol; print any symbol itself. */
7705 if (GET_CODE (disp) == CONST
7706 && GET_CODE (XEXP (disp, 0)) == PLUS
7707 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7708 {
7709 offset = XEXP (XEXP (disp, 0), 1);
7710 disp = gen_rtx_CONST (VOIDmode,
7711 XEXP (XEXP (disp, 0), 0));
7712 }
7713
7714 if (flag_pic)
7715 output_pic_addr_const (file, disp, 0);
7716 else if (GET_CODE (disp) == LABEL_REF)
7717 output_asm_label (disp);
7718 else if (GET_CODE (disp) == CONST_INT)
7719 offset = disp;
7720 else
7721 output_addr_const (file, disp);
7722 }
7723
7724 putc ('[', file);
7725 if (base)
7726 {
7727 print_reg (base, 0, file);
7728 if (offset)
7729 {
7730 if (INTVAL (offset) >= 0)
7731 putc ('+', file);
7732 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7733 }
7734 }
7735 else if (offset)
7736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7737 else
7738 putc ('0', file);
7739
7740 if (index)
7741 {
7742 putc ('+', file);
7743 print_reg (index, 0, file);
7744 if (scale != 1)
7745 fprintf (file, "*%d", scale);
7746 }
7747 putc (']', file);
7748 }
7749 }
7750 }
7751
7752 bool
7753 output_addr_const_extra (FILE *file, rtx x)
7754 {
7755 rtx op;
7756
7757 if (GET_CODE (x) != UNSPEC)
7758 return false;
7759
7760 op = XVECEXP (x, 0, 0);
7761 switch (XINT (x, 1))
7762 {
7763 case UNSPEC_GOTTPOFF:
7764 output_addr_const (file, op);
7765 /* FIXME: This might be @TPOFF in Sun ld. */
7766 fputs ("@GOTTPOFF", file);
7767 break;
7768 case UNSPEC_TPOFF:
7769 output_addr_const (file, op);
7770 fputs ("@TPOFF", file);
7771 break;
7772 case UNSPEC_NTPOFF:
7773 output_addr_const (file, op);
7774 if (TARGET_64BIT)
7775 fputs ("@TPOFF", file);
7776 else
7777 fputs ("@NTPOFF", file);
7778 break;
7779 case UNSPEC_DTPOFF:
7780 output_addr_const (file, op);
7781 fputs ("@DTPOFF", file);
7782 break;
7783 case UNSPEC_GOTNTPOFF:
7784 output_addr_const (file, op);
7785 if (TARGET_64BIT)
7786 fputs ("@GOTTPOFF(%rip)", file);
7787 else
7788 fputs ("@GOTNTPOFF", file);
7789 break;
7790 case UNSPEC_INDNTPOFF:
7791 output_addr_const (file, op);
7792 fputs ("@INDNTPOFF", file);
7793 break;
7794
7795 default:
7796 return false;
7797 }
7798
7799 return true;
7800 }
7801 \f
7802 /* Split one or more DImode RTL references into pairs of SImode
7803 references. The RTL can be REG, offsettable MEM, integer constant, or
7804 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7805 split and "num" is its length. lo_half and hi_half are output arrays
7806 that parallel "operands". */
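/* For example, a DImode register operand is split into two SImode subregs
   at byte offsets 0 and 4, and a DImode MEM into two SImode memory
   references at the same offsets (illustrative; see the code below).  */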
7807
7808 void
7809 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7810 {
7811 while (num--)
7812 {
7813 rtx op = operands[num];
7814
7815 /* simplify_subreg refuses to split volatile memory addresses,
7816 but we still have to handle them. */
7817 if (GET_CODE (op) == MEM)
7818 {
7819 lo_half[num] = adjust_address (op, SImode, 0);
7820 hi_half[num] = adjust_address (op, SImode, 4);
7821 }
7822 else
7823 {
7824 lo_half[num] = simplify_gen_subreg (SImode, op,
7825 GET_MODE (op) == VOIDmode
7826 ? DImode : GET_MODE (op), 0);
7827 hi_half[num] = simplify_gen_subreg (SImode, op,
7828 GET_MODE (op) == VOIDmode
7829 ? DImode : GET_MODE (op), 4);
7830 }
7831 }
7832 }
7833 /* Split one or more TImode RTL references into pairs of DImode
7834 references. The RTL can be REG, offsettable MEM, integer constant, or
7835 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7836 split and "num" is its length. lo_half and hi_half are output arrays
7837 that parallel "operands". */
7838
7839 void
7840 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7841 {
7842 while (num--)
7843 {
7844 rtx op = operands[num];
7845
7846 /* simplify_subreg refuses to split volatile memory addresses, but we
7847 still have to handle them. */
7848 if (GET_CODE (op) == MEM)
7849 {
7850 lo_half[num] = adjust_address (op, DImode, 0);
7851 hi_half[num] = adjust_address (op, DImode, 8);
7852 }
7853 else
7854 {
7855 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7856 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7857 }
7858 }
7859 }
7860 \f
7861 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7862 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7863 is the expression of the binary operation. The output may either be
7864 emitted here, or returned to the caller, like all output_* functions.
7865
7866 There is no guarantee that the operands are the same mode, as they
7867 might be within FLOAT or FLOAT_EXTEND expressions. */
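/* As a rough illustration: for an SSE SFmode add this returns something
   like "addss\t{%2, %0|%0, %2}", while for an x87 add it builds "fadd"
   (or "fiadd" for an integer operand) plus an operand template chosen by
   the logic below.  */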
7868
7869 #ifndef SYSV386_COMPAT
7870 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7871 wants to fix the assemblers because that causes incompatibility
7872 with gcc. No-one wants to fix gcc because that causes
7873 incompatibility with assemblers... You can use the option of
7874 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7875 #define SYSV386_COMPAT 1
7876 #endif
7877
7878 const char *
7879 output_387_binary_op (rtx insn, rtx *operands)
7880 {
7881 static char buf[30];
7882 const char *p;
7883 const char *ssep;
7884 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7885
7886 #ifdef ENABLE_CHECKING
7887 /* Even if we do not want to check the inputs, this documents input
7888 constraints. Which helps in understanding the following code. */
7889 if (STACK_REG_P (operands[0])
7890 && ((REG_P (operands[1])
7891 && REGNO (operands[0]) == REGNO (operands[1])
7892 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7893 || (REG_P (operands[2])
7894 && REGNO (operands[0]) == REGNO (operands[2])
7895 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7896 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7897 ; /* ok */
7898 else if (!is_sse)
7899 abort ();
7900 #endif
7901
7902 switch (GET_CODE (operands[3]))
7903 {
7904 case PLUS:
7905 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7906 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7907 p = "fiadd";
7908 else
7909 p = "fadd";
7910 ssep = "add";
7911 break;
7912
7913 case MINUS:
7914 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7915 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7916 p = "fisub";
7917 else
7918 p = "fsub";
7919 ssep = "sub";
7920 break;
7921
7922 case MULT:
7923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7925 p = "fimul";
7926 else
7927 p = "fmul";
7928 ssep = "mul";
7929 break;
7930
7931 case DIV:
7932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7934 p = "fidiv";
7935 else
7936 p = "fdiv";
7937 ssep = "div";
7938 break;
7939
7940 default:
7941 abort ();
7942 }
7943
7944 if (is_sse)
7945 {
7946 strcpy (buf, ssep);
7947 if (GET_MODE (operands[0]) == SFmode)
7948 strcat (buf, "ss\t{%2, %0|%0, %2}");
7949 else
7950 strcat (buf, "sd\t{%2, %0|%0, %2}");
7951 return buf;
7952 }
7953 strcpy (buf, p);
7954
7955 switch (GET_CODE (operands[3]))
7956 {
7957 case MULT:
7958 case PLUS:
7959 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7960 {
7961 rtx temp = operands[2];
7962 operands[2] = operands[1];
7963 operands[1] = temp;
7964 }
7965
7966 /* We know operands[0] == operands[1]. */
7967
7968 if (GET_CODE (operands[2]) == MEM)
7969 {
7970 p = "%z2\t%2";
7971 break;
7972 }
7973
7974 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7975 {
7976 if (STACK_TOP_P (operands[0]))
7977 /* How is it that we are storing to a dead operand[2]?
7978 Well, presumably operands[1] is dead too. We can't
7979 store the result to st(0) as st(0) gets popped on this
7980 instruction. Instead store to operands[2] (which I
7981 think has to be st(1)). st(1) will be popped later.
7982 gcc <= 2.8.1 didn't have this check and generated
7983 assembly code that the Unixware assembler rejected. */
7984 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7985 else
7986 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7987 break;
7988 }
7989
7990 if (STACK_TOP_P (operands[0]))
7991 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7992 else
7993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7994 break;
7995
7996 case MINUS:
7997 case DIV:
7998 if (GET_CODE (operands[1]) == MEM)
7999 {
8000 p = "r%z1\t%1";
8001 break;
8002 }
8003
8004 if (GET_CODE (operands[2]) == MEM)
8005 {
8006 p = "%z2\t%2";
8007 break;
8008 }
8009
8010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8011 {
8012 #if SYSV386_COMPAT
8013 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8014 derived assemblers, confusingly reverse the direction of
8015 the operation for fsub{r} and fdiv{r} when the
8016 destination register is not st(0). The Intel assembler
8017 doesn't have this brain damage. Read !SYSV386_COMPAT to
8018 figure out what the hardware really does. */
8019 if (STACK_TOP_P (operands[0]))
8020 p = "{p\t%0, %2|rp\t%2, %0}";
8021 else
8022 p = "{rp\t%2, %0|p\t%0, %2}";
8023 #else
8024 if (STACK_TOP_P (operands[0]))
8025 /* As above for fmul/fadd, we can't store to st(0). */
8026 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8027 else
8028 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8029 #endif
8030 break;
8031 }
8032
8033 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8034 {
8035 #if SYSV386_COMPAT
8036 if (STACK_TOP_P (operands[0]))
8037 p = "{rp\t%0, %1|p\t%1, %0}";
8038 else
8039 p = "{p\t%1, %0|rp\t%0, %1}";
8040 #else
8041 if (STACK_TOP_P (operands[0]))
8042 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8043 else
8044 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8045 #endif
8046 break;
8047 }
8048
8049 if (STACK_TOP_P (operands[0]))
8050 {
8051 if (STACK_TOP_P (operands[1]))
8052 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8053 else
8054 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8055 break;
8056 }
8057 else if (STACK_TOP_P (operands[1]))
8058 {
8059 #if SYSV386_COMPAT
8060 p = "{\t%1, %0|r\t%0, %1}";
8061 #else
8062 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8063 #endif
8064 }
8065 else
8066 {
8067 #if SYSV386_COMPAT
8068 p = "{r\t%2, %0|\t%0, %2}";
8069 #else
8070 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8071 #endif
8072 }
8073 break;
8074
8075 default:
8076 abort ();
8077 }
8078
8079 strcat (buf, p);
8080 return buf;
8081 }
8082
8083 /* Output code to initialize the control word copies used by the
8084 trunc?f?i patterns. NORMAL is set to the current control word, while
8085 ROUND_DOWN is set to a control word that truncates (rounds towards zero). */
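/* Both variants below end up setting bits 10 and 11 of the control word
   (the x87 rounding-control field), i.e. 0xc00, which selects
   round-toward-zero -- the rounding mode the trunc?f?i patterns need.  */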
8086 void
8087 emit_i387_cw_initialization (rtx normal, rtx round_down)
8088 {
8089 rtx reg = gen_reg_rtx (HImode);
8090
8091 emit_insn (gen_x86_fnstcw_1 (normal));
8092 emit_move_insn (reg, normal);
8093 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8094 && !TARGET_64BIT)
8095 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8096 else
8097 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8098 emit_move_insn (round_down, reg);
8099 }
8100
8101 /* Output code for INSN to convert a float to a signed int. OPERANDS
8102 are the insn operands. The output may be [HSD]Imode and the input
8103 operand may be [SDX]Fmode. */
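/* The emitted sequence, roughly: load the truncating control word (%3),
   store the value with fist/fistp, then restore the original control
   word (%2).  */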
8104
8105 const char *
8106 output_fix_trunc (rtx insn, rtx *operands)
8107 {
8108 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8109 int dimode_p = GET_MODE (operands[0]) == DImode;
8110
8111 /* Jump through a hoop or two for DImode, since the hardware has no
8112 non-popping instruction. We used to do this a different way, but
8113 that was somewhat fragile and broke with post-reload splitters. */
8114 if (dimode_p && !stack_top_dies)
8115 output_asm_insn ("fld\t%y1", operands);
8116
8117 if (!STACK_TOP_P (operands[1]))
8118 abort ();
8119
8120 if (GET_CODE (operands[0]) != MEM)
8121 abort ();
8122
8123 output_asm_insn ("fldcw\t%3", operands);
8124 if (stack_top_dies || dimode_p)
8125 output_asm_insn ("fistp%z0\t%0", operands);
8126 else
8127 output_asm_insn ("fist%z0\t%0", operands);
8128 output_asm_insn ("fldcw\t%2", operands);
8129
8130 return "";
8131 }
8132
8133 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8134 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8135 when fucom should be used. */
8136
8137 const char *
8138 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8139 {
8140 int stack_top_dies;
8141 rtx cmp_op0 = operands[0];
8142 rtx cmp_op1 = operands[1];
8143 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8144
8145 if (eflags_p == 2)
8146 {
8147 cmp_op0 = cmp_op1;
8148 cmp_op1 = operands[2];
8149 }
8150 if (is_sse)
8151 {
8152 if (GET_MODE (operands[0]) == SFmode)
8153 if (unordered_p)
8154 return "ucomiss\t{%1, %0|%0, %1}";
8155 else
8156 return "comiss\t{%1, %0|%0, %1}";
8157 else
8158 if (unordered_p)
8159 return "ucomisd\t{%1, %0|%0, %1}";
8160 else
8161 return "comisd\t{%1, %0|%0, %1}";
8162 }
8163
8164 if (! STACK_TOP_P (cmp_op0))
8165 abort ();
8166
8167 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8168
8169 if (STACK_REG_P (cmp_op1)
8170 && stack_top_dies
8171 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8172 && REGNO (cmp_op1) != FIRST_STACK_REG)
8173 {
8174 /* If both the top of the 387 stack and the other operand (also
8175 a stack register) die, then this must be a `fcompp' float
8176 compare. */
8177
8178 if (eflags_p == 1)
8179 {
8180 /* There is no double popping fcomi variant. Fortunately,
8181 eflags is immune from the fstp's cc clobbering. */
8182 if (unordered_p)
8183 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8184 else
8185 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8186 return "fstp\t%y0";
8187 }
8188 else
8189 {
8190 if (eflags_p == 2)
8191 {
8192 if (unordered_p)
8193 return "fucompp\n\tfnstsw\t%0";
8194 else
8195 return "fcompp\n\tfnstsw\t%0";
8196 }
8197 else
8198 {
8199 if (unordered_p)
8200 return "fucompp";
8201 else
8202 return "fcompp";
8203 }
8204 }
8205 }
8206 else
8207 {
8208 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8209
8210 static const char * const alt[24] =
8211 {
8212 "fcom%z1\t%y1",
8213 "fcomp%z1\t%y1",
8214 "fucom%z1\t%y1",
8215 "fucomp%z1\t%y1",
8216
8217 "ficom%z1\t%y1",
8218 "ficomp%z1\t%y1",
8219 NULL,
8220 NULL,
8221
8222 "fcomi\t{%y1, %0|%0, %y1}",
8223 "fcomip\t{%y1, %0|%0, %y1}",
8224 "fucomi\t{%y1, %0|%0, %y1}",
8225 "fucomip\t{%y1, %0|%0, %y1}",
8226
8227 NULL,
8228 NULL,
8229 NULL,
8230 NULL,
8231
8232 "fcom%z2\t%y2\n\tfnstsw\t%0",
8233 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8234 "fucom%z2\t%y2\n\tfnstsw\t%0",
8235 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8236
8237 "ficom%z2\t%y2\n\tfnstsw\t%0",
8238 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8239 NULL,
8240 NULL
8241 };
8242
8243 int mask;
8244 const char *ret;
8245
8246 mask = eflags_p << 3;
8247 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8248 mask |= unordered_p << 1;
8249 mask |= stack_top_dies;
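/* For instance, eflags_p == 1 (fcomi), an FP operand, an unordered
   compare and a dying stack top gives mask == 8 + 2 + 1 == 11, which
   selects "fucomip\t{%y1, %0|%0, %y1}" above.  */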
8250
8251 if (mask >= 24)
8252 abort ();
8253 ret = alt[mask];
8254 if (ret == NULL)
8255 abort ();
8256
8257 return ret;
8258 }
8259 }
8260
8261 void
8262 ix86_output_addr_vec_elt (FILE *file, int value)
8263 {
8264 const char *directive = ASM_LONG;
8265
8266 if (TARGET_64BIT)
8267 {
8268 #ifdef ASM_QUAD
8269 directive = ASM_QUAD;
8270 #else
8271 abort ();
8272 #endif
8273 }
8274
8275 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8276 }
8277
8278 void
8279 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8280 {
8281 if (TARGET_64BIT)
8282 fprintf (file, "%s%s%d-%s%d\n",
8283 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8284 else if (HAVE_AS_GOTOFF_IN_DATA)
8285 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8286 #if TARGET_MACHO
8287 else if (TARGET_MACHO)
8288 {
8289 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8290 machopic_output_function_base_name (file);
8291 fprintf(file, "\n");
8292 }
8293 #endif
8294 else
8295 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8296 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8297 }
8298 \f
8299 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8300 for the target. */
8301
8302 void
8303 ix86_expand_clear (rtx dest)
8304 {
8305 rtx tmp;
8306
8307 /* We play register width games, which are only valid after reload. */
8308 if (!reload_completed)
8309 abort ();
8310
8311 /* Avoid HImode and its attendant prefix byte. */
8312 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8313 dest = gen_rtx_REG (SImode, REGNO (dest));
8314
8315 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8316
8317 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8318 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8319 {
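/* The xor form clobbers the flags, so the SET has to be wrapped in a
   PARALLEL with an explicit CLOBBER of the flags register (hard
   register 17 here).  */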
8320 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8321 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8322 }
8323
8324 emit_insn (tmp);
8325 }
8326
8327 /* X is an unchanging MEM. If it is a constant pool reference, return
8328 the constant pool rtx, else NULL. */
8329
8330 static rtx
8331 maybe_get_pool_constant (rtx x)
8332 {
8333 x = ix86_delegitimize_address (XEXP (x, 0));
8334
8335 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8336 return get_pool_constant (x);
8337
8338 return NULL_RTX;
8339 }
8340
8341 void
8342 ix86_expand_move (enum machine_mode mode, rtx operands[])
8343 {
8344 int strict = (reload_in_progress || reload_completed);
8345 rtx op0, op1;
8346 enum tls_model model;
8347
8348 op0 = operands[0];
8349 op1 = operands[1];
8350
8351 model = tls_symbolic_operand (op1, Pmode);
8352 if (model)
8353 {
8354 op1 = legitimize_tls_address (op1, model, true);
8355 op1 = force_operand (op1, op0);
8356 if (op1 == op0)
8357 return;
8358 }
8359
8360 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8361 {
8362 #if TARGET_MACHO
8363 if (MACHOPIC_PURE)
8364 {
8365 rtx temp = ((reload_in_progress
8366 || ((op0 && GET_CODE (op0) == REG)
8367 && mode == Pmode))
8368 ? op0 : gen_reg_rtx (Pmode));
8369 op1 = machopic_indirect_data_reference (op1, temp);
8370 op1 = machopic_legitimize_pic_address (op1, mode,
8371 temp == op1 ? 0 : temp);
8372 }
8373 else if (MACHOPIC_INDIRECT)
8374 op1 = machopic_indirect_data_reference (op1, 0);
8375 if (op0 == op1)
8376 return;
8377 #else
8378 if (GET_CODE (op0) == MEM)
8379 op1 = force_reg (Pmode, op1);
8380 else
8381 {
8382 rtx temp = op0;
8383 if (GET_CODE (temp) != REG)
8384 temp = gen_reg_rtx (Pmode);
8385 temp = legitimize_pic_address (op1, temp);
8386 if (temp == op0)
8387 return;
8388 op1 = temp;
8389 }
8390 #endif /* TARGET_MACHO */
8391 }
8392 else
8393 {
8394 if (GET_CODE (op0) == MEM
8395 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8396 || !push_operand (op0, mode))
8397 && GET_CODE (op1) == MEM)
8398 op1 = force_reg (mode, op1);
8399
8400 if (push_operand (op0, mode)
8401 && ! general_no_elim_operand (op1, mode))
8402 op1 = copy_to_mode_reg (mode, op1);
8403
8404 /* Force large constants in 64-bit compilation into a register
8405 so they get CSEd. */
8406 if (TARGET_64BIT && mode == DImode
8407 && immediate_operand (op1, mode)
8408 && !x86_64_zero_extended_value (op1)
8409 && !register_operand (op0, mode)
8410 && optimize && !reload_completed && !reload_in_progress)
8411 op1 = copy_to_mode_reg (mode, op1);
8412
8413 if (FLOAT_MODE_P (mode))
8414 {
8415 /* If we are loading a floating point constant to a register,
8416 force the value to memory now, since we'll get better code
8417 out of the back end. */
8418
8419 if (strict)
8420 ;
8421 else if (GET_CODE (op1) == CONST_DOUBLE)
8422 {
8423 op1 = validize_mem (force_const_mem (mode, op1));
8424 if (!register_operand (op0, mode))
8425 {
8426 rtx temp = gen_reg_rtx (mode);
8427 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8428 emit_move_insn (op0, temp);
8429 return;
8430 }
8431 }
8432 }
8433 }
8434
8435 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8436 }
8437
8438 void
8439 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8440 {
8441 /* Force constants other than zero into memory. We do not know how
8442 the instructions used to build constants modify the upper 64 bits
8443 of the register, once we have that information we may be able
8444 to handle some of them more efficiently. */
8445 if ((reload_in_progress | reload_completed) == 0
8446 && register_operand (operands[0], mode)
8447 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8448 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8449
8450 /* Make operand 1 a register if neither operand is a register already. */
8451 if (!no_new_pseudos
8452 && !register_operand (operands[0], mode)
8453 && !register_operand (operands[1], mode))
8454 {
8455 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8456 emit_move_insn (operands[0], temp);
8457 return;
8458 }
8459
8460 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8461 }
8462
8463 /* Attempt to expand a binary operator. Make the expansion closer to the
8464 actual machine than just general_operand, which would allow 3 separate
8465 memory references (one output, two inputs) in a single insn. */
8466
8467 void
8468 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8469 rtx operands[])
8470 {
8471 int matching_memory;
8472 rtx src1, src2, dst, op, clob;
8473
8474 dst = operands[0];
8475 src1 = operands[1];
8476 src2 = operands[2];
8477
8478 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8479 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8480 && (rtx_equal_p (dst, src2)
8481 || immediate_operand (src1, mode)))
8482 {
8483 rtx temp = src1;
8484 src1 = src2;
8485 src2 = temp;
8486 }
8487
8488 /* If the destination is memory, and we do not have matching source
8489 operands, do things in registers. */
8490 matching_memory = 0;
8491 if (GET_CODE (dst) == MEM)
8492 {
8493 if (rtx_equal_p (dst, src1))
8494 matching_memory = 1;
8495 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8496 && rtx_equal_p (dst, src2))
8497 matching_memory = 2;
8498 else
8499 dst = gen_reg_rtx (mode);
8500 }
8501
8502 /* Both source operands cannot be in memory. */
8503 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8504 {
8505 if (matching_memory != 2)
8506 src2 = force_reg (mode, src2);
8507 else
8508 src1 = force_reg (mode, src1);
8509 }
8510
8511 /* If the operation is not commutative, source 1 cannot be a constant
8512 or non-matching memory. */
8513 if ((CONSTANT_P (src1)
8514 || (!matching_memory && GET_CODE (src1) == MEM))
8515 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8516 src1 = force_reg (mode, src1);
8517
8518 /* If optimizing, copy to regs to improve CSE */
8519 if (optimize && ! no_new_pseudos)
8520 {
8521 if (GET_CODE (dst) == MEM)
8522 dst = gen_reg_rtx (mode);
8523 if (GET_CODE (src1) == MEM)
8524 src1 = force_reg (mode, src1);
8525 if (GET_CODE (src2) == MEM)
8526 src2 = force_reg (mode, src2);
8527 }
8528
8529 /* Emit the instruction. */
8530
8531 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8532 if (reload_in_progress)
8533 {
8534 /* Reload doesn't know about the flags register, and doesn't know that
8535 it doesn't want to clobber it. We can only do this with PLUS. */
8536 if (code != PLUS)
8537 abort ();
8538 emit_insn (op);
8539 }
8540 else
8541 {
8542 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8543 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8544 }
8545
8546 /* Fix up the destination if needed. */
8547 if (dst != operands[0])
8548 emit_move_insn (operands[0], dst);
8549 }
8550
8551 /* Return TRUE or FALSE depending on whether the binary operator meets the
8552 appropriate constraints. */
8553
8554 int
8555 ix86_binary_operator_ok (enum rtx_code code,
8556 enum machine_mode mode ATTRIBUTE_UNUSED,
8557 rtx operands[3])
8558 {
8559 /* Both source operands cannot be in memory. */
8560 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8561 return 0;
8562 /* If the operation is not commutative, source 1 cannot be a constant. */
8563 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8564 return 0;
8565 /* If the destination is memory, we must have a matching source operand. */
8566 if (GET_CODE (operands[0]) == MEM
8567 && ! (rtx_equal_p (operands[0], operands[1])
8568 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8569 && rtx_equal_p (operands[0], operands[2]))))
8570 return 0;
8571 /* If the operation is not commutative and source 1 is memory, we must
8572 have a matching destination. */
8573 if (GET_CODE (operands[1]) == MEM
8574 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8575 && ! rtx_equal_p (operands[0], operands[1]))
8576 return 0;
8577 return 1;
8578 }
8579
8580 /* Attempt to expand a unary operator. Make the expansion closer to the
8581 actual machine than just general_operand, which would allow 2 separate
8582 memory references (one output, one input) in a single insn. */
8583
8584 void
8585 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8586 rtx operands[])
8587 {
8588 int matching_memory;
8589 rtx src, dst, op, clob;
8590
8591 dst = operands[0];
8592 src = operands[1];
8593
8594 /* If the destination is memory, and we do not have matching source
8595 operands, do things in registers. */
8596 matching_memory = 0;
8597 if (GET_CODE (dst) == MEM)
8598 {
8599 if (rtx_equal_p (dst, src))
8600 matching_memory = 1;
8601 else
8602 dst = gen_reg_rtx (mode);
8603 }
8604
8605 /* When source operand is memory, destination must match. */
8606 if (!matching_memory && GET_CODE (src) == MEM)
8607 src = force_reg (mode, src);
8608
8609 /* If optimizing, copy to regs to improve CSE */
8610 if (optimize && ! no_new_pseudos)
8611 {
8612 if (GET_CODE (dst) == MEM)
8613 dst = gen_reg_rtx (mode);
8614 if (GET_CODE (src) == MEM)
8615 src = force_reg (mode, src);
8616 }
8617
8618 /* Emit the instruction. */
8619
8620 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8621 if (reload_in_progress || code == NOT)
8622 {
8623 /* Reload doesn't know about the flags register, and doesn't know that
8624 it doesn't want to clobber it. */
8625 if (code != NOT)
8626 abort ();
8627 emit_insn (op);
8628 }
8629 else
8630 {
8631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8633 }
8634
8635 /* Fix up the destination if needed. */
8636 if (dst != operands[0])
8637 emit_move_insn (operands[0], dst);
8638 }
8639
8640 /* Return TRUE or FALSE depending on whether the unary operator meets the
8641 appropriate constraints. */
8642
8643 int
8644 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8645 enum machine_mode mode ATTRIBUTE_UNUSED,
8646 rtx operands[2] ATTRIBUTE_UNUSED)
8647 {
8648 /* If one of the operands is memory, source and destination must match. */
8649 if ((GET_CODE (operands[0]) == MEM
8650 || GET_CODE (operands[1]) == MEM)
8651 && ! rtx_equal_p (operands[0], operands[1]))
8652 return FALSE;
8653 return TRUE;
8654 }
8655
8656 /* Return TRUE or FALSE depending on whether the first SET in INSN
8657 has source and destination with matching CC modes, and that the
8658 CC mode is at least as constrained as REQ_MODE. */
8659
8660 int
8661 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8662 {
8663 rtx set;
8664 enum machine_mode set_mode;
8665
8666 set = PATTERN (insn);
8667 if (GET_CODE (set) == PARALLEL)
8668 set = XVECEXP (set, 0, 0);
8669 if (GET_CODE (set) != SET)
8670 abort ();
8671 if (GET_CODE (SET_SRC (set)) != COMPARE)
8672 abort ();
8673
8674 set_mode = GET_MODE (SET_DEST (set));
8675 switch (set_mode)
8676 {
8677 case CCNOmode:
8678 if (req_mode != CCNOmode
8679 && (req_mode != CCmode
8680 || XEXP (SET_SRC (set), 1) != const0_rtx))
8681 return 0;
8682 break;
8683 case CCmode:
8684 if (req_mode == CCGCmode)
8685 return 0;
8686 /* FALLTHRU */
8687 case CCGCmode:
8688 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8689 return 0;
8690 /* FALLTHRU */
8691 case CCGOCmode:
8692 if (req_mode == CCZmode)
8693 return 0;
8694 /* FALLTHRU */
8695 case CCZmode:
8696 break;
8697
8698 default:
8699 abort ();
8700 }
8701
8702 return (GET_MODE (SET_SRC (set)) == set_mode);
8703 }
8704
8705 /* Generate insn patterns to do an integer compare of OPERANDS. */
8706
8707 static rtx
8708 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8709 {
8710 enum machine_mode cmpmode;
8711 rtx tmp, flags;
8712
8713 cmpmode = SELECT_CC_MODE (code, op0, op1);
8714 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8715
8716 /* This is very simple, but making the interface the same as in the
8717 FP case makes the rest of the code easier. */
8718 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8719 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8720
8721 /* Return the test that should be put into the flags user, i.e.
8722 the bcc, scc, or cmov instruction. */
8723 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8724 }
8725
8726 /* Figure out whether to use ordered or unordered fp comparisons.
8727 Return the appropriate mode to use. */
8728
8729 enum machine_mode
8730 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8731 {
8732 /* ??? In order to make all comparisons reversible, we do all comparisons
8733 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8734 all forms of trapping and nontrapping comparisons, we can make inequality
8735 comparisons trapping again, since it results in better code when using
8736 FCOM based compares. */
8737 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8738 }
8739
8740 enum machine_mode
8741 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8742 {
8743 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8744 return ix86_fp_compare_mode (code);
8745 switch (code)
8746 {
8747 /* Only zero flag is needed. */
8748 case EQ: /* ZF=0 */
8749 case NE: /* ZF!=0 */
8750 return CCZmode;
8751 /* Codes needing carry flag. */
8752 case GEU: /* CF=0 */
8753 case GTU: /* CF=0 & ZF=0 */
8754 case LTU: /* CF=1 */
8755 case LEU: /* CF=1 | ZF=1 */
8756 return CCmode;
8757 /* Codes possibly doable only with sign flag when
8758 comparing against zero. */
8759 case GE: /* SF=OF or SF=0 */
8760 case LT: /* SF<>OF or SF=1 */
8761 if (op1 == const0_rtx)
8762 return CCGOCmode;
8763 else
8764 /* For other cases Carry flag is not required. */
8765 return CCGCmode;
8766 /* Codes doable only with the sign flag when comparing
8767 against zero, but we lack a jump instruction for that,
8768 so we need to use relational tests against the overflow
8769 flag, which thus needs to be zero. */
8770 case GT: /* ZF=0 & SF=OF */
8771 case LE: /* ZF=1 | SF<>OF */
8772 if (op1 == const0_rtx)
8773 return CCNOmode;
8774 else
8775 return CCGCmode;
8776 /* The strcmp pattern does (use flags), and combine may ask us for the
8777 proper mode. */
8778 case USE:
8779 return CCmode;
8780 default:
8781 abort ();
8782 }
8783 }
8784
8785 /* Return the fixed registers used for condition codes. */
8786
8787 static bool
8788 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8789 {
8790 *p1 = FLAGS_REG;
8791 *p2 = FPSR_REG;
8792 return true;
8793 }
8794
8795 /* If two condition code modes are compatible, return a condition code
8796 mode which is compatible with both. Otherwise, return
8797 VOIDmode. */
8798
8799 static enum machine_mode
8800 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8801 {
8802 if (m1 == m2)
8803 return m1;
8804
8805 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8806 return VOIDmode;
8807
8808 if ((m1 == CCGCmode && m2 == CCGOCmode)
8809 || (m1 == CCGOCmode && m2 == CCGCmode))
8810 return CCGCmode;
8811
8812 switch (m1)
8813 {
8814 default:
8815 abort ();
8816
8817 case CCmode:
8818 case CCGCmode:
8819 case CCGOCmode:
8820 case CCNOmode:
8821 case CCZmode:
8822 switch (m2)
8823 {
8824 default:
8825 return VOIDmode;
8826
8827 case CCmode:
8828 case CCGCmode:
8829 case CCGOCmode:
8830 case CCNOmode:
8831 case CCZmode:
8832 return CCmode;
8833 }
8834
8835 case CCFPmode:
8836 case CCFPUmode:
8837 /* These are only compatible with themselves, which we already
8838 checked above. */
8839 return VOIDmode;
8840 }
8841 }
8842
8843 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8844
8845 int
8846 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8847 {
8848 enum rtx_code swapped_code = swap_condition (code);
8849 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8850 || (ix86_fp_comparison_cost (swapped_code)
8851 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8852 }
8853
8854 /* Swap, force into registers, or otherwise massage the two operands
8855 to a fp comparison. The operands are updated in place; the new
8856 comparison code is returned. */
8857
8858 static enum rtx_code
8859 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8860 {
8861 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8862 rtx op0 = *pop0, op1 = *pop1;
8863 enum machine_mode op_mode = GET_MODE (op0);
8864 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8865
8866 /* All of the unordered compare instructions only work on registers.
8867 The same is true of the XFmode compare instructions. The same is
8868 true of the fcomi compare instructions. */
8869
8870 if (!is_sse
8871 && (fpcmp_mode == CCFPUmode
8872 || op_mode == XFmode
8873 || ix86_use_fcomi_compare (code)))
8874 {
8875 op0 = force_reg (op_mode, op0);
8876 op1 = force_reg (op_mode, op1);
8877 }
8878 else
8879 {
8880 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8881 things around if they appear profitable, otherwise force op0
8882 into a register. */
8883
8884 if (standard_80387_constant_p (op0) == 0
8885 || (GET_CODE (op0) == MEM
8886 && ! (standard_80387_constant_p (op1) == 0
8887 || GET_CODE (op1) == MEM)))
8888 {
8889 rtx tmp;
8890 tmp = op0, op0 = op1, op1 = tmp;
8891 code = swap_condition (code);
8892 }
8893
8894 if (GET_CODE (op0) != REG)
8895 op0 = force_reg (op_mode, op0);
8896
8897 if (CONSTANT_P (op1))
8898 {
8899 if (standard_80387_constant_p (op1))
8900 op1 = force_reg (op_mode, op1);
8901 else
8902 op1 = validize_mem (force_const_mem (op_mode, op1));
8903 }
8904 }
8905
8906 /* Try to rearrange the comparison to make it cheaper. */
8907 if (ix86_fp_comparison_cost (code)
8908 > ix86_fp_comparison_cost (swap_condition (code))
8909 && (GET_CODE (op1) == REG || !no_new_pseudos))
8910 {
8911 rtx tmp;
8912 tmp = op0, op0 = op1, op1 = tmp;
8913 code = swap_condition (code);
8914 if (GET_CODE (op0) != REG)
8915 op0 = force_reg (op_mode, op0);
8916 }
8917
8918 *pop0 = op0;
8919 *pop1 = op1;
8920 return code;
8921 }
8922
8923 /* Convert the comparison codes we use to represent FP comparisons to the
8924 integer code that will result in a proper branch. Return UNKNOWN if no
8925 such code is available. */
8926 static enum rtx_code
8927 ix86_fp_compare_code_to_integer (enum rtx_code code)
8928 {
8929 switch (code)
8930 {
8931 case GT:
8932 return GTU;
8933 case GE:
8934 return GEU;
8935 case ORDERED:
8936 case UNORDERED:
8937 return code;
8938 break;
8939 case UNEQ:
8940 return EQ;
8941 break;
8942 case UNLT:
8943 return LTU;
8944 break;
8945 case UNLE:
8946 return LEU;
8947 break;
8948 case LTGT:
8949 return NE;
8950 break;
8951 default:
8952 return UNKNOWN;
8953 }
8954 }
8955
8956 /* Split comparison code CODE into comparisons we can do using branch
8957 instructions. BYPASS_CODE is the comparison code for a branch that will
8958 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8959 is not required, its code is set to NIL.
8960 We never require more than two branches. */
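/* For example, under IEEE math LT becomes first_code == UNLT with
   bypass_code == UNORDERED: the unordered test branches around the UNLT
   branch, since CF is also set when the operands are unordered.  */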
8961 static void
8962 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8963 enum rtx_code *first_code,
8964 enum rtx_code *second_code)
8965 {
8966 *first_code = code;
8967 *bypass_code = NIL;
8968 *second_code = NIL;
8969
8970 /* The fcomi comparison sets flags as follows:
8971
8972 cmp ZF PF CF
8973 > 0 0 0
8974 < 0 0 1
8975 = 1 0 0
8976 un 1 1 1 */
8977
8978 switch (code)
8979 {
8980 case GT: /* GTU - CF=0 & ZF=0 */
8981 case GE: /* GEU - CF=0 */
8982 case ORDERED: /* PF=0 */
8983 case UNORDERED: /* PF=1 */
8984 case UNEQ: /* EQ - ZF=1 */
8985 case UNLT: /* LTU - CF=1 */
8986 case UNLE: /* LEU - CF=1 | ZF=1 */
8987 case LTGT: /* EQ - ZF=0 */
8988 break;
8989 case LT: /* LTU - CF=1 - fails on unordered */
8990 *first_code = UNLT;
8991 *bypass_code = UNORDERED;
8992 break;
8993 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8994 *first_code = UNLE;
8995 *bypass_code = UNORDERED;
8996 break;
8997 case EQ: /* EQ - ZF=1 - fails on unordered */
8998 *first_code = UNEQ;
8999 *bypass_code = UNORDERED;
9000 break;
9001 case NE: /* NE - ZF=0 - fails on unordered */
9002 *first_code = LTGT;
9003 *second_code = UNORDERED;
9004 break;
9005 case UNGE: /* GEU - CF=0 - fails on unordered */
9006 *first_code = GE;
9007 *second_code = UNORDERED;
9008 break;
9009 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9010 *first_code = GT;
9011 *second_code = UNORDERED;
9012 break;
9013 default:
9014 abort ();
9015 }
9016 if (!TARGET_IEEE_FP)
9017 {
9018 *second_code = NIL;
9019 *bypass_code = NIL;
9020 }
9021 }
9022
9023 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
9024 All following functions use the number of instructions as a cost metric.
9025 In the future this should be tweaked to compute bytes for optimize_size and
9026 take into account the performance of various instructions on various CPUs. */
9027 static int
9028 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9029 {
9030 if (!TARGET_IEEE_FP)
9031 return 4;
9032 /* The cost of code output by ix86_expand_fp_compare. */
9033 switch (code)
9034 {
9035 case UNLE:
9036 case UNLT:
9037 case LTGT:
9038 case GT:
9039 case GE:
9040 case UNORDERED:
9041 case ORDERED:
9042 case UNEQ:
9043 return 4;
9044 break;
9045 case LT:
9046 case NE:
9047 case EQ:
9048 case UNGE:
9049 return 5;
9050 break;
9051 case LE:
9052 case UNGT:
9053 return 6;
9054 break;
9055 default:
9056 abort ();
9057 }
9058 }
9059
9060 /* Return cost of comparison done using fcomi operation.
9061 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9062 static int
9063 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9064 {
9065 enum rtx_code bypass_code, first_code, second_code;
9066 /* Return an arbitrarily high cost when the instruction is not supported - this
9067 prevents gcc from using it. */
9068 if (!TARGET_CMOVE)
9069 return 1024;
9070 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9071 return (bypass_code != NIL || second_code != NIL) + 2;
9072 }
9073
9074 /* Return cost of comparison done using sahf operation.
9075 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9076 static int
9077 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9078 {
9079 enum rtx_code bypass_code, first_code, second_code;
9080 /* Return an arbitrarily high cost when the instruction is not preferred - this
9081 keeps gcc from using it. */
9082 if (!TARGET_USE_SAHF && !optimize_size)
9083 return 1024;
9084 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9085 return (bypass_code != NIL || second_code != NIL) + 3;
9086 }
9087
9088 /* Compute cost of the comparison done using any method.
9089 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9090 static int
9091 ix86_fp_comparison_cost (enum rtx_code code)
9092 {
9093 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9094 int min;
9095
9096 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9097 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9098
9099 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9100 if (min > sahf_cost)
9101 min = sahf_cost;
9102 if (min > fcomi_cost)
9103 min = fcomi_cost;
9104 return min;
9105 }
9106
9107 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9108
9109 static rtx
9110 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9111 rtx *second_test, rtx *bypass_test)
9112 {
9113 enum machine_mode fpcmp_mode, intcmp_mode;
9114 rtx tmp, tmp2;
9115 int cost = ix86_fp_comparison_cost (code);
9116 enum rtx_code bypass_code, first_code, second_code;
9117
9118 fpcmp_mode = ix86_fp_compare_mode (code);
9119 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9120
9121 if (second_test)
9122 *second_test = NULL_RTX;
9123 if (bypass_test)
9124 *bypass_test = NULL_RTX;
9125
9126 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9127
9128 /* Do fcomi/sahf based test when profitable. */
9129 if ((bypass_code == NIL || bypass_test)
9130 && (second_code == NIL || second_test)
9131 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9132 {
9133 if (TARGET_CMOVE)
9134 {
9135 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9136 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9137 tmp);
9138 emit_insn (tmp);
9139 }
9140 else
9141 {
9142 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9143 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9144 if (!scratch)
9145 scratch = gen_reg_rtx (HImode);
9146 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9147 emit_insn (gen_x86_sahf_1 (scratch));
9148 }
9149
9150 /* The FP codes work out to act like unsigned. */
9151 intcmp_mode = fpcmp_mode;
9152 code = first_code;
9153 if (bypass_code != NIL)
9154 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9155 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9156 const0_rtx);
9157 if (second_code != NIL)
9158 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9159 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9160 const0_rtx);
9161 }
9162 else
9163 {
9164 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9165 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9166 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9167 if (!scratch)
9168 scratch = gen_reg_rtx (HImode);
9169 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9170
9171 /* In the unordered case, we have to check C2 for NaN's, which
9172 doesn't happen to work out to anything nice combination-wise.
9173 So do some bit twiddling on the value we've got in AH to come
9174 up with an appropriate set of condition codes. */
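/* After fnstsw, the relevant condition bits end up in AH: C0 in bit 0
   (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40); 0x45 masks all
   three.  That is what the magic constants below are testing.  */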
9175
9176 intcmp_mode = CCNOmode;
9177 switch (code)
9178 {
9179 case GT:
9180 case UNGT:
9181 if (code == GT || !TARGET_IEEE_FP)
9182 {
9183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9184 code = EQ;
9185 }
9186 else
9187 {
9188 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9189 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9190 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9191 intcmp_mode = CCmode;
9192 code = GEU;
9193 }
9194 break;
9195 case LT:
9196 case UNLT:
9197 if (code == LT && TARGET_IEEE_FP)
9198 {
9199 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9200 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9201 intcmp_mode = CCmode;
9202 code = EQ;
9203 }
9204 else
9205 {
9206 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9207 code = NE;
9208 }
9209 break;
9210 case GE:
9211 case UNGE:
9212 if (code == GE || !TARGET_IEEE_FP)
9213 {
9214 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9215 code = EQ;
9216 }
9217 else
9218 {
9219 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9220 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9221 GEN_INT (0x01)));
9222 code = NE;
9223 }
9224 break;
9225 case LE:
9226 case UNLE:
9227 if (code == LE && TARGET_IEEE_FP)
9228 {
9229 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9230 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9231 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9232 intcmp_mode = CCmode;
9233 code = LTU;
9234 }
9235 else
9236 {
9237 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9238 code = NE;
9239 }
9240 break;
9241 case EQ:
9242 case UNEQ:
9243 if (code == EQ && TARGET_IEEE_FP)
9244 {
9245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9247 intcmp_mode = CCmode;
9248 code = EQ;
9249 }
9250 else
9251 {
9252 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9253 code = NE;
9254 break;
9255 }
9256 break;
9257 case NE:
9258 case LTGT:
9259 if (code == NE && TARGET_IEEE_FP)
9260 {
9261 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9262 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9263 GEN_INT (0x40)));
9264 code = NE;
9265 }
9266 else
9267 {
9268 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9269 code = EQ;
9270 }
9271 break;
9272
9273 case UNORDERED:
9274 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9275 code = NE;
9276 break;
9277 case ORDERED:
9278 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9279 code = EQ;
9280 break;
9281
9282 default:
9283 abort ();
9284 }
9285 }
9286
9287 /* Return the test that should be put into the flags user, i.e.
9288 the bcc, scc, or cmov instruction. */
9289 return gen_rtx_fmt_ee (code, VOIDmode,
9290 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9291 const0_rtx);
9292 }
9293
9294 rtx
9295 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9296 {
9297 rtx op0, op1, ret;
9298 op0 = ix86_compare_op0;
9299 op1 = ix86_compare_op1;
9300
9301 if (second_test)
9302 *second_test = NULL_RTX;
9303 if (bypass_test)
9304 *bypass_test = NULL_RTX;
9305
9306 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9307 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9308 second_test, bypass_test);
9309 else
9310 ret = ix86_expand_int_compare (code, op0, op1);
9311
9312 return ret;
9313 }
9314
9315 /* Return true if the CODE will result in nontrivial jump sequence. */
9316 bool
9317 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9318 {
9319 enum rtx_code bypass_code, first_code, second_code;
9320 if (!TARGET_CMOVE)
9321 return true;
9322 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9323 return bypass_code != NIL || second_code != NIL;
9324 }
9325
9326 void
9327 ix86_expand_branch (enum rtx_code code, rtx label)
9328 {
9329 rtx tmp;
9330
9331 switch (GET_MODE (ix86_compare_op0))
9332 {
9333 case QImode:
9334 case HImode:
9335 case SImode:
9336 simple:
9337 tmp = ix86_expand_compare (code, NULL, NULL);
9338 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9339 gen_rtx_LABEL_REF (VOIDmode, label),
9340 pc_rtx);
9341 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9342 return;
9343
9344 case SFmode:
9345 case DFmode:
9346 case XFmode:
9347 {
9348 rtvec vec;
9349 int use_fcomi;
9350 enum rtx_code bypass_code, first_code, second_code;
9351
9352 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9353 &ix86_compare_op1);
9354
9355 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9356
9357 /* Check whether we will use the natural sequence with one jump. If
9358 so, we can expand the jump early. Otherwise delay expansion by
9359 creating a compound insn so as not to confuse the optimizers. */
9360 if (bypass_code == NIL && second_code == NIL
9361 && TARGET_CMOVE)
9362 {
9363 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9364 gen_rtx_LABEL_REF (VOIDmode, label),
9365 pc_rtx, NULL_RTX);
9366 }
9367 else
9368 {
9369 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9370 ix86_compare_op0, ix86_compare_op1);
9371 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9372 gen_rtx_LABEL_REF (VOIDmode, label),
9373 pc_rtx);
9374 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9375
9376 use_fcomi = ix86_use_fcomi_compare (code);
9377 vec = rtvec_alloc (3 + !use_fcomi);
9378 RTVEC_ELT (vec, 0) = tmp;
9379 RTVEC_ELT (vec, 1)
9380 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9381 RTVEC_ELT (vec, 2)
9382 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9383 if (! use_fcomi)
9384 RTVEC_ELT (vec, 3)
9385 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9386
9387 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9388 }
9389 return;
9390 }
9391
9392 case DImode:
9393 if (TARGET_64BIT)
9394 goto simple;
9395 /* Expand DImode branch into multiple compare+branch. */
9396 {
9397 rtx lo[2], hi[2], label2;
9398 enum rtx_code code1, code2, code3;
9399
9400 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9401 {
9402 tmp = ix86_compare_op0;
9403 ix86_compare_op0 = ix86_compare_op1;
9404 ix86_compare_op1 = tmp;
9405 code = swap_condition (code);
9406 }
9407 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9408 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9409
9410 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9411 avoid two branches. This costs one extra insn, so disable when
9412 optimizing for size. */
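/* Illustrative sketch (editorial note, not part of the original code): for a
   DImode equality test a == b this amounts to checking
     (hi(a) ^ hi(b)) | (lo(a) ^ lo(b)) == 0,
   so one SImode compare of the IOR result against zero replaces the two
   compare-and-branch pairs that would otherwise be needed.  */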
9413
9414 if ((code == EQ || code == NE)
9415 && (!optimize_size
9416 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9417 {
9418 rtx xor0, xor1;
9419
9420 xor1 = hi[0];
9421 if (hi[1] != const0_rtx)
9422 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9423 NULL_RTX, 0, OPTAB_WIDEN);
9424
9425 xor0 = lo[0];
9426 if (lo[1] != const0_rtx)
9427 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9428 NULL_RTX, 0, OPTAB_WIDEN);
9429
9430 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9431 NULL_RTX, 0, OPTAB_WIDEN);
9432
9433 ix86_compare_op0 = tmp;
9434 ix86_compare_op1 = const0_rtx;
9435 ix86_expand_branch (code, label);
9436 return;
9437 }
9438
9439 /* Otherwise, if we are doing a less-than or greater-than-or-equal comparison,
9440 op1 is a constant and the low word is zero, then we can just
9441 examine the high word. */
9442
9443 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9444 switch (code)
9445 {
9446 case LT: case LTU: case GE: case GEU:
9447 ix86_compare_op0 = hi[0];
9448 ix86_compare_op1 = hi[1];
9449 ix86_expand_branch (code, label);
9450 return;
9451 default:
9452 break;
9453 }
9454
9455 /* Otherwise, we need two or three jumps. */
9456
9457 label2 = gen_label_rtx ();
9458
9459 code1 = code;
9460 code2 = swap_condition (code);
9461 code3 = unsigned_condition (code);
9462
9463 switch (code)
9464 {
9465 case LT: case GT: case LTU: case GTU:
9466 break;
9467
9468 case LE: code1 = LT; code2 = GT; break;
9469 case GE: code1 = GT; code2 = LT; break;
9470 case LEU: code1 = LTU; code2 = GTU; break;
9471 case GEU: code1 = GTU; code2 = LTU; break;
9472
9473 case EQ: code1 = NIL; code2 = NE; break;
9474 case NE: code2 = NIL; break;
9475
9476 default:
9477 abort ();
9478 }
9479
9480 /*
9481 * a < b =>
9482 * if (hi(a) < hi(b)) goto true;
9483 * if (hi(a) > hi(b)) goto false;
9484 * if (lo(a) < lo(b)) goto true;
9485 * false:
9486 */
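/* Editorial note: e.g. for a signed LE the switch above selects code1 = LT,
   code2 = GT and code3 = LEU, so the high words are compared signed and only
   the final low-word comparison is done unsigned.  */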
9487
9488 ix86_compare_op0 = hi[0];
9489 ix86_compare_op1 = hi[1];
9490
9491 if (code1 != NIL)
9492 ix86_expand_branch (code1, label);
9493 if (code2 != NIL)
9494 ix86_expand_branch (code2, label2);
9495
9496 ix86_compare_op0 = lo[0];
9497 ix86_compare_op1 = lo[1];
9498 ix86_expand_branch (code3, label);
9499
9500 if (code2 != NIL)
9501 emit_label (label2);
9502 return;
9503 }
9504
9505 default:
9506 abort ();
9507 }
9508 }
9509
9510 /* Split branch based on floating point condition. */
9511 void
9512 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9513 rtx target1, rtx target2, rtx tmp)
9514 {
9515 rtx second, bypass;
9516 rtx label = NULL_RTX;
9517 rtx condition;
9518 int bypass_probability = -1, second_probability = -1, probability = -1;
9519 rtx i;
9520
9521 if (target2 != pc_rtx)
9522 {
9523 rtx tmp = target2;
9524 code = reverse_condition_maybe_unordered (code);
9525 target2 = target1;
9526 target1 = tmp;
9527 }
9528
9529 condition = ix86_expand_fp_compare (code, op1, op2,
9530 tmp, &second, &bypass);
9531
9532 if (split_branch_probability >= 0)
9533 {
9534 /* Distribute the probabilities across the jumps.
9535 Assume that BYPASS and SECOND always test
9536 for UNORDERED. */
9537 probability = split_branch_probability;
9538
9539 /* A value of 1 is low enough that the probability does not need
9540 to be updated. Later we may run some experiments and see
9541 whether unordered values are more frequent in practice. */
9542 if (bypass)
9543 bypass_probability = 1;
9544 if (second)
9545 second_probability = 1;
9546 }
9547 if (bypass != NULL_RTX)
9548 {
9549 label = gen_label_rtx ();
9550 i = emit_jump_insn (gen_rtx_SET
9551 (VOIDmode, pc_rtx,
9552 gen_rtx_IF_THEN_ELSE (VOIDmode,
9553 bypass,
9554 gen_rtx_LABEL_REF (VOIDmode,
9555 label),
9556 pc_rtx)));
9557 if (bypass_probability >= 0)
9558 REG_NOTES (i)
9559 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9560 GEN_INT (bypass_probability),
9561 REG_NOTES (i));
9562 }
9563 i = emit_jump_insn (gen_rtx_SET
9564 (VOIDmode, pc_rtx,
9565 gen_rtx_IF_THEN_ELSE (VOIDmode,
9566 condition, target1, target2)));
9567 if (probability >= 0)
9568 REG_NOTES (i)
9569 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9570 GEN_INT (probability),
9571 REG_NOTES (i));
9572 if (second != NULL_RTX)
9573 {
9574 i = emit_jump_insn (gen_rtx_SET
9575 (VOIDmode, pc_rtx,
9576 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9577 target2)));
9578 if (second_probability >= 0)
9579 REG_NOTES (i)
9580 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9581 GEN_INT (second_probability),
9582 REG_NOTES (i));
9583 }
9584 if (label != NULL_RTX)
9585 emit_label (label);
9586 }
9587
9588 int
9589 ix86_expand_setcc (enum rtx_code code, rtx dest)
9590 {
9591 rtx ret, tmp, tmpreg, equiv;
9592 rtx second_test, bypass_test;
9593
9594 if (GET_MODE (ix86_compare_op0) == DImode
9595 && !TARGET_64BIT)
9596 return 0; /* FAIL */
9597
9598 if (GET_MODE (dest) != QImode)
9599 abort ();
9600
9601 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9602 PUT_MODE (ret, QImode);
9603
9604 tmp = dest;
9605 tmpreg = dest;
9606
9607 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9608 if (bypass_test || second_test)
9609 {
9610 rtx test = second_test;
9611 int bypass = 0;
9612 rtx tmp2 = gen_reg_rtx (QImode);
9613 if (bypass_test)
9614 {
9615 if (second_test)
9616 abort ();
9617 test = bypass_test;
9618 bypass = 1;
9619 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9620 }
9621 PUT_MODE (test, QImode);
9622 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9623
9624 if (bypass)
9625 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9626 else
9627 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9628 }
9629
9630 /* Attach a REG_EQUAL note describing the comparison result. */
9631 equiv = simplify_gen_relational (code, QImode,
9632 GET_MODE (ix86_compare_op0),
9633 ix86_compare_op0, ix86_compare_op1);
9634 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9635
9636 return 1; /* DONE */
9637 }
9638
9639 /* Expand a comparison setting or clearing the carry flag. Return true when
9640 successful and set *pop to the resulting comparison. */
9641 static bool
9642 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9643 {
9644 enum machine_mode mode =
9645 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9646
9647 /* Do not handle DImode compares that go through the special path. Also we can't
9648 deal with FP compares yet. This is possible to add. */
9649 if ((mode == DImode && !TARGET_64BIT))
9650 return false;
9651 if (FLOAT_MODE_P (mode))
9652 {
9653 rtx second_test = NULL, bypass_test = NULL;
9654 rtx compare_op, compare_seq;
9655
9656 /* Shortcut: the following common codes never translate into carry flag compares. */
9657 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9658 || code == ORDERED || code == UNORDERED)
9659 return false;
9660
9661 /* These comparisons require the zero flag; swap the operands so they won't. */
9662 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9663 && !TARGET_IEEE_FP)
9664 {
9665 rtx tmp = op0;
9666 op0 = op1;
9667 op1 = tmp;
9668 code = swap_condition (code);
9669 }
9670
9671 /* Try to expand the comparison and verify that we end up with a carry flag
9672 based comparison. This fails to be true only when we decide to expand the
9673 comparison using arithmetic, which is not a common scenario. */
9674 start_sequence ();
9675 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9676 &second_test, &bypass_test);
9677 compare_seq = get_insns ();
9678 end_sequence ();
9679
9680 if (second_test || bypass_test)
9681 return false;
9682 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9683 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9684 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9685 else
9686 code = GET_CODE (compare_op);
9687 if (code != LTU && code != GEU)
9688 return false;
9689 emit_insn (compare_seq);
9690 *pop = compare_op;
9691 return true;
9692 }
9693 if (!INTEGRAL_MODE_P (mode))
9694 return false;
9695 switch (code)
9696 {
9697 case LTU:
9698 case GEU:
9699 break;
9700
9701 /* Convert a==0 into (unsigned)a<1. */
9702 case EQ:
9703 case NE:
9704 if (op1 != const0_rtx)
9705 return false;
9706 op1 = const1_rtx;
9707 code = (code == EQ ? LTU : GEU);
9708 break;
9709
9710 /* Convert a>b into b<a or a>=b+1. */
9711 case GTU:
9712 case LEU:
9713 if (GET_CODE (op1) == CONST_INT)
9714 {
9715 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9716 /* Bail out on overflow. We could still swap the operands, but that
9717 would force loading of the constant into a register. */
9718 if (op1 == const0_rtx
9719 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9720 return false;
9721 code = (code == GTU ? GEU : LTU);
9722 }
9723 else
9724 {
9725 rtx tmp = op1;
9726 op1 = op0;
9727 op0 = tmp;
9728 code = (code == GTU ? LTU : GEU);
9729 }
9730 break;
9731
9732 /* Convert a>=0 into (unsigned)a<0x80000000. */
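/* Example (editorial note): for SImode, a >= 0 holds exactly when
   (unsigned) a < 0x80000000 and a < 0 when (unsigned) a >= 0x80000000,
   so the signed test becomes an LTU/GEU test usable via the carry flag.  */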
9733 case LT:
9734 case GE:
9735 if (mode == DImode || op1 != const0_rtx)
9736 return false;
9737 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9738 code = (code == LT ? GEU : LTU);
9739 break;
9740 case LE:
9741 case GT:
9742 if (mode == DImode || op1 != constm1_rtx)
9743 return false;
9744 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9745 code = (code == LE ? GEU : LTU);
9746 break;
9747
9748 default:
9749 return false;
9750 }
9751 /* Swapping operands may cause a constant to appear as the first operand. */
9752 if (!nonimmediate_operand (op0, VOIDmode))
9753 {
9754 if (no_new_pseudos)
9755 return false;
9756 op0 = force_reg (mode, op0);
9757 }
9758 ix86_compare_op0 = op0;
9759 ix86_compare_op1 = op1;
9760 *pop = ix86_expand_compare (code, NULL, NULL);
9761 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9762 abort ();
9763 return true;
9764 }
9765
9766 int
9767 ix86_expand_int_movcc (rtx operands[])
9768 {
9769 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9770 rtx compare_seq, compare_op;
9771 rtx second_test, bypass_test;
9772 enum machine_mode mode = GET_MODE (operands[0]);
9773 bool sign_bit_compare_p = false;
9774
9775 start_sequence ();
9776 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9777 compare_seq = get_insns ();
9778 end_sequence ();
9779
9780 compare_code = GET_CODE (compare_op);
9781
9782 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9783 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9784 sign_bit_compare_p = true;
9785
9786 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9787 HImode insns, we'd be swallowed in word prefix ops. */
9788
9789 if ((mode != HImode || TARGET_FAST_PREFIX)
9790 && (mode != DImode || TARGET_64BIT)
9791 && GET_CODE (operands[2]) == CONST_INT
9792 && GET_CODE (operands[3]) == CONST_INT)
9793 {
9794 rtx out = operands[0];
9795 HOST_WIDE_INT ct = INTVAL (operands[2]);
9796 HOST_WIDE_INT cf = INTVAL (operands[3]);
9797 HOST_WIDE_INT diff;
9798
9799 diff = ct - cf;
9800 /* Sign bit compares are better done using shifts than by using
9801 sbb. */
9802 if (sign_bit_compare_p
9803 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9804 ix86_compare_op1, &compare_op))
9805 {
9806 /* Detect overlap between destination and compare sources. */
9807 rtx tmp = out;
9808
9809 if (!sign_bit_compare_p)
9810 {
9811 bool fpcmp = false;
9812
9813 compare_code = GET_CODE (compare_op);
9814
9815 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9816 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9817 {
9818 fpcmp = true;
9819 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9820 }
9821
9822 /* To simplify the rest of the code, restrict to the GEU case. */
9823 if (compare_code == LTU)
9824 {
9825 HOST_WIDE_INT tmp = ct;
9826 ct = cf;
9827 cf = tmp;
9828 compare_code = reverse_condition (compare_code);
9829 code = reverse_condition (code);
9830 }
9831 else
9832 {
9833 if (fpcmp)
9834 PUT_CODE (compare_op,
9835 reverse_condition_maybe_unordered
9836 (GET_CODE (compare_op)));
9837 else
9838 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9839 }
9840 diff = ct - cf;
9841
9842 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9843 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9844 tmp = gen_reg_rtx (mode);
9845
9846 if (mode == DImode)
9847 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9848 else
9849 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9850 }
9851 else
9852 {
9853 if (code == GT || code == GE)
9854 code = reverse_condition (code);
9855 else
9856 {
9857 HOST_WIDE_INT tmp = ct;
9858 ct = cf;
9859 cf = tmp;
9860 diff = ct - cf;
9861 }
9862 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9863 ix86_compare_op1, VOIDmode, 0, -1);
9864 }
9865
9866 if (diff == 1)
9867 {
9868 /*
9869 * cmpl op0,op1
9870 * sbbl dest,dest
9871 * [addl dest, ct]
9872 *
9873 * Size 5 - 8.
9874 */
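/* Worked example (editorial note): with ct = 5 and cf = 4 (diff == 1),
   sbb leaves dest = -1 on one arm of the condition and 0 on the other;
   adding ct then yields 4 (cf) or 5 (ct) without a branch.  */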
9875 if (ct)
9876 tmp = expand_simple_binop (mode, PLUS,
9877 tmp, GEN_INT (ct),
9878 copy_rtx (tmp), 1, OPTAB_DIRECT);
9879 }
9880 else if (cf == -1)
9881 {
9882 /*
9883 * cmpl op0,op1
9884 * sbbl dest,dest
9885 * orl $ct, dest
9886 *
9887 * Size 8.
9888 */
9889 tmp = expand_simple_binop (mode, IOR,
9890 tmp, GEN_INT (ct),
9891 copy_rtx (tmp), 1, OPTAB_DIRECT);
9892 }
9893 else if (diff == -1 && ct)
9894 {
9895 /*
9896 * cmpl op0,op1
9897 * sbbl dest,dest
9898 * notl dest
9899 * [addl dest, cf]
9900 *
9901 * Size 8 - 11.
9902 */
9903 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9904 if (cf)
9905 tmp = expand_simple_binop (mode, PLUS,
9906 copy_rtx (tmp), GEN_INT (cf),
9907 copy_rtx (tmp), 1, OPTAB_DIRECT);
9908 }
9909 else
9910 {
9911 /*
9912 * cmpl op0,op1
9913 * sbbl dest,dest
9914 * [notl dest]
9915 * andl cf - ct, dest
9916 * [addl dest, ct]
9917 *
9918 * Size 8 - 11.
9919 */
9920
9921 if (cf == 0)
9922 {
9923 cf = ct;
9924 ct = 0;
9925 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9926 }
9927
9928 tmp = expand_simple_binop (mode, AND,
9929 copy_rtx (tmp),
9930 gen_int_mode (cf - ct, mode),
9931 copy_rtx (tmp), 1, OPTAB_DIRECT);
9932 if (ct)
9933 tmp = expand_simple_binop (mode, PLUS,
9934 copy_rtx (tmp), GEN_INT (ct),
9935 copy_rtx (tmp), 1, OPTAB_DIRECT);
9936 }
9937
9938 if (!rtx_equal_p (tmp, out))
9939 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9940
9941 return 1; /* DONE */
9942 }
9943
9944 if (diff < 0)
9945 {
9946 HOST_WIDE_INT tmp;
9947 tmp = ct, ct = cf, cf = tmp;
9948 diff = -diff;
9949 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9950 {
9951 /* We may be reversing an unordered compare to a normal compare, which
9952 is not valid in general (we may convert a non-trapping condition
9953 to a trapping one); however, on i386 we currently emit all
9954 comparisons unordered. */
9955 compare_code = reverse_condition_maybe_unordered (compare_code);
9956 code = reverse_condition_maybe_unordered (code);
9957 }
9958 else
9959 {
9960 compare_code = reverse_condition (compare_code);
9961 code = reverse_condition (code);
9962 }
9963 }
9964
9965 compare_code = NIL;
9966 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9967 && GET_CODE (ix86_compare_op1) == CONST_INT)
9968 {
9969 if (ix86_compare_op1 == const0_rtx
9970 && (code == LT || code == GE))
9971 compare_code = code;
9972 else if (ix86_compare_op1 == constm1_rtx)
9973 {
9974 if (code == LE)
9975 compare_code = LT;
9976 else if (code == GT)
9977 compare_code = GE;
9978 }
9979 }
9980
9981 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9982 if (compare_code != NIL
9983 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9984 && (cf == -1 || ct == -1))
9985 {
9986 /* If lea code below could be used, only optimize
9987 if it results in a 2 insn sequence. */
9988
9989 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9990 || diff == 3 || diff == 5 || diff == 9)
9991 || (compare_code == LT && ct == -1)
9992 || (compare_code == GE && cf == -1))
9993 {
9994 /*
9995 * notl op1 (if necessary)
9996 * sarl $31, op1
9997 * orl cf, op1
9998 */
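/* Worked example (editorial note): for dest = (op0 < 0) ? -1 : 9,
   sarl $31 broadcasts the sign bit, giving -1 or 0, and orl $9 then
   leaves -1 or 9 respectively; no conditional jump is needed.  */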
9999 if (ct != -1)
10000 {
10001 cf = ct;
10002 ct = -1;
10003 code = reverse_condition (code);
10004 }
10005
10006 out = emit_store_flag (out, code, ix86_compare_op0,
10007 ix86_compare_op1, VOIDmode, 0, -1);
10008
10009 out = expand_simple_binop (mode, IOR,
10010 out, GEN_INT (cf),
10011 out, 1, OPTAB_DIRECT);
10012 if (out != operands[0])
10013 emit_move_insn (operands[0], out);
10014
10015 return 1; /* DONE */
10016 }
10017 }
10018
10019
10020 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10021 || diff == 3 || diff == 5 || diff == 9)
10022 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10023 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10024 {
10025 /*
10026 * xorl dest,dest
10027 * cmpl op1,op2
10028 * setcc dest
10029 * lea cf(dest*(ct-cf)),dest
10030 *
10031 * Size 14.
10032 *
10033 * This also catches the degenerate setcc-only case.
10034 */
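/* Worked example (editorial note): with ct = 10 and cf = 2 the lea
   computes 2 + dest * 8, so the 0/1 produced by setcc selects 2 or 10
   in a single address calculation.  */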
10035
10036 rtx tmp;
10037 int nops;
10038
10039 out = emit_store_flag (out, code, ix86_compare_op0,
10040 ix86_compare_op1, VOIDmode, 0, 1);
10041
10042 nops = 0;
10043 /* On x86_64 the lea instruction operates on Pmode, so we need
10044 to get the arithmetic done in the proper mode to match. */
10045 if (diff == 1)
10046 tmp = copy_rtx (out);
10047 else
10048 {
10049 rtx out1;
10050 out1 = copy_rtx (out);
10051 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10052 nops++;
10053 if (diff & 1)
10054 {
10055 tmp = gen_rtx_PLUS (mode, tmp, out1);
10056 nops++;
10057 }
10058 }
10059 if (cf != 0)
10060 {
10061 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10062 nops++;
10063 }
10064 if (!rtx_equal_p (tmp, out))
10065 {
10066 if (nops == 1)
10067 out = force_operand (tmp, copy_rtx (out));
10068 else
10069 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10070 }
10071 if (!rtx_equal_p (out, operands[0]))
10072 emit_move_insn (operands[0], copy_rtx (out));
10073
10074 return 1; /* DONE */
10075 }
10076
10077 /*
10078 * General case: Jumpful:
10079 * xorl dest,dest cmpl op1, op2
10080 * cmpl op1, op2 movl ct, dest
10081 * setcc dest jcc 1f
10082 * decl dest movl cf, dest
10083 * andl (cf-ct),dest 1:
10084 * addl ct,dest
10085 *
10086 * Size 20. Size 14.
10087 *
10088 * This is reasonably steep, but branch mispredict costs are
10089 * high on modern cpus, so consider failing only if optimizing
10090 * for space.
10091 */
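/* Worked example (editorial note): with ct = 7 and cf = 12 the branchless
   sequence computes setcc (1 or 0), decl (0 or -1), andl $5 (0 or 5) and
   addl $7, giving 7 when the condition holds and 12 otherwise.  */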
10092
10093 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10094 && BRANCH_COST >= 2)
10095 {
10096 if (cf == 0)
10097 {
10098 cf = ct;
10099 ct = 0;
10100 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10101 /* We may be reversing an unordered compare to a normal compare,
10102 which is not valid in general (we may convert a non-trapping
10103 condition to a trapping one); however, on i386 we currently
10104 emit all comparisons unordered. */
10105 code = reverse_condition_maybe_unordered (code);
10106 else
10107 {
10108 code = reverse_condition (code);
10109 if (compare_code != NIL)
10110 compare_code = reverse_condition (compare_code);
10111 }
10112 }
10113
10114 if (compare_code != NIL)
10115 {
10116 /* notl op1 (if needed)
10117 sarl $31, op1
10118 andl (cf-ct), op1
10119 addl ct, op1
10120
10121 For x < 0 (resp. x <= -1) there will be no notl,
10122 so if possible swap the constants to get rid of the
10123 complement.
10124 True/false will be -1/0 while code below (store flag
10125 followed by decrement) is 0/-1, so the constants need
10126 to be exchanged once more. */
10127
10128 if (compare_code == GE || !cf)
10129 {
10130 code = reverse_condition (code);
10131 compare_code = LT;
10132 }
10133 else
10134 {
10135 HOST_WIDE_INT tmp = cf;
10136 cf = ct;
10137 ct = tmp;
10138 }
10139
10140 out = emit_store_flag (out, code, ix86_compare_op0,
10141 ix86_compare_op1, VOIDmode, 0, -1);
10142 }
10143 else
10144 {
10145 out = emit_store_flag (out, code, ix86_compare_op0,
10146 ix86_compare_op1, VOIDmode, 0, 1);
10147
10148 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10149 copy_rtx (out), 1, OPTAB_DIRECT);
10150 }
10151
10152 out = expand_simple_binop (mode, AND, copy_rtx (out),
10153 gen_int_mode (cf - ct, mode),
10154 copy_rtx (out), 1, OPTAB_DIRECT);
10155 if (ct)
10156 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10157 copy_rtx (out), 1, OPTAB_DIRECT);
10158 if (!rtx_equal_p (out, operands[0]))
10159 emit_move_insn (operands[0], copy_rtx (out));
10160
10161 return 1; /* DONE */
10162 }
10163 }
10164
10165 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10166 {
10167 /* Try a few things more with specific constants and a variable. */
10168
10169 optab op;
10170 rtx var, orig_out, out, tmp;
10171
10172 if (BRANCH_COST <= 2)
10173 return 0; /* FAIL */
10174
10175 /* If one of the two operands is an interesting constant, load a
10176 constant with the above and mask it in with a logical operation. */
10177
10178 if (GET_CODE (operands[2]) == CONST_INT)
10179 {
10180 var = operands[3];
10181 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10182 operands[3] = constm1_rtx, op = and_optab;
10183 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10184 operands[3] = const0_rtx, op = ior_optab;
10185 else
10186 return 0; /* FAIL */
10187 }
10188 else if (GET_CODE (operands[3]) == CONST_INT)
10189 {
10190 var = operands[2];
10191 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10192 operands[2] = constm1_rtx, op = and_optab;
10193 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10194 operands[2] = const0_rtx, op = ior_optab;
10195 else
10196 return 0; /* FAIL */
10197 }
10198 else
10199 return 0; /* FAIL */
10200
10201 orig_out = operands[0];
10202 tmp = gen_reg_rtx (mode);
10203 operands[0] = tmp;
10204
10205 /* Recurse to get the constant loaded. */
10206 if (ix86_expand_int_movcc (operands) == 0)
10207 return 0; /* FAIL */
10208
10209 /* Mask in the interesting variable. */
10210 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10211 OPTAB_WIDEN);
10212 if (!rtx_equal_p (out, orig_out))
10213 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10214
10215 return 1; /* DONE */
10216 }
10217
10218 /*
10219 * For comparison with above,
10220 *
10221 * movl cf,dest
10222 * movl ct,tmp
10223 * cmpl op1,op2
10224 * cmovcc tmp,dest
10225 *
10226 * Size 15.
10227 */
10228
10229 if (! nonimmediate_operand (operands[2], mode))
10230 operands[2] = force_reg (mode, operands[2]);
10231 if (! nonimmediate_operand (operands[3], mode))
10232 operands[3] = force_reg (mode, operands[3]);
10233
10234 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10235 {
10236 rtx tmp = gen_reg_rtx (mode);
10237 emit_move_insn (tmp, operands[3]);
10238 operands[3] = tmp;
10239 }
10240 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10241 {
10242 rtx tmp = gen_reg_rtx (mode);
10243 emit_move_insn (tmp, operands[2]);
10244 operands[2] = tmp;
10245 }
10246
10247 if (! register_operand (operands[2], VOIDmode)
10248 && (mode == QImode
10249 || ! register_operand (operands[3], VOIDmode)))
10250 operands[2] = force_reg (mode, operands[2]);
10251
10252 if (mode == QImode
10253 && ! register_operand (operands[3], VOIDmode))
10254 operands[3] = force_reg (mode, operands[3]);
10255
10256 emit_insn (compare_seq);
10257 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10258 gen_rtx_IF_THEN_ELSE (mode,
10259 compare_op, operands[2],
10260 operands[3])));
10261 if (bypass_test)
10262 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10263 gen_rtx_IF_THEN_ELSE (mode,
10264 bypass_test,
10265 copy_rtx (operands[3]),
10266 copy_rtx (operands[0]))));
10267 if (second_test)
10268 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10269 gen_rtx_IF_THEN_ELSE (mode,
10270 second_test,
10271 copy_rtx (operands[2]),
10272 copy_rtx (operands[0]))));
10273
10274 return 1; /* DONE */
10275 }
10276
10277 int
10278 ix86_expand_fp_movcc (rtx operands[])
10279 {
10280 enum rtx_code code;
10281 rtx tmp;
10282 rtx compare_op, second_test, bypass_test;
10283
10284 /* For SF/DFmode conditional moves based on comparisons
10285 in the same mode, we may want to use SSE min/max instructions. */
10286 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10287 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10288 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10289 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10290 && (!TARGET_IEEE_FP
10291 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10292 /* We may be called from the post-reload splitter. */
10293 && (!REG_P (operands[0])
10294 || SSE_REG_P (operands[0])
10295 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10296 {
10297 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10298 code = GET_CODE (operands[1]);
10299
10300 /* See if we have (cross) match between comparison operands and
10301 conditional move operands. */
10302 if (rtx_equal_p (operands[2], op1))
10303 {
10304 rtx tmp = op0;
10305 op0 = op1;
10306 op1 = tmp;
10307 code = reverse_condition_maybe_unordered (code);
10308 }
10309 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10310 {
10311 /* Check for min operation. */
10312 if (code == LT || code == UNLE)
10313 {
10314 if (code == UNLE)
10315 {
10316 rtx tmp = op0;
10317 op0 = op1;
10318 op1 = tmp;
10319 }
10320 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10321 if (memory_operand (op0, VOIDmode))
10322 op0 = force_reg (GET_MODE (operands[0]), op0);
10323 if (GET_MODE (operands[0]) == SFmode)
10324 emit_insn (gen_minsf3 (operands[0], op0, op1));
10325 else
10326 emit_insn (gen_mindf3 (operands[0], op0, op1));
10327 return 1;
10328 }
10329 /* Check for max operation. */
10330 if (code == GT || code == UNGE)
10331 {
10332 if (code == UNGE)
10333 {
10334 rtx tmp = op0;
10335 op0 = op1;
10336 op1 = tmp;
10337 }
10338 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10339 if (memory_operand (op0, VOIDmode))
10340 op0 = force_reg (GET_MODE (operands[0]), op0);
10341 if (GET_MODE (operands[0]) == SFmode)
10342 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10343 else
10344 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10345 return 1;
10346 }
10347 }
10348 /* Arrange for the condition to be a valid sse_comparison_operator. In case we are
10349 in non-IEEE mode, try to canonicalize the destination operand
10350 to be first in the comparison - this helps reload avoid extra
10351 moves. */
10352 if (!sse_comparison_operator (operands[1], VOIDmode)
10353 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10354 {
10355 rtx tmp = ix86_compare_op0;
10356 ix86_compare_op0 = ix86_compare_op1;
10357 ix86_compare_op1 = tmp;
10358 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10359 VOIDmode, ix86_compare_op0,
10360 ix86_compare_op1);
10361 }
10362 /* Similarly, try to arrange for the result to be the first operand of the conditional
10363 move. We also don't support the NE comparison on SSE, so try to
10364 avoid it. */
10365 if ((rtx_equal_p (operands[0], operands[3])
10366 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10367 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10368 {
10369 rtx tmp = operands[2];
10370 operands[2] = operands[3];
10371 operands[3] = tmp;
10372 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10373 (GET_CODE (operands[1])),
10374 VOIDmode, ix86_compare_op0,
10375 ix86_compare_op1);
10376 }
10377 if (GET_MODE (operands[0]) == SFmode)
10378 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10379 operands[2], operands[3],
10380 ix86_compare_op0, ix86_compare_op1));
10381 else
10382 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10383 operands[2], operands[3],
10384 ix86_compare_op0, ix86_compare_op1));
10385 return 1;
10386 }
10387
10388 /* The floating point conditional move instructions don't directly
10389 support conditions resulting from a signed integer comparison. */
10390
10391 code = GET_CODE (operands[1]);
10392 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10393
10394 /* The floating point conditional move instructions don't directly
10395 support signed integer comparisons. */
10396
10397 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10398 {
10399 if (second_test != NULL || bypass_test != NULL)
10400 abort ();
10401 tmp = gen_reg_rtx (QImode);
10402 ix86_expand_setcc (code, tmp);
10403 code = NE;
10404 ix86_compare_op0 = tmp;
10405 ix86_compare_op1 = const0_rtx;
10406 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10407 }
10408 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10409 {
10410 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10411 emit_move_insn (tmp, operands[3]);
10412 operands[3] = tmp;
10413 }
10414 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10415 {
10416 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10417 emit_move_insn (tmp, operands[2]);
10418 operands[2] = tmp;
10419 }
10420
10421 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10422 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10423 compare_op,
10424 operands[2],
10425 operands[3])));
10426 if (bypass_test)
10427 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10428 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10429 bypass_test,
10430 operands[3],
10431 operands[0])));
10432 if (second_test)
10433 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10434 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10435 second_test,
10436 operands[2],
10437 operands[0])));
10438
10439 return 1;
10440 }
10441
10442 /* Expand conditional increment or decrement using adc/sbb instructions.
10443 The default case using setcc followed by the conditional move can be
10444 done by generic code. */
10445 int
10446 ix86_expand_int_addcc (rtx operands[])
10447 {
10448 enum rtx_code code = GET_CODE (operands[1]);
10449 rtx compare_op;
10450 rtx val = const0_rtx;
10451 bool fpcmp = false;
10452 enum machine_mode mode = GET_MODE (operands[0]);
10453
10454 if (operands[3] != const1_rtx
10455 && operands[3] != constm1_rtx)
10456 return 0;
10457 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10458 ix86_compare_op1, &compare_op))
10459 return 0;
10460 code = GET_CODE (compare_op);
10461
10462 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10463 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10464 {
10465 fpcmp = true;
10466 code = ix86_fp_compare_code_to_integer (code);
10467 }
10468
10469 if (code != LTU)
10470 {
10471 val = constm1_rtx;
10472 if (fpcmp)
10473 PUT_CODE (compare_op,
10474 reverse_condition_maybe_unordered
10475 (GET_CODE (compare_op)));
10476 else
10477 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10478 }
10479 PUT_MODE (compare_op, mode);
10480
10481 /* Construct either adc or sbb insn. */
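/* Editorial note: e.g. a conditional increment (operands[3] == const1_rtx)
   guarded by an LTU carry test takes the adc path with val == 0, so the insn
   computes operands[2] + 0 + carry, i.e. adds 1 exactly when the carry flag
   is set.  */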
10482 if ((code == LTU) == (operands[3] == constm1_rtx))
10483 {
10484 switch (GET_MODE (operands[0]))
10485 {
10486 case QImode:
10487 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10488 break;
10489 case HImode:
10490 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10491 break;
10492 case SImode:
10493 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10494 break;
10495 case DImode:
10496 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10497 break;
10498 default:
10499 abort ();
10500 }
10501 }
10502 else
10503 {
10504 switch (GET_MODE (operands[0]))
10505 {
10506 case QImode:
10507 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10508 break;
10509 case HImode:
10510 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10511 break;
10512 case SImode:
10513 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10514 break;
10515 case DImode:
10516 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10517 break;
10518 default:
10519 abort ();
10520 }
10521 }
10522 return 1; /* DONE */
10523 }
10524
10525
10526 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10527 works for floating point parameters and non-offsettable memories.
10528 For pushes, it returns just stack offsets; the values will be saved
10529 in the right order. At most three parts are generated. */
10530
10531 static int
10532 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10533 {
10534 int size;
10535
10536 if (!TARGET_64BIT)
10537 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10538 else
10539 size = (GET_MODE_SIZE (mode) + 4) / 8;
10540
10541 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10542 abort ();
10543 if (size < 2 || size > 3)
10544 abort ();
10545
10546 /* Optimize constant pool references to immediates. This is used by fp
10547 moves, which force all constants to memory to allow combining. */
10548 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10549 {
10550 rtx tmp = maybe_get_pool_constant (operand);
10551 if (tmp)
10552 operand = tmp;
10553 }
10554
10555 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10556 {
10557 /* The only non-offsettable memories we handle are pushes. */
10558 if (! push_operand (operand, VOIDmode))
10559 abort ();
10560
10561 operand = copy_rtx (operand);
10562 PUT_MODE (operand, Pmode);
10563 parts[0] = parts[1] = parts[2] = operand;
10564 }
10565 else if (!TARGET_64BIT)
10566 {
10567 if (mode == DImode)
10568 split_di (&operand, 1, &parts[0], &parts[1]);
10569 else
10570 {
10571 if (REG_P (operand))
10572 {
10573 if (!reload_completed)
10574 abort ();
10575 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10576 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10577 if (size == 3)
10578 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10579 }
10580 else if (offsettable_memref_p (operand))
10581 {
10582 operand = adjust_address (operand, SImode, 0);
10583 parts[0] = operand;
10584 parts[1] = adjust_address (operand, SImode, 4);
10585 if (size == 3)
10586 parts[2] = adjust_address (operand, SImode, 8);
10587 }
10588 else if (GET_CODE (operand) == CONST_DOUBLE)
10589 {
10590 REAL_VALUE_TYPE r;
10591 long l[4];
10592
10593 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10594 switch (mode)
10595 {
10596 case XFmode:
10597 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10598 parts[2] = gen_int_mode (l[2], SImode);
10599 break;
10600 case DFmode:
10601 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10602 break;
10603 default:
10604 abort ();
10605 }
10606 parts[1] = gen_int_mode (l[1], SImode);
10607 parts[0] = gen_int_mode (l[0], SImode);
10608 }
10609 else
10610 abort ();
10611 }
10612 }
10613 else
10614 {
10615 if (mode == TImode)
10616 split_ti (&operand, 1, &parts[0], &parts[1]);
10617 if (mode == XFmode || mode == TFmode)
10618 {
10619 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10620 if (REG_P (operand))
10621 {
10622 if (!reload_completed)
10623 abort ();
10624 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10625 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10626 }
10627 else if (offsettable_memref_p (operand))
10628 {
10629 operand = adjust_address (operand, DImode, 0);
10630 parts[0] = operand;
10631 parts[1] = adjust_address (operand, upper_mode, 8);
10632 }
10633 else if (GET_CODE (operand) == CONST_DOUBLE)
10634 {
10635 REAL_VALUE_TYPE r;
10636 long l[3];
10637
10638 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10639 real_to_target (l, &r, mode);
10640 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10641 if (HOST_BITS_PER_WIDE_INT >= 64)
10642 parts[0]
10643 = gen_int_mode
10644 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10645 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10646 DImode);
10647 else
10648 parts[0] = immed_double_const (l[0], l[1], DImode);
10649 if (upper_mode == SImode)
10650 parts[1] = gen_int_mode (l[2], SImode);
10651 else if (HOST_BITS_PER_WIDE_INT >= 64)
10652 parts[1]
10653 = gen_int_mode
10654 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10655 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10656 DImode);
10657 else
10658 parts[1] = immed_double_const (l[2], l[3], DImode);
10659 }
10660 else
10661 abort ();
10662 }
10663 }
10664
10665 return size;
10666 }
10667
10668 /* Emit insns to perform a move or push of DI, DF, and XF values.
10669 Return false when normal moves are needed; true when all required
10670 insns have been emitted. Operands 2-4 contain the input values
10671 in the correct order; operands 5-7 contain the output values. */
10672
10673 void
10674 ix86_split_long_move (rtx operands[])
10675 {
10676 rtx part[2][3];
10677 int nparts;
10678 int push = 0;
10679 int collisions = 0;
10680 enum machine_mode mode = GET_MODE (operands[0]);
10681
10682 /* The DFmode expanders may ask us to move a double.
10683 For a 64-bit target this is a single move. By hiding the fact
10684 here we simplify the i386.md splitters. */
10685 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10686 {
10687 /* Optimize constant pool references to immediates. This is used by
10688 fp moves, which force all constants to memory to allow combining. */
10689
10690 if (GET_CODE (operands[1]) == MEM
10691 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10692 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10693 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10694 if (push_operand (operands[0], VOIDmode))
10695 {
10696 operands[0] = copy_rtx (operands[0]);
10697 PUT_MODE (operands[0], Pmode);
10698 }
10699 else
10700 operands[0] = gen_lowpart (DImode, operands[0]);
10701 operands[1] = gen_lowpart (DImode, operands[1]);
10702 emit_move_insn (operands[0], operands[1]);
10703 return;
10704 }
10705
10706 /* The only non-offsettable memory we handle is push. */
10707 if (push_operand (operands[0], VOIDmode))
10708 push = 1;
10709 else if (GET_CODE (operands[0]) == MEM
10710 && ! offsettable_memref_p (operands[0]))
10711 abort ();
10712
10713 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10714 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10715
10716 /* When emitting a push, take care of source operands on the stack. */
10717 if (push && GET_CODE (operands[1]) == MEM
10718 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10719 {
10720 if (nparts == 3)
10721 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10722 XEXP (part[1][2], 0));
10723 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10724 XEXP (part[1][1], 0));
10725 }
10726
10727 /* We need to do the copy in the right order in case an address register
10728 of the source overlaps the destination. */
10729 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10730 {
10731 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10732 collisions++;
10733 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10734 collisions++;
10735 if (nparts == 3
10736 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10737 collisions++;
10738
10739 /* Collision in the middle part can be handled by reordering. */
10740 if (collisions == 1 && nparts == 3
10741 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10742 {
10743 rtx tmp;
10744 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10745 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10746 }
10747
10748 /* If there are more collisions, we can't handle them by reordering.
10749 Do an lea to the last part and use only one colliding move. */
10750 else if (collisions > 1)
10751 {
10752 rtx base;
10753
10754 collisions = 1;
10755
10756 base = part[0][nparts - 1];
10757
10758 /* Handle the case when the last part isn't valid for lea.
10759 Happens in 64-bit mode storing the 12-byte XFmode. */
10760 if (GET_MODE (base) != Pmode)
10761 base = gen_rtx_REG (Pmode, REGNO (base));
10762
10763 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10764 part[1][0] = replace_equiv_address (part[1][0], base);
10765 part[1][1] = replace_equiv_address (part[1][1],
10766 plus_constant (base, UNITS_PER_WORD));
10767 if (nparts == 3)
10768 part[1][2] = replace_equiv_address (part[1][2],
10769 plus_constant (base, 8));
10770 }
10771 }
10772
10773 if (push)
10774 {
10775 if (!TARGET_64BIT)
10776 {
10777 if (nparts == 3)
10778 {
10779 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10780 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10781 emit_move_insn (part[0][2], part[1][2]);
10782 }
10783 }
10784 else
10785 {
10786 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
10787 register, it is OK - we will just use the larger counterpart. We also
10788 retype memory - this comes from an attempt to avoid the REX prefix on
10789 moving the second half of a TFmode value. */
10790 if (GET_MODE (part[1][1]) == SImode)
10791 {
10792 if (GET_CODE (part[1][1]) == MEM)
10793 part[1][1] = adjust_address (part[1][1], DImode, 0);
10794 else if (REG_P (part[1][1]))
10795 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10796 else
10797 abort ();
10798 if (GET_MODE (part[1][0]) == SImode)
10799 part[1][0] = part[1][1];
10800 }
10801 }
10802 emit_move_insn (part[0][1], part[1][1]);
10803 emit_move_insn (part[0][0], part[1][0]);
10804 return;
10805 }
10806
10807 /* Choose the correct order so as not to overwrite the source before it is copied. */
10808 if ((REG_P (part[0][0])
10809 && REG_P (part[1][1])
10810 && (REGNO (part[0][0]) == REGNO (part[1][1])
10811 || (nparts == 3
10812 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10813 || (collisions > 0
10814 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10815 {
10816 if (nparts == 3)
10817 {
10818 operands[2] = part[0][2];
10819 operands[3] = part[0][1];
10820 operands[4] = part[0][0];
10821 operands[5] = part[1][2];
10822 operands[6] = part[1][1];
10823 operands[7] = part[1][0];
10824 }
10825 else
10826 {
10827 operands[2] = part[0][1];
10828 operands[3] = part[0][0];
10829 operands[5] = part[1][1];
10830 operands[6] = part[1][0];
10831 }
10832 }
10833 else
10834 {
10835 if (nparts == 3)
10836 {
10837 operands[2] = part[0][0];
10838 operands[3] = part[0][1];
10839 operands[4] = part[0][2];
10840 operands[5] = part[1][0];
10841 operands[6] = part[1][1];
10842 operands[7] = part[1][2];
10843 }
10844 else
10845 {
10846 operands[2] = part[0][0];
10847 operands[3] = part[0][1];
10848 operands[5] = part[1][0];
10849 operands[6] = part[1][1];
10850 }
10851 }
10852 emit_move_insn (operands[2], operands[5]);
10853 emit_move_insn (operands[3], operands[6]);
10854 if (nparts == 3)
10855 emit_move_insn (operands[4], operands[7]);
10856
10857 return;
10858 }
10859
10860 void
10861 ix86_split_ashldi (rtx *operands, rtx scratch)
10862 {
10863 rtx low[2], high[2];
10864 int count;
10865
10866 if (GET_CODE (operands[2]) == CONST_INT)
10867 {
10868 split_di (operands, 2, low, high);
10869 count = INTVAL (operands[2]) & 63;
10870
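/* Editorial note: for a constant shift of 32 or more the low word of the
   result is zero and the high word is the original low word shifted left
   by the remaining (count - 32) bits.  */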
10871 if (count >= 32)
10872 {
10873 emit_move_insn (high[0], low[1]);
10874 emit_move_insn (low[0], const0_rtx);
10875
10876 if (count > 32)
10877 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10878 }
10879 else
10880 {
10881 if (!rtx_equal_p (operands[0], operands[1]))
10882 emit_move_insn (operands[0], operands[1]);
10883 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10884 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10885 }
10886 }
10887 else
10888 {
10889 if (!rtx_equal_p (operands[0], operands[1]))
10890 emit_move_insn (operands[0], operands[1]);
10891
10892 split_di (operands, 1, low, high);
10893
10894 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10895 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10896
10897 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10898 {
10899 if (! no_new_pseudos)
10900 scratch = force_reg (SImode, const0_rtx);
10901 else
10902 emit_move_insn (scratch, const0_rtx);
10903
10904 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10905 scratch));
10906 }
10907 else
10908 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10909 }
10910 }
10911
10912 void
10913 ix86_split_ashrdi (rtx *operands, rtx scratch)
10914 {
10915 rtx low[2], high[2];
10916 int count;
10917
10918 if (GET_CODE (operands[2]) == CONST_INT)
10919 {
10920 split_di (operands, 2, low, high);
10921 count = INTVAL (operands[2]) & 63;
10922
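/* Editorial note: a constant arithmetic shift right by 63 leaves only the
   sign, so both words become the 0/-1 sign mask; for counts of 32 or more
   the high word moves into the low word (shifted by count - 32 if needed)
   and the high word is sign-filled.  */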
10923 if (count == 63)
10924 {
10925 emit_move_insn (high[0], high[1]);
10926 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10927 emit_move_insn (low[0], high[0]);
10928
10929 }
10930 else if (count >= 32)
10931 {
10932 emit_move_insn (low[0], high[1]);
10933
10934 if (! reload_completed)
10935 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10936 else
10937 {
10938 emit_move_insn (high[0], low[0]);
10939 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10940 }
10941
10942 if (count > 32)
10943 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10944 }
10945 else
10946 {
10947 if (!rtx_equal_p (operands[0], operands[1]))
10948 emit_move_insn (operands[0], operands[1]);
10949 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10950 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10951 }
10952 }
10953 else
10954 {
10955 if (!rtx_equal_p (operands[0], operands[1]))
10956 emit_move_insn (operands[0], operands[1]);
10957
10958 split_di (operands, 1, low, high);
10959
10960 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10961 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10962
10963 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10964 {
10965 if (! no_new_pseudos)
10966 scratch = gen_reg_rtx (SImode);
10967 emit_move_insn (scratch, high[0]);
10968 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10969 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10970 scratch));
10971 }
10972 else
10973 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10974 }
10975 }
10976
10977 void
10978 ix86_split_lshrdi (rtx *operands, rtx scratch)
10979 {
10980 rtx low[2], high[2];
10981 int count;
10982
10983 if (GET_CODE (operands[2]) == CONST_INT)
10984 {
10985 split_di (operands, 2, low, high);
10986 count = INTVAL (operands[2]) & 63;
10987
10988 if (count >= 32)
10989 {
10990 emit_move_insn (low[0], high[1]);
10991 emit_move_insn (high[0], const0_rtx);
10992
10993 if (count > 32)
10994 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10995 }
10996 else
10997 {
10998 if (!rtx_equal_p (operands[0], operands[1]))
10999 emit_move_insn (operands[0], operands[1]);
11000 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11001 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11002 }
11003 }
11004 else
11005 {
11006 if (!rtx_equal_p (operands[0], operands[1]))
11007 emit_move_insn (operands[0], operands[1]);
11008
11009 split_di (operands, 1, low, high);
11010
11011 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11012 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11013
11014 /* Heh. By reversing the arguments, we can reuse this pattern. */
11015 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11016 {
11017 if (! no_new_pseudos)
11018 scratch = force_reg (SImode, const0_rtx);
11019 else
11020 emit_move_insn (scratch, const0_rtx);
11021
11022 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11023 scratch));
11024 }
11025 else
11026 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11027 }
11028 }
11029
11030 /* Helper function for the string operations below. Test VARIABLE for whether
11031 it is aligned to VALUE bytes. If so, jump to the returned label. */
11032 static rtx
11033 ix86_expand_aligntest (rtx variable, int value)
11034 {
11035 rtx label = gen_label_rtx ();
11036 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11037 if (GET_MODE (variable) == DImode)
11038 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11039 else
11040 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11041 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11042 1, label);
11043 return label;
11044 }
11045
11046 /* Adjust COUNTER by the VALUE. */
11047 static void
11048 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11049 {
11050 if (GET_MODE (countreg) == DImode)
11051 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11052 else
11053 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11054 }
11055
11056 /* Zero extend possibly SImode EXP to Pmode register. */
11057 rtx
11058 ix86_zero_extend_to_Pmode (rtx exp)
11059 {
11060 rtx r;
11061 if (GET_MODE (exp) == VOIDmode)
11062 return force_reg (Pmode, exp);
11063 if (GET_MODE (exp) == Pmode)
11064 return copy_to_mode_reg (Pmode, exp);
11065 r = gen_reg_rtx (Pmode);
11066 emit_insn (gen_zero_extendsidi2 (r, exp));
11067 return r;
11068 }
11069
11070 /* Expand string move (memcpy) operation. Use i386 string operations when
11071 profitable. expand_clrstr contains similar code. */
11072 int
11073 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11074 {
11075 rtx srcreg, destreg, countreg, srcexp, destexp;
11076 enum machine_mode counter_mode;
11077 HOST_WIDE_INT align = 0;
11078 unsigned HOST_WIDE_INT count = 0;
11079
11080 if (GET_CODE (align_exp) == CONST_INT)
11081 align = INTVAL (align_exp);
11082
11083 /* Can't use any of this if the user has appropriated esi or edi. */
11084 if (global_regs[4] || global_regs[5])
11085 return 0;
11086
11087 /* This simple hack avoids all inlining code and simplifies code below. */
11088 if (!TARGET_ALIGN_STRINGOPS)
11089 align = 64;
11090
11091 if (GET_CODE (count_exp) == CONST_INT)
11092 {
11093 count = INTVAL (count_exp);
11094 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11095 return 0;
11096 }
11097
11098 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
11099 for 64 bits use SImode when possible, otherwise DImode.
11100 Set count to the number of bytes copied when known at compile time. */
11101 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11102 || x86_64_zero_extended_value (count_exp))
11103 counter_mode = SImode;
11104 else
11105 counter_mode = DImode;
11106
11107 if (counter_mode != SImode && counter_mode != DImode)
11108 abort ();
11109
11110 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11111 if (destreg != XEXP (dst, 0))
11112 dst = replace_equiv_address_nv (dst, destreg);
11113 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11114 if (srcreg != XEXP (src, 0))
11115 src = replace_equiv_address_nv (src, srcreg);
11116
11117 /* When optimizing for size, emit a simple rep ; movsb instruction for
11118 counts not divisible by 4. */
11119
11120 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11121 {
11122 emit_insn (gen_cld ());
11123 countreg = ix86_zero_extend_to_Pmode (count_exp);
11124 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11125 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11126 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11127 destexp, srcexp));
11128 }
11129
11130 /* For constant aligned (or small unaligned) copies use rep movsl
11131 followed by code copying the rest. For PentiumPro ensure 8 byte
11132 alignment to allow rep movsl acceleration. */
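/* Worked example (editorial note): a 23-byte copy with 4-byte chunks does
   rep movsl for 5 longwords (20 bytes) and then copies the remaining 2 + 1
   bytes with the HImode and QImode moves emitted below.  */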
11133
11134 else if (count != 0
11135 && (align >= 8
11136 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11137 || optimize_size || count < (unsigned int) 64))
11138 {
11139 unsigned HOST_WIDE_INT offset = 0;
11140 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11141 rtx srcmem, dstmem;
11142
11143 emit_insn (gen_cld ());
11144 if (count & ~(size - 1))
11145 {
11146 countreg = copy_to_mode_reg (counter_mode,
11147 GEN_INT ((count >> (size == 4 ? 2 : 3))
11148 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11149 countreg = ix86_zero_extend_to_Pmode (countreg);
11150
11151 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11152 GEN_INT (size == 4 ? 2 : 3));
11153 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11154 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11155
11156 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11157 countreg, destexp, srcexp));
11158 offset = count & ~(size - 1);
11159 }
11160 if (size == 8 && (count & 0x04))
11161 {
11162 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11163 offset);
11164 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11165 offset);
11166 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11167 offset += 4;
11168 }
11169 if (count & 0x02)
11170 {
11171 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11172 offset);
11173 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11174 offset);
11175 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11176 offset += 2;
11177 }
11178 if (count & 0x01)
11179 {
11180 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11181 offset);
11182 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11183 offset);
11184 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11185 }
11186 }
11187 /* The generic code based on the glibc implementation:
11188 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11189 allowing accelerated copying there)
11190 - copy the data using rep movsl
11191 - copy the rest. */
11192 else
11193 {
11194 rtx countreg2;
11195 rtx label = NULL;
11196 rtx srcmem, dstmem;
11197 int desired_alignment = (TARGET_PENTIUMPRO
11198 && (count == 0 || count >= (unsigned int) 260)
11199 ? 8 : UNITS_PER_WORD);
11200 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11201 dst = change_address (dst, BLKmode, destreg);
11202 src = change_address (src, BLKmode, srcreg);
11203
11204 /* In case we don't know anything about the alignment, default to the
11205 library version, since it is usually equally fast and results in
11206 shorter code.
11207
11208 Also emit a call when we know that the count is large and the call overhead
11209 will not be important. */
11210 if (!TARGET_INLINE_ALL_STRINGOPS
11211 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11212 return 0;
11213
11214 if (TARGET_SINGLE_STRINGOP)
11215 emit_insn (gen_cld ());
11216
11217 countreg2 = gen_reg_rtx (Pmode);
11218 countreg = copy_to_mode_reg (counter_mode, count_exp);
11219
11220 /* We don't use loops to align destination and to copy parts smaller
11221 than 4 bytes, because gcc is able to optimize such code better (in
11222 the case the destination or the count really is aligned, gcc is often
11223 able to predict the branches) and also it is friendlier to the
11224 hardware branch prediction.
11225
11226 Using loops is beneficial for the generic case, because we can
11227 handle small counts using the loops. Many CPUs (such as the Athlon)
11228 have large REP prefix setup costs.
11229
11230 This is quite costly. Maybe we can revisit this decision later or
11231 add some customizability to this code. */
11232
11233 if (count == 0 && align < desired_alignment)
11234 {
11235 label = gen_label_rtx ();
11236 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11237 LEU, 0, counter_mode, 1, label);
11238 }
11239 if (align <= 1)
11240 {
11241 rtx label = ix86_expand_aligntest (destreg, 1);
11242 srcmem = change_address (src, QImode, srcreg);
11243 dstmem = change_address (dst, QImode, destreg);
11244 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11245 ix86_adjust_counter (countreg, 1);
11246 emit_label (label);
11247 LABEL_NUSES (label) = 1;
11248 }
11249 if (align <= 2)
11250 {
11251 rtx label = ix86_expand_aligntest (destreg, 2);
11252 srcmem = change_address (src, HImode, srcreg);
11253 dstmem = change_address (dst, HImode, destreg);
11254 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11255 ix86_adjust_counter (countreg, 2);
11256 emit_label (label);
11257 LABEL_NUSES (label) = 1;
11258 }
11259 if (align <= 4 && desired_alignment > 4)
11260 {
11261 rtx label = ix86_expand_aligntest (destreg, 4);
11262 srcmem = change_address (src, SImode, srcreg);
11263 dstmem = change_address (dst, SImode, destreg);
11264 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11265 ix86_adjust_counter (countreg, 4);
11266 emit_label (label);
11267 LABEL_NUSES (label) = 1;
11268 }
11269
11270 if (label && desired_alignment > 4 && !TARGET_64BIT)
11271 {
11272 emit_label (label);
11273 LABEL_NUSES (label) = 1;
11274 label = NULL_RTX;
11275 }
11276 if (!TARGET_SINGLE_STRINGOP)
11277 emit_insn (gen_cld ());
11278 if (TARGET_64BIT)
11279 {
11280 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11281 GEN_INT (3)));
11282 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11283 }
11284 else
11285 {
11286 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11287 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11288 }
11289 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11290 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11291 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11292 countreg2, destexp, srcexp));
11293
11294 if (label)
11295 {
11296 emit_label (label);
11297 LABEL_NUSES (label) = 1;
11298 }
11299 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11300 {
11301 srcmem = change_address (src, SImode, srcreg);
11302 dstmem = change_address (dst, SImode, destreg);
11303 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11304 }
11305 if ((align <= 4 || count == 0) && TARGET_64BIT)
11306 {
11307 rtx label = ix86_expand_aligntest (countreg, 4);
11308 srcmem = change_address (src, SImode, srcreg);
11309 dstmem = change_address (dst, SImode, destreg);
11310 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11311 emit_label (label);
11312 LABEL_NUSES (label) = 1;
11313 }
11314 if (align > 2 && count != 0 && (count & 2))
11315 {
11316 srcmem = change_address (src, HImode, srcreg);
11317 dstmem = change_address (dst, HImode, destreg);
11318 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11319 }
11320 if (align <= 2 || count == 0)
11321 {
11322 rtx label = ix86_expand_aligntest (countreg, 2);
11323 srcmem = change_address (src, HImode, srcreg);
11324 dstmem = change_address (dst, HImode, destreg);
11325 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11326 emit_label (label);
11327 LABEL_NUSES (label) = 1;
11328 }
11329 if (align > 1 && count != 0 && (count & 1))
11330 {
11331 srcmem = change_address (src, QImode, srcreg);
11332 dstmem = change_address (dst, QImode, destreg);
11333 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11334 }
11335 if (align <= 1 || count == 0)
11336 {
11337 rtx label = ix86_expand_aligntest (countreg, 1);
11338 srcmem = change_address (src, QImode, srcreg);
11339 dstmem = change_address (dst, QImode, destreg);
11340 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11341 emit_label (label);
11342 LABEL_NUSES (label) = 1;
11343 }
11344 }
11345
11346 return 1;
11347 }
11348
11349 /* Expand string clear operation (bzero). Use i386 string operations when
11350 profitable. expand_movstr contains similar code. */
11351 int
11352 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11353 {
11354 rtx destreg, zeroreg, countreg, destexp;
11355 enum machine_mode counter_mode;
11356 HOST_WIDE_INT align = 0;
11357 unsigned HOST_WIDE_INT count = 0;
11358
11359 if (GET_CODE (align_exp) == CONST_INT)
11360 align = INTVAL (align_exp);
11361
11362 /* Can't use any of this if the user has appropriated esi. */
11363 if (global_regs[4])
11364 return 0;
11365
11366 /* This simple hack avoids all inlining code and simplifies code below. */
11367 if (!TARGET_ALIGN_STRINGOPS)
11368 align = 32;
11369
11370 if (GET_CODE (count_exp) == CONST_INT)
11371 {
11372 count = INTVAL (count_exp);
11373 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11374 return 0;
11375 }
11376 /* Figure out the proper mode for the counter. For 32-bit targets it is
11377 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
11378 Set count to the number of bytes to be cleared when known at compile time. */
11379 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11380 || x86_64_zero_extended_value (count_exp))
11381 counter_mode = SImode;
11382 else
11383 counter_mode = DImode;
11384
11385 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11386 if (destreg != XEXP (dst, 0))
11387 dst = replace_equiv_address_nv (dst, destreg);
11388
11389 emit_insn (gen_cld ());
11390
11391 /* When optimizing for size emit a simple rep ; stosb instruction for
11392 counts not divisible by 4. */
11393
11394 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11395 {
11396 countreg = ix86_zero_extend_to_Pmode (count_exp);
11397 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11398 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11399 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11400 }
11401 else if (count != 0
11402 && (align >= 8
11403 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11404 || optimize_size || count < (unsigned int) 64))
11405 {
11406 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11407 unsigned HOST_WIDE_INT offset = 0;
11408
11409 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11410 if (count & ~(size - 1))
11411 {
11412 countreg = copy_to_mode_reg (counter_mode,
11413 GEN_INT ((count >> (size == 4 ? 2 : 3))
11414 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11415 countreg = ix86_zero_extend_to_Pmode (countreg);
11416 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11417 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11418 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11419 offset = count & ~(size - 1);
11420 }
11421 if (size == 8 && (count & 0x04))
11422 {
11423 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11424 offset);
11425 emit_insn (gen_strset (destreg, mem,
11426 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11427 offset += 4;
11428 }
11429 if (count & 0x02)
11430 {
11431 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11432 offset);
11433 emit_insn (gen_strset (destreg, mem,
11434 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11435 offset += 2;
11436 }
11437 if (count & 0x01)
11438 {
11439 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11440 offset);
11441 emit_insn (gen_strset (destreg, mem,
11442 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11443 }
11444 }
11445 else
11446 {
11447 rtx countreg2;
11448 rtx label = NULL;
11449 /* Compute desired alignment of the string operation. */
11450 int desired_alignment = (TARGET_PENTIUMPRO
11451 && (count == 0 || count >= (unsigned int) 260)
11452 ? 8 : UNITS_PER_WORD);
11453
11454 /* In case we don't know anything about the alignment, default to
11455 library version, since it is usually equally fast and results in
11456 shorter code.
11457
11458 Also emit call when we know that the count is large and call overhead
11459 will not be important. */
11460 if (!TARGET_INLINE_ALL_STRINGOPS
11461 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11462 return 0;
11463
11464 if (TARGET_SINGLE_STRINGOP)
11465 emit_insn (gen_cld ());
11466
11467 countreg2 = gen_reg_rtx (Pmode);
11468 countreg = copy_to_mode_reg (counter_mode, count_exp);
11469 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11470 /* Get rid of MEM_OFFSET, it won't be accurate. */
11471 dst = change_address (dst, BLKmode, destreg);
11472
11473 if (count == 0 && align < desired_alignment)
11474 {
11475 label = gen_label_rtx ();
11476 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11477 LEU, 0, counter_mode, 1, label);
11478 }
11479 if (align <= 1)
11480 {
11481 rtx label = ix86_expand_aligntest (destreg, 1);
11482 emit_insn (gen_strset (destreg, dst,
11483 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11484 ix86_adjust_counter (countreg, 1);
11485 emit_label (label);
11486 LABEL_NUSES (label) = 1;
11487 }
11488 if (align <= 2)
11489 {
11490 rtx label = ix86_expand_aligntest (destreg, 2);
11491 emit_insn (gen_strset (destreg, dst,
11492 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11493 ix86_adjust_counter (countreg, 2);
11494 emit_label (label);
11495 LABEL_NUSES (label) = 1;
11496 }
11497 if (align <= 4 && desired_alignment > 4)
11498 {
11499 rtx label = ix86_expand_aligntest (destreg, 4);
11500 emit_insn (gen_strset (destreg, dst,
11501 (TARGET_64BIT
11502 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11503 : zeroreg)));
11504 ix86_adjust_counter (countreg, 4);
11505 emit_label (label);
11506 LABEL_NUSES (label) = 1;
11507 }
11508
11509 if (label && desired_alignment > 4 && !TARGET_64BIT)
11510 {
11511 emit_label (label);
11512 LABEL_NUSES (label) = 1;
11513 label = NULL_RTX;
11514 }
11515
11516 if (!TARGET_SINGLE_STRINGOP)
11517 emit_insn (gen_cld ());
11518 if (TARGET_64BIT)
11519 {
11520 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11521 GEN_INT (3)));
11522 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11523 }
11524 else
11525 {
11526 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11527 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11528 }
11529 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11530 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11531
11532 if (label)
11533 {
11534 emit_label (label);
11535 LABEL_NUSES (label) = 1;
11536 }
11537
11538 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11539 emit_insn (gen_strset (destreg, dst,
11540 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11541 if (TARGET_64BIT && (align <= 4 || count == 0))
11542 {
11543 rtx label = ix86_expand_aligntest (countreg, 4);
11544 emit_insn (gen_strset (destreg, dst,
11545 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11546 emit_label (label);
11547 LABEL_NUSES (label) = 1;
11548 }
11549 if (align > 2 && count != 0 && (count & 2))
11550 emit_insn (gen_strset (destreg, dst,
11551 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11552 if (align <= 2 || count == 0)
11553 {
11554 rtx label = ix86_expand_aligntest (countreg, 2);
11555 emit_insn (gen_strset (destreg, dst,
11556 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11557 emit_label (label);
11558 LABEL_NUSES (label) = 1;
11559 }
11560 if (align > 1 && count != 0 && (count & 1))
11561 emit_insn (gen_strset (destreg, dst,
11562 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11563 if (align <= 1 || count == 0)
11564 {
11565 rtx label = ix86_expand_aligntest (countreg, 1);
11566 emit_insn (gen_strset (destreg, dst,
11567 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11568 emit_label (label);
11569 LABEL_NUSES (label) = 1;
11570 }
11571 }
11572 return 1;
11573 }
11574
11575 /* Expand strlen. */
11576 int
11577 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11578 {
11579 rtx addr, scratch1, scratch2, scratch3, scratch4;
11580
11581 /* The generic case of the strlen expander is long. Avoid its
11582 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11583
11584 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11585 && !TARGET_INLINE_ALL_STRINGOPS
11586 && !optimize_size
11587 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11588 return 0;
11589
11590 addr = force_reg (Pmode, XEXP (src, 0));
11591 scratch1 = gen_reg_rtx (Pmode);
11592
11593 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11594 && !optimize_size)
11595 {
11596 /* It seems that some optimizers do not combine a call like
11597 foo(strlen(bar), strlen(bar));
11598 when the move and the subtraction are done here. The length is
11599 calculated just once when these instructions are done inside of
11600 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
11601 and this uses one fewer register for the lifetime of
11602 output_strlen_unroll(), this is better. */
11603
11604 emit_move_insn (out, addr);
11605
11606 ix86_expand_strlensi_unroll_1 (out, src, align);
11607
11608 /* strlensi_unroll_1 returns the address of the zero at the end of
11609 the string, like memchr(), so compute the length by subtracting
11610 the start address. */
11611 if (TARGET_64BIT)
11612 emit_insn (gen_subdi3 (out, out, addr));
11613 else
11614 emit_insn (gen_subsi3 (out, out, addr));
11615 }
11616 else
11617 {
11618 rtx unspec;
11619 scratch2 = gen_reg_rtx (Pmode);
11620 scratch3 = gen_reg_rtx (Pmode);
11621 scratch4 = force_reg (Pmode, constm1_rtx);
11622
11623 emit_move_insn (scratch3, addr);
11624 eoschar = force_reg (QImode, eoschar);
11625
11626 emit_insn (gen_cld ());
11627 src = replace_equiv_address_nv (src, scratch3);
11628
11629 /* If .md starts supporting :P, this can be done in .md. */
11630 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11631 scratch4), UNSPEC_SCAS);
11632 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
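/* Added explanatory note (not in the original source): the scan starts with
   the count register at -1 and decrements it once per byte examined,
   including the terminating zero, so it ends up holding -(length + 2).
   The complement and add of -1 below therefore recover the length:
   ~(-(length + 2)) - 1 == length.  */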
11633 if (TARGET_64BIT)
11634 {
11635 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11636 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11637 }
11638 else
11639 {
11640 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11641 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11642 }
11643 }
11644 return 1;
11645 }
11646
11647 /* Expand the appropriate insns for doing strlen if not just doing
11648 repnz; scasb
11649
11650 out = result, initialized with the start address
11651 align_rtx = alignment of the address.
11652 scratch = scratch register, initialized with the start address when
11653 not aligned, otherwise undefined
11654
11655 This is just the body. It needs the initializations mentioned above and
11656 some address computing at the end. These things are done in i386.md. */
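/* Added overview (not in the original source): the expansion below first
   compares up to three leading bytes individually until OUT is 4-byte
   aligned, then loops reading a word at a time and applying the
   (word - 0x01010101) & ~word & 0x80808080 test to detect a zero byte,
   and finally adjusts OUT so that it points exactly at the terminating
   zero.  */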
11657
11658 static void
11659 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11660 {
11661 int align;
11662 rtx tmp;
11663 rtx align_2_label = NULL_RTX;
11664 rtx align_3_label = NULL_RTX;
11665 rtx align_4_label = gen_label_rtx ();
11666 rtx end_0_label = gen_label_rtx ();
11667 rtx mem;
11668 rtx tmpreg = gen_reg_rtx (SImode);
11669 rtx scratch = gen_reg_rtx (SImode);
11670 rtx cmp;
11671
11672 align = 0;
11673 if (GET_CODE (align_rtx) == CONST_INT)
11674 align = INTVAL (align_rtx);
11675
11676 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11677
11678 /* Is there a known alignment and is it less than 4? */
11679 if (align < 4)
11680 {
11681 rtx scratch1 = gen_reg_rtx (Pmode);
11682 emit_move_insn (scratch1, out);
11683 /* Is there a known alignment and is it not 2? */
11684 if (align != 2)
11685 {
11686 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11687 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11688
11689 /* Leave just the 3 lower bits. */
11690 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11691 NULL_RTX, 0, OPTAB_WIDEN);
11692
11693 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11694 Pmode, 1, align_4_label);
11695 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11696 Pmode, 1, align_2_label);
11697 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11698 Pmode, 1, align_3_label);
11699 }
11700 else
11701 {
11702 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11703 check whether it is aligned to a 4-byte boundary. */
11704
11705 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11706 NULL_RTX, 0, OPTAB_WIDEN);
11707
11708 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11709 Pmode, 1, align_4_label);
11710 }
11711
11712 mem = change_address (src, QImode, out);
11713
11714 /* Now compare the bytes. */
11715
11716 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11717 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11718 QImode, 1, end_0_label);
11719
11720 /* Increment the address. */
11721 if (TARGET_64BIT)
11722 emit_insn (gen_adddi3 (out, out, const1_rtx));
11723 else
11724 emit_insn (gen_addsi3 (out, out, const1_rtx));
11725
11726 /* Not needed with an alignment of 2 */
11727 if (align != 2)
11728 {
11729 emit_label (align_2_label);
11730
11731 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11732 end_0_label);
11733
11734 if (TARGET_64BIT)
11735 emit_insn (gen_adddi3 (out, out, const1_rtx));
11736 else
11737 emit_insn (gen_addsi3 (out, out, const1_rtx));
11738
11739 emit_label (align_3_label);
11740 }
11741
11742 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11743 end_0_label);
11744
11745 if (TARGET_64BIT)
11746 emit_insn (gen_adddi3 (out, out, const1_rtx));
11747 else
11748 emit_insn (gen_addsi3 (out, out, const1_rtx));
11749 }
11750
11751 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11752 align this loop; it only makes the program larger and does not
11753 help speed. */
11754 emit_label (align_4_label);
11755
11756 mem = change_address (src, SImode, out);
11757 emit_move_insn (scratch, mem);
11758 if (TARGET_64BIT)
11759 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11760 else
11761 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11762
11763 /* This formula yields a nonzero result iff one of the bytes is zero.
11764 This saves three branches inside the loop and many cycles. */
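/* Worked example (added; not in the original source): for scratch ==
   0x11220044 the second byte is zero; scratch - 0x01010101 == 0x1020ff43,
   ~scratch == 0xeeddffbb, and masking the AND of those with 0x80808080
   leaves 0x00008000, i.e. a 0x80 marker at the position of the zero byte.
   When no byte is zero the result is 0 and the loop continues.  */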
11765
11766 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11767 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11768 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11769 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11770 gen_int_mode (0x80808080, SImode)));
11771 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11772 align_4_label);
11773
11774 if (TARGET_CMOVE)
11775 {
11776 rtx reg = gen_reg_rtx (SImode);
11777 rtx reg2 = gen_reg_rtx (Pmode);
11778 emit_move_insn (reg, tmpreg);
11779 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11780
11781 /* If zero is not in the first two bytes, move two bytes forward. */
11782 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11783 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11784 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11785 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11786 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11787 reg,
11788 tmpreg)));
11789 /* Emit lea manually to avoid clobbering of flags. */
11790 emit_insn (gen_rtx_SET (SImode, reg2,
11791 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11792
11793 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11794 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11795 emit_insn (gen_rtx_SET (VOIDmode, out,
11796 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11797 reg2,
11798 out)));
11799
11800 }
11801 else
11802 {
11803 rtx end_2_label = gen_label_rtx ();
11804 /* Is zero in the first two bytes? */
11805
11806 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11807 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11808 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11809 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11810 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11811 pc_rtx);
11812 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11813 JUMP_LABEL (tmp) = end_2_label;
11814
11815 /* Not in the first two. Move two bytes forward. */
11816 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11817 if (TARGET_64BIT)
11818 emit_insn (gen_adddi3 (out, out, const2_rtx));
11819 else
11820 emit_insn (gen_addsi3 (out, out, const2_rtx));
11821
11822 emit_label (end_2_label);
11823
11824 }
11825
11826 /* Avoid branch in fixing the byte. */
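/* Added explanation (not in the original source): OUT currently points four
   bytes past the two-byte group known to contain the zero.  The low byte of
   TMPREG is 0x80 exactly when the first byte of that group is the zero, so
   adding TMPREG to itself sets the carry flag in that case, and the
   subtract-with-borrow of 3 below moves OUT back by 4 or 3 as appropriate,
   leaving it pointing at the terminating zero byte.  */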
11827 tmpreg = gen_lowpart (QImode, tmpreg);
11828 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11829 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11830 if (TARGET_64BIT)
11831 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11832 else
11833 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11834
11835 emit_label (end_0_label);
11836 }
11837
11838 void
11839 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11840 rtx callarg2 ATTRIBUTE_UNUSED,
11841 rtx pop, int sibcall)
11842 {
11843 rtx use = NULL, call;
11844
11845 if (pop == const0_rtx)
11846 pop = NULL;
11847 if (TARGET_64BIT && pop)
11848 abort ();
11849
11850 #if TARGET_MACHO
11851 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11852 fnaddr = machopic_indirect_call_target (fnaddr);
11853 #else
11854 /* Static functions and indirect calls don't need the pic register. */
11855 if (! TARGET_64BIT && flag_pic
11856 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11857 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11858 use_reg (&use, pic_offset_table_rtx);
11859
11860 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11861 {
11862 rtx al = gen_rtx_REG (QImode, 0);
11863 emit_move_insn (al, callarg2);
11864 use_reg (&use, al);
11865 }
11866 #endif /* TARGET_MACHO */
11867
11868 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11869 {
11870 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11871 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11872 }
11873 if (sibcall && TARGET_64BIT
11874 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11875 {
11876 rtx addr;
11877 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11878 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11879 emit_move_insn (fnaddr, addr);
11880 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11881 }
11882
11883 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11884 if (retval)
11885 call = gen_rtx_SET (VOIDmode, retval, call);
11886 if (pop)
11887 {
11888 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11889 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11890 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11891 }
11892
11893 call = emit_call_insn (call);
11894 if (use)
11895 CALL_INSN_FUNCTION_USAGE (call) = use;
11896 }
11897
11898 \f
11899 /* Clear stack slot assignments remembered from previous functions.
11900 This is called from INIT_EXPANDERS once before RTL is emitted for each
11901 function. */
11902
11903 static struct machine_function *
11904 ix86_init_machine_status (void)
11905 {
11906 struct machine_function *f;
11907
11908 f = ggc_alloc_cleared (sizeof (struct machine_function));
11909 f->use_fast_prologue_epilogue_nregs = -1;
11910
11911 return f;
11912 }
11913
11914 /* Return a MEM corresponding to a stack slot with mode MODE.
11915 Allocate a new slot if necessary.
11916
11917 The RTL for a function can have several slots available: N is
11918 which slot to use. */
11919
11920 rtx
11921 assign_386_stack_local (enum machine_mode mode, int n)
11922 {
11923 struct stack_local_entry *s;
11924
11925 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11926 abort ();
11927
11928 for (s = ix86_stack_locals; s; s = s->next)
11929 if (s->mode == mode && s->n == n)
11930 return s->rtl;
11931
11932 s = (struct stack_local_entry *)
11933 ggc_alloc (sizeof (struct stack_local_entry));
11934 s->n = n;
11935 s->mode = mode;
11936 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11937
11938 s->next = ix86_stack_locals;
11939 ix86_stack_locals = s;
11940 return s->rtl;
11941 }
11942
11943 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11944
11945 static GTY(()) rtx ix86_tls_symbol;
11946 rtx
11947 ix86_tls_get_addr (void)
11948 {
11949
11950 if (!ix86_tls_symbol)
11951 {
11952 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11953 (TARGET_GNU_TLS && !TARGET_64BIT)
11954 ? "___tls_get_addr"
11955 : "__tls_get_addr");
11956 }
11957
11958 return ix86_tls_symbol;
11959 }
11960 \f
11961 /* Calculate the length of the memory address in the instruction
11962 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11963
11964 static int
11965 memory_address_length (rtx addr)
11966 {
11967 struct ix86_address parts;
11968 rtx base, index, disp;
11969 int len;
11970
11971 if (GET_CODE (addr) == PRE_DEC
11972 || GET_CODE (addr) == POST_INC
11973 || GET_CODE (addr) == PRE_MODIFY
11974 || GET_CODE (addr) == POST_MODIFY)
11975 return 0;
11976
11977 if (! ix86_decompose_address (addr, &parts))
11978 abort ();
11979
11980 base = parts.base;
11981 index = parts.index;
11982 disp = parts.disp;
11983 len = 0;
11984
11985 /* Rule of thumb:
11986 - esp as the base always wants an index,
11987 - ebp as the base always wants a displacement. */
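/* Added illustration (not in the original source): (%eax) needs no extra
   bytes, (%esp) needs the SIB byte, (%ebp) needs a one-byte displacement,
   a bare disp32 needs four bytes, and any form with an index register adds
   the SIB byte on top of whatever displacement is required.  */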
11988
11989 /* Register Indirect. */
11990 if (base && !index && !disp)
11991 {
11992 /* esp (for its index) and ebp (for its displacement) need
11993 the two-byte modrm form. */
11994 if (addr == stack_pointer_rtx
11995 || addr == arg_pointer_rtx
11996 || addr == frame_pointer_rtx
11997 || addr == hard_frame_pointer_rtx)
11998 len = 1;
11999 }
12000
12001 /* Direct Addressing. */
12002 else if (disp && !base && !index)
12003 len = 4;
12004
12005 else
12006 {
12007 /* Find the length of the displacement constant. */
12008 if (disp)
12009 {
12010 if (GET_CODE (disp) == CONST_INT
12011 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12012 && base)
12013 len = 1;
12014 else
12015 len = 4;
12016 }
12017 /* ebp always wants a displacement. */
12018 else if (base == hard_frame_pointer_rtx)
12019 len = 1;
12020
12021 /* An index requires the two-byte modrm form.... */
12022 if (index
12023 /* ...like esp, which always wants an index. */
12024 || base == stack_pointer_rtx
12025 || base == arg_pointer_rtx
12026 || base == frame_pointer_rtx)
12027 len += 1;
12028 }
12029
12030 return len;
12031 }
12032
12033 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12034 is set, expect that the insn has an 8-bit immediate alternative. */
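/* Added example (not in the original source): "addl $100000, %eax" carries
   a four-byte immediate, whereas with SHORTFORM set "addl $4, %eax" can use
   the sign-extended one-byte immediate form and counts as a single byte.  */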
12035 int
12036 ix86_attr_length_immediate_default (rtx insn, int shortform)
12037 {
12038 int len = 0;
12039 int i;
12040 extract_insn_cached (insn);
12041 for (i = recog_data.n_operands - 1; i >= 0; --i)
12042 if (CONSTANT_P (recog_data.operand[i]))
12043 {
12044 if (len)
12045 abort ();
12046 if (shortform
12047 && GET_CODE (recog_data.operand[i]) == CONST_INT
12048 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12049 len = 1;
12050 else
12051 {
12052 switch (get_attr_mode (insn))
12053 {
12054 case MODE_QI:
12055 len+=1;
12056 break;
12057 case MODE_HI:
12058 len+=2;
12059 break;
12060 case MODE_SI:
12061 len+=4;
12062 break;
12063 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12064 case MODE_DI:
12065 len+=4;
12066 break;
12067 default:
12068 fatal_insn ("unknown insn mode", insn);
12069 }
12070 }
12071 }
12072 return len;
12073 }
12074 /* Compute default value for "length_address" attribute. */
12075 int
12076 ix86_attr_length_address_default (rtx insn)
12077 {
12078 int i;
12079
12080 if (get_attr_type (insn) == TYPE_LEA)
12081 {
12082 rtx set = PATTERN (insn);
12083 if (GET_CODE (set) == SET)
12084 ;
12085 else if (GET_CODE (set) == PARALLEL
12086 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12087 set = XVECEXP (set, 0, 0);
12088 else
12089 {
12090 #ifdef ENABLE_CHECKING
12091 abort ();
12092 #endif
12093 return 0;
12094 }
12095
12096 return memory_address_length (SET_SRC (set));
12097 }
12098
12099 extract_insn_cached (insn);
12100 for (i = recog_data.n_operands - 1; i >= 0; --i)
12101 if (GET_CODE (recog_data.operand[i]) == MEM)
12102 {
12103 return memory_address_length (XEXP (recog_data.operand[i], 0));
12105 }
12106 return 0;
12107 }
12108 \f
12109 /* Return the maximum number of instructions a cpu can issue. */
12110
12111 static int
12112 ix86_issue_rate (void)
12113 {
12114 switch (ix86_tune)
12115 {
12116 case PROCESSOR_PENTIUM:
12117 case PROCESSOR_K6:
12118 return 2;
12119
12120 case PROCESSOR_PENTIUMPRO:
12121 case PROCESSOR_PENTIUM4:
12122 case PROCESSOR_ATHLON:
12123 case PROCESSOR_K8:
12124 case PROCESSOR_NOCONA:
12125 return 3;
12126
12127 default:
12128 return 1;
12129 }
12130 }
12131
12132 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12133 by DEP_INSN and reads nothing else set by DEP_INSN. */
12134
12135 static int
12136 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12137 {
12138 rtx set, set2;
12139
12140 /* Simplify the test for uninteresting insns. */
12141 if (insn_type != TYPE_SETCC
12142 && insn_type != TYPE_ICMOV
12143 && insn_type != TYPE_FCMOV
12144 && insn_type != TYPE_IBR)
12145 return 0;
12146
12147 if ((set = single_set (dep_insn)) != 0)
12148 {
12149 set = SET_DEST (set);
12150 set2 = NULL_RTX;
12151 }
12152 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12153 && XVECLEN (PATTERN (dep_insn), 0) == 2
12154 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12155 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12156 {
12157 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12158 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12159 }
12160 else
12161 return 0;
12162
12163 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12164 return 0;
12165
12166 /* This test is true if the dependent insn reads the flags but
12167 not any other potentially set register. */
12168 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12169 return 0;
12170
12171 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12172 return 0;
12173
12174 return 1;
12175 }
12176
12177 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12178 address with operands set by DEP_INSN. */
12179
12180 static int
12181 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12182 {
12183 rtx addr;
12184
12185 if (insn_type == TYPE_LEA
12186 && TARGET_PENTIUM)
12187 {
12188 addr = PATTERN (insn);
12189 if (GET_CODE (addr) == SET)
12190 ;
12191 else if (GET_CODE (addr) == PARALLEL
12192 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12193 addr = XVECEXP (addr, 0, 0);
12194 else
12195 abort ();
12196 addr = SET_SRC (addr);
12197 }
12198 else
12199 {
12200 int i;
12201 extract_insn_cached (insn);
12202 for (i = recog_data.n_operands - 1; i >= 0; --i)
12203 if (GET_CODE (recog_data.operand[i]) == MEM)
12204 {
12205 addr = XEXP (recog_data.operand[i], 0);
12206 goto found;
12207 }
12208 return 0;
12209 found:;
12210 }
12211
12212 return modified_in_p (addr, dep_insn);
12213 }
12214
12215 static int
12216 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12217 {
12218 enum attr_type insn_type, dep_insn_type;
12219 enum attr_memory memory, dep_memory;
12220 rtx set, set2;
12221 int dep_insn_code_number;
12222
12223 /* Anti and output dependencies have zero cost on all CPUs. */
12224 if (REG_NOTE_KIND (link) != 0)
12225 return 0;
12226
12227 dep_insn_code_number = recog_memoized (dep_insn);
12228
12229 /* If we can't recognize the insns, we can't really do anything. */
12230 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12231 return cost;
12232
12233 insn_type = get_attr_type (insn);
12234 dep_insn_type = get_attr_type (dep_insn);
12235
12236 switch (ix86_tune)
12237 {
12238 case PROCESSOR_PENTIUM:
12239 /* Address Generation Interlock adds a cycle of latency. */
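/* Added example (not in the original source): a load such as
   "movl (%eax), %edx" pays an extra cycle on the Pentium when the
   immediately preceding instruction computed %eax, which is the situation
   ix86_agi_dependant detects.  */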
12240 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12241 cost += 1;
12242
12243 /* ??? Compares pair with jump/setcc. */
12244 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12245 cost = 0;
12246
12247 /* Floating point stores require value to be ready one cycle earlier. */
12248 if (insn_type == TYPE_FMOV
12249 && get_attr_memory (insn) == MEMORY_STORE
12250 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12251 cost += 1;
12252 break;
12253
12254 case PROCESSOR_PENTIUMPRO:
12255 memory = get_attr_memory (insn);
12256 dep_memory = get_attr_memory (dep_insn);
12257
12258 /* Since we can't represent delayed latencies of load+operation,
12259 increase the cost here for non-imov insns. */
12260 if (dep_insn_type != TYPE_IMOV
12261 && dep_insn_type != TYPE_FMOV
12262 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12263 cost += 1;
12264
12265 /* INT->FP conversion is expensive. */
12266 if (get_attr_fp_int_src (dep_insn))
12267 cost += 5;
12268
12269 /* There is one cycle extra latency between an FP op and a store. */
12270 if (insn_type == TYPE_FMOV
12271 && (set = single_set (dep_insn)) != NULL_RTX
12272 && (set2 = single_set (insn)) != NULL_RTX
12273 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12274 && GET_CODE (SET_DEST (set2)) == MEM)
12275 cost += 1;
12276
12277 /* Show ability of reorder buffer to hide latency of load by executing
12278 in parallel with previous instruction in case
12279 previous instruction is not needed to compute the address. */
12280 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12281 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12282 {
12283 /* Claim moves to take one cycle, as core can issue one load
12284 at a time and the next load can start a cycle later. */
12285 if (dep_insn_type == TYPE_IMOV
12286 || dep_insn_type == TYPE_FMOV)
12287 cost = 1;
12288 else if (cost > 1)
12289 cost--;
12290 }
12291 break;
12292
12293 case PROCESSOR_K6:
12294 memory = get_attr_memory (insn);
12295 dep_memory = get_attr_memory (dep_insn);
12296 /* The esp dependency is resolved before the instruction is really
12297 finished. */
12298 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12299 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12300 return 1;
12301
12302 /* Since we can't represent delayed latencies of load+operation,
12303 increase the cost here for non-imov insns. */
12304 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12305 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12306
12307 /* INT->FP conversion is expensive. */
12308 if (get_attr_fp_int_src (dep_insn))
12309 cost += 5;
12310
12311 /* Show ability of reorder buffer to hide latency of load by executing
12312 in parallel with previous instruction in case
12313 previous instruction is not needed to compute the address. */
12314 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12315 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12316 {
12317 /* Claim moves to take one cycle, as core can issue one load
12318 at a time and the next load can start a cycle later. */
12319 if (dep_insn_type == TYPE_IMOV
12320 || dep_insn_type == TYPE_FMOV)
12321 cost = 1;
12322 else if (cost > 2)
12323 cost -= 2;
12324 else
12325 cost = 1;
12326 }
12327 break;
12328
12329 case PROCESSOR_ATHLON:
12330 case PROCESSOR_K8:
12331 memory = get_attr_memory (insn);
12332 dep_memory = get_attr_memory (dep_insn);
12333
12334 /* Show ability of reorder buffer to hide latency of load by executing
12335 in parallel with previous instruction in case
12336 previous instruction is not needed to compute the address. */
12337 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12338 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12339 {
12340 enum attr_unit unit = get_attr_unit (insn);
12341 int loadcost = 3;
12342
12343 /* Because of the difference between the length of integer and
12344 floating unit pipeline preparation stages, the memory operands
12345 for floating point are cheaper.
12346
12347 ??? For Athlon the difference is most probably 2. */
12348 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12349 loadcost = 3;
12350 else
12351 loadcost = TARGET_ATHLON ? 2 : 0;
12352
12353 if (cost >= loadcost)
12354 cost -= loadcost;
12355 else
12356 cost = 0;
12357 }
12358
12359 default:
12360 break;
12361 }
12362
12363 return cost;
12364 }
12365
12366 static int
12367 ia32_use_dfa_pipeline_interface (void)
12368 {
12369 if (TARGET_PENTIUM
12370 || TARGET_PENTIUMPRO
12371 || TARGET_ATHLON_K8)
12372 return 1;
12373 return 0;
12374 }
12375
12376 /* How many alternative schedules to try. This should be as wide as the
12377 scheduling freedom in the DFA, but no wider. Making this value too
12378 large results in extra work for the scheduler. */
12379
12380 static int
12381 ia32_multipass_dfa_lookahead (void)
12382 {
12383 if (ix86_tune == PROCESSOR_PENTIUM)
12384 return 2;
12385
12386 if (ix86_tune == PROCESSOR_PENTIUMPRO)
12387 return 1;
12388
12389 else
12390 return 0;
12391 }
12392
12393 \f
12394 /* Compute the alignment given to a constant that is being placed in memory.
12395 EXP is the constant and ALIGN is the alignment that the object would
12396 ordinarily have.
12397 The value of this function is used instead of that alignment to align
12398 the object. */
12399
12400 int
12401 ix86_constant_alignment (tree exp, int align)
12402 {
12403 if (TREE_CODE (exp) == REAL_CST)
12404 {
12405 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12406 return 64;
12407 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12408 return 128;
12409 }
12410 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12411 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12412 return BITS_PER_WORD;
12413
12414 return align;
12415 }
12416
12417 /* Compute the alignment for a static variable.
12418 TYPE is the data type, and ALIGN is the alignment that
12419 the object would ordinarily have. The value of this function is used
12420 instead of that alignment to align the object. */
12421
12422 int
12423 ix86_data_alignment (tree type, int align)
12424 {
12425 if (AGGREGATE_TYPE_P (type)
12426 && TYPE_SIZE (type)
12427 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12428 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12429 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12430 return 256;
12431
12432 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12433 to a 16-byte boundary. */
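/* Added example (not in the original source): under this rule a static
   "char buf[20]" compiled for x86-64 is given 128-bit (16-byte) alignment
   even though its natural alignment is only one byte.  */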
12434 if (TARGET_64BIT)
12435 {
12436 if (AGGREGATE_TYPE_P (type)
12437 && TYPE_SIZE (type)
12438 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12439 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12440 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12441 return 128;
12442 }
12443
12444 if (TREE_CODE (type) == ARRAY_TYPE)
12445 {
12446 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12447 return 64;
12448 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12449 return 128;
12450 }
12451 else if (TREE_CODE (type) == COMPLEX_TYPE)
12452 {
12453
12454 if (TYPE_MODE (type) == DCmode && align < 64)
12455 return 64;
12456 if (TYPE_MODE (type) == XCmode && align < 128)
12457 return 128;
12458 }
12459 else if ((TREE_CODE (type) == RECORD_TYPE
12460 || TREE_CODE (type) == UNION_TYPE
12461 || TREE_CODE (type) == QUAL_UNION_TYPE)
12462 && TYPE_FIELDS (type))
12463 {
12464 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12465 return 64;
12466 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12467 return 128;
12468 }
12469 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12470 || TREE_CODE (type) == INTEGER_TYPE)
12471 {
12472 if (TYPE_MODE (type) == DFmode && align < 64)
12473 return 64;
12474 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12475 return 128;
12476 }
12477
12478 return align;
12479 }
12480
12481 /* Compute the alignment for a local variable.
12482 TYPE is the data type, and ALIGN is the alignment that
12483 the object would ordinarily have. The value of this macro is used
12484 instead of that alignment to align the object. */
12485
12486 int
12487 ix86_local_alignment (tree type, int align)
12488 {
12489 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12490 to a 16-byte boundary. */
12491 if (TARGET_64BIT)
12492 {
12493 if (AGGREGATE_TYPE_P (type)
12494 && TYPE_SIZE (type)
12495 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12496 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12497 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12498 return 128;
12499 }
12500 if (TREE_CODE (type) == ARRAY_TYPE)
12501 {
12502 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12503 return 64;
12504 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12505 return 128;
12506 }
12507 else if (TREE_CODE (type) == COMPLEX_TYPE)
12508 {
12509 if (TYPE_MODE (type) == DCmode && align < 64)
12510 return 64;
12511 if (TYPE_MODE (type) == XCmode && align < 128)
12512 return 128;
12513 }
12514 else if ((TREE_CODE (type) == RECORD_TYPE
12515 || TREE_CODE (type) == UNION_TYPE
12516 || TREE_CODE (type) == QUAL_UNION_TYPE)
12517 && TYPE_FIELDS (type))
12518 {
12519 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12520 return 64;
12521 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12522 return 128;
12523 }
12524 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12525 || TREE_CODE (type) == INTEGER_TYPE)
12526 {
12527
12528 if (TYPE_MODE (type) == DFmode && align < 64)
12529 return 64;
12530 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12531 return 128;
12532 }
12533 return align;
12534 }
12535 \f
12536 /* Emit RTL insns to initialize the variable parts of a trampoline.
12537 FNADDR is an RTX for the address of the function's pure code.
12538 CXT is an RTX for the static chain value for the function. */
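/* Added sketch of the emitted code (not in the original source).  The
   32-bit trampoline is "movl $CXT, %ecx" (0xb9 imm32) followed by
   "jmp FNADDR" (0xe9 rel32).  The 64-bit trampoline loads FNADDR into %r11
   with a movl or movabs, loads CXT into %r10 with a movabs, and ends with
   "jmp *%r11" (0x49 0xff 0xe3).  */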
12539 void
12540 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12541 {
12542 if (!TARGET_64BIT)
12543 {
12544 /* Compute offset from the end of the jmp to the target function. */
12545 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12546 plus_constant (tramp, 10),
12547 NULL_RTX, 1, OPTAB_DIRECT);
12548 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12549 gen_int_mode (0xb9, QImode));
12550 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12551 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12552 gen_int_mode (0xe9, QImode));
12553 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12554 }
12555 else
12556 {
12557 int offset = 0;
12558 /* Try to load address using shorter movl instead of movabs.
12559 We may want to support movq for kernel mode, but the kernel does not use
12560 trampolines at the moment. */
12561 if (x86_64_zero_extended_value (fnaddr))
12562 {
12563 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12564 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12565 gen_int_mode (0xbb41, HImode));
12566 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12567 gen_lowpart (SImode, fnaddr));
12568 offset += 6;
12569 }
12570 else
12571 {
12572 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12573 gen_int_mode (0xbb49, HImode));
12574 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12575 fnaddr);
12576 offset += 10;
12577 }
12578 /* Load static chain using movabs to r10. */
12579 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12580 gen_int_mode (0xba49, HImode));
12581 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12582 cxt);
12583 offset += 10;
12584 /* Jump to r11. */
12585 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12586 gen_int_mode (0xff49, HImode));
12587 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12588 gen_int_mode (0xe3, QImode));
12589 offset += 3;
12590 if (offset > TRAMPOLINE_SIZE)
12591 abort ();
12592 }
12593
12594 #ifdef TRANSFER_FROM_TRAMPOLINE
12595 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12596 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12597 #endif
12598 }
12599 \f
12600 #define def_builtin(MASK, NAME, TYPE, CODE) \
12601 do { \
12602 if ((MASK) & target_flags \
12603 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12604 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12605 NULL, NULL_TREE); \
12606 } while (0)
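/* Hypothetical usage sketch (added; not in the original source): a builtin
   initialization routine would invoke the macro roughly as
     def_builtin (MASK_SSE, "__builtin_ia32_addps", some_v4sf_ftype,
                  IX86_BUILTIN_ADDPS);
   where "some_v4sf_ftype" stands for whatever function-type tree the caller
   has built; the macro registers nothing when the required target flags are
   not enabled.  */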
12607
12608 struct builtin_description
12609 {
12610 const unsigned int mask;
12611 const enum insn_code icode;
12612 const char *const name;
12613 const enum ix86_builtins code;
12614 const enum rtx_code comparison;
12615 const unsigned int flag;
12616 };
12617
12618 static const struct builtin_description bdesc_comi[] =
12619 {
12620 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12621 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12622 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12623 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12624 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12625 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12626 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12627 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12628 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12629 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12630 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12631 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12632 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12633 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12634 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12635 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12636 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12637 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12638 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12639 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12640 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12641 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12642 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12643 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12644 };
12645
12646 static const struct builtin_description bdesc_2arg[] =
12647 {
12648 /* SSE */
12649 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12650 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12651 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12652 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12653 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12654 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12655 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12656 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12657
12658 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12659 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12660 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12661 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12662 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12663 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12664 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12665 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12666 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12667 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12668 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12669 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12670 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12671 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12672 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12673 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12674 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12675 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12676 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12677 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12678
12679 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12680 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12681 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12682 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12683
12684 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12685 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12686 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12687 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12688
12689 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12690 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12694
12695 /* MMX */
12696 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12697 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12698 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12699 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12700 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12701 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12702 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12703 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12704
12705 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12706 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12707 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12708 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12709 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12710 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12711 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12712 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12713
12714 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12715 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12716 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12717
12718 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12719 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12720 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12721 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12722
12723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12724 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12725
12726 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12727 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12728 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12729 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12730 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12731 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12732
12733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12736 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12737
12738 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12739 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12740 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12741 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12742 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12743 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12744
12745 /* Special. */
12746 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12749
12750 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12751 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12752 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12753
12754 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12755 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12756 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12757 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12760
12761 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12762 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12763 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12764 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12765 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12767
12768 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12769 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12770 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12771 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12772
12773 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12774 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12775
12776 /* SSE2 */
12777 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12785
12786 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12787 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12788 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12789 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12790 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12791 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12792 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12793 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12794 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12795 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12796 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12797 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12798 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12799 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12800 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12801 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12802 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12803 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12804 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12805 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12806
12807 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12811
12812 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12816
12817 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12820
12821 /* SSE2 MMX */
12822 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12830
12831 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12837 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12839
12840 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12844
12845 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12849
12850 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12852
12853 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12855 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12859
12860 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12864
12865 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12873
12874 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12877
12878 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12880
12881 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12887
12888 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12894
12895 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12899
12900 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12901
12902 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12903 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12906
12907 /* SSE3 */
12908 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12909 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12910 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12911 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12912 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12913 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12914 };
12915
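/* Editor's sketch, not part of the original source: a bdesc_2arg entry such
   as { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", ... } makes the
   named builtin callable from user code with the vector types registered in
   ix86_init_mmx_sse_builtins below.  Assuming GCC's vector_size extension
   and compilation with -msse2, a call could look like:

       typedef double v2df __attribute__ ((vector_size (16)));

       v2df
       add_pairs (v2df a, v2df b)
       {
         return __builtin_ia32_addpd (a, b);
       }

   The <emmintrin.h> intrinsics are essentially thin wrappers around these
   builtins.  */
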
12916 static const struct builtin_description bdesc_1arg[] =
12917 {
12918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12919 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12920
12921 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12923 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12924
12925 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12926 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12927 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12928 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12929 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12930 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12931
12932 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12936
12937 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12938
12939 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12941
12942 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12947
12948 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12949
12950 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12952 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12953 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12954
12955 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12958
12959 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12960
12961 /* SSE3 */
12962 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12963 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12964 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12965 };
12966
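/* Editor's sketch, not part of the original source: the bdesc_1arg entries
   are expanded through ix86_expand_unop_builtin further down.  For example,
   the IX86_BUILTIN_MOVMSKPD entry backs __builtin_ia32_movmskpd, which takes
   a V2DF vector and returns the two sign bits in an int.  Assuming -msse2:

       typedef double v2df __attribute__ ((vector_size (16)));

       int
       sign_bits (v2df x)
       {
         return __builtin_ia32_movmskpd (x);
       }
*/
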
12967 void
12968 ix86_init_builtins (void)
12969 {
12970 if (TARGET_MMX)
12971 ix86_init_mmx_sse_builtins ();
12972 }
12973
12974 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12975 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12976 builtins. */
12977 static void
12978 ix86_init_mmx_sse_builtins (void)
12979 {
12980 const struct builtin_description * d;
12981 size_t i;
12982
12983 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12984 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12985 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12986 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12987 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12988 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12989 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12990 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12991 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12992 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12993
12994 tree pchar_type_node = build_pointer_type (char_type_node);
12995 tree pcchar_type_node = build_pointer_type (
12996 build_type_variant (char_type_node, 1, 0));
12997 tree pfloat_type_node = build_pointer_type (float_type_node);
12998 tree pcfloat_type_node = build_pointer_type (
12999 build_type_variant (float_type_node, 1, 0));
13000 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13001 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13002 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13003
13004 /* Comparisons. */
13005 tree int_ftype_v4sf_v4sf
13006 = build_function_type_list (integer_type_node,
13007 V4SF_type_node, V4SF_type_node, NULL_TREE);
13008 tree v4si_ftype_v4sf_v4sf
13009 = build_function_type_list (V4SI_type_node,
13010 V4SF_type_node, V4SF_type_node, NULL_TREE);
13011 /* MMX/SSE/integer conversions. */
13012 tree int_ftype_v4sf
13013 = build_function_type_list (integer_type_node,
13014 V4SF_type_node, NULL_TREE);
13015 tree int64_ftype_v4sf
13016 = build_function_type_list (long_long_integer_type_node,
13017 V4SF_type_node, NULL_TREE);
13018 tree int_ftype_v8qi
13019 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13020 tree v4sf_ftype_v4sf_int
13021 = build_function_type_list (V4SF_type_node,
13022 V4SF_type_node, integer_type_node, NULL_TREE);
13023 tree v4sf_ftype_v4sf_int64
13024 = build_function_type_list (V4SF_type_node,
13025 V4SF_type_node, long_long_integer_type_node,
13026 NULL_TREE);
13027 tree v4sf_ftype_v4sf_v2si
13028 = build_function_type_list (V4SF_type_node,
13029 V4SF_type_node, V2SI_type_node, NULL_TREE);
13030 tree int_ftype_v4hi_int
13031 = build_function_type_list (integer_type_node,
13032 V4HI_type_node, integer_type_node, NULL_TREE);
13033 tree v4hi_ftype_v4hi_int_int
13034 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13035 integer_type_node, integer_type_node,
13036 NULL_TREE);
13037 /* Miscellaneous. */
13038 tree v8qi_ftype_v4hi_v4hi
13039 = build_function_type_list (V8QI_type_node,
13040 V4HI_type_node, V4HI_type_node, NULL_TREE);
13041 tree v4hi_ftype_v2si_v2si
13042 = build_function_type_list (V4HI_type_node,
13043 V2SI_type_node, V2SI_type_node, NULL_TREE);
13044 tree v4sf_ftype_v4sf_v4sf_int
13045 = build_function_type_list (V4SF_type_node,
13046 V4SF_type_node, V4SF_type_node,
13047 integer_type_node, NULL_TREE);
13048 tree v2si_ftype_v4hi_v4hi
13049 = build_function_type_list (V2SI_type_node,
13050 V4HI_type_node, V4HI_type_node, NULL_TREE);
13051 tree v4hi_ftype_v4hi_int
13052 = build_function_type_list (V4HI_type_node,
13053 V4HI_type_node, integer_type_node, NULL_TREE);
13054 tree v4hi_ftype_v4hi_di
13055 = build_function_type_list (V4HI_type_node,
13056 V4HI_type_node, long_long_unsigned_type_node,
13057 NULL_TREE);
13058 tree v2si_ftype_v2si_di
13059 = build_function_type_list (V2SI_type_node,
13060 V2SI_type_node, long_long_unsigned_type_node,
13061 NULL_TREE);
13062 tree void_ftype_void
13063 = build_function_type (void_type_node, void_list_node);
13064 tree void_ftype_unsigned
13065 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13066 tree void_ftype_unsigned_unsigned
13067 = build_function_type_list (void_type_node, unsigned_type_node,
13068 unsigned_type_node, NULL_TREE);
13069 tree void_ftype_pcvoid_unsigned_unsigned
13070 = build_function_type_list (void_type_node, const_ptr_type_node,
13071 unsigned_type_node, unsigned_type_node,
13072 NULL_TREE);
13073 tree unsigned_ftype_void
13074 = build_function_type (unsigned_type_node, void_list_node);
13075 tree di_ftype_void
13076 = build_function_type (long_long_unsigned_type_node, void_list_node);
13077 tree v4sf_ftype_void
13078 = build_function_type (V4SF_type_node, void_list_node);
13079 tree v2si_ftype_v4sf
13080 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13081 /* Loads/stores. */
13082 tree void_ftype_v8qi_v8qi_pchar
13083 = build_function_type_list (void_type_node,
13084 V8QI_type_node, V8QI_type_node,
13085 pchar_type_node, NULL_TREE);
13086 tree v4sf_ftype_pcfloat
13087 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13088 /* @@@ the type is bogus */
13089 tree v4sf_ftype_v4sf_pv2si
13090 = build_function_type_list (V4SF_type_node,
13091 V4SF_type_node, pv2si_type_node, NULL_TREE);
13092 tree void_ftype_pv2si_v4sf
13093 = build_function_type_list (void_type_node,
13094 pv2si_type_node, V4SF_type_node, NULL_TREE);
13095 tree void_ftype_pfloat_v4sf
13096 = build_function_type_list (void_type_node,
13097 pfloat_type_node, V4SF_type_node, NULL_TREE);
13098 tree void_ftype_pdi_di
13099 = build_function_type_list (void_type_node,
13100 pdi_type_node, long_long_unsigned_type_node,
13101 NULL_TREE);
13102 tree void_ftype_pv2di_v2di
13103 = build_function_type_list (void_type_node,
13104 pv2di_type_node, V2DI_type_node, NULL_TREE);
13105 /* Normal vector unops. */
13106 tree v4sf_ftype_v4sf
13107 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13108
13109 /* Normal vector binops. */
13110 tree v4sf_ftype_v4sf_v4sf
13111 = build_function_type_list (V4SF_type_node,
13112 V4SF_type_node, V4SF_type_node, NULL_TREE);
13113 tree v8qi_ftype_v8qi_v8qi
13114 = build_function_type_list (V8QI_type_node,
13115 V8QI_type_node, V8QI_type_node, NULL_TREE);
13116 tree v4hi_ftype_v4hi_v4hi
13117 = build_function_type_list (V4HI_type_node,
13118 V4HI_type_node, V4HI_type_node, NULL_TREE);
13119 tree v2si_ftype_v2si_v2si
13120 = build_function_type_list (V2SI_type_node,
13121 V2SI_type_node, V2SI_type_node, NULL_TREE);
13122 tree di_ftype_di_di
13123 = build_function_type_list (long_long_unsigned_type_node,
13124 long_long_unsigned_type_node,
13125 long_long_unsigned_type_node, NULL_TREE);
13126
13127 tree v2si_ftype_v2sf
13128 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13129 tree v2sf_ftype_v2si
13130 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13131 tree v2si_ftype_v2si
13132 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13133 tree v2sf_ftype_v2sf
13134 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13135 tree v2sf_ftype_v2sf_v2sf
13136 = build_function_type_list (V2SF_type_node,
13137 V2SF_type_node, V2SF_type_node, NULL_TREE);
13138 tree v2si_ftype_v2sf_v2sf
13139 = build_function_type_list (V2SI_type_node,
13140 V2SF_type_node, V2SF_type_node, NULL_TREE);
13141 tree pint_type_node = build_pointer_type (integer_type_node);
13142 tree pcint_type_node = build_pointer_type (
13143 build_type_variant (integer_type_node, 1, 0));
13144 tree pdouble_type_node = build_pointer_type (double_type_node);
13145 tree pcdouble_type_node = build_pointer_type (
13146 build_type_variant (double_type_node, 1, 0));
13147 tree int_ftype_v2df_v2df
13148 = build_function_type_list (integer_type_node,
13149 V2DF_type_node, V2DF_type_node, NULL_TREE);
13150
13151 tree ti_ftype_void
13152 = build_function_type (intTI_type_node, void_list_node);
13153 tree v2di_ftype_void
13154 = build_function_type (V2DI_type_node, void_list_node);
13155 tree ti_ftype_ti_ti
13156 = build_function_type_list (intTI_type_node,
13157 intTI_type_node, intTI_type_node, NULL_TREE);
13158 tree void_ftype_pcvoid
13159 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13160 tree v2di_ftype_di
13161 = build_function_type_list (V2DI_type_node,
13162 long_long_unsigned_type_node, NULL_TREE);
13163 tree di_ftype_v2di
13164 = build_function_type_list (long_long_unsigned_type_node,
13165 V2DI_type_node, NULL_TREE);
13166 tree v4sf_ftype_v4si
13167 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13168 tree v4si_ftype_v4sf
13169 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13170 tree v2df_ftype_v4si
13171 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13172 tree v4si_ftype_v2df
13173 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13174 tree v2si_ftype_v2df
13175 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13176 tree v4sf_ftype_v2df
13177 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13178 tree v2df_ftype_v2si
13179 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13180 tree v2df_ftype_v4sf
13181 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13182 tree int_ftype_v2df
13183 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13184 tree int64_ftype_v2df
13185 = build_function_type_list (long_long_integer_type_node,
13186 V2DF_type_node, NULL_TREE);
13187 tree v2df_ftype_v2df_int
13188 = build_function_type_list (V2DF_type_node,
13189 V2DF_type_node, integer_type_node, NULL_TREE);
13190 tree v2df_ftype_v2df_int64
13191 = build_function_type_list (V2DF_type_node,
13192 V2DF_type_node, long_long_integer_type_node,
13193 NULL_TREE);
13194 tree v4sf_ftype_v4sf_v2df
13195 = build_function_type_list (V4SF_type_node,
13196 V4SF_type_node, V2DF_type_node, NULL_TREE);
13197 tree v2df_ftype_v2df_v4sf
13198 = build_function_type_list (V2DF_type_node,
13199 V2DF_type_node, V4SF_type_node, NULL_TREE);
13200 tree v2df_ftype_v2df_v2df_int
13201 = build_function_type_list (V2DF_type_node,
13202 V2DF_type_node, V2DF_type_node,
13203 integer_type_node,
13204 NULL_TREE);
13205 tree v2df_ftype_v2df_pv2si
13206 = build_function_type_list (V2DF_type_node,
13207 V2DF_type_node, pv2si_type_node, NULL_TREE);
13208 tree void_ftype_pv2si_v2df
13209 = build_function_type_list (void_type_node,
13210 pv2si_type_node, V2DF_type_node, NULL_TREE);
13211 tree void_ftype_pdouble_v2df
13212 = build_function_type_list (void_type_node,
13213 pdouble_type_node, V2DF_type_node, NULL_TREE);
13214 tree void_ftype_pint_int
13215 = build_function_type_list (void_type_node,
13216 pint_type_node, integer_type_node, NULL_TREE);
13217 tree void_ftype_v16qi_v16qi_pchar
13218 = build_function_type_list (void_type_node,
13219 V16QI_type_node, V16QI_type_node,
13220 pchar_type_node, NULL_TREE);
13221 tree v2df_ftype_pcdouble
13222 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13223 tree v2df_ftype_v2df_v2df
13224 = build_function_type_list (V2DF_type_node,
13225 V2DF_type_node, V2DF_type_node, NULL_TREE);
13226 tree v16qi_ftype_v16qi_v16qi
13227 = build_function_type_list (V16QI_type_node,
13228 V16QI_type_node, V16QI_type_node, NULL_TREE);
13229 tree v8hi_ftype_v8hi_v8hi
13230 = build_function_type_list (V8HI_type_node,
13231 V8HI_type_node, V8HI_type_node, NULL_TREE);
13232 tree v4si_ftype_v4si_v4si
13233 = build_function_type_list (V4SI_type_node,
13234 V4SI_type_node, V4SI_type_node, NULL_TREE);
13235 tree v2di_ftype_v2di_v2di
13236 = build_function_type_list (V2DI_type_node,
13237 V2DI_type_node, V2DI_type_node, NULL_TREE);
13238 tree v2di_ftype_v2df_v2df
13239 = build_function_type_list (V2DI_type_node,
13240 V2DF_type_node, V2DF_type_node, NULL_TREE);
13241 tree v2df_ftype_v2df
13242 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13243 tree v2df_ftype_double
13244 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13245 tree v2df_ftype_double_double
13246 = build_function_type_list (V2DF_type_node,
13247 double_type_node, double_type_node, NULL_TREE);
13248 tree int_ftype_v8hi_int
13249 = build_function_type_list (integer_type_node,
13250 V8HI_type_node, integer_type_node, NULL_TREE);
13251 tree v8hi_ftype_v8hi_int_int
13252 = build_function_type_list (V8HI_type_node,
13253 V8HI_type_node, integer_type_node,
13254 integer_type_node, NULL_TREE);
13255 tree v2di_ftype_v2di_int
13256 = build_function_type_list (V2DI_type_node,
13257 V2DI_type_node, integer_type_node, NULL_TREE);
13258 tree v4si_ftype_v4si_int
13259 = build_function_type_list (V4SI_type_node,
13260 V4SI_type_node, integer_type_node, NULL_TREE);
13261 tree v8hi_ftype_v8hi_int
13262 = build_function_type_list (V8HI_type_node,
13263 V8HI_type_node, integer_type_node, NULL_TREE);
13264 tree v8hi_ftype_v8hi_v2di
13265 = build_function_type_list (V8HI_type_node,
13266 V8HI_type_node, V2DI_type_node, NULL_TREE);
13267 tree v4si_ftype_v4si_v2di
13268 = build_function_type_list (V4SI_type_node,
13269 V4SI_type_node, V2DI_type_node, NULL_TREE);
13270 tree v4si_ftype_v8hi_v8hi
13271 = build_function_type_list (V4SI_type_node,
13272 V8HI_type_node, V8HI_type_node, NULL_TREE);
13273 tree di_ftype_v8qi_v8qi
13274 = build_function_type_list (long_long_unsigned_type_node,
13275 V8QI_type_node, V8QI_type_node, NULL_TREE);
13276 tree v2di_ftype_v16qi_v16qi
13277 = build_function_type_list (V2DI_type_node,
13278 V16QI_type_node, V16QI_type_node, NULL_TREE);
13279 tree int_ftype_v16qi
13280 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13281 tree v16qi_ftype_pcchar
13282 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13283 tree void_ftype_pchar_v16qi
13284 = build_function_type_list (void_type_node,
13285 pchar_type_node, V16QI_type_node, NULL_TREE);
13286 tree v4si_ftype_pcint
13287 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13288 tree void_ftype_pcint_v4si
13289 = build_function_type_list (void_type_node,
13290 pcint_type_node, V4SI_type_node, NULL_TREE);
13291 tree v2di_ftype_v2di
13292 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13293
13294 tree float80_type;
13295 tree float128_type;
13296
13297 /* The __float80 type. */
13298 if (TYPE_MODE (long_double_type_node) == XFmode)
13299 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13300 "__float80");
13301 else
13302 {
13303 /* The __float80 type. */
13304 float80_type = make_node (REAL_TYPE);
13305 TYPE_PRECISION (float80_type) = 96;
13306 layout_type (float80_type);
13307 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13308 }
13309
13310 float128_type = make_node (REAL_TYPE);
13311 TYPE_PRECISION (float128_type) = 128;
13312 layout_type (float128_type);
13313 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13314
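/* Editor's note, not part of the original source: once the registrations
   above have run, user code can name these types directly.  A minimal sketch
   that only assumes the type names exist, not any particular arithmetic
   support:

       __float80  x80;
       __float128 x128;

       unsigned long s80  = sizeof (x80);
       unsigned long s128 = sizeof (x128);

   When long double is already XFmode, __float80 is simply another name for
   long double, as the branch above shows.  */
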
13315 /* Add all builtins that are more or less simple operations on two
13316 operands. */
13317 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13318 {
13319 /* Use one of the operands; the target can have a different mode for
13320 mask-generating compares. */
13321 enum machine_mode mode;
13322 tree type;
13323
13324 if (d->name == 0)
13325 continue;
13326 mode = insn_data[d->icode].operand[1].mode;
13327
13328 switch (mode)
13329 {
13330 case V16QImode:
13331 type = v16qi_ftype_v16qi_v16qi;
13332 break;
13333 case V8HImode:
13334 type = v8hi_ftype_v8hi_v8hi;
13335 break;
13336 case V4SImode:
13337 type = v4si_ftype_v4si_v4si;
13338 break;
13339 case V2DImode:
13340 type = v2di_ftype_v2di_v2di;
13341 break;
13342 case V2DFmode:
13343 type = v2df_ftype_v2df_v2df;
13344 break;
13345 case TImode:
13346 type = ti_ftype_ti_ti;
13347 break;
13348 case V4SFmode:
13349 type = v4sf_ftype_v4sf_v4sf;
13350 break;
13351 case V8QImode:
13352 type = v8qi_ftype_v8qi_v8qi;
13353 break;
13354 case V4HImode:
13355 type = v4hi_ftype_v4hi_v4hi;
13356 break;
13357 case V2SImode:
13358 type = v2si_ftype_v2si_v2si;
13359 break;
13360 case DImode:
13361 type = di_ftype_di_di;
13362 break;
13363
13364 default:
13365 abort ();
13366 }
13367
13368 /* Override for comparisons. */
13369 if (d->icode == CODE_FOR_maskcmpv4sf3
13370 || d->icode == CODE_FOR_maskncmpv4sf3
13371 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13372 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13373 type = v4si_ftype_v4sf_v4sf;
13374
13375 if (d->icode == CODE_FOR_maskcmpv2df3
13376 || d->icode == CODE_FOR_maskncmpv2df3
13377 || d->icode == CODE_FOR_vmmaskcmpv2df3
13378 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13379 type = v2di_ftype_v2df_v2df;
13380
13381 def_builtin (d->mask, d->name, type, d->code);
13382 }
13383
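/* Editor's sketch, not part of the original source: the comparison override
   above means the packed-compare builtins return a full-width mask rather
   than a value of the operand type; for the V2DF compares the signature
   becomes v2di (*) (v2df, v2df).  Assuming -msse2:

       typedef double    v2df __attribute__ ((vector_size (16)));
       typedef long long v2di __attribute__ ((vector_size (16)));

       v2di
       equal_mask (v2df a, v2df b)
       {
         return __builtin_ia32_cmpeqpd (a, b);
       }

   Each lane of the result is all-ones where the doubles compare equal and
   all-zeros elsewhere.  */
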
13384 /* Add the remaining MMX insns with somewhat more complicated types. */
13385 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13386 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13387 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13388 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13389 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13390
13391 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13393 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13394
13395 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13396 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13397
13398 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13400
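/* Editor's sketch, not part of the original source: the MMX builtins
   registered just above work on 8-byte vectors, and the shift counts are
   passed in DImode.  Assuming -mmmx:

       typedef short v4hi __attribute__ ((vector_size (8)));
       typedef int   v2si __attribute__ ((vector_size (8)));

       v2si
       dot_pairs (v4hi a, v4hi b)
       {
         return __builtin_ia32_pmaddwd (a, b);
       }

       v4hi
       shift_words (v4hi x)
       {
         return __builtin_ia32_psllw (x, 3LL);
       }

   Code using these should issue __builtin_ia32_emms before returning to x87
   floating-point code, since MMX and x87 share registers.  */
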
13401 /* comi/ucomi insns. */
13402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13403 if (d->mask == MASK_SSE2)
13404 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13405 else
13406 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13407
13408 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13409 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13411
13412 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13413 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13414 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13416 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13417 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13418 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13419 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13420 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13421 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13422 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13423
13424 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13425 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13426
13427 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13428
13429 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13432 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13433 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13434 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13435
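/* Editor's sketch, not part of the original source: the SSE load/store
   builtins above take plain float pointers, so an unaligned four-float copy,
   assuming -msse, can be written as:

       typedef float v4sf __attribute__ ((vector_size (16)));

       void
       copy4 (float *dst, const float *src)
       {
         v4sf t = __builtin_ia32_loadups (src);
         __builtin_ia32_storeups (dst, t);
       }
*/
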
13436 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13437 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13438 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13439 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13440
13441 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13442 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13443 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13444 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13445
13446 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13447
13448 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13449
13450 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13451 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13452 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13453 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13454 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13455 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13456
13457 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13458
13459 /* Original 3DNow! */
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13480
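/* Editor's sketch, not part of the original source: the 3DNow! builtins
   operate on two packed floats held in an MMX register.  Assuming -m3dnow:

       typedef float v2sf __attribute__ ((vector_size (8)));

       v2sf
       add_two (v2sf a, v2sf b)
       {
         return __builtin_ia32_pfadd (a, b);
       }

   As with MMX, __builtin_ia32_femms (or emms) should be issued before
   returning to x87 floating-point code.  */
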
13481 /* 3DNow! extension as used in the Athlon CPU. */
13482 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13483 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13484 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13485 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13486 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13487 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13488
13489 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13490
13491 /* SSE2 */
13492 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13494
13495 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13498
13499 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13500 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13505
13506 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13510
13511 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13516
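/* Editor's sketch, not part of the original source: __builtin_ia32_movnti
   above takes an int pointer and an int value and performs a non-temporal
   store that bypasses the cache.  Assuming -msse2:

       void
       stream_store (int *p, int v)
       {
         __builtin_ia32_movnti (p, v);
       }
*/
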
13517 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13521
13522 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13524
13525 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13526
13527 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13529
13530 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13535
13536 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13537
13538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13540 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13541 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13542
13543 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13546
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13548 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13551
13552 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13559
13560 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13563
13564 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13571
13572 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13573
13574 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13577
13578 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13581
13582 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13584
13585 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13589
13590 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13594
13595 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13597
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13599
13600 /* Prescott New Instructions. */
13601 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13602 void_ftype_pcvoid_unsigned_unsigned,
13603 IX86_BUILTIN_MONITOR);
13604 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13605 void_ftype_unsigned_unsigned,
13606 IX86_BUILTIN_MWAIT);
13607 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13608 v4sf_ftype_v4sf,
13609 IX86_BUILTIN_MOVSHDUP);
13610 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13611 v4sf_ftype_v4sf,
13612 IX86_BUILTIN_MOVSLDUP);
13613 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13614 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13615 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13616 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13617 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13618 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13619 }
13620
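/* Editor's sketch, not part of the original source: the SSE3 MONITOR/MWAIT
   builtins registered at the end of ix86_init_mmx_sse_builtins take their
   extension and hint arguments as plain unsigned ints.  Assuming -msse3:

       void
       wait_on (const void *addr)
       {
         __builtin_ia32_monitor (addr, 0U, 0U);
         __builtin_ia32_mwait (0U, 0U);
       }

   The first call arms the monitor on ADDR; the second waits until something
   writes to the monitored line.  */
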
13621 /* Errors in the source file can cause expand_expr to return const0_rtx
13622 where we expect a vector. To avoid crashing, use one of the vector
13623 clear instructions. */
13624 static rtx
13625 safe_vector_operand (rtx x, enum machine_mode mode)
13626 {
13627 if (x != const0_rtx)
13628 return x;
13629 x = gen_reg_rtx (mode);
13630
13631 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13632 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13633 : gen_rtx_SUBREG (DImode, x, 0)));
13634 else
13635 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13636 : gen_rtx_SUBREG (V4SFmode, x, 0),
13637 CONST0_RTX (V4SFmode)));
13638 return x;
13639 }
13640
13641 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13642
13643 static rtx
13644 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13645 {
13646 rtx pat;
13647 tree arg0 = TREE_VALUE (arglist);
13648 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13649 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13650 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13651 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13652 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13653 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13654
13655 if (VECTOR_MODE_P (mode0))
13656 op0 = safe_vector_operand (op0, mode0);
13657 if (VECTOR_MODE_P (mode1))
13658 op1 = safe_vector_operand (op1, mode1);
13659
13660 if (! target
13661 || GET_MODE (target) != tmode
13662 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13663 target = gen_reg_rtx (tmode);
13664
13665 if (GET_MODE (op1) == SImode && mode1 == TImode)
13666 {
13667 rtx x = gen_reg_rtx (V4SImode);
13668 emit_insn (gen_sse2_loadd (x, op1));
13669 op1 = gen_lowpart (TImode, x);
13670 }
13671
13672 /* If the insn wants its input operands in modes different from
13673 the result, abort. */
13674 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13675 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13676 abort ();
13677
13678 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13679 op0 = copy_to_mode_reg (mode0, op0);
13680 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13681 op1 = copy_to_mode_reg (mode1, op1);
13682
13683 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13684 yet they cannot both be memory operands. This is normally enforced
13685 by expanders, but we didn't bother to create one here. */
13686 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13687 op0 = copy_to_mode_reg (mode0, op0);
13688
13689 pat = GEN_FCN (icode) (target, op0, op1);
13690 if (! pat)
13691 return 0;
13692 emit_insn (pat);
13693 return target;
13694 }
13695
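/* Editor's sketch, not part of the original source: the plain two-operand
   builtins from bdesc_2arg are expanded through the routine above, so a call
   like the following, assuming -msse2, simply becomes the named insn on two
   register (or one register and one memory) operands:

       typedef long long v2di __attribute__ ((vector_size (16)));

       v2di
       and128 (v2di a, v2di b)
       {
         return __builtin_ia32_pand128 (a, b);
       }
*/
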
13696 /* Subroutine of ix86_expand_builtin to take care of stores. */
13697
13698 static rtx
13699 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13700 {
13701 rtx pat;
13702 tree arg0 = TREE_VALUE (arglist);
13703 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13704 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13705 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13706 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13707 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13708
13709 if (VECTOR_MODE_P (mode1))
13710 op1 = safe_vector_operand (op1, mode1);
13711
13712 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13713 op1 = copy_to_mode_reg (mode1, op1);
13714
13715 pat = GEN_FCN (icode) (op0, op1);
13716 if (pat)
13717 emit_insn (pat);
13718 return 0;
13719 }
13720
13721 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13722
13723 static rtx
13724 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13725 rtx target, int do_load)
13726 {
13727 rtx pat;
13728 tree arg0 = TREE_VALUE (arglist);
13729 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13731 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13732
13733 if (! target
13734 || GET_MODE (target) != tmode
13735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13736 target = gen_reg_rtx (tmode);
13737 if (do_load)
13738 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13739 else
13740 {
13741 if (VECTOR_MODE_P (mode0))
13742 op0 = safe_vector_operand (op0, mode0);
13743
13744 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13745 op0 = copy_to_mode_reg (mode0, op0);
13746 }
13747
13748 pat = GEN_FCN (icode) (target, op0);
13749 if (! pat)
13750 return 0;
13751 emit_insn (pat);
13752 return target;
13753 }
13754
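/* Editor's sketch, not part of the original source: the DO_LOAD path in the
   routine above is used when the single operand is really a pointer (the
   load builtins), while ordinary unops such as the packed square root go
   through the register path.  Assuming -msse2:

       typedef double v2df __attribute__ ((vector_size (16)));

       v2df
       packed_sqrt (v2df x)
       {
         return __builtin_ia32_sqrtpd (x);
       }
*/
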
13755 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13756 sqrtss, rsqrtss, rcpss. */
13757
13758 static rtx
13759 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13760 {
13761 rtx pat;
13762 tree arg0 = TREE_VALUE (arglist);
13763 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13764 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13765 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13766
13767 if (! target
13768 || GET_MODE (target) != tmode
13769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13770 target = gen_reg_rtx (tmode);
13771
13772 if (VECTOR_MODE_P (mode0))
13773 op0 = safe_vector_operand (op0, mode0);
13774
13775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13776 op0 = copy_to_mode_reg (mode0, op0);
13777
13778 op1 = op0;
13779 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13780 op1 = copy_to_mode_reg (mode0, op1);
13781
13782 pat = GEN_FCN (icode) (target, op0, op1);
13783 if (! pat)
13784 return 0;
13785 emit_insn (pat);
13786 return target;
13787 }
13788
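/* Editor's sketch, not part of the original source: for these three scalar
   insns the routine above passes the same input as both operands, so only
   the low element is recomputed and the upper elements are copied through
   unchanged.  Assuming -msse:

       typedef float v4sf __attribute__ ((vector_size (16)));

       v4sf
       approx_rsqrt_low (v4sf x)
       {
         return __builtin_ia32_rsqrtss (x);
       }
*/
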
13789 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13790
13791 static rtx
13792 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13793 rtx target)
13794 {
13795 rtx pat;
13796 tree arg0 = TREE_VALUE (arglist);
13797 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13798 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13799 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13800 rtx op2;
13801 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13802 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13803 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13804 enum rtx_code comparison = d->comparison;
13805
13806 if (VECTOR_MODE_P (mode0))
13807 op0 = safe_vector_operand (op0, mode0);
13808 if (VECTOR_MODE_P (mode1))
13809 op1 = safe_vector_operand (op1, mode1);
13810
13811 /* Swap operands if we have a comparison that isn't available in
13812 hardware. */
13813 if (d->flag)
13814 {
13815 rtx tmp = gen_reg_rtx (mode1);
13816 emit_move_insn (tmp, op1);
13817 op1 = op0;
13818 op0 = tmp;
13819 }
13820
13821 if (! target
13822 || GET_MODE (target) != tmode
13823 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13824 target = gen_reg_rtx (tmode);
13825
13826 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13827 op0 = copy_to_mode_reg (mode0, op0);
13828 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13829 op1 = copy_to_mode_reg (mode1, op1);
13830
13831 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13832 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13833 if (! pat)
13834 return 0;
13835 emit_insn (pat);
13836 return target;
13837 }
13838
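/* Editor's sketch, not part of the original source: the operand swap keyed
   off D->FLAG is what makes the "greater" compares work even though the
   CMPPS/CMPPD predicates only cover EQ/LT/LE/UNORD and their negations.  The
   bdesc_2arg entry for __builtin_ia32_cmpgtpd, for instance, is marked with
   LT and the swap flag, so a call like the following, assuming -msse2, is
   emitted as CMPLTPD with the operands exchanged:

       typedef double    v2df __attribute__ ((vector_size (16)));
       typedef long long v2di __attribute__ ((vector_size (16)));

       v2di
       greater_mask (v2df a, v2df b)
       {
         return __builtin_ia32_cmpgtpd (a, b);
       }
*/
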
13839 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13840
13841 static rtx
13842 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13843 rtx target)
13844 {
13845 rtx pat;
13846 tree arg0 = TREE_VALUE (arglist);
13847 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13848 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13849 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13850 rtx op2;
13851 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13852 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13853 enum rtx_code comparison = d->comparison;
13854
13855 if (VECTOR_MODE_P (mode0))
13856 op0 = safe_vector_operand (op0, mode0);
13857 if (VECTOR_MODE_P (mode1))
13858 op1 = safe_vector_operand (op1, mode1);
13859
13860 /* Swap operands if we have a comparison that isn't available in
13861 hardware. */
13862 if (d->flag)
13863 {
13864 rtx tmp = op1;
13865 op1 = op0;
13866 op0 = tmp;
13867 }
13868
13869 target = gen_reg_rtx (SImode);
13870 emit_move_insn (target, const0_rtx);
13871 target = gen_rtx_SUBREG (QImode, target, 0);
13872
13873 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13874 op0 = copy_to_mode_reg (mode0, op0);
13875 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13876 op1 = copy_to_mode_reg (mode1, op1);
13877
13878 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13879 pat = GEN_FCN (d->icode) (op0, op1);
13880 if (! pat)
13881 return 0;
13882 emit_insn (pat);
13883 emit_insn (gen_rtx_SET (VOIDmode,
13884 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13885 gen_rtx_fmt_ee (comparison, QImode,
13886 SET_DEST (pat),
13887 const0_rtx)));
13888
13889 return SUBREG_REG (target);
13890 }
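/* Rough sketch of the code this produces for a comi builtin such as
   __builtin_ia32_comigt (assuming the usual setcc expansion):

       comiss  %xmm1, %xmm0
       seta    %al

   where %al is the STRICT_LOW_PART of an SImode temporary that was
   explicitly zeroed first, so reading the result back in SImode does not
   suffer a partial-register stall.  */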
13891
13892 /* Expand an expression EXP that calls a built-in function,
13893 with result going to TARGET if that's convenient
13894 (and in mode MODE if that's convenient).
13895 SUBTARGET may be used as the target for computing one of EXP's operands.
13896 IGNORE is nonzero if the value is to be ignored. */
13897
13898 rtx
13899 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13900 enum machine_mode mode ATTRIBUTE_UNUSED,
13901 int ignore ATTRIBUTE_UNUSED)
13902 {
13903 const struct builtin_description *d;
13904 size_t i;
13905 enum insn_code icode;
13906 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13907 tree arglist = TREE_OPERAND (exp, 1);
13908 tree arg0, arg1, arg2;
13909 rtx op0, op1, op2, pat;
13910 enum machine_mode tmode, mode0, mode1, mode2;
13911 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13912
13913 switch (fcode)
13914 {
13915 case IX86_BUILTIN_EMMS:
13916 emit_insn (gen_emms ());
13917 return 0;
13918
13919 case IX86_BUILTIN_SFENCE:
13920 emit_insn (gen_sfence ());
13921 return 0;
13922
13923 case IX86_BUILTIN_PEXTRW:
13924 case IX86_BUILTIN_PEXTRW128:
13925 icode = (fcode == IX86_BUILTIN_PEXTRW
13926 ? CODE_FOR_mmx_pextrw
13927 : CODE_FOR_sse2_pextrw);
13928 arg0 = TREE_VALUE (arglist);
13929 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13930 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13931 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13932 tmode = insn_data[icode].operand[0].mode;
13933 mode0 = insn_data[icode].operand[1].mode;
13934 mode1 = insn_data[icode].operand[2].mode;
13935
13936 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13937 op0 = copy_to_mode_reg (mode0, op0);
13938 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13939 {
13940 error ("selector must be an integer constant in the range 0..%i",
13941 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13942 return gen_reg_rtx (tmode);
13943 }
13944 if (target == 0
13945 || GET_MODE (target) != tmode
13946 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13947 target = gen_reg_rtx (tmode);
13948 pat = GEN_FCN (icode) (target, op0, op1);
13949 if (! pat)
13950 return 0;
13951 emit_insn (pat);
13952 return target;
13953
13954 case IX86_BUILTIN_PINSRW:
13955 case IX86_BUILTIN_PINSRW128:
13956 icode = (fcode == IX86_BUILTIN_PINSRW
13957 ? CODE_FOR_mmx_pinsrw
13958 : CODE_FOR_sse2_pinsrw);
13959 arg0 = TREE_VALUE (arglist);
13960 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13961 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13962 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13963 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13964 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13965 tmode = insn_data[icode].operand[0].mode;
13966 mode0 = insn_data[icode].operand[1].mode;
13967 mode1 = insn_data[icode].operand[2].mode;
13968 mode2 = insn_data[icode].operand[3].mode;
13969
13970 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13971 op0 = copy_to_mode_reg (mode0, op0);
13972 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13973 op1 = copy_to_mode_reg (mode1, op1);
13974 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13975 {
13976 error ("selector must be an integer constant in the range 0..%i",
13977 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13978 return const0_rtx;
13979 }
13980 if (target == 0
13981 || GET_MODE (target) != tmode
13982 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13983 target = gen_reg_rtx (tmode);
13984 pat = GEN_FCN (icode) (target, op0, op1, op2);
13985 if (! pat)
13986 return 0;
13987 emit_insn (pat);
13988 return target;
13989
13990 case IX86_BUILTIN_MASKMOVQ:
13991 case IX86_BUILTIN_MASKMOVDQU:
13992 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13993 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13994 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13995 : CODE_FOR_sse2_maskmovdqu));
13996 /* Note the arg order is different from the operand order. */
13997 arg1 = TREE_VALUE (arglist);
13998 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13999 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14002 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14003 mode0 = insn_data[icode].operand[0].mode;
14004 mode1 = insn_data[icode].operand[1].mode;
14005 mode2 = insn_data[icode].operand[2].mode;
14006
14007 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14008 op0 = copy_to_mode_reg (mode0, op0);
14009 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14010 op1 = copy_to_mode_reg (mode1, op1);
14011 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14012 op2 = copy_to_mode_reg (mode2, op2);
14013 pat = GEN_FCN (icode) (op0, op1, op2);
14014 if (! pat)
14015 return 0;
14016 emit_insn (pat);
14017 return 0;
14018
14019 case IX86_BUILTIN_SQRTSS:
14020 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14021 case IX86_BUILTIN_RSQRTSS:
14022 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14023 case IX86_BUILTIN_RCPSS:
14024 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14025
14026 case IX86_BUILTIN_LOADAPS:
14027 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14028
14029 case IX86_BUILTIN_LOADUPS:
14030 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14031
14032 case IX86_BUILTIN_STOREAPS:
14033 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14034
14035 case IX86_BUILTIN_STOREUPS:
14036 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14037
14038 case IX86_BUILTIN_LOADSS:
14039 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14040
14041 case IX86_BUILTIN_STORESS:
14042 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14043
14044 case IX86_BUILTIN_LOADHPS:
14045 case IX86_BUILTIN_LOADLPS:
14046 case IX86_BUILTIN_LOADHPD:
14047 case IX86_BUILTIN_LOADLPD:
14048 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14049 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14050 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14051 : CODE_FOR_sse2_movsd);
14052 arg0 = TREE_VALUE (arglist);
14053 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14054 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14055 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14056 tmode = insn_data[icode].operand[0].mode;
14057 mode0 = insn_data[icode].operand[1].mode;
14058 mode1 = insn_data[icode].operand[2].mode;
14059
14060 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14061 op0 = copy_to_mode_reg (mode0, op0);
14062 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14063 if (target == 0
14064 || GET_MODE (target) != tmode
14065 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14066 target = gen_reg_rtx (tmode);
14067 pat = GEN_FCN (icode) (target, op0, op1);
14068 if (! pat)
14069 return 0;
14070 emit_insn (pat);
14071 return target;
14072
14073 case IX86_BUILTIN_STOREHPS:
14074 case IX86_BUILTIN_STORELPS:
14075 case IX86_BUILTIN_STOREHPD:
14076 case IX86_BUILTIN_STORELPD:
14077 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14078 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14079 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14080 : CODE_FOR_sse2_movsd);
14081 arg0 = TREE_VALUE (arglist);
14082 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14083 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14084 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14085 mode0 = insn_data[icode].operand[1].mode;
14086 mode1 = insn_data[icode].operand[2].mode;
14087
14088 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14089 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14090 op1 = copy_to_mode_reg (mode1, op1);
14091
14092 pat = GEN_FCN (icode) (op0, op0, op1);
14093 if (! pat)
14094 return 0;
14095 emit_insn (pat);
14096 return 0;
14097
14098 case IX86_BUILTIN_MOVNTPS:
14099 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14100 case IX86_BUILTIN_MOVNTQ:
14101 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14102
14103 case IX86_BUILTIN_LDMXCSR:
14104 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14105 target = assign_386_stack_local (SImode, 0);
14106 emit_move_insn (target, op0);
14107 emit_insn (gen_ldmxcsr (target));
14108 return 0;
14109
14110 case IX86_BUILTIN_STMXCSR:
14111 target = assign_386_stack_local (SImode, 0);
14112 emit_insn (gen_stmxcsr (target));
14113 return copy_to_mode_reg (SImode, target);
14114
14115 case IX86_BUILTIN_SHUFPS:
14116 case IX86_BUILTIN_SHUFPD:
14117 icode = (fcode == IX86_BUILTIN_SHUFPS
14118 ? CODE_FOR_sse_shufps
14119 : CODE_FOR_sse2_shufpd);
14120 arg0 = TREE_VALUE (arglist);
14121 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14122 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14123 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14124 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14125 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14126 tmode = insn_data[icode].operand[0].mode;
14127 mode0 = insn_data[icode].operand[1].mode;
14128 mode1 = insn_data[icode].operand[2].mode;
14129 mode2 = insn_data[icode].operand[3].mode;
14130
14131 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14132 op0 = copy_to_mode_reg (mode0, op0);
14133 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14134 op1 = copy_to_mode_reg (mode1, op1);
14135 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14136 {
14137 /* @@@ better error message */
14138 error ("mask must be an immediate");
14139 return gen_reg_rtx (tmode);
14140 }
14141 if (target == 0
14142 || GET_MODE (target) != tmode
14143 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14144 target = gen_reg_rtx (tmode);
14145 pat = GEN_FCN (icode) (target, op0, op1, op2);
14146 if (! pat)
14147 return 0;
14148 emit_insn (pat);
14149 return target;
14150
14151 case IX86_BUILTIN_PSHUFW:
14152 case IX86_BUILTIN_PSHUFD:
14153 case IX86_BUILTIN_PSHUFHW:
14154 case IX86_BUILTIN_PSHUFLW:
14155 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14156 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14157 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14158 : CODE_FOR_mmx_pshufw);
14159 arg0 = TREE_VALUE (arglist);
14160 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 tmode = insn_data[icode].operand[0].mode;
14164 mode1 = insn_data[icode].operand[1].mode;
14165 mode2 = insn_data[icode].operand[2].mode;
14166
14167 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14168 op0 = copy_to_mode_reg (mode1, op0);
14169 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14170 {
14171 /* @@@ better error message */
14172 error ("mask must be an immediate");
14173 return const0_rtx;
14174 }
14175 if (target == 0
14176 || GET_MODE (target) != tmode
14177 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14178 target = gen_reg_rtx (tmode);
14179 pat = GEN_FCN (icode) (target, op0, op1);
14180 if (! pat)
14181 return 0;
14182 emit_insn (pat);
14183 return target;
14184
14185 case IX86_BUILTIN_PSLLDQI128:
14186 case IX86_BUILTIN_PSRLDQI128:
14187 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14188 : CODE_FOR_sse2_lshrti3);
14189 arg0 = TREE_VALUE (arglist);
14190 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193 tmode = insn_data[icode].operand[0].mode;
14194 mode1 = insn_data[icode].operand[1].mode;
14195 mode2 = insn_data[icode].operand[2].mode;
14196
14197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14198 {
14199 op0 = copy_to_reg (op0);
14200 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14201 }
14202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14203 {
14204 error ("shift must be an immediate");
14205 return const0_rtx;
14206 }
14207 target = gen_reg_rtx (V2DImode);
14208 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14209 if (! pat)
14210 return 0;
14211 emit_insn (pat);
14212 return target;
14213
14214 case IX86_BUILTIN_FEMMS:
14215 emit_insn (gen_femms ());
14216 return NULL_RTX;
14217
14218 case IX86_BUILTIN_PAVGUSB:
14219 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14220
14221 case IX86_BUILTIN_PF2ID:
14222 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14223
14224 case IX86_BUILTIN_PFACC:
14225 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14226
14227 case IX86_BUILTIN_PFADD:
14228 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14229
14230 case IX86_BUILTIN_PFCMPEQ:
14231 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14232
14233 case IX86_BUILTIN_PFCMPGE:
14234 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14235
14236 case IX86_BUILTIN_PFCMPGT:
14237 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14238
14239 case IX86_BUILTIN_PFMAX:
14240 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14241
14242 case IX86_BUILTIN_PFMIN:
14243 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14244
14245 case IX86_BUILTIN_PFMUL:
14246 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14247
14248 case IX86_BUILTIN_PFRCP:
14249 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14250
14251 case IX86_BUILTIN_PFRCPIT1:
14252 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14253
14254 case IX86_BUILTIN_PFRCPIT2:
14255 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14256
14257 case IX86_BUILTIN_PFRSQIT1:
14258 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14259
14260 case IX86_BUILTIN_PFRSQRT:
14261 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14262
14263 case IX86_BUILTIN_PFSUB:
14264 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14265
14266 case IX86_BUILTIN_PFSUBR:
14267 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14268
14269 case IX86_BUILTIN_PI2FD:
14270 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14271
14272 case IX86_BUILTIN_PMULHRW:
14273 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14274
14275 case IX86_BUILTIN_PF2IW:
14276 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14277
14278 case IX86_BUILTIN_PFNACC:
14279 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14280
14281 case IX86_BUILTIN_PFPNACC:
14282 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14283
14284 case IX86_BUILTIN_PI2FW:
14285 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14286
14287 case IX86_BUILTIN_PSWAPDSI:
14288 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14289
14290 case IX86_BUILTIN_PSWAPDSF:
14291 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14292
14293 case IX86_BUILTIN_SSE_ZERO:
14294 target = gen_reg_rtx (V4SFmode);
14295 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14296 return target;
14297
14298 case IX86_BUILTIN_MMX_ZERO:
14299 target = gen_reg_rtx (DImode);
14300 emit_insn (gen_mmx_clrdi (target));
14301 return target;
14302
14303 case IX86_BUILTIN_CLRTI:
14304 target = gen_reg_rtx (V2DImode);
14305 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14306 return target;
14307
14308
14309 case IX86_BUILTIN_SQRTSD:
14310 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14311 case IX86_BUILTIN_LOADAPD:
14312 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14313 case IX86_BUILTIN_LOADUPD:
14314 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14315
14316 case IX86_BUILTIN_STOREAPD:
14317 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14318 case IX86_BUILTIN_STOREUPD:
14319 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14320
14321 case IX86_BUILTIN_LOADSD:
14322 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14323
14324 case IX86_BUILTIN_STORESD:
14325 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14326
14327 case IX86_BUILTIN_SETPD1:
14328 target = assign_386_stack_local (DFmode, 0);
14329 arg0 = TREE_VALUE (arglist);
14330 emit_move_insn (adjust_address (target, DFmode, 0),
14331 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14332 op0 = gen_reg_rtx (V2DFmode);
14333 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14334 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14335 return op0;
14336
14337 case IX86_BUILTIN_SETPD:
14338 target = assign_386_stack_local (V2DFmode, 0);
14339 arg0 = TREE_VALUE (arglist);
14340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14341 emit_move_insn (adjust_address (target, DFmode, 0),
14342 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14343 emit_move_insn (adjust_address (target, DFmode, 8),
14344 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14345 op0 = gen_reg_rtx (V2DFmode);
14346 emit_insn (gen_sse2_movapd (op0, target));
14347 return op0;
14348
14349 case IX86_BUILTIN_LOADRPD:
14350 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14351 gen_reg_rtx (V2DFmode), 1);
14352 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14353 return target;
14354
14355 case IX86_BUILTIN_LOADPD1:
14356 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14357 gen_reg_rtx (V2DFmode), 1);
14358 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14359 return target;
14360
14361 case IX86_BUILTIN_STOREPD1:
14362 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14363 case IX86_BUILTIN_STORERPD:
14364 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14365
14366 case IX86_BUILTIN_CLRPD:
14367 target = gen_reg_rtx (V2DFmode);
14368 emit_insn (gen_sse_clrv2df (target));
14369 return target;
14370
14371 case IX86_BUILTIN_MFENCE:
14372 emit_insn (gen_sse2_mfence ());
14373 return 0;
14374 case IX86_BUILTIN_LFENCE:
14375 emit_insn (gen_sse2_lfence ());
14376 return 0;
14377
14378 case IX86_BUILTIN_CLFLUSH:
14379 arg0 = TREE_VALUE (arglist);
14380 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14381 icode = CODE_FOR_sse2_clflush;
14382 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14383 op0 = copy_to_mode_reg (Pmode, op0);
14384
14385 emit_insn (gen_sse2_clflush (op0));
14386 return 0;
14387
14388 case IX86_BUILTIN_MOVNTPD:
14389 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14390 case IX86_BUILTIN_MOVNTDQ:
14391 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14392 case IX86_BUILTIN_MOVNTI:
14393 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14394
14395 case IX86_BUILTIN_LOADDQA:
14396 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14397 case IX86_BUILTIN_LOADDQU:
14398 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14399 case IX86_BUILTIN_LOADD:
14400 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14401
14402 case IX86_BUILTIN_STOREDQA:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14404 case IX86_BUILTIN_STOREDQU:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14406 case IX86_BUILTIN_STORED:
14407 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14408
14409 case IX86_BUILTIN_MONITOR:
14410 arg0 = TREE_VALUE (arglist);
14411 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14412 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14413 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14414 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14415 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14416 if (!REG_P (op0))
14417 op0 = copy_to_mode_reg (SImode, op0);
14418 if (!REG_P (op1))
14419 op1 = copy_to_mode_reg (SImode, op1);
14420 if (!REG_P (op2))
14421 op2 = copy_to_mode_reg (SImode, op2);
14422 emit_insn (gen_monitor (op0, op1, op2));
14423 return 0;
14424
14425 case IX86_BUILTIN_MWAIT:
14426 arg0 = TREE_VALUE (arglist);
14427 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14428 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14429 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14430 if (!REG_P (op0))
14431 op0 = copy_to_mode_reg (SImode, op0);
14432 if (!REG_P (op1))
14433 op1 = copy_to_mode_reg (SImode, op1);
14434 emit_insn (gen_mwait (op0, op1));
14435 return 0;
14436
14437 case IX86_BUILTIN_LOADDDUP:
14438 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14439
14440 case IX86_BUILTIN_LDDQU:
14441 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14442 1);
14443
14444 default:
14445 break;
14446 }
14447
14448 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14449 if (d->code == fcode)
14450 {
14451 /* Compares are treated specially. */
14452 if (d->icode == CODE_FOR_maskcmpv4sf3
14453 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14454 || d->icode == CODE_FOR_maskncmpv4sf3
14455 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14456 || d->icode == CODE_FOR_maskcmpv2df3
14457 || d->icode == CODE_FOR_vmmaskcmpv2df3
14458 || d->icode == CODE_FOR_maskncmpv2df3
14459 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14460 return ix86_expand_sse_compare (d, arglist, target);
14461
14462 return ix86_expand_binop_builtin (d->icode, arglist, target);
14463 }
14464
14465 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14466 if (d->code == fcode)
14467 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14468
14469 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14470 if (d->code == fcode)
14471 return ix86_expand_sse_comi (d, arglist, target);
14472
14473 /* @@@ Should really do something sensible here. */
14474 return 0;
14475 }
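/* Dispatch example (illustrative only): __builtin_ia32_addps is not handled
   by the switch above, so it is found in bdesc_2arg and expanded through
   ix86_expand_binop_builtin, while __builtin_ia32_comieq is found in
   bdesc_comi and goes through ix86_expand_sse_comi.  */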
14476
14477 /* Store OPERAND to memory after reload is completed.  This means
14478 that we can't easily use assign_stack_local. */
14479 rtx
14480 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14481 {
14482 rtx result;
14483 if (!reload_completed)
14484 abort ();
14485 if (TARGET_RED_ZONE)
14486 {
14487 result = gen_rtx_MEM (mode,
14488 gen_rtx_PLUS (Pmode,
14489 stack_pointer_rtx,
14490 GEN_INT (-RED_ZONE_SIZE)));
14491 emit_move_insn (result, operand);
14492 }
14493 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14494 {
14495 switch (mode)
14496 {
14497 case HImode:
14498 case SImode:
14499 operand = gen_lowpart (DImode, operand);
14500 /* FALLTHRU */
14501 case DImode:
14502 emit_insn (
14503 gen_rtx_SET (VOIDmode,
14504 gen_rtx_MEM (DImode,
14505 gen_rtx_PRE_DEC (DImode,
14506 stack_pointer_rtx)),
14507 operand));
14508 break;
14509 default:
14510 abort ();
14511 }
14512 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14513 }
14514 else
14515 {
14516 switch (mode)
14517 {
14518 case DImode:
14519 {
14520 rtx operands[2];
14521 split_di (&operand, 1, operands, operands + 1);
14522 emit_insn (
14523 gen_rtx_SET (VOIDmode,
14524 gen_rtx_MEM (SImode,
14525 gen_rtx_PRE_DEC (Pmode,
14526 stack_pointer_rtx)),
14527 operands[1]));
14528 emit_insn (
14529 gen_rtx_SET (VOIDmode,
14530 gen_rtx_MEM (SImode,
14531 gen_rtx_PRE_DEC (Pmode,
14532 stack_pointer_rtx)),
14533 operands[0]));
14534 }
14535 break;
14536 case HImode:
14537 /* It is better to store HImodes as SImodes. */
14538 if (!TARGET_PARTIAL_REG_STALL)
14539 operand = gen_lowpart (SImode, operand);
14540 /* FALLTHRU */
14541 case SImode:
14542 emit_insn (
14543 gen_rtx_SET (VOIDmode,
14544 gen_rtx_MEM (GET_MODE (operand),
14545 gen_rtx_PRE_DEC (SImode,
14546 stack_pointer_rtx)),
14547 operand));
14548 break;
14549 default:
14550 abort ();
14551 }
14552 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14553 }
14554 return result;
14555 }
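/* Illustrative example: on x86-64 with the red zone available, a DImode
   operand is simply stored at -128(%rsp), i.e. within the red zone, with no
   stack pointer adjustment; without a red zone the operand is pushed
   instead, and ix86_free_from_memory below releases the slot again.  */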
14556
14557 /* Free the operand from memory. */
14558 void
14559 ix86_free_from_memory (enum machine_mode mode)
14560 {
14561 if (!TARGET_RED_ZONE)
14562 {
14563 int size;
14564
14565 if (mode == DImode || TARGET_64BIT)
14566 size = 8;
14567 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14568 size = 2;
14569 else
14570 size = 4;
14571 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14572 to a pop or add instruction if registers are available. */
14573 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14574 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14575 GEN_INT (size))));
14576 }
14577 }
14578
14579 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14580 QImode must go into class Q_REGS.
14581 Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
14582 movdf to do mem-to-mem moves through integer regs. */
14583 enum reg_class
14584 ix86_preferred_reload_class (rtx x, enum reg_class class)
14585 {
14586 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14587 return NO_REGS;
14588 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14589 {
14590 /* SSE can't load any constant directly yet. */
14591 if (SSE_CLASS_P (class))
14592 return NO_REGS;
14593 /* Floats can load 0 and 1. */
14594 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14595 {
14596 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14597 if (MAYBE_SSE_CLASS_P (class))
14598 return (reg_class_subset_p (class, GENERAL_REGS)
14599 ? GENERAL_REGS : FLOAT_REGS);
14600 else
14601 return class;
14602 }
14603 /* General regs can load everything. */
14604 if (reg_class_subset_p (class, GENERAL_REGS))
14605 return GENERAL_REGS;
14606 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14607 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14608 return NO_REGS;
14609 }
14610 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14611 return NO_REGS;
14612 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14613 return Q_REGS;
14614 return class;
14615 }
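/* Example of the effect (a sketch, not an exhaustive list): reloading the
   constant 0.0 into an x87 register is fine because fldz exists, so a float
   class is kept; reloading an arbitrary FP constant into an SSE register
   yields NO_REGS, forcing the constant into the constant pool so it is
   loaded from memory instead.  */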
14616
14617 /* If we are copying between general and FP registers, we need a memory
14618 location. The same is true for SSE and MMX registers.
14619
14620 The macro can't work reliably when one of the CLASSES is a class containing
14621 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14622 combining those units in a single alternative in the machine description.
14623 Ensure that this constraint holds to avoid unexpected surprises.
14624
14625 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14626 enforce these sanity checks. */
14627 int
14628 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14629 enum machine_mode mode, int strict)
14630 {
14631 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14632 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14633 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14634 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14635 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14636 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14637 {
14638 if (strict)
14639 abort ();
14640 else
14641 return 1;
14642 }
14643 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14644 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14645 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14646 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14647 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14648 }
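/* For instance, copying a DFmode value between an SSE register and an x87
   stack register has no direct instruction, so this returns nonzero and the
   value is bounced through a stack slot (a store followed by a load).  */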
14649 /* Return the cost of moving data from a register in class CLASS1 to
14650 one in class CLASS2.
14651
14652 It is not required that the cost always equal 2 when FROM is the same as TO;
14653 on some machines it is expensive to move between registers if they are not
14654 general registers. */
14655 int
14656 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14657 enum reg_class class2)
14658 {
14659 /* In case we require secondary memory, compute the cost of the store followed
14660 by the load.  To avoid bad register allocation choices, we need this
14661 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14662
14663 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14664 {
14665 int cost = 1;
14666
14667 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14668 MEMORY_MOVE_COST (mode, class1, 1));
14669 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14670 MEMORY_MOVE_COST (mode, class2, 1));
14671
14672 /* When copying from a general purpose register we may emit multiple
14673 stores followed by a single load, causing a memory size mismatch stall.
14674 Count this as an arbitrarily high cost of 20. */
14675 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14676 cost += 20;
14677
14678 /* In the case of FP/MMX moves, the registers actually overlap, and we
14679 have to switch modes in order to treat them differently. */
14680 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14681 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14682 cost += 20;
14683
14684 return cost;
14685 }
14686
14687 /* Moves between SSE/MMX and integer unit are expensive. */
14688 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14689 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14690 return ix86_cost->mmxsse_to_integer;
14691 if (MAYBE_FLOAT_CLASS_P (class1))
14692 return ix86_cost->fp_move;
14693 if (MAYBE_SSE_CLASS_P (class1))
14694 return ix86_cost->sse_move;
14695 if (MAYBE_MMX_CLASS_P (class1))
14696 return ix86_cost->mmx_move;
14697 return 2;
14698 }
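/* Worked example with hypothetical cost-table entries: if a DFmode copy
   between FLOAT_REGS and SSE_REGS needs secondary memory and the memory
   move costs are, say, 4 on the x87 side and 5 on the SSE side, the result
   is 1 + 4 + 5 = 10, plus the additional penalties of 20 when the register
   counts differ or the FP/MMX overlap applies -- always at least as high as
   the corresponding MEMORY_MOVE_COST pair.  */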
14699
14700 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14701 int
14702 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14703 {
14704 /* Flags, and only flags, can hold CCmode values. */
14705 if (CC_REGNO_P (regno))
14706 return GET_MODE_CLASS (mode) == MODE_CC;
14707 if (GET_MODE_CLASS (mode) == MODE_CC
14708 || GET_MODE_CLASS (mode) == MODE_RANDOM
14709 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14710 return 0;
14711 if (FP_REGNO_P (regno))
14712 return VALID_FP_MODE_P (mode);
14713 if (SSE_REGNO_P (regno))
14714 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14715 if (MMX_REGNO_P (regno))
14716 return (TARGET_MMX
14717 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14718 /* We handle both integers and floats in the general purpose registers.
14719 In the future we should be able to handle vector modes as well. */
14720 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14721 return 0;
14722 /* Take care with QImode values - they can be in non-QI regs, but then
14723 they do cause partial register stalls. */
14724 if (regno < 4 || mode != QImode || TARGET_64BIT)
14725 return 1;
14726 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14727 }
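/* Example: in 32-bit mode %esi has no low-byte alias, so keeping a QImode
   value there risks partial register stalls; the code above therefore
   accepts it only on targets where such stalls are not a concern, or once
   reload is in progress or completed and has little choice left.  */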
14728
14729 /* Return the cost of moving data of mode M between a
14730 register and memory. A value of 2 is the default; this cost is
14731 relative to those in `REGISTER_MOVE_COST'.
14732
14733 If moving between registers and memory is more expensive than
14734 between two registers, you should define this macro to express the
14735 relative cost.
14736
14737 Also model the increased cost of moving QImode registers in
14738 non-Q_REGS classes.
14739 */
14740 int
14741 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14742 {
14743 if (FLOAT_CLASS_P (class))
14744 {
14745 int index;
14746 switch (mode)
14747 {
14748 case SFmode:
14749 index = 0;
14750 break;
14751 case DFmode:
14752 index = 1;
14753 break;
14754 case XFmode:
14755 index = 2;
14756 break;
14757 default:
14758 return 100;
14759 }
14760 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14761 }
14762 if (SSE_CLASS_P (class))
14763 {
14764 int index;
14765 switch (GET_MODE_SIZE (mode))
14766 {
14767 case 4:
14768 index = 0;
14769 break;
14770 case 8:
14771 index = 1;
14772 break;
14773 case 16:
14774 index = 2;
14775 break;
14776 default:
14777 return 100;
14778 }
14779 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14780 }
14781 if (MMX_CLASS_P (class))
14782 {
14783 int index;
14784 switch (GET_MODE_SIZE (mode))
14785 {
14786 case 4:
14787 index = 0;
14788 break;
14789 case 8:
14790 index = 1;
14791 break;
14792 default:
14793 return 100;
14794 }
14795 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14796 }
14797 switch (GET_MODE_SIZE (mode))
14798 {
14799 case 1:
14800 if (in)
14801 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14802 : ix86_cost->movzbl_load);
14803 else
14804 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14805 : ix86_cost->int_store[0] + 4);
14806 break;
14807 case 2:
14808 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14809 default:
14810 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14811 if (mode == TFmode)
14812 mode = XFmode;
14813 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14814 * (((int) GET_MODE_SIZE (mode)
14815 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14816 }
14817 }
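/* Worked example for the default (integer) case on a 32-bit target:
   UNITS_PER_WORD is 4, so a DImode (8 byte) load costs
   int_load[2] * ((8 + 3) / 4), i.e. two word-sized moves; the actual
   numbers come from the currently selected processor_costs table.  */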
14818
14819 /* Compute a (partial) cost for rtx X. Return true if the complete
14820 cost has been computed, and false if subexpressions should be
14821 scanned. In either case, *TOTAL contains the cost result. */
14822
14823 static bool
14824 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14825 {
14826 enum machine_mode mode = GET_MODE (x);
14827
14828 switch (code)
14829 {
14830 case CONST_INT:
14831 case CONST:
14832 case LABEL_REF:
14833 case SYMBOL_REF:
14834 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14835 *total = 3;
14836 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14837 *total = 2;
14838 else if (flag_pic && SYMBOLIC_CONST (x)
14839 && (!TARGET_64BIT
14840 || (GET_CODE (x) != LABEL_REF
14841 && (GET_CODE (x) != SYMBOL_REF
14842 || !SYMBOL_REF_LOCAL_P (x)))))
14843 *total = 1;
14844 else
14845 *total = 0;
14846 return true;
14847
14848 case CONST_DOUBLE:
14849 if (mode == VOIDmode)
14850 *total = 0;
14851 else
14852 switch (standard_80387_constant_p (x))
14853 {
14854 case 1: /* 0.0 */
14855 *total = 1;
14856 break;
14857 default: /* Other constants */
14858 *total = 2;
14859 break;
14860 case 0:
14861 case -1:
14862 /* Start with (MEM (SYMBOL_REF)), since that's where
14863 it'll probably end up. Add a penalty for size. */
14864 *total = (COSTS_N_INSNS (1)
14865 + (flag_pic != 0 && !TARGET_64BIT)
14866 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14867 break;
14868 }
14869 return true;
14870
14871 case ZERO_EXTEND:
14872 /* The zero extension is often completely free on x86_64, so make
14873 it as cheap as possible. */
14874 if (TARGET_64BIT && mode == DImode
14875 && GET_MODE (XEXP (x, 0)) == SImode)
14876 *total = 1;
14877 else if (TARGET_ZERO_EXTEND_WITH_AND)
14878 *total = COSTS_N_INSNS (ix86_cost->add);
14879 else
14880 *total = COSTS_N_INSNS (ix86_cost->movzx);
14881 return false;
14882
14883 case SIGN_EXTEND:
14884 *total = COSTS_N_INSNS (ix86_cost->movsx);
14885 return false;
14886
14887 case ASHIFT:
14888 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14889 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14890 {
14891 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14892 if (value == 1)
14893 {
14894 *total = COSTS_N_INSNS (ix86_cost->add);
14895 return false;
14896 }
14897 if ((value == 2 || value == 3)
14898 && !TARGET_DECOMPOSE_LEA
14899 && ix86_cost->lea <= ix86_cost->shift_const)
14900 {
14901 *total = COSTS_N_INSNS (ix86_cost->lea);
14902 return false;
14903 }
14904 }
14905 /* FALLTHRU */
14906
14907 case ROTATE:
14908 case ASHIFTRT:
14909 case LSHIFTRT:
14910 case ROTATERT:
14911 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14912 {
14913 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14914 {
14915 if (INTVAL (XEXP (x, 1)) > 32)
14916 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
14917 else
14918 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
14919 }
14920 else
14921 {
14922 if (GET_CODE (XEXP (x, 1)) == AND)
14923 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
14924 else
14925 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
14926 }
14927 }
14928 else
14929 {
14930 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14931 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14932 else
14933 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14934 }
14935 return false;
14936
14937 case MULT:
14938 if (FLOAT_MODE_P (mode))
14939 {
14940 *total = COSTS_N_INSNS (ix86_cost->fmul);
14941 return false;
14942 }
14943 else
14944 {
14945 rtx op0 = XEXP (x, 0);
14946 rtx op1 = XEXP (x, 1);
14947 int nbits;
14948 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14949 {
14950 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14951 for (nbits = 0; value != 0; value &= value - 1)
14952 nbits++;
14953 }
14954 else
14955 /* This is arbitrary. */
14956 nbits = 7;
14957
14958 /* Compute costs correctly for widening multiplication. */
14959 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14960 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14961 == GET_MODE_SIZE (mode))
14962 {
14963 int is_mulwiden = 0;
14964 enum machine_mode inner_mode = GET_MODE (op0);
14965
14966 if (GET_CODE (op0) == GET_CODE (op1))
14967 is_mulwiden = 1, op1 = XEXP (op1, 0);
14968 else if (GET_CODE (op1) == CONST_INT)
14969 {
14970 if (GET_CODE (op0) == SIGN_EXTEND)
14971 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14972 == INTVAL (op1);
14973 else
14974 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14975 }
14976
14977 if (is_mulwiden)
14978 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14979 }
14980
14981 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14982 + nbits * ix86_cost->mult_bit)
14983 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14984
14985 return true;
14986 }
14987
14988 case DIV:
14989 case UDIV:
14990 case MOD:
14991 case UMOD:
14992 if (FLOAT_MODE_P (mode))
14993 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14994 else
14995 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14996 return false;
14997
14998 case PLUS:
14999 if (FLOAT_MODE_P (mode))
15000 *total = COSTS_N_INSNS (ix86_cost->fadd);
15001 else if (!TARGET_DECOMPOSE_LEA
15002 && GET_MODE_CLASS (mode) == MODE_INT
15003 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15004 {
15005 if (GET_CODE (XEXP (x, 0)) == PLUS
15006 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15007 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15008 && CONSTANT_P (XEXP (x, 1)))
15009 {
15010 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15011 if (val == 2 || val == 4 || val == 8)
15012 {
15013 *total = COSTS_N_INSNS (ix86_cost->lea);
15014 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15015 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15016 outer_code);
15017 *total += rtx_cost (XEXP (x, 1), outer_code);
15018 return true;
15019 }
15020 }
15021 else if (GET_CODE (XEXP (x, 0)) == MULT
15022 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15023 {
15024 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15025 if (val == 2 || val == 4 || val == 8)
15026 {
15027 *total = COSTS_N_INSNS (ix86_cost->lea);
15028 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15029 *total += rtx_cost (XEXP (x, 1), outer_code);
15030 return true;
15031 }
15032 }
15033 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15034 {
15035 *total = COSTS_N_INSNS (ix86_cost->lea);
15036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15037 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15038 *total += rtx_cost (XEXP (x, 1), outer_code);
15039 return true;
15040 }
15041 }
15042 /* FALLTHRU */
15043
15044 case MINUS:
15045 if (FLOAT_MODE_P (mode))
15046 {
15047 *total = COSTS_N_INSNS (ix86_cost->fadd);
15048 return false;
15049 }
15050 /* FALLTHRU */
15051
15052 case AND:
15053 case IOR:
15054 case XOR:
15055 if (!TARGET_64BIT && mode == DImode)
15056 {
15057 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15058 + (rtx_cost (XEXP (x, 0), outer_code)
15059 << (GET_MODE (XEXP (x, 0)) != DImode))
15060 + (rtx_cost (XEXP (x, 1), outer_code)
15061 << (GET_MODE (XEXP (x, 1)) != DImode)));
15062 return true;
15063 }
15064 /* FALLTHRU */
15065
15066 case NEG:
15067 if (FLOAT_MODE_P (mode))
15068 {
15069 *total = COSTS_N_INSNS (ix86_cost->fchs);
15070 return false;
15071 }
15072 /* FALLTHRU */
15073
15074 case NOT:
15075 if (!TARGET_64BIT && mode == DImode)
15076 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15077 else
15078 *total = COSTS_N_INSNS (ix86_cost->add);
15079 return false;
15080
15081 case FLOAT_EXTEND:
15082 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15083 *total = 0;
15084 return false;
15085
15086 case ABS:
15087 if (FLOAT_MODE_P (mode))
15088 *total = COSTS_N_INSNS (ix86_cost->fabs);
15089 return false;
15090
15091 case SQRT:
15092 if (FLOAT_MODE_P (mode))
15093 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15094 return false;
15095
15096 case UNSPEC:
15097 if (XINT (x, 1) == UNSPEC_TP)
15098 *total = 0;
15099 return false;
15100
15101 default:
15102 return false;
15103 }
15104 }
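/* Illustrative cost computation for the MULT case above: multiplying by the
   constant 10 (binary 1010, two bits set) gives nbits = 2, so the cost is
   COSTS_N_INSNS (mult_init[MODE_INDEX (mode)] + 2 * mult_bit) plus the
   costs of the two operands; a multiplication by a non-constant uses the
   arbitrary nbits = 7 instead.  */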
15105
15106 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15107 static void
15108 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15109 {
15110 init_section ();
15111 fputs ("\tpushl $", asm_out_file);
15112 assemble_name (asm_out_file, XSTR (symbol, 0));
15113 fputc ('\n', asm_out_file);
15114 }
15115 #endif
15116
15117 #if TARGET_MACHO
15118
15119 static int current_machopic_label_num;
15120
15121 /* Given a symbol name and its associated stub, write out the
15122 definition of the stub. */
15123
15124 void
15125 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15126 {
15127 unsigned int length;
15128 char *binder_name, *symbol_name, lazy_ptr_name[32];
15129 int label = ++current_machopic_label_num;
15130
15131 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15132 symb = (*targetm.strip_name_encoding) (symb);
15133
15134 length = strlen (stub);
15135 binder_name = alloca (length + 32);
15136 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15137
15138 length = strlen (symb);
15139 symbol_name = alloca (length + 32);
15140 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15141
15142 sprintf (lazy_ptr_name, "L%d$lz", label);
15143
15144 if (MACHOPIC_PURE)
15145 machopic_picsymbol_stub_section ();
15146 else
15147 machopic_symbol_stub_section ();
15148
15149 fprintf (file, "%s:\n", stub);
15150 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15151
15152 if (MACHOPIC_PURE)
15153 {
15154 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15155 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15156 fprintf (file, "\tjmp %%edx\n");
15157 }
15158 else
15159 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15160
15161 fprintf (file, "%s:\n", binder_name);
15162
15163 if (MACHOPIC_PURE)
15164 {
15165 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15166 fprintf (file, "\tpushl %%eax\n");
15167 }
15168 else
15169 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15170
15171 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15172
15173 machopic_lazy_symbol_ptr_section ();
15174 fprintf (file, "%s:\n", lazy_ptr_name);
15175 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15176 fprintf (file, "\t.long %s\n", binder_name);
15177 }
15178 #endif /* TARGET_MACHO */
15179
15180 /* Order the registers for register allocator. */
15181
15182 void
15183 x86_order_regs_for_local_alloc (void)
15184 {
15185 int pos = 0;
15186 int i;
15187
15188 /* First allocate the local general purpose registers. */
15189 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15190 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15191 reg_alloc_order [pos++] = i;
15192
15193 /* Global general purpose registers. */
15194 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15195 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15196 reg_alloc_order [pos++] = i;
15197
15198 /* x87 registers come first in case we are doing FP math
15199 using them. */
15200 if (!TARGET_SSE_MATH)
15201 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15202 reg_alloc_order [pos++] = i;
15203
15204 /* SSE registers. */
15205 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15206 reg_alloc_order [pos++] = i;
15207 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15208 reg_alloc_order [pos++] = i;
15209
15210 /* x87 registers. */
15211 if (TARGET_SSE_MATH)
15212 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15213 reg_alloc_order [pos++] = i;
15214
15215 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15216 reg_alloc_order [pos++] = i;
15217
15218 /* Initialize the rest of the array, as we do not allocate some registers
15219 at all. */
15220 while (pos < FIRST_PSEUDO_REGISTER)
15221 reg_alloc_order [pos++] = 0;
15222 }
15223
15224 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15225 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15226 #endif
15227
15228 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15229 struct attribute_spec.handler. */
15230 static tree
15231 ix86_handle_struct_attribute (tree *node, tree name,
15232 tree args ATTRIBUTE_UNUSED,
15233 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15234 {
15235 tree *type = NULL;
15236 if (DECL_P (*node))
15237 {
15238 if (TREE_CODE (*node) == TYPE_DECL)
15239 type = &TREE_TYPE (*node);
15240 }
15241 else
15242 type = node;
15243
15244 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15245 || TREE_CODE (*type) == UNION_TYPE)))
15246 {
15247 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15248 *no_add_attrs = true;
15249 }
15250
15251 else if ((is_attribute_p ("ms_struct", name)
15252 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15253 || ((is_attribute_p ("gcc_struct", name)
15254 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15255 {
15256 warning ("`%s' incompatible attribute ignored",
15257 IDENTIFIER_POINTER (name));
15258 *no_add_attrs = true;
15259 }
15260
15261 return NULL_TREE;
15262 }
15263
15264 static bool
15265 ix86_ms_bitfield_layout_p (tree record_type)
15266 {
15267 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15268 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15269 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15270 }
15271
15272 /* Returns an expression indicating where the this parameter is
15273 located on entry to the FUNCTION. */
15274
15275 static rtx
15276 x86_this_parameter (tree function)
15277 {
15278 tree type = TREE_TYPE (function);
15279
15280 if (TARGET_64BIT)
15281 {
15282 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15283 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15284 }
15285
15286 if (ix86_function_regparm (type, function) > 0)
15287 {
15288 tree parm;
15289
15290 parm = TYPE_ARG_TYPES (type);
15291 /* Figure out whether or not the function has a variable number of
15292 arguments. */
15293 for (; parm; parm = TREE_CHAIN (parm))
15294 if (TREE_VALUE (parm) == void_type_node)
15295 break;
15296 /* If not, the this parameter is in the first argument. */
15297 if (parm)
15298 {
15299 int regno = 0;
15300 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15301 regno = 2;
15302 return gen_rtx_REG (SImode, regno);
15303 }
15304 }
15305
15306 if (aggregate_value_p (TREE_TYPE (type), type))
15307 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15308 else
15309 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15310 }
15311
15312 /* Determine whether x86_output_mi_thunk can succeed. */
15313
15314 static bool
15315 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15316 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15317 HOST_WIDE_INT vcall_offset, tree function)
15318 {
15319 /* 64-bit can handle anything. */
15320 if (TARGET_64BIT)
15321 return true;
15322
15323 /* For 32-bit, everything's fine if we have one free register. */
15324 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15325 return true;
15326
15327 /* Need a free register for vcall_offset. */
15328 if (vcall_offset)
15329 return false;
15330
15331 /* Need a free register for GOT references. */
15332 if (flag_pic && !(*targetm.binds_local_p) (function))
15333 return false;
15334
15335 /* Otherwise ok. */
15336 return true;
15337 }
15338
15339 /* Output the assembler code for a thunk function. THUNK_DECL is the
15340 declaration for the thunk function itself, FUNCTION is the decl for
15341 the target function. DELTA is an immediate constant offset to be
15342 added to THIS. If VCALL_OFFSET is nonzero, the word at
15343 *(*this + vcall_offset) should be added to THIS. */
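/* A minimal sketch of the 32-bit output for a thunk with DELTA == -4 and no
   vcall offset, assuming `this' lives in its default stack slot
   (target_function stands in for the actual assembler name):

       addl $-4, 4(%esp)
       jmp  target_function

   The vcall case additionally loads the word at *(*this + vcall_offset)
   into a scratch register and adds it to `this' before the jump.  */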
15344
15345 static void
15346 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15347 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15348 HOST_WIDE_INT vcall_offset, tree function)
15349 {
15350 rtx xops[3];
15351 rtx this = x86_this_parameter (function);
15352 rtx this_reg, tmp;
15353
15354 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15355 pull it in now and let DELTA benefit. */
15356 if (REG_P (this))
15357 this_reg = this;
15358 else if (vcall_offset)
15359 {
15360 /* Put the this parameter into %eax. */
15361 xops[0] = this;
15362 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15363 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15364 }
15365 else
15366 this_reg = NULL_RTX;
15367
15368 /* Adjust the this parameter by a fixed constant. */
15369 if (delta)
15370 {
15371 xops[0] = GEN_INT (delta);
15372 xops[1] = this_reg ? this_reg : this;
15373 if (TARGET_64BIT)
15374 {
15375 if (!x86_64_general_operand (xops[0], DImode))
15376 {
15377 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15378 xops[1] = tmp;
15379 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15380 xops[0] = tmp;
15381 xops[1] = this;
15382 }
15383 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15384 }
15385 else
15386 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15387 }
15388
15389 /* Adjust the this parameter by a value stored in the vtable. */
15390 if (vcall_offset)
15391 {
15392 if (TARGET_64BIT)
15393 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15394 else
15395 {
15396 int tmp_regno = 2 /* ECX */;
15397 if (lookup_attribute ("fastcall",
15398 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15399 tmp_regno = 0 /* EAX */;
15400 tmp = gen_rtx_REG (SImode, tmp_regno);
15401 }
15402
15403 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15404 xops[1] = tmp;
15405 if (TARGET_64BIT)
15406 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15407 else
15408 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15409
15410 /* Adjust the this parameter. */
15411 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15412 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15413 {
15414 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15415 xops[0] = GEN_INT (vcall_offset);
15416 xops[1] = tmp2;
15417 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15418 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15419 }
15420 xops[1] = this_reg;
15421 if (TARGET_64BIT)
15422 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15423 else
15424 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15425 }
15426
15427 /* If necessary, drop THIS back to its stack slot. */
15428 if (this_reg && this_reg != this)
15429 {
15430 xops[0] = this_reg;
15431 xops[1] = this;
15432 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15433 }
15434
15435 xops[0] = XEXP (DECL_RTL (function), 0);
15436 if (TARGET_64BIT)
15437 {
15438 if (!flag_pic || (*targetm.binds_local_p) (function))
15439 output_asm_insn ("jmp\t%P0", xops);
15440 else
15441 {
15442 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15443 tmp = gen_rtx_CONST (Pmode, tmp);
15444 tmp = gen_rtx_MEM (QImode, tmp);
15445 xops[0] = tmp;
15446 output_asm_insn ("jmp\t%A0", xops);
15447 }
15448 }
15449 else
15450 {
15451 if (!flag_pic || (*targetm.binds_local_p) (function))
15452 output_asm_insn ("jmp\t%P0", xops);
15453 else
15454 #if TARGET_MACHO
15455 if (TARGET_MACHO)
15456 {
15457 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15458 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15459 tmp = gen_rtx_MEM (QImode, tmp);
15460 xops[0] = tmp;
15461 output_asm_insn ("jmp\t%0", xops);
15462 }
15463 else
15464 #endif /* TARGET_MACHO */
15465 {
15466 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15467 output_set_got (tmp);
15468
15469 xops[1] = tmp;
15470 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15471 output_asm_insn ("jmp\t{*}%1", xops);
15472 }
15473 }
15474 }
15475
15476 static void
15477 x86_file_start (void)
15478 {
15479 default_file_start ();
15480 if (X86_FILE_START_VERSION_DIRECTIVE)
15481 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15482 if (X86_FILE_START_FLTUSED)
15483 fputs ("\t.global\t__fltused\n", asm_out_file);
15484 if (ix86_asm_dialect == ASM_INTEL)
15485 fputs ("\t.intel_syntax\n", asm_out_file);
15486 }
15487
15488 int
15489 x86_field_alignment (tree field, int computed)
15490 {
15491 enum machine_mode mode;
15492 tree type = TREE_TYPE (field);
15493
15494 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15495 return computed;
15496 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15497 ? get_inner_array_type (type) : type);
15498 if (mode == DFmode || mode == DCmode
15499 || GET_MODE_CLASS (mode) == MODE_INT
15500 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15501 return MIN (32, computed);
15502 return computed;
15503 }
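/* Example: without -malign-double, a double or long long field inside a
   struct is capped at 32-bit alignment by the MIN above, matching the
   traditional ia32 System V layout; on 64-bit targets the computed
   alignment is returned unchanged.  */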
15504
15505 /* Output assembler code to FILE to increment profiler label # LABELNO
15506 for profiling a function entry. */
15507 void
15508 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15509 {
15510 if (TARGET_64BIT)
15511 if (flag_pic)
15512 {
15513 #ifndef NO_PROFILE_COUNTERS
15514 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
15515 #endif
15516 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15517 }
15518 else
15519 {
15520 #ifndef NO_PROFILE_COUNTERS
15521 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15522 #endif
15523 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15524 }
15525 else if (flag_pic)
15526 {
15527 #ifndef NO_PROFILE_COUNTERS
15528 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15529 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15530 #endif
15531 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15532 }
15533 else
15534 {
15535 #ifndef NO_PROFILE_COUNTERS
15536 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15537 PROFILE_COUNT_REGISTER);
15538 #endif
15539 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15540 }
15541 }
15542
15543 /* We don't have exact information about the insn sizes, but we can
15544    assume quite safely that we are informed about all 1-byte insns and
15545    about memory address sizes.  This is enough to eliminate unnecessary
15546    padding in 99% of cases.  */
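/* For illustration of the estimates below: a direct "call foo" is counted
   as 5 bytes, a bare "ret" (length <= 1) as 1 byte, and any non-jump insn
   that mentions a symbol is assumed to need at least 4 address bytes, so
   something like "movl foo, %eax" is counted as 5 bytes.  */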
15547
15548 static int
15549 min_insn_size (rtx insn)
15550 {
15551 int l = 0;
15552
15553 if (!INSN_P (insn) || !active_insn_p (insn))
15554 return 0;
15555
15556   /* Discard alignments we've emitted and jump table instructions.  */
15557 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15558 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15559 return 0;
15560 if (GET_CODE (insn) == JUMP_INSN
15561 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15562 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15563 return 0;
15564
15565   /* Important case: calls are always 5 bytes.
15566      It is common to have many calls in a row.  */
15567 if (GET_CODE (insn) == CALL_INSN
15568 && symbolic_reference_mentioned_p (PATTERN (insn))
15569 && !SIBLING_CALL_P (insn))
15570 return 5;
15571 if (get_attr_length (insn) <= 1)
15572 return 1;
15573
15574   /* For normal instructions we can rely on the sizes of addresses and on
15575      the presence of a symbol to require 4 bytes of encoding.  This is not
15576      the case for jumps, where references are PC relative.  */
15577 if (GET_CODE (insn) != JUMP_INSN)
15578 {
15579 l = get_attr_length_address (insn);
15580 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15581 l = 4;
15582 }
15583 if (l)
15584 return 1+l;
15585 else
15586 return 2;
15587 }
15588
15589 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15590    16-byte window.  */
15591
15592 static void
15593 ix86_avoid_jump_misspredicts (void)
15594 {
15595 rtx insn, start = get_insns ();
15596 int nbytes = 0, njumps = 0;
15597 int isjump = 0;
15598
15599   /* Look for all minimal intervals of instructions containing 4 jumps.
15600      The intervals are bounded by START and INSN.  NBYTES is the total
15601      size of the instructions in the interval, including INSN and not
15602      including START.  When NBYTES is smaller than 16 bytes, it is possible
15603      that the ends of START and INSN fall into the same 16-byte window.
15604 
15605      The smallest offset at which INSN can start is the case where START
15606      ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
15607      We add a p2align to the 16-byte window with maxskip
15608      17 - NBYTES + sizeof (INSN).  */
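  /* For example, if the interval ending in INSN contains NBYTES = 12 bytes
     and min_insn_size (INSN) = 2, the loop below emits an alignment of
     padsize = 15 - 12 + 2 = 5 bytes in front of INSN, pushing it past the
     16-byte boundary it would otherwise share with START.  */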
15609 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15610 {
15611
15612 nbytes += min_insn_size (insn);
15613 if (dump_file)
15614 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15615 INSN_UID (insn), min_insn_size (insn));
15616 if ((GET_CODE (insn) == JUMP_INSN
15617 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15618 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15619 || GET_CODE (insn) == CALL_INSN)
15620 njumps++;
15621 else
15622 continue;
15623
15624 while (njumps > 3)
15625 {
15626 start = NEXT_INSN (start);
15627 if ((GET_CODE (start) == JUMP_INSN
15628 && GET_CODE (PATTERN (start)) != ADDR_VEC
15629 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15630 || GET_CODE (start) == CALL_INSN)
15631 njumps--, isjump = 1;
15632 else
15633 isjump = 0;
15634 nbytes -= min_insn_size (start);
15635 }
15636 if (njumps < 0)
15637 abort ();
15638 if (dump_file)
15639 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15640 INSN_UID (start), INSN_UID (insn), nbytes);
15641
15642 if (njumps == 3 && isjump && nbytes < 16)
15643 {
15644 int padsize = 15 - nbytes + min_insn_size (insn);
15645
15646 if (dump_file)
15647 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15648 INSN_UID (insn), padsize);
15649 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15650 }
15651 }
15652 }
15653
15654 /* AMD Athlon works faster when RET is not the destination of a
15655    conditional jump or directly preceded by another jump instruction.
15656    We avoid the penalty by inserting a NOP just before the RET
15657    instructions in such cases.  */
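/* As a hypothetical sketch of the transformation: in a sequence such as

	jne	.L2
	ret

   the "ret" directly follows a conditional jump, so it is replaced by the
   longer return form produced by gen_return_internal_long (typically a
   "rep"-prefixed ret), which avoids the mispredict penalty.  */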
15658 static void
15659 ix86_pad_returns (void)
15660 {
15661 edge e;
15662
15663 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15664 {
15665 basic_block bb = e->src;
15666 rtx ret = BB_END (bb);
15667 rtx prev;
15668 bool replace = false;
15669
15670 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15671 || !maybe_hot_bb_p (bb))
15672 continue;
15673 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15674 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15675 break;
15676 if (prev && GET_CODE (prev) == CODE_LABEL)
15677 {
15678 edge e;
15679 for (e = bb->pred; e; e = e->pred_next)
15680 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15681 && !(e->flags & EDGE_FALLTHRU))
15682 replace = true;
15683 }
15684 if (!replace)
15685 {
15686 prev = prev_active_insn (ret);
15687 if (prev
15688 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15689 || GET_CODE (prev) == CALL_INSN))
15690 replace = true;
15691 	  /* Empty functions get a branch mispredict even when the jump
15692 	     destination is not visible to us.  */
15693 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15694 replace = true;
15695 }
15696 if (replace)
15697 {
15698 emit_insn_before (gen_return_internal_long (), ret);
15699 delete_insn (ret);
15700 }
15701 }
15702 }
15703
15704 /* Implement machine specific optimizations.  We implement padding of returns
15705    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
15706 static void
15707 ix86_reorg (void)
15708 {
15709 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15710 ix86_pad_returns ();
15711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15712 ix86_avoid_jump_misspredicts ();
15713 }
15714
15715 /* Return nonzero when a QImode register that must be represented via a REX
15716    prefix is used.  */
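/* For example, a QImode use of %sil (hard register 4) is only encodable
   with a REX prefix, whereas %al ... %bl (hard registers 0-3) are not
   affected; hence the REGNO >= 4 test below.  */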
15717 bool
15718 x86_extended_QIreg_mentioned_p (rtx insn)
15719 {
15720 int i;
15721 extract_insn_cached (insn);
15722 for (i = 0; i < recog_data.n_operands; i++)
15723 if (REG_P (recog_data.operand[i])
15724 && REGNO (recog_data.operand[i]) >= 4)
15725 return true;
15726 return false;
15727 }
15728
15729 /* Return nonzero when P points to a register encoded via a REX prefix.
15730    Called via for_each_rtx.  */
15731 static int
15732 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15733 {
15734 unsigned int regno;
15735 if (!REG_P (*p))
15736 return 0;
15737 regno = REGNO (*p);
15738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15739 }
15740
15741 /* Return true when INSN mentions a register that must be encoded using a
15742    REX prefix.  */
15743 bool
15744 x86_extended_reg_mentioned_p (rtx insn)
15745 {
15746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15747 }
15748
15749 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15750 optabs would emit if we didn't have TFmode patterns. */
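/* A rough C-level sketch of the sequence emitted below (illustrative only,
   with "x" the unsigned input and "result" the FP output):

	if ((signed) x >= 0)
	  result = (fp) (signed) x;
	else
	  {
	    half = (x >> 1) | (x & 1);     -- halve, keep the sticky bit
	    result = (fp) (signed) half;
	    result = result + result;      -- and double it back
	  }
*/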
15751
15752 void
15753 x86_emit_floatuns (rtx operands[2])
15754 {
15755 rtx neglab, donelab, i0, i1, f0, in, out;
15756 enum machine_mode mode, inmode;
15757
15758 inmode = GET_MODE (operands[1]);
15759 if (inmode != SImode
15760 && inmode != DImode)
15761 abort ();
15762
15763 out = operands[0];
15764 in = force_reg (inmode, operands[1]);
15765 mode = GET_MODE (out);
15766 neglab = gen_label_rtx ();
15767 donelab = gen_label_rtx ();
15768 i1 = gen_reg_rtx (Pmode);
15769 f0 = gen_reg_rtx (mode);
15770
15771 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15772
15773 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15774 emit_jump_insn (gen_jump (donelab));
15775 emit_barrier ();
15776
15777 emit_label (neglab);
15778
15779 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15780 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15781 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15782 expand_float (f0, i0, 0);
15783 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15784
15785 emit_label (donelab);
15786 }
15787
15788 /* Return true if we do not know how to pass TYPE solely in registers.  */
15789 bool
15790 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15791 {
15792 if (default_must_pass_in_stack (mode, type))
15793 return true;
15794 return (!TARGET_64BIT && type && mode == TImode);
15795 }
15796
15797 /* Initialize vector TARGET via VALS. */
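/* For example, building a V4SFmode vector from four non-constant SFmode
   values a, b, c, d uses three unpcklps instructions:

	tmp1   = unpcklps (b, d)	->  { b, d, ?, ? }
	tmp2   = unpcklps (a, c)	->  { a, c, ?, ? }
	target = unpcklps (tmp2, tmp1)	->  { a, b, c, d }  */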
15798 void
15799 ix86_expand_vector_init (rtx target, rtx vals)
15800 {
15801 enum machine_mode mode = GET_MODE (target);
15802 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15803 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15804 int i;
15805
15806 for (i = n_elts - 1; i >= 0; i--)
15807 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15808 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15809 break;
15810
15811   /* A few special cases first...
15812      ... constants are best loaded from the constant pool.  */
15813 if (i < 0)
15814 {
15815 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15816 return;
15817 }
15818
15819   /* ... values where only the first field is non-constant are best loaded
15820      from the pool and overwritten via a move later.  */
15821 if (!i)
15822 {
15823 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15824 GET_MODE_INNER (mode), 0);
15825
15826 op = force_reg (mode, op);
15827 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15828 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15829 switch (GET_MODE (target))
15830 {
15831 case V2DFmode:
15832 emit_insn (gen_sse2_movsd (target, target, op));
15833 break;
15834 case V4SFmode:
15835 emit_insn (gen_sse_movss (target, target, op));
15836 break;
15837 default:
15838 break;
15839 }
15840 return;
15841 }
15842
15843   /* And the general case: build the vector with unpack (interleave) insns.  */
15844 switch (GET_MODE (target))
15845 {
15846 case V2DFmode:
15847 {
15848 rtx vecop0 =
15849 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15850 rtx vecop1 =
15851 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15852
15853 vecop0 = force_reg (V2DFmode, vecop0);
15854 vecop1 = force_reg (V2DFmode, vecop1);
15855 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15856 }
15857 break;
15858 case V4SFmode:
15859 {
15860 rtx vecop0 =
15861 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15862 rtx vecop1 =
15863 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15864 rtx vecop2 =
15865 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15866 rtx vecop3 =
15867 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15868 rtx tmp1 = gen_reg_rtx (V4SFmode);
15869 rtx tmp2 = gen_reg_rtx (V4SFmode);
15870
15871 vecop0 = force_reg (V4SFmode, vecop0);
15872 vecop1 = force_reg (V4SFmode, vecop1);
15873 vecop2 = force_reg (V4SFmode, vecop2);
15874 vecop3 = force_reg (V4SFmode, vecop3);
15875 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15876 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15877 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15878 }
15879 break;
15880 default:
15881 abort ();
15882 }
15883 }
15884
15885 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15886
15887 We do this in the new i386 backend to maintain source compatibility
15888 with the old cc0-based compiler. */
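/* For instance (hypothetical user code), an asm such as

	asm ("bsf %1, %0" : "=r" (bit) : "rm" (mask));

   is compiled as if "flags", "fpsr" and "dirflag" also appeared in its
   clobber list.  */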
15889
15890 static tree
15891 ix86_md_asm_clobbers (tree clobbers)
15892 {
15893 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15894 clobbers);
15895 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15896 clobbers);
15897 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15898 clobbers);
15899 return clobbers;
15900 }
15901
15902 /* Worker function for REVERSE_CONDITION. */
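/* For example, reversing LT in CCFPmode yields UNGE rather than GE, so the
   reversed branch still behaves correctly when one operand is a NaN.  */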
15903
15904 enum rtx_code
15905 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15906 {
15907 return (mode != CCFPmode && mode != CCFPUmode
15908 ? reverse_condition (code)
15909 : reverse_condition_maybe_unordered (code));
15910 }
15911
15912 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15913 to OPERANDS[0]. */
15914
15915 const char *
15916 output_387_reg_move (rtx insn, rtx *operands)
15917 {
15918 if (REG_P (operands[1])
15919 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15920 {
15921 if (REGNO (operands[0]) == FIRST_STACK_REG
15922 && TARGET_USE_FFREEP)
15923 return "ffreep\t%y0";
15924 return "fstp\t%y0";
15925 }
15926 if (STACK_TOP_P (operands[0]))
15927 return "fld%z1\t%y1";
15928 return "fst\t%y0";
15929 }
15930
15931 /* Output code to perform a conditional jump to LABEL, if the C2 flag of the
15932    FP status register is set.  */
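/* The emitted sequence is roughly:

	fnstsw	%ax
	sahf
	jp	LABEL

   sahf copies C2 into the parity flag, so the "unordered" (parity) jump
   is taken exactly when C2 was set.  */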
15933
15934 void
15935 ix86_emit_fp_unordered_jump (rtx label)
15936 {
15937 rtx reg = gen_reg_rtx (HImode);
15938 rtx temp;
15939
15940 emit_insn (gen_x86_fnstsw_1 (reg));
15941 emit_insn (gen_x86_sahf_1 (reg));
15942
15943 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15944 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15945 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15946 gen_rtx_LABEL_REF (VOIDmode, label),
15947 pc_rtx);
15948 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15949 emit_jump_insn (temp);
15950 }
15951
15952 /* Output code to perform a log1p XFmode calculation. */
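/* The expansion below relies on two x87 facts: fyl2xp1 computes
   y * log2(x + 1) but is only specified for |x| < 1 - sqrt(2)/2
   (~0.29289321881), while fyl2x computes y * log2(x) for any positive x.
   With y = ln(2) (the fldln2 constant), both give the natural logarithm,
   since ln(2) * log2(1 + x) = log(1 + x).  So for small |x| we use fyl2xp1
   directly, and otherwise fall back to fyl2x on 1 + x.  */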
15953
15954 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15955 {
15956 rtx label1 = gen_label_rtx ();
15957 rtx label2 = gen_label_rtx ();
15958
15959 rtx tmp = gen_reg_rtx (XFmode);
15960 rtx tmp2 = gen_reg_rtx (XFmode);
15961
15962 emit_insn (gen_absxf2 (tmp, op1));
15963 emit_insn (gen_cmpxf (tmp,
15964 CONST_DOUBLE_FROM_REAL_VALUE (
15965 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15966 XFmode)));
15967 emit_jump_insn (gen_bge (label1));
15968
15969 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15970 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15971 emit_jump (label2);
15972
15973 emit_label (label1);
15974 emit_move_insn (tmp, CONST1_RTX (XFmode));
15975 emit_insn (gen_addxf3 (tmp, op1, tmp));
15976 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15977 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15978
15979 emit_label (label2);
15980 }
15981
15982 #include "gt-i386.h"