gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
51 #endif
52
53 /* Processor costs (relative to an add) */
54 static const
55 struct processor_costs size_cost = { /* costs for tuning for size */
56 2, /* cost of an add instruction */
57 3, /* cost of a lea instruction */
58 2, /* variable shift costs */
59 3, /* constant shift costs */
60 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
61 0, /* cost of multiply per each bit set */
62 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
63 3, /* cost of movsx */
64 3, /* cost of movzx */
65 0, /* "large" insn */
66 2, /* MOVE_RATIO */
67 2, /* cost for loading QImode using movzbl */
68 {2, 2, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 2, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {2, 2, 2}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {2, 2, 2}, /* cost of storing fp registers */
76 3, /* cost of moving MMX register */
77 {3, 3}, /* cost of loading MMX registers
78 in SImode and DImode */
79 {3, 3}, /* cost of storing MMX registers
80 in SImode and DImode */
81 3, /* cost of moving SSE register */
82 {3, 3, 3}, /* cost of loading SSE registers
83 in SImode, DImode and TImode */
84 {3, 3, 3}, /* cost of storing SSE registers
85 in SImode, DImode and TImode */
86 3, /* MMX or SSE register to integer */
87 0, /* size of prefetch block */
88 0, /* number of parallel prefetches */
89 1, /* Branch cost */
90 2, /* cost of FADD and FSUB insns. */
91 2, /* cost of FMUL instruction. */
92 2, /* cost of FDIV instruction. */
93 2, /* cost of FABS instruction. */
94 2, /* cost of FCHS instruction. */
95 2, /* cost of FSQRT instruction. */
96 };
97
98 /* Processor costs (relative to an add) */
99 static const
100 struct processor_costs i386_cost = { /* 386 specific costs */
101 1, /* cost of an add instruction */
102 1, /* cost of a lea instruction */
103 3, /* variable shift costs */
104 2, /* constant shift costs */
105 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
106 1, /* cost of multiply per each bit set */
107 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
108 3, /* cost of movsx */
109 2, /* cost of movzx */
110 15, /* "large" insn */
111 3, /* MOVE_RATIO */
112 4, /* cost for loading QImode using movzbl */
113 {2, 4, 2}, /* cost of loading integer registers
114 in QImode, HImode and SImode.
115 Relative to reg-reg move (2). */
116 {2, 4, 2}, /* cost of storing integer registers */
117 2, /* cost of reg,reg fld/fst */
118 {8, 8, 8}, /* cost of loading fp registers
119 in SFmode, DFmode and XFmode */
120 {8, 8, 8}, /* cost of storing fp registers */
121 2, /* cost of moving MMX register */
122 {4, 8}, /* cost of loading MMX registers
123 in SImode and DImode */
124 {4, 8}, /* cost of storing MMX registers
125 in SImode and DImode */
126 2, /* cost of moving SSE register */
127 {4, 8, 16}, /* cost of loading SSE registers
128 in SImode, DImode and TImode */
129 {4, 8, 16}, /* cost of storing SSE registers
130 in SImode, DImode and TImode */
131 3, /* MMX or SSE register to integer */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
134 1, /* Branch cost */
135 23, /* cost of FADD and FSUB insns. */
136 27, /* cost of FMUL instruction. */
137 88, /* cost of FDIV instruction. */
138 22, /* cost of FABS instruction. */
139 24, /* cost of FCHS instruction. */
140 122, /* cost of FSQRT instruction. */
141 };
142
143 static const
144 struct processor_costs i486_cost = { /* 486 specific costs */
145 1, /* cost of an add instruction */
146 1, /* cost of a lea instruction */
147 3, /* variable shift costs */
148 2, /* constant shift costs */
149 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
150 1, /* cost of multiply per each bit set */
151 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
152 3, /* cost of movsx */
153 2, /* cost of movzx */
154 15, /* "large" insn */
155 3, /* MOVE_RATIO */
156 4, /* cost for loading QImode using movzbl */
157 {2, 4, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 4, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {8, 8, 8}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {8, 8, 8}, /* cost of storing fp registers */
165 2, /* cost of moving MMX register */
166 {4, 8}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {4, 8}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {4, 8, 16}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {4, 8, 16}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3, /* MMX or SSE register to integer */
176 0, /* size of prefetch block */
177 0, /* number of parallel prefetches */
178 1, /* Branch cost */
179 8, /* cost of FADD and FSUB insns. */
180 16, /* cost of FMUL instruction. */
181 73, /* cost of FDIV instruction. */
182 3, /* cost of FABS instruction. */
183 3, /* cost of FCHS instruction. */
184 83, /* cost of FSQRT instruction. */
185 };
186
187 static const
188 struct processor_costs pentium_cost = {
189 1, /* cost of an add instruction */
190 1, /* cost of a lea instruction */
191 4, /* variable shift costs */
192 1, /* constant shift costs */
193 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
194 0, /* cost of multiply per each bit set */
195 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
196 3, /* cost of movsx */
197 2, /* cost of movzx */
198 8, /* "large" insn */
199 6, /* MOVE_RATIO */
200 6, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {2, 2, 6}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {4, 4, 6}, /* cost of storing fp registers */
209 8, /* cost of moving MMX register */
210 {8, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {8, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
222 2, /* Branch cost */
223 3, /* cost of FADD and FSUB insns. */
224 3, /* cost of FMUL instruction. */
225 39, /* cost of FDIV instruction. */
226 1, /* cost of FABS instruction. */
227 1, /* cost of FCHS instruction. */
228 70, /* cost of FSQRT instruction. */
229 };
230
231 static const
232 struct processor_costs pentiumpro_cost = {
233 1, /* cost of an add instruction */
234 1, /* cost of a lea instruction */
235 1, /* variable shift costs */
236 1, /* constant shift costs */
237 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
238 0, /* cost of multiply per each bit set */
239 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
240 1, /* cost of movsx */
241 1, /* cost of movzx */
242 8, /* "large" insn */
243 6, /* MOVE_RATIO */
244 2, /* cost for loading QImode using movzbl */
245 {4, 4, 4}, /* cost of loading integer registers
246 in QImode, HImode and SImode.
247 Relative to reg-reg move (2). */
248 {2, 2, 2}, /* cost of storing integer registers */
249 2, /* cost of reg,reg fld/fst */
250 {2, 2, 6}, /* cost of loading fp registers
251 in SFmode, DFmode and XFmode */
252 {4, 4, 6}, /* cost of storing fp registers */
253 2, /* cost of moving MMX register */
254 {2, 2}, /* cost of loading MMX registers
255 in SImode and DImode */
256 {2, 2}, /* cost of storing MMX registers
257 in SImode and DImode */
258 2, /* cost of moving SSE register */
259 {2, 2, 8}, /* cost of loading SSE registers
260 in SImode, DImode and TImode */
261 {2, 2, 8}, /* cost of storing SSE registers
262 in SImode, DImode and TImode */
263 3, /* MMX or SSE register to integer */
264 32, /* size of prefetch block */
265 6, /* number of parallel prefetches */
266 2, /* Branch cost */
267 3, /* cost of FADD and FSUB insns. */
268 5, /* cost of FMUL instruction. */
269 56, /* cost of FDIV instruction. */
270 2, /* cost of FABS instruction. */
271 2, /* cost of FCHS instruction. */
272 56, /* cost of FSQRT instruction. */
273 };
274
275 static const
276 struct processor_costs k6_cost = {
277 1, /* cost of an add instruction */
278 2, /* cost of a lea instruction */
279 1, /* variable shift costs */
280 1, /* constant shift costs */
281 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
282 0, /* cost of multiply per each bit set */
283 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
284 2, /* cost of movsx */
285 2, /* cost of movzx */
286 8, /* "large" insn */
287 4, /* MOVE_RATIO */
288 3, /* cost for loading QImode using movzbl */
289 {4, 5, 4}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 3, 2}, /* cost of storing integer registers */
293 4, /* cost of reg,reg fld/fst */
294 {6, 6, 6}, /* cost of loading fp registers
295 in SFmode, DFmode and XFmode */
296 {4, 4, 4}, /* cost of storing fp registers */
297 2, /* cost of moving MMX register */
298 {2, 2}, /* cost of loading MMX registers
299 in SImode and DImode */
300 {2, 2}, /* cost of storing MMX registers
301 in SImode and DImode */
302 2, /* cost of moving SSE register */
303 {2, 2, 8}, /* cost of loading SSE registers
304 in SImode, DImode and TImode */
305 {2, 2, 8}, /* cost of storing SSE registers
306 in SImode, DImode and TImode */
307 6, /* MMX or SSE register to integer */
308 32, /* size of prefetch block */
309 1, /* number of parallel prefetches */
310 1, /* Branch cost */
311 2, /* cost of FADD and FSUB insns. */
312 2, /* cost of FMUL instruction. */
313 56, /* cost of FDIV instruction. */
314 2, /* cost of FABS instruction. */
315 2, /* cost of FCHS instruction. */
316 56, /* cost of FSQRT instruction. */
317 };
318
319 static const
320 struct processor_costs athlon_cost = {
321 1, /* cost of an add instruction */
322 2, /* cost of a lea instruction */
323 1, /* variable shift costs */
324 1, /* constant shift costs */
325 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
326 0, /* cost of multiply per each bit set */
327 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
328 1, /* cost of movsx */
329 1, /* cost of movzx */
330 8, /* "large" insn */
331 9, /* MOVE_RATIO */
332 4, /* cost for loading QImode using movzbl */
333 {3, 4, 3}, /* cost of loading integer registers
334 in QImode, HImode and SImode.
335 Relative to reg-reg move (2). */
336 {3, 4, 3}, /* cost of storing integer registers */
337 4, /* cost of reg,reg fld/fst */
338 {4, 4, 12}, /* cost of loading fp registers
339 in SFmode, DFmode and XFmode */
340 {6, 6, 8}, /* cost of storing fp registers */
341 2, /* cost of moving MMX register */
342 {4, 4}, /* cost of loading MMX registers
343 in SImode and DImode */
344 {4, 4}, /* cost of storing MMX registers
345 in SImode and DImode */
346 2, /* cost of moving SSE register */
347 {4, 4, 6}, /* cost of loading SSE registers
348 in SImode, DImode and TImode */
349 {4, 4, 5}, /* cost of storing SSE registers
350 in SImode, DImode and TImode */
351 5, /* MMX or SSE register to integer */
352 64, /* size of prefetch block */
353 6, /* number of parallel prefetches */
354 2, /* Branch cost */
355 4, /* cost of FADD and FSUB insns. */
356 4, /* cost of FMUL instruction. */
357 24, /* cost of FDIV instruction. */
358 2, /* cost of FABS instruction. */
359 2, /* cost of FCHS instruction. */
360 35, /* cost of FSQRT instruction. */
361 };
362
363 static const
364 struct processor_costs k8_cost = {
365 1, /* cost of an add instruction */
366 2, /* cost of a lea instruction */
367 1, /* variable shift costs */
368 1, /* constant shift costs */
369 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
370 0, /* cost of multiply per each bit set */
371 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
372 1, /* cost of movsx */
373 1, /* cost of movzx */
374 8, /* "large" insn */
375 9, /* MOVE_RATIO */
376 4, /* cost for loading QImode using movzbl */
377 {3, 4, 3}, /* cost of loading integer registers
378 in QImode, HImode and SImode.
379 Relative to reg-reg move (2). */
380 {3, 4, 3}, /* cost of storing integer registers */
381 4, /* cost of reg,reg fld/fst */
382 {4, 4, 12}, /* cost of loading fp registers
383 in SFmode, DFmode and XFmode */
384 {6, 6, 8}, /* cost of storing fp registers */
385 2, /* cost of moving MMX register */
386 {3, 3}, /* cost of loading MMX registers
387 in SImode and DImode */
388 {4, 4}, /* cost of storing MMX registers
389 in SImode and DImode */
390 2, /* cost of moving SSE register */
391 {4, 3, 6}, /* cost of loading SSE registers
392 in SImode, DImode and TImode */
393 {4, 4, 5}, /* cost of storing SSE registers
394 in SImode, DImode and TImode */
395 5, /* MMX or SSE register to integer */
396 64, /* size of prefetch block */
397 6, /* number of parallel prefetches */
398 2, /* Branch cost */
399 4, /* cost of FADD and FSUB insns. */
400 4, /* cost of FMUL instruction. */
401 19, /* cost of FDIV instruction. */
402 2, /* cost of FABS instruction. */
403 2, /* cost of FCHS instruction. */
404 35, /* cost of FSQRT instruction. */
405 };
406
407 static const
408 struct processor_costs pentium4_cost = {
409 1, /* cost of an add instruction */
410 1, /* cost of a lea instruction */
411 4, /* variable shift costs */
412 4, /* constant shift costs */
413 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
414 0, /* cost of multiply per each bit set */
415 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
416 1, /* cost of movsx */
417 1, /* cost of movzx */
418 16, /* "large" insn */
419 6, /* MOVE_RATIO */
420 2, /* cost for loading QImode using movzbl */
421 {4, 5, 4}, /* cost of loading integer registers
422 in QImode, HImode and SImode.
423 Relative to reg-reg move (2). */
424 {2, 3, 2}, /* cost of storing integer registers */
425 2, /* cost of reg,reg fld/fst */
426 {2, 2, 6}, /* cost of loading fp registers
427 in SFmode, DFmode and XFmode */
428 {4, 4, 6}, /* cost of storing fp registers */
429 2, /* cost of moving MMX register */
430 {2, 2}, /* cost of loading MMX registers
431 in SImode and DImode */
432 {2, 2}, /* cost of storing MMX registers
433 in SImode and DImode */
434 12, /* cost of moving SSE register */
435 {12, 12, 12}, /* cost of loading SSE registers
436 in SImode, DImode and TImode */
437 {2, 2, 8}, /* cost of storing SSE registers
438 in SImode, DImode and TImode */
439 10, /* MMX or SSE register to integer */
440 64, /* size of prefetch block */
441 6, /* number of parallel prefetches */
442 2, /* Branch cost */
443 5, /* cost of FADD and FSUB insns. */
444 7, /* cost of FMUL instruction. */
445 43, /* cost of FDIV instruction. */
446 2, /* cost of FABS instruction. */
447 2, /* cost of FCHS instruction. */
448 43, /* cost of FSQRT instruction. */
449 };
450
451 const struct processor_costs *ix86_cost = &pentium_cost;
452
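/* Editor's note: a minimal illustrative sketch, not part of the original
   source.  The per-processor tables above are consumed through ix86_cost,
   which override_options () below repoints according to -mcpu= (or to
   size_cost when optimizing for size).  branch_cost is a real field of
   struct processor_costs (override_options reads it); the guarded helper
   below only shows the access pattern and is never compiled.  */
#if 0 /* illustrative only */
static int
example_active_branch_cost ()
{
  /* Ask the currently selected tuning table for its relative branch cost.  */
  return ix86_cost->branch_cost;
}
#endif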
453 /* Processor feature/optimization bitmasks. */
454 #define m_386 (1<<PROCESSOR_I386)
455 #define m_486 (1<<PROCESSOR_I486)
456 #define m_PENT (1<<PROCESSOR_PENTIUM)
457 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
458 #define m_K6 (1<<PROCESSOR_K6)
459 #define m_ATHLON (1<<PROCESSOR_ATHLON)
460 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
461 #define m_K8 (1<<PROCESSOR_K8)
462 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
463
464 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
465 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
466 const int x86_zero_extend_with_and = m_486 | m_PENT;
467 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
468 const int x86_double_with_add = ~m_386;
469 const int x86_use_bit_test = m_386;
470 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
471 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
472 const int x86_3dnow_a = m_ATHLON_K8;
473 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_branch_hints = m_PENT4;
475 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
476 const int x86_partial_reg_stall = m_PPRO;
477 const int x86_use_loop = m_K6;
478 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
479 const int x86_use_mov0 = m_K6;
480 const int x86_use_cltd = ~(m_PENT | m_K6);
481 const int x86_read_modify_write = ~m_PENT;
482 const int x86_read_modify = ~(m_PENT | m_PPRO);
483 const int x86_split_long_moves = m_PPRO;
484 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
485 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
486 const int x86_single_stringop = m_386 | m_PENT4;
487 const int x86_qimode_math = ~(0);
488 const int x86_promote_qi_regs = 0;
489 const int x86_himode_math = ~(m_PPRO);
490 const int x86_promote_hi_regs = m_PPRO;
491 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
492 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
493 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
494 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
495 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
496 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
497 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
498 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
499 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
500 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
501 const int x86_decompose_lea = m_PENT4;
502 const int x86_shift1 = ~m_486;
503 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
504 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
505 /* Set for machines where the type and dependencies are resolved on SSE register
506 parts instead of whole registers, so we may maintain just the lower part of
507 scalar values in the proper format, leaving the upper part undefined. */
508 const int x86_sse_partial_regs = m_ATHLON_K8;
509 /* Athlon optimizes the partial-register FPS special case, thus avoiding the
510 need for extra instructions beforehand. */
511 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
512 const int x86_sse_typeless_stores = m_ATHLON_K8;
513 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
514 const int x86_use_ffreep = m_ATHLON_K8;
515 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
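/* Editor's note: illustrative sketch, not in the original.  Each x86_*
   tuning mask above is tested against the bit of the processor being
   tuned for, in the style already used later in this file (for example
   "x86_arch_always_fancy_math_387 & (1 << ix86_arch)" in override_options,
   or the CPUMASK test on x86_accumulate_outgoing_args).  i386.h is assumed
   to wrap such tests in TARGET_* macros; the guarded code shows only the
   raw pattern and is never compiled.  ix86_cpu is defined further down in
   this file.  */
#if 0 /* illustrative only */
static int
example_cpu_uses_leave ()
{
  /* Nonzero when the selected tuning target prefers the LEAVE instruction.  */
  return (x86_use_leave & (1 << ix86_cpu)) != 0;
}
#endif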
516
517 /* If the average insn count for a single function invocation is
518 lower than this constant, emit fast (but longer) prologue and
519 epilogue code. */
520 #define FAST_PROLOGUE_INSN_COUNT 20
521
522 /* Set by prologue expander and used by epilogue expander to determine
523 the style used. */
524 static int use_fast_prologue_epilogue;
525
526 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
527 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
528 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
529 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
530
531 /* Array of the smallest class containing reg number REGNO, indexed by
532 REGNO. Used by REGNO_REG_CLASS in i386.h. */
533
534 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
535 {
536 /* ax, dx, cx, bx */
537 AREG, DREG, CREG, BREG,
538 /* si, di, bp, sp */
539 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
540 /* FP registers */
541 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
542 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
543 /* arg pointer */
544 NON_Q_REGS,
545 /* flags, fpsr, dirflag, frame */
546 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
547 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
548 SSE_REGS, SSE_REGS,
549 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
550 MMX_REGS, MMX_REGS,
551 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
552 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
553 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
554 SSE_REGS, SSE_REGS,
555 };
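/* Editor's note: illustrative only, not in the original.  REGNO_REG_CLASS
   in i386.h is assumed to index this table directly, so, e.g., hard
   register 0 (%eax) maps to AREG and hard register 7 (%esp) maps to
   NON_Q_REGS.  The guarded helper is never compiled.  */
#if 0 /* illustrative only */
static enum reg_class
example_regno_class (regno)
     int regno;
{
  /* Smallest register class containing hard register REGNO.  */
  return regclass_map[regno];
}
#endif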
556
557 /* The "default" register map used in 32bit mode. */
558
559 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
560 {
561 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
562 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
563 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
564 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
565 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
566 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
567 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
568 };
569
570 static int const x86_64_int_parameter_registers[6] =
571 {
572 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
573 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
574 };
575
576 static int const x86_64_int_return_registers[4] =
577 {
578 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
579 };
580
581 /* The "default" register map used in 64bit mode. */
582 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
583 {
584 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
585 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
586 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
587 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
588 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
589 8,9,10,11,12,13,14,15, /* extended integer registers */
590 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
591 };
592
593 /* Define the register numbers to be used in Dwarf debugging information.
594 The SVR4 reference port C compiler uses the following register numbers
595 in its Dwarf output code:
596 0 for %eax (gcc regno = 0)
597 1 for %ecx (gcc regno = 2)
598 2 for %edx (gcc regno = 1)
599 3 for %ebx (gcc regno = 3)
600 4 for %esp (gcc regno = 7)
601 5 for %ebp (gcc regno = 6)
602 6 for %esi (gcc regno = 4)
603 7 for %edi (gcc regno = 5)
604 The following three DWARF register numbers are never generated by
605 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
606 believes these numbers have these meanings.
607 8 for %eip (no gcc equivalent)
608 9 for %eflags (gcc regno = 17)
609 10 for %trapno (no gcc equivalent)
610 It is not at all clear how we should number the FP stack registers
611 for the x86 architecture. If the version of SDB on x86/svr4 were
612 a bit less brain dead with respect to floating-point then we would
613 have a precedent to follow with respect to DWARF register numbers
614 for x86 FP registers, but the SDB on x86/svr4 is so completely
615 broken with respect to FP registers that it is hardly worth thinking
616 of it as something to strive for compatibility with.
617 The version of x86/svr4 SDB I have at the moment does (partially)
618 seem to believe that DWARF register number 11 is associated with
619 the x86 register %st(0), but that's about all. Higher DWARF
620 register numbers don't seem to be associated with anything in
621 particular, and even for DWARF regno 11, SDB only seems to under-
622 stand that it should say that a variable lives in %st(0) (when
623 asked via an `=' command) if we said it was in DWARF regno 11,
624 but SDB still prints garbage when asked for the value of the
625 variable in question (via a `/' command).
626 (Also note that the labels SDB prints for various FP stack regs
627 when doing an `x' command are all wrong.)
628 Note that these problems generally don't affect the native SVR4
629 C compiler because it doesn't allow the use of -O with -g and
630 because when it is *not* optimizing, it allocates a memory
631 location for each floating-point variable, and the memory
632 location is what gets described in the DWARF AT_location
633 attribute for the variable in question.
634 Regardless of the severe mental illness of the x86/svr4 SDB, we
635 do something sensible here and we use the following DWARF
636 register numbers. Note that these are all stack-top-relative
637 numbers.
638 11 for %st(0) (gcc regno = 8)
639 12 for %st(1) (gcc regno = 9)
640 13 for %st(2) (gcc regno = 10)
641 14 for %st(3) (gcc regno = 11)
642 15 for %st(4) (gcc regno = 12)
643 16 for %st(5) (gcc regno = 13)
644 17 for %st(6) (gcc regno = 14)
645 18 for %st(7) (gcc regno = 15)
646 */
647 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
648 {
649 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
650 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
651 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
652 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
653 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
654 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
655 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
656 };
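/* Editor's note (not in the original): i386.h's DBX_REGISTER_NUMBER is
   assumed to index this table for SVR4-style DWARF output, so, e.g.,
   gcc regno 0 (%eax) emits DWARF regno 0, gcc regno 7 (%esp) emits 4, and
   gcc regno 8 (%st(0)) emits 11, matching the numbering documented in the
   long comment above.  */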
657
658 /* Test and compare insns in i386.md store the information needed to
659 generate branch and scc insns here. */
660
661 rtx ix86_compare_op0 = NULL_RTX;
662 rtx ix86_compare_op1 = NULL_RTX;
663
664 /* The encoding characters for the four TLS models present in ELF. */
665
666 static char const tls_model_chars[] = " GLil";
667
668 #define MAX_386_STACK_LOCALS 3
669 /* Size of the register save area. */
670 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
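/* Editor's note (not in the original): with the 64-bit values assumed here
   from i386.h (REGPARM_MAX == 6, UNITS_PER_WORD == 8, SSE_REGPARM_MAX == 8),
   this works out to 6*8 + 8*16 = 176 bytes, the size of the x86-64 varargs
   register save area.  */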
671
672 /* Define the structure for the machine field in struct function. */
673 struct machine_function GTY(())
674 {
675 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
676 const char *some_ld_name;
677 int save_varrargs_registers;
678 int accesses_prev_frame;
679 };
680
681 #define ix86_stack_locals (cfun->machine->stack_locals)
682 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
683
684 /* Structure describing stack frame layout.
685 Stack grows downward:
686
687 [arguments]
688 <- ARG_POINTER
689 saved pc
690
691 saved frame pointer if frame_pointer_needed
692 <- HARD_FRAME_POINTER
693 [saved regs]
694
695 [padding1] \
696 )
697 [va_arg registers] (
698 > to_allocate <- FRAME_POINTER
699 [frame] (
700 )
701 [padding2] /
702 */
703 struct ix86_frame
704 {
705 int nregs;
706 int padding1;
707 int va_arg_size;
708 HOST_WIDE_INT frame;
709 int padding2;
710 int outgoing_arguments_size;
711 int red_zone_size;
712
713 HOST_WIDE_INT to_allocate;
714 /* The offsets relative to ARG_POINTER. */
715 HOST_WIDE_INT frame_pointer_offset;
716 HOST_WIDE_INT hard_frame_pointer_offset;
717 HOST_WIDE_INT stack_pointer_offset;
718 };
719
720 /* Used to enable/disable debugging features. */
721 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
722 /* Code model option as passed by user. */
723 const char *ix86_cmodel_string;
724 /* Parsed value. */
725 enum cmodel ix86_cmodel;
726 /* Asm dialect. */
727 const char *ix86_asm_string;
728 enum asm_dialect ix86_asm_dialect = ASM_ATT;
729 /* TLS dialect. */
730 const char *ix86_tls_dialect_string;
731 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
732
733 /* Which unit we are generating floating point math for. */
734 enum fpmath_unit ix86_fpmath;
735
736 /* Which CPU we are scheduling for. */
737 enum processor_type ix86_cpu;
738 /* Which instruction set architecture to use. */
739 enum processor_type ix86_arch;
740
741 /* Strings to hold which cpu and instruction set architecture to use. */
742 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
743 const char *ix86_arch_string; /* for -march=<xxx> */
744 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
745
746 /* # of registers to use to pass arguments. */
747 const char *ix86_regparm_string;
748
749 /* True if the SSE prefetch instruction is not a NOP. */
750 int x86_prefetch_sse;
751
752 /* ix86_regparm_string as a number */
753 int ix86_regparm;
754
755 /* Alignment to use for loops and jumps: */
756
757 /* Power of two alignment for loops. */
758 const char *ix86_align_loops_string;
759
760 /* Power of two alignment for non-loop jumps. */
761 const char *ix86_align_jumps_string;
762
763 /* Power of two alignment for stack boundary in bytes. */
764 const char *ix86_preferred_stack_boundary_string;
765
766 /* Preferred alignment for stack boundary in bits. */
767 int ix86_preferred_stack_boundary;
768
769 /* Values 1-5: see jump.c */
770 int ix86_branch_cost;
771 const char *ix86_branch_cost_string;
772
773 /* Power of two alignment for functions. */
774 const char *ix86_align_funcs_string;
775
776 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
777 static char internal_label_prefix[16];
778 static int internal_label_prefix_len;
779 \f
780 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
781 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
782 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
783 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
784 int, int, FILE *));
785 static const char *get_some_local_dynamic_name PARAMS ((void));
786 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
787 static rtx maybe_get_pool_constant PARAMS ((rtx));
788 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
789 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
790 rtx *, rtx *));
791 static rtx get_thread_pointer PARAMS ((void));
792 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
793 static rtx gen_push PARAMS ((rtx));
794 static int memory_address_length PARAMS ((rtx addr));
795 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
796 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
797 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
798 static void ix86_dump_ppro_packet PARAMS ((FILE *));
799 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
800 static struct machine_function * ix86_init_machine_status PARAMS ((void));
801 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
802 static int ix86_nsaved_regs PARAMS ((void));
803 static void ix86_emit_save_regs PARAMS ((void));
804 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
805 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
806 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
807 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
808 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
809 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
810 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
811 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
812 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
813 static int ix86_issue_rate PARAMS ((void));
814 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
815 static void ix86_sched_init PARAMS ((FILE *, int, int));
816 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
817 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
818 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
819 static int ia32_multipass_dfa_lookahead PARAMS ((void));
820 static void ix86_init_mmx_sse_builtins PARAMS ((void));
821 static rtx x86_this_parameter PARAMS ((tree));
822 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
823 HOST_WIDE_INT, tree));
824 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
825 HOST_WIDE_INT, tree));
826 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
827
828 struct ix86_address
829 {
830 rtx base, index, disp;
831 HOST_WIDE_INT scale;
832 };
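/* Editor's note (not in the original): as a rough worked example, for an
   address such as 8(%ebx,%ecx,4) the decomposition routine prototyped
   below would be expected to fill in base = %ebx, index = %ecx, scale = 4
   and disp = (const_int 8).  This is the editor's illustration of the
   fields, not text from the original source.  */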
833
834 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
835 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
836
837 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
838 static const char *ix86_strip_name_encoding PARAMS ((const char *))
839 ATTRIBUTE_UNUSED;
840
841 struct builtin_description;
842 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
843 tree, rtx));
844 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
845 tree, rtx));
846 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
847 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
848 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
849 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
850 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
851 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
852 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
853 enum rtx_code *,
854 enum rtx_code *,
855 enum rtx_code *));
856 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
857 rtx *, rtx *));
858 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
859 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
860 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
861 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
862 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
863 static int ix86_save_reg PARAMS ((unsigned int, int));
864 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
865 static int ix86_comp_type_attributes PARAMS ((tree, tree));
866 static int ix86_fntype_regparm PARAMS ((tree));
867 const struct attribute_spec ix86_attribute_table[];
868 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
869 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
870 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
871 static int ix86_value_regno PARAMS ((enum machine_mode));
872 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
873 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
874
875 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
876 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
877 #endif
878
879 /* Register class used for passing a given 64-bit part of the argument.
880 These represent classes as documented by the psABI, with the exception
881 of the SSESF and SSEDF classes, which are basically the SSE class, except
882 that gcc will use SFmode or DFmode moves instead of DImode to avoid
883 reformatting penalties.
884 
885 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
886 whenever possible (the upper half contains only padding). */
887 enum x86_64_reg_class
888 {
889 X86_64_NO_CLASS,
890 X86_64_INTEGER_CLASS,
891 X86_64_INTEGERSI_CLASS,
892 X86_64_SSE_CLASS,
893 X86_64_SSESF_CLASS,
894 X86_64_SSEDF_CLASS,
895 X86_64_SSEUP_CLASS,
896 X86_64_X87_CLASS,
897 X86_64_X87UP_CLASS,
898 X86_64_MEMORY_CLASS
899 };
900 static const char * const x86_64_reg_class_name[] =
901 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
902
903 #define MAX_CLASSES 4
904 static int classify_argument PARAMS ((enum machine_mode, tree,
905 enum x86_64_reg_class [MAX_CLASSES],
906 int));
907 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
908 int *));
909 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
910 const int *, int));
911 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
912 enum x86_64_reg_class));
913 \f
914 /* Initialize the GCC target structure. */
915 #undef TARGET_ATTRIBUTE_TABLE
916 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
917 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
918 # undef TARGET_MERGE_DECL_ATTRIBUTES
919 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
920 #endif
921
922 #undef TARGET_COMP_TYPE_ATTRIBUTES
923 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
924
925 #undef TARGET_INIT_BUILTINS
926 #define TARGET_INIT_BUILTINS ix86_init_builtins
927
928 #undef TARGET_EXPAND_BUILTIN
929 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
930
931 #undef TARGET_ASM_FUNCTION_EPILOGUE
932 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
933
934 #undef TARGET_ASM_OPEN_PAREN
935 #define TARGET_ASM_OPEN_PAREN ""
936 #undef TARGET_ASM_CLOSE_PAREN
937 #define TARGET_ASM_CLOSE_PAREN ""
938
939 #undef TARGET_ASM_ALIGNED_HI_OP
940 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
941 #undef TARGET_ASM_ALIGNED_SI_OP
942 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
943 #ifdef ASM_QUAD
944 #undef TARGET_ASM_ALIGNED_DI_OP
945 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
946 #endif
947
948 #undef TARGET_ASM_UNALIGNED_HI_OP
949 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
950 #undef TARGET_ASM_UNALIGNED_SI_OP
951 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
952 #undef TARGET_ASM_UNALIGNED_DI_OP
953 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
954
955 #undef TARGET_SCHED_ADJUST_COST
956 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
957 #undef TARGET_SCHED_ISSUE_RATE
958 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
959 #undef TARGET_SCHED_VARIABLE_ISSUE
960 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
961 #undef TARGET_SCHED_INIT
962 #define TARGET_SCHED_INIT ix86_sched_init
963 #undef TARGET_SCHED_REORDER
964 #define TARGET_SCHED_REORDER ix86_sched_reorder
965 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
966 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
967 ia32_use_dfa_pipeline_interface
968 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
969 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
970 ia32_multipass_dfa_lookahead
971
972 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
973 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
974
975 #ifdef HAVE_AS_TLS
976 #undef TARGET_HAVE_TLS
977 #define TARGET_HAVE_TLS true
978 #endif
979 #undef TARGET_CANNOT_FORCE_CONST_MEM
980 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
981
982 #undef TARGET_MS_BITFIELD_LAYOUT_P
983 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
984
985 #undef TARGET_ASM_OUTPUT_MI_THUNK
986 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
987 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
988 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
989
990 struct gcc_target targetm = TARGET_INITIALIZER;
991 \f
992 /* Sometimes certain combinations of command options do not make
993 sense on a particular target machine. You can define a macro
994 `OVERRIDE_OPTIONS' to take account of this. This macro, if
995 defined, is executed once just after all the command options have
996 been parsed.
997
998 Don't use this macro to turn on various extra optimizations for
999 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1000
1001 void
1002 override_options ()
1003 {
1004 int i;
1005 /* Comes from final.c -- no real reason to change it. */
1006 #define MAX_CODE_ALIGN 16
1007
1008 static struct ptt
1009 {
1010 const struct processor_costs *cost; /* Processor costs */
1011 const int target_enable; /* Target flags to enable. */
1012 const int target_disable; /* Target flags to disable. */
1013 const int align_loop; /* Default alignments. */
1014 const int align_loop_max_skip;
1015 const int align_jump;
1016 const int align_jump_max_skip;
1017 const int align_func;
1018 }
1019 const processor_target_table[PROCESSOR_max] =
1020 {
1021 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1022 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1023 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1024 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1025 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1026 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1027 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1028 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1029 };
1030
1031 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1032 static struct pta
1033 {
1034 const char *const name; /* processor name or nickname. */
1035 const enum processor_type processor;
1036 const enum pta_flags
1037 {
1038 PTA_SSE = 1,
1039 PTA_SSE2 = 2,
1040 PTA_MMX = 4,
1041 PTA_PREFETCH_SSE = 8,
1042 PTA_3DNOW = 16,
1043 PTA_3DNOW_A = 64,
1044 PTA_64BIT = 128
1045 } flags;
1046 }
1047 const processor_alias_table[] =
1048 {
1049 {"i386", PROCESSOR_I386, 0},
1050 {"i486", PROCESSOR_I486, 0},
1051 {"i586", PROCESSOR_PENTIUM, 0},
1052 {"pentium", PROCESSOR_PENTIUM, 0},
1053 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1054 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1055 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1056 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1057 {"i686", PROCESSOR_PENTIUMPRO, 0},
1058 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1059 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1060 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1061 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1062 PTA_MMX | PTA_PREFETCH_SSE},
1063 {"k6", PROCESSOR_K6, PTA_MMX},
1064 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1065 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1066 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1067 | PTA_3DNOW_A},
1068 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1069 | PTA_3DNOW | PTA_3DNOW_A},
1070 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1071 | PTA_3DNOW_A | PTA_SSE},
1072 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1073 | PTA_3DNOW_A | PTA_SSE},
1074 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1075 | PTA_3DNOW_A | PTA_SSE},
1076 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1077 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1078 };
1079
1080 int const pta_size = ARRAY_SIZE (processor_alias_table);
1081
1082 /* By default our XFmode is the 80-bit extended format. If we use
1083 TFmode instead, it's also the 80-bit format, but with padding. */
1084 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1085 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1086
1087 /* Set the default values for switches whose default depends on TARGET_64BIT
1088 in case they weren't overridden by command line options. */
1089 if (TARGET_64BIT)
1090 {
1091 if (flag_omit_frame_pointer == 2)
1092 flag_omit_frame_pointer = 1;
1093 if (flag_asynchronous_unwind_tables == 2)
1094 flag_asynchronous_unwind_tables = 1;
1095 if (flag_pcc_struct_return == 2)
1096 flag_pcc_struct_return = 0;
1097 }
1098 else
1099 {
1100 if (flag_omit_frame_pointer == 2)
1101 flag_omit_frame_pointer = 0;
1102 if (flag_asynchronous_unwind_tables == 2)
1103 flag_asynchronous_unwind_tables = 0;
1104 if (flag_pcc_struct_return == 2)
1105 flag_pcc_struct_return = 1;
1106 }
1107
1108 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1109 SUBTARGET_OVERRIDE_OPTIONS;
1110 #endif
1111
1112 if (!ix86_cpu_string && ix86_arch_string)
1113 ix86_cpu_string = ix86_arch_string;
1114 if (!ix86_cpu_string)
1115 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1116 if (!ix86_arch_string)
1117 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1118
1119 if (ix86_cmodel_string != 0)
1120 {
1121 if (!strcmp (ix86_cmodel_string, "small"))
1122 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1123 else if (flag_pic)
1124 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1125 else if (!strcmp (ix86_cmodel_string, "32"))
1126 ix86_cmodel = CM_32;
1127 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1128 ix86_cmodel = CM_KERNEL;
1129 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1130 ix86_cmodel = CM_MEDIUM;
1131 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1132 ix86_cmodel = CM_LARGE;
1133 else
1134 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1135 }
1136 else
1137 {
1138 ix86_cmodel = CM_32;
1139 if (TARGET_64BIT)
1140 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1141 }
1142 if (ix86_asm_string != 0)
1143 {
1144 if (!strcmp (ix86_asm_string, "intel"))
1145 ix86_asm_dialect = ASM_INTEL;
1146 else if (!strcmp (ix86_asm_string, "att"))
1147 ix86_asm_dialect = ASM_ATT;
1148 else
1149 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1150 }
1151 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1152 error ("code model `%s' not supported in the %s bit mode",
1153 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1154 if (ix86_cmodel == CM_LARGE)
1155 sorry ("code model `large' not supported yet");
1156 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1157 sorry ("%i-bit mode not compiled in",
1158 (target_flags & MASK_64BIT) ? 64 : 32);
1159
1160 for (i = 0; i < pta_size; i++)
1161 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1162 {
1163 ix86_arch = processor_alias_table[i].processor;
1164 /* Default cpu tuning to the architecture. */
1165 ix86_cpu = ix86_arch;
1166 if (processor_alias_table[i].flags & PTA_MMX
1167 && !(target_flags_explicit & MASK_MMX))
1168 target_flags |= MASK_MMX;
1169 if (processor_alias_table[i].flags & PTA_3DNOW
1170 && !(target_flags_explicit & MASK_3DNOW))
1171 target_flags |= MASK_3DNOW;
1172 if (processor_alias_table[i].flags & PTA_3DNOW_A
1173 && !(target_flags_explicit & MASK_3DNOW_A))
1174 target_flags |= MASK_3DNOW_A;
1175 if (processor_alias_table[i].flags & PTA_SSE
1176 && !(target_flags_explicit & MASK_SSE))
1177 target_flags |= MASK_SSE;
1178 if (processor_alias_table[i].flags & PTA_SSE2
1179 && !(target_flags_explicit & MASK_SSE2))
1180 target_flags |= MASK_SSE2;
1181 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1182 x86_prefetch_sse = true;
1183 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1184 error ("CPU you selected does not support x86-64 instruction set");
1185 break;
1186 }
1187
1188 if (i == pta_size)
1189 error ("bad value (%s) for -march= switch", ix86_arch_string);
1190
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1193 {
1194 ix86_cpu = processor_alias_table[i].processor;
1195 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1196 error ("CPU you selected does not support x86-64 instruction set");
1197 break;
1198 }
1199 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1200 x86_prefetch_sse = true;
1201 if (i == pta_size)
1202 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1203
1204 if (optimize_size)
1205 ix86_cost = &size_cost;
1206 else
1207 ix86_cost = processor_target_table[ix86_cpu].cost;
1208 target_flags |= processor_target_table[ix86_cpu].target_enable;
1209 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1210
1211 /* Arrange to set up i386_stack_locals for all functions. */
1212 init_machine_status = ix86_init_machine_status;
1213
1214 /* Validate -mregparm= value. */
1215 if (ix86_regparm_string)
1216 {
1217 i = atoi (ix86_regparm_string);
1218 if (i < 0 || i > REGPARM_MAX)
1219 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1220 else
1221 ix86_regparm = i;
1222 }
1223 else
1224 if (TARGET_64BIT)
1225 ix86_regparm = REGPARM_MAX;
1226
1227 /* If the user has provided any of the -malign-* options,
1228 warn and use that value only if -falign-* is not set.
1229 Remove this code in GCC 3.2 or later. */
1230 if (ix86_align_loops_string)
1231 {
1232 warning ("-malign-loops is obsolete, use -falign-loops");
1233 if (align_loops == 0)
1234 {
1235 i = atoi (ix86_align_loops_string);
1236 if (i < 0 || i > MAX_CODE_ALIGN)
1237 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1238 else
1239 align_loops = 1 << i;
1240 }
1241 }
1242
1243 if (ix86_align_jumps_string)
1244 {
1245 warning ("-malign-jumps is obsolete, use -falign-jumps");
1246 if (align_jumps == 0)
1247 {
1248 i = atoi (ix86_align_jumps_string);
1249 if (i < 0 || i > MAX_CODE_ALIGN)
1250 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1251 else
1252 align_jumps = 1 << i;
1253 }
1254 }
1255
1256 if (ix86_align_funcs_string)
1257 {
1258 warning ("-malign-functions is obsolete, use -falign-functions");
1259 if (align_functions == 0)
1260 {
1261 i = atoi (ix86_align_funcs_string);
1262 if (i < 0 || i > MAX_CODE_ALIGN)
1263 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1264 else
1265 align_functions = 1 << i;
1266 }
1267 }
1268
1269 /* Default align_* from the processor table. */
1270 if (align_loops == 0)
1271 {
1272 align_loops = processor_target_table[ix86_cpu].align_loop;
1273 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1274 }
1275 if (align_jumps == 0)
1276 {
1277 align_jumps = processor_target_table[ix86_cpu].align_jump;
1278 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1279 }
1280 if (align_functions == 0)
1281 {
1282 align_functions = processor_target_table[ix86_cpu].align_func;
1283 }
1284
1285 /* Validate -mpreferred-stack-boundary= value, or provide default.
1286 The default of 128 bits is for Pentium III's SSE __m128, but we
1287 don't want additional code to keep the stack aligned when
1288 optimizing for code size. */
1289 ix86_preferred_stack_boundary = (optimize_size
1290 ? TARGET_64BIT ? 128 : 32
1291 : 128);
1292 if (ix86_preferred_stack_boundary_string)
1293 {
1294 i = atoi (ix86_preferred_stack_boundary_string);
1295 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1296 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1297 TARGET_64BIT ? 4 : 2);
1298 else
1299 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1300 }
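/* Editor's note (not in the original): as a worked example of the line
   above, -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, i.e. a 16-byte aligned stack, matching the
   SSE-friendly default chosen when not optimizing for size.  */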
1301
1302 /* Validate -mbranch-cost= value, or provide default. */
1303 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1304 if (ix86_branch_cost_string)
1305 {
1306 i = atoi (ix86_branch_cost_string);
1307 if (i < 0 || i > 5)
1308 error ("-mbranch-cost=%d is not between 0 and 5", i);
1309 else
1310 ix86_branch_cost = i;
1311 }
1312
1313 if (ix86_tls_dialect_string)
1314 {
1315 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1316 ix86_tls_dialect = TLS_DIALECT_GNU;
1317 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1318 ix86_tls_dialect = TLS_DIALECT_SUN;
1319 else
1320 error ("bad value (%s) for -mtls-dialect= switch",
1321 ix86_tls_dialect_string);
1322 }
1323
1324 /* Keep nonleaf frame pointers. */
1325 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1326 flag_omit_frame_pointer = 1;
1327
1328 /* If we're doing fast math, we don't care about comparison order
1329 wrt NaNs. This lets us use a shorter comparison sequence. */
1330 if (flag_unsafe_math_optimizations)
1331 target_flags &= ~MASK_IEEE_FP;
1332
1333 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1334 since the insns won't need emulation. */
1335 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1336 target_flags &= ~MASK_NO_FANCY_MATH_387;
1337
1338 if (TARGET_64BIT)
1339 {
1340 if (TARGET_ALIGN_DOUBLE)
1341 error ("-malign-double makes no sense in the 64bit mode");
1342 if (TARGET_RTD)
1343 error ("-mrtd calling convention not supported in the 64bit mode");
1344 /* Enable by default the SSE and MMX builtins. */
1345 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1346 ix86_fpmath = FPMATH_SSE;
1347 }
1348 else
1349 ix86_fpmath = FPMATH_387;
1350
1351 if (ix86_fpmath_string != 0)
1352 {
1353 if (! strcmp (ix86_fpmath_string, "387"))
1354 ix86_fpmath = FPMATH_387;
1355 else if (! strcmp (ix86_fpmath_string, "sse"))
1356 {
1357 if (!TARGET_SSE)
1358 {
1359 warning ("SSE instruction set disabled, using 387 arithmetics");
1360 ix86_fpmath = FPMATH_387;
1361 }
1362 else
1363 ix86_fpmath = FPMATH_SSE;
1364 }
1365 else if (! strcmp (ix86_fpmath_string, "387,sse")
1366 || ! strcmp (ix86_fpmath_string, "sse,387"))
1367 {
1368 if (!TARGET_SSE)
1369 {
1370 warning ("SSE instruction set disabled, using 387 arithmetics");
1371 ix86_fpmath = FPMATH_387;
1372 }
1373 else if (!TARGET_80387)
1374 {
1375 warning ("387 instruction set disabled, using SSE arithmetics");
1376 ix86_fpmath = FPMATH_SSE;
1377 }
1378 else
1379 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1380 }
1381 else
1382 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1383 }
1384
1385 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1386 on by -msse. */
1387 if (TARGET_SSE)
1388 {
1389 target_flags |= MASK_MMX;
1390 x86_prefetch_sse = true;
1391 }
1392
1393 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1394 if (TARGET_3DNOW)
1395 {
1396 target_flags |= MASK_MMX;
1397 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1398 extensions it adds. */
1399 if (x86_3dnow_a & (1 << ix86_arch))
1400 target_flags |= MASK_3DNOW_A;
1401 }
1402 if ((x86_accumulate_outgoing_args & CPUMASK)
1403 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1404 && !optimize_size)
1405 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1406
1407 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1408 {
1409 char *p;
1410 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1411 p = strchr (internal_label_prefix, 'X');
1412 internal_label_prefix_len = p - internal_label_prefix;
1413 *p = '\0';
1414 }
1415 }
1416 \f
1417 void
1418 optimization_options (level, size)
1419 int level;
1420 int size ATTRIBUTE_UNUSED;
1421 {
1422 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1423 make the problem with not enough registers even worse. */
1424 #ifdef INSN_SCHEDULING
1425 if (level > 1)
1426 flag_schedule_insns = 0;
1427 #endif
1428
1429 /* The default values of these switches depend on TARGET_64BIT,
1430 which is not known at this moment. Mark these values with 2 and
1431 let the user override them. If no command line option specifies them,
1432 we will set the defaults in override_options. */
1433 if (optimize >= 1)
1434 flag_omit_frame_pointer = 2;
1435 flag_pcc_struct_return = 2;
1436 flag_asynchronous_unwind_tables = 2;
1437 }
1438 \f
1439 /* Table of valid machine attributes. */
1440 const struct attribute_spec ix86_attribute_table[] =
1441 {
1442 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1443 /* Stdcall attribute says callee is responsible for popping arguments
1444 if they are not variable. */
1445 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1446 /* Fastcall attribute says callee is responsible for popping arguments
1447 if they are not variable. */
1448 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1449 /* Cdecl attribute says the callee is a normal C declaration */
1450 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1451 /* Regparm attribute specifies how many integer arguments are to be
1452 passed in registers. */
1453 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1454 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1455 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1456 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1457 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1458 #endif
1459 { NULL, 0, 0, false, false, false, NULL }
1460 };
1461
1462 /* If PIC, we cannot make sibling calls to global functions
1463 because the PLT requires %ebx live.
1464 If we are returning floats on the register stack, we cannot make
1465 sibling calls to functions that return floats. (The stack adjust
1466 instruction will wind up after the sibcall jump, and not be executed.) */
1467
1468 static bool
1469 ix86_function_ok_for_sibcall (decl, exp)
1470 tree decl;
1471 tree exp;
1472 {
1473 /* If we are generating position-independent code, we cannot sibcall
1474 optimize any indirect call, or a direct call to a global function,
1475 as the PLT requires %ebx be live. */
1476 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1477 return false;
1478
1479 /* If we are returning floats on the 80387 register stack, we cannot
1480 make a sibcall from a function that doesn't return a float to a
1481 function that does; the necessary stack adjustment will not be
1482 executed. */
1483 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1484 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1485 return false;
1486
1487 /* If this call is indirect, we'll need to be able to use a call-clobbered
1488 register for the address of the target function. Make sure that all
1489 such registers are not used for passing parameters. */
1490 if (!decl && !TARGET_64BIT)
1491 {
1492 int regparm = ix86_regparm;
1493 tree attr, type;
1494
1495 /* We're looking at the CALL_EXPR, we need the type of the function. */
1496 type = TREE_OPERAND (exp, 0); /* pointer expression */
1497 type = TREE_TYPE (type); /* pointer type */
1498 type = TREE_TYPE (type); /* function type */
1499
1500 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1501 if (attr)
1502 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1503
1504 if (regparm >= 3)
1505 {
1506 /* ??? Need to count the actual number of registers to be used,
1507 not the possible number of registers. Fix later. */
1508 return false;
1509 }
1510 }
1511
1512 /* Otherwise okay. That also includes certain types of indirect calls. */
1513 return true;
1514 }
1515
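/* Illustrative consequence of the PIC rule above, on ia32 with -fpic
   (hypothetical functions):

     extern int global_fn (int);
     static int local_fn (int);
     int f (int x) { return global_fn (x); }   -- not sibcalled; the PLT needs %ebx
     int g (int x) { return local_fn (x); }    -- may be sibcalled; local_fn is not TREE_PUBLIC
*/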
1516 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1517 arguments as in struct attribute_spec.handler. */
1518 static tree
1519 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1520 tree *node;
1521 tree name;
1522 tree args ATTRIBUTE_UNUSED;
1523 int flags ATTRIBUTE_UNUSED;
1524 bool *no_add_attrs;
1525 {
1526 if (TREE_CODE (*node) != FUNCTION_TYPE
1527 && TREE_CODE (*node) != METHOD_TYPE
1528 && TREE_CODE (*node) != FIELD_DECL
1529 && TREE_CODE (*node) != TYPE_DECL)
1530 {
1531 warning ("`%s' attribute only applies to functions",
1532 IDENTIFIER_POINTER (name));
1533 *no_add_attrs = true;
1534 }
1535 else
1536 {
1537 if (is_attribute_p ("fastcall", name))
1538 {
1539 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1540 {
1541 error ("fastcall and stdcall attributes are not compatible");
1542 }
1543 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1544 {
1545 error ("fastcall and regparm attributes are not compatible");
1546 }
1547 }
1548 else if (is_attribute_p ("stdcall", name))
1549 {
1550 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1551 {
1552 error ("fastcall and stdcall attributes are not compatible");
1553 }
1554 }
1555 }
1556
1557 if (TARGET_64BIT)
1558 {
1559 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1560 *no_add_attrs = true;
1561 }
1562
1563 return NULL_TREE;
1564 }
1565
1566 /* Handle a "regparm" attribute;
1567 arguments as in struct attribute_spec.handler. */
1568 static tree
1569 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1570 tree *node;
1571 tree name;
1572 tree args;
1573 int flags ATTRIBUTE_UNUSED;
1574 bool *no_add_attrs;
1575 {
1576 if (TREE_CODE (*node) != FUNCTION_TYPE
1577 && TREE_CODE (*node) != METHOD_TYPE
1578 && TREE_CODE (*node) != FIELD_DECL
1579 && TREE_CODE (*node) != TYPE_DECL)
1580 {
1581 warning ("`%s' attribute only applies to functions",
1582 IDENTIFIER_POINTER (name));
1583 *no_add_attrs = true;
1584 }
1585 else
1586 {
1587 tree cst;
1588
1589 cst = TREE_VALUE (args);
1590 if (TREE_CODE (cst) != INTEGER_CST)
1591 {
1592 warning ("`%s' attribute requires an integer constant argument",
1593 IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1595 }
1596 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1597 {
1598 warning ("argument to `%s' attribute larger than %d",
1599 IDENTIFIER_POINTER (name), REGPARM_MAX);
1600 *no_add_attrs = true;
1601 }
1602
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1604 {
1605 error ("fastcall and regparm attributes are not compatible");
1606 }
1607 }
1608
1609 return NULL_TREE;
1610 }
1611
1612 /* Return 0 if the attributes for two types are incompatible, 1 if they
1613 are compatible, and 2 if they are nearly compatible (which causes a
1614 warning to be generated). */
1615
1616 static int
1617 ix86_comp_type_attributes (type1, type2)
1618 tree type1;
1619 tree type2;
1620 {
1621 /* Check for mismatch of non-default calling convention. */
1622 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1623
1624 if (TREE_CODE (type1) != FUNCTION_TYPE)
1625 return 1;
1626
1627 /* Check for mismatched fastcall types */
1628 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1629 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1630 return 0;
1631
1632 /* Check for mismatched return types (cdecl vs stdcall). */
1633 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1634 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1635 return 0;
1636 return 1;
1637 }
1638 \f
1639 /* Return the regparm value for a function with the indicated TYPE. */
1640
1641 static int
1642 ix86_fntype_regparm (type)
1643 tree type;
1644 {
1645 tree attr;
1646
1647 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1648 if (attr)
1649 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1650 else
1651 return ix86_regparm;
1652 }
1653
1654 /* Value is the number of bytes of arguments automatically
1655 popped when returning from a subroutine call.
1656 FUNDECL is the declaration node of the function (as a tree),
1657 FUNTYPE is the data type of the function (as a tree),
1658 or for a library call it is an identifier node for the subroutine name.
1659 SIZE is the number of bytes of arguments passed on the stack.
1660
1661 On the 80386, the RTD insn may be used to pop them if the number
1662 of args is fixed, but if the number is variable then the caller
1663 must pop them all. RTD can't be used for library calls now
1664 because the library is compiled with the Unix compiler.
1665 Use of RTD is a selectable option, since it is incompatible with
1666 standard Unix calling sequences. If the option is not selected,
1667 the caller must always pop the args.
1668
1669 The attribute stdcall is equivalent to RTD on a per module basis. */
1670
1671 int
1672 ix86_return_pops_args (fundecl, funtype, size)
1673 tree fundecl;
1674 tree funtype;
1675 int size;
1676 {
1677 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1678
1679 /* Cdecl functions override -mrtd, and never pop the stack. */
1680 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1681
1682 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1685 rtd = 1;
1686
1687 if (rtd
1688 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1689 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1690 == void_type_node)))
1691 return size;
1692 }
1693
1694 /* Lose any fake structure return argument if it is passed on the stack. */
1695 if (aggregate_value_p (TREE_TYPE (funtype))
1696 && !TARGET_64BIT)
1697 {
1698 int nregs = ix86_fntype_regparm (funtype);
1699
1700 if (!nregs)
1701 return GET_MODE_SIZE (Pmode);
1702 }
1703
1704 return 0;
1705 }
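/* Worked example (for illustration): given the hypothetical declaration

     void __attribute__ ((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8, so the callee pops its eight bytes of
   arguments ("ret $8").  For a stdcall or fastcall function with variable
   arguments, or for a plain cdecl function, it returns 0 and the caller
   pops the arguments instead.  */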
1706 \f
1707 /* Argument support functions. */
1708
1709 /* Return true when register may be used to pass function parameters. */
1710 bool
1711 ix86_function_arg_regno_p (regno)
1712 int regno;
1713 {
1714 int i;
1715 if (!TARGET_64BIT)
1716 return (regno < REGPARM_MAX
1717 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1718 if (SSE_REGNO_P (regno) && TARGET_SSE)
1719 return true;
1720 /* RAX is used as hidden argument to va_arg functions. */
1721 if (!regno)
1722 return true;
1723 for (i = 0; i < REGPARM_MAX; i++)
1724 if (regno == x86_64_int_parameter_registers[i])
1725 return true;
1726 return false;
1727 }
1728
1729 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1730 for a call to a function whose data type is FNTYPE.
1731 For a library call, FNTYPE is 0. */
1732
1733 void
1734 init_cumulative_args (cum, fntype, libname)
1735 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1736 tree fntype; /* tree ptr for function decl */
1737 rtx libname; /* SYMBOL_REF of library name or 0 */
1738 {
1739 static CUMULATIVE_ARGS zero_cum;
1740 tree param, next_param;
1741
1742 if (TARGET_DEBUG_ARG)
1743 {
1744 fprintf (stderr, "\ninit_cumulative_args (");
1745 if (fntype)
1746 fprintf (stderr, "fntype code = %s, ret code = %s",
1747 tree_code_name[(int) TREE_CODE (fntype)],
1748 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1749 else
1750 fprintf (stderr, "no fntype");
1751
1752 if (libname)
1753 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1754 }
1755
1756 *cum = zero_cum;
1757
1758 /* Set up the number of registers to use for passing arguments. */
1759 cum->nregs = ix86_regparm;
1760 cum->sse_nregs = SSE_REGPARM_MAX;
1761 if (fntype && !TARGET_64BIT)
1762 {
1763 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1764
1765 if (attr)
1766 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1767 }
1768 cum->maybe_vaarg = false;
1769
1770 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1771 if (fntype && !TARGET_64BIT)
1772 {
1773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1774 {
1775 cum->nregs = 2;
1776 cum->fastcall = 1;
1777 }
1778 }
1779
1780
1781 /* Determine if this function has variable arguments. This is
1782 indicated by the last argument being 'void_type_node' if there
1783 are no variable arguments. If there are variable arguments, then
1784 we won't pass anything in registers. */
1785
1786 if (cum->nregs)
1787 {
1788 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1789 param != 0; param = next_param)
1790 {
1791 next_param = TREE_CHAIN (param);
1792 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1793 {
1794 if (!TARGET_64BIT)
1795 {
1796 cum->nregs = 0;
1797 cum->fastcall = 0;
1798 }
1799 cum->maybe_vaarg = true;
1800 }
1801 }
1802 }
1803 if ((!fntype && !libname)
1804 || (fntype && !TYPE_ARG_TYPES (fntype)))
1805 cum->maybe_vaarg = 1;
1806
1807 if (TARGET_DEBUG_ARG)
1808 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1809
1810 return;
1811 }
1812
1813 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
1814 goal of this code is to classify each 8 bytes of an incoming argument by
1815 register class and assign registers accordingly. */
1816
1817 /* Return the union class of CLASS1 and CLASS2.
1818 See the x86-64 PS ABI for details. */
1819
1820 static enum x86_64_reg_class
1821 merge_classes (class1, class2)
1822 enum x86_64_reg_class class1, class2;
1823 {
1824 /* Rule #1: If both classes are equal, this is the resulting class. */
1825 if (class1 == class2)
1826 return class1;
1827
1828 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1829 the other class. */
1830 if (class1 == X86_64_NO_CLASS)
1831 return class2;
1832 if (class2 == X86_64_NO_CLASS)
1833 return class1;
1834
1835 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1836 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1837 return X86_64_MEMORY_CLASS;
1838
1839 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1840 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1841 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1842 return X86_64_INTEGERSI_CLASS;
1843 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1844 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1845 return X86_64_INTEGER_CLASS;
1846
1847 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1848 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1849 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1850 return X86_64_MEMORY_CLASS;
1851
1852 /* Rule #6: Otherwise class SSE is used. */
1853 return X86_64_SSE_CLASS;
1854 }
1855
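/* Worked examples (for illustration) of the rules above:

     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
         == X86_64_INTEGERSI_CLASS      (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_INTEGER_CLASS)
         == X86_64_INTEGER_CLASS        (rule #4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
         == X86_64_MEMORY_CLASS         (rule #5)  */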
1856 /* Classify the argument of type TYPE and mode MODE.
1857 CLASSES will be filled by the register class used to pass each word
1858 of the operand. The number of words is returned. In case the parameter
1859 should be passed in memory, 0 is returned. As a special case for zero
1860 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1861
1862 BIT_OFFSET is used internally for handling records and specifies the
1863 offset in bits modulo 256 to avoid overflow cases.
1864
1865 See the x86-64 PS ABI for details.
1866 */
1867
1868 static int
1869 classify_argument (mode, type, classes, bit_offset)
1870 enum machine_mode mode;
1871 tree type;
1872 enum x86_64_reg_class classes[MAX_CLASSES];
1873 int bit_offset;
1874 {
1875 int bytes =
1876 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1877 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1878
1879 /* Variable sized entities are always passed/returned in memory. */
1880 if (bytes < 0)
1881 return 0;
1882
1883 if (type && AGGREGATE_TYPE_P (type))
1884 {
1885 int i;
1886 tree field;
1887 enum x86_64_reg_class subclasses[MAX_CLASSES];
1888
1889 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1890 if (bytes > 16)
1891 return 0;
1892
1893 for (i = 0; i < words; i++)
1894 classes[i] = X86_64_NO_CLASS;
1895
1896 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1897 signal the memory class, so handle this as a special case. */
1898 if (!words)
1899 {
1900 classes[0] = X86_64_NO_CLASS;
1901 return 1;
1902 }
1903
1904 /* Classify each field of record and merge classes. */
1905 if (TREE_CODE (type) == RECORD_TYPE)
1906 {
1907 /* For classes, first merge in the fields of the base classes. */
1908 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1909 {
1910 tree bases = TYPE_BINFO_BASETYPES (type);
1911 int n_bases = TREE_VEC_LENGTH (bases);
1912 int i;
1913
1914 for (i = 0; i < n_bases; ++i)
1915 {
1916 tree binfo = TREE_VEC_ELT (bases, i);
1917 int num;
1918 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1919 tree type = BINFO_TYPE (binfo);
1920
1921 num = classify_argument (TYPE_MODE (type),
1922 type, subclasses,
1923 (offset + bit_offset) % 256);
1924 if (!num)
1925 return 0;
1926 for (i = 0; i < num; i++)
1927 {
1928 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1929 classes[i + pos] =
1930 merge_classes (subclasses[i], classes[i + pos]);
1931 }
1932 }
1933 }
1934 /* And now merge the fields of structure. */
1935 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1936 {
1937 if (TREE_CODE (field) == FIELD_DECL)
1938 {
1939 int num;
1940
1941 /* Bitfields are always classified as integer. Handle them
1942 early, since later code would consider them to be
1943 misaligned integers. */
1944 if (DECL_BIT_FIELD (field))
1945 {
1946 for (i = int_bit_position (field) / 8 / 8;
1947 i < (int_bit_position (field)
1948 + tree_low_cst (DECL_SIZE (field), 0)
1949 + 63) / 8 / 8; i++)
1950 classes[i] =
1951 merge_classes (X86_64_INTEGER_CLASS,
1952 classes[i]);
1953 }
1954 else
1955 {
1956 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1957 TREE_TYPE (field), subclasses,
1958 (int_bit_position (field)
1959 + bit_offset) % 256);
1960 if (!num)
1961 return 0;
1962 for (i = 0; i < num; i++)
1963 {
1964 int pos =
1965 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1966 classes[i + pos] =
1967 merge_classes (subclasses[i], classes[i + pos]);
1968 }
1969 }
1970 }
1971 }
1972 }
1973 /* Arrays are handled as small records. */
1974 else if (TREE_CODE (type) == ARRAY_TYPE)
1975 {
1976 int num;
1977 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1978 TREE_TYPE (type), subclasses, bit_offset);
1979 if (!num)
1980 return 0;
1981
1982 /* The partial classes are now full classes. */
1983 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1984 subclasses[0] = X86_64_SSE_CLASS;
1985 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1986 subclasses[0] = X86_64_INTEGER_CLASS;
1987
1988 for (i = 0; i < words; i++)
1989 classes[i] = subclasses[i % num];
1990 }
1991 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1992 else if (TREE_CODE (type) == UNION_TYPE
1993 || TREE_CODE (type) == QUAL_UNION_TYPE)
1994 {
1995 /* For classes, first merge in the fields of the base classes. */
1996 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1997 {
1998 tree bases = TYPE_BINFO_BASETYPES (type);
1999 int n_bases = TREE_VEC_LENGTH (bases);
2000 int i;
2001
2002 for (i = 0; i < n_bases; ++i)
2003 {
2004 tree binfo = TREE_VEC_ELT (bases, i);
2005 int num;
2006 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2007 tree type = BINFO_TYPE (binfo);
2008
2009 num = classify_argument (TYPE_MODE (type),
2010 type, subclasses,
2011 (offset + (bit_offset % 64)) % 256);
2012 if (!num)
2013 return 0;
2014 for (i = 0; i < num; i++)
2015 {
2016 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2017 classes[i + pos] =
2018 merge_classes (subclasses[i], classes[i + pos]);
2019 }
2020 }
2021 }
2022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2023 {
2024 if (TREE_CODE (field) == FIELD_DECL)
2025 {
2026 int num;
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2029 bit_offset);
2030 if (!num)
2031 return 0;
2032 for (i = 0; i < num; i++)
2033 classes[i] = merge_classes (subclasses[i], classes[i]);
2034 }
2035 }
2036 }
2037 else
2038 abort ();
2039
2040 /* Final merger cleanup. */
2041 for (i = 0; i < words; i++)
2042 {
2043 /* If one class is MEMORY, everything should be passed in
2044 memory. */
2045 if (classes[i] == X86_64_MEMORY_CLASS)
2046 return 0;
2047
2048 /* X86_64_SSEUP_CLASS should always be preceded by
2049 X86_64_SSE_CLASS. */
2050 if (classes[i] == X86_64_SSEUP_CLASS
2051 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2052 classes[i] = X86_64_SSE_CLASS;
2053
2054 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2055 if (classes[i] == X86_64_X87UP_CLASS
2056 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2057 classes[i] = X86_64_SSE_CLASS;
2058 }
2059 return words;
2060 }
2061
2062 /* Compute the alignment needed. We align all types to natural boundaries with
2063 the exception of XFmode, which is aligned to 64 bits. */
2064 if (mode != VOIDmode && mode != BLKmode)
2065 {
2066 int mode_alignment = GET_MODE_BITSIZE (mode);
2067
2068 if (mode == XFmode)
2069 mode_alignment = 128;
2070 else if (mode == XCmode)
2071 mode_alignment = 256;
2072 /* Misaligned fields are always returned in memory. */
2073 if (bit_offset % mode_alignment)
2074 return 0;
2075 }
2076
2077 /* Classification of atomic types. */
2078 switch (mode)
2079 {
2080 case DImode:
2081 case SImode:
2082 case HImode:
2083 case QImode:
2084 case CSImode:
2085 case CHImode:
2086 case CQImode:
2087 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2088 classes[0] = X86_64_INTEGERSI_CLASS;
2089 else
2090 classes[0] = X86_64_INTEGER_CLASS;
2091 return 1;
2092 case CDImode:
2093 case TImode:
2094 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2095 return 2;
2096 case CTImode:
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2098 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2099 return 4;
2100 case SFmode:
2101 if (!(bit_offset % 64))
2102 classes[0] = X86_64_SSESF_CLASS;
2103 else
2104 classes[0] = X86_64_SSE_CLASS;
2105 return 1;
2106 case DFmode:
2107 classes[0] = X86_64_SSEDF_CLASS;
2108 return 1;
2109 case TFmode:
2110 classes[0] = X86_64_X87_CLASS;
2111 classes[1] = X86_64_X87UP_CLASS;
2112 return 2;
2113 case TCmode:
2114 classes[0] = X86_64_X87_CLASS;
2115 classes[1] = X86_64_X87UP_CLASS;
2116 classes[2] = X86_64_X87_CLASS;
2117 classes[3] = X86_64_X87UP_CLASS;
2118 return 4;
2119 case DCmode:
2120 classes[0] = X86_64_SSEDF_CLASS;
2121 classes[1] = X86_64_SSEDF_CLASS;
2122 return 2;
2123 case SCmode:
2124 classes[0] = X86_64_SSE_CLASS;
2125 return 1;
2126 case V4SFmode:
2127 case V4SImode:
2128 case V16QImode:
2129 case V8HImode:
2130 case V2DFmode:
2131 case V2DImode:
2132 classes[0] = X86_64_SSE_CLASS;
2133 classes[1] = X86_64_SSEUP_CLASS;
2134 return 2;
2135 case V2SFmode:
2136 case V2SImode:
2137 case V4HImode:
2138 case V8QImode:
2139 return 0;
2140 case BLKmode:
2141 case VOIDmode:
2142 return 0;
2143 default:
2144 abort ();
2145 }
2146 }
2147
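/* Worked example (for illustration): for a 16 byte structure such as

     struct s { double d; int i; };

   classify_argument returns 2 and fills CLASSES with
   { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, so the structure travels
   in one SSE register and one integer register.  A structure larger than
   16 bytes always yields 0 and is passed in memory.  */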
2148 /* Examine the argument and set the number of registers required in each
2149 class. Return 0 iff the parameter should be passed in memory. */
2150 static int
2151 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2152 enum machine_mode mode;
2153 tree type;
2154 int *int_nregs, *sse_nregs;
2155 int in_return;
2156 {
2157 enum x86_64_reg_class class[MAX_CLASSES];
2158 int n = classify_argument (mode, type, class, 0);
2159
2160 *int_nregs = 0;
2161 *sse_nregs = 0;
2162 if (!n)
2163 return 0;
2164 for (n--; n >= 0; n--)
2165 switch (class[n])
2166 {
2167 case X86_64_INTEGER_CLASS:
2168 case X86_64_INTEGERSI_CLASS:
2169 (*int_nregs)++;
2170 break;
2171 case X86_64_SSE_CLASS:
2172 case X86_64_SSESF_CLASS:
2173 case X86_64_SSEDF_CLASS:
2174 (*sse_nregs)++;
2175 break;
2176 case X86_64_NO_CLASS:
2177 case X86_64_SSEUP_CLASS:
2178 break;
2179 case X86_64_X87_CLASS:
2180 case X86_64_X87UP_CLASS:
2181 if (!in_return)
2182 return 0;
2183 break;
2184 case X86_64_MEMORY_CLASS:
2185 abort ();
2186 }
2187 return 1;
2188 }
2189 /* Construct container for the argument used by GCC interface. See
2190 FUNCTION_ARG for the detailed description. */
2191 static rtx
2192 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2193 enum machine_mode mode;
2194 tree type;
2195 int in_return;
2196 int nintregs, nsseregs;
2197 const int * intreg;
2198 int sse_regno;
2199 {
2200 enum machine_mode tmpmode;
2201 int bytes =
2202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2203 enum x86_64_reg_class class[MAX_CLASSES];
2204 int n;
2205 int i;
2206 int nexps = 0;
2207 int needed_sseregs, needed_intregs;
2208 rtx exp[MAX_CLASSES];
2209 rtx ret;
2210
2211 n = classify_argument (mode, type, class, 0);
2212 if (TARGET_DEBUG_ARG)
2213 {
2214 if (!n)
2215 fprintf (stderr, "Memory class\n");
2216 else
2217 {
2218 fprintf (stderr, "Classes:");
2219 for (i = 0; i < n; i++)
2220 {
2221 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2222 }
2223 fprintf (stderr, "\n");
2224 }
2225 }
2226 if (!n)
2227 return NULL;
2228 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2229 return NULL;
2230 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2231 return NULL;
2232
2233 /* First construct simple cases. Avoid SCmode, since we want to use
2234 single register to pass this type. */
2235 if (n == 1 && mode != SCmode)
2236 switch (class[0])
2237 {
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2240 return gen_rtx_REG (mode, intreg[0]);
2241 case X86_64_SSE_CLASS:
2242 case X86_64_SSESF_CLASS:
2243 case X86_64_SSEDF_CLASS:
2244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2245 case X86_64_X87_CLASS:
2246 return gen_rtx_REG (mode, FIRST_STACK_REG);
2247 case X86_64_NO_CLASS:
2248 /* Zero sized array, struct or class. */
2249 return NULL;
2250 default:
2251 abort ();
2252 }
2253 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2254 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2255 if (n == 2
2256 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2257 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2258 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2259 && class[1] == X86_64_INTEGER_CLASS
2260 && (mode == CDImode || mode == TImode)
2261 && intreg[0] + 1 == intreg[1])
2262 return gen_rtx_REG (mode, intreg[0]);
2263 if (n == 4
2264 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2265 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2266 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2267
2268 /* Otherwise figure out the entries of the PARALLEL. */
2269 for (i = 0; i < n; i++)
2270 {
2271 switch (class[i])
2272 {
2273 case X86_64_NO_CLASS:
2274 break;
2275 case X86_64_INTEGER_CLASS:
2276 case X86_64_INTEGERSI_CLASS:
2277 /* Merge TImodes on aligned occasions here too. */
2278 if (i * 8 + 8 > bytes)
2279 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2280 else if (class[i] == X86_64_INTEGERSI_CLASS)
2281 tmpmode = SImode;
2282 else
2283 tmpmode = DImode;
2284 /* We've requested 24 bytes, which we don't have a mode for. Use DImode. */
2285 if (tmpmode == BLKmode)
2286 tmpmode = DImode;
2287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2288 gen_rtx_REG (tmpmode, *intreg),
2289 GEN_INT (i*8));
2290 intreg++;
2291 break;
2292 case X86_64_SSESF_CLASS:
2293 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2294 gen_rtx_REG (SFmode,
2295 SSE_REGNO (sse_regno)),
2296 GEN_INT (i*8));
2297 sse_regno++;
2298 break;
2299 case X86_64_SSEDF_CLASS:
2300 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2301 gen_rtx_REG (DFmode,
2302 SSE_REGNO (sse_regno)),
2303 GEN_INT (i*8));
2304 sse_regno++;
2305 break;
2306 case X86_64_SSE_CLASS:
2307 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2308 tmpmode = TImode;
2309 else
2310 tmpmode = DImode;
2311 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2312 gen_rtx_REG (tmpmode,
2313 SSE_REGNO (sse_regno)),
2314 GEN_INT (i*8));
2315 if (tmpmode == TImode)
2316 i++;
2317 sse_regno++;
2318 break;
2319 default:
2320 abort ();
2321 }
2322 }
2323 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2324 for (i = 0; i < nexps; i++)
2325 XVECEXP (ret, 0, i) = exp [i];
2326 return ret;
2327 }
2328
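/* Illustration: for the struct { double d; int i; } example above, and
   assuming the first argument registers are still free, construct_container
   builds

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte goes in an SSE register and the second in an
   integer register, each expr_list carrying its byte offset.  */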
2329 /* Update the data in CUM to advance over an argument
2330 of mode MODE and data type TYPE.
2331 (TYPE is null for libcalls where that information may not be available.) */
2332
2333 void
2334 function_arg_advance (cum, mode, type, named)
2335 CUMULATIVE_ARGS *cum; /* current arg information */
2336 enum machine_mode mode; /* current arg mode */
2337 tree type; /* type of the argument or 0 if lib support */
2338 int named; /* whether or not the argument was named */
2339 {
2340 int bytes =
2341 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2342 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2343
2344 if (TARGET_DEBUG_ARG)
2345 fprintf (stderr,
2346 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2347 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2348 if (TARGET_64BIT)
2349 {
2350 int int_nregs, sse_nregs;
2351 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2352 cum->words += words;
2353 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2354 {
2355 cum->nregs -= int_nregs;
2356 cum->sse_nregs -= sse_nregs;
2357 cum->regno += int_nregs;
2358 cum->sse_regno += sse_nregs;
2359 }
2360 else
2361 cum->words += words;
2362 }
2363 else
2364 {
2365 if (TARGET_SSE && mode == TImode)
2366 {
2367 cum->sse_words += words;
2368 cum->sse_nregs -= 1;
2369 cum->sse_regno += 1;
2370 if (cum->sse_nregs <= 0)
2371 {
2372 cum->sse_nregs = 0;
2373 cum->sse_regno = 0;
2374 }
2375 }
2376 else
2377 {
2378 cum->words += words;
2379 cum->nregs -= words;
2380 cum->regno += words;
2381
2382 if (cum->nregs <= 0)
2383 {
2384 cum->nregs = 0;
2385 cum->regno = 0;
2386 }
2387 }
2388 }
2389 return;
2390 }
2391
2392 /* Define where to put the arguments to a function.
2393 Value is zero to push the argument on the stack,
2394 or a hard register in which to store the argument.
2395
2396 MODE is the argument's machine mode.
2397 TYPE is the data type of the argument (as a tree).
2398 This is null for libcalls where that information may
2399 not be available.
2400 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2401 the preceding args and about the function being called.
2402 NAMED is nonzero if this argument is a named parameter
2403 (otherwise it is an extra parameter matching an ellipsis). */
2404
2405 rtx
2406 function_arg (cum, mode, type, named)
2407 CUMULATIVE_ARGS *cum; /* current arg information */
2408 enum machine_mode mode; /* current arg mode */
2409 tree type; /* type of the argument or 0 if lib support */
2410 int named; /* != 0 for normal args, == 0 for ... args */
2411 {
2412 rtx ret = NULL_RTX;
2413 int bytes =
2414 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2415 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2416
2417 /* Handle a hidden AL argument containing the number of registers for varargs
2418 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2419 any AL settings. */
2420 if (mode == VOIDmode)
2421 {
2422 if (TARGET_64BIT)
2423 return GEN_INT (cum->maybe_vaarg
2424 ? (cum->sse_nregs < 0
2425 ? SSE_REGPARM_MAX
2426 : cum->sse_regno)
2427 : -1);
2428 else
2429 return constm1_rtx;
2430 }
2431 if (TARGET_64BIT)
2432 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2433 &x86_64_int_parameter_registers [cum->regno],
2434 cum->sse_regno);
2435 else
2436 switch (mode)
2437 {
2438 /* For now, pass fp/complex values on the stack. */
2439 default:
2440 break;
2441
2442 case BLKmode:
2443 case DImode:
2444 case SImode:
2445 case HImode:
2446 case QImode:
2447 if (words <= cum->nregs)
2448 {
2449 int regno = cum->regno;
2450
2451 /* Fastcall allocates the first two DWORD (SImode) or
2452 smaller arguments to ECX and EDX. */
2453 if (cum->fastcall)
2454 {
2455 if (mode == BLKmode || mode == DImode)
2456 break;
2457
2458 /* ECX not EAX is the first allocated register. */
2459 if (regno == 0)
2460 regno = 2;
2461 }
2462 ret = gen_rtx_REG (mode, regno);
2463 }
2464 break;
2465 case TImode:
2466 if (cum->sse_nregs)
2467 ret = gen_rtx_REG (mode, cum->sse_regno);
2468 break;
2469 }
2470
2471 if (TARGET_DEBUG_ARG)
2472 {
2473 fprintf (stderr,
2474 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2475 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2476
2477 if (ret)
2478 print_simple_rtl (stderr, ret);
2479 else
2480 fprintf (stderr, ", stack");
2481
2482 fprintf (stderr, " )\n");
2483 }
2484
2485 return ret;
2486 }
2487
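/* For orientation: on x86-64 the integer argument registers are rdi, rsi,
   rdx, rcx, r8 and r9 (see x86_64_int_parameter_registers) and SSE
   arguments use xmm0-xmm7; on ia32 only regparm/fastcall functions pass
   integer arguments in registers, with TImode SSE vectors as the sole
   exception, and everything else goes on the stack.  */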
2488 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2489 and type. */
2490
2491 int
2492 ix86_function_arg_boundary (mode, type)
2493 enum machine_mode mode;
2494 tree type;
2495 {
2496 int align;
2497 if (!TARGET_64BIT)
2498 return PARM_BOUNDARY;
2499 if (type)
2500 align = TYPE_ALIGN (type);
2501 else
2502 align = GET_MODE_ALIGNMENT (mode);
2503 if (align < PARM_BOUNDARY)
2504 align = PARM_BOUNDARY;
2505 if (align > 128)
2506 align = 128;
2507 return align;
2508 }
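/* Example: on x86-64 a V4SFmode (__m128) argument with TYPE_ALIGN of 128
   is placed on a 16 byte boundary, while more weakly aligned types still
   get at least PARM_BOUNDARY; nothing is ever aligned beyond 128 bits.
   On ia32 every argument simply uses PARM_BOUNDARY.  */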
2509
2510 /* Return true if N is a possible register number for a function value. */
2511 bool
2512 ix86_function_value_regno_p (regno)
2513 int regno;
2514 {
2515 if (!TARGET_64BIT)
2516 {
2517 return ((regno) == 0
2518 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2519 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2520 }
2521 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2522 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2523 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2524 }
2525
2526 /* Define how to find the value returned by a function.
2527 VALTYPE is the data type of the value (as a tree).
2528 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2529 otherwise, FUNC is 0. */
2530 rtx
2531 ix86_function_value (valtype)
2532 tree valtype;
2533 {
2534 if (TARGET_64BIT)
2535 {
2536 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2537 REGPARM_MAX, SSE_REGPARM_MAX,
2538 x86_64_int_return_registers, 0);
2539 /* For zero sized structures, construct_container returns NULL, but we need
2540 to keep the rest of the compiler happy by returning a meaningful value. */
2541 if (!ret)
2542 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2543 return ret;
2544 }
2545 else
2546 return gen_rtx_REG (TYPE_MODE (valtype),
2547 ix86_value_regno (TYPE_MODE (valtype)));
2548 }
2549
2550 /* Return nonzero iff TYPE is returned in memory. */
2551 int
2552 ix86_return_in_memory (type)
2553 tree type;
2554 {
2555 int needed_intregs, needed_sseregs;
2556 if (TARGET_64BIT)
2557 {
2558 return !examine_argument (TYPE_MODE (type), type, 1,
2559 &needed_intregs, &needed_sseregs);
2560 }
2561 else
2562 {
2563 if (TYPE_MODE (type) == BLKmode
2564 || (VECTOR_MODE_P (TYPE_MODE (type))
2565 && int_size_in_bytes (type) == 8)
2566 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2567 && TYPE_MODE (type) != TFmode
2568 && !VECTOR_MODE_P (TYPE_MODE (type))))
2569 return 1;
2570 return 0;
2571 }
2572 }
2573
2574 /* Define how to find the value returned by a library function
2575 assuming the value has mode MODE. */
2576 rtx
2577 ix86_libcall_value (mode)
2578 enum machine_mode mode;
2579 {
2580 if (TARGET_64BIT)
2581 {
2582 switch (mode)
2583 {
2584 case SFmode:
2585 case SCmode:
2586 case DFmode:
2587 case DCmode:
2588 return gen_rtx_REG (mode, FIRST_SSE_REG);
2589 case TFmode:
2590 case TCmode:
2591 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2592 default:
2593 return gen_rtx_REG (mode, 0);
2594 }
2595 }
2596 else
2597 return gen_rtx_REG (mode, ix86_value_regno (mode));
2598 }
2599
2600 /* Given a mode, return the register to use for a return value. */
2601
2602 static int
2603 ix86_value_regno (mode)
2604 enum machine_mode mode;
2605 {
2606 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2607 return FIRST_FLOAT_REG;
2608 if (mode == TImode || VECTOR_MODE_P (mode))
2609 return FIRST_SSE_REG;
2610 return 0;
2611 }
2612 \f
2613 /* Create the va_list data type. */
2614
2615 tree
2616 ix86_build_va_list ()
2617 {
2618 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2619
2620 /* For i386 we use plain pointer to argument area. */
2621 if (!TARGET_64BIT)
2622 return build_pointer_type (char_type_node);
2623
2624 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2625 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2626
2627 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2628 unsigned_type_node);
2629 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2630 unsigned_type_node);
2631 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2632 ptr_type_node);
2633 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2634 ptr_type_node);
2635
2636 DECL_FIELD_CONTEXT (f_gpr) = record;
2637 DECL_FIELD_CONTEXT (f_fpr) = record;
2638 DECL_FIELD_CONTEXT (f_ovf) = record;
2639 DECL_FIELD_CONTEXT (f_sav) = record;
2640
2641 TREE_CHAIN (record) = type_decl;
2642 TYPE_NAME (record) = type_decl;
2643 TYPE_FIELDS (record) = f_gpr;
2644 TREE_CHAIN (f_gpr) = f_fpr;
2645 TREE_CHAIN (f_fpr) = f_ovf;
2646 TREE_CHAIN (f_ovf) = f_sav;
2647
2648 layout_type (record);
2649
2650 /* The correct type is an array type of one element. */
2651 return build_array_type (record, build_index_type (size_zero_node));
2652 }
2653
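/* For reference, the record built above corresponds to the usual x86-64
   va_list layout:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   while the ia32 va_list is just a pointer into the argument area.  */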
2654 /* Perform any actions needed for a function that is receiving a
2655 variable number of arguments.
2656
2657 CUM is as above.
2658
2659 MODE and TYPE are the mode and type of the current parameter.
2660
2661 PRETEND_SIZE is a variable that should be set to the amount of stack
2662 that must be pushed by the prolog to pretend that our caller pushed
2663 it.
2664
2665 Normally, this macro will push all remaining incoming registers on the
2666 stack and set PRETEND_SIZE to the length of the registers pushed. */
2667
2668 void
2669 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2670 CUMULATIVE_ARGS *cum;
2671 enum machine_mode mode;
2672 tree type;
2673 int *pretend_size ATTRIBUTE_UNUSED;
2674 int no_rtl;
2675
2676 {
2677 CUMULATIVE_ARGS next_cum;
2678 rtx save_area = NULL_RTX, mem;
2679 rtx label;
2680 rtx label_ref;
2681 rtx tmp_reg;
2682 rtx nsse_reg;
2683 int set;
2684 tree fntype;
2685 int stdarg_p;
2686 int i;
2687
2688 if (!TARGET_64BIT)
2689 return;
2690
2691 /* Indicate that we need to allocate stack space for the varargs save area. */
2692 ix86_save_varrargs_registers = 1;
2693
2694 fntype = TREE_TYPE (current_function_decl);
2695 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2696 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2697 != void_type_node));
2698
2699 /* For varargs, we do not want to skip the dummy va_dcl argument.
2700 For stdargs, we do want to skip the last named argument. */
2701 next_cum = *cum;
2702 if (stdarg_p)
2703 function_arg_advance (&next_cum, mode, type, 1);
2704
2705 if (!no_rtl)
2706 save_area = frame_pointer_rtx;
2707
2708 set = get_varargs_alias_set ();
2709
2710 for (i = next_cum.regno; i < ix86_regparm; i++)
2711 {
2712 mem = gen_rtx_MEM (Pmode,
2713 plus_constant (save_area, i * UNITS_PER_WORD));
2714 set_mem_alias_set (mem, set);
2715 emit_move_insn (mem, gen_rtx_REG (Pmode,
2716 x86_64_int_parameter_registers[i]));
2717 }
2718
2719 if (next_cum.sse_nregs)
2720 {
2721 /* Now emit code to save SSE registers. The AX parameter contains the number
2722 of SSE parameter registers used to call this function. We use the
2723 sse_prologue_save insn template, which produces a computed jump across
2724 the SSE saves. We need some preparation work to get this working. */
2725
2726 label = gen_label_rtx ();
2727 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2728
2729 /* Compute address to jump to :
2730 label - 5*eax + nnamed_sse_arguments*5 */
2731 tmp_reg = gen_reg_rtx (Pmode);
2732 nsse_reg = gen_reg_rtx (Pmode);
2733 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2734 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2735 gen_rtx_MULT (Pmode, nsse_reg,
2736 GEN_INT (4))));
2737 if (next_cum.sse_regno)
2738 emit_move_insn
2739 (nsse_reg,
2740 gen_rtx_CONST (DImode,
2741 gen_rtx_PLUS (DImode,
2742 label_ref,
2743 GEN_INT (next_cum.sse_regno * 4))));
2744 else
2745 emit_move_insn (nsse_reg, label_ref);
2746 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2747
2748 /* Compute the address of the memory block we save into. We always use a
2749 pointer pointing 127 bytes after the first byte to store - this is needed
2750 to keep the instruction size limited to 4 bytes. */
2751 tmp_reg = gen_reg_rtx (Pmode);
2752 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2753 plus_constant (save_area,
2754 8 * REGPARM_MAX + 127)));
2755 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2756 set_mem_alias_set (mem, set);
2757 set_mem_align (mem, BITS_PER_WORD);
2758
2759 /* And finally do the dirty job! */
2760 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2761 GEN_INT (next_cum.sse_regno), label));
2762 }
2763
2764 }
2765
2766 /* Implement va_start. */
2767
2768 void
2769 ix86_va_start (valist, nextarg)
2770 tree valist;
2771 rtx nextarg;
2772 {
2773 HOST_WIDE_INT words, n_gpr, n_fpr;
2774 tree f_gpr, f_fpr, f_ovf, f_sav;
2775 tree gpr, fpr, ovf, sav, t;
2776
2777 /* Only 64bit target needs something special. */
2778 if (!TARGET_64BIT)
2779 {
2780 std_expand_builtin_va_start (valist, nextarg);
2781 return;
2782 }
2783
2784 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2785 f_fpr = TREE_CHAIN (f_gpr);
2786 f_ovf = TREE_CHAIN (f_fpr);
2787 f_sav = TREE_CHAIN (f_ovf);
2788
2789 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2790 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2791 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2792 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2793 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2794
2795 /* Count number of gp and fp argument registers used. */
2796 words = current_function_args_info.words;
2797 n_gpr = current_function_args_info.regno;
2798 n_fpr = current_function_args_info.sse_regno;
2799
2800 if (TARGET_DEBUG_ARG)
2801 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2802 (int) words, (int) n_gpr, (int) n_fpr);
2803
2804 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2805 build_int_2 (n_gpr * 8, 0));
2806 TREE_SIDE_EFFECTS (t) = 1;
2807 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2808
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2810 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2811 TREE_SIDE_EFFECTS (t) = 1;
2812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2813
2814 /* Find the overflow area. */
2815 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2816 if (words != 0)
2817 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2818 build_int_2 (words * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2822
2823 /* Find the register save area.
2824 The function prologue saves it right above the stack frame. */
2825 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2826 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2827 TREE_SIDE_EFFECTS (t) = 1;
2828 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2829 }
2830
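/* Numeric illustration: if the prologue consumed two integer registers and
   one SSE register for named arguments, va_start sets gp_offset to 16
   (2 * 8), fp_offset to REGPARM_MAX * 8 + 16 (SSE slots are 16 bytes and
   follow the integer slots in the save area), points overflow_arg_area at
   the first stack-passed argument and reg_save_area at the block the
   prologue stored above the stack frame.  */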
2831 /* Implement va_arg. */
2832 rtx
2833 ix86_va_arg (valist, type)
2834 tree valist, type;
2835 {
2836 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2837 tree f_gpr, f_fpr, f_ovf, f_sav;
2838 tree gpr, fpr, ovf, sav, t;
2839 int size, rsize;
2840 rtx lab_false, lab_over = NULL_RTX;
2841 rtx addr_rtx, r;
2842 rtx container;
2843
2844 /* Only 64bit target needs something special. */
2845 if (!TARGET_64BIT)
2846 {
2847 return std_expand_builtin_va_arg (valist, type);
2848 }
2849
2850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2851 f_fpr = TREE_CHAIN (f_gpr);
2852 f_ovf = TREE_CHAIN (f_fpr);
2853 f_sav = TREE_CHAIN (f_ovf);
2854
2855 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2856 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2857 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2858 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2859 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2860
2861 size = int_size_in_bytes (type);
2862 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2863
2864 container = construct_container (TYPE_MODE (type), type, 0,
2865 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2866 /*
2867 * Pull the value out of the saved registers ...
2868 */
2869
2870 addr_rtx = gen_reg_rtx (Pmode);
2871
2872 if (container)
2873 {
2874 rtx int_addr_rtx, sse_addr_rtx;
2875 int needed_intregs, needed_sseregs;
2876 int need_temp;
2877
2878 lab_over = gen_label_rtx ();
2879 lab_false = gen_label_rtx ();
2880
2881 examine_argument (TYPE_MODE (type), type, 0,
2882 &needed_intregs, &needed_sseregs);
2883
2884
2885 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2886 || TYPE_ALIGN (type) > 128);
2887
2888 /* In case we are passing a structure, verify that it is a consecutive block
2889 in the register save area. If not, we need to do moves. */
2890 if (!need_temp && !REG_P (container))
2891 {
2892 /* Verify that all registers are strictly consecutive. */
2893 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2894 {
2895 int i;
2896
2897 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2898 {
2899 rtx slot = XVECEXP (container, 0, i);
2900 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2901 || INTVAL (XEXP (slot, 1)) != i * 16)
2902 need_temp = 1;
2903 }
2904 }
2905 else
2906 {
2907 int i;
2908
2909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2910 {
2911 rtx slot = XVECEXP (container, 0, i);
2912 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2913 || INTVAL (XEXP (slot, 1)) != i * 8)
2914 need_temp = 1;
2915 }
2916 }
2917 }
2918 if (!need_temp)
2919 {
2920 int_addr_rtx = addr_rtx;
2921 sse_addr_rtx = addr_rtx;
2922 }
2923 else
2924 {
2925 int_addr_rtx = gen_reg_rtx (Pmode);
2926 sse_addr_rtx = gen_reg_rtx (Pmode);
2927 }
2928 /* First ensure that we fit completely in registers. */
2929 if (needed_intregs)
2930 {
2931 emit_cmp_and_jump_insns (expand_expr
2932 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2933 GEN_INT ((REGPARM_MAX - needed_intregs +
2934 1) * 8), GE, const1_rtx, SImode,
2935 1, lab_false);
2936 }
2937 if (needed_sseregs)
2938 {
2939 emit_cmp_and_jump_insns (expand_expr
2940 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2941 GEN_INT ((SSE_REGPARM_MAX -
2942 needed_sseregs + 1) * 16 +
2943 REGPARM_MAX * 8), GE, const1_rtx,
2944 SImode, 1, lab_false);
2945 }
2946
2947 /* Compute index to start of area used for integer regs. */
2948 if (needed_intregs)
2949 {
2950 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2951 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2952 if (r != int_addr_rtx)
2953 emit_move_insn (int_addr_rtx, r);
2954 }
2955 if (needed_sseregs)
2956 {
2957 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2958 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2959 if (r != sse_addr_rtx)
2960 emit_move_insn (sse_addr_rtx, r);
2961 }
2962 if (need_temp)
2963 {
2964 int i;
2965 rtx mem;
2966
2967 /* Never use the memory itself, as it has the alias set. */
2968 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2969 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2970 set_mem_alias_set (mem, get_varargs_alias_set ());
2971 set_mem_align (mem, BITS_PER_UNIT);
2972
2973 for (i = 0; i < XVECLEN (container, 0); i++)
2974 {
2975 rtx slot = XVECEXP (container, 0, i);
2976 rtx reg = XEXP (slot, 0);
2977 enum machine_mode mode = GET_MODE (reg);
2978 rtx src_addr;
2979 rtx src_mem;
2980 int src_offset;
2981 rtx dest_mem;
2982
2983 if (SSE_REGNO_P (REGNO (reg)))
2984 {
2985 src_addr = sse_addr_rtx;
2986 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2987 }
2988 else
2989 {
2990 src_addr = int_addr_rtx;
2991 src_offset = REGNO (reg) * 8;
2992 }
2993 src_mem = gen_rtx_MEM (mode, src_addr);
2994 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2995 src_mem = adjust_address (src_mem, mode, src_offset);
2996 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2997 emit_move_insn (dest_mem, src_mem);
2998 }
2999 }
3000
3001 if (needed_intregs)
3002 {
3003 t =
3004 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3005 build_int_2 (needed_intregs * 8, 0));
3006 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3009 }
3010 if (needed_sseregs)
3011 {
3012 t =
3013 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3014 build_int_2 (needed_sseregs * 16, 0));
3015 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3016 TREE_SIDE_EFFECTS (t) = 1;
3017 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3018 }
3019
3020 emit_jump_insn (gen_jump (lab_over));
3021 emit_barrier ();
3022 emit_label (lab_false);
3023 }
3024
3025 /* ... otherwise out of the overflow area. */
3026
3027 /* Care for on-stack alignment if needed. */
3028 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3029 t = ovf;
3030 else
3031 {
3032 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3033 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3034 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3035 }
3036 t = save_expr (t);
3037
3038 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3039 if (r != addr_rtx)
3040 emit_move_insn (addr_rtx, r);
3041
3042 t =
3043 build (PLUS_EXPR, TREE_TYPE (t), t,
3044 build_int_2 (rsize * UNITS_PER_WORD, 0));
3045 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3046 TREE_SIDE_EFFECTS (t) = 1;
3047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3048
3049 if (container)
3050 emit_label (lab_over);
3051
3052 return addr_rtx;
3053 }
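/* Rough sketch (for illustration) of what the RTL emitted above does for a
   single integer argument:

     if (ap->gp_offset < REGPARM_MAX * 8)
       {
         addr = ap->reg_save_area + ap->gp_offset;   -- still in a register slot
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;               -- spilled to the stack
         ap->overflow_arg_area += 8;
       }

   The FP/SSE path is analogous but uses fp_offset and 16 byte slots.  */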
3054 \f
3055 /* Return nonzero if OP is either an i387 or SSE fp register. */
3056 int
3057 any_fp_register_operand (op, mode)
3058 rtx op;
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3060 {
3061 return ANY_FP_REG_P (op);
3062 }
3063
3064 /* Return nonzero if OP is an i387 fp register. */
3065 int
3066 fp_register_operand (op, mode)
3067 rtx op;
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3069 {
3070 return FP_REG_P (op);
3071 }
3072
3073 /* Return nonzero if OP is a non-fp register_operand. */
3074 int
3075 register_and_not_any_fp_reg_operand (op, mode)
3076 rtx op;
3077 enum machine_mode mode;
3078 {
3079 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3080 }
3081
3082 /* Return nonzero if OP is a register operand other than an
3083 i387 fp register. */
3084 int
3085 register_and_not_fp_reg_operand (op, mode)
3086 rtx op;
3087 enum machine_mode mode;
3088 {
3089 return register_operand (op, mode) && !FP_REG_P (op);
3090 }
3091
3092 /* Return nonzero if OP is general operand representable on x86_64. */
3093
3094 int
3095 x86_64_general_operand (op, mode)
3096 rtx op;
3097 enum machine_mode mode;
3098 {
3099 if (!TARGET_64BIT)
3100 return general_operand (op, mode);
3101 if (nonimmediate_operand (op, mode))
3102 return 1;
3103 return x86_64_sign_extended_value (op);
3104 }
3105
3106 /* Return nonzero if OP is general operand representable on x86_64
3107 as either sign extended or zero extended constant. */
3108
3109 int
3110 x86_64_szext_general_operand (op, mode)
3111 rtx op;
3112 enum machine_mode mode;
3113 {
3114 if (!TARGET_64BIT)
3115 return general_operand (op, mode);
3116 if (nonimmediate_operand (op, mode))
3117 return 1;
3118 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3119 }
3120
3121 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3122
3123 int
3124 x86_64_nonmemory_operand (op, mode)
3125 rtx op;
3126 enum machine_mode mode;
3127 {
3128 if (!TARGET_64BIT)
3129 return nonmemory_operand (op, mode);
3130 if (register_operand (op, mode))
3131 return 1;
3132 return x86_64_sign_extended_value (op);
3133 }
3134
3135 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3136
3137 int
3138 x86_64_movabs_operand (op, mode)
3139 rtx op;
3140 enum machine_mode mode;
3141 {
3142 if (!TARGET_64BIT || !flag_pic)
3143 return nonmemory_operand (op, mode);
3144 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3145 return 1;
3146 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3147 return 1;
3148 return 0;
3149 }
3150
3151 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3152
3153 int
3154 x86_64_szext_nonmemory_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode;
3157 {
3158 if (!TARGET_64BIT)
3159 return nonmemory_operand (op, mode);
3160 if (register_operand (op, mode))
3161 return 1;
3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3163 }
3164
3165 /* Return nonzero if OP is immediate operand representable on x86_64. */
3166
3167 int
3168 x86_64_immediate_operand (op, mode)
3169 rtx op;
3170 enum machine_mode mode;
3171 {
3172 if (!TARGET_64BIT)
3173 return immediate_operand (op, mode);
3174 return x86_64_sign_extended_value (op);
3175 }
3176
3177 /* Return nonzero if OP is immediate operand representable on x86_64. */
3178
3179 int
3180 x86_64_zext_immediate_operand (op, mode)
3181 rtx op;
3182 enum machine_mode mode ATTRIBUTE_UNUSED;
3183 {
3184 return x86_64_zero_extended_value (op);
3185 }
3186
3187 /* Return nonzero if OP is (const_int 1), else return zero. */
3188
3189 int
3190 const_int_1_operand (op, mode)
3191 rtx op;
3192 enum machine_mode mode ATTRIBUTE_UNUSED;
3193 {
3194 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3195 }
3196
3197 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3198 for shift & compare patterns, as shifting by 0 does not change flags),
3199 else return zero. */
3200
3201 int
3202 const_int_1_31_operand (op, mode)
3203 rtx op;
3204 enum machine_mode mode ATTRIBUTE_UNUSED;
3205 {
3206 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3207 }
3208
3209 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3210 reference and a constant. */
3211
3212 int
3213 symbolic_operand (op, mode)
3214 register rtx op;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3216 {
3217 switch (GET_CODE (op))
3218 {
3219 case SYMBOL_REF:
3220 case LABEL_REF:
3221 return 1;
3222
3223 case CONST:
3224 op = XEXP (op, 0);
3225 if (GET_CODE (op) == SYMBOL_REF
3226 || GET_CODE (op) == LABEL_REF
3227 || (GET_CODE (op) == UNSPEC
3228 && (XINT (op, 1) == UNSPEC_GOT
3229 || XINT (op, 1) == UNSPEC_GOTOFF
3230 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3231 return 1;
3232 if (GET_CODE (op) != PLUS
3233 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3234 return 0;
3235
3236 op = XEXP (op, 0);
3237 if (GET_CODE (op) == SYMBOL_REF
3238 || GET_CODE (op) == LABEL_REF)
3239 return 1;
3240 /* Only @GOTOFF gets offsets. */
3241 if (GET_CODE (op) != UNSPEC
3242 || XINT (op, 1) != UNSPEC_GOTOFF)
3243 return 0;
3244
3245 op = XVECEXP (op, 0, 0);
3246 if (GET_CODE (op) == SYMBOL_REF
3247 || GET_CODE (op) == LABEL_REF)
3248 return 1;
3249 return 0;
3250
3251 default:
3252 return 0;
3253 }
3254 }
3255
3256 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3257
3258 int
3259 pic_symbolic_operand (op, mode)
3260 register rtx op;
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3262 {
3263 if (GET_CODE (op) != CONST)
3264 return 0;
3265 op = XEXP (op, 0);
3266 if (TARGET_64BIT)
3267 {
3268 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3269 return 1;
3270 }
3271 else
3272 {
3273 if (GET_CODE (op) == UNSPEC)
3274 return 1;
3275 if (GET_CODE (op) != PLUS
3276 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3277 return 0;
3278 op = XEXP (op, 0);
3279 if (GET_CODE (op) == UNSPEC)
3280 return 1;
3281 }
3282 return 0;
3283 }
3284
3285 /* Return true if OP is a symbolic operand that resolves locally. */
3286
3287 static int
3288 local_symbolic_operand (op, mode)
3289 rtx op;
3290 enum machine_mode mode ATTRIBUTE_UNUSED;
3291 {
3292 if (GET_CODE (op) == CONST
3293 && GET_CODE (XEXP (op, 0)) == PLUS
3294 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3295 op = XEXP (XEXP (op, 0), 0);
3296
3297 if (GET_CODE (op) == LABEL_REF)
3298 return 1;
3299
3300 if (GET_CODE (op) != SYMBOL_REF)
3301 return 0;
3302
3303 /* These we've been told are local by varasm and encode_section_info
3304 respectively. */
3305 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3306 return 1;
3307
3308 /* There is, however, a not insubstantial body of code in the rest of
3309 the compiler that assumes it can just stick the results of
3310 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3311 /* ??? This is a hack. Should update the body of the compiler to
3312 always create a DECL and invoke targetm.encode_section_info. */
3313 if (strncmp (XSTR (op, 0), internal_label_prefix,
3314 internal_label_prefix_len) == 0)
3315 return 1;
3316
3317 return 0;
3318 }
3319
3320 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3321
3322 int
3323 tls_symbolic_operand (op, mode)
3324 register rtx op;
3325 enum machine_mode mode ATTRIBUTE_UNUSED;
3326 {
3327 const char *symbol_str;
3328
3329 if (GET_CODE (op) != SYMBOL_REF)
3330 return 0;
3331 symbol_str = XSTR (op, 0);
3332
3333 if (symbol_str[0] != '%')
3334 return 0;
3335 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3336 }
3337
3338 static int
3339 tls_symbolic_operand_1 (op, kind)
3340 rtx op;
3341 enum tls_model kind;
3342 {
3343 const char *symbol_str;
3344
3345 if (GET_CODE (op) != SYMBOL_REF)
3346 return 0;
3347 symbol_str = XSTR (op, 0);
3348
3349 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3350 }
3351
3352 int
3353 global_dynamic_symbolic_operand (op, mode)
3354 register rtx op;
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3356 {
3357 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3358 }
3359
3360 int
3361 local_dynamic_symbolic_operand (op, mode)
3362 register rtx op;
3363 enum machine_mode mode ATTRIBUTE_UNUSED;
3364 {
3365 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3366 }
3367
3368 int
3369 initial_exec_symbolic_operand (op, mode)
3370 register rtx op;
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3372 {
3373 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3374 }
3375
3376 int
3377 local_exec_symbolic_operand (op, mode)
3378 register rtx op;
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3380 {
3381 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3382 }
3383
3384 /* Test for a valid operand for a call instruction. Don't allow the
3385 arg pointer register or virtual regs since they may decay into
3386 reg + const, which the patterns can't handle. */
3387
3388 int
3389 call_insn_operand (op, mode)
3390 rtx op;
3391 enum machine_mode mode ATTRIBUTE_UNUSED;
3392 {
3393 /* Disallow indirect through a virtual register. This leads to
3394 compiler aborts when trying to eliminate them. */
3395 if (GET_CODE (op) == REG
3396 && (op == arg_pointer_rtx
3397 || op == frame_pointer_rtx
3398 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3399 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3400 return 0;
3401
3402 /* Disallow `call 1234'. Due to varying assembler lameness this
3403 gets either rejected or translated to `call .+1234'. */
3404 if (GET_CODE (op) == CONST_INT)
3405 return 0;
3406
3407 /* Explicitly allow SYMBOL_REF even if pic. */
3408 if (GET_CODE (op) == SYMBOL_REF)
3409 return 1;
3410
3411 /* Otherwise we can allow any general_operand in the address. */
3412 return general_operand (op, Pmode);
3413 }
3414
3415 /* Test for a valid operand for a call instruction. Don't allow the
3416 arg pointer register or virtual regs since they may decay into
3417 reg + const, which the patterns can't handle. */
3418
3419 int
3420 sibcall_insn_operand (op, mode)
3421 rtx op;
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3423 {
3424 /* Disallow indirect through a virtual register. This leads to
3425 compiler aborts when trying to eliminate them. */
3426 if (GET_CODE (op) == REG
3427 && (op == arg_pointer_rtx
3428 || op == frame_pointer_rtx
3429 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3430 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3431 return 0;
3432
3433 /* Explicitly allow SYMBOL_REF even if pic. */
3434 if (GET_CODE (op) == SYMBOL_REF)
3435 return 1;
3436
3437 /* Otherwise we can only allow register operands. */
3438 return register_operand (op, Pmode);
3439 }
3440
3441 int
3442 constant_call_address_operand (op, mode)
3443 rtx op;
3444 enum machine_mode mode ATTRIBUTE_UNUSED;
3445 {
3446 if (GET_CODE (op) == CONST
3447 && GET_CODE (XEXP (op, 0)) == PLUS
3448 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3449 op = XEXP (XEXP (op, 0), 0);
3450 return GET_CODE (op) == SYMBOL_REF;
3451 }
3452
3453 /* Match exactly zero and one. */
3454
3455 int
3456 const0_operand (op, mode)
3457 register rtx op;
3458 enum machine_mode mode;
3459 {
3460 return op == CONST0_RTX (mode);
3461 }
3462
3463 int
3464 const1_operand (op, mode)
3465 register rtx op;
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3467 {
3468 return op == const1_rtx;
3469 }
3470
3471 /* Match 2, 4, or 8. Used for leal multiplicands. */
3472
3473 int
3474 const248_operand (op, mode)
3475 register rtx op;
3476 enum machine_mode mode ATTRIBUTE_UNUSED;
3477 {
3478 return (GET_CODE (op) == CONST_INT
3479 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3480 }
3481
3482 /* True if this is a constant appropriate for an increment or decrement. */
3483
3484 int
3485 incdec_operand (op, mode)
3486 register rtx op;
3487 enum machine_mode mode ATTRIBUTE_UNUSED;
3488 {
3489 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3490 the flags register, since the carry flag is not set. */
3491 if (TARGET_PENTIUM4 && !optimize_size)
3492 return 0;
3493 return op == const1_rtx || op == constm1_rtx;
3494 }
3495
3496 /* Return nonzero if OP is acceptable as operand of DImode shift
3497 expander. */
3498
3499 int
3500 shiftdi_operand (op, mode)
3501 rtx op;
3502 enum machine_mode mode ATTRIBUTE_UNUSED;
3503 {
3504 if (TARGET_64BIT)
3505 return nonimmediate_operand (op, mode);
3506 else
3507 return register_operand (op, mode);
3508 }
3509
3510 /* Return false if this is the stack pointer, or any other fake
3511 register eliminable to the stack pointer. Otherwise, this is
3512 a register operand.
3513
3514 This is used to prevent esp from being used as an index reg,
3515 which would only happen in pathological cases. */
3516
3517 int
3518 reg_no_sp_operand (op, mode)
3519 register rtx op;
3520 enum machine_mode mode;
3521 {
3522 rtx t = op;
3523 if (GET_CODE (t) == SUBREG)
3524 t = SUBREG_REG (t);
3525 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3526 return 0;
3527
3528 return register_operand (op, mode);
3529 }
3530
3531 int
3532 mmx_reg_operand (op, mode)
3533 register rtx op;
3534 enum machine_mode mode ATTRIBUTE_UNUSED;
3535 {
3536 return MMX_REG_P (op);
3537 }
3538
3539 /* Return false if this is any eliminable register. Otherwise
3540 general_operand. */
3541
3542 int
3543 general_no_elim_operand (op, mode)
3544 register rtx op;
3545 enum machine_mode mode;
3546 {
3547 rtx t = op;
3548 if (GET_CODE (t) == SUBREG)
3549 t = SUBREG_REG (t);
3550 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3551 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3552 || t == virtual_stack_dynamic_rtx)
3553 return 0;
3554 if (REG_P (t)
3555 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3556 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3557 return 0;
3558
3559 return general_operand (op, mode);
3560 }
3561
3562 /* Return false if this is any eliminable register. Otherwise
3563 register_operand or const_int. */
3564
3565 int
3566 nonmemory_no_elim_operand (op, mode)
3567 register rtx op;
3568 enum machine_mode mode;
3569 {
3570 rtx t = op;
3571 if (GET_CODE (t) == SUBREG)
3572 t = SUBREG_REG (t);
3573 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3574 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3575 || t == virtual_stack_dynamic_rtx)
3576 return 0;
3577
3578 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3579 }
3580
3581 /* Return false if this is any eliminable register or stack register,
3582 otherwise work like register_operand. */
3583
3584 int
3585 index_register_operand (op, mode)
3586 register rtx op;
3587 enum machine_mode mode;
3588 {
3589 rtx t = op;
3590 if (GET_CODE (t) == SUBREG)
3591 t = SUBREG_REG (t);
3592 if (!REG_P (t))
3593 return 0;
3594 if (t == arg_pointer_rtx
3595 || t == frame_pointer_rtx
3596 || t == virtual_incoming_args_rtx
3597 || t == virtual_stack_vars_rtx
3598 || t == virtual_stack_dynamic_rtx
3599 || REGNO (t) == STACK_POINTER_REGNUM)
3600 return 0;
3601
3602 return general_operand (op, mode);
3603 }
3604
3605 /* Return true if op is a Q_REGS class register. */
3606
3607 int
3608 q_regs_operand (op, mode)
3609 register rtx op;
3610 enum machine_mode mode;
3611 {
3612 if (mode != VOIDmode && GET_MODE (op) != mode)
3613 return 0;
3614 if (GET_CODE (op) == SUBREG)
3615 op = SUBREG_REG (op);
3616 return ANY_QI_REG_P (op);
3617 }
3618
3619 /* Return true if op is a flags register. */
3620
3621 int
3622 flags_reg_operand (op, mode)
3623 register rtx op;
3624 enum machine_mode mode;
3625 {
3626 if (mode != VOIDmode && GET_MODE (op) != mode)
3627 return 0;
3628 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3629 }
3630
3631 /* Return true if op is a NON_Q_REGS class register. */
3632
3633 int
3634 non_q_regs_operand (op, mode)
3635 register rtx op;
3636 enum machine_mode mode;
3637 {
3638 if (mode != VOIDmode && GET_MODE (op) != mode)
3639 return 0;
3640 if (GET_CODE (op) == SUBREG)
3641 op = SUBREG_REG (op);
3642 return NON_QI_REG_P (op);
3643 }
3644
3645 int
3646 zero_extended_scalar_load_operand (op, mode)
3647 rtx op;
3648 enum machine_mode mode ATTRIBUTE_UNUSED;
3649 {
3650 unsigned n_elts;
3651 if (GET_CODE (op) != MEM)
3652 return 0;
3653 op = maybe_get_pool_constant (op);
3654 if (!op)
3655 return 0;
3656 if (GET_CODE (op) != CONST_VECTOR)
3657 return 0;
3658 n_elts =
3659 (GET_MODE_SIZE (GET_MODE (op)) /
3660 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3661 for (n_elts--; n_elts > 0; n_elts--)
3662 {
3663 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3664 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3665 return 0;
3666 }
3667 return 1;
3668 }
3669
3670 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3671 insns. */
3672 int
3673 sse_comparison_operator (op, mode)
3674 rtx op;
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3676 {
3677 enum rtx_code code = GET_CODE (op);
3678 switch (code)
3679 {
3680 /* Operations supported directly. */
3681 case EQ:
3682 case LT:
3683 case LE:
3684 case UNORDERED:
3685 case NE:
3686 case UNGE:
3687 case UNGT:
3688 case ORDERED:
3689 return 1;
3690 /* These are equivalent to the ones above for non-IEEE comparisons. */
3691 case UNEQ:
3692 case UNLT:
3693 case UNLE:
3694 case LTGT:
3695 case GE:
3696 case GT:
3697 return !TARGET_IEEE_FP;
3698 default:
3699 return 0;
3700 }
3701 }
3702 /* Return 1 if OP is a valid comparison operator in valid mode. */
3703 int
3704 ix86_comparison_operator (op, mode)
3705 register rtx op;
3706 enum machine_mode mode;
3707 {
3708 enum machine_mode inmode;
3709 enum rtx_code code = GET_CODE (op);
3710 if (mode != VOIDmode && GET_MODE (op) != mode)
3711 return 0;
3712 if (GET_RTX_CLASS (code) != '<')
3713 return 0;
3714 inmode = GET_MODE (XEXP (op, 0));
3715
3716 if (inmode == CCFPmode || inmode == CCFPUmode)
3717 {
3718 enum rtx_code second_code, bypass_code;
3719 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3720 return (bypass_code == NIL && second_code == NIL);
3721 }
3722 switch (code)
3723 {
3724 case EQ: case NE:
3725 return 1;
3726 case LT: case GE:
3727 if (inmode == CCmode || inmode == CCGCmode
3728 || inmode == CCGOCmode || inmode == CCNOmode)
3729 return 1;
3730 return 0;
3731 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3732 if (inmode == CCmode)
3733 return 1;
3734 return 0;
3735 case GT: case LE:
3736 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3737 return 1;
3738 return 0;
3739 default:
3740 return 0;
3741 }
3742 }
3743
3744 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3745
3746 int
3747 fcmov_comparison_operator (op, mode)
3748 register rtx op;
3749 enum machine_mode mode;
3750 {
3751 enum machine_mode inmode;
3752 enum rtx_code code = GET_CODE (op);
3753 if (mode != VOIDmode && GET_MODE (op) != mode)
3754 return 0;
3755 if (GET_RTX_CLASS (code) != '<')
3756 return 0;
3757 inmode = GET_MODE (XEXP (op, 0));
3758 if (inmode == CCFPmode || inmode == CCFPUmode)
3759 {
3760 enum rtx_code second_code, bypass_code;
3761 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3762 if (bypass_code != NIL || second_code != NIL)
3763 return 0;
3764 code = ix86_fp_compare_code_to_integer (code);
3765 }
3766 /* The i387 supports only a limited set of condition codes. */
3767 switch (code)
3768 {
3769 case LTU: case GTU: case LEU: case GEU:
3770 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3771 return 1;
3772 return 0;
3773 case ORDERED: case UNORDERED:
3774 case EQ: case NE:
3775 return 1;
3776 default:
3777 return 0;
3778 }
3779 }
3780
3781 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3782
3783 int
3784 promotable_binary_operator (op, mode)
3785 register rtx op;
3786 enum machine_mode mode ATTRIBUTE_UNUSED;
3787 {
3788 switch (GET_CODE (op))
3789 {
3790 case MULT:
3791 /* Modern CPUs have the same latency for HImode and SImode multiply,
3792 but the 386 and 486 do HImode multiply faster. */
3793 return ix86_cpu > PROCESSOR_I486;
3794 case PLUS:
3795 case AND:
3796 case IOR:
3797 case XOR:
3798 case ASHIFT:
3799 return 1;
3800 default:
3801 return 0;
3802 }
3803 }
3804
3805 /* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3807 into registers. */
3808
3809 int
3810 cmp_fp_expander_operand (op, mode)
3811 register rtx op;
3812 enum machine_mode mode;
3813 {
3814 if (mode != VOIDmode && mode != GET_MODE (op))
3815 return 0;
3816 if (GET_CODE (op) == CONST_DOUBLE)
3817 return 1;
3818 return general_operand (op, mode);
3819 }
3820
3821 /* Match an SI or HImode register for a zero_extract. */
3822
3823 int
3824 ext_register_operand (op, mode)
3825 register rtx op;
3826 enum machine_mode mode ATTRIBUTE_UNUSED;
3827 {
3828 int regno;
3829 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3830 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3831 return 0;
3832
3833 if (!register_operand (op, VOIDmode))
3834 return 0;
3835
3836 /* Be careful to accept only registers having upper parts. */
3837 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3838 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3839 }
3840
3841 /* Return 1 if this is a valid binary floating-point operation.
3842 OP is the expression matched, and MODE is its mode. */
3843
3844 int
3845 binary_fp_operator (op, mode)
3846 register rtx op;
3847 enum machine_mode mode;
3848 {
3849 if (mode != VOIDmode && mode != GET_MODE (op))
3850 return 0;
3851
3852 switch (GET_CODE (op))
3853 {
3854 case PLUS:
3855 case MINUS:
3856 case MULT:
3857 case DIV:
3858 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3859
3860 default:
3861 return 0;
3862 }
3863 }
3864
3865 int
3866 mult_operator (op, mode)
3867 register rtx op;
3868 enum machine_mode mode ATTRIBUTE_UNUSED;
3869 {
3870 return GET_CODE (op) == MULT;
3871 }
3872
3873 int
3874 div_operator (op, mode)
3875 register rtx op;
3876 enum machine_mode mode ATTRIBUTE_UNUSED;
3877 {
3878 return GET_CODE (op) == DIV;
3879 }
3880
3881 int
3882 arith_or_logical_operator (op, mode)
3883 rtx op;
3884 enum machine_mode mode;
3885 {
3886 return ((mode == VOIDmode || GET_MODE (op) == mode)
3887 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3889 }
3890
3891 /* Returns 1 if OP is a memory operand with a displacement. */
3892
3893 int
3894 memory_displacement_operand (op, mode)
3895 register rtx op;
3896 enum machine_mode mode;
3897 {
3898 struct ix86_address parts;
3899
3900 if (! memory_operand (op, mode))
3901 return 0;
3902
3903 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3904 abort ();
3905
3906 return parts.disp != NULL_RTX;
3907 }
3908
3909 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3911
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
3914
3915 int
3916 cmpsi_operand (op, mode)
3917 rtx op;
3918 enum machine_mode mode;
3919 {
3920 if (nonimmediate_operand (op, mode))
3921 return 1;
3922
3923 if (GET_CODE (op) == AND
3924 && GET_MODE (op) == SImode
3925 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3930 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3931 return 1;
3932
3933 return 0;
3934 }
3935
3936 /* Returns 1 if OP is a memory operand that cannot be represented by the
3937 modRM array. */
3938
3939 int
3940 long_memory_operand (op, mode)
3941 register rtx op;
3942 enum machine_mode mode;
3943 {
3944 if (! memory_operand (op, mode))
3945 return 0;
3946
3947 return memory_address_length (op) != 0;
3948 }
3949
3950 /* Return nonzero if the rtx is known aligned. */
3951
3952 int
3953 aligned_operand (op, mode)
3954 rtx op;
3955 enum machine_mode mode;
3956 {
3957 struct ix86_address parts;
3958
3959 if (!general_operand (op, mode))
3960 return 0;
3961
3962 /* Registers and immediate operands are always "aligned". */
3963 if (GET_CODE (op) != MEM)
3964 return 1;
3965
3966 /* Don't even try to do any aligned optimizations with volatiles. */
3967 if (MEM_VOLATILE_P (op))
3968 return 0;
3969
3970 op = XEXP (op, 0);
3971
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op) == PRE_DEC
3974 || GET_CODE (op) == POST_INC)
3975 return 1;
3976
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op, &parts))
3979 abort ();
3980
3981 if (parts.base && GET_CODE (parts.base) == SUBREG)
3982 parts.base = SUBREG_REG (parts.base);
3983 if (parts.index && GET_CODE (parts.index) == SUBREG)
3984 parts.index = SUBREG_REG (parts.index);
3985
3986 /* Look for some component that isn't known to be aligned. */
3987 if (parts.index)
3988 {
3989 if (parts.scale < 4
3990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3991 return 0;
3992 }
3993 if (parts.base)
3994 {
3995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3996 return 0;
3997 }
3998 if (parts.disp)
3999 {
4000 if (GET_CODE (parts.disp) != CONST_INT
4001 || (INTVAL (parts.disp) & 3) != 0)
4002 return 0;
4003 }
4004
4005 /* Didn't find one -- this must be an aligned address. */
4006 return 1;
4007 }
4008 \f
4009 /* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4011 worthwhile. */
4012
4013 int
4014 standard_80387_constant_p (x)
4015 rtx x;
4016 {
4017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4018 return -1;
4019 /* Note that the 80387 supports other constants, such as pi, that we could
4020 support too. On some machines, these are much slower to load as a standard
4021 constant than to load from doubles in memory. */
4022 if (x == CONST0_RTX (GET_MODE (x)))
4023 return 1;
4024 if (x == CONST1_RTX (GET_MODE (x)))
4025 return 2;
4026 return 0;
4027 }
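
/* For illustration, the return values above correspond roughly to the two
   single-instruction loads the 80387 offers:

     standard_80387_constant_p (CONST0_RTX (DFmode)) == 1   -> fldz
     standard_80387_constant_p (CONST1_RTX (DFmode)) == 2   -> fld1

   anything else is answered with 0 (or -1 for a non-FP rtx) and ends up
   being loaded from memory.  */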
4028
4029 /* Return 1 if X is an FP constant we can load into an SSE register
4030 without using memory. */
4031 int
4032 standard_sse_constant_p (x)
4033 rtx x;
4034 {
4035 if (x == const0_rtx)
4036 return 1;
4037 return (x == CONST0_RTX (GET_MODE (x)));
4038 }
4039
4040 /* Returns 1 if OP contains a symbol reference. */
4041
4042 int
4043 symbolic_reference_mentioned_p (op)
4044 rtx op;
4045 {
4046 register const char *fmt;
4047 register int i;
4048
4049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4050 return 1;
4051
4052 fmt = GET_RTX_FORMAT (GET_CODE (op));
4053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4054 {
4055 if (fmt[i] == 'E')
4056 {
4057 register int j;
4058
4059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4061 return 1;
4062 }
4063
4064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4065 return 1;
4066 }
4067
4068 return 0;
4069 }
4070
4071 /* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4076
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4081 `return' is OK. */
4082
4083 int
4084 ix86_can_use_return_insn_p ()
4085 {
4086 struct ix86_frame frame;
4087
4088 #ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4090 return 0;
4091 #endif
4092
4093 if (! reload_completed || frame_pointer_needed)
4094 return 0;
4095
4096 /* Don't allow popping more than 32K bytes of arguments, since that's
4097 all we can do with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size >= 32768)
4100 return 0;
4101
4102 ix86_compute_frame_layout (&frame);
4103 return frame.to_allocate == 0 && frame.nregs == 0;
4104 }
4105 \f
4106 /* Return 1 if VALUE can be stored in the sign-extended immediate field. */
4107 int
4108 x86_64_sign_extended_value (value)
4109 rtx value;
4110 {
4111 switch (GET_CODE (value))
4112 {
4113 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4114 to be at least 32 and thus all acceptable constants are
4115 represented as CONST_INT. */
4116 case CONST_INT:
4117 if (HOST_BITS_PER_WIDE_INT == 32)
4118 return 1;
4119 else
4120 {
4121 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4122 return trunc_int_for_mode (val, SImode) == val;
4123 }
4124 break;
4125
4126 /* For certain code models, the symbolic references are known to fit.
4127 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4128 library. Don't count TLS SYMBOL_REFs here, since they should fit
4129 only when inside an UNSPEC, which is handled below. */
4130 case SYMBOL_REF:
4131 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4132
4133 /* For certain code models, the code is near as well. */
4134 case LABEL_REF:
4135 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4136 || ix86_cmodel == CM_KERNEL);
4137
4138 /* We also may accept the offsetted memory references in certain special
4139 cases. */
4140 case CONST:
4141 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4142 switch (XINT (XEXP (value, 0), 1))
4143 {
4144 case UNSPEC_GOTPCREL:
4145 case UNSPEC_DTPOFF:
4146 case UNSPEC_GOTNTPOFF:
4147 case UNSPEC_NTPOFF:
4148 return 1;
4149 default:
4150 break;
4151 }
4152 if (GET_CODE (XEXP (value, 0)) == PLUS)
4153 {
4154 rtx op1 = XEXP (XEXP (value, 0), 0);
4155 rtx op2 = XEXP (XEXP (value, 0), 1);
4156 HOST_WIDE_INT offset;
4157
4158 if (ix86_cmodel == CM_LARGE)
4159 return 0;
4160 if (GET_CODE (op2) != CONST_INT)
4161 return 0;
4162 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4163 switch (GET_CODE (op1))
4164 {
4165 case SYMBOL_REF:
4166 /* For CM_SMALL assume that the latest object is 16MB below
4167 the end of the 31-bit boundary. We may also accept pretty
4168 large negative constants, knowing that all objects are
4169 in the positive half of the address space. */
4170 if (ix86_cmodel == CM_SMALL
4171 && offset < 16*1024*1024
4172 && trunc_int_for_mode (offset, SImode) == offset)
4173 return 1;
4174 /* For CM_KERNEL we know that all objects reside in the
4175 negative half of the 32-bit address space. We may not
4176 accept negative offsets, since they may fall just outside
4177 that range, but we may accept pretty large positive ones. */
4178 if (ix86_cmodel == CM_KERNEL
4179 && offset > 0
4180 && trunc_int_for_mode (offset, SImode) == offset)
4181 return 1;
4182 break;
4183 case LABEL_REF:
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
4186 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4187 && offset < 16*1024*1024
4188 && trunc_int_for_mode (offset, SImode) == offset)
4189 return 1;
4190 if (ix86_cmodel == CM_KERNEL
4191 && offset > 0
4192 && trunc_int_for_mode (offset, SImode) == offset)
4193 return 1;
4194 break;
4195 case UNSPEC:
4196 switch (XINT (op1, 1))
4197 {
4198 case UNSPEC_DTPOFF:
4199 case UNSPEC_NTPOFF:
4200 if (offset > 0
4201 && trunc_int_for_mode (offset, SImode) == offset)
4202 return 1;
4203 }
4204 break;
4205 default:
4206 return 0;
4207 }
4208 }
4209 return 0;
4210 default:
4211 return 0;
4212 }
4213 }
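
/* For illustration, the sign-extended 32-bit immediate field covers the
   range [-2^31, 2^31 - 1]: with a 64-bit HOST_WIDE_INT, (const_int -1) and
   (const_int 0x7fffffff) are accepted above, while (const_int 0x80000000)
   is rejected, because sign-extending its low 32 bits would give
   0xffffffff80000000 instead.  */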
4214
4215 /* Return 1 if VALUE can be stored in the zero-extended immediate field. */
4216 int
4217 x86_64_zero_extended_value (value)
4218 rtx value;
4219 {
4220 switch (GET_CODE (value))
4221 {
4222 case CONST_DOUBLE:
4223 if (HOST_BITS_PER_WIDE_INT == 32)
4224 return (GET_MODE (value) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value));
4226 else
4227 return 0;
4228 case CONST_INT:
4229 if (HOST_BITS_PER_WIDE_INT == 32)
4230 return INTVAL (value) >= 0;
4231 else
4232 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4233 break;
4234
4235 /* For certain code models, the symbolic references are known to fit. */
4236 case SYMBOL_REF:
4237 return ix86_cmodel == CM_SMALL;
4238
4239 /* For certain code models, the code is near as well. */
4240 case LABEL_REF:
4241 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4242
4243 /* We also may accept the offsetted memory references in certain special
4244 cases. */
4245 case CONST:
4246 if (GET_CODE (XEXP (value, 0)) == PLUS)
4247 {
4248 rtx op1 = XEXP (XEXP (value, 0), 0);
4249 rtx op2 = XEXP (XEXP (value, 0), 1);
4250
4251 if (ix86_cmodel == CM_LARGE)
4252 return 0;
4253 switch (GET_CODE (op1))
4254 {
4255 case SYMBOL_REF:
4256 return 0;
4257 /* For small code model we may accept pretty large positive
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of the NULL pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel == CM_SMALL
4262 && GET_CODE (op2) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2), SImode)
4265 == INTVAL (op2)))
4266 return 1;
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
4269 negative address space to positive, but perhaps this
4270 is not worthwhile. */
4271 break;
4272 case LABEL_REF:
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4276 && GET_CODE (op2) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2), SImode)
4279 == INTVAL (op2)))
4280 return 1;
4281 break;
4282 default:
4283 return 0;
4284 }
4285 }
4286 return 0;
4287 default:
4288 return 0;
4289 }
4290 }
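
/* For illustration, the zero-extended immediate field covers [0, 2^32 - 1]:
   with a 64-bit HOST_WIDE_INT, (const_int 0xffffffff) is accepted above
   (movl writes it with the upper 32 bits cleared), while (const_int -1) is
   rejected because its DImode value has all 64 bits set.  */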
4291
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4295
4296 int
4297 ix86_frame_pointer_required ()
4298 {
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4302 return 1;
4303
4304 /* Several x86 OSes need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4307 return 1;
4308
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf))
4314 return 1;
4315
4316 if (current_function_profile)
4317 return 1;
4318
4319 return 0;
4320 }
4321
4322 /* Record that the current function accesses previous call frames. */
4323
4324 void
4325 ix86_setup_frame_addresses ()
4326 {
4327 cfun->machine->accesses_prev_frame = 1;
4328 }
4329 \f
4330 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4332 #else
4333 # define USE_HIDDEN_LINKONCE 0
4334 #endif
4335
4336 static int pic_labels_used;
4337
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4340
4341 static void
4342 get_pc_thunk_name (name, regno)
4343 char name[32];
4344 unsigned int regno;
4345 {
4346 if (USE_HIDDEN_LINKONCE)
4347 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4348 else
4349 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4350 }
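
/* For illustration, with hidden/one-only support the thunk for %ebx is
   named "__i686.get_pc_thunk.bx"; otherwise a compiler-internal label is
   generated from the prefix "LPR" and the register number.  */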
4351
4352
4353 /* This function emits the pc thunks used by -fpic code: each thunk loads
4354 its register with the return address of the caller and then returns. */
4355
4356 void
4357 ix86_asm_file_end (file)
4358 FILE *file;
4359 {
4360 rtx xops[2];
4361 int regno;
4362
4363 for (regno = 0; regno < 8; ++regno)
4364 {
4365 char name[32];
4366
4367 if (! ((pic_labels_used >> regno) & 1))
4368 continue;
4369
4370 get_pc_thunk_name (name, regno);
4371
4372 if (USE_HIDDEN_LINKONCE)
4373 {
4374 tree decl;
4375
4376 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4377 error_mark_node);
4378 TREE_PUBLIC (decl) = 1;
4379 TREE_STATIC (decl) = 1;
4380 DECL_ONE_ONLY (decl) = 1;
4381
4382 (*targetm.asm_out.unique_section) (decl, 0);
4383 named_section (decl, NULL, 0);
4384
4385 (*targetm.asm_out.globalize_label) (file, name);
4386 fputs ("\t.hidden\t", file);
4387 assemble_name (file, name);
4388 fputc ('\n', file);
4389 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4390 }
4391 else
4392 {
4393 text_section ();
4394 ASM_OUTPUT_LABEL (file, name);
4395 }
4396
4397 xops[0] = gen_rtx_REG (SImode, regno);
4398 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4400 output_asm_insn ("ret", xops);
4401 }
4402 }
4403
4404 /* Emit code for the SET_GOT patterns. */
4405
4406 const char *
4407 output_set_got (dest)
4408 rtx dest;
4409 {
4410 rtx xops[3];
4411
4412 xops[0] = dest;
4413 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4414
4415 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4416 {
4417 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4418
4419 if (!flag_pic)
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4421 else
4422 output_asm_insn ("call\t%a2", xops);
4423
4424 #if TARGET_MACHO
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4428 #endif
4429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4431
4432 if (flag_pic)
4433 output_asm_insn ("pop{l}\t%0", xops);
4434 }
4435 else
4436 {
4437 char name[32];
4438 get_pc_thunk_name (name, REGNO (dest));
4439 pic_labels_used |= 1 << REGNO (dest);
4440
4441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4443 output_asm_insn ("call\t%X2", xops);
4444 }
4445
4446 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4448 else if (!TARGET_MACHO)
4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4450
4451 return "";
4452 }
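
/* For illustration, the classic -fpic sequence emitted above (without deep
   branch prediction) looks roughly like

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while the deep-branch-prediction variant calls the per-register pc thunk
   and then adds $_GLOBAL_OFFSET_TABLE_ to the result.  */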
4453
4454 /* Generate a "push" pattern for input ARG. */
4455
4456 static rtx
4457 gen_push (arg)
4458 rtx arg;
4459 {
4460 return gen_rtx_SET (VOIDmode,
4461 gen_rtx_MEM (Pmode,
4462 gen_rtx_PRE_DEC (Pmode,
4463 stack_pointer_rtx)),
4464 arg);
4465 }
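
/* For illustration, on a 32-bit target the RTL built above has the shape

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))

   which is a plain "pushl"; with a 64-bit Pmode the same shape becomes
   "pushq".  */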
4466
4467 /* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4469
4470 static unsigned int
4471 ix86_select_alt_pic_regnum ()
4472 {
4473 if (current_function_is_leaf && !current_function_profile)
4474 {
4475 int i;
4476 for (i = 2; i >= 0; --i)
4477 if (!regs_ever_live[i])
4478 return i;
4479 }
4480
4481 return INVALID_REGNUM;
4482 }
4483
4484 /* Return 1 if we need to save REGNO. */
4485 static int
4486 ix86_save_reg (regno, maybe_eh_return)
4487 unsigned int regno;
4488 int maybe_eh_return;
4489 {
4490 if (pic_offset_table_rtx
4491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4493 || current_function_profile
4494 || current_function_calls_eh_return))
4495 {
4496 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4497 return 0;
4498 return 1;
4499 }
4500
4501 if (current_function_calls_eh_return && maybe_eh_return)
4502 {
4503 unsigned i;
4504 for (i = 0; ; i++)
4505 {
4506 unsigned test = EH_RETURN_DATA_REGNO (i);
4507 if (test == INVALID_REGNUM)
4508 break;
4509 if (test == regno)
4510 return 1;
4511 }
4512 }
4513
4514 return (regs_ever_live[regno]
4515 && !call_used_regs[regno]
4516 && !fixed_regs[regno]
4517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4518 }
4519
4520 /* Return number of registers to be saved on the stack. */
4521
4522 static int
4523 ix86_nsaved_regs ()
4524 {
4525 int nregs = 0;
4526 int regno;
4527
4528 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4529 if (ix86_save_reg (regno, true))
4530 nregs++;
4531 return nregs;
4532 }
4533
4534 /* Return the offset between two registers, one to be eliminated, and the other
4535 its replacement, at the start of a routine. */
4536
4537 HOST_WIDE_INT
4538 ix86_initial_elimination_offset (from, to)
4539 int from;
4540 int to;
4541 {
4542 struct ix86_frame frame;
4543 ix86_compute_frame_layout (&frame);
4544
4545 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4546 return frame.hard_frame_pointer_offset;
4547 else if (from == FRAME_POINTER_REGNUM
4548 && to == HARD_FRAME_POINTER_REGNUM)
4549 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4550 else
4551 {
4552 if (to != STACK_POINTER_REGNUM)
4553 abort ();
4554 else if (from == ARG_POINTER_REGNUM)
4555 return frame.stack_pointer_offset;
4556 else if (from != FRAME_POINTER_REGNUM)
4557 abort ();
4558 else
4559 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4560 }
4561 }
4562
4563 /* Fill in the ix86_frame structure describing the frame of the current function. */
4564
4565 static void
4566 ix86_compute_frame_layout (frame)
4567 struct ix86_frame *frame;
4568 {
4569 HOST_WIDE_INT total_size;
4570 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4571 int offset;
4572 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4573 HOST_WIDE_INT size = get_frame_size ();
4574
4575 frame->nregs = ix86_nsaved_regs ();
4576 total_size = size;
4577
4578 /* Skip return address and saved base pointer. */
4579 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4580
4581 frame->hard_frame_pointer_offset = offset;
4582
4583 /* Do some sanity checking of stack_alignment_needed and
4584 preferred_alignment, since the i386 port is the only one using these
4585 features, which may break easily. */
4586
4587 if (size && !stack_alignment_needed)
4588 abort ();
4589 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4590 abort ();
4591 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4592 abort ();
4593 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4594 abort ();
4595
4596 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4597 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4598
4599 /* Register save area */
4600 offset += frame->nregs * UNITS_PER_WORD;
4601
4602 /* Va-arg area */
4603 if (ix86_save_varrargs_registers)
4604 {
4605 offset += X86_64_VARARGS_SIZE;
4606 frame->va_arg_size = X86_64_VARARGS_SIZE;
4607 }
4608 else
4609 frame->va_arg_size = 0;
4610
4611 /* Align start of frame for local function. */
4612 frame->padding1 = ((offset + stack_alignment_needed - 1)
4613 & -stack_alignment_needed) - offset;
4614
4615 offset += frame->padding1;
4616
4617 /* Frame pointer points here. */
4618 frame->frame_pointer_offset = offset;
4619
4620 offset += size;
4621
4622 /* Add outgoing arguments area. Can be skipped if we eliminated
4623 all the function calls as dead code. */
4624 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4625 {
4626 offset += current_function_outgoing_args_size;
4627 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4628 }
4629 else
4630 frame->outgoing_arguments_size = 0;
4631
4632 /* Align stack boundary. Only needed if we're calling another function
4633 or using alloca. */
4634 if (!current_function_is_leaf || current_function_calls_alloca)
4635 frame->padding2 = ((offset + preferred_alignment - 1)
4636 & -preferred_alignment) - offset;
4637 else
4638 frame->padding2 = 0;
4639
4640 offset += frame->padding2;
4641
4642 /* We've reached end of stack frame. */
4643 frame->stack_pointer_offset = offset;
4644
4645 /* Size prologue needs to allocate. */
4646 frame->to_allocate =
4647 (size + frame->padding1 + frame->padding2
4648 + frame->outgoing_arguments_size + frame->va_arg_size);
4649
4650 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4651 && current_function_is_leaf)
4652 {
4653 frame->red_zone_size = frame->to_allocate;
4654 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4655 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4656 }
4657 else
4658 frame->red_zone_size = 0;
4659 frame->to_allocate -= frame->red_zone_size;
4660 frame->stack_pointer_offset -= frame->red_zone_size;
4661 #if 0
4662 fprintf (stderr, "nregs: %i\n", frame->nregs);
4663 fprintf (stderr, "size: %i\n", size);
4664 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4665 fprintf (stderr, "padding1: %i\n", frame->padding1);
4666 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4667 fprintf (stderr, "padding2: %i\n", frame->padding2);
4668 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4669 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4670 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4671 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4672 frame->hard_frame_pointer_offset);
4673 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4674 #endif
4675 }
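
/* For illustration, for a 32-bit function with a frame pointer the layout
   computed above is roughly, from higher to lower addresses:

     return address
     saved %ebp			<- hard_frame_pointer_offset
     saved registers		(frame->nregs words)
     va_arg save area / padding1
     local variables		(get_frame_size () bytes)
     outgoing arguments / padding2
				<- stack_pointer_offset

   frame->to_allocate is everything below the saved registers, less any
   red zone used by x86-64 leaf functions.  */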
4676
4677 /* Emit code to save registers in the prologue. */
4678
4679 static void
4680 ix86_emit_save_regs ()
4681 {
4682 register int regno;
4683 rtx insn;
4684
4685 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4686 if (ix86_save_reg (regno, true))
4687 {
4688 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4689 RTX_FRAME_RELATED_P (insn) = 1;
4690 }
4691 }
4692
4693 /* Emit code to save registers using MOV insns. First register
4694 is saved at POINTER + OFFSET. */
4695 static void
4696 ix86_emit_save_regs_using_mov (pointer, offset)
4697 rtx pointer;
4698 HOST_WIDE_INT offset;
4699 {
4700 int regno;
4701 rtx insn;
4702
4703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4704 if (ix86_save_reg (regno, true))
4705 {
4706 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4707 Pmode, offset),
4708 gen_rtx_REG (Pmode, regno));
4709 RTX_FRAME_RELATED_P (insn) = 1;
4710 offset += UNITS_PER_WORD;
4711 }
4712 }
4713
4714 /* Expand the prologue into a bunch of separate insns. */
4715
4716 void
4717 ix86_expand_prologue ()
4718 {
4719 rtx insn;
4720 bool pic_reg_used;
4721 struct ix86_frame frame;
4722 int use_mov = 0;
4723 HOST_WIDE_INT allocate;
4724
4725 ix86_compute_frame_layout (&frame);
4726 if (!optimize_size)
4727 {
4728 int count = frame.nregs;
4729
4730 /* The fast prologue uses move instead of push to save registers. This
4731 is significantly longer, but also executes faster as modern hardware
4732 can execute the moves in parallel, but can't do that for push/pop.
4733
4734 Be careful about choosing which prologue to emit: when the function takes
4735 many instructions to execute, we may use the slow version, and likewise
4736 when the function is known to be outside a hot spot (this is known only
4737 with profile feedback). Weight the size of the function by the number of
4738 registers to save, as it is cheap to use one or two push instructions but
4739 very slow to use many of them. */
4740 if (count)
4741 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4742 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4743 || (flag_branch_probabilities
4744 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4745 use_fast_prologue_epilogue = 0;
4746 else
4747 use_fast_prologue_epilogue = !expensive_function_p (count);
4748 if (TARGET_PROLOGUE_USING_MOVE)
4749 use_mov = use_fast_prologue_epilogue;
4750 }
4751
4752 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4753 slower on all targets. Also sdb doesn't like it. */
4754
4755 if (frame_pointer_needed)
4756 {
4757 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4758 RTX_FRAME_RELATED_P (insn) = 1;
4759
4760 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4761 RTX_FRAME_RELATED_P (insn) = 1;
4762 }
4763
4764 allocate = frame.to_allocate;
4765 /* In case we are dealing with only a single register and an empty frame,
4766 a push is equivalent to the mov+add sequence. */
4767 if (allocate == 0 && frame.nregs <= 1)
4768 use_mov = 0;
4769
4770 if (!use_mov)
4771 ix86_emit_save_regs ();
4772 else
4773 allocate += frame.nregs * UNITS_PER_WORD;
4774
4775 if (allocate == 0)
4776 ;
4777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4778 {
4779 insn = emit_insn (gen_pro_epilogue_adjust_stack
4780 (stack_pointer_rtx, stack_pointer_rtx,
4781 GEN_INT (-allocate)));
4782 RTX_FRAME_RELATED_P (insn) = 1;
4783 }
4784 else
4785 {
4786 /* ??? Is this only valid for Win32? */
4787
4788 rtx arg0, sym;
4789
4790 if (TARGET_64BIT)
4791 abort ();
4792
4793 arg0 = gen_rtx_REG (SImode, 0);
4794 emit_move_insn (arg0, GEN_INT (allocate));
4795
4796 sym = gen_rtx_MEM (FUNCTION_MODE,
4797 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4798 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4799
4800 CALL_INSN_FUNCTION_USAGE (insn)
4801 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4802 CALL_INSN_FUNCTION_USAGE (insn));
4803 }
4804 if (use_mov)
4805 {
4806 if (!frame_pointer_needed || !frame.to_allocate)
4807 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4808 else
4809 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4810 -frame.nregs * UNITS_PER_WORD);
4811 }
4812
4813 #ifdef SUBTARGET_PROLOGUE
4814 SUBTARGET_PROLOGUE;
4815 #endif
4816
4817 pic_reg_used = false;
4818 if (pic_offset_table_rtx
4819 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4820 || current_function_profile))
4821 {
4822 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4823
4824 if (alt_pic_reg_used != INVALID_REGNUM)
4825 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4826
4827 pic_reg_used = true;
4828 }
4829
4830 if (pic_reg_used)
4831 {
4832 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4833
4834 /* Even with accurate pre-reload life analysis, we can wind up
4835 deleting all references to the pic register after reload.
4836 Consider if cross-jumping unifies two sides of a branch
4837 controlled by a comparison vs the only read from a global.
4838 In that case, allow the set_got to be deleted, though we're
4839 too late to do anything about the ebx save in the prologue. */
4840 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4841 }
4842
4843 /* Prevent function calls from being scheduled before the call to mcount.
4844 In the pic_reg_used case, make sure that the got load isn't deleted. */
4845 if (current_function_profile)
4846 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4847 }
4848
4849 /* Emit code to restore saved registers using MOV insns. First register
4850 is restored from POINTER + OFFSET. */
4851 static void
4852 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4853 rtx pointer;
4854 int offset;
4855 int maybe_eh_return;
4856 {
4857 int regno;
4858
4859 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4860 if (ix86_save_reg (regno, maybe_eh_return))
4861 {
4862 emit_move_insn (gen_rtx_REG (Pmode, regno),
4863 adjust_address (gen_rtx_MEM (Pmode, pointer),
4864 Pmode, offset));
4865 offset += UNITS_PER_WORD;
4866 }
4867 }
4868
4869 /* Restore function stack, frame, and registers. */
4870
4871 void
4872 ix86_expand_epilogue (style)
4873 int style;
4874 {
4875 int regno;
4876 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4877 struct ix86_frame frame;
4878 HOST_WIDE_INT offset;
4879
4880 ix86_compute_frame_layout (&frame);
4881
4882 /* Calculate the start of the saved registers relative to ebp. Special care
4883 must be taken for the normal return case of a function using
4884 eh_return: the eax and edx registers are marked as saved, but not
4885 restored along this path. */
4886 offset = frame.nregs;
4887 if (current_function_calls_eh_return && style != 2)
4888 offset -= 2;
4889 offset *= -UNITS_PER_WORD;
4890
4891 /* If we're only restoring one register and sp is not valid, then
4892 use a move instruction to restore the register, since it's
4893 less work than reloading sp and popping the register.
4894
4895 The default code results in a stack adjustment using an add/lea instruction,
4896 while this code results in a LEAVE instruction (or discrete equivalent),
4897 so it is profitable in some other cases as well, especially when there
4898 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4899 and there is exactly one register to pop. This heuristic may need some
4900 tuning in the future. */
4901 if ((!sp_valid && frame.nregs <= 1)
4902 || (TARGET_EPILOGUE_USING_MOVE
4903 && use_fast_prologue_epilogue
4904 && (frame.nregs > 1 || frame.to_allocate))
4905 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4906 || (frame_pointer_needed && TARGET_USE_LEAVE
4907 && use_fast_prologue_epilogue && frame.nregs == 1)
4908 || current_function_calls_eh_return)
4909 {
4910 /* Restore registers. We can use ebp or esp to address the memory
4911 locations. If both are available, default to ebp, since offsets
4912 are known to be small. The only exception is esp pointing directly to
4913 the end of the block of saved registers, where we may simplify the
4914 addressing mode. */
4915
4916 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4917 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4918 frame.to_allocate, style == 2);
4919 else
4920 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4921 offset, style == 2);
4922
4923 /* eh_return epilogues need %ecx added to the stack pointer. */
4924 if (style == 2)
4925 {
4926 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4927
4928 if (frame_pointer_needed)
4929 {
4930 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4931 tmp = plus_constant (tmp, UNITS_PER_WORD);
4932 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4933
4934 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4935 emit_move_insn (hard_frame_pointer_rtx, tmp);
4936
4937 emit_insn (gen_pro_epilogue_adjust_stack
4938 (stack_pointer_rtx, sa, const0_rtx));
4939 }
4940 else
4941 {
4942 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4943 tmp = plus_constant (tmp, (frame.to_allocate
4944 + frame.nregs * UNITS_PER_WORD));
4945 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4946 }
4947 }
4948 else if (!frame_pointer_needed)
4949 emit_insn (gen_pro_epilogue_adjust_stack
4950 (stack_pointer_rtx, stack_pointer_rtx,
4951 GEN_INT (frame.to_allocate
4952 + frame.nregs * UNITS_PER_WORD)));
4953 /* If not an i386, mov & pop is faster than "leave". */
4954 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4955 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4956 else
4957 {
4958 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4959 hard_frame_pointer_rtx,
4960 const0_rtx));
4961 if (TARGET_64BIT)
4962 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4963 else
4964 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4965 }
4966 }
4967 else
4968 {
4969 /* First step is to deallocate the stack frame so that we can
4970 pop the registers. */
4971 if (!sp_valid)
4972 {
4973 if (!frame_pointer_needed)
4974 abort ();
4975 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4976 hard_frame_pointer_rtx,
4977 GEN_INT (offset)));
4978 }
4979 else if (frame.to_allocate)
4980 emit_insn (gen_pro_epilogue_adjust_stack
4981 (stack_pointer_rtx, stack_pointer_rtx,
4982 GEN_INT (frame.to_allocate)));
4983
4984 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4985 if (ix86_save_reg (regno, false))
4986 {
4987 if (TARGET_64BIT)
4988 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4989 else
4990 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4991 }
4992 if (frame_pointer_needed)
4993 {
4994 /* Leave results in shorter dependency chains on CPUs that are
4995 able to grok it fast. */
4996 if (TARGET_USE_LEAVE)
4997 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4998 else if (TARGET_64BIT)
4999 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5000 else
5001 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5002 }
5003 }
5004
5005 /* Sibcall epilogues don't want a return instruction. */
5006 if (style == 0)
5007 return;
5008
5009 if (current_function_pops_args && current_function_args_size)
5010 {
5011 rtx popc = GEN_INT (current_function_pops_args);
5012
5013 /* The i386 can only pop 64K bytes with one return. If asked to pop more,
5014 pop the return address, do an explicit add, and jump indirectly to the
5015 caller. */
5016
5017 if (current_function_pops_args >= 65536)
5018 {
5019 rtx ecx = gen_rtx_REG (SImode, 2);
5020
5021 /* There is no "pascal" calling convention in the 64-bit ABI. */
5022 if (TARGET_64BIT)
5023 abort ();
5024
5025 emit_insn (gen_popsi1 (ecx));
5026 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5027 emit_jump_insn (gen_return_indirect_internal (ecx));
5028 }
5029 else
5030 emit_jump_insn (gen_return_pop_internal (popc));
5031 }
5032 else
5033 emit_jump_insn (gen_return_internal ());
5034 }
5035
5036 /* Reset from the function's potential modifications. */
5037
5038 static void
5039 ix86_output_function_epilogue (file, size)
5040 FILE *file ATTRIBUTE_UNUSED;
5041 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5042 {
5043 if (pic_offset_table_rtx)
5044 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5045 }
5046 \f
5047 /* Extract the parts of an RTL expression that is a valid memory address
5048 for an instruction. Return 0 if the structure of the address is
5049 grossly off. Return -1 if the address contains ASHIFT, so it is not
5050 strictly valid, but is still used for computing the length of an lea
5051 instruction. */
5052
5053 static int
5054 ix86_decompose_address (addr, out)
5055 register rtx addr;
5056 struct ix86_address *out;
5057 {
5058 rtx base = NULL_RTX;
5059 rtx index = NULL_RTX;
5060 rtx disp = NULL_RTX;
5061 HOST_WIDE_INT scale = 1;
5062 rtx scale_rtx = NULL_RTX;
5063 int retval = 1;
5064
5065 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5066 base = addr;
5067 else if (GET_CODE (addr) == PLUS)
5068 {
5069 rtx op0 = XEXP (addr, 0);
5070 rtx op1 = XEXP (addr, 1);
5071 enum rtx_code code0 = GET_CODE (op0);
5072 enum rtx_code code1 = GET_CODE (op1);
5073
5074 if (code0 == REG || code0 == SUBREG)
5075 {
5076 if (code1 == REG || code1 == SUBREG)
5077 index = op0, base = op1; /* index + base */
5078 else
5079 base = op0, disp = op1; /* base + displacement */
5080 }
5081 else if (code0 == MULT)
5082 {
5083 index = XEXP (op0, 0);
5084 scale_rtx = XEXP (op0, 1);
5085 if (code1 == REG || code1 == SUBREG)
5086 base = op1; /* index*scale + base */
5087 else
5088 disp = op1; /* index*scale + disp */
5089 }
5090 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5091 {
5092 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5093 scale_rtx = XEXP (XEXP (op0, 0), 1);
5094 base = XEXP (op0, 1);
5095 disp = op1;
5096 }
5097 else if (code0 == PLUS)
5098 {
5099 index = XEXP (op0, 0); /* index + base + disp */
5100 base = XEXP (op0, 1);
5101 disp = op1;
5102 }
5103 else
5104 return 0;
5105 }
5106 else if (GET_CODE (addr) == MULT)
5107 {
5108 index = XEXP (addr, 0); /* index*scale */
5109 scale_rtx = XEXP (addr, 1);
5110 }
5111 else if (GET_CODE (addr) == ASHIFT)
5112 {
5113 rtx tmp;
5114
5115 /* We're called for lea too, which implements ashift on occasion. */
5116 index = XEXP (addr, 0);
5117 tmp = XEXP (addr, 1);
5118 if (GET_CODE (tmp) != CONST_INT)
5119 return 0;
5120 scale = INTVAL (tmp);
5121 if ((unsigned HOST_WIDE_INT) scale > 3)
5122 return 0;
5123 scale = 1 << scale;
5124 retval = -1;
5125 }
5126 else
5127 disp = addr; /* displacement */
5128
5129 /* Extract the integral value of scale. */
5130 if (scale_rtx)
5131 {
5132 if (GET_CODE (scale_rtx) != CONST_INT)
5133 return 0;
5134 scale = INTVAL (scale_rtx);
5135 }
5136
5137 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
5138 if (base && index && scale == 1
5139 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5140 || index == stack_pointer_rtx))
5141 {
5142 rtx tmp = base;
5143 base = index;
5144 index = tmp;
5145 }
5146
5147 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5148 if ((base == hard_frame_pointer_rtx
5149 || base == frame_pointer_rtx
5150 || base == arg_pointer_rtx) && !disp)
5151 disp = const0_rtx;
5152
5153 /* Special case: on the K6, [%esi] forces the instruction to be vector decoded.
5154 Avoid this by transforming to [%esi+0]. */
5155 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5156 && base && !index && !disp
5157 && REG_P (base)
5158 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5159 disp = const0_rtx;
5160
5161 /* Special case: encode reg+reg instead of reg*2. */
5162 if (!base && index && scale && scale == 2)
5163 base = index, scale = 1;
5164
5165 /* Special case: scaling cannot be encoded without base or displacement. */
5166 if (!base && !disp && index && scale != 1)
5167 disp = const0_rtx;
5168
5169 out->base = base;
5170 out->index = index;
5171 out->disp = disp;
5172 out->scale = scale;
5173
5174 return retval;
5175 }
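
/* For illustration, an address like

     (plus:SI (plus:SI (mult:SI (reg:SI A) (const_int 4)) (reg:SI B))
	      (const_int 12))

   decomposes above into base = B, index = A, scale = 4, disp = 12,
   i.e. the "12(%B,%A,4)" form the assembler expects.  */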
5176 \f
5177 /* Return the cost of the memory address x.
5178 For i386, it is better to use a complex address than let gcc copy
5179 the address into a reg and make a new pseudo. But not if the address
5180 requires two regs - that would mean more pseudos with longer
5181 lifetimes. */
5182 int
5183 ix86_address_cost (x)
5184 rtx x;
5185 {
5186 struct ix86_address parts;
5187 int cost = 1;
5188
5189 if (!ix86_decompose_address (x, &parts))
5190 abort ();
5191
5192 if (parts.base && GET_CODE (parts.base) == SUBREG)
5193 parts.base = SUBREG_REG (parts.base);
5194 if (parts.index && GET_CODE (parts.index) == SUBREG)
5195 parts.index = SUBREG_REG (parts.index);
5196
5197 /* More complex memory references are better. */
5198 if (parts.disp && parts.disp != const0_rtx)
5199 cost--;
5200
5201 /* Attempt to minimize number of registers in the address. */
5202 if ((parts.base
5203 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5204 || (parts.index
5205 && (!REG_P (parts.index)
5206 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5207 cost++;
5208
5209 if (parts.base
5210 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5211 && parts.index
5212 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5213 && parts.base != parts.index)
5214 cost++;
5215
5216 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5217 since its predecode logic can't detect the length of such instructions
5218 and they degenerate to vector decoding. Increase the cost of such
5219 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5220 to split such addresses or even refuse them altogether.
5221
5222 The following addressing modes are affected:
5223 [base+scale*index]
5224 [scale*index+disp]
5225 [base+index]
5226
5227 The first and last cases may be avoidable by explicitly coding a zero
5228 displacement in the memory address, but I don't have an AMD K6 machine
5229 handy to check this theory. */
5230
5231 if (TARGET_K6
5232 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5233 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5234 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5235 cost += 10;
5236
5237 return cost;
5238 }
5239 \f
5240 /* If X is a machine specific address (i.e. a symbol or label being
5241 referenced as a displacement from the GOT implemented using an
5242 UNSPEC), then return the base term. Otherwise return X. */
5243
5244 rtx
5245 ix86_find_base_term (x)
5246 rtx x;
5247 {
5248 rtx term;
5249
5250 if (TARGET_64BIT)
5251 {
5252 if (GET_CODE (x) != CONST)
5253 return x;
5254 term = XEXP (x, 0);
5255 if (GET_CODE (term) == PLUS
5256 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5257 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5258 term = XEXP (term, 0);
5259 if (GET_CODE (term) != UNSPEC
5260 || XINT (term, 1) != UNSPEC_GOTPCREL)
5261 return x;
5262
5263 term = XVECEXP (term, 0, 0);
5264
5265 if (GET_CODE (term) != SYMBOL_REF
5266 && GET_CODE (term) != LABEL_REF)
5267 return x;
5268
5269 return term;
5270 }
5271
5272 if (GET_CODE (x) != PLUS
5273 || XEXP (x, 0) != pic_offset_table_rtx
5274 || GET_CODE (XEXP (x, 1)) != CONST)
5275 return x;
5276
5277 term = XEXP (XEXP (x, 1), 0);
5278
5279 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5280 term = XEXP (term, 0);
5281
5282 if (GET_CODE (term) != UNSPEC
5283 || XINT (term, 1) != UNSPEC_GOTOFF)
5284 return x;
5285
5286 term = XVECEXP (term, 0, 0);
5287
5288 if (GET_CODE (term) != SYMBOL_REF
5289 && GET_CODE (term) != LABEL_REF)
5290 return x;
5291
5292 return term;
5293 }
5294 \f
5295 /* Determine if a given RTX is a valid constant. We already know this
5296 satisfies CONSTANT_P. */
5297
5298 bool
5299 legitimate_constant_p (x)
5300 rtx x;
5301 {
5302 rtx inner;
5303
5304 switch (GET_CODE (x))
5305 {
5306 case SYMBOL_REF:
5307 /* TLS symbols are not constant. */
5308 if (tls_symbolic_operand (x, Pmode))
5309 return false;
5310 break;
5311
5312 case CONST:
5313 inner = XEXP (x, 0);
5314
5315 /* Offsets of TLS symbols are never valid.
5316 Discourage CSE from creating them. */
5317 if (GET_CODE (inner) == PLUS
5318 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5319 return false;
5320
5321 /* Only some unspecs are valid as "constants". */
5322 if (GET_CODE (inner) == UNSPEC)
5323 switch (XINT (inner, 1))
5324 {
5325 case UNSPEC_TPOFF:
5326 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5327 default:
5328 return false;
5329 }
5330 break;
5331
5332 default:
5333 break;
5334 }
5335
5336 /* Otherwise we handle everything else in the move patterns. */
5337 return true;
5338 }
5339
5340 /* Determine if it's legal to put X into the constant pool. This
5341 is not possible for the address of thread-local symbols, which
5342 is checked above. */
5343
5344 static bool
5345 ix86_cannot_force_const_mem (x)
5346 rtx x;
5347 {
5348 return !legitimate_constant_p (x);
5349 }
5350
5351 /* Determine if a given RTX is a valid constant address. */
5352
5353 bool
5354 constant_address_p (x)
5355 rtx x;
5356 {
5357 switch (GET_CODE (x))
5358 {
5359 case LABEL_REF:
5360 case CONST_INT:
5361 return true;
5362
5363 case CONST_DOUBLE:
5364 return TARGET_64BIT;
5365
5366 case CONST:
5367 /* For Mach-O, really believe the CONST. */
5368 if (TARGET_MACHO)
5369 return true;
5370 /* Otherwise fall through. */
5371 case SYMBOL_REF:
5372 return !flag_pic && legitimate_constant_p (x);
5373
5374 default:
5375 return false;
5376 }
5377 }
5378
5379 /* Nonzero if the constant value X is a legitimate general operand
5380 when generating PIC code. It is given that flag_pic is on and
5381 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5382
5383 bool
5384 legitimate_pic_operand_p (x)
5385 rtx x;
5386 {
5387 rtx inner;
5388
5389 switch (GET_CODE (x))
5390 {
5391 case CONST:
5392 inner = XEXP (x, 0);
5393
5394 /* Only some unspecs are valid as "constants". */
5395 if (GET_CODE (inner) == UNSPEC)
5396 switch (XINT (inner, 1))
5397 {
5398 case UNSPEC_TPOFF:
5399 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5400 default:
5401 return false;
5402 }
5403 /* FALLTHRU */
5404
5405 case SYMBOL_REF:
5406 case LABEL_REF:
5407 return legitimate_pic_address_disp_p (x);
5408
5409 default:
5410 return true;
5411 }
5412 }
5413
5414 /* Determine if a given CONST RTX is a valid memory displacement
5415 in PIC mode. */
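/* A rough sketch of what is accepted below for ia32: a displacement of the
   form (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is valid when
   "foo" is local, (const (unspec [...] UNSPEC_GOT)) is valid only without
   an extra PLUS offset, and a bare (symbol_ref "foo") is rejected.
   ("foo" is an illustrative name.)  */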
5416
5417 int
5418 legitimate_pic_address_disp_p (disp)
5419 register rtx disp;
5420 {
5421 bool saw_plus;
5422
5423 /* In 64bit mode we can allow direct addresses of symbols and labels
5424 when they are not dynamic symbols. */
5425 if (TARGET_64BIT)
5426 {
5427 /* TLS references should always be enclosed in UNSPEC. */
5428 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5429 return 0;
5430 if (GET_CODE (disp) == SYMBOL_REF
5431 && ix86_cmodel == CM_SMALL_PIC
5432 && (CONSTANT_POOL_ADDRESS_P (disp)
5433 || SYMBOL_REF_FLAG (disp)))
5434 return 1;
5435 if (GET_CODE (disp) == LABEL_REF)
5436 return 1;
5437 if (GET_CODE (disp) == CONST
5438 && GET_CODE (XEXP (disp, 0)) == PLUS
5439 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5440 && ix86_cmodel == CM_SMALL_PIC
5441 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5442 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5443 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5444 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5445 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5446 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5447 return 1;
5448 }
5449 if (GET_CODE (disp) != CONST)
5450 return 0;
5451 disp = XEXP (disp, 0);
5452
5453 if (TARGET_64BIT)
5454 {
5455 /* It is unsafe to allow PLUS expressions, as that would break the limit
5456 on the allowed distance of GOT references. We should not need these anyway. */
5457 if (GET_CODE (disp) != UNSPEC
5458 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5459 return 0;
5460
5461 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5462 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5463 return 0;
5464 return 1;
5465 }
5466
5467 saw_plus = false;
5468 if (GET_CODE (disp) == PLUS)
5469 {
5470 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5471 return 0;
5472 disp = XEXP (disp, 0);
5473 saw_plus = true;
5474 }
5475
5476 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5477 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5478 {
5479 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5480 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5481 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5482 {
5483 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5484 if (strstr (sym_name, "$pb") != 0)
5485 return 1;
5486 }
5487 }
5488
5489 if (GET_CODE (disp) != UNSPEC)
5490 return 0;
5491
5492 switch (XINT (disp, 1))
5493 {
5494 case UNSPEC_GOT:
5495 if (saw_plus)
5496 return false;
5497 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5498 case UNSPEC_GOTOFF:
5499 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5500 case UNSPEC_GOTTPOFF:
5501 case UNSPEC_GOTNTPOFF:
5502 case UNSPEC_INDNTPOFF:
5503 if (saw_plus)
5504 return false;
5505 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5506 case UNSPEC_NTPOFF:
5507 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5508 case UNSPEC_DTPOFF:
5509 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5510 }
5511
5512 return 0;
5513 }
5514
5515 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5516 memory address for an instruction. The MODE argument is the machine mode
5517 for the MEM expression that wants to use this address.
5518
5519 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5520 convert common non-canonical forms to canonical form so that they will
5521 be recognized. */
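/* A sketch of the canonical form checked here: an address such as

	(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))

   decomposes into base, index, scale 4 and displacement 12 and passes
   (subject to the register and displacement checks below), whereas a
   scale of 3, or a scale without an index, is rejected.  */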
5522
5523 int
5524 legitimate_address_p (mode, addr, strict)
5525 enum machine_mode mode;
5526 register rtx addr;
5527 int strict;
5528 {
5529 struct ix86_address parts;
5530 rtx base, index, disp;
5531 HOST_WIDE_INT scale;
5532 const char *reason = NULL;
5533 rtx reason_rtx = NULL_RTX;
5534
5535 if (TARGET_DEBUG_ADDR)
5536 {
5537 fprintf (stderr,
5538 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5539 GET_MODE_NAME (mode), strict);
5540 debug_rtx (addr);
5541 }
5542
5543 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5544 {
5545 if (TARGET_DEBUG_ADDR)
5546 fprintf (stderr, "Success.\n");
5547 return TRUE;
5548 }
5549
5550 if (ix86_decompose_address (addr, &parts) <= 0)
5551 {
5552 reason = "decomposition failed";
5553 goto report_error;
5554 }
5555
5556 base = parts.base;
5557 index = parts.index;
5558 disp = parts.disp;
5559 scale = parts.scale;
5560
5561 /* Validate base register.
5562
5563 Don't allow SUBREGs here; they can lead to spill failures when the base
5564 is one word of a two-word structure, which is represented internally
5565 as a DImode int.
5566
5567 if (base)
5568 {
5569 rtx reg;
5570 reason_rtx = base;
5571
5572 if (GET_CODE (base) == SUBREG)
5573 reg = SUBREG_REG (base);
5574 else
5575 reg = base;
5576
5577 if (GET_CODE (reg) != REG)
5578 {
5579 reason = "base is not a register";
5580 goto report_error;
5581 }
5582
5583 if (GET_MODE (base) != Pmode)
5584 {
5585 reason = "base is not in Pmode";
5586 goto report_error;
5587 }
5588
5589 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5590 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5591 {
5592 reason = "base is not valid";
5593 goto report_error;
5594 }
5595 }
5596
5597 /* Validate index register.
5598
5599 Don't allow SUBREGs here; they can lead to spill failures when the index
5600 is one word of a two-word structure, which is represented internally
5601 as a DImode int.
5602
5603 if (index)
5604 {
5605 rtx reg;
5606 reason_rtx = index;
5607
5608 if (GET_CODE (index) == SUBREG)
5609 reg = SUBREG_REG (index);
5610 else
5611 reg = index;
5612
5613 if (GET_CODE (reg) != REG)
5614 {
5615 reason = "index is not a register";
5616 goto report_error;
5617 }
5618
5619 if (GET_MODE (index) != Pmode)
5620 {
5621 reason = "index is not in Pmode";
5622 goto report_error;
5623 }
5624
5625 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5626 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5627 {
5628 reason = "index is not valid";
5629 goto report_error;
5630 }
5631 }
5632
5633 /* Validate scale factor. */
5634 if (scale != 1)
5635 {
5636 reason_rtx = GEN_INT (scale);
5637 if (!index)
5638 {
5639 reason = "scale without index";
5640 goto report_error;
5641 }
5642
5643 if (scale != 2 && scale != 4 && scale != 8)
5644 {
5645 reason = "scale is not a valid multiplier";
5646 goto report_error;
5647 }
5648 }
5649
5650 /* Validate displacement. */
5651 if (disp)
5652 {
5653 reason_rtx = disp;
5654
5655 if (GET_CODE (disp) == CONST
5656 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5657 switch (XINT (XEXP (disp, 0), 1))
5658 {
5659 case UNSPEC_GOT:
5660 case UNSPEC_GOTOFF:
5661 case UNSPEC_GOTPCREL:
5662 if (!flag_pic)
5663 abort ();
5664 goto is_legitimate_pic;
5665
5666 case UNSPEC_GOTTPOFF:
5667 case UNSPEC_GOTNTPOFF:
5668 case UNSPEC_INDNTPOFF:
5669 case UNSPEC_NTPOFF:
5670 case UNSPEC_DTPOFF:
5671 break;
5672
5673 default:
5674 reason = "invalid address unspec";
5675 goto report_error;
5676 }
5677
5678 else if (flag_pic && (SYMBOLIC_CONST (disp)
5679 #if TARGET_MACHO
5680 && !machopic_operand_p (disp)
5681 #endif
5682 ))
5683 {
5684 is_legitimate_pic:
5685 if (TARGET_64BIT && (index || base))
5686 {
5687 /* foo@dtpoff(%rX) is ok. */
5688 if (GET_CODE (disp) != CONST
5689 || GET_CODE (XEXP (disp, 0)) != PLUS
5690 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5691 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5692 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5693 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5694 {
5695 reason = "non-constant pic memory reference";
5696 goto report_error;
5697 }
5698 }
5699 else if (! legitimate_pic_address_disp_p (disp))
5700 {
5701 reason = "displacement is an invalid pic construct";
5702 goto report_error;
5703 }
5704
5705 /* This code used to verify that a symbolic pic displacement
5706 includes the pic_offset_table_rtx register.
5707
5708 While this is a good idea, unfortunately these constructs may
5709 be created by the "adds using lea" optimization for incorrect
5710 code like:
5711
5712 int a;
5713 int foo(int i)
5714 {
5715 return *(&a+i);
5716 }
5717
5718 This code is nonsensical, but it results in addressing
5719 the GOT table with the pic_offset_table_rtx base. We can't
5720 just refuse it easily, since it gets matched by the
5721 "addsi3" pattern, which later gets split into an lea when the
5722 output register differs from the input. While this
5723 could be handled by a separate addsi pattern for this case
5724 that never results in an lea, disabling this test seems to be
5725 the easier and correct fix for the crash. */
5726 }
5727 else if (!CONSTANT_ADDRESS_P (disp))
5728 {
5729 reason = "displacement is not constant";
5730 goto report_error;
5731 }
5732 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5733 {
5734 reason = "displacement is out of range";
5735 goto report_error;
5736 }
5737 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5738 {
5739 reason = "displacement is a const_double";
5740 goto report_error;
5741 }
5742 }
5743
5744 /* Everything looks valid. */
5745 if (TARGET_DEBUG_ADDR)
5746 fprintf (stderr, "Success.\n");
5747 return TRUE;
5748
5749 report_error:
5750 if (TARGET_DEBUG_ADDR)
5751 {
5752 fprintf (stderr, "Error: %s\n", reason);
5753 debug_rtx (reason_rtx);
5754 }
5755 return FALSE;
5756 }
5757 \f
5758 /* Return a unique alias set for the GOT. */
5759
5760 static HOST_WIDE_INT
5761 ix86_GOT_alias_set ()
5762 {
5763 static HOST_WIDE_INT set = -1;
5764 if (set == -1)
5765 set = new_alias_set ();
5766 return set;
5767 }
5768
5769 /* Return a legitimate reference for ORIG (an address) using the
5770 register REG. If REG is 0, a new pseudo is generated.
5771
5772 There are two types of references that must be handled:
5773
5774 1. Global data references must load the address from the GOT, via
5775 the PIC reg. An insn is emitted to do this load, and the reg is
5776 returned.
5777
5778 2. Static data references, constant pool addresses, and code labels
5779 compute the address as an offset from the GOT, whose base is in
5780 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5781 differentiate them from global data objects. The returned
5782 address is the PIC reg + an unspec constant.
5783
5784 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5785 reg also appears in the address. */
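/* Roughly, on ia32 the two cases above correspond to assembly like

	movl	foo@GOT(%ebx), %eax
	leal	bar@GOTOFF(%ebx), %eax

   with "foo" a global object and "bar" a static one (both names are
   illustrative only).  */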
5786
5787 rtx
5788 legitimize_pic_address (orig, reg)
5789 rtx orig;
5790 rtx reg;
5791 {
5792 rtx addr = orig;
5793 rtx new = orig;
5794 rtx base;
5795
5796 #if TARGET_MACHO
5797 if (reg == 0)
5798 reg = gen_reg_rtx (Pmode);
5799 /* Use the generic Mach-O PIC machinery. */
5800 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5801 #endif
5802
5803 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5804 new = addr;
5805 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5806 {
5807 /* This symbol may be referenced via a displacement from the PIC
5808 base address (@GOTOFF). */
5809
5810 if (reload_in_progress)
5811 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5812 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5813 new = gen_rtx_CONST (Pmode, new);
5814 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5815
5816 if (reg != 0)
5817 {
5818 emit_move_insn (reg, new);
5819 new = reg;
5820 }
5821 }
5822 else if (GET_CODE (addr) == SYMBOL_REF)
5823 {
5824 if (TARGET_64BIT)
5825 {
5826 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5827 new = gen_rtx_CONST (Pmode, new);
5828 new = gen_rtx_MEM (Pmode, new);
5829 RTX_UNCHANGING_P (new) = 1;
5830 set_mem_alias_set (new, ix86_GOT_alias_set ());
5831
5832 if (reg == 0)
5833 reg = gen_reg_rtx (Pmode);
5834 /* Use gen_movsi directly, otherwise the address is loaded
5835 into a register for CSE. We don't want to CSE these addresses;
5836 instead we CSE addresses from the GOT table, so skip this. */
5837 emit_insn (gen_movsi (reg, new));
5838 new = reg;
5839 }
5840 else
5841 {
5842 /* This symbol must be referenced via a load from the
5843 Global Offset Table (@GOT). */
5844
5845 if (reload_in_progress)
5846 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5847 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5848 new = gen_rtx_CONST (Pmode, new);
5849 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5850 new = gen_rtx_MEM (Pmode, new);
5851 RTX_UNCHANGING_P (new) = 1;
5852 set_mem_alias_set (new, ix86_GOT_alias_set ());
5853
5854 if (reg == 0)
5855 reg = gen_reg_rtx (Pmode);
5856 emit_move_insn (reg, new);
5857 new = reg;
5858 }
5859 }
5860 else
5861 {
5862 if (GET_CODE (addr) == CONST)
5863 {
5864 addr = XEXP (addr, 0);
5865
5866 /* We must match stuff we generate before. Assume the only
5867 unspecs that can get here are ours. Not that we could do
5868 anything with them anyway... */
5869 if (GET_CODE (addr) == UNSPEC
5870 || (GET_CODE (addr) == PLUS
5871 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5872 return orig;
5873 if (GET_CODE (addr) != PLUS)
5874 abort ();
5875 }
5876 if (GET_CODE (addr) == PLUS)
5877 {
5878 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5879
5880 /* Check first to see if this is a constant offset from a @GOTOFF
5881 symbol reference. */
5882 if (local_symbolic_operand (op0, Pmode)
5883 && GET_CODE (op1) == CONST_INT)
5884 {
5885 if (!TARGET_64BIT)
5886 {
5887 if (reload_in_progress)
5888 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5889 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5890 UNSPEC_GOTOFF);
5891 new = gen_rtx_PLUS (Pmode, new, op1);
5892 new = gen_rtx_CONST (Pmode, new);
5893 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5894
5895 if (reg != 0)
5896 {
5897 emit_move_insn (reg, new);
5898 new = reg;
5899 }
5900 }
5901 else
5902 {
5903 if (INTVAL (op1) < -16*1024*1024
5904 || INTVAL (op1) >= 16*1024*1024)
5905 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5906 }
5907 }
5908 else
5909 {
5910 base = legitimize_pic_address (XEXP (addr, 0), reg);
5911 new = legitimize_pic_address (XEXP (addr, 1),
5912 base == reg ? NULL_RTX : reg);
5913
5914 if (GET_CODE (new) == CONST_INT)
5915 new = plus_constant (base, INTVAL (new));
5916 else
5917 {
5918 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5919 {
5920 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5921 new = XEXP (new, 1);
5922 }
5923 new = gen_rtx_PLUS (Pmode, base, new);
5924 }
5925 }
5926 }
5927 }
5928 return new;
5929 }
5930
5931 static void
5932 ix86_encode_section_info (decl, first)
5933 tree decl;
5934 int first ATTRIBUTE_UNUSED;
5935 {
5936 bool local_p = (*targetm.binds_local_p) (decl);
5937 rtx rtl, symbol;
5938
5939 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5940 if (GET_CODE (rtl) != MEM)
5941 return;
5942 symbol = XEXP (rtl, 0);
5943 if (GET_CODE (symbol) != SYMBOL_REF)
5944 return;
5945
5946 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5947 symbol so that we may access it directly in the GOT. */
5948
5949 if (flag_pic)
5950 SYMBOL_REF_FLAG (symbol) = local_p;
5951
5952 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5953 "local dynamic", "initial exec" or "local exec" TLS models
5954 respectively. */
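/* (So, for example, a global-dynamic thread-local symbol "foo" is renamed
   to "%Gfoo" below; ix86_strip_name_encoding undoes the two-character
   prefix when the name is printed.  "foo" is an illustrative name.)  */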
5955
5956 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5957 {
5958 const char *symbol_str;
5959 char *newstr;
5960 size_t len;
5961 enum tls_model kind = decl_tls_model (decl);
5962
5963 if (TARGET_64BIT && ! flag_pic)
5964 {
5965 /* x86-64 doesn't allow non-pic code for shared libraries,
5966 so don't generate GD/LD TLS models for non-pic code. */
5967 switch (kind)
5968 {
5969 case TLS_MODEL_GLOBAL_DYNAMIC:
5970 kind = TLS_MODEL_INITIAL_EXEC; break;
5971 case TLS_MODEL_LOCAL_DYNAMIC:
5972 kind = TLS_MODEL_LOCAL_EXEC; break;
5973 default:
5974 break;
5975 }
5976 }
5977
5978 symbol_str = XSTR (symbol, 0);
5979
5980 if (symbol_str[0] == '%')
5981 {
5982 if (symbol_str[1] == tls_model_chars[kind])
5983 return;
5984 symbol_str += 2;
5985 }
5986 len = strlen (symbol_str) + 1;
5987 newstr = alloca (len + 2);
5988
5989 newstr[0] = '%';
5990 newstr[1] = tls_model_chars[kind];
5991 memcpy (newstr + 2, symbol_str, len);
5992
5993 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5994 }
5995 }
5996
5997 /* Undo the above when printing symbol names. */
5998
5999 static const char *
6000 ix86_strip_name_encoding (str)
6001 const char *str;
6002 {
6003 if (str[0] == '%')
6004 str += 2;
6005 if (str [0] == '*')
6006 str += 1;
6007 return str;
6008 }
6009 \f
6010 /* Load the thread pointer into a register. */
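/* (The UNSPEC_TP reference built below is ultimately printed as a load
   from %gs:0 on ia32, or %fs:0 on x86-64; see print_operand_address.)  */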
6011
6012 static rtx
6013 get_thread_pointer ()
6014 {
6015 rtx tp;
6016
6017 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6018 tp = gen_rtx_MEM (Pmode, tp);
6019 RTX_UNCHANGING_P (tp) = 1;
6020 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6021 tp = force_reg (Pmode, tp);
6022
6023 return tp;
6024 }
6025
6026 /* Try machine-dependent ways of modifying an illegitimate address
6027 to be legitimate. If we find one, return the new, valid address.
6028 This macro is used in only one place: `memory_address' in explow.c.
6029
6030 OLDX is the address as it was before break_out_memory_refs was called.
6031 In some cases it is useful to look at this to decide what needs to be done.
6032
6033 MODE and WIN are passed so that this macro can use
6034 GO_IF_LEGITIMATE_ADDRESS.
6035
6036 It is always safe for this macro to do nothing. It exists to recognize
6037 opportunities to optimize the output.
6038
6039 For the 80386, we handle X+REG by loading X into a register R and
6040 using R+REG. R will go in a general reg and indexing will be used.
6041 However, if REG is a broken-out memory address or multiplication,
6042 nothing needs to be done because REG can certainly go in a general reg.
6043
6044 When -fpic is used, special handling is needed for symbolic references.
6045 See comments by legitimize_pic_address in i386.c for details. */
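/* A small sketch of the X+REG case described above (non-PIC case): given
   an address like (plus (symbol_ref "x") (reg)), the symbol is forced into
   a temporary register R and the address becomes (plus R (reg)), which can
   then use base+index addressing.  ("x" is an illustrative name.)  */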
6046
6047 rtx
6048 legitimize_address (x, oldx, mode)
6049 register rtx x;
6050 register rtx oldx ATTRIBUTE_UNUSED;
6051 enum machine_mode mode;
6052 {
6053 int changed = 0;
6054 unsigned log;
6055
6056 if (TARGET_DEBUG_ADDR)
6057 {
6058 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6059 GET_MODE_NAME (mode));
6060 debug_rtx (x);
6061 }
6062
6063 log = tls_symbolic_operand (x, mode);
6064 if (log)
6065 {
6066 rtx dest, base, off, pic;
6067 int type;
6068
6069 switch (log)
6070 {
6071 case TLS_MODEL_GLOBAL_DYNAMIC:
6072 dest = gen_reg_rtx (Pmode);
6073 if (TARGET_64BIT)
6074 {
6075 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6076
6077 start_sequence ();
6078 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6079 insns = get_insns ();
6080 end_sequence ();
6081
6082 emit_libcall_block (insns, dest, rax, x);
6083 }
6084 else
6085 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6086 break;
6087
6088 case TLS_MODEL_LOCAL_DYNAMIC:
6089 base = gen_reg_rtx (Pmode);
6090 if (TARGET_64BIT)
6091 {
6092 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6093
6094 start_sequence ();
6095 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6096 insns = get_insns ();
6097 end_sequence ();
6098
6099 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6100 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6101 emit_libcall_block (insns, base, rax, note);
6102 }
6103 else
6104 emit_insn (gen_tls_local_dynamic_base_32 (base));
6105
6106 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6107 off = gen_rtx_CONST (Pmode, off);
6108
6109 return gen_rtx_PLUS (Pmode, base, off);
6110
6111 case TLS_MODEL_INITIAL_EXEC:
6112 if (TARGET_64BIT)
6113 {
6114 pic = NULL;
6115 type = UNSPEC_GOTNTPOFF;
6116 }
6117 else if (flag_pic)
6118 {
6119 if (reload_in_progress)
6120 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6121 pic = pic_offset_table_rtx;
6122 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6123 }
6124 else if (!TARGET_GNU_TLS)
6125 {
6126 pic = gen_reg_rtx (Pmode);
6127 emit_insn (gen_set_got (pic));
6128 type = UNSPEC_GOTTPOFF;
6129 }
6130 else
6131 {
6132 pic = NULL;
6133 type = UNSPEC_INDNTPOFF;
6134 }
6135
6136 base = get_thread_pointer ();
6137
6138 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6139 off = gen_rtx_CONST (Pmode, off);
6140 if (pic)
6141 off = gen_rtx_PLUS (Pmode, pic, off);
6142 off = gen_rtx_MEM (Pmode, off);
6143 RTX_UNCHANGING_P (off) = 1;
6144 set_mem_alias_set (off, ix86_GOT_alias_set ());
6145 dest = gen_reg_rtx (Pmode);
6146
6147 if (TARGET_64BIT || TARGET_GNU_TLS)
6148 {
6149 emit_move_insn (dest, off);
6150 return gen_rtx_PLUS (Pmode, base, dest);
6151 }
6152 else
6153 emit_insn (gen_subsi3 (dest, base, off));
6154 break;
6155
6156 case TLS_MODEL_LOCAL_EXEC:
6157 base = get_thread_pointer ();
6158
6159 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6160 (TARGET_64BIT || TARGET_GNU_TLS)
6161 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6162 off = gen_rtx_CONST (Pmode, off);
6163
6164 if (TARGET_64BIT || TARGET_GNU_TLS)
6165 return gen_rtx_PLUS (Pmode, base, off);
6166 else
6167 {
6168 dest = gen_reg_rtx (Pmode);
6169 emit_insn (gen_subsi3 (dest, base, off));
6170 }
6171 break;
6172
6173 default:
6174 abort ();
6175 }
6176
6177 return dest;
6178 }
6179
6180 if (flag_pic && SYMBOLIC_CONST (x))
6181 return legitimize_pic_address (x, 0);
6182
6183 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6184 if (GET_CODE (x) == ASHIFT
6185 && GET_CODE (XEXP (x, 1)) == CONST_INT
6186 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6187 {
6188 changed = 1;
6189 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6190 GEN_INT (1 << log));
6191 }
6192
6193 if (GET_CODE (x) == PLUS)
6194 {
6195 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6196
6197 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6198 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6199 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6200 {
6201 changed = 1;
6202 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6203 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6204 GEN_INT (1 << log));
6205 }
6206
6207 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6208 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6209 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6210 {
6211 changed = 1;
6212 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6213 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6214 GEN_INT (1 << log));
6215 }
6216
6217 /* Put multiply first if it isn't already. */
6218 if (GET_CODE (XEXP (x, 1)) == MULT)
6219 {
6220 rtx tmp = XEXP (x, 0);
6221 XEXP (x, 0) = XEXP (x, 1);
6222 XEXP (x, 1) = tmp;
6223 changed = 1;
6224 }
6225
6226 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6227 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6228 created by virtual register instantiation, register elimination, and
6229 similar optimizations. */
6230 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6231 {
6232 changed = 1;
6233 x = gen_rtx_PLUS (Pmode,
6234 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6235 XEXP (XEXP (x, 1), 0)),
6236 XEXP (XEXP (x, 1), 1));
6237 }
6238
6239 /* Canonicalize
6240 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6241 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6242 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6243 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6244 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6245 && CONSTANT_P (XEXP (x, 1)))
6246 {
6247 rtx constant;
6248 rtx other = NULL_RTX;
6249
6250 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6251 {
6252 constant = XEXP (x, 1);
6253 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6254 }
6255 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6256 {
6257 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6258 other = XEXP (x, 1);
6259 }
6260 else
6261 constant = 0;
6262
6263 if (constant)
6264 {
6265 changed = 1;
6266 x = gen_rtx_PLUS (Pmode,
6267 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6268 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6269 plus_constant (other, INTVAL (constant)));
6270 }
6271 }
6272
6273 if (changed && legitimate_address_p (mode, x, FALSE))
6274 return x;
6275
6276 if (GET_CODE (XEXP (x, 0)) == MULT)
6277 {
6278 changed = 1;
6279 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6280 }
6281
6282 if (GET_CODE (XEXP (x, 1)) == MULT)
6283 {
6284 changed = 1;
6285 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6286 }
6287
6288 if (changed
6289 && GET_CODE (XEXP (x, 1)) == REG
6290 && GET_CODE (XEXP (x, 0)) == REG)
6291 return x;
6292
6293 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6294 {
6295 changed = 1;
6296 x = legitimize_pic_address (x, 0);
6297 }
6298
6299 if (changed && legitimate_address_p (mode, x, FALSE))
6300 return x;
6301
6302 if (GET_CODE (XEXP (x, 0)) == REG)
6303 {
6304 register rtx temp = gen_reg_rtx (Pmode);
6305 register rtx val = force_operand (XEXP (x, 1), temp);
6306 if (val != temp)
6307 emit_move_insn (temp, val);
6308
6309 XEXP (x, 1) = temp;
6310 return x;
6311 }
6312
6313 else if (GET_CODE (XEXP (x, 1)) == REG)
6314 {
6315 register rtx temp = gen_reg_rtx (Pmode);
6316 register rtx val = force_operand (XEXP (x, 0), temp);
6317 if (val != temp)
6318 emit_move_insn (temp, val);
6319
6320 XEXP (x, 0) = temp;
6321 return x;
6322 }
6323 }
6324
6325 return x;
6326 }
6327 \f
6328 /* Print an integer constant expression in assembler syntax. Addition
6329 and subtraction are the only arithmetic that may appear in these
6330 expressions. FILE is the stdio stream to write to, X is the rtx, and
6331 CODE is the operand print code from the output string. */
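/* For example, the UNSPEC wrappers handled below print as relocation
   suffixes on the symbol name:

	foo@GOT    foo@GOTOFF    foo@GOTPCREL(%rip)
	foo@TPOFF  foo@NTPOFF    foo@DTPOFF

   and a SYMBOL_REF printed with code 'P' may get an @PLT suffix ("foo"
   stands for whatever symbol is being printed).  */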
6332
6333 static void
6334 output_pic_addr_const (file, x, code)
6335 FILE *file;
6336 rtx x;
6337 int code;
6338 {
6339 char buf[256];
6340
6341 switch (GET_CODE (x))
6342 {
6343 case PC:
6344 if (flag_pic)
6345 putc ('.', file);
6346 else
6347 abort ();
6348 break;
6349
6350 case SYMBOL_REF:
6351 assemble_name (file, XSTR (x, 0));
6352 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6353 fputs ("@PLT", file);
6354 break;
6355
6356 case LABEL_REF:
6357 x = XEXP (x, 0);
6358 /* FALLTHRU */
6359 case CODE_LABEL:
6360 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6361 assemble_name (file, buf);
6362 break;
6363
6364 case CONST_INT:
6365 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6366 break;
6367
6368 case CONST:
6369 /* This used to output parentheses around the expression,
6370 but that does not work on the 386 (either ATT or BSD assembler). */
6371 output_pic_addr_const (file, XEXP (x, 0), code);
6372 break;
6373
6374 case CONST_DOUBLE:
6375 if (GET_MODE (x) == VOIDmode)
6376 {
6377 /* We can use %d if the number is <32 bits and positive. */
6378 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6379 fprintf (file, "0x%lx%08lx",
6380 (unsigned long) CONST_DOUBLE_HIGH (x),
6381 (unsigned long) CONST_DOUBLE_LOW (x));
6382 else
6383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6384 }
6385 else
6386 /* We can't handle floating point constants;
6387 PRINT_OPERAND must handle them. */
6388 output_operand_lossage ("floating constant misused");
6389 break;
6390
6391 case PLUS:
6392 /* Some assemblers need integer constants to appear first. */
6393 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6394 {
6395 output_pic_addr_const (file, XEXP (x, 0), code);
6396 putc ('+', file);
6397 output_pic_addr_const (file, XEXP (x, 1), code);
6398 }
6399 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6400 {
6401 output_pic_addr_const (file, XEXP (x, 1), code);
6402 putc ('+', file);
6403 output_pic_addr_const (file, XEXP (x, 0), code);
6404 }
6405 else
6406 abort ();
6407 break;
6408
6409 case MINUS:
6410 if (!TARGET_MACHO)
6411 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6412 output_pic_addr_const (file, XEXP (x, 0), code);
6413 putc ('-', file);
6414 output_pic_addr_const (file, XEXP (x, 1), code);
6415 if (!TARGET_MACHO)
6416 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6417 break;
6418
6419 case UNSPEC:
6420 if (XVECLEN (x, 0) != 1)
6421 abort ();
6422 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6423 switch (XINT (x, 1))
6424 {
6425 case UNSPEC_GOT:
6426 fputs ("@GOT", file);
6427 break;
6428 case UNSPEC_GOTOFF:
6429 fputs ("@GOTOFF", file);
6430 break;
6431 case UNSPEC_GOTPCREL:
6432 fputs ("@GOTPCREL(%rip)", file);
6433 break;
6434 case UNSPEC_GOTTPOFF:
6435 /* FIXME: This might be @TPOFF in Sun ld too. */
6436 fputs ("@GOTTPOFF", file);
6437 break;
6438 case UNSPEC_TPOFF:
6439 fputs ("@TPOFF", file);
6440 break;
6441 case UNSPEC_NTPOFF:
6442 if (TARGET_64BIT)
6443 fputs ("@TPOFF", file);
6444 else
6445 fputs ("@NTPOFF", file);
6446 break;
6447 case UNSPEC_DTPOFF:
6448 fputs ("@DTPOFF", file);
6449 break;
6450 case UNSPEC_GOTNTPOFF:
6451 if (TARGET_64BIT)
6452 fputs ("@GOTTPOFF(%rip)", file);
6453 else
6454 fputs ("@GOTNTPOFF", file);
6455 break;
6456 case UNSPEC_INDNTPOFF:
6457 fputs ("@INDNTPOFF", file);
6458 break;
6459 default:
6460 output_operand_lossage ("invalid UNSPEC as operand");
6461 break;
6462 }
6463 break;
6464
6465 default:
6466 output_operand_lossage ("invalid expression as operand");
6467 }
6468 }
6469
6470 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6471 We need to handle our special PIC relocations. */
6472
6473 void
6474 i386_dwarf_output_addr_const (file, x)
6475 FILE *file;
6476 rtx x;
6477 {
6478 #ifdef ASM_QUAD
6479 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6480 #else
6481 if (TARGET_64BIT)
6482 abort ();
6483 fprintf (file, "%s", ASM_LONG);
6484 #endif
6485 if (flag_pic)
6486 output_pic_addr_const (file, x, '\0');
6487 else
6488 output_addr_const (file, x);
6489 fputc ('\n', file);
6490 }
6491
6492 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6493 We need to emit DTP-relative relocations. */
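/* For a 4-byte entry this emits, roughly,

	.long	foo@DTPOFF

   and for an 8-byte entry the same line followed by ", 0" to fill the
   upper half ("foo" stands for the symbol X refers to).  */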
6494
6495 void
6496 i386_output_dwarf_dtprel (file, size, x)
6497 FILE *file;
6498 int size;
6499 rtx x;
6500 {
6501 fputs (ASM_LONG, file);
6502 output_addr_const (file, x);
6503 fputs ("@DTPOFF", file);
6504 switch (size)
6505 {
6506 case 4:
6507 break;
6508 case 8:
6509 fputs (", 0", file);
6510 break;
6511 default:
6512 abort ();
6513 }
6514 }
6515
6516 /* In the name of slightly smaller debug output, and to cater to
6517 general assembler lossage, recognize PIC+GOTOFF and turn it back
6518 into a direct symbol reference. */
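/* E.g. (a sketch of the ia32 case)

	(plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   simplifies back to (symbol_ref "foo"); the base+index variants handled
   below keep the remaining register term.  ("foo" is a made-up name.)  */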
6519
6520 rtx
6521 i386_simplify_dwarf_addr (orig_x)
6522 rtx orig_x;
6523 {
6524 rtx x = orig_x, y;
6525
6526 if (GET_CODE (x) == MEM)
6527 x = XEXP (x, 0);
6528
6529 if (TARGET_64BIT)
6530 {
6531 if (GET_CODE (x) != CONST
6532 || GET_CODE (XEXP (x, 0)) != UNSPEC
6533 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6534 || GET_CODE (orig_x) != MEM)
6535 return orig_x;
6536 return XVECEXP (XEXP (x, 0), 0, 0);
6537 }
6538
6539 if (GET_CODE (x) != PLUS
6540 || GET_CODE (XEXP (x, 1)) != CONST)
6541 return orig_x;
6542
6543 if (GET_CODE (XEXP (x, 0)) == REG
6544 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6545 /* %ebx + GOT/GOTOFF */
6546 y = NULL;
6547 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6548 {
6549 /* %ebx + %reg * scale + GOT/GOTOFF */
6550 y = XEXP (x, 0);
6551 if (GET_CODE (XEXP (y, 0)) == REG
6552 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6553 y = XEXP (y, 1);
6554 else if (GET_CODE (XEXP (y, 1)) == REG
6555 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6556 y = XEXP (y, 0);
6557 else
6558 return orig_x;
6559 if (GET_CODE (y) != REG
6560 && GET_CODE (y) != MULT
6561 && GET_CODE (y) != ASHIFT)
6562 return orig_x;
6563 }
6564 else
6565 return orig_x;
6566
6567 x = XEXP (XEXP (x, 1), 0);
6568 if (GET_CODE (x) == UNSPEC
6569 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6570 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6571 {
6572 if (y)
6573 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6574 return XVECEXP (x, 0, 0);
6575 }
6576
6577 if (GET_CODE (x) == PLUS
6578 && GET_CODE (XEXP (x, 0)) == UNSPEC
6579 && GET_CODE (XEXP (x, 1)) == CONST_INT
6580 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6581 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6582 && GET_CODE (orig_x) != MEM)))
6583 {
6584 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6585 if (y)
6586 return gen_rtx_PLUS (Pmode, y, x);
6587 return x;
6588 }
6589
6590 return orig_x;
6591 }
6592 \f
6593 static void
6594 put_condition_code (code, mode, reverse, fp, file)
6595 enum rtx_code code;
6596 enum machine_mode mode;
6597 int reverse, fp;
6598 FILE *file;
6599 {
6600 const char *suffix;
6601
6602 if (mode == CCFPmode || mode == CCFPUmode)
6603 {
6604 enum rtx_code second_code, bypass_code;
6605 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6606 if (bypass_code != NIL || second_code != NIL)
6607 abort ();
6608 code = ix86_fp_compare_code_to_integer (code);
6609 mode = CCmode;
6610 }
6611 if (reverse)
6612 code = reverse_condition (code);
6613
6614 switch (code)
6615 {
6616 case EQ:
6617 suffix = "e";
6618 break;
6619 case NE:
6620 suffix = "ne";
6621 break;
6622 case GT:
6623 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6624 abort ();
6625 suffix = "g";
6626 break;
6627 case GTU:
6628 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6629 Those same assemblers have the same but opposite lossage on cmov. */
6630 if (mode != CCmode)
6631 abort ();
6632 suffix = fp ? "nbe" : "a";
6633 break;
6634 case LT:
6635 if (mode == CCNOmode || mode == CCGOCmode)
6636 suffix = "s";
6637 else if (mode == CCmode || mode == CCGCmode)
6638 suffix = "l";
6639 else
6640 abort ();
6641 break;
6642 case LTU:
6643 if (mode != CCmode)
6644 abort ();
6645 suffix = "b";
6646 break;
6647 case GE:
6648 if (mode == CCNOmode || mode == CCGOCmode)
6649 suffix = "ns";
6650 else if (mode == CCmode || mode == CCGCmode)
6651 suffix = "ge";
6652 else
6653 abort ();
6654 break;
6655 case GEU:
6656 /* ??? As above. */
6657 if (mode != CCmode)
6658 abort ();
6659 suffix = fp ? "nb" : "ae";
6660 break;
6661 case LE:
6662 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6663 abort ();
6664 suffix = "le";
6665 break;
6666 case LEU:
6667 if (mode != CCmode)
6668 abort ();
6669 suffix = "be";
6670 break;
6671 case UNORDERED:
6672 suffix = fp ? "u" : "p";
6673 break;
6674 case ORDERED:
6675 suffix = fp ? "nu" : "np";
6676 break;
6677 default:
6678 abort ();
6679 }
6680 fputs (suffix, file);
6681 }
6682
6683 void
6684 print_reg (x, code, file)
6685 rtx x;
6686 int code;
6687 FILE *file;
6688 {
6689 if (REGNO (x) == ARG_POINTER_REGNUM
6690 || REGNO (x) == FRAME_POINTER_REGNUM
6691 || REGNO (x) == FLAGS_REG
6692 || REGNO (x) == FPSR_REG)
6693 abort ();
6694
6695 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6696 putc ('%', file);
6697
6698 if (code == 'w' || MMX_REG_P (x))
6699 code = 2;
6700 else if (code == 'b')
6701 code = 1;
6702 else if (code == 'k')
6703 code = 4;
6704 else if (code == 'q')
6705 code = 8;
6706 else if (code == 'y')
6707 code = 3;
6708 else if (code == 'h')
6709 code = 0;
6710 else
6711 code = GET_MODE_SIZE (GET_MODE (x));
6712
6713 /* Irritatingly, AMD extended registers use a different naming convention
6714 from the normal registers. */
6715 if (REX_INT_REG_P (x))
6716 {
6717 if (!TARGET_64BIT)
6718 abort ();
6719 switch (code)
6720 {
6721 case 0:
6722 error ("extended registers have no high halves");
6723 break;
6724 case 1:
6725 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6726 break;
6727 case 2:
6728 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6729 break;
6730 case 4:
6731 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6732 break;
6733 case 8:
6734 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6735 break;
6736 default:
6737 error ("unsupported operand size for extended register");
6738 break;
6739 }
6740 return;
6741 }
6742 switch (code)
6743 {
6744 case 3:
6745 if (STACK_TOP_P (x))
6746 {
6747 fputs ("st(0)", file);
6748 break;
6749 }
6750 /* FALLTHRU */
6751 case 8:
6752 case 4:
6753 case 12:
6754 if (! ANY_FP_REG_P (x))
6755 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6756 /* FALLTHRU */
6757 case 16:
6758 case 2:
6759 fputs (hi_reg_name[REGNO (x)], file);
6760 break;
6761 case 1:
6762 fputs (qi_reg_name[REGNO (x)], file);
6763 break;
6764 case 0:
6765 fputs (qi_high_reg_name[REGNO (x)], file);
6766 break;
6767 default:
6768 abort ();
6769 }
6770 }
6771
6772 /* Locate some local-dynamic symbol still in use by this function
6773 so that we can print its name in some tls_local_dynamic_base
6774 pattern. */
6775
6776 static const char *
6777 get_some_local_dynamic_name ()
6778 {
6779 rtx insn;
6780
6781 if (cfun->machine->some_ld_name)
6782 return cfun->machine->some_ld_name;
6783
6784 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6785 if (INSN_P (insn)
6786 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6787 return cfun->machine->some_ld_name;
6788
6789 abort ();
6790 }
6791
6792 static int
6793 get_some_local_dynamic_name_1 (px, data)
6794 rtx *px;
6795 void *data ATTRIBUTE_UNUSED;
6796 {
6797 rtx x = *px;
6798
6799 if (GET_CODE (x) == SYMBOL_REF
6800 && local_dynamic_symbolic_operand (x, Pmode))
6801 {
6802 cfun->machine->some_ld_name = XSTR (x, 0);
6803 return 1;
6804 }
6805
6806 return 0;
6807 }
6808
6809 /* Meaning of CODE:
6810 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6811 C -- print opcode suffix for set/cmov insn.
6812 c -- like C, but print reversed condition
6813 F,f -- likewise, but for floating-point.
6814 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6815 nothing
6816 R -- print the prefix for register names.
6817 z -- print the opcode suffix for the size of the current operand.
6818 * -- print a star (in certain assembler syntax)
6819 A -- print an absolute memory reference.
6820 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6821 s -- print a shift double count, followed by the assembler's argument
6822 delimiter.
6823 b -- print the QImode name of the register for the indicated operand.
6824 %b0 would print %al if operands[0] is reg 0.
6825 w -- likewise, print the HImode name of the register.
6826 k -- likewise, print the SImode name of the register.
6827 q -- likewise, print the DImode name of the register.
6828 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6829 y -- print "st(0)" instead of "st" as a register.
6830 D -- print condition for SSE cmp instruction.
6831 P -- if PIC, print an @PLT suffix.
6832 X -- don't print any sort of PIC '@' suffix for a symbol.
6833 & -- print some in-use local-dynamic symbol name.
6834 */
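/* A few concrete examples of the codes above (illustrative only): if
   operands[0] is SImode hard register 0, "%b0" prints "%al", "%w0" prints
   "%ax", "%k0" prints "%eax" and, on x86-64, "%q0" prints "%rax"; "%z0"
   applied to a DFmode memory operand prints the "l" suffix used by the
   387 mnemonics.  */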
6835
6836 void
6837 print_operand (file, x, code)
6838 FILE *file;
6839 rtx x;
6840 int code;
6841 {
6842 if (code)
6843 {
6844 switch (code)
6845 {
6846 case '*':
6847 if (ASSEMBLER_DIALECT == ASM_ATT)
6848 putc ('*', file);
6849 return;
6850
6851 case '&':
6852 assemble_name (file, get_some_local_dynamic_name ());
6853 return;
6854
6855 case 'A':
6856 if (ASSEMBLER_DIALECT == ASM_ATT)
6857 putc ('*', file);
6858 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6859 {
6860 /* Intel syntax. For absolute addresses, registers should not
6861 be surrounded by brackets. */
6862 if (GET_CODE (x) != REG)
6863 {
6864 putc ('[', file);
6865 PRINT_OPERAND (file, x, 0);
6866 putc (']', file);
6867 return;
6868 }
6869 }
6870 else
6871 abort ();
6872
6873 PRINT_OPERAND (file, x, 0);
6874 return;
6875
6876
6877 case 'L':
6878 if (ASSEMBLER_DIALECT == ASM_ATT)
6879 putc ('l', file);
6880 return;
6881
6882 case 'W':
6883 if (ASSEMBLER_DIALECT == ASM_ATT)
6884 putc ('w', file);
6885 return;
6886
6887 case 'B':
6888 if (ASSEMBLER_DIALECT == ASM_ATT)
6889 putc ('b', file);
6890 return;
6891
6892 case 'Q':
6893 if (ASSEMBLER_DIALECT == ASM_ATT)
6894 putc ('l', file);
6895 return;
6896
6897 case 'S':
6898 if (ASSEMBLER_DIALECT == ASM_ATT)
6899 putc ('s', file);
6900 return;
6901
6902 case 'T':
6903 if (ASSEMBLER_DIALECT == ASM_ATT)
6904 putc ('t', file);
6905 return;
6906
6907 case 'z':
6908 /* 387 opcodes don't get size suffixes if the operands are
6909 registers. */
6910 if (STACK_REG_P (x))
6911 return;
6912
6913 /* Likewise if using Intel opcodes. */
6914 if (ASSEMBLER_DIALECT == ASM_INTEL)
6915 return;
6916
6917 /* Derive the opcode suffix from the size of the operand. */
6918 switch (GET_MODE_SIZE (GET_MODE (x)))
6919 {
6920 case 2:
6921 #ifdef HAVE_GAS_FILDS_FISTS
6922 putc ('s', file);
6923 #endif
6924 return;
6925
6926 case 4:
6927 if (GET_MODE (x) == SFmode)
6928 {
6929 putc ('s', file);
6930 return;
6931 }
6932 else
6933 putc ('l', file);
6934 return;
6935
6936 case 12:
6937 case 16:
6938 putc ('t', file);
6939 return;
6940
6941 case 8:
6942 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6943 {
6944 #ifdef GAS_MNEMONICS
6945 putc ('q', file);
6946 #else
6947 putc ('l', file);
6948 putc ('l', file);
6949 #endif
6950 }
6951 else
6952 putc ('l', file);
6953 return;
6954
6955 default:
6956 abort ();
6957 }
6958
6959 case 'b':
6960 case 'w':
6961 case 'k':
6962 case 'q':
6963 case 'h':
6964 case 'y':
6965 case 'X':
6966 case 'P':
6967 break;
6968
6969 case 's':
6970 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6971 {
6972 PRINT_OPERAND (file, x, 0);
6973 putc (',', file);
6974 }
6975 return;
6976
6977 case 'D':
6978 /* A little bit of brain damage here. The SSE compare instructions
6979 use completely different names for the comparisons than the
6980 fp conditional moves do. */
6981 switch (GET_CODE (x))
6982 {
6983 case EQ:
6984 case UNEQ:
6985 fputs ("eq", file);
6986 break;
6987 case LT:
6988 case UNLT:
6989 fputs ("lt", file);
6990 break;
6991 case LE:
6992 case UNLE:
6993 fputs ("le", file);
6994 break;
6995 case UNORDERED:
6996 fputs ("unord", file);
6997 break;
6998 case NE:
6999 case LTGT:
7000 fputs ("neq", file);
7001 break;
7002 case UNGE:
7003 case GE:
7004 fputs ("nlt", file);
7005 break;
7006 case UNGT:
7007 case GT:
7008 fputs ("nle", file);
7009 break;
7010 case ORDERED:
7011 fputs ("ord", file);
7012 break;
7013 default:
7014 abort ();
7015 break;
7016 }
7017 return;
7018 case 'O':
7019 #ifdef CMOV_SUN_AS_SYNTAX
7020 if (ASSEMBLER_DIALECT == ASM_ATT)
7021 {
7022 switch (GET_MODE (x))
7023 {
7024 case HImode: putc ('w', file); break;
7025 case SImode:
7026 case SFmode: putc ('l', file); break;
7027 case DImode:
7028 case DFmode: putc ('q', file); break;
7029 default: abort ();
7030 }
7031 putc ('.', file);
7032 }
7033 #endif
7034 return;
7035 case 'C':
7036 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7037 return;
7038 case 'F':
7039 #ifdef CMOV_SUN_AS_SYNTAX
7040 if (ASSEMBLER_DIALECT == ASM_ATT)
7041 putc ('.', file);
7042 #endif
7043 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7044 return;
7045
7046 /* Like above, but reverse condition */
7047 case 'c':
7048 /* Check to see if argument to %c is really a constant
7049 and not a condition code which needs to be reversed. */
7050 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7051 {
7052 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7053 return;
7054 }
7055 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7056 return;
7057 case 'f':
7058 #ifdef CMOV_SUN_AS_SYNTAX
7059 if (ASSEMBLER_DIALECT == ASM_ATT)
7060 putc ('.', file);
7061 #endif
7062 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7063 return;
7064 case '+':
7065 {
7066 rtx x;
7067
7068 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7069 return;
7070
7071 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7072 if (x)
7073 {
7074 int pred_val = INTVAL (XEXP (x, 0));
7075
7076 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7077 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7078 {
7079 int taken = pred_val > REG_BR_PROB_BASE / 2;
7080 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7081
7082 /* Emit hints only in the case the default branch prediction
7083 heuristics would fail. */
7084 if (taken != cputaken)
7085 {
7086 /* We use 3e (DS) prefix for taken branches and
7087 2e (CS) prefix for not taken branches. */
7088 if (taken)
7089 fputs ("ds ; ", file);
7090 else
7091 fputs ("cs ; ", file);
7092 }
7093 }
7094 }
7095 return;
7096 }
7097 default:
7098 output_operand_lossage ("invalid operand code `%c'", code);
7099 }
7100 }
7101
7102 if (GET_CODE (x) == REG)
7103 {
7104 PRINT_REG (x, code, file);
7105 }
7106
7107 else if (GET_CODE (x) == MEM)
7108 {
7109 /* No `byte ptr' prefix for call instructions. */
7110 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7111 {
7112 const char * size;
7113 switch (GET_MODE_SIZE (GET_MODE (x)))
7114 {
7115 case 1: size = "BYTE"; break;
7116 case 2: size = "WORD"; break;
7117 case 4: size = "DWORD"; break;
7118 case 8: size = "QWORD"; break;
7119 case 12: size = "XWORD"; break;
7120 case 16: size = "XMMWORD"; break;
7121 default:
7122 abort ();
7123 }
7124
7125 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7126 if (code == 'b')
7127 size = "BYTE";
7128 else if (code == 'w')
7129 size = "WORD";
7130 else if (code == 'k')
7131 size = "DWORD";
7132
7133 fputs (size, file);
7134 fputs (" PTR ", file);
7135 }
7136
7137 x = XEXP (x, 0);
7138 if (flag_pic && CONSTANT_ADDRESS_P (x))
7139 output_pic_addr_const (file, x, code);
7140 /* Avoid (%rip) for call operands. */
7141 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7142 && GET_CODE (x) != CONST_INT)
7143 output_addr_const (file, x);
7144 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7145 output_operand_lossage ("invalid constraints for operand");
7146 else
7147 output_address (x);
7148 }
7149
7150 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7151 {
7152 REAL_VALUE_TYPE r;
7153 long l;
7154
7155 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7156 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7157
7158 if (ASSEMBLER_DIALECT == ASM_ATT)
7159 putc ('$', file);
7160 fprintf (file, "0x%lx", l);
7161 }
7162
7163 /* These float cases don't actually occur as immediate operands. */
7164 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7165 {
7166 char dstr[30];
7167
7168 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7169 fprintf (file, "%s", dstr);
7170 }
7171
7172 else if (GET_CODE (x) == CONST_DOUBLE
7173 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7174 {
7175 char dstr[30];
7176
7177 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7178 fprintf (file, "%s", dstr);
7179 }
7180
7181 else
7182 {
7183 if (code != 'P')
7184 {
7185 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7186 {
7187 if (ASSEMBLER_DIALECT == ASM_ATT)
7188 putc ('$', file);
7189 }
7190 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7191 || GET_CODE (x) == LABEL_REF)
7192 {
7193 if (ASSEMBLER_DIALECT == ASM_ATT)
7194 putc ('$', file);
7195 else
7196 fputs ("OFFSET FLAT:", file);
7197 }
7198 }
7199 if (GET_CODE (x) == CONST_INT)
7200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7201 else if (flag_pic)
7202 output_pic_addr_const (file, x, code);
7203 else
7204 output_addr_const (file, x);
7205 }
7206 }
7207 \f
7208 /* Print a memory operand whose address is ADDR. */
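/* For example (illustrative values): base %ebx, index %ecx, scale 4 and
   displacement 12 print as "12(%ebx,%ecx,4)" in AT&T syntax and as the
   bracketed form "[ebx+12+ecx*4]" in Intel syntax (modulo the register
   prefix convention).  */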
7209
7210 void
7211 print_operand_address (file, addr)
7212 FILE *file;
7213 register rtx addr;
7214 {
7215 struct ix86_address parts;
7216 rtx base, index, disp;
7217 int scale;
7218
7219 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7220 {
7221 if (ASSEMBLER_DIALECT == ASM_INTEL)
7222 fputs ("DWORD PTR ", file);
7223 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7224 putc ('%', file);
7225 if (TARGET_64BIT)
7226 fputs ("fs:0", file);
7227 else
7228 fputs ("gs:0", file);
7229 return;
7230 }
7231
7232 if (! ix86_decompose_address (addr, &parts))
7233 abort ();
7234
7235 base = parts.base;
7236 index = parts.index;
7237 disp = parts.disp;
7238 scale = parts.scale;
7239
7240 if (!base && !index)
7241 {
7242 /* A displacement-only address requires special attention. */
7243
7244 if (GET_CODE (disp) == CONST_INT)
7245 {
7246 if (ASSEMBLER_DIALECT == ASM_INTEL)
7247 {
7248 if (USER_LABEL_PREFIX[0] == 0)
7249 putc ('%', file);
7250 fputs ("ds:", file);
7251 }
7252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7253 }
7254 else if (flag_pic)
7255 output_pic_addr_const (file, addr, 0);
7256 else
7257 output_addr_const (file, addr);
7258
7259 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
7260 if (TARGET_64BIT
7261 && ((GET_CODE (addr) == SYMBOL_REF
7262 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7263 || GET_CODE (addr) == LABEL_REF
7264 || (GET_CODE (addr) == CONST
7265 && GET_CODE (XEXP (addr, 0)) == PLUS
7266 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7267 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7268 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7269 fputs ("(%rip)", file);
7270 }
7271 else
7272 {
7273 if (ASSEMBLER_DIALECT == ASM_ATT)
7274 {
7275 if (disp)
7276 {
7277 if (flag_pic)
7278 output_pic_addr_const (file, disp, 0);
7279 else if (GET_CODE (disp) == LABEL_REF)
7280 output_asm_label (disp);
7281 else
7282 output_addr_const (file, disp);
7283 }
7284
7285 putc ('(', file);
7286 if (base)
7287 PRINT_REG (base, 0, file);
7288 if (index)
7289 {
7290 putc (',', file);
7291 PRINT_REG (index, 0, file);
7292 if (scale != 1)
7293 fprintf (file, ",%d", scale);
7294 }
7295 putc (')', file);
7296 }
7297 else
7298 {
7299 rtx offset = NULL_RTX;
7300
7301 if (disp)
7302 {
7303 /* Pull out the offset of a symbol; print any symbol itself. */
7304 if (GET_CODE (disp) == CONST
7305 && GET_CODE (XEXP (disp, 0)) == PLUS
7306 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7307 {
7308 offset = XEXP (XEXP (disp, 0), 1);
7309 disp = gen_rtx_CONST (VOIDmode,
7310 XEXP (XEXP (disp, 0), 0));
7311 }
7312
7313 if (flag_pic)
7314 output_pic_addr_const (file, disp, 0);
7315 else if (GET_CODE (disp) == LABEL_REF)
7316 output_asm_label (disp);
7317 else if (GET_CODE (disp) == CONST_INT)
7318 offset = disp;
7319 else
7320 output_addr_const (file, disp);
7321 }
7322
7323 putc ('[', file);
7324 if (base)
7325 {
7326 PRINT_REG (base, 0, file);
7327 if (offset)
7328 {
7329 if (INTVAL (offset) >= 0)
7330 putc ('+', file);
7331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7332 }
7333 }
7334 else if (offset)
7335 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7336 else
7337 putc ('0', file);
7338
7339 if (index)
7340 {
7341 putc ('+', file);
7342 PRINT_REG (index, 0, file);
7343 if (scale != 1)
7344 fprintf (file, "*%d", scale);
7345 }
7346 putc (']', file);
7347 }
7348 }
7349 }
7350
7351 bool
7352 output_addr_const_extra (file, x)
7353 FILE *file;
7354 rtx x;
7355 {
7356 rtx op;
7357
7358 if (GET_CODE (x) != UNSPEC)
7359 return false;
7360
7361 op = XVECEXP (x, 0, 0);
7362 switch (XINT (x, 1))
7363 {
7364 case UNSPEC_GOTTPOFF:
7365 output_addr_const (file, op);
7366 /* FIXME: This might be @TPOFF in Sun ld. */
7367 fputs ("@GOTTPOFF", file);
7368 break;
7369 case UNSPEC_TPOFF:
7370 output_addr_const (file, op);
7371 fputs ("@TPOFF", file);
7372 break;
7373 case UNSPEC_NTPOFF:
7374 output_addr_const (file, op);
7375 if (TARGET_64BIT)
7376 fputs ("@TPOFF", file);
7377 else
7378 fputs ("@NTPOFF", file);
7379 break;
7380 case UNSPEC_DTPOFF:
7381 output_addr_const (file, op);
7382 fputs ("@DTPOFF", file);
7383 break;
7384 case UNSPEC_GOTNTPOFF:
7385 output_addr_const (file, op);
7386 if (TARGET_64BIT)
7387 fputs ("@GOTTPOFF(%rip)", file);
7388 else
7389 fputs ("@GOTNTPOFF", file);
7390 break;
7391 case UNSPEC_INDNTPOFF:
7392 output_addr_const (file, op);
7393 fputs ("@INDNTPOFF", file);
7394 break;
7395
7396 default:
7397 return false;
7398 }
7399
7400 return true;
7401 }
7402 \f
7403 /* Split one or more DImode RTL references into pairs of SImode
7404 references. The RTL can be REG, offsettable MEM, integer constant, or
7405 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7406 split and "num" is its length. lo_half and hi_half are output arrays
7407 that parallel "operands". */
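/* E.g. a DImode pseudo splits into (subreg:SI ... 0) and (subreg:SI ... 4)
   for the low and high halves, while an offsettable MEM is split with
   adjust_address at byte offsets 0 and 4.  */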
7408
7409 void
7410 split_di (operands, num, lo_half, hi_half)
7411 rtx operands[];
7412 int num;
7413 rtx lo_half[], hi_half[];
7414 {
7415 while (num--)
7416 {
7417 rtx op = operands[num];
7418
7419 /* simplify_subreg refuses to split volatile memory references,
7420 but we still have to handle them. */
7421 if (GET_CODE (op) == MEM)
7422 {
7423 lo_half[num] = adjust_address (op, SImode, 0);
7424 hi_half[num] = adjust_address (op, SImode, 4);
7425 }
7426 else
7427 {
7428 lo_half[num] = simplify_gen_subreg (SImode, op,
7429 GET_MODE (op) == VOIDmode
7430 ? DImode : GET_MODE (op), 0);
7431 hi_half[num] = simplify_gen_subreg (SImode, op,
7432 GET_MODE (op) == VOIDmode
7433 ? DImode : GET_MODE (op), 4);
7434 }
7435 }
7436 }
7437 /* Split one or more TImode RTL references into pairs of DImode
7438 references. The RTL can be REG, offsettable MEM, integer constant, or
7439 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7440 split and "num" is its length. lo_half and hi_half are output arrays
7441 that parallel "operands". */
7442
7443 void
7444 split_ti (operands, num, lo_half, hi_half)
7445 rtx operands[];
7446 int num;
7447 rtx lo_half[], hi_half[];
7448 {
7449 while (num--)
7450 {
7451 rtx op = operands[num];
7452
7453 /* simplify_subreg refuses to split volatile memory references, but we
7454 still have to handle them. */
7455 if (GET_CODE (op) == MEM)
7456 {
7457 lo_half[num] = adjust_address (op, DImode, 0);
7458 hi_half[num] = adjust_address (op, DImode, 8);
7459 }
7460 else
7461 {
7462 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7463 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7464 }
7465 }
7466 }
7467 \f
7468 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7469 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7470 is the expression of the binary operation. The output may either be
7471 emitted here, or returned to the caller, like all output_* functions.
7472
7473 There is no guarantee that the operands are the same mode, as they
7474 might be within FLOAT or FLOAT_EXTEND expressions. */
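/* As an example of the SSE path below, an SFmode PLUS with SSE operands
   returns the template "addss\t{%2, %0|%0, %2}", and the DFmode form
   returns the corresponding "addsd" template; the 387 register-stack
   cases are handled further down.  */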
7475
7476 #ifndef SYSV386_COMPAT
7477 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7478 wants to fix the assemblers because that causes incompatibility
7479 with gcc. No-one wants to fix gcc because that causes
7480 incompatibility with assemblers... You can use the option of
7481 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7482 #define SYSV386_COMPAT 1
7483 #endif
7484
7485 const char *
7486 output_387_binary_op (insn, operands)
7487 rtx insn;
7488 rtx *operands;
7489 {
7490 static char buf[30];
7491 const char *p;
7492 const char *ssep;
7493 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7494
7495 #ifdef ENABLE_CHECKING
7496 /* Even if we do not want to check the inputs, this documents the input
7497 constraints, which helps in understanding the following code. */
7498 if (STACK_REG_P (operands[0])
7499 && ((REG_P (operands[1])
7500 && REGNO (operands[0]) == REGNO (operands[1])
7501 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7502 || (REG_P (operands[2])
7503 && REGNO (operands[0]) == REGNO (operands[2])
7504 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7505 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7506 ; /* ok */
7507 else if (!is_sse)
7508 abort ();
7509 #endif
7510
7511 switch (GET_CODE (operands[3]))
7512 {
7513 case PLUS:
7514 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7515 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7516 p = "fiadd";
7517 else
7518 p = "fadd";
7519 ssep = "add";
7520 break;
7521
7522 case MINUS:
7523 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7524 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7525 p = "fisub";
7526 else
7527 p = "fsub";
7528 ssep = "sub";
7529 break;
7530
7531 case MULT:
7532 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7533 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7534 p = "fimul";
7535 else
7536 p = "fmul";
7537 ssep = "mul";
7538 break;
7539
7540 case DIV:
7541 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7542 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7543 p = "fidiv";
7544 else
7545 p = "fdiv";
7546 ssep = "div";
7547 break;
7548
7549 default:
7550 abort ();
7551 }
7552
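/* For SSE the mnemonic is just the "ssep" stem plus a scalar-size suffix,
e.g. "addss" for SFmode and "addsd" for DFmode. */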
7553 if (is_sse)
7554 {
7555 strcpy (buf, ssep);
7556 if (GET_MODE (operands[0]) == SFmode)
7557 strcat (buf, "ss\t{%2, %0|%0, %2}");
7558 else
7559 strcat (buf, "sd\t{%2, %0|%0, %2}");
7560 return buf;
7561 }
7562 strcpy (buf, p);
7563
7564 switch (GET_CODE (operands[3]))
7565 {
7566 case MULT:
7567 case PLUS:
7568 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7569 {
7570 rtx temp = operands[2];
7571 operands[2] = operands[1];
7572 operands[1] = temp;
7573 }
7574
7575 /* We now know operands[0] == operands[1]. */
7576
7577 if (GET_CODE (operands[2]) == MEM)
7578 {
7579 p = "%z2\t%2";
7580 break;
7581 }
7582
7583 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7584 {
7585 if (STACK_TOP_P (operands[0]))
7586 /* How is it that we are storing to a dead operand[2]?
7587 Well, presumably operands[1] is dead too. We can't
7588 store the result to st(0) as st(0) gets popped on this
7589 instruction. Instead store to operands[2] (which I
7590 think has to be st(1)). st(1) will be popped later.
7591 gcc <= 2.8.1 didn't have this check and generated
7592 assembly code that the Unixware assembler rejected. */
7593 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7594 else
7595 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7596 break;
7597 }
7598
7599 if (STACK_TOP_P (operands[0]))
7600 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7601 else
7602 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7603 break;
7604
7605 case MINUS:
7606 case DIV:
7607 if (GET_CODE (operands[1]) == MEM)
7608 {
7609 p = "r%z1\t%1";
7610 break;
7611 }
7612
7613 if (GET_CODE (operands[2]) == MEM)
7614 {
7615 p = "%z2\t%2";
7616 break;
7617 }
7618
7619 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7620 {
7621 #if SYSV386_COMPAT
7622 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7623 derived assemblers, confusingly reverse the direction of
7624 the operation for fsub{r} and fdiv{r} when the
7625 destination register is not st(0). The Intel assembler
7626 doesn't have this brain damage. Read !SYSV386_COMPAT to
7627 figure out what the hardware really does. */
7628 if (STACK_TOP_P (operands[0]))
7629 p = "{p\t%0, %2|rp\t%2, %0}";
7630 else
7631 p = "{rp\t%2, %0|p\t%0, %2}";
7632 #else
7633 if (STACK_TOP_P (operands[0]))
7634 /* As above for fmul/fadd, we can't store to st(0). */
7635 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7636 else
7637 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7638 #endif
7639 break;
7640 }
7641
7642 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7643 {
7644 #if SYSV386_COMPAT
7645 if (STACK_TOP_P (operands[0]))
7646 p = "{rp\t%0, %1|p\t%1, %0}";
7647 else
7648 p = "{p\t%1, %0|rp\t%0, %1}";
7649 #else
7650 if (STACK_TOP_P (operands[0]))
7651 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7652 else
7653 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7654 #endif
7655 break;
7656 }
7657
7658 if (STACK_TOP_P (operands[0]))
7659 {
7660 if (STACK_TOP_P (operands[1]))
7661 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7662 else
7663 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7664 break;
7665 }
7666 else if (STACK_TOP_P (operands[1]))
7667 {
7668 #if SYSV386_COMPAT
7669 p = "{\t%1, %0|r\t%0, %1}";
7670 #else
7671 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7672 #endif
7673 }
7674 else
7675 {
7676 #if SYSV386_COMPAT
7677 p = "{r\t%2, %0|\t%0, %2}";
7678 #else
7679 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7680 #endif
7681 }
7682 break;
7683
7684 default:
7685 abort ();
7686 }
7687
7688 strcat (buf, p);
7689 return buf;
7690 }
7691
7692 /* Output code to initialize the control word copies used by the
7693 trunc?f?i patterns. NORMAL is set to the current control word, while
7694 ROUND_DOWN is set to a control word that truncates (rounds toward zero). */
7695 void
7696 emit_i387_cw_initialization (normal, round_down)
7697 rtx normal, round_down;
7698 {
7699 rtx reg = gen_reg_rtx (HImode);
7700
7701 emit_insn (gen_x86_fnstcw_1 (normal));
7702 emit_move_insn (reg, normal);
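/* Bits 10-11 of the i387 control word form the rounding-control field;
turning both on (0xc00 via the ior, or 0x0c stored into the high byte by
the insv variant) selects round-toward-zero, which is what the trunc
patterns need. */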
7703 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7704 && !TARGET_64BIT)
7705 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7706 else
7707 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7708 emit_move_insn (round_down, reg);
7709 }
7710
7711 /* Output code for INSN to convert a float to a signed int. OPERANDS
7712 are the insn operands. The output may be [HSD]Imode and the input
7713 operand may be [SDX]Fmode. */
7714
7715 const char *
7716 output_fix_trunc (insn, operands)
7717 rtx insn;
7718 rtx *operands;
7719 {
7720 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7721 int dimode_p = GET_MODE (operands[0]) == DImode;
7722
7723 /* Jump through a hoop or two for DImode, since the hardware has no
7724 non-popping instruction. We used to do this a different way, but
7725 that was somewhat fragile and broke with post-reload splitters. */
7726 if (dimode_p && !stack_top_dies)
7727 output_asm_insn ("fld\t%y1", operands);
7728
7729 if (!STACK_TOP_P (operands[1]))
7730 abort ();
7731
7732 if (GET_CODE (operands[0]) != MEM)
7733 abort ();
7734
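/* Operand 3 is expected to hold the truncating (round-toward-zero) control
word and operand 2 the original one -- presumably the pair prepared by
emit_i387_cw_initialization above. */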
7735 output_asm_insn ("fldcw\t%3", operands);
7736 if (stack_top_dies || dimode_p)
7737 output_asm_insn ("fistp%z0\t%0", operands);
7738 else
7739 output_asm_insn ("fist%z0\t%0", operands);
7740 output_asm_insn ("fldcw\t%2", operands);
7741
7742 return "";
7743 }
7744
7745 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7746 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7747 when fucom should be used. */
7748
7749 const char *
7750 output_fp_compare (insn, operands, eflags_p, unordered_p)
7751 rtx insn;
7752 rtx *operands;
7753 int eflags_p, unordered_p;
7754 {
7755 int stack_top_dies;
7756 rtx cmp_op0 = operands[0];
7757 rtx cmp_op1 = operands[1];
7758 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7759
7760 if (eflags_p == 2)
7761 {
7762 cmp_op0 = cmp_op1;
7763 cmp_op1 = operands[2];
7764 }
7765 if (is_sse)
7766 {
7767 if (GET_MODE (operands[0]) == SFmode)
7768 if (unordered_p)
7769 return "ucomiss\t{%1, %0|%0, %1}";
7770 else
7771 return "comiss\t{%1, %0|%0, %y}";
7772 else
7773 if (unordered_p)
7774 return "ucomisd\t{%1, %0|%0, %1}";
7775 else
7776 return "comisd\t{%1, %0|%0, %y}";
7777 }
7778
7779 if (! STACK_TOP_P (cmp_op0))
7780 abort ();
7781
7782 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7783
7784 if (STACK_REG_P (cmp_op1)
7785 && stack_top_dies
7786 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7787 && REGNO (cmp_op1) != FIRST_STACK_REG)
7788 {
7789 /* If both the top of the 387 stack and the other operand (also a
7790 stack register) die, then this must be a `fcompp' float
7791 compare. */
7792
7793 if (eflags_p == 1)
7794 {
7795 /* There is no double popping fcomi variant. Fortunately,
7796 eflags is immune from the fstp's cc clobbering. */
7797 if (unordered_p)
7798 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7799 else
7800 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7801 return "fstp\t%y0";
7802 }
7803 else
7804 {
7805 if (eflags_p == 2)
7806 {
7807 if (unordered_p)
7808 return "fucompp\n\tfnstsw\t%0";
7809 else
7810 return "fcompp\n\tfnstsw\t%0";
7811 }
7812 else
7813 {
7814 if (unordered_p)
7815 return "fucompp";
7816 else
7817 return "fcompp";
7818 }
7819 }
7820 }
7821 else
7822 {
7823 /* Encoded here as (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies, matching the mask computed below. */
7824
7825 static const char * const alt[24] =
7826 {
7827 "fcom%z1\t%y1",
7828 "fcomp%z1\t%y1",
7829 "fucom%z1\t%y1",
7830 "fucomp%z1\t%y1",
7831
7832 "ficom%z1\t%y1",
7833 "ficomp%z1\t%y1",
7834 NULL,
7835 NULL,
7836
7837 "fcomi\t{%y1, %0|%0, %y1}",
7838 "fcomip\t{%y1, %0|%0, %y1}",
7839 "fucomi\t{%y1, %0|%0, %y1}",
7840 "fucomip\t{%y1, %0|%0, %y1}",
7841
7842 NULL,
7843 NULL,
7844 NULL,
7845 NULL,
7846
7847 "fcom%z2\t%y2\n\tfnstsw\t%0",
7848 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7849 "fucom%z2\t%y2\n\tfnstsw\t%0",
7850 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7851
7852 "ficom%z2\t%y2\n\tfnstsw\t%0",
7853 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7854 NULL,
7855 NULL
7856 };
7857
7858 int mask;
7859 const char *ret;
7860
7861 mask = eflags_p << 3;
7862 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7863 mask |= unordered_p << 1;
7864 mask |= stack_top_dies;
7865
7866 if (mask >= 24)
7867 abort ();
7868 ret = alt[mask];
7869 if (ret == NULL)
7870 abort ();
7871
7872 return ret;
7873 }
7874 }
7875
7876 void
7877 ix86_output_addr_vec_elt (file, value)
7878 FILE *file;
7879 int value;
7880 {
7881 const char *directive = ASM_LONG;
7882
7883 if (TARGET_64BIT)
7884 {
7885 #ifdef ASM_QUAD
7886 directive = ASM_QUAD;
7887 #else
7888 abort ();
7889 #endif
7890 }
7891
7892 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7893 }
7894
7895 void
7896 ix86_output_addr_diff_elt (file, value, rel)
7897 FILE *file;
7898 int value, rel;
7899 {
7900 if (TARGET_64BIT)
7901 fprintf (file, "%s%s%d-%s%d\n",
7902 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7903 else if (HAVE_AS_GOTOFF_IN_DATA)
7904 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7905 #if TARGET_MACHO
7906 else if (TARGET_MACHO)
7907 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7908 machopic_function_base_name () + 1);
7909 #endif
7910 else
7911 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7912 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7913 }
7914 \f
7915 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7916 for the target. */
7917
7918 void
7919 ix86_expand_clear (dest)
7920 rtx dest;
7921 {
7922 rtx tmp;
7923
7924 /* We play register width games, which are only valid after reload. */
7925 if (!reload_completed)
7926 abort ();
7927
7928 /* Avoid HImode and its attendant prefix byte. */
7929 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7930 dest = gen_rtx_REG (SImode, REGNO (dest));
7931
7932 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7933
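/* The xor form is shorter but clobbers the flags, hence the explicit flags
CLOBBER below; plain "mov $0, reg" is used instead when the target prefers
it (TARGET_USE_MOV0) and we are not optimizing for size. */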
7934 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7935 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7936 {
7937 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7938 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7939 }
7940
7941 emit_insn (tmp);
7942 }
7943
7944 /* X is an unchanging MEM. If it is a constant pool reference, return
7945 the constant pool rtx, else NULL. */
7946
7947 static rtx
7948 maybe_get_pool_constant (x)
7949 rtx x;
7950 {
7951 x = XEXP (x, 0);
7952
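/* In 32-bit PIC code a constant pool reference has the form
(plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOTOFF))),
so strip those wrappers before the SYMBOL_REF check below. */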
7953 if (flag_pic && ! TARGET_64BIT)
7954 {
7955 if (GET_CODE (x) != PLUS)
7956 return NULL_RTX;
7957 if (XEXP (x, 0) != pic_offset_table_rtx)
7958 return NULL_RTX;
7959 x = XEXP (x, 1);
7960 if (GET_CODE (x) != CONST)
7961 return NULL_RTX;
7962 x = XEXP (x, 0);
7963 if (GET_CODE (x) != UNSPEC)
7964 return NULL_RTX;
7965 if (XINT (x, 1) != UNSPEC_GOTOFF)
7966 return NULL_RTX;
7967 x = XVECEXP (x, 0, 0);
7968 }
7969
7970 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7971 return get_pool_constant (x);
7972
7973 return NULL_RTX;
7974 }
7975
7976 void
7977 ix86_expand_move (mode, operands)
7978 enum machine_mode mode;
7979 rtx operands[];
7980 {
7981 int strict = (reload_in_progress || reload_completed);
7982 rtx insn, op0, op1, tmp;
7983
7984 op0 = operands[0];
7985 op1 = operands[1];
7986
7987 if (tls_symbolic_operand (op1, Pmode))
7988 {
7989 op1 = legitimize_address (op1, op1, VOIDmode);
7990 if (GET_CODE (op0) == MEM)
7991 {
7992 tmp = gen_reg_rtx (mode);
7993 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7994 op1 = tmp;
7995 }
7996 }
7997 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7998 {
7999 #if TARGET_MACHO
8000 if (MACHOPIC_PURE)
8001 {
8002 rtx temp = ((reload_in_progress
8003 || ((op0 && GET_CODE (op0) == REG)
8004 && mode == Pmode))
8005 ? op0 : gen_reg_rtx (Pmode));
8006 op1 = machopic_indirect_data_reference (op1, temp);
8007 op1 = machopic_legitimize_pic_address (op1, mode,
8008 temp == op1 ? 0 : temp);
8009 }
8010 else
8011 {
8012 if (MACHOPIC_INDIRECT)
8013 op1 = machopic_indirect_data_reference (op1, 0);
8014 }
8015 if (op0 != op1)
8016 {
8017 insn = gen_rtx_SET (VOIDmode, op0, op1);
8018 emit_insn (insn);
8019 }
8020 return;
8021 #endif /* TARGET_MACHO */
8022 if (GET_CODE (op0) == MEM)
8023 op1 = force_reg (Pmode, op1);
8024 else
8025 {
8026 rtx temp = op0;
8027 if (GET_CODE (temp) != REG)
8028 temp = gen_reg_rtx (Pmode);
8029 temp = legitimize_pic_address (op1, temp);
8030 if (temp == op0)
8031 return;
8032 op1 = temp;
8033 }
8034 }
8035 else
8036 {
8037 if (GET_CODE (op0) == MEM
8038 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8039 || !push_operand (op0, mode))
8040 && GET_CODE (op1) == MEM)
8041 op1 = force_reg (mode, op1);
8042
8043 if (push_operand (op0, mode)
8044 && ! general_no_elim_operand (op1, mode))
8045 op1 = copy_to_mode_reg (mode, op1);
8046
8047 /* Force large constants in 64-bit compilation into a register
8048 to get them CSEed. */
8049 if (TARGET_64BIT && mode == DImode
8050 && immediate_operand (op1, mode)
8051 && !x86_64_zero_extended_value (op1)
8052 && !register_operand (op0, mode)
8053 && optimize && !reload_completed && !reload_in_progress)
8054 op1 = copy_to_mode_reg (mode, op1);
8055
8056 if (FLOAT_MODE_P (mode))
8057 {
8058 /* If we are loading a floating point constant to a register,
8059 force the value to memory now, since we'll get better code
8060 out of the back end. */
8061
8062 if (strict)
8063 ;
8064 else if (GET_CODE (op1) == CONST_DOUBLE
8065 && register_operand (op0, mode))
8066 op1 = validize_mem (force_const_mem (mode, op1));
8067 }
8068 }
8069
8070 insn = gen_rtx_SET (VOIDmode, op0, op1);
8071
8072 emit_insn (insn);
8073 }
8074
8075 void
8076 ix86_expand_vector_move (mode, operands)
8077 enum machine_mode mode;
8078 rtx operands[];
8079 {
8080 /* Force constants other than zero into memory. We do not know how
8081 the instructions used to build constants modify the upper 64 bits
8082 of the register; once we have that information we may be able
8083 to handle some of them more efficiently. */
8084 if ((reload_in_progress | reload_completed) == 0
8085 && register_operand (operands[0], mode)
8086 && CONSTANT_P (operands[1]))
8087 operands[1] = force_const_mem (mode, operands[1]);
8088
8089 /* If neither operand is already a register, force operand 1 into one. */
8090 if (!no_new_pseudos
8091 && !register_operand (operands[0], mode)
8092 && !register_operand (operands[1], mode))
8093 {
8094 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8095 emit_move_insn (operands[0], temp);
8096 return;
8097 }
8098
8099 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8100 }
8101
8102 /* Attempt to expand a binary operator. Make the expansion closer to the
8103 actual machine than just general_operand, which would allow 3 separate
8104 memory references (one output, two input) in a single insn. */
8105
8106 void
8107 ix86_expand_binary_operator (code, mode, operands)
8108 enum rtx_code code;
8109 enum machine_mode mode;
8110 rtx operands[];
8111 {
8112 int matching_memory;
8113 rtx src1, src2, dst, op, clob;
8114
8115 dst = operands[0];
8116 src1 = operands[1];
8117 src2 = operands[2];
8118
8119 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8120 if (GET_RTX_CLASS (code) == 'c'
8121 && (rtx_equal_p (dst, src2)
8122 || immediate_operand (src1, mode)))
8123 {
8124 rtx temp = src1;
8125 src1 = src2;
8126 src2 = temp;
8127 }
8128
8129 /* If the destination is memory, and we do not have matching source
8130 operands, do things in registers. */
8131 matching_memory = 0;
8132 if (GET_CODE (dst) == MEM)
8133 {
8134 if (rtx_equal_p (dst, src1))
8135 matching_memory = 1;
8136 else if (GET_RTX_CLASS (code) == 'c'
8137 && rtx_equal_p (dst, src2))
8138 matching_memory = 2;
8139 else
8140 dst = gen_reg_rtx (mode);
8141 }
8142
8143 /* Both source operands cannot be in memory. */
8144 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8145 {
8146 if (matching_memory != 2)
8147 src2 = force_reg (mode, src2);
8148 else
8149 src1 = force_reg (mode, src1);
8150 }
8151
8152 /* If the operation is not commutative, source 1 cannot be a constant
8153 or non-matching memory. */
8154 if ((CONSTANT_P (src1)
8155 || (!matching_memory && GET_CODE (src1) == MEM))
8156 && GET_RTX_CLASS (code) != 'c')
8157 src1 = force_reg (mode, src1);
8158
8159 /* If optimizing, copy to regs to improve CSE */
8160 if (optimize && ! no_new_pseudos)
8161 {
8162 if (GET_CODE (dst) == MEM)
8163 dst = gen_reg_rtx (mode);
8164 if (GET_CODE (src1) == MEM)
8165 src1 = force_reg (mode, src1);
8166 if (GET_CODE (src2) == MEM)
8167 src2 = force_reg (mode, src2);
8168 }
8169
8170 /* Emit the instruction. */
8171
8172 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8173 if (reload_in_progress)
8174 {
8175 /* Reload doesn't know about the flags register, and doesn't know that
8176 it doesn't want to clobber it. We can only do this with PLUS. */
8177 if (code != PLUS)
8178 abort ();
8179 emit_insn (op);
8180 }
8181 else
8182 {
8183 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8184 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8185 }
8186
8187 /* Fix up the destination if needed. */
8188 if (dst != operands[0])
8189 emit_move_insn (operands[0], dst);
8190 }
8191
8192 /* Return TRUE or FALSE depending on whether the binary operator meets the
8193 appropriate constraints. */
8194
8195 int
8196 ix86_binary_operator_ok (code, mode, operands)
8197 enum rtx_code code;
8198 enum machine_mode mode ATTRIBUTE_UNUSED;
8199 rtx operands[3];
8200 {
8201 /* Both source operands cannot be in memory. */
8202 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8203 return 0;
8204 /* If the operation is not commutative, source 1 cannot be a constant. */
8205 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8206 return 0;
8207 /* If the destination is memory, we must have a matching source operand. */
8208 if (GET_CODE (operands[0]) == MEM
8209 && ! (rtx_equal_p (operands[0], operands[1])
8210 || (GET_RTX_CLASS (code) == 'c'
8211 && rtx_equal_p (operands[0], operands[2]))))
8212 return 0;
8213 /* If the operation is not commutative and source 1 is memory, we must
8214 have a matching destination. */
8215 if (GET_CODE (operands[1]) == MEM
8216 && GET_RTX_CLASS (code) != 'c'
8217 && ! rtx_equal_p (operands[0], operands[1]))
8218 return 0;
8219 return 1;
8220 }
8221
8222 /* Attempt to expand a unary operator. Make the expansion closer to the
8223 actual machine than just general_operand, which would allow 2 separate
8224 memory references (one output, one input) in a single insn. */
8225
8226 void
8227 ix86_expand_unary_operator (code, mode, operands)
8228 enum rtx_code code;
8229 enum machine_mode mode;
8230 rtx operands[];
8231 {
8232 int matching_memory;
8233 rtx src, dst, op, clob;
8234
8235 dst = operands[0];
8236 src = operands[1];
8237
8238 /* If the destination is memory, and we do not have matching source
8239 operands, do things in registers. */
8240 matching_memory = 0;
8241 if (GET_CODE (dst) == MEM)
8242 {
8243 if (rtx_equal_p (dst, src))
8244 matching_memory = 1;
8245 else
8246 dst = gen_reg_rtx (mode);
8247 }
8248
8249 /* When source operand is memory, destination must match. */
8250 if (!matching_memory && GET_CODE (src) == MEM)
8251 src = force_reg (mode, src);
8252
8253 /* If optimizing, copy to regs to improve CSE */
8254 if (optimize && ! no_new_pseudos)
8255 {
8256 if (GET_CODE (dst) == MEM)
8257 dst = gen_reg_rtx (mode);
8258 if (GET_CODE (src) == MEM)
8259 src = force_reg (mode, src);
8260 }
8261
8262 /* Emit the instruction. */
8263
8264 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8265 if (reload_in_progress || code == NOT)
8266 {
8267 /* Reload doesn't know about the flags register, and doesn't know that
8268 it doesn't want to clobber it. */
8269 if (code != NOT)
8270 abort ();
8271 emit_insn (op);
8272 }
8273 else
8274 {
8275 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8277 }
8278
8279 /* Fix up the destination if needed. */
8280 if (dst != operands[0])
8281 emit_move_insn (operands[0], dst);
8282 }
8283
8284 /* Return TRUE or FALSE depending on whether the unary operator meets the
8285 appropriate constraints. */
8286
8287 int
8288 ix86_unary_operator_ok (code, mode, operands)
8289 enum rtx_code code ATTRIBUTE_UNUSED;
8290 enum machine_mode mode ATTRIBUTE_UNUSED;
8291 rtx operands[2] ATTRIBUTE_UNUSED;
8292 {
8293 /* If one of operands is memory, source and destination must match. */
8294 if ((GET_CODE (operands[0]) == MEM
8295 || GET_CODE (operands[1]) == MEM)
8296 && ! rtx_equal_p (operands[0], operands[1]))
8297 return FALSE;
8298 return TRUE;
8299 }
8300
8301 /* Return TRUE or FALSE depending on whether the first SET in INSN
8302 has source and destination with matching CC modes, and that the
8303 CC mode is at least as constrained as REQ_MODE. */
8304
8305 int
8306 ix86_match_ccmode (insn, req_mode)
8307 rtx insn;
8308 enum machine_mode req_mode;
8309 {
8310 rtx set;
8311 enum machine_mode set_mode;
8312
8313 set = PATTERN (insn);
8314 if (GET_CODE (set) == PARALLEL)
8315 set = XVECEXP (set, 0, 0);
8316 if (GET_CODE (set) != SET)
8317 abort ();
8318 if (GET_CODE (SET_SRC (set)) != COMPARE)
8319 abort ();
8320
8321 set_mode = GET_MODE (SET_DEST (set));
8322 switch (set_mode)
8323 {
8324 case CCNOmode:
8325 if (req_mode != CCNOmode
8326 && (req_mode != CCmode
8327 || XEXP (SET_SRC (set), 1) != const0_rtx))
8328 return 0;
8329 break;
8330 case CCmode:
8331 if (req_mode == CCGCmode)
8332 return 0;
8333 /* FALLTHRU */
8334 case CCGCmode:
8335 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8336 return 0;
8337 /* FALLTHRU */
8338 case CCGOCmode:
8339 if (req_mode == CCZmode)
8340 return 0;
8341 /* FALLTHRU */
8342 case CCZmode:
8343 break;
8344
8345 default:
8346 abort ();
8347 }
8348
8349 return (GET_MODE (SET_SRC (set)) == set_mode);
8350 }
8351
8352 /* Generate insn patterns to do an integer compare of OPERANDS. */
8353
8354 static rtx
8355 ix86_expand_int_compare (code, op0, op1)
8356 enum rtx_code code;
8357 rtx op0, op1;
8358 {
8359 enum machine_mode cmpmode;
8360 rtx tmp, flags;
8361
8362 cmpmode = SELECT_CC_MODE (code, op0, op1);
8363 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8364
8365 /* This is very simple, but making the interface the same as in the
8366 FP case makes the rest of the code easier. */
8367 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8368 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8369
8370 /* Return the test that should be put into the flags user, i.e.
8371 the bcc, scc, or cmov instruction. */
8372 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8373 }
8374
8375 /* Figure out whether to use ordered or unordered fp comparisons.
8376 Return the appropriate mode to use. */
8377
8378 enum machine_mode
8379 ix86_fp_compare_mode (code)
8380 enum rtx_code code ATTRIBUTE_UNUSED;
8381 {
8382 /* ??? In order to make all comparisons reversible, we do all comparisons
8383 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8384 all forms of trapping and nontrapping comparisons, we can make inequality
8385 comparisons trapping again, since it results in better code when using
8386 FCOM based compares. */
8387 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8388 }
8389
8390 enum machine_mode
8391 ix86_cc_mode (code, op0, op1)
8392 enum rtx_code code;
8393 rtx op0, op1;
8394 {
8395 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8396 return ix86_fp_compare_mode (code);
8397 switch (code)
8398 {
8399 /* Only zero flag is needed. */
8400 case EQ: /* ZF=0 */
8401 case NE: /* ZF!=0 */
8402 return CCZmode;
8403 /* Codes needing carry flag. */
8404 case GEU: /* CF=0 */
8405 case GTU: /* CF=0 & ZF=0 */
8406 case LTU: /* CF=1 */
8407 case LEU: /* CF=1 | ZF=1 */
8408 return CCmode;
8409 /* Codes possibly doable only with sign flag when
8410 comparing against zero. */
8411 case GE: /* SF=OF or SF=0 */
8412 case LT: /* SF<>OF or SF=1 */
8413 if (op1 == const0_rtx)
8414 return CCGOCmode;
8415 else
8416 /* For other cases Carry flag is not required. */
8417 return CCGCmode;
8418 /* Codes doable only with the sign flag when comparing
8419 against zero, but for which we lack a jump instruction,
8420 so we need to use relational tests against overflow,
8421 which thus needs to be zero. */
8422 case GT: /* ZF=0 & SF=OF */
8423 case LE: /* ZF=1 | SF<>OF */
8424 if (op1 == const0_rtx)
8425 return CCNOmode;
8426 else
8427 return CCGCmode;
8428 /* The strcmp pattern does (use flags), and combine may ask us for the
8429 proper mode. */
8430 case USE:
8431 return CCmode;
8432 default:
8433 abort ();
8434 }
8435 }
8436
8437 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8438
8439 int
8440 ix86_use_fcomi_compare (code)
8441 enum rtx_code code ATTRIBUTE_UNUSED;
8442 {
8443 enum rtx_code swapped_code = swap_condition (code);
8444 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8445 || (ix86_fp_comparison_cost (swapped_code)
8446 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8447 }
8448
8449 /* Swap, force into registers, or otherwise massage the two operands
8450 to a fp comparison. The operands are updated in place; the new
8451 comparison code is returned. */
8452
8453 static enum rtx_code
8454 ix86_prepare_fp_compare_args (code, pop0, pop1)
8455 enum rtx_code code;
8456 rtx *pop0, *pop1;
8457 {
8458 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8459 rtx op0 = *pop0, op1 = *pop1;
8460 enum machine_mode op_mode = GET_MODE (op0);
8461 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8462
8463 /* All of the unordered compare instructions only work on registers.
8464 The same is true of the XFmode compare instructions. The same is
8465 true of the fcomi compare instructions. */
8466
8467 if (!is_sse
8468 && (fpcmp_mode == CCFPUmode
8469 || op_mode == XFmode
8470 || op_mode == TFmode
8471 || ix86_use_fcomi_compare (code)))
8472 {
8473 op0 = force_reg (op_mode, op0);
8474 op1 = force_reg (op_mode, op1);
8475 }
8476 else
8477 {
8478 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8479 things around if they appear profitable, otherwise force op0
8480 into a register. */
8481
8482 if (standard_80387_constant_p (op0) == 0
8483 || (GET_CODE (op0) == MEM
8484 && ! (standard_80387_constant_p (op1) == 0
8485 || GET_CODE (op1) == MEM)))
8486 {
8487 rtx tmp;
8488 tmp = op0, op0 = op1, op1 = tmp;
8489 code = swap_condition (code);
8490 }
8491
8492 if (GET_CODE (op0) != REG)
8493 op0 = force_reg (op_mode, op0);
8494
8495 if (CONSTANT_P (op1))
8496 {
8497 if (standard_80387_constant_p (op1))
8498 op1 = force_reg (op_mode, op1);
8499 else
8500 op1 = validize_mem (force_const_mem (op_mode, op1));
8501 }
8502 }
8503
8504 /* Try to rearrange the comparison to make it cheaper. */
8505 if (ix86_fp_comparison_cost (code)
8506 > ix86_fp_comparison_cost (swap_condition (code))
8507 && (GET_CODE (op1) == REG || !no_new_pseudos))
8508 {
8509 rtx tmp;
8510 tmp = op0, op0 = op1, op1 = tmp;
8511 code = swap_condition (code);
8512 if (GET_CODE (op0) != REG)
8513 op0 = force_reg (op_mode, op0);
8514 }
8515
8516 *pop0 = op0;
8517 *pop1 = op1;
8518 return code;
8519 }
8520
8521 /* Convert comparison codes we use to represent FP comparison to integer
8522 code that will result in proper branch. Return UNKNOWN if no such code
8523 is available. */
8524 static enum rtx_code
8525 ix86_fp_compare_code_to_integer (code)
8526 enum rtx_code code;
8527 {
8528 switch (code)
8529 {
8530 case GT:
8531 return GTU;
8532 case GE:
8533 return GEU;
8534 case ORDERED:
8535 case UNORDERED:
8536 return code;
8537 break;
8538 case UNEQ:
8539 return EQ;
8540 break;
8541 case UNLT:
8542 return LTU;
8543 break;
8544 case UNLE:
8545 return LEU;
8546 break;
8547 case LTGT:
8548 return NE;
8549 break;
8550 default:
8551 return UNKNOWN;
8552 }
8553 }
8554
8555 /* Split comparison code CODE into comparisons we can do using branch
8556 instructions. BYPASS_CODE is the comparison code for a branch that will
8557 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8558 is not required, its value is set to NIL.
8559 We never require more than two branches. */
8560 static void
8561 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8562 enum rtx_code code, *bypass_code, *first_code, *second_code;
8563 {
8564 *first_code = code;
8565 *bypass_code = NIL;
8566 *second_code = NIL;
8567
8568 /* The fcomi comparison sets flags as follows:
8569
8570 cmp ZF PF CF
8571 > 0 0 0
8572 < 0 0 1
8573 = 1 0 0
8574 un 1 1 1 */
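/* Conditions whose flag test would give the wrong answer for unordered
operands get either a PF-based bypass branch or an extra PF test; the
cases below arrange that. */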
8575
8576 switch (code)
8577 {
8578 case GT: /* GTU - CF=0 & ZF=0 */
8579 case GE: /* GEU - CF=0 */
8580 case ORDERED: /* PF=0 */
8581 case UNORDERED: /* PF=1 */
8582 case UNEQ: /* EQ - ZF=1 */
8583 case UNLT: /* LTU - CF=1 */
8584 case UNLE: /* LEU - CF=1 | ZF=1 */
8585 case LTGT: /* EQ - ZF=0 */
8586 break;
8587 case LT: /* LTU - CF=1 - fails on unordered */
8588 *first_code = UNLT;
8589 *bypass_code = UNORDERED;
8590 break;
8591 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8592 *first_code = UNLE;
8593 *bypass_code = UNORDERED;
8594 break;
8595 case EQ: /* EQ - ZF=1 - fails on unordered */
8596 *first_code = UNEQ;
8597 *bypass_code = UNORDERED;
8598 break;
8599 case NE: /* NE - ZF=0 - fails on unordered */
8600 *first_code = LTGT;
8601 *second_code = UNORDERED;
8602 break;
8603 case UNGE: /* GEU - CF=0 - fails on unordered */
8604 *first_code = GE;
8605 *second_code = UNORDERED;
8606 break;
8607 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8608 *first_code = GT;
8609 *second_code = UNORDERED;
8610 break;
8611 default:
8612 abort ();
8613 }
8614 if (!TARGET_IEEE_FP)
8615 {
8616 *second_code = NIL;
8617 *bypass_code = NIL;
8618 }
8619 }
8620
8621 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8622 All following functions use the number of instructions as the cost metric.
8623 In the future this should be tweaked to compute bytes for optimize_size and
8624 take into account the performance of various instructions on various CPUs. */
8625 static int
8626 ix86_fp_comparison_arithmetics_cost (code)
8627 enum rtx_code code;
8628 {
8629 if (!TARGET_IEEE_FP)
8630 return 4;
8631 /* The cost of code output by ix86_expand_fp_compare. */
8632 switch (code)
8633 {
8634 case UNLE:
8635 case UNLT:
8636 case LTGT:
8637 case GT:
8638 case GE:
8639 case UNORDERED:
8640 case ORDERED:
8641 case UNEQ:
8642 return 4;
8643 break;
8644 case LT:
8645 case NE:
8646 case EQ:
8647 case UNGE:
8648 return 5;
8649 break;
8650 case LE:
8651 case UNGT:
8652 return 6;
8653 break;
8654 default:
8655 abort ();
8656 }
8657 }
8658
8659 /* Return cost of comparison done using fcomi operation.
8660 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8661 static int
8662 ix86_fp_comparison_fcomi_cost (code)
8663 enum rtx_code code;
8664 {
8665 enum rtx_code bypass_code, first_code, second_code;
8666 /* Return an arbitrarily high cost when the instruction is not supported -
8667 this prevents gcc from using it. */
8668 if (!TARGET_CMOVE)
8669 return 1024;
8670 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8671 return (bypass_code != NIL || second_code != NIL) + 2;
8672 }
8673
8674 /* Return cost of comparison done using sahf operation.
8675 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8676 static int
8677 ix86_fp_comparison_sahf_cost (code)
8678 enum rtx_code code;
8679 {
8680 enum rtx_code bypass_code, first_code, second_code;
8681 /* Return an arbitrarily high cost when the instruction is not preferred -
8682 this keeps gcc from using it. */
8683 if (!TARGET_USE_SAHF && !optimize_size)
8684 return 1024;
8685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8686 return (bypass_code != NIL || second_code != NIL) + 3;
8687 }
8688
8689 /* Compute cost of the comparison done using any method.
8690 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8691 static int
8692 ix86_fp_comparison_cost (code)
8693 enum rtx_code code;
8694 {
8695 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8696 int min;
8697
8698 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8699 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8700
8701 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8702 if (min > sahf_cost)
8703 min = sahf_cost;
8704 if (min > fcomi_cost)
8705 min = fcomi_cost;
8706 return min;
8707 }
8708
8709 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8710
8711 static rtx
8712 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8713 enum rtx_code code;
8714 rtx op0, op1, scratch;
8715 rtx *second_test;
8716 rtx *bypass_test;
8717 {
8718 enum machine_mode fpcmp_mode, intcmp_mode;
8719 rtx tmp, tmp2;
8720 int cost = ix86_fp_comparison_cost (code);
8721 enum rtx_code bypass_code, first_code, second_code;
8722
8723 fpcmp_mode = ix86_fp_compare_mode (code);
8724 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8725
8726 if (second_test)
8727 *second_test = NULL_RTX;
8728 if (bypass_test)
8729 *bypass_test = NULL_RTX;
8730
8731 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8732
8733 /* Do fcomi/sahf based test when profitable. */
8734 if ((bypass_code == NIL || bypass_test)
8735 && (second_code == NIL || second_test)
8736 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8737 {
8738 if (TARGET_CMOVE)
8739 {
8740 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8741 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8742 tmp);
8743 emit_insn (tmp);
8744 }
8745 else
8746 {
8747 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8748 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8749 if (!scratch)
8750 scratch = gen_reg_rtx (HImode);
8751 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8752 emit_insn (gen_x86_sahf_1 (scratch));
8753 }
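/* sahf copies AH (the fnstsw result) into the low byte of the flags
register, so from here on the comparison can be consumed by ordinary
jcc/setcc/cmov just as in the fcomi case. */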
8754
8755 /* The FP codes work out to act like unsigned. */
8756 intcmp_mode = fpcmp_mode;
8757 code = first_code;
8758 if (bypass_code != NIL)
8759 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8760 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8761 const0_rtx);
8762 if (second_code != NIL)
8763 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8764 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8765 const0_rtx);
8766 }
8767 else
8768 {
8769 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8770 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8771 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8772 if (!scratch)
8773 scratch = gen_reg_rtx (HImode);
8774 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8775
8776 /* In the unordered case, we have to check C2 for NaN's, which
8777 doesn't happen to work out to anything nice combination-wise.
8778 So do some bit twiddling on the value we've got in AH to come
8779 up with an appropriate set of condition codes. */
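/* After fnstsw the FPU condition bits land in AH as C0 = 0x01, C2 = 0x04
and C3 = 0x40; the 0x45 masks below therefore test all three at once. */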
8780
8781 intcmp_mode = CCNOmode;
8782 switch (code)
8783 {
8784 case GT:
8785 case UNGT:
8786 if (code == GT || !TARGET_IEEE_FP)
8787 {
8788 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8789 code = EQ;
8790 }
8791 else
8792 {
8793 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8794 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8795 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8796 intcmp_mode = CCmode;
8797 code = GEU;
8798 }
8799 break;
8800 case LT:
8801 case UNLT:
8802 if (code == LT && TARGET_IEEE_FP)
8803 {
8804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8805 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8806 intcmp_mode = CCmode;
8807 code = EQ;
8808 }
8809 else
8810 {
8811 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8812 code = NE;
8813 }
8814 break;
8815 case GE:
8816 case UNGE:
8817 if (code == GE || !TARGET_IEEE_FP)
8818 {
8819 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8820 code = EQ;
8821 }
8822 else
8823 {
8824 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8825 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8826 GEN_INT (0x01)));
8827 code = NE;
8828 }
8829 break;
8830 case LE:
8831 case UNLE:
8832 if (code == LE && TARGET_IEEE_FP)
8833 {
8834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8835 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8837 intcmp_mode = CCmode;
8838 code = LTU;
8839 }
8840 else
8841 {
8842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8843 code = NE;
8844 }
8845 break;
8846 case EQ:
8847 case UNEQ:
8848 if (code == EQ && TARGET_IEEE_FP)
8849 {
8850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8851 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8852 intcmp_mode = CCmode;
8853 code = EQ;
8854 }
8855 else
8856 {
8857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8858 code = NE;
8859 break;
8860 }
8861 break;
8862 case NE:
8863 case LTGT:
8864 if (code == NE && TARGET_IEEE_FP)
8865 {
8866 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8867 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8868 GEN_INT (0x40)));
8869 code = NE;
8870 }
8871 else
8872 {
8873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8874 code = EQ;
8875 }
8876 break;
8877
8878 case UNORDERED:
8879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8880 code = NE;
8881 break;
8882 case ORDERED:
8883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8884 code = EQ;
8885 break;
8886
8887 default:
8888 abort ();
8889 }
8890 }
8891
8892 /* Return the test that should be put into the flags user, i.e.
8893 the bcc, scc, or cmov instruction. */
8894 return gen_rtx_fmt_ee (code, VOIDmode,
8895 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8896 const0_rtx);
8897 }
8898
8899 rtx
8900 ix86_expand_compare (code, second_test, bypass_test)
8901 enum rtx_code code;
8902 rtx *second_test, *bypass_test;
8903 {
8904 rtx op0, op1, ret;
8905 op0 = ix86_compare_op0;
8906 op1 = ix86_compare_op1;
8907
8908 if (second_test)
8909 *second_test = NULL_RTX;
8910 if (bypass_test)
8911 *bypass_test = NULL_RTX;
8912
8913 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8914 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8915 second_test, bypass_test);
8916 else
8917 ret = ix86_expand_int_compare (code, op0, op1);
8918
8919 return ret;
8920 }
8921
8922 /* Return true if the CODE will result in nontrivial jump sequence. */
8923 bool
8924 ix86_fp_jump_nontrivial_p (code)
8925 enum rtx_code code;
8926 {
8927 enum rtx_code bypass_code, first_code, second_code;
8928 if (!TARGET_CMOVE)
8929 return true;
8930 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8931 return bypass_code != NIL || second_code != NIL;
8932 }
8933
8934 void
8935 ix86_expand_branch (code, label)
8936 enum rtx_code code;
8937 rtx label;
8938 {
8939 rtx tmp;
8940
8941 switch (GET_MODE (ix86_compare_op0))
8942 {
8943 case QImode:
8944 case HImode:
8945 case SImode:
8946 simple:
8947 tmp = ix86_expand_compare (code, NULL, NULL);
8948 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8949 gen_rtx_LABEL_REF (VOIDmode, label),
8950 pc_rtx);
8951 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8952 return;
8953
8954 case SFmode:
8955 case DFmode:
8956 case XFmode:
8957 case TFmode:
8958 {
8959 rtvec vec;
8960 int use_fcomi;
8961 enum rtx_code bypass_code, first_code, second_code;
8962
8963 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8964 &ix86_compare_op1);
8965
8966 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8967
8968 /* Check whether we will use the natural sequence with one jump. If
8969 so, we can expand the jump early. Otherwise delay expansion by
8970 creating a compound insn so as not to confuse the optimizers. */
8971 if (bypass_code == NIL && second_code == NIL
8972 && TARGET_CMOVE)
8973 {
8974 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8975 gen_rtx_LABEL_REF (VOIDmode, label),
8976 pc_rtx, NULL_RTX);
8977 }
8978 else
8979 {
8980 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8981 ix86_compare_op0, ix86_compare_op1);
8982 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8983 gen_rtx_LABEL_REF (VOIDmode, label),
8984 pc_rtx);
8985 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8986
8987 use_fcomi = ix86_use_fcomi_compare (code);
8988 vec = rtvec_alloc (3 + !use_fcomi);
8989 RTVEC_ELT (vec, 0) = tmp;
8990 RTVEC_ELT (vec, 1)
8991 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8992 RTVEC_ELT (vec, 2)
8993 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8994 if (! use_fcomi)
8995 RTVEC_ELT (vec, 3)
8996 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8997
8998 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8999 }
9000 return;
9001 }
9002
9003 case DImode:
9004 if (TARGET_64BIT)
9005 goto simple;
9006 /* Expand DImode branch into multiple compare+branch. */
9007 {
9008 rtx lo[2], hi[2], label2;
9009 enum rtx_code code1, code2, code3;
9010
9011 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9012 {
9013 tmp = ix86_compare_op0;
9014 ix86_compare_op0 = ix86_compare_op1;
9015 ix86_compare_op1 = tmp;
9016 code = swap_condition (code);
9017 }
9018 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9019 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9020
9021 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9022 avoid two branches. This costs one extra insn, so disable when
9023 optimizing for size. */
9024
9025 if ((code == EQ || code == NE)
9026 && (!optimize_size
9027 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9028 {
9029 rtx xor0, xor1;
9030
9031 xor1 = hi[0];
9032 if (hi[1] != const0_rtx)
9033 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9034 NULL_RTX, 0, OPTAB_WIDEN);
9035
9036 xor0 = lo[0];
9037 if (lo[1] != const0_rtx)
9038 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9039 NULL_RTX, 0, OPTAB_WIDEN);
9040
9041 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9042 NULL_RTX, 0, OPTAB_WIDEN);
9043
9044 ix86_compare_op0 = tmp;
9045 ix86_compare_op1 = const0_rtx;
9046 ix86_expand_branch (code, label);
9047 return;
9048 }
9049
9050 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9051 op1 is a constant and the low word is zero, then we can just
9052 examine the high word. */
9053
9054 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9055 switch (code)
9056 {
9057 case LT: case LTU: case GE: case GEU:
9058 ix86_compare_op0 = hi[0];
9059 ix86_compare_op1 = hi[1];
9060 ix86_expand_branch (code, label);
9061 return;
9062 default:
9063 break;
9064 }
9065
9066 /* Otherwise, we need two or three jumps. */
9067
9068 label2 = gen_label_rtx ();
9069
9070 code1 = code;
9071 code2 = swap_condition (code);
9072 code3 = unsigned_condition (code);
9073
9074 switch (code)
9075 {
9076 case LT: case GT: case LTU: case GTU:
9077 break;
9078
9079 case LE: code1 = LT; code2 = GT; break;
9080 case GE: code1 = GT; code2 = LT; break;
9081 case LEU: code1 = LTU; code2 = GTU; break;
9082 case GEU: code1 = GTU; code2 = LTU; break;
9083
9084 case EQ: code1 = NIL; code2 = NE; break;
9085 case NE: code2 = NIL; break;
9086
9087 default:
9088 abort ();
9089 }
9090
9091 /*
9092 * a < b =>
9093 * if (hi(a) < hi(b)) goto true;
9094 * if (hi(a) > hi(b)) goto false;
9095 * if (lo(a) < lo(b)) goto true;
9096 * false:
9097 */
9098
9099 ix86_compare_op0 = hi[0];
9100 ix86_compare_op1 = hi[1];
9101
9102 if (code1 != NIL)
9103 ix86_expand_branch (code1, label);
9104 if (code2 != NIL)
9105 ix86_expand_branch (code2, label2);
9106
9107 ix86_compare_op0 = lo[0];
9108 ix86_compare_op1 = lo[1];
9109 ix86_expand_branch (code3, label);
9110
9111 if (code2 != NIL)
9112 emit_label (label2);
9113 return;
9114 }
9115
9116 default:
9117 abort ();
9118 }
9119 }
9120
9121 /* Split branch based on floating point condition. */
9122 void
9123 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9124 enum rtx_code code;
9125 rtx op1, op2, target1, target2, tmp;
9126 {
9127 rtx second, bypass;
9128 rtx label = NULL_RTX;
9129 rtx condition;
9130 int bypass_probability = -1, second_probability = -1, probability = -1;
9131 rtx i;
9132
9133 if (target2 != pc_rtx)
9134 {
9135 rtx tmp = target2;
9136 code = reverse_condition_maybe_unordered (code);
9137 target2 = target1;
9138 target1 = tmp;
9139 }
9140
9141 condition = ix86_expand_fp_compare (code, op1, op2,
9142 tmp, &second, &bypass);
9143
9144 if (split_branch_probability >= 0)
9145 {
9146 /* Distribute the probabilities across the jumps.
9147 Assume BYPASS and SECOND always test
9148 for UNORDERED. */
9149 probability = split_branch_probability;
9150
9151 /* A value of 1 is low enough that there is no need for the probability
9152 to be updated. Later we may run some experiments and see
9153 if unordered values are more frequent in practice. */
9154 if (bypass)
9155 bypass_probability = 1;
9156 if (second)
9157 second_probability = 1;
9158 }
9159 if (bypass != NULL_RTX)
9160 {
9161 label = gen_label_rtx ();
9162 i = emit_jump_insn (gen_rtx_SET
9163 (VOIDmode, pc_rtx,
9164 gen_rtx_IF_THEN_ELSE (VOIDmode,
9165 bypass,
9166 gen_rtx_LABEL_REF (VOIDmode,
9167 label),
9168 pc_rtx)));
9169 if (bypass_probability >= 0)
9170 REG_NOTES (i)
9171 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9172 GEN_INT (bypass_probability),
9173 REG_NOTES (i));
9174 }
9175 i = emit_jump_insn (gen_rtx_SET
9176 (VOIDmode, pc_rtx,
9177 gen_rtx_IF_THEN_ELSE (VOIDmode,
9178 condition, target1, target2)));
9179 if (probability >= 0)
9180 REG_NOTES (i)
9181 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9182 GEN_INT (probability),
9183 REG_NOTES (i));
9184 if (second != NULL_RTX)
9185 {
9186 i = emit_jump_insn (gen_rtx_SET
9187 (VOIDmode, pc_rtx,
9188 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9189 target2)));
9190 if (second_probability >= 0)
9191 REG_NOTES (i)
9192 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9193 GEN_INT (second_probability),
9194 REG_NOTES (i));
9195 }
9196 if (label != NULL_RTX)
9197 emit_label (label);
9198 }
9199
9200 int
9201 ix86_expand_setcc (code, dest)
9202 enum rtx_code code;
9203 rtx dest;
9204 {
9205 rtx ret, tmp, tmpreg;
9206 rtx second_test, bypass_test;
9207
9208 if (GET_MODE (ix86_compare_op0) == DImode
9209 && !TARGET_64BIT)
9210 return 0; /* FAIL */
9211
9212 if (GET_MODE (dest) != QImode)
9213 abort ();
9214
9215 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9216 PUT_MODE (ret, QImode);
9217
9218 tmp = dest;
9219 tmpreg = dest;
9220
9221 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9222 if (bypass_test || second_test)
9223 {
9224 rtx test = second_test;
9225 int bypass = 0;
9226 rtx tmp2 = gen_reg_rtx (QImode);
9227 if (bypass_test)
9228 {
9229 if (second_test)
9230 abort ();
9231 test = bypass_test;
9232 bypass = 1;
9233 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9234 }
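/* A bypass test was reversed above and is ANDed in (it guards the
unordered case), while a second test is simply ORed in. */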
9235 PUT_MODE (test, QImode);
9236 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9237
9238 if (bypass)
9239 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9240 else
9241 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9242 }
9243
9244 return 1; /* DONE */
9245 }
9246
9247 /* Expand a comparison setting or clearing the carry flag. Return true when
9248 successful and set *POP to the comparison for the operation. */
9249 bool
9250 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9251 rtx op0, op1, *pop;
9252 enum rtx_code code;
9253 {
9254 enum machine_mode mode =
9255 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9256
9257 /* Do not handle DImode compares that go through the special path. Also we
9258 can't deal with FP compares yet. This would be possible to add. */
9259 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
9260 return false;
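/* The cases below all reduce to LTU or GEU, the two conditions a single
cmp can leave directly in the carry flag; the caller can then turn that
flag into a 0/-1 mask with sbb (see ix86_expand_int_movcc below). */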
9261 switch (code)
9262 {
9263 case LTU:
9264 case GEU:
9265 break;
9266
9267 /* Convert a==0 into (unsigned)a<1. */
9268 case EQ:
9269 case NE:
9270 if (op1 != const0_rtx)
9271 return false;
9272 op1 = const1_rtx;
9273 code = (code == EQ ? LTU : GEU);
9274 break;
9275
9276 /* Convert a>b into b<a or a>=b-1. */
9277 case GTU:
9278 case LEU:
9279 if (GET_CODE (op1) == CONST_INT)
9280 {
9281 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9282 /* Bail out on overflow. We could still swap the operands, but that
9283 would force loading the constant into a register. */
9284 if (op1 == const0_rtx
9285 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9286 return false;
9287 code = (code == GTU ? GEU : LTU);
9288 }
9289 else
9290 {
9291 rtx tmp = op1;
9292 op1 = op0;
9293 op0 = tmp;
9294 code = (code == GTU ? LTU : GEU);
9295 }
9296 break;
9297
9298 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9299 case LT:
9300 case GE:
9301 if (mode == DImode || op1 != const0_rtx)
9302 return false;
9303 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9304 code = (code == LT ? GEU : LTU);
9305 break;
9306 case LE:
9307 case GT:
9308 if (mode == DImode || op1 != constm1_rtx)
9309 return false;
9310 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9311 code = (code == LE ? GEU : LTU);
9312 break;
9313
9314 default:
9315 return false;
9316 }
9317 ix86_compare_op0 = op0;
9318 ix86_compare_op1 = op1;
9319 *pop = ix86_expand_compare (code, NULL, NULL);
9320 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9321 abort ();
9322 return true;
9323 }
9324
9325 int
9326 ix86_expand_int_movcc (operands)
9327 rtx operands[];
9328 {
9329 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9330 rtx compare_seq, compare_op;
9331 rtx second_test, bypass_test;
9332 enum machine_mode mode = GET_MODE (operands[0]);
9333 bool sign_bit_compare_p = false;
9334
9335 start_sequence ();
9336 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9337 compare_seq = get_insns ();
9338 end_sequence ();
9339
9340 compare_code = GET_CODE (compare_op);
9341
9342 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9343 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9344 sign_bit_compare_p = true;
9345
9346 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9347 HImode insns, we'd be swallowed in word prefix ops. */
9348
9349 if ((mode != HImode || TARGET_FAST_PREFIX)
9350 && (mode != DImode || TARGET_64BIT)
9351 && GET_CODE (operands[2]) == CONST_INT
9352 && GET_CODE (operands[3]) == CONST_INT)
9353 {
9354 rtx out = operands[0];
9355 HOST_WIDE_INT ct = INTVAL (operands[2]);
9356 HOST_WIDE_INT cf = INTVAL (operands[3]);
9357 HOST_WIDE_INT diff;
9358
9359 diff = ct - cf;
9360 /* Sign bit compares are better done using shifts than by using
9361 sbb. */
9362 if (sign_bit_compare_p
9363 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9364 ix86_compare_op1, &compare_op))
9365 {
9366 /* Detect overlap between destination and compare sources. */
9367 rtx tmp = out;
9368
9369 if (!sign_bit_compare_p)
9370 {
9371 compare_code = GET_CODE (compare_op);
9372
9373 /* To simplify rest of code, restrict to the GEU case. */
9374 if (compare_code == LTU)
9375 {
9376 HOST_WIDE_INT tmp = ct;
9377 ct = cf;
9378 cf = tmp;
9379 compare_code = reverse_condition (compare_code);
9380 code = reverse_condition (code);
9381 }
9382 diff = ct - cf;
9383
9384 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9385 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9386 tmp = gen_reg_rtx (mode);
9387
9388 if (mode == DImode)
9389 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9390 else
9391 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9392 }
9393 else
9394 {
9395 if (code == GT || code == GE)
9396 code = reverse_condition (code);
9397 else
9398 {
9399 HOST_WIDE_INT tmp = ct;
9400 ct = cf;
9401 cf = tmp;
9402 }
9403 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9404 ix86_compare_op1, VOIDmode, 0, -1);
9405 }
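/* At this point tmp holds an all-zeros or all-ones mask derived from the
comparison; the arithmetic below folds the constants ct and cf into the
result without any branch. */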
9406
9407 if (diff == 1)
9408 {
9409 /*
9410 * cmpl op0,op1
9411 * sbbl dest,dest
9412 * [addl dest, ct]
9413 *
9414 * Size 5 - 8.
9415 */
9416 if (ct)
9417 tmp = expand_simple_binop (mode, PLUS,
9418 tmp, GEN_INT (ct),
9419 copy_rtx (tmp), 1, OPTAB_DIRECT);
9420 }
9421 else if (cf == -1)
9422 {
9423 /*
9424 * cmpl op0,op1
9425 * sbbl dest,dest
9426 * orl $ct, dest
9427 *
9428 * Size 8.
9429 */
9430 tmp = expand_simple_binop (mode, IOR,
9431 tmp, GEN_INT (ct),
9432 copy_rtx (tmp), 1, OPTAB_DIRECT);
9433 }
9434 else if (diff == -1 && ct)
9435 {
9436 /*
9437 * cmpl op0,op1
9438 * sbbl dest,dest
9439 * notl dest
9440 * [addl dest, cf]
9441 *
9442 * Size 8 - 11.
9443 */
9444 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9445 if (cf)
9446 tmp = expand_simple_binop (mode, PLUS,
9447 copy_rtx (tmp), GEN_INT (cf),
9448 copy_rtx (tmp), 1, OPTAB_DIRECT);
9449 }
9450 else
9451 {
9452 /*
9453 * cmpl op0,op1
9454 * sbbl dest,dest
9455 * [notl dest]
9456 * andl cf - ct, dest
9457 * [addl dest, ct]
9458 *
9459 * Size 8 - 11.
9460 */
9461
9462 if (cf == 0)
9463 {
9464 cf = ct;
9465 ct = 0;
9466 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9467 }
9468
9469 tmp = expand_simple_binop (mode, AND,
9470 copy_rtx (tmp),
9471 gen_int_mode (cf - ct, mode),
9472 copy_rtx (tmp), 1, OPTAB_DIRECT);
9473 if (ct)
9474 tmp = expand_simple_binop (mode, PLUS,
9475 copy_rtx (tmp), GEN_INT (ct),
9476 copy_rtx (tmp), 1, OPTAB_DIRECT);
9477 }
9478
9479 if (!rtx_equal_p (tmp, out))
9480 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9481
9482 return 1; /* DONE */
9483 }
9484
9485 if (diff < 0)
9486 {
9487 HOST_WIDE_INT tmp;
9488 tmp = ct, ct = cf, cf = tmp;
9489 diff = -diff;
9490 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9491 {
9492 /* We may be reversing an unordered compare to a normal compare, which
9493 is not valid in general (we may convert a non-trapping condition
9494 to a trapping one); however, on i386 we currently emit all
9495 comparisons unordered. */
9496 compare_code = reverse_condition_maybe_unordered (compare_code);
9497 code = reverse_condition_maybe_unordered (code);
9498 }
9499 else
9500 {
9501 compare_code = reverse_condition (compare_code);
9502 code = reverse_condition (code);
9503 }
9504 }
9505
9506 compare_code = NIL;
9507 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9508 && GET_CODE (ix86_compare_op1) == CONST_INT)
9509 {
9510 if (ix86_compare_op1 == const0_rtx
9511 && (code == LT || code == GE))
9512 compare_code = code;
9513 else if (ix86_compare_op1 == constm1_rtx)
9514 {
9515 if (code == LE)
9516 compare_code = LT;
9517 else if (code == GT)
9518 compare_code = GE;
9519 }
9520 }
9521
9522 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9523 if (compare_code != NIL
9524 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9525 && (cf == -1 || ct == -1))
9526 {
9527 /* If the lea code below could be used, only optimize
9528 if it results in a 2-insn sequence. */
9529
9530 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9531 || diff == 3 || diff == 5 || diff == 9)
9532 || (compare_code == LT && ct == -1)
9533 || (compare_code == GE && cf == -1))
9534 {
9535 /*
9536 * notl op1 (if necessary)
9537 * sarl $31, op1
9538 * orl cf, op1
9539 */
9540 if (ct != -1)
9541 {
9542 cf = ct;
9543 ct = -1;
9544 code = reverse_condition (code);
9545 }
9546
9547 out = emit_store_flag (out, code, ix86_compare_op0,
9548 ix86_compare_op1, VOIDmode, 0, -1);
9549
9550 out = expand_simple_binop (mode, IOR,
9551 out, GEN_INT (cf),
9552 out, 1, OPTAB_DIRECT);
9553 if (out != operands[0])
9554 emit_move_insn (operands[0], out);
9555
9556 return 1; /* DONE */
9557 }
9558 }
9559
9560
9561 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9562 || diff == 3 || diff == 5 || diff == 9)
9563 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9564 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9565 {
9566 /*
9567 * xorl dest,dest
9568 * cmpl op1,op2
9569 * setcc dest
9570 * lea cf(dest*(ct-cf)),dest
9571 *
9572 * Size 14.
9573 *
9574 * This also catches the degenerate setcc-only case.
9575 */
9576
9577 rtx tmp;
9578 int nops;
9579
9580 out = emit_store_flag (out, code, ix86_compare_op0,
9581 ix86_compare_op1, VOIDmode, 0, 1);
9582
9583 nops = 0;
9584 /* On x86_64 the lea instruction operates on Pmode, so we need
9585 to get the arithmetic done in the proper mode to match. */
9586 if (diff == 1)
9587 tmp = out;
9588 else
9589 {
9590 rtx out1;
9591 out1 = out;
9592 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9593 nops++;
9594 if (diff & 1)
9595 {
9596 tmp = gen_rtx_PLUS (mode, tmp, out1);
9597 nops++;
9598 }
9599 }
9600 if (cf != 0)
9601 {
9602 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9603 nops++;
9604 }
9605 if (!rtx_equal_p (tmp, out))
9606 {
9607 if (nops == 1)
9608 out = force_operand (tmp, out);
9609 else
9610 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9611 }
9612 if (!rtx_equal_p (out, operands[0]))
9613 emit_move_insn (operands[0], copy_rtx (out));
9614
9615 return 1; /* DONE */
9616 }
9617
9618 /*
9619 * General case: Jumpful:
9620 * xorl dest,dest cmpl op1, op2
9621 * cmpl op1, op2 movl ct, dest
9622 * setcc dest jcc 1f
9623 * decl dest movl cf, dest
9624 * andl (cf-ct),dest 1:
9625 * addl ct,dest
9626 *
9627 * Size 20. Size 14.
9628 *
9629 * This is reasonably steep, but branch mispredict costs are
9630 * high on modern cpus, so consider failing only if optimizing
9631 * for space.
9632 */
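/* Illustration only - a C sketch of the branchless select performed by the
   jumpless sequence in the left column above (the helper name select_const
   is hypothetical and not part of GCC):

     static int
     select_const (int cond, int ct, int cf)
     {
       int mask = cond ? 0 : -1;          setcc ; decl gives 0 or -1
       return (mask & (cf - ct)) + ct;    andl (cf - ct) ; addl ct
     }

   When the setcc condition holds the mask is 0 and the result is ct;
   otherwise the mask is -1 and the result is cf.  */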
9633
9634 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9635 && BRANCH_COST >= 2)
9636 {
9637 if (cf == 0)
9638 {
9639 cf = ct;
9640 ct = 0;
9641 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9642 /* We may be reversing an unordered compare to a normal compare,
9643 which is not valid in general (we may convert a non-trapping
9644 condition into a trapping one); however, on i386 we currently
9645 emit all comparisons unordered. */
9646 code = reverse_condition_maybe_unordered (code);
9647 else
9648 {
9649 code = reverse_condition (code);
9650 if (compare_code != NIL)
9651 compare_code = reverse_condition (compare_code);
9652 }
9653 }
9654
9655 if (compare_code != NIL)
9656 {
9657 /* notl op1 (if needed)
9658 sarl $31, op1
9659 andl (cf-ct), op1
9660 addl ct, op1
9661
9662 For x < 0 (resp. x <= -1) there will be no notl,
9663 so if possible swap the constants to get rid of the
9664 complement.
9665 True/false will be -1/0 while code below (store flag
9666 followed by decrement) is 0/-1, so the constants need
9667 to be exchanged once more. */
9668
9669 if (compare_code == GE || !cf)
9670 {
9671 code = reverse_condition (code);
9672 compare_code = LT;
9673 }
9674 else
9675 {
9676 HOST_WIDE_INT tmp = cf;
9677 cf = ct;
9678 ct = tmp;
9679 }
9680
9681 out = emit_store_flag (out, code, ix86_compare_op0,
9682 ix86_compare_op1, VOIDmode, 0, -1);
9683 }
9684 else
9685 {
9686 out = emit_store_flag (out, code, ix86_compare_op0,
9687 ix86_compare_op1, VOIDmode, 0, 1);
9688
9689 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9690 copy_rtx (out), 1, OPTAB_DIRECT);
9691 }
9692
9693 out = expand_simple_binop (mode, AND, copy_rtx (out),
9694 gen_int_mode (cf - ct, mode),
9695 copy_rtx (out), 1, OPTAB_DIRECT);
9696 if (ct)
9697 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9698 copy_rtx (out), 1, OPTAB_DIRECT);
9699 if (!rtx_equal_p (out, operands[0]))
9700 emit_move_insn (operands[0], copy_rtx (out));
9701
9702 return 1; /* DONE */
9703 }
9704 }
9705
9706 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9707 {
9708 /* Try a few things more with specific constants and a variable. */
9709
9710 optab op;
9711 rtx var, orig_out, out, tmp;
9712
9713 if (BRANCH_COST <= 2)
9714 return 0; /* FAIL */
9715
9716 /* If one of the two operands is an interesting constant, load a
9717 constant with the above and mask it in with a logical operation. */
9718
9719 if (GET_CODE (operands[2]) == CONST_INT)
9720 {
9721 var = operands[3];
9722 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9723 operands[3] = constm1_rtx, op = and_optab;
9724 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9725 operands[3] = const0_rtx, op = ior_optab;
9726 else
9727 return 0; /* FAIL */
9728 }
9729 else if (GET_CODE (operands[3]) == CONST_INT)
9730 {
9731 var = operands[2];
9732 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9733 operands[2] = constm1_rtx, op = and_optab;
9734 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9735 operands[2] = const0_rtx, op = ior_optab;
9736 else
9737 return 0; /* FAIL */
9738 }
9739 else
9740 return 0; /* FAIL */
9741
9742 orig_out = operands[0];
9743 tmp = gen_reg_rtx (mode);
9744 operands[0] = tmp;
9745
9746 /* Recurse to get the constant loaded. */
9747 if (ix86_expand_int_movcc (operands) == 0)
9748 return 0; /* FAIL */
9749
9750 /* Mask in the interesting variable. */
9751 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9752 OPTAB_WIDEN);
9753 if (!rtx_equal_p (out, orig_out))
9754 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9755
9756 return 1; /* DONE */
9757 }
9758
9759 /*
9760 * For comparison with above,
9761 *
9762 * movl cf,dest
9763 * movl ct,tmp
9764 * cmpl op1,op2
9765 * cmovcc tmp,dest
9766 *
9767 * Size 15.
9768 */
9769
9770 if (! nonimmediate_operand (operands[2], mode))
9771 operands[2] = force_reg (mode, operands[2]);
9772 if (! nonimmediate_operand (operands[3], mode))
9773 operands[3] = force_reg (mode, operands[3]);
9774
9775 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9776 {
9777 rtx tmp = gen_reg_rtx (mode);
9778 emit_move_insn (tmp, operands[3]);
9779 operands[3] = tmp;
9780 }
9781 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9782 {
9783 rtx tmp = gen_reg_rtx (mode);
9784 emit_move_insn (tmp, operands[2]);
9785 operands[2] = tmp;
9786 }
9787
9788 if (! register_operand (operands[2], VOIDmode)
9789 && (mode == QImode
9790 || ! register_operand (operands[3], VOIDmode)))
9791 operands[2] = force_reg (mode, operands[2]);
9792
9793 if (mode == QImode
9794 && ! register_operand (operands[3], VOIDmode))
9795 operands[3] = force_reg (mode, operands[3]);
9796
9797 emit_insn (compare_seq);
9798 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9799 gen_rtx_IF_THEN_ELSE (mode,
9800 compare_op, operands[2],
9801 operands[3])));
9802 if (bypass_test)
9803 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9804 gen_rtx_IF_THEN_ELSE (mode,
9805 bypass_test,
9806 copy_rtx (operands[3]),
9807 copy_rtx (operands[0]))));
9808 if (second_test)
9809 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9810 gen_rtx_IF_THEN_ELSE (mode,
9811 second_test,
9812 copy_rtx (operands[2]),
9813 copy_rtx (operands[0]))));
9814
9815 return 1; /* DONE */
9816 }
9817
9818 int
9819 ix86_expand_fp_movcc (operands)
9820 rtx operands[];
9821 {
9822 enum rtx_code code;
9823 rtx tmp;
9824 rtx compare_op, second_test, bypass_test;
9825
9826 /* For SF/DFmode conditional moves based on comparisons
9827 in same mode, we may want to use SSE min/max instructions. */
9828 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9829 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9830 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9831 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9832 && (!TARGET_IEEE_FP
9833 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9834 /* We may be called from the post-reload splitter. */
9835 && (!REG_P (operands[0])
9836 || SSE_REG_P (operands[0])
9837 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9838 {
9839 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9840 code = GET_CODE (operands[1]);
9841
9842 /* See if we have (cross) match between comparison operands and
9843 conditional move operands. */
9844 if (rtx_equal_p (operands[2], op1))
9845 {
9846 rtx tmp = op0;
9847 op0 = op1;
9848 op1 = tmp;
9849 code = reverse_condition_maybe_unordered (code);
9850 }
9851 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9852 {
9853 /* Check for min operation. */
9854 if (code == LT || code == UNLE)
9855 {
9856 if (code == UNLE)
9857 {
9858 rtx tmp = op0;
9859 op0 = op1;
9860 op1 = tmp;
9861 }
9862 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9863 if (memory_operand (op0, VOIDmode))
9864 op0 = force_reg (GET_MODE (operands[0]), op0);
9865 if (GET_MODE (operands[0]) == SFmode)
9866 emit_insn (gen_minsf3 (operands[0], op0, op1));
9867 else
9868 emit_insn (gen_mindf3 (operands[0], op0, op1));
9869 return 1;
9870 }
9871 /* Check for max operation. */
9872 if (code == GT || code == UNGE)
9873 {
9874 if (code == UNGE)
9875 {
9876 rtx tmp = op0;
9877 op0 = op1;
9878 op1 = tmp;
9879 }
9880 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9881 if (memory_operand (op0, VOIDmode))
9882 op0 = force_reg (GET_MODE (operands[0]), op0);
9883 if (GET_MODE (operands[0]) == SFmode)
9884 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9885 else
9886 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9887 return 1;
9888 }
9889 }
9890 /* Arrange for the condition to be an sse_comparison_operator. When we
9891 are in non-IEEE mode, try to canonicalize the destination operand
9892 to be first in the comparison - this helps reload avoid extra
9893 moves. */
9894 if (!sse_comparison_operator (operands[1], VOIDmode)
9895 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9896 {
9897 rtx tmp = ix86_compare_op0;
9898 ix86_compare_op0 = ix86_compare_op1;
9899 ix86_compare_op1 = tmp;
9900 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9901 VOIDmode, ix86_compare_op0,
9902 ix86_compare_op1);
9903 }
9904 /* Similarly, try to make the result the first operand of the conditional
9905 move. We also don't support the NE comparison on SSE, so try to
9906 avoid it. */
9907 if ((rtx_equal_p (operands[0], operands[3])
9908 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9909 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9910 {
9911 rtx tmp = operands[2];
9912 operands[2] = operands[3];
9913 operands[3] = tmp;
9914 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9915 (GET_CODE (operands[1])),
9916 VOIDmode, ix86_compare_op0,
9917 ix86_compare_op1);
9918 }
9919 if (GET_MODE (operands[0]) == SFmode)
9920 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9921 operands[2], operands[3],
9922 ix86_compare_op0, ix86_compare_op1));
9923 else
9924 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9925 operands[2], operands[3],
9926 ix86_compare_op0, ix86_compare_op1));
9927 return 1;
9928 }
9929
9930 /* The floating point conditional move instructions don't directly
9931 support conditions resulting from a signed integer comparison. */
9932
9933 code = GET_CODE (operands[1]);
9934 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9935
9939 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9940 {
9941 if (second_test != NULL || bypass_test != NULL)
9942 abort ();
9943 tmp = gen_reg_rtx (QImode);
9944 ix86_expand_setcc (code, tmp);
9945 code = NE;
9946 ix86_compare_op0 = tmp;
9947 ix86_compare_op1 = const0_rtx;
9948 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9949 }
9950 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9951 {
9952 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9953 emit_move_insn (tmp, operands[3]);
9954 operands[3] = tmp;
9955 }
9956 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9957 {
9958 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9959 emit_move_insn (tmp, operands[2]);
9960 operands[2] = tmp;
9961 }
9962
9963 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9964 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9965 compare_op,
9966 operands[2],
9967 operands[3])));
9968 if (bypass_test)
9969 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9970 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9971 bypass_test,
9972 operands[3],
9973 operands[0])));
9974 if (second_test)
9975 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9976 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9977 second_test,
9978 operands[2],
9979 operands[0])));
9980
9981 return 1;
9982 }
9983
9984 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9985 works for floating point parameters and non-offsettable memories.
9986 For pushes, it returns just stack offsets; the values will be saved
9987 in the right order. At most three parts are generated. */
9988
9989 static int
9990 ix86_split_to_parts (operand, parts, mode)
9991 rtx operand;
9992 rtx *parts;
9993 enum machine_mode mode;
9994 {
9995 int size;
9996
9997 if (!TARGET_64BIT)
9998 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9999 else
10000 size = (GET_MODE_SIZE (mode) + 4) / 8;
10001
10002 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10003 abort ();
10004 if (size < 2 || size > 3)
10005 abort ();
10006
10007 /* Optimize constant pool references into immediates. This is used by fp
10008 moves, which force all constants to memory to allow combining. */
10009 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10010 {
10011 rtx tmp = maybe_get_pool_constant (operand);
10012 if (tmp)
10013 operand = tmp;
10014 }
10015
10016 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10017 {
10018 /* The only non-offsettable memories we handle are pushes. */
10019 if (! push_operand (operand, VOIDmode))
10020 abort ();
10021
10022 operand = copy_rtx (operand);
10023 PUT_MODE (operand, Pmode);
10024 parts[0] = parts[1] = parts[2] = operand;
10025 }
10026 else if (!TARGET_64BIT)
10027 {
10028 if (mode == DImode)
10029 split_di (&operand, 1, &parts[0], &parts[1]);
10030 else
10031 {
10032 if (REG_P (operand))
10033 {
10034 if (!reload_completed)
10035 abort ();
10036 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10037 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10038 if (size == 3)
10039 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10040 }
10041 else if (offsettable_memref_p (operand))
10042 {
10043 operand = adjust_address (operand, SImode, 0);
10044 parts[0] = operand;
10045 parts[1] = adjust_address (operand, SImode, 4);
10046 if (size == 3)
10047 parts[2] = adjust_address (operand, SImode, 8);
10048 }
10049 else if (GET_CODE (operand) == CONST_DOUBLE)
10050 {
10051 REAL_VALUE_TYPE r;
10052 long l[4];
10053
10054 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10055 switch (mode)
10056 {
10057 case XFmode:
10058 case TFmode:
10059 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10060 parts[2] = gen_int_mode (l[2], SImode);
10061 break;
10062 case DFmode:
10063 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10064 break;
10065 default:
10066 abort ();
10067 }
10068 parts[1] = gen_int_mode (l[1], SImode);
10069 parts[0] = gen_int_mode (l[0], SImode);
10070 }
10071 else
10072 abort ();
10073 }
10074 }
10075 else
10076 {
10077 if (mode == TImode)
10078 split_ti (&operand, 1, &parts[0], &parts[1]);
10079 if (mode == XFmode || mode == TFmode)
10080 {
10081 if (REG_P (operand))
10082 {
10083 if (!reload_completed)
10084 abort ();
10085 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10086 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10087 }
10088 else if (offsettable_memref_p (operand))
10089 {
10090 operand = adjust_address (operand, DImode, 0);
10091 parts[0] = operand;
10092 parts[1] = adjust_address (operand, SImode, 8);
10093 }
10094 else if (GET_CODE (operand) == CONST_DOUBLE)
10095 {
10096 REAL_VALUE_TYPE r;
10097 long l[3];
10098
10099 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10100 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10101 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10102 if (HOST_BITS_PER_WIDE_INT >= 64)
10103 parts[0]
10104 = gen_int_mode
10105 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10106 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10107 DImode);
10108 else
10109 parts[0] = immed_double_const (l[0], l[1], DImode);
10110 parts[1] = gen_int_mode (l[2], SImode);
10111 }
10112 else
10113 abort ();
10114 }
10115 }
10116
10117 return size;
10118 }
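/* Illustration only (hypothetical helper, not part of GCC): on a 32bit
   target the register case above simply takes consecutive SImode hard
   registers, which corresponds to splitting a 64-bit value like this:

     static void
     split_di64 (unsigned long long x, unsigned int *lo, unsigned int *hi)
     {
       *lo = (unsigned int) x;            part 0, REGNO + 0
       *hi = (unsigned int) (x >> 32);    part 1, REGNO + 1
     }
 */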
10119
10120 /* Emit insns to perform a move or push of DI, DF, and XF values.
10121 Return false when normal moves are needed; true when all required
10122 insns have been emitted. Operands 2-4 contain the input values
10123 in the correct order; operands 5-7 contain the output values. */
10124
10125 void
10126 ix86_split_long_move (operands)
10127 rtx operands[];
10128 {
10129 rtx part[2][3];
10130 int nparts;
10131 int push = 0;
10132 int collisions = 0;
10133 enum machine_mode mode = GET_MODE (operands[0]);
10134
10135 /* The DFmode expanders may ask us to move a double.
10136 For a 64bit target this is a single move. By hiding that fact
10137 here we simplify the i386.md splitters. */
10138 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10139 {
10140 /* Optimize constant pool references into immediates. This is used by
10141 fp moves, which force all constants to memory to allow combining. */
10142
10143 if (GET_CODE (operands[1]) == MEM
10144 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10145 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10146 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10147 if (push_operand (operands[0], VOIDmode))
10148 {
10149 operands[0] = copy_rtx (operands[0]);
10150 PUT_MODE (operands[0], Pmode);
10151 }
10152 else
10153 operands[0] = gen_lowpart (DImode, operands[0]);
10154 operands[1] = gen_lowpart (DImode, operands[1]);
10155 emit_move_insn (operands[0], operands[1]);
10156 return;
10157 }
10158
10159 /* The only non-offsettable memory we handle is a push. */
10160 if (push_operand (operands[0], VOIDmode))
10161 push = 1;
10162 else if (GET_CODE (operands[0]) == MEM
10163 && ! offsettable_memref_p (operands[0]))
10164 abort ();
10165
10166 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10167 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10168
10169 /* When emitting a push, take care with source operands on the stack. */
10170 if (push && GET_CODE (operands[1]) == MEM
10171 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10172 {
10173 if (nparts == 3)
10174 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10175 XEXP (part[1][2], 0));
10176 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10177 XEXP (part[1][1], 0));
10178 }
10179
10180 /* We need to do the copy in the right order in case an address register
10181 of the source overlaps the destination. */
10182 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10183 {
10184 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10185 collisions++;
10186 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10187 collisions++;
10188 if (nparts == 3
10189 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10190 collisions++;
10191
10192 /* Collision in the middle part can be handled by reordering. */
10193 if (collisions == 1 && nparts == 3
10194 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10195 {
10196 rtx tmp;
10197 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10198 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10199 }
10200
10201 /* If there are more collisions, we can't handle it by reordering.
10202 Do an lea to the last part and use only one colliding move. */
10203 else if (collisions > 1)
10204 {
10205 collisions = 1;
10206 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10207 XEXP (part[1][0], 0)));
10208 part[1][0] = change_address (part[1][0],
10209 TARGET_64BIT ? DImode : SImode,
10210 part[0][nparts - 1]);
10211 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10212 if (nparts == 3)
10213 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10214 }
10215 }
10216
10217 if (push)
10218 {
10219 if (!TARGET_64BIT)
10220 {
10221 if (nparts == 3)
10222 {
10223 /* We use only the first 12 bytes of a TFmode value, but for pushing we
10224 are required to adjust the stack as if we were pushing a real 16-byte
10225 value. */
10226 if (mode == TFmode && !TARGET_64BIT)
10227 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10228 GEN_INT (-4)));
10229 emit_move_insn (part[0][2], part[1][2]);
10230 }
10231 }
10232 else
10233 {
10234 /* In 64bit mode no 32bit push is available. If the operand is a
10235 register, that is OK - we just use the larger counterpart. We also
10236 retype memory operands - these come from an attempt to avoid the REX
10237 prefix when moving the second half of a TFmode value. */
10238 if (GET_MODE (part[1][1]) == SImode)
10239 {
10240 if (GET_CODE (part[1][1]) == MEM)
10241 part[1][1] = adjust_address (part[1][1], DImode, 0);
10242 else if (REG_P (part[1][1]))
10243 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10244 else
10245 abort ();
10246 if (GET_MODE (part[1][0]) == SImode)
10247 part[1][0] = part[1][1];
10248 }
10249 }
10250 emit_move_insn (part[0][1], part[1][1]);
10251 emit_move_insn (part[0][0], part[1][0]);
10252 return;
10253 }
10254
10255 /* Choose the correct order so we do not overwrite the source before it is copied. */
10256 if ((REG_P (part[0][0])
10257 && REG_P (part[1][1])
10258 && (REGNO (part[0][0]) == REGNO (part[1][1])
10259 || (nparts == 3
10260 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10261 || (collisions > 0
10262 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10263 {
10264 if (nparts == 3)
10265 {
10266 operands[2] = part[0][2];
10267 operands[3] = part[0][1];
10268 operands[4] = part[0][0];
10269 operands[5] = part[1][2];
10270 operands[6] = part[1][1];
10271 operands[7] = part[1][0];
10272 }
10273 else
10274 {
10275 operands[2] = part[0][1];
10276 operands[3] = part[0][0];
10277 operands[5] = part[1][1];
10278 operands[6] = part[1][0];
10279 }
10280 }
10281 else
10282 {
10283 if (nparts == 3)
10284 {
10285 operands[2] = part[0][0];
10286 operands[3] = part[0][1];
10287 operands[4] = part[0][2];
10288 operands[5] = part[1][0];
10289 operands[6] = part[1][1];
10290 operands[7] = part[1][2];
10291 }
10292 else
10293 {
10294 operands[2] = part[0][0];
10295 operands[3] = part[0][1];
10296 operands[5] = part[1][0];
10297 operands[6] = part[1][1];
10298 }
10299 }
10300 emit_move_insn (operands[2], operands[5]);
10301 emit_move_insn (operands[3], operands[6]);
10302 if (nparts == 3)
10303 emit_move_insn (operands[4], operands[7]);
10304
10305 return;
10306 }
10307
10308 void
10309 ix86_split_ashldi (operands, scratch)
10310 rtx *operands, scratch;
10311 {
10312 rtx low[2], high[2];
10313 int count;
10314
10315 if (GET_CODE (operands[2]) == CONST_INT)
10316 {
10317 split_di (operands, 2, low, high);
10318 count = INTVAL (operands[2]) & 63;
10319
10320 if (count >= 32)
10321 {
10322 emit_move_insn (high[0], low[1]);
10323 emit_move_insn (low[0], const0_rtx);
10324
10325 if (count > 32)
10326 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10327 }
10328 else
10329 {
10330 if (!rtx_equal_p (operands[0], operands[1]))
10331 emit_move_insn (operands[0], operands[1]);
10332 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10333 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10334 }
10335 }
10336 else
10337 {
10338 if (!rtx_equal_p (operands[0], operands[1]))
10339 emit_move_insn (operands[0], operands[1]);
10340
10341 split_di (operands, 1, low, high);
10342
10343 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10344 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10345
10346 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10347 {
10348 if (! no_new_pseudos)
10349 scratch = force_reg (SImode, const0_rtx);
10350 else
10351 emit_move_insn (scratch, const0_rtx);
10352
10353 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10354 scratch));
10355 }
10356 else
10357 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10358 }
10359 }
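/* Illustration only (hypothetical helper, not part of GCC): for a constant
   shift count the splitter above corresponds to this C sketch of a 64-bit
   left shift built from 32-bit halves:

     static void
     shl64 (unsigned int *lo, unsigned int *hi, unsigned int count)
     {
       count &= 63;
       if (count >= 32)
         {
           *hi = *lo << (count - 32);     high word comes from the low word
           *lo = 0;
         }
       else if (count > 0)
         {
           *hi = (*hi << count) | (*lo >> (32 - count));    like shld
           *lo <<= count;
         }
     }
 */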
10360
10361 void
10362 ix86_split_ashrdi (operands, scratch)
10363 rtx *operands, scratch;
10364 {
10365 rtx low[2], high[2];
10366 int count;
10367
10368 if (GET_CODE (operands[2]) == CONST_INT)
10369 {
10370 split_di (operands, 2, low, high);
10371 count = INTVAL (operands[2]) & 63;
10372
10373 if (count >= 32)
10374 {
10375 emit_move_insn (low[0], high[1]);
10376
10377 if (! reload_completed)
10378 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10379 else
10380 {
10381 emit_move_insn (high[0], low[0]);
10382 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10383 }
10384
10385 if (count > 32)
10386 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10387 }
10388 else
10389 {
10390 if (!rtx_equal_p (operands[0], operands[1]))
10391 emit_move_insn (operands[0], operands[1]);
10392 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10393 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10394 }
10395 }
10396 else
10397 {
10398 if (!rtx_equal_p (operands[0], operands[1]))
10399 emit_move_insn (operands[0], operands[1]);
10400
10401 split_di (operands, 1, low, high);
10402
10403 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10404 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10405
10406 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10407 {
10408 if (! no_new_pseudos)
10409 scratch = gen_reg_rtx (SImode);
10410 emit_move_insn (scratch, high[0]);
10411 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10412 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10413 scratch));
10414 }
10415 else
10416 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10417 }
10418 }
10419
10420 void
10421 ix86_split_lshrdi (operands, scratch)
10422 rtx *operands, scratch;
10423 {
10424 rtx low[2], high[2];
10425 int count;
10426
10427 if (GET_CODE (operands[2]) == CONST_INT)
10428 {
10429 split_di (operands, 2, low, high);
10430 count = INTVAL (operands[2]) & 63;
10431
10432 if (count >= 32)
10433 {
10434 emit_move_insn (low[0], high[1]);
10435 emit_move_insn (high[0], const0_rtx);
10436
10437 if (count > 32)
10438 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10439 }
10440 else
10441 {
10442 if (!rtx_equal_p (operands[0], operands[1]))
10443 emit_move_insn (operands[0], operands[1]);
10444 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10445 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10446 }
10447 }
10448 else
10449 {
10450 if (!rtx_equal_p (operands[0], operands[1]))
10451 emit_move_insn (operands[0], operands[1]);
10452
10453 split_di (operands, 1, low, high);
10454
10455 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10456 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10457
10458 /* Heh. By reversing the arguments, we can reuse this pattern. */
10459 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10460 {
10461 if (! no_new_pseudos)
10462 scratch = force_reg (SImode, const0_rtx);
10463 else
10464 emit_move_insn (scratch, const0_rtx);
10465
10466 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10467 scratch));
10468 }
10469 else
10470 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10471 }
10472 }
10473
10474 /* Helper function for the string operations below. Test whether VARIABLE
10475 is aligned to VALUE bytes; if (VARIABLE & VALUE) is zero, jump to the label. */
10476 static rtx
10477 ix86_expand_aligntest (variable, value)
10478 rtx variable;
10479 int value;
10480 {
10481 rtx label = gen_label_rtx ();
10482 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10483 if (GET_MODE (variable) == DImode)
10484 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10485 else
10486 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10487 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10488 1, label);
10489 return label;
10490 }
10491
10492 /* Decrease COUNTREG by VALUE. */
10493 static void
10494 ix86_adjust_counter (countreg, value)
10495 rtx countreg;
10496 HOST_WIDE_INT value;
10497 {
10498 if (GET_MODE (countreg) == DImode)
10499 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10500 else
10501 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10502 }
10503
10504 /* Zero extend EXP, which may be in SImode, into a Pmode register. */
10505 rtx
10506 ix86_zero_extend_to_Pmode (exp)
10507 rtx exp;
10508 {
10509 rtx r;
10510 if (GET_MODE (exp) == VOIDmode)
10511 return force_reg (Pmode, exp);
10512 if (GET_MODE (exp) == Pmode)
10513 return copy_to_mode_reg (Pmode, exp);
10514 r = gen_reg_rtx (Pmode);
10515 emit_insn (gen_zero_extendsidi2 (r, exp));
10516 return r;
10517 }
10518
10519 /* Expand string move (memcpy) operation. Use i386 string operations when
10520 profitable. expand_clrstr contains similar code. */
10521 int
10522 ix86_expand_movstr (dst, src, count_exp, align_exp)
10523 rtx dst, src, count_exp, align_exp;
10524 {
10525 rtx srcreg, destreg, countreg;
10526 enum machine_mode counter_mode;
10527 HOST_WIDE_INT align = 0;
10528 unsigned HOST_WIDE_INT count = 0;
10529 rtx insns;
10530
10531 start_sequence ();
10532
10533 if (GET_CODE (align_exp) == CONST_INT)
10534 align = INTVAL (align_exp);
10535
10536 /* This simple hack avoids all inlining code and simplifies code below. */
10537 if (!TARGET_ALIGN_STRINGOPS)
10538 align = 64;
10539
10540 if (GET_CODE (count_exp) == CONST_INT)
10541 count = INTVAL (count_exp);
10542
10543 /* Figure out the proper mode for the counter. For 32bit targets it is
10544 always SImode; for 64bit targets use SImode when possible, otherwise DImode.
10545 Set count to the number of bytes copied when known at compile time. */
10546 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10547 || x86_64_zero_extended_value (count_exp))
10548 counter_mode = SImode;
10549 else
10550 counter_mode = DImode;
10551
10552 if (counter_mode != SImode && counter_mode != DImode)
10553 abort ();
10554
10555 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10556 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10557
10558 emit_insn (gen_cld ());
10559
10560 /* When optimizing for size, emit a simple rep ; movsb instruction for
10561 counts not divisible by 4. */
10562
10563 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10564 {
10565 countreg = ix86_zero_extend_to_Pmode (count_exp);
10566 if (TARGET_64BIT)
10567 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10568 destreg, srcreg, countreg));
10569 else
10570 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10571 destreg, srcreg, countreg));
10572 }
10573
10574 /* For constant aligned (or small unaligned) copies use rep movsl
10575 followed by code copying the rest. For PentiumPro ensure 8 byte
10576 alignment to allow rep movsl acceleration. */
10577
10578 else if (count != 0
10579 && (align >= 8
10580 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10581 || optimize_size || count < (unsigned int) 64))
10582 {
10583 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10584 if (count & ~(size - 1))
10585 {
10586 countreg = copy_to_mode_reg (counter_mode,
10587 GEN_INT ((count >> (size == 4 ? 2 : 3))
10588 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10589 countreg = ix86_zero_extend_to_Pmode (countreg);
10590 if (size == 4)
10591 {
10592 if (TARGET_64BIT)
10593 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10594 destreg, srcreg, countreg));
10595 else
10596 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10597 destreg, srcreg, countreg));
10598 }
10599 else
10600 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10601 destreg, srcreg, countreg));
10602 }
10603 if (size == 8 && (count & 0x04))
10604 emit_insn (gen_strmovsi (destreg, srcreg));
10605 if (count & 0x02)
10606 emit_insn (gen_strmovhi (destreg, srcreg));
10607 if (count & 0x01)
10608 emit_insn (gen_strmovqi (destreg, srcreg));
10609 }
10610 /* The generic code based on the glibc implementation:
10611 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10612 allowing accelerated copying there)
10613 - copy the data using rep movsl
10614 - copy the rest. */
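/* Illustration only (hypothetical helper, not emitted by GCC): the strategy
   listed above corresponds roughly to this C sketch, where the middle loop
   is the part handled by "rep ; movsl":

     static void
     copy_generic (unsigned char *dst, const unsigned char *src,
                   unsigned long n)
     {
       while (n && ((unsigned long) dst & 3))       align the destination
         { *dst++ = *src++; n--; }
       for (; n >= 4; n -= 4, dst += 4, src += 4)   bulk 4-byte copies
         { dst[0] = src[0]; dst[1] = src[1];
           dst[2] = src[2]; dst[3] = src[3]; }
       while (n--)                                  copy the remaining tail
         *dst++ = *src++;
     }
 */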
10615 else
10616 {
10617 rtx countreg2;
10618 rtx label = NULL;
10619 int desired_alignment = (TARGET_PENTIUMPRO
10620 && (count == 0 || count >= (unsigned int) 260)
10621 ? 8 : UNITS_PER_WORD);
10622
10623 /* In case we don't know anything about the alignment, default to the
10624 library version, since it is usually equally fast and results in
10625 shorter code.
10626
10627 Also emit a call when we know that the count is large and the call
10628 overhead will not be important. */
10629 if (!TARGET_INLINE_ALL_STRINGOPS
10630 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10631 {
10632 end_sequence ();
10633 return 0;
10634 }
10635
10636 if (TARGET_SINGLE_STRINGOP)
10637 emit_insn (gen_cld ());
10638
10639 countreg2 = gen_reg_rtx (Pmode);
10640 countreg = copy_to_mode_reg (counter_mode, count_exp);
10641
10642 /* We don't use loops to align the destination or to copy parts smaller
10643 than 4 bytes, because gcc is able to optimize such code better (in
10644 the case the destination or the count really is aligned, gcc is often
10645 able to predict the branches) and also it is friendlier to the
10646 hardware branch prediction.
10647
10648 Using loops is beneficial for the generic case, because we can
10649 handle small counts using the loops. Many CPUs (such as the Athlon)
10650 have large REP prefix setup costs.
10651
10652 This is quite costly. Maybe we can revisit this decision later or
10653 add some customizability to this code. */
10654
10655 if (count == 0 && align < desired_alignment)
10656 {
10657 label = gen_label_rtx ();
10658 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10659 LEU, 0, counter_mode, 1, label);
10660 }
10661 if (align <= 1)
10662 {
10663 rtx label = ix86_expand_aligntest (destreg, 1);
10664 emit_insn (gen_strmovqi (destreg, srcreg));
10665 ix86_adjust_counter (countreg, 1);
10666 emit_label (label);
10667 LABEL_NUSES (label) = 1;
10668 }
10669 if (align <= 2)
10670 {
10671 rtx label = ix86_expand_aligntest (destreg, 2);
10672 emit_insn (gen_strmovhi (destreg, srcreg));
10673 ix86_adjust_counter (countreg, 2);
10674 emit_label (label);
10675 LABEL_NUSES (label) = 1;
10676 }
10677 if (align <= 4 && desired_alignment > 4)
10678 {
10679 rtx label = ix86_expand_aligntest (destreg, 4);
10680 emit_insn (gen_strmovsi (destreg, srcreg));
10681 ix86_adjust_counter (countreg, 4);
10682 emit_label (label);
10683 LABEL_NUSES (label) = 1;
10684 }
10685
10686 if (label && desired_alignment > 4 && !TARGET_64BIT)
10687 {
10688 emit_label (label);
10689 LABEL_NUSES (label) = 1;
10690 label = NULL_RTX;
10691 }
10692 if (!TARGET_SINGLE_STRINGOP)
10693 emit_insn (gen_cld ());
10694 if (TARGET_64BIT)
10695 {
10696 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10697 GEN_INT (3)));
10698 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10699 destreg, srcreg, countreg2));
10700 }
10701 else
10702 {
10703 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10704 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10705 destreg, srcreg, countreg2));
10706 }
10707
10708 if (label)
10709 {
10710 emit_label (label);
10711 LABEL_NUSES (label) = 1;
10712 }
10713 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10714 emit_insn (gen_strmovsi (destreg, srcreg));
10715 if ((align <= 4 || count == 0) && TARGET_64BIT)
10716 {
10717 rtx label = ix86_expand_aligntest (countreg, 4);
10718 emit_insn (gen_strmovsi (destreg, srcreg));
10719 emit_label (label);
10720 LABEL_NUSES (label) = 1;
10721 }
10722 if (align > 2 && count != 0 && (count & 2))
10723 emit_insn (gen_strmovhi (destreg, srcreg));
10724 if (align <= 2 || count == 0)
10725 {
10726 rtx label = ix86_expand_aligntest (countreg, 2);
10727 emit_insn (gen_strmovhi (destreg, srcreg));
10728 emit_label (label);
10729 LABEL_NUSES (label) = 1;
10730 }
10731 if (align > 1 && count != 0 && (count & 1))
10732 emit_insn (gen_strmovqi (destreg, srcreg));
10733 if (align <= 1 || count == 0)
10734 {
10735 rtx label = ix86_expand_aligntest (countreg, 1);
10736 emit_insn (gen_strmovqi (destreg, srcreg));
10737 emit_label (label);
10738 LABEL_NUSES (label) = 1;
10739 }
10740 }
10741
10742 insns = get_insns ();
10743 end_sequence ();
10744
10745 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10746 emit_insn (insns);
10747 return 1;
10748 }
10749
10750 /* Expand string clear operation (bzero). Use i386 string operations when
10751 profitable. expand_movstr contains similar code. */
10752 int
10753 ix86_expand_clrstr (src, count_exp, align_exp)
10754 rtx src, count_exp, align_exp;
10755 {
10756 rtx destreg, zeroreg, countreg;
10757 enum machine_mode counter_mode;
10758 HOST_WIDE_INT align = 0;
10759 unsigned HOST_WIDE_INT count = 0;
10760
10761 if (GET_CODE (align_exp) == CONST_INT)
10762 align = INTVAL (align_exp);
10763
10764 /* This simple hack avoids all inlining code and simplifies code below. */
10765 if (!TARGET_ALIGN_STRINGOPS)
10766 align = 32;
10767
10768 if (GET_CODE (count_exp) == CONST_INT)
10769 count = INTVAL (count_exp);
10770 /* Figure out the proper mode for the counter. For 32bit targets it is
10771 always SImode; for 64bit targets use SImode when possible, otherwise DImode.
10772 Set count to the number of bytes copied when known at compile time. */
10773 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10774 || x86_64_zero_extended_value (count_exp))
10775 counter_mode = SImode;
10776 else
10777 counter_mode = DImode;
10778
10779 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10780
10781 emit_insn (gen_cld ());
10782
10783 /* When optimizing for size, emit a simple rep ; stosb instruction for
10784 counts not divisible by 4. */
10785
10786 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10787 {
10788 countreg = ix86_zero_extend_to_Pmode (count_exp);
10789 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10790 if (TARGET_64BIT)
10791 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10792 destreg, countreg));
10793 else
10794 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10795 destreg, countreg));
10796 }
10797 else if (count != 0
10798 && (align >= 8
10799 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10800 || optimize_size || count < (unsigned int) 64))
10801 {
10802 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10803 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10804 if (count & ~(size - 1))
10805 {
10806 countreg = copy_to_mode_reg (counter_mode,
10807 GEN_INT ((count >> (size == 4 ? 2 : 3))
10808 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10809 countreg = ix86_zero_extend_to_Pmode (countreg);
10810 if (size == 4)
10811 {
10812 if (TARGET_64BIT)
10813 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10814 destreg, countreg));
10815 else
10816 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10817 destreg, countreg));
10818 }
10819 else
10820 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10821 destreg, countreg));
10822 }
10823 if (size == 8 && (count & 0x04))
10824 emit_insn (gen_strsetsi (destreg,
10825 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10826 if (count & 0x02)
10827 emit_insn (gen_strsethi (destreg,
10828 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10829 if (count & 0x01)
10830 emit_insn (gen_strsetqi (destreg,
10831 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10832 }
10833 else
10834 {
10835 rtx countreg2;
10836 rtx label = NULL;
10837 /* Compute desired alignment of the string operation. */
10838 int desired_alignment = (TARGET_PENTIUMPRO
10839 && (count == 0 || count >= (unsigned int) 260)
10840 ? 8 : UNITS_PER_WORD);
10841
10842 /* In case we don't know anything about the alignment, default to the
10843 library version, since it is usually equally fast and results in
10844 shorter code.
10845
10846 Also emit a call when we know that the count is large and the call
10847 overhead will not be important. */
10848 if (!TARGET_INLINE_ALL_STRINGOPS
10849 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10850 return 0;
10851
10852 if (TARGET_SINGLE_STRINGOP)
10853 emit_insn (gen_cld ());
10854
10855 countreg2 = gen_reg_rtx (Pmode);
10856 countreg = copy_to_mode_reg (counter_mode, count_exp);
10857 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10858
10859 if (count == 0 && align < desired_alignment)
10860 {
10861 label = gen_label_rtx ();
10862 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10863 LEU, 0, counter_mode, 1, label);
10864 }
10865 if (align <= 1)
10866 {
10867 rtx label = ix86_expand_aligntest (destreg, 1);
10868 emit_insn (gen_strsetqi (destreg,
10869 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10870 ix86_adjust_counter (countreg, 1);
10871 emit_label (label);
10872 LABEL_NUSES (label) = 1;
10873 }
10874 if (align <= 2)
10875 {
10876 rtx label = ix86_expand_aligntest (destreg, 2);
10877 emit_insn (gen_strsethi (destreg,
10878 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10879 ix86_adjust_counter (countreg, 2);
10880 emit_label (label);
10881 LABEL_NUSES (label) = 1;
10882 }
10883 if (align <= 4 && desired_alignment > 4)
10884 {
10885 rtx label = ix86_expand_aligntest (destreg, 4);
10886 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10887 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10888 : zeroreg)));
10889 ix86_adjust_counter (countreg, 4);
10890 emit_label (label);
10891 LABEL_NUSES (label) = 1;
10892 }
10893
10894 if (label && desired_alignment > 4 && !TARGET_64BIT)
10895 {
10896 emit_label (label);
10897 LABEL_NUSES (label) = 1;
10898 label = NULL_RTX;
10899 }
10900
10901 if (!TARGET_SINGLE_STRINGOP)
10902 emit_insn (gen_cld ());
10903 if (TARGET_64BIT)
10904 {
10905 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10906 GEN_INT (3)));
10907 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10908 destreg, countreg2));
10909 }
10910 else
10911 {
10912 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10913 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10914 destreg, countreg2));
10915 }
10916 if (label)
10917 {
10918 emit_label (label);
10919 LABEL_NUSES (label) = 1;
10920 }
10921
10922 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10923 emit_insn (gen_strsetsi (destreg,
10924 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10925 if (TARGET_64BIT && (align <= 4 || count == 0))
10926 {
10927 rtx label = ix86_expand_aligntest (countreg, 4);
10928 emit_insn (gen_strsetsi (destreg,
10929 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10930 emit_label (label);
10931 LABEL_NUSES (label) = 1;
10932 }
10933 if (align > 2 && count != 0 && (count & 2))
10934 emit_insn (gen_strsethi (destreg,
10935 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10936 if (align <= 2 || count == 0)
10937 {
10938 rtx label = ix86_expand_aligntest (countreg, 2);
10939 emit_insn (gen_strsethi (destreg,
10940 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10941 emit_label (label);
10942 LABEL_NUSES (label) = 1;
10943 }
10944 if (align > 1 && count != 0 && (count & 1))
10945 emit_insn (gen_strsetqi (destreg,
10946 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10947 if (align <= 1 || count == 0)
10948 {
10949 rtx label = ix86_expand_aligntest (countreg, 1);
10950 emit_insn (gen_strsetqi (destreg,
10951 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10952 emit_label (label);
10953 LABEL_NUSES (label) = 1;
10954 }
10955 }
10956 return 1;
10957 }
10958 /* Expand strlen. */
10959 int
10960 ix86_expand_strlen (out, src, eoschar, align)
10961 rtx out, src, eoschar, align;
10962 {
10963 rtx addr, scratch1, scratch2, scratch3, scratch4;
10964
10965 /* The generic case of the strlen expander is long. Avoid expanding
10966 it unless TARGET_INLINE_ALL_STRINGOPS. */
10967
10968 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10969 && !TARGET_INLINE_ALL_STRINGOPS
10970 && !optimize_size
10971 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10972 return 0;
10973
10974 addr = force_reg (Pmode, XEXP (src, 0));
10975 scratch1 = gen_reg_rtx (Pmode);
10976
10977 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10978 && !optimize_size)
10979 {
10980 /* Well it seems that some optimizer does not combine a call like
10981 foo(strlen(bar), strlen(bar));
10982 when the move and the subtraction are done here. It does calculate
10983 the length just once when these instructions are done inside of
10984 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10985 often used and I use one fewer register for the lifetime of
10986 output_strlen_unroll() this is better. */
10987
10988 emit_move_insn (out, addr);
10989
10990 ix86_expand_strlensi_unroll_1 (out, align);
10991
10992 /* strlensi_unroll_1 returns the address of the zero at the end of
10993 the string, like memchr(), so compute the length by subtracting
10994 the start address. */
10995 if (TARGET_64BIT)
10996 emit_insn (gen_subdi3 (out, out, addr));
10997 else
10998 emit_insn (gen_subsi3 (out, out, addr));
10999 }
11000 else
11001 {
11002 scratch2 = gen_reg_rtx (Pmode);
11003 scratch3 = gen_reg_rtx (Pmode);
11004 scratch4 = force_reg (Pmode, constm1_rtx);
11005
11006 emit_move_insn (scratch3, addr);
11007 eoschar = force_reg (QImode, eoschar);
11008
11009 emit_insn (gen_cld ());
11010 if (TARGET_64BIT)
11011 {
11012 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11013 align, scratch4, scratch3));
11014 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11015 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11016 }
11017 else
11018 {
11019 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11020 align, scratch4, scratch3));
11021 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11022 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11023 }
11024 }
11025 return 1;
11026 }
11027
11028 /* Expand the appropriate insns for doing strlen if not just doing
11029 repnz; scasb
11030
11031 out = result, initialized with the start address
11032 align_rtx = alignment of the address.
11033 scratch = scratch register, initialized with the start address when
11034 not aligned, otherwise undefined
11035
11036 This is just the body. It needs the initialisations mentioned above and
11037 some address computing at the end. These things are done in i386.md. */
11038
11039 static void
11040 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11041 rtx out, align_rtx;
11042 {
11043 int align;
11044 rtx tmp;
11045 rtx align_2_label = NULL_RTX;
11046 rtx align_3_label = NULL_RTX;
11047 rtx align_4_label = gen_label_rtx ();
11048 rtx end_0_label = gen_label_rtx ();
11049 rtx mem;
11050 rtx tmpreg = gen_reg_rtx (SImode);
11051 rtx scratch = gen_reg_rtx (SImode);
11052
11053 align = 0;
11054 if (GET_CODE (align_rtx) == CONST_INT)
11055 align = INTVAL (align_rtx);
11056
11057 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11058
11059 /* Is there a known alignment and is it less than 4? */
11060 if (align < 4)
11061 {
11062 rtx scratch1 = gen_reg_rtx (Pmode);
11063 emit_move_insn (scratch1, out);
11064 /* Is there a known alignment and is it not 2? */
11065 if (align != 2)
11066 {
11067 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11068 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11069
11070 /* Leave just the 3 lower bits. */
11071 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11072 NULL_RTX, 0, OPTAB_WIDEN);
11073
11074 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11075 Pmode, 1, align_4_label);
11076 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11077 Pmode, 1, align_2_label);
11078 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11079 Pmode, 1, align_3_label);
11080 }
11081 else
11082 {
11083 /* Since the alignment is 2, we have to check either 2 or 0 bytes;
11084 check whether the pointer is aligned to 4 bytes. */
11085
11086 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11087 NULL_RTX, 0, OPTAB_WIDEN);
11088
11089 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11090 Pmode, 1, align_4_label);
11091 }
11092
11093 mem = gen_rtx_MEM (QImode, out);
11094
11095 /* Now compare the bytes. */
11096
11097 /* Compare the first few unaligned bytes on a byte-by-byte basis. */
11098 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11099 QImode, 1, end_0_label);
11100
11101 /* Increment the address. */
11102 if (TARGET_64BIT)
11103 emit_insn (gen_adddi3 (out, out, const1_rtx));
11104 else
11105 emit_insn (gen_addsi3 (out, out, const1_rtx));
11106
11107 /* Not needed with an alignment of 2. */
11108 if (align != 2)
11109 {
11110 emit_label (align_2_label);
11111
11112 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11113 end_0_label);
11114
11115 if (TARGET_64BIT)
11116 emit_insn (gen_adddi3 (out, out, const1_rtx));
11117 else
11118 emit_insn (gen_addsi3 (out, out, const1_rtx));
11119
11120 emit_label (align_3_label);
11121 }
11122
11123 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11124 end_0_label);
11125
11126 if (TARGET_64BIT)
11127 emit_insn (gen_adddi3 (out, out, const1_rtx));
11128 else
11129 emit_insn (gen_addsi3 (out, out, const1_rtx));
11130 }
11131
11132 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11133 align this loop; doing so only makes the program larger and does not
11134 help speed. */
11135 emit_label (align_4_label);
11136
11137 mem = gen_rtx_MEM (SImode, out);
11138 emit_move_insn (scratch, mem);
11139 if (TARGET_64BIT)
11140 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11141 else
11142 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11143
11144 /* This formula yields a nonzero result iff one of the bytes is zero.
11145 This saves three branches inside the loop and many cycles. */
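/* For reference - in C terms, the test built by the three insns below is
   (word stands for the 4 bytes just loaded; this sketch is illustrative
   and not part of GCC):

     has_zero_byte = ((word - 0x01010101) & ~word) & 0x80808080;

   which is nonzero exactly when at least one of the four bytes is zero.  */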
11146
11147 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11148 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11149 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11150 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11151 gen_int_mode (0x80808080, SImode)));
11152 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11153 align_4_label);
11154
11155 if (TARGET_CMOVE)
11156 {
11157 rtx reg = gen_reg_rtx (SImode);
11158 rtx reg2 = gen_reg_rtx (Pmode);
11159 emit_move_insn (reg, tmpreg);
11160 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11161
11162 /* If zero is not in the first two bytes, move two bytes forward. */
11163 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11164 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11165 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11166 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11167 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11168 reg,
11169 tmpreg)));
11170 /* Emit lea manually to avoid clobbering of flags. */
11171 emit_insn (gen_rtx_SET (SImode, reg2,
11172 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11173
11174 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11175 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11176 emit_insn (gen_rtx_SET (VOIDmode, out,
11177 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11178 reg2,
11179 out)));
11180
11181 }
11182 else
11183 {
11184 rtx end_2_label = gen_label_rtx ();
11185 /* Is zero in the first two bytes? */
11186
11187 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11188 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11189 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11190 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11191 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11192 pc_rtx);
11193 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11194 JUMP_LABEL (tmp) = end_2_label;
11195
11196 /* Not in the first two. Move two bytes forward. */
11197 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11198 if (TARGET_64BIT)
11199 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11200 else
11201 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11202
11203 emit_label (end_2_label);
11204
11205 }
11206
11207 /* Avoid branch in fixing the byte. */
11208 tmpreg = gen_lowpart (QImode, tmpreg);
11209 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11210 if (TARGET_64BIT)
11211 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11212 else
11213 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11214
11215 emit_label (end_0_label);
11216 }
11217
11218 void
11219 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11220 rtx retval, fnaddr, callarg1, callarg2, pop;
11221 int sibcall;
11222 {
11223 rtx use = NULL, call;
11224
11225 if (pop == const0_rtx)
11226 pop = NULL;
11227 if (TARGET_64BIT && pop)
11228 abort ();
11229
11230 #if TARGET_MACHO
11231 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11232 fnaddr = machopic_indirect_call_target (fnaddr);
11233 #else
11234 /* Static functions and indirect calls don't need the pic register. */
11235 if (! TARGET_64BIT && flag_pic
11236 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11237 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11238 use_reg (&use, pic_offset_table_rtx);
11239
11240 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11241 {
11242 rtx al = gen_rtx_REG (QImode, 0);
11243 emit_move_insn (al, callarg2);
11244 use_reg (&use, al);
11245 }
11246 #endif /* TARGET_MACHO */
11247
11248 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11249 {
11250 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11251 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11252 }
11253 if (sibcall && TARGET_64BIT
11254 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11255 {
11256 rtx addr;
11257 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11258 fnaddr = gen_rtx_REG (Pmode, 40);
11259 emit_move_insn (fnaddr, addr);
11260 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11261 }
11262
11263 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11264 if (retval)
11265 call = gen_rtx_SET (VOIDmode, retval, call);
11266 if (pop)
11267 {
11268 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11269 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11270 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11271 }
11272
11273 call = emit_call_insn (call);
11274 if (use)
11275 CALL_INSN_FUNCTION_USAGE (call) = use;
11276 }
11277
11278 \f
11279 /* Clear stack slot assignments remembered from previous functions.
11280 This is called from INIT_EXPANDERS once before RTL is emitted for each
11281 function. */
11282
11283 static struct machine_function *
11284 ix86_init_machine_status ()
11285 {
11286 return ggc_alloc_cleared (sizeof (struct machine_function));
11287 }
11288
11289 /* Return a MEM corresponding to a stack slot with mode MODE.
11290 Allocate a new slot if necessary.
11291
11292 The RTL for a function can have several slots available: N is
11293 which slot to use. */
11294
11295 rtx
11296 assign_386_stack_local (mode, n)
11297 enum machine_mode mode;
11298 int n;
11299 {
11300 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11301 abort ();
11302
11303 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11304 ix86_stack_locals[(int) mode][n]
11305 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11306
11307 return ix86_stack_locals[(int) mode][n];
11308 }
11309
11310 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11311
11312 static GTY(()) rtx ix86_tls_symbol;
11313 rtx
11314 ix86_tls_get_addr ()
11315 {
11316
11317 if (!ix86_tls_symbol)
11318 {
11319 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11320 (TARGET_GNU_TLS && !TARGET_64BIT)
11321 ? "___tls_get_addr"
11322 : "__tls_get_addr");
11323 }
11324
11325 return ix86_tls_symbol;
11326 }
11327 \f
11328 /* Calculate the length of the memory address in the instruction
11329 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11330
11331 static int
11332 memory_address_length (addr)
11333 rtx addr;
11334 {
11335 struct ix86_address parts;
11336 rtx base, index, disp;
11337 int len;
11338
11339 if (GET_CODE (addr) == PRE_DEC
11340 || GET_CODE (addr) == POST_INC
11341 || GET_CODE (addr) == PRE_MODIFY
11342 || GET_CODE (addr) == POST_MODIFY)
11343 return 0;
11344
11345 if (! ix86_decompose_address (addr, &parts))
11346 abort ();
11347
11348 base = parts.base;
11349 index = parts.index;
11350 disp = parts.disp;
11351 len = 0;
11352
11353 /* Register Indirect. */
11354 if (base && !index && !disp)
11355 {
11356 /* Special cases: ebp and esp need the two-byte modrm form. */
11357 if (addr == stack_pointer_rtx
11358 || addr == arg_pointer_rtx
11359 || addr == frame_pointer_rtx
11360 || addr == hard_frame_pointer_rtx)
11361 len = 1;
11362 }
11363
11364 /* Direct Addressing. */
11365 else if (disp && !base && !index)
11366 len = 4;
11367
11368 else
11369 {
11370 /* Find the length of the displacement constant. */
11371 if (disp)
11372 {
11373 if (GET_CODE (disp) == CONST_INT
11374 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11375 len = 1;
11376 else
11377 len = 4;
11378 }
11379
11380 /* An index requires the two-byte modrm form. */
11381 if (index)
11382 len += 1;
11383 }
11384
11385 return len;
11386 }
11387
11388 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11389 is set, expect that the insn has an 8-bit immediate alternative. */
11390 int
11391 ix86_attr_length_immediate_default (insn, shortform)
11392 rtx insn;
11393 int shortform;
11394 {
11395 int len = 0;
11396 int i;
11397 extract_insn_cached (insn);
11398 for (i = recog_data.n_operands - 1; i >= 0; --i)
11399 if (CONSTANT_P (recog_data.operand[i]))
11400 {
11401 if (len)
11402 abort ();
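/* Many ALU insns (the 0x83 opcode group: add, sub, cmp, ... with a word or
   dword destination) have a sign-extended 8-bit immediate form, so a
   CONST_INT in the signed 8-bit range only costs one byte.  */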
11403 if (shortform
11404 && GET_CODE (recog_data.operand[i]) == CONST_INT
11405 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11406 len = 1;
11407 else
11408 {
11409 switch (get_attr_mode (insn))
11410 {
11411 case MODE_QI:
11412 len+=1;
11413 break;
11414 case MODE_HI:
11415 len+=2;
11416 break;
11417 case MODE_SI:
11418 len+=4;
11419 break;
11420 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11421 case MODE_DI:
11422 len+=4;
11423 break;
11424 default:
11425 fatal_insn ("unknown insn mode", insn);
11426 }
11427 }
11428 }
11429 return len;
11430 }
11431 /* Compute default value for "length_address" attribute. */
11432 int
11433 ix86_attr_length_address_default (insn)
11434 rtx insn;
11435 {
11436 int i;
11437 extract_insn_cached (insn);
11438 for (i = recog_data.n_operands - 1; i >= 0; --i)
11439 if (GET_CODE (recog_data.operand[i]) == MEM)
11440 {
11441 return memory_address_length (XEXP (recog_data.operand[i], 0));
11443 }
11444 return 0;
11445 }
11446 \f
11447 /* Return the maximum number of instructions a cpu can issue. */
11448
11449 static int
11450 ix86_issue_rate ()
11451 {
11452 switch (ix86_cpu)
11453 {
11454 case PROCESSOR_PENTIUM:
11455 case PROCESSOR_K6:
11456 return 2;
11457
11458 case PROCESSOR_PENTIUMPRO:
11459 case PROCESSOR_PENTIUM4:
11460 case PROCESSOR_ATHLON:
11461 case PROCESSOR_K8:
11462 return 3;
11463
11464 default:
11465 return 1;
11466 }
11467 }
11468
11469 /* A subroutine of ix86_adjust_cost -- return nonzero iff INSN reads the flags
11470 set by DEP_INSN and nothing else that DEP_INSN sets. */
11471
11472 static int
11473 ix86_flags_dependant (insn, dep_insn, insn_type)
11474 rtx insn, dep_insn;
11475 enum attr_type insn_type;
11476 {
11477 rtx set, set2;
11478
11479 /* Simplify the test for uninteresting insns. */
11480 if (insn_type != TYPE_SETCC
11481 && insn_type != TYPE_ICMOV
11482 && insn_type != TYPE_FCMOV
11483 && insn_type != TYPE_IBR)
11484 return 0;
11485
11486 if ((set = single_set (dep_insn)) != 0)
11487 {
11488 set = SET_DEST (set);
11489 set2 = NULL_RTX;
11490 }
11491 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11492 && XVECLEN (PATTERN (dep_insn), 0) == 2
11493 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11494 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11495 {
11496 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11497 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11498 }
11499 else
11500 return 0;
11501
11502 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11503 return 0;
11504
11505 /* This test is true if the dependent insn reads the flags but
11506 not any other potentially set register. */
11507 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11508 return 0;
11509
11510 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11511 return 0;
11512
11513 return 1;
11514 }
11515
11516 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11517 address with operands set by DEP_INSN. */
11518
11519 static int
11520 ix86_agi_dependant (insn, dep_insn, insn_type)
11521 rtx insn, dep_insn;
11522 enum attr_type insn_type;
11523 {
11524 rtx addr;
11525
11526 if (insn_type == TYPE_LEA
11527 && TARGET_PENTIUM)
11528 {
11529 addr = PATTERN (insn);
11530 if (GET_CODE (addr) == SET)
11531 ;
11532 else if (GET_CODE (addr) == PARALLEL
11533 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11534 addr = XVECEXP (addr, 0, 0);
11535 else
11536 abort ();
11537 addr = SET_SRC (addr);
11538 }
11539 else
11540 {
11541 int i;
11542 extract_insn_cached (insn);
11543 for (i = recog_data.n_operands - 1; i >= 0; --i)
11544 if (GET_CODE (recog_data.operand[i]) == MEM)
11545 {
11546 addr = XEXP (recog_data.operand[i], 0);
11547 goto found;
11548 }
11549 return 0;
11550 found:;
11551 }
11552
11553 return modified_in_p (addr, dep_insn);
11554 }
11555
11556 static int
11557 ix86_adjust_cost (insn, link, dep_insn, cost)
11558 rtx insn, link, dep_insn;
11559 int cost;
11560 {
11561 enum attr_type insn_type, dep_insn_type;
11562 enum attr_memory memory, dep_memory;
11563 rtx set, set2;
11564 int dep_insn_code_number;
11565
11566 /* Anti- and output dependencies have zero cost on all CPUs. */
11567 if (REG_NOTE_KIND (link) != 0)
11568 return 0;
11569
11570 dep_insn_code_number = recog_memoized (dep_insn);
11571
11572 /* If we can't recognize the insns, we can't really do anything. */
11573 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11574 return cost;
11575
11576 insn_type = get_attr_type (insn);
11577 dep_insn_type = get_attr_type (dep_insn);
11578
11579 switch (ix86_cpu)
11580 {
11581 case PROCESSOR_PENTIUM:
11582 /* Address Generation Interlock adds a cycle of latency. */
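/* That is, on the Pentium a load or store whose address uses a register
   written by the immediately preceding instruction stalls for one extra
   cycle.  */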
11583 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11584 cost += 1;
11585
11586 /* ??? Compares pair with jump/setcc. */
11587 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11588 cost = 0;
11589
11590 /* Floating point stores require the value to be ready one cycle earlier. */
11591 if (insn_type == TYPE_FMOV
11592 && get_attr_memory (insn) == MEMORY_STORE
11593 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11594 cost += 1;
11595 break;
11596
11597 case PROCESSOR_PENTIUMPRO:
11598 memory = get_attr_memory (insn);
11599 dep_memory = get_attr_memory (dep_insn);
11600
11601 /* Since we can't represent delayed latencies of load+operation,
11602 increase the cost here for non-imov insns. */
11603 if (dep_insn_type != TYPE_IMOV
11604 && dep_insn_type != TYPE_FMOV
11605 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11606 cost += 1;
11607
11608 /* INT->FP conversion is expensive. */
11609 if (get_attr_fp_int_src (dep_insn))
11610 cost += 5;
11611
11612 /* There is one cycle extra latency between an FP op and a store. */
11613 if (insn_type == TYPE_FMOV
11614 && (set = single_set (dep_insn)) != NULL_RTX
11615 && (set2 = single_set (insn)) != NULL_RTX
11616 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11617 && GET_CODE (SET_DEST (set2)) == MEM)
11618 cost += 1;
11619
11620 /* Show the ability of the reorder buffer to hide the latency of a load by
11621 executing it in parallel with the previous instruction, provided the
11622 previous instruction is not needed to compute the address. */
11623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11624 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11625 {
11626 /* Claim that moves take one cycle, as the core can issue one load
11627 at a time and the next load can start a cycle later. */
11628 if (dep_insn_type == TYPE_IMOV
11629 || dep_insn_type == TYPE_FMOV)
11630 cost = 1;
11631 else if (cost > 1)
11632 cost--;
11633 }
11634 break;
11635
11636 case PROCESSOR_K6:
11637 memory = get_attr_memory (insn);
11638 dep_memory = get_attr_memory (dep_insn);
11639 /* The esp dependency is resolved before the instruction is really
11640 finished. */
11641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11643 return 1;
11644
11645 /* Since we can't represent delayed latencies of load+operation,
11646 increase the cost here for non-imov insns. */
11647 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11648 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11649
11650 /* INT->FP conversion is expensive. */
11651 if (get_attr_fp_int_src (dep_insn))
11652 cost += 5;
11653
11654 /* Show the ability of the reorder buffer to hide the latency of a load by
11655 executing it in parallel with the previous instruction, provided the
11656 previous instruction is not needed to compute the address. */
11657 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11658 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11659 {
11660 /* Claim that moves take one cycle, as the core can issue one load
11661 at a time and the next load can start a cycle later. */
11662 if (dep_insn_type == TYPE_IMOV
11663 || dep_insn_type == TYPE_FMOV)
11664 cost = 1;
11665 else if (cost > 2)
11666 cost -= 2;
11667 else
11668 cost = 1;
11669 }
11670 break;
11671
11672 case PROCESSOR_ATHLON:
11673 case PROCESSOR_K8:
11674 memory = get_attr_memory (insn);
11675 dep_memory = get_attr_memory (dep_insn);
11676
11677 /* Show the ability of the reorder buffer to hide the latency of a load by
11678 executing it in parallel with the previous instruction, provided the
11679 previous instruction is not needed to compute the address. */
11680 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11681 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11682 {
11683 /* Claim that moves take one cycle, as the core can issue one load
11684 at a time and the next load can start a cycle later. */
11685 if (dep_insn_type == TYPE_IMOV
11686 || dep_insn_type == TYPE_FMOV)
11687 cost = 0;
11688 else if (cost >= 3)
11689 cost -= 3;
11690 else
11691 cost = 0;
11692 }
11693
11694 default:
11695 break;
11696 }
11697
11698 return cost;
11699 }
11700
11701 static union
11702 {
11703 struct ppro_sched_data
11704 {
11705 rtx decode[3];
11706 int issued_this_cycle;
11707 } ppro;
11708 } ix86_sched_data;
11709
11710 static enum attr_ppro_uops
11711 ix86_safe_ppro_uops (insn)
11712 rtx insn;
11713 {
11714 if (recog_memoized (insn) >= 0)
11715 return get_attr_ppro_uops (insn);
11716 else
11717 return PPRO_UOPS_MANY;
11718 }
11719
11720 static void
11721 ix86_dump_ppro_packet (dump)
11722 FILE *dump;
11723 {
11724 if (ix86_sched_data.ppro.decode[0])
11725 {
11726 fprintf (dump, "PPRO packet: %d",
11727 INSN_UID (ix86_sched_data.ppro.decode[0]));
11728 if (ix86_sched_data.ppro.decode[1])
11729 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11730 if (ix86_sched_data.ppro.decode[2])
11731 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11732 fputc ('\n', dump);
11733 }
11734 }
11735
11736 /* We're beginning a new block. Initialize data structures as necessary. */
11737
11738 static void
11739 ix86_sched_init (dump, sched_verbose, veclen)
11740 FILE *dump ATTRIBUTE_UNUSED;
11741 int sched_verbose ATTRIBUTE_UNUSED;
11742 int veclen ATTRIBUTE_UNUSED;
11743 {
11744 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11745 }
11746
11747 /* Shift INSN to SLOT, and shift everything else down. */
11748
11749 static void
11750 ix86_reorder_insn (insnp, slot)
11751 rtx *insnp, *slot;
11752 {
11753 if (insnp != slot)
11754 {
11755 rtx insn = *insnp;
11756 do
11757 insnp[0] = insnp[1];
11758 while (++insnp != slot);
11759 *insnp = insn;
11760 }
11761 }
11762
11763 static void
11764 ix86_sched_reorder_ppro (ready, e_ready)
11765 rtx *ready;
11766 rtx *e_ready;
11767 {
11768 rtx decode[3];
11769 enum attr_ppro_uops cur_uops;
11770 int issued_this_cycle;
11771 rtx *insnp;
11772 int i;
11773
11774 /* At this point .ppro.decode contains the state of the three
11775 decoders from last "cycle". That is, those insns that were
11776 actually independent. But here we're scheduling for the
11777 decoder, and we may find things that are decodable in the
11778 same cycle. */
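/* The PPro front end follows a 4-1-1 decode template: decoder 0 can handle
   insns of up to four uops while decoders 1 and 2 take only single-uop
   insns.  That is why one "few"/"many" insn is steered to slot 0 below and
   the remaining slots are filled with single-uop insns.  */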
11779
11780 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11781 issued_this_cycle = 0;
11782
11783 insnp = e_ready;
11784 cur_uops = ix86_safe_ppro_uops (*insnp);
11785
11786 /* If the decoders are empty, and we have a complex insn at the
11787 head of the priority queue, let it issue without complaint. */
11788 if (decode[0] == NULL)
11789 {
11790 if (cur_uops == PPRO_UOPS_MANY)
11791 {
11792 decode[0] = *insnp;
11793 goto ppro_done;
11794 }
11795
11796 /* Otherwise, search for a 2-4 uop insn to issue. */
11797 while (cur_uops != PPRO_UOPS_FEW)
11798 {
11799 if (insnp == ready)
11800 break;
11801 cur_uops = ix86_safe_ppro_uops (*--insnp);
11802 }
11803
11804 /* If so, move it to the head of the line. */
11805 if (cur_uops == PPRO_UOPS_FEW)
11806 ix86_reorder_insn (insnp, e_ready);
11807
11808 /* Issue the head of the queue. */
11809 issued_this_cycle = 1;
11810 decode[0] = *e_ready--;
11811 }
11812
11813 /* Look for simple insns to fill in the other two slots. */
11814 for (i = 1; i < 3; ++i)
11815 if (decode[i] == NULL)
11816 {
11817 if (ready > e_ready)
11818 goto ppro_done;
11819
11820 insnp = e_ready;
11821 cur_uops = ix86_safe_ppro_uops (*insnp);
11822 while (cur_uops != PPRO_UOPS_ONE)
11823 {
11824 if (insnp == ready)
11825 break;
11826 cur_uops = ix86_safe_ppro_uops (*--insnp);
11827 }
11828
11829 /* Found one. Move it to the head of the queue and issue it. */
11830 if (cur_uops == PPRO_UOPS_ONE)
11831 {
11832 ix86_reorder_insn (insnp, e_ready);
11833 decode[i] = *e_ready--;
11834 issued_this_cycle++;
11835 continue;
11836 }
11837
11838 /* ??? Didn't find one. Ideally, here we would do a lazy split
11839 of 2-uop insns, issue one and queue the other. */
11840 }
11841
11842 ppro_done:
11843 if (issued_this_cycle == 0)
11844 issued_this_cycle = 1;
11845 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11846 }
11847
11848 /* We are about to begin issuing insns for this clock cycle.
11849 Override the default sort algorithm to better slot instructions. */
11850 static int
11851 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11852 FILE *dump ATTRIBUTE_UNUSED;
11853 int sched_verbose ATTRIBUTE_UNUSED;
11854 rtx *ready;
11855 int *n_readyp;
11856 int clock_var ATTRIBUTE_UNUSED;
11857 {
11858 int n_ready = *n_readyp;
11859 rtx *e_ready = ready + n_ready - 1;
11860
11861 /* Make sure to go ahead and initialize key items in
11862 ix86_sched_data if we are not going to bother trying to
11863 reorder the ready queue. */
11864 if (n_ready < 2)
11865 {
11866 ix86_sched_data.ppro.issued_this_cycle = 1;
11867 goto out;
11868 }
11869
11870 switch (ix86_cpu)
11871 {
11872 default:
11873 break;
11874
11875 case PROCESSOR_PENTIUMPRO:
11876 ix86_sched_reorder_ppro (ready, e_ready);
11877 break;
11878 }
11879
11880 out:
11881 return ix86_issue_rate ();
11882 }
11883
11884 /* We are about to issue INSN. Return the number of insns left on the
11885 ready queue that can be issued this cycle. */
11886
11887 static int
11888 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11889 FILE *dump;
11890 int sched_verbose;
11891 rtx insn;
11892 int can_issue_more;
11893 {
11894 int i;
11895 switch (ix86_cpu)
11896 {
11897 default:
11898 return can_issue_more - 1;
11899
11900 case PROCESSOR_PENTIUMPRO:
11901 {
11902 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11903
11904 if (uops == PPRO_UOPS_MANY)
11905 {
11906 if (sched_verbose)
11907 ix86_dump_ppro_packet (dump);
11908 ix86_sched_data.ppro.decode[0] = insn;
11909 ix86_sched_data.ppro.decode[1] = NULL;
11910 ix86_sched_data.ppro.decode[2] = NULL;
11911 if (sched_verbose)
11912 ix86_dump_ppro_packet (dump);
11913 ix86_sched_data.ppro.decode[0] = NULL;
11914 }
11915 else if (uops == PPRO_UOPS_FEW)
11916 {
11917 if (sched_verbose)
11918 ix86_dump_ppro_packet (dump);
11919 ix86_sched_data.ppro.decode[0] = insn;
11920 ix86_sched_data.ppro.decode[1] = NULL;
11921 ix86_sched_data.ppro.decode[2] = NULL;
11922 }
11923 else
11924 {
11925 for (i = 0; i < 3; ++i)
11926 if (ix86_sched_data.ppro.decode[i] == NULL)
11927 {
11928 ix86_sched_data.ppro.decode[i] = insn;
11929 break;
11930 }
11931 if (i == 3)
11932 abort ();
11933 if (i == 2)
11934 {
11935 if (sched_verbose)
11936 ix86_dump_ppro_packet (dump);
11937 ix86_sched_data.ppro.decode[0] = NULL;
11938 ix86_sched_data.ppro.decode[1] = NULL;
11939 ix86_sched_data.ppro.decode[2] = NULL;
11940 }
11941 }
11942 }
11943 return --ix86_sched_data.ppro.issued_this_cycle;
11944 }
11945 }
11946
11947 static int
11948 ia32_use_dfa_pipeline_interface ()
11949 {
11950 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
11951 return 1;
11952 return 0;
11953 }
11954
11955 /* How many alternative schedules to try. This should be as wide as the
11956 scheduling freedom in the DFA, but no wider. Making this value too
11957 large results in extra work for the scheduler. */
11958
11959 static int
11960 ia32_multipass_dfa_lookahead ()
11961 {
11962 if (ix86_cpu == PROCESSOR_PENTIUM)
11963 return 2;
11964 else
11965 return 0;
11966 }
11967
11968 \f
11969 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11970 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11971 appropriate. */
11972
11973 void
11974 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11975 rtx insns;
11976 rtx dstref, srcref, dstreg, srcreg;
11977 {
11978 rtx insn;
11979
11980 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11981 if (INSN_P (insn))
11982 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11983 dstreg, srcreg);
11984 }
11985
11986 /* Subroutine of above to actually do the updating by recursively walking
11987 the rtx. */
11988
11989 static void
11990 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11991 rtx x;
11992 rtx dstref, srcref, dstreg, srcreg;
11993 {
11994 enum rtx_code code = GET_CODE (x);
11995 const char *format_ptr = GET_RTX_FORMAT (code);
11996 int i, j;
11997
11998 if (code == MEM && XEXP (x, 0) == dstreg)
11999 MEM_COPY_ATTRIBUTES (x, dstref);
12000 else if (code == MEM && XEXP (x, 0) == srcreg)
12001 MEM_COPY_ATTRIBUTES (x, srcref);
12002
12003 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12004 {
12005 if (*format_ptr == 'e')
12006 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12007 dstreg, srcreg);
12008 else if (*format_ptr == 'E')
12009 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12010 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12011 dstreg, srcreg);
12012 }
12013 }
12014 \f
12015 /* Compute the alignment given to a constant that is being placed in memory.
12016 EXP is the constant and ALIGN is the alignment that the object would
12017 ordinarily have.
12018 The value of this function is used instead of that alignment to align
12019 the object. */
12020
12021 int
12022 ix86_constant_alignment (exp, align)
12023 tree exp;
12024 int align;
12025 {
12026 if (TREE_CODE (exp) == REAL_CST)
12027 {
12028 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12029 return 64;
12030 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12031 return 128;
12032 }
12033 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12034 && align < 256)
12035 return 256;
12036
12037 return align;
12038 }
12039
12040 /* Compute the alignment for a static variable.
12041 TYPE is the data type, and ALIGN is the alignment that
12042 the object would ordinarily have. The value of this function is used
12043 instead of that alignment to align the object. */
12044
12045 int
12046 ix86_data_alignment (type, align)
12047 tree type;
12048 int align;
12049 {
12050 if (AGGREGATE_TYPE_P (type)
12051 && TYPE_SIZE (type)
12052 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12053 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12054 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12055 return 256;
12056
12057 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12058 to a 16-byte boundary. */
12059 if (TARGET_64BIT)
12060 {
12061 if (AGGREGATE_TYPE_P (type)
12062 && TYPE_SIZE (type)
12063 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12064 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12065 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12066 return 128;
12067 }
12068
12069 if (TREE_CODE (type) == ARRAY_TYPE)
12070 {
12071 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12072 return 64;
12073 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12074 return 128;
12075 }
12076 else if (TREE_CODE (type) == COMPLEX_TYPE)
12077 {
12078
12079 if (TYPE_MODE (type) == DCmode && align < 64)
12080 return 64;
12081 if (TYPE_MODE (type) == XCmode && align < 128)
12082 return 128;
12083 }
12084 else if ((TREE_CODE (type) == RECORD_TYPE
12085 || TREE_CODE (type) == UNION_TYPE
12086 || TREE_CODE (type) == QUAL_UNION_TYPE)
12087 && TYPE_FIELDS (type))
12088 {
12089 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12090 return 64;
12091 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12092 return 128;
12093 }
12094 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12095 || TREE_CODE (type) == INTEGER_TYPE)
12096 {
12097 if (TYPE_MODE (type) == DFmode && align < 64)
12098 return 64;
12099 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12100 return 128;
12101 }
12102
12103 return align;
12104 }
12105
12106 /* Compute the alignment for a local variable.
12107 TYPE is the data type, and ALIGN is the alignment that
12108 the object would ordinarily have. The value of this macro is used
12109 instead of that alignment to align the object. */
12110
12111 int
12112 ix86_local_alignment (type, align)
12113 tree type;
12114 int align;
12115 {
12116 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12117 to a 16-byte boundary. */
12118 if (TARGET_64BIT)
12119 {
12120 if (AGGREGATE_TYPE_P (type)
12121 && TYPE_SIZE (type)
12122 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12123 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12124 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12125 return 128;
12126 }
12127 if (TREE_CODE (type) == ARRAY_TYPE)
12128 {
12129 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12130 return 64;
12131 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12132 return 128;
12133 }
12134 else if (TREE_CODE (type) == COMPLEX_TYPE)
12135 {
12136 if (TYPE_MODE (type) == DCmode && align < 64)
12137 return 64;
12138 if (TYPE_MODE (type) == XCmode && align < 128)
12139 return 128;
12140 }
12141 else if ((TREE_CODE (type) == RECORD_TYPE
12142 || TREE_CODE (type) == UNION_TYPE
12143 || TREE_CODE (type) == QUAL_UNION_TYPE)
12144 && TYPE_FIELDS (type))
12145 {
12146 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12147 return 64;
12148 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12149 return 128;
12150 }
12151 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12152 || TREE_CODE (type) == INTEGER_TYPE)
12153 {
12154
12155 if (TYPE_MODE (type) == DFmode && align < 64)
12156 return 64;
12157 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12158 return 128;
12159 }
12160 return align;
12161 }
12162 \f
12163 /* Emit RTL insns to initialize the variable parts of a trampoline.
12164 FNADDR is an RTX for the address of the function's pure code.
12165 CXT is an RTX for the static chain value for the function. */
12166 void
12167 x86_initialize_trampoline (tramp, fnaddr, cxt)
12168 rtx tramp, fnaddr, cxt;
12169 {
12170 if (!TARGET_64BIT)
12171 {
12172 /* Compute offset from the end of the jmp to the target function. */
12173 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12174 plus_constant (tramp, 10),
12175 NULL_RTX, 1, OPTAB_DIRECT);
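/* The 32-bit trampoline is ten bytes:
       b9 <cxt>    movl $<static chain>, %ecx
       e9 <disp>   jmp  <function>
   where <disp> is relative to the end of the jmp, i.e. tramp + 10.  */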
12176 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12177 gen_int_mode (0xb9, QImode));
12178 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12179 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12180 gen_int_mode (0xe9, QImode));
12181 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12182 }
12183 else
12184 {
12185 int offset = 0;
12186 /* Try to load the address using the shorter movl instead of movabs.
12187 We may want to support movq for kernel mode, but the kernel does not
12188 use trampolines at the moment. */
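/* The HImode constants below are little-endian byte pairs: 0xbb41 is
   "41 bb", movl $imm32, %r11d (which zero-extends into %r11), while 0xbb49
   is "49 bb", movabs $imm64, %r11.  */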
12189 if (x86_64_zero_extended_value (fnaddr))
12190 {
12191 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12192 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12193 gen_int_mode (0xbb41, HImode));
12194 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12195 gen_lowpart (SImode, fnaddr));
12196 offset += 6;
12197 }
12198 else
12199 {
12200 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12201 gen_int_mode (0xbb49, HImode));
12202 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12203 fnaddr);
12204 offset += 10;
12205 }
12206 /* Load static chain using movabs to r10. */
12207 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12208 gen_int_mode (0xba49, HImode));
12209 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12210 cxt);
12211 offset += 10;
12212 /* Jump to r11. */
12213 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12214 gen_int_mode (0xff49, HImode));
12215 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12216 gen_int_mode (0xe3, QImode));
12217 offset += 3;
12218 if (offset > TRAMPOLINE_SIZE)
12219 abort ();
12220 }
12221
12222 #ifdef TRANSFER_FROM_TRAMPOLINE
12223 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12224 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12225 #endif
12226 }
12227 \f
12228 #define def_builtin(MASK, NAME, TYPE, CODE) \
12229 do { \
12230 if ((MASK) & target_flags) \
12231 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12232 NULL, NULL_TREE); \
12233 } while (0)
12234
12235 struct builtin_description
12236 {
12237 const unsigned int mask; /* Target flag bits that must be enabled. */
12238 const enum insn_code icode; /* Insn pattern used to expand the builtin. */
12239 const char *const name; /* Builtin name, or 0 if registered separately. */
12240 const enum ix86_builtins code; /* IX86_BUILTIN_* function code. */
12241 const enum rtx_code comparison; /* Comparison code for comi/cmp builtins. */
12242 const unsigned int flag; /* Nonzero if the operands are to be swapped. */
12243 };
12244
12245 /* Used for builtins that are enabled both by -msse and -msse2. */
12246 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12247
12248 static const struct builtin_description bdesc_comi[] =
12249 {
12250 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12251 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12252 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12253 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12254 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12255 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12256 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12257 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12258 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12259 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12260 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12261 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12262 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12263 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12264 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12265 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12266 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12267 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12268 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12269 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12270 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12271 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12272 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12273 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12274 };
12275
12276 static const struct builtin_description bdesc_2arg[] =
12277 {
12278 /* SSE */
12279 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12280 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12281 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12282 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12283 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12284 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12285 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12286 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12287
12288 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12289 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12290 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12291 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12292 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12293 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12294 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12295 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12296 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12297 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12298 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12299 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12300 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12301 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12302 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12303 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12304 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12305 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12306 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12307 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12308
12309 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12310 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12311 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12312 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12313
12314 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12315 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12316 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12317 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12318
12319 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12320 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12321 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12322 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12323 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12324
12325 /* MMX */
12326 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12327 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12328 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12329 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12330 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12331 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12332
12333 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12334 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12335 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12336 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12337 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12338 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12339 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12340 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12341
12342 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12343 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12344 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12345
12346 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12347 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12348 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12349 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12350
12351 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12352 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12353
12354 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12355 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12356 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12357 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12358 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12359 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12360
12361 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12362 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12363 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12364 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12365
12366 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12367 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12368 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12369 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12370 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12371 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12372
12373 /* Special. */
12374 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12375 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12376 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12377
12378 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12379 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12380
12381 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12382 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12383 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12384 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12385 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12386 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12387
12388 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12389 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12390 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12391 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12392 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12393 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12394
12395 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12396 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12397 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12398 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12399
12400 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12401 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12402
12403 /* SSE2 */
12404 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12405 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12406 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12407 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12408 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12409 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12410 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12411 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12412
12413 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12414 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12415 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12416 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12417 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12418 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12419 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12420 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12421 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12422 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12423 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12424 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12425 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12426 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12427 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12428 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12429 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12430 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12431 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12432 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12433
12434 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12435 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12436 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12437 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12438
12439 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12440 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12441 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12442 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12443
12444 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12445 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12446 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12447
12448 /* SSE2 MMX */
12449 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12450 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12451 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12452 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12453 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12454 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12455 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12456 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12457
12458 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12459 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12460 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12461 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12462 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12463 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12464 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12465 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12466
12467 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12468 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12469 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12470 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12471
12472 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12473 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12474 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12475 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12476
12477 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12478 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12479
12480 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12481 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12482 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12483 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12484 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12485 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12486
12487 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12488 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12489 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12490 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12491
12492 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12495 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12496 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12497 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12498 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12499 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12500
12501 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12502 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12503 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12504
12505 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12507
12508 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12509 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12510 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12511 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12513 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12514
12515 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12519 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12520 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12521
12522 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12523 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12524 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12525 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12526
12527 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12528
12529 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12530 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12531 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12532 };
12533
12534 static const struct builtin_description bdesc_1arg[] =
12535 {
12536 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12537 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12538
12539 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12542
12543 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12544 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12545 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12546 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12547
12548 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12549 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12550 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12551 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12552
12553 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12554
12555 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12557
12558 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12559 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12563
12564 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12565
12566 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12567 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12568
12569 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12570 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12571 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12572
12573 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12574 };
12575
12576 void
12577 ix86_init_builtins ()
12578 {
12579 if (TARGET_MMX)
12580 ix86_init_mmx_sse_builtins ();
12581 }
12582
12583 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12584 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
12585 builtins are defined. */
12586 static void
12587 ix86_init_mmx_sse_builtins ()
12588 {
12589 const struct builtin_description * d;
12590 size_t i;
12591
12592 tree pchar_type_node = build_pointer_type (char_type_node);
12593 tree pfloat_type_node = build_pointer_type (float_type_node);
12594 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12595 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12596 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12597
12598 /* Comparisons. */
12599 tree int_ftype_v4sf_v4sf
12600 = build_function_type_list (integer_type_node,
12601 V4SF_type_node, V4SF_type_node, NULL_TREE);
12602 tree v4si_ftype_v4sf_v4sf
12603 = build_function_type_list (V4SI_type_node,
12604 V4SF_type_node, V4SF_type_node, NULL_TREE);
12605 /* MMX/SSE/integer conversions. */
12606 tree int_ftype_v4sf
12607 = build_function_type_list (integer_type_node,
12608 V4SF_type_node, NULL_TREE);
12609 tree int_ftype_v8qi
12610 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12611 tree v4sf_ftype_v4sf_int
12612 = build_function_type_list (V4SF_type_node,
12613 V4SF_type_node, integer_type_node, NULL_TREE);
12614 tree v4sf_ftype_v4sf_v2si
12615 = build_function_type_list (V4SF_type_node,
12616 V4SF_type_node, V2SI_type_node, NULL_TREE);
12617 tree int_ftype_v4hi_int
12618 = build_function_type_list (integer_type_node,
12619 V4HI_type_node, integer_type_node, NULL_TREE);
12620 tree v4hi_ftype_v4hi_int_int
12621 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12622 integer_type_node, integer_type_node,
12623 NULL_TREE);
12624 /* Miscellaneous. */
12625 tree v8qi_ftype_v4hi_v4hi
12626 = build_function_type_list (V8QI_type_node,
12627 V4HI_type_node, V4HI_type_node, NULL_TREE);
12628 tree v4hi_ftype_v2si_v2si
12629 = build_function_type_list (V4HI_type_node,
12630 V2SI_type_node, V2SI_type_node, NULL_TREE);
12631 tree v4sf_ftype_v4sf_v4sf_int
12632 = build_function_type_list (V4SF_type_node,
12633 V4SF_type_node, V4SF_type_node,
12634 integer_type_node, NULL_TREE);
12635 tree v2si_ftype_v4hi_v4hi
12636 = build_function_type_list (V2SI_type_node,
12637 V4HI_type_node, V4HI_type_node, NULL_TREE);
12638 tree v4hi_ftype_v4hi_int
12639 = build_function_type_list (V4HI_type_node,
12640 V4HI_type_node, integer_type_node, NULL_TREE);
12641 tree v4hi_ftype_v4hi_di
12642 = build_function_type_list (V4HI_type_node,
12643 V4HI_type_node, long_long_unsigned_type_node,
12644 NULL_TREE);
12645 tree v2si_ftype_v2si_di
12646 = build_function_type_list (V2SI_type_node,
12647 V2SI_type_node, long_long_unsigned_type_node,
12648 NULL_TREE);
12649 tree void_ftype_void
12650 = build_function_type (void_type_node, void_list_node);
12651 tree void_ftype_unsigned
12652 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12653 tree unsigned_ftype_void
12654 = build_function_type (unsigned_type_node, void_list_node);
12655 tree di_ftype_void
12656 = build_function_type (long_long_unsigned_type_node, void_list_node);
12657 tree v4sf_ftype_void
12658 = build_function_type (V4SF_type_node, void_list_node);
12659 tree v2si_ftype_v4sf
12660 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12661 /* Loads/stores. */
12662 tree void_ftype_v8qi_v8qi_pchar
12663 = build_function_type_list (void_type_node,
12664 V8QI_type_node, V8QI_type_node,
12665 pchar_type_node, NULL_TREE);
12666 tree v4sf_ftype_pfloat
12667 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12668 /* @@@ the type is bogus */
12669 tree v4sf_ftype_v4sf_pv2si
12670 = build_function_type_list (V4SF_type_node,
12671 V4SF_type_node, pv2si_type_node, NULL_TREE);
12672 tree void_ftype_pv2si_v4sf
12673 = build_function_type_list (void_type_node,
12674 pv2si_type_node, V4SF_type_node, NULL_TREE);
12675 tree void_ftype_pfloat_v4sf
12676 = build_function_type_list (void_type_node,
12677 pfloat_type_node, V4SF_type_node, NULL_TREE);
12678 tree void_ftype_pdi_di
12679 = build_function_type_list (void_type_node,
12680 pdi_type_node, long_long_unsigned_type_node,
12681 NULL_TREE);
12682 tree void_ftype_pv2di_v2di
12683 = build_function_type_list (void_type_node,
12684 pv2di_type_node, V2DI_type_node, NULL_TREE);
12685 /* Normal vector unops. */
12686 tree v4sf_ftype_v4sf
12687 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12688
12689 /* Normal vector binops. */
12690 tree v4sf_ftype_v4sf_v4sf
12691 = build_function_type_list (V4SF_type_node,
12692 V4SF_type_node, V4SF_type_node, NULL_TREE);
12693 tree v8qi_ftype_v8qi_v8qi
12694 = build_function_type_list (V8QI_type_node,
12695 V8QI_type_node, V8QI_type_node, NULL_TREE);
12696 tree v4hi_ftype_v4hi_v4hi
12697 = build_function_type_list (V4HI_type_node,
12698 V4HI_type_node, V4HI_type_node, NULL_TREE);
12699 tree v2si_ftype_v2si_v2si
12700 = build_function_type_list (V2SI_type_node,
12701 V2SI_type_node, V2SI_type_node, NULL_TREE);
12702 tree di_ftype_di_di
12703 = build_function_type_list (long_long_unsigned_type_node,
12704 long_long_unsigned_type_node,
12705 long_long_unsigned_type_node, NULL_TREE);
12706
12707 tree v2si_ftype_v2sf
12708 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12709 tree v2sf_ftype_v2si
12710 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12711 tree v2si_ftype_v2si
12712 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12713 tree v2sf_ftype_v2sf
12714 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12715 tree v2sf_ftype_v2sf_v2sf
12716 = build_function_type_list (V2SF_type_node,
12717 V2SF_type_node, V2SF_type_node, NULL_TREE);
12718 tree v2si_ftype_v2sf_v2sf
12719 = build_function_type_list (V2SI_type_node,
12720 V2SF_type_node, V2SF_type_node, NULL_TREE);
12721 tree pint_type_node = build_pointer_type (integer_type_node);
12722 tree pdouble_type_node = build_pointer_type (double_type_node);
12723 tree int_ftype_v2df_v2df
12724 = build_function_type_list (integer_type_node,
12725 V2DF_type_node, V2DF_type_node, NULL_TREE);
12726
12727 tree ti_ftype_void
12728 = build_function_type (intTI_type_node, void_list_node);
12729 tree v2di_ftype_void
12730 = build_function_type (V2DI_type_node, void_list_node);
12731 tree ti_ftype_ti_ti
12732 = build_function_type_list (intTI_type_node,
12733 intTI_type_node, intTI_type_node, NULL_TREE);
12734 tree void_ftype_pvoid
12735 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12736 tree v2di_ftype_di
12737 = build_function_type_list (V2DI_type_node,
12738 long_long_unsigned_type_node, NULL_TREE);
12739 tree di_ftype_v2di
12740 = build_function_type_list (long_long_unsigned_type_node,
12741 V2DI_type_node, NULL_TREE);
12742 tree v4sf_ftype_v4si
12743 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12744 tree v4si_ftype_v4sf
12745 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12746 tree v2df_ftype_v4si
12747 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12748 tree v4si_ftype_v2df
12749 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12750 tree v2si_ftype_v2df
12751 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12752 tree v4sf_ftype_v2df
12753 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12754 tree v2df_ftype_v2si
12755 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12756 tree v2df_ftype_v4sf
12757 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12758 tree int_ftype_v2df
12759 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12760 tree v2df_ftype_v2df_int
12761 = build_function_type_list (V2DF_type_node,
12762 V2DF_type_node, integer_type_node, NULL_TREE);
12763 tree v4sf_ftype_v4sf_v2df
12764 = build_function_type_list (V4SF_type_node,
12765 V4SF_type_node, V2DF_type_node, NULL_TREE);
12766 tree v2df_ftype_v2df_v4sf
12767 = build_function_type_list (V2DF_type_node,
12768 V2DF_type_node, V4SF_type_node, NULL_TREE);
12769 tree v2df_ftype_v2df_v2df_int
12770 = build_function_type_list (V2DF_type_node,
12771 V2DF_type_node, V2DF_type_node,
12772 integer_type_node,
12773 NULL_TREE);
12774 tree v2df_ftype_v2df_pv2si
12775 = build_function_type_list (V2DF_type_node,
12776 V2DF_type_node, pv2si_type_node, NULL_TREE);
12777 tree void_ftype_pv2si_v2df
12778 = build_function_type_list (void_type_node,
12779 pv2si_type_node, V2DF_type_node, NULL_TREE);
12780 tree void_ftype_pdouble_v2df
12781 = build_function_type_list (void_type_node,
12782 pdouble_type_node, V2DF_type_node, NULL_TREE);
12783 tree void_ftype_pint_int
12784 = build_function_type_list (void_type_node,
12785 pint_type_node, integer_type_node, NULL_TREE);
12786 tree void_ftype_v16qi_v16qi_pchar
12787 = build_function_type_list (void_type_node,
12788 V16QI_type_node, V16QI_type_node,
12789 pchar_type_node, NULL_TREE);
12790 tree v2df_ftype_pdouble
12791 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12792 tree v2df_ftype_v2df_v2df
12793 = build_function_type_list (V2DF_type_node,
12794 V2DF_type_node, V2DF_type_node, NULL_TREE);
12795 tree v16qi_ftype_v16qi_v16qi
12796 = build_function_type_list (V16QI_type_node,
12797 V16QI_type_node, V16QI_type_node, NULL_TREE);
12798 tree v8hi_ftype_v8hi_v8hi
12799 = build_function_type_list (V8HI_type_node,
12800 V8HI_type_node, V8HI_type_node, NULL_TREE);
12801 tree v4si_ftype_v4si_v4si
12802 = build_function_type_list (V4SI_type_node,
12803 V4SI_type_node, V4SI_type_node, NULL_TREE);
12804 tree v2di_ftype_v2di_v2di
12805 = build_function_type_list (V2DI_type_node,
12806 V2DI_type_node, V2DI_type_node, NULL_TREE);
12807 tree v2di_ftype_v2df_v2df
12808 = build_function_type_list (V2DI_type_node,
12809 V2DF_type_node, V2DF_type_node, NULL_TREE);
12810 tree v2df_ftype_v2df
12811 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12812 tree v2df_ftype_double
12813 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12814 tree v2df_ftype_double_double
12815 = build_function_type_list (V2DF_type_node,
12816 double_type_node, double_type_node, NULL_TREE);
12817 tree int_ftype_v8hi_int
12818 = build_function_type_list (integer_type_node,
12819 V8HI_type_node, integer_type_node, NULL_TREE);
12820 tree v8hi_ftype_v8hi_int_int
12821 = build_function_type_list (V8HI_type_node,
12822 V8HI_type_node, integer_type_node,
12823 integer_type_node, NULL_TREE);
12824 tree v2di_ftype_v2di_int
12825 = build_function_type_list (V2DI_type_node,
12826 V2DI_type_node, integer_type_node, NULL_TREE);
12827 tree v4si_ftype_v4si_int
12828 = build_function_type_list (V4SI_type_node,
12829 V4SI_type_node, integer_type_node, NULL_TREE);
12830 tree v8hi_ftype_v8hi_int
12831 = build_function_type_list (V8HI_type_node,
12832 V8HI_type_node, integer_type_node, NULL_TREE);
12833 tree v8hi_ftype_v8hi_v2di
12834 = build_function_type_list (V8HI_type_node,
12835 V8HI_type_node, V2DI_type_node, NULL_TREE);
12836 tree v4si_ftype_v4si_v2di
12837 = build_function_type_list (V4SI_type_node,
12838 V4SI_type_node, V2DI_type_node, NULL_TREE);
12839 tree v4si_ftype_v8hi_v8hi
12840 = build_function_type_list (V4SI_type_node,
12841 V8HI_type_node, V8HI_type_node, NULL_TREE);
12842 tree di_ftype_v8qi_v8qi
12843 = build_function_type_list (long_long_unsigned_type_node,
12844 V8QI_type_node, V8QI_type_node, NULL_TREE);
12845 tree v2di_ftype_v16qi_v16qi
12846 = build_function_type_list (V2DI_type_node,
12847 V16QI_type_node, V16QI_type_node, NULL_TREE);
12848 tree int_ftype_v16qi
12849 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12850 tree v16qi_ftype_pchar
12851 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12852 tree void_ftype_pchar_v16qi
12853 = build_function_type_list (void_type_node,
12854 pchar_type_node, V16QI_type_node, NULL_TREE);
12855 tree v4si_ftype_pchar
12856 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12857 tree void_ftype_pchar_v4si
12858 = build_function_type_list (void_type_node,
12859 pchar_type_node, V4SI_type_node, NULL_TREE);
12860 tree v2di_ftype_v2di
12861 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12862
12863 /* Add all builtins that are more or less simple operations on two
12864 operands. */
12865 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12866 {
12867 /* Use one of the operands; the target can have a different mode for
12868 mask-generating compares. */
12869 enum machine_mode mode;
12870 tree type;
12871
12872 if (d->name == 0)
12873 continue;
12874 mode = insn_data[d->icode].operand[1].mode;
12875
12876 switch (mode)
12877 {
12878 case V16QImode:
12879 type = v16qi_ftype_v16qi_v16qi;
12880 break;
12881 case V8HImode:
12882 type = v8hi_ftype_v8hi_v8hi;
12883 break;
12884 case V4SImode:
12885 type = v4si_ftype_v4si_v4si;
12886 break;
12887 case V2DImode:
12888 type = v2di_ftype_v2di_v2di;
12889 break;
12890 case V2DFmode:
12891 type = v2df_ftype_v2df_v2df;
12892 break;
12893 case TImode:
12894 type = ti_ftype_ti_ti;
12895 break;
12896 case V4SFmode:
12897 type = v4sf_ftype_v4sf_v4sf;
12898 break;
12899 case V8QImode:
12900 type = v8qi_ftype_v8qi_v8qi;
12901 break;
12902 case V4HImode:
12903 type = v4hi_ftype_v4hi_v4hi;
12904 break;
12905 case V2SImode:
12906 type = v2si_ftype_v2si_v2si;
12907 break;
12908 case DImode:
12909 type = di_ftype_di_di;
12910 break;
12911
12912 default:
12913 abort ();
12914 }
12915
12916 /* Override for comparisons. */
12917 if (d->icode == CODE_FOR_maskcmpv4sf3
12918 || d->icode == CODE_FOR_maskncmpv4sf3
12919 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12920 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12921 type = v4si_ftype_v4sf_v4sf;
12922
12923 if (d->icode == CODE_FOR_maskcmpv2df3
12924 || d->icode == CODE_FOR_maskncmpv2df3
12925 || d->icode == CODE_FOR_vmmaskcmpv2df3
12926 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12927 type = v2di_ftype_v2df_v2df;
12928
12929 def_builtin (d->mask, d->name, type, d->code);
12930 }
12931
12932 /* Add the remaining MMX insns with somewhat more complicated types. */
12933 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12934 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12935 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12936 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12937 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12938 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12939 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12940
12941 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12942 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12943 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12944
12945 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12946 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12947
12948 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12949 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12950
12951 /* comi/ucomi insns. */
12952 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12953 if (d->mask == MASK_SSE2)
12954 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12955 else
12956 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12957
12958 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12959 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12960 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12961
12962 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12963 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12964 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12965 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12966 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12967 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12968
12969 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12970 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12971
12972 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12973
12974 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12975 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12976 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12977 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12978 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12979 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12980
12981 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12982 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12983 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12984 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12985
12986 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12987 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12988 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12989 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12990
12991 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12992
12993 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12994
12995 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12996 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12997 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12998 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12999 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13000 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13001
13002 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13003
13004 /* Original 3DNow! */
13005 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13006 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13007 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13008 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13009 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13010 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13011 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13012 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13013 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13014 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13015 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13016 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13017 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13018 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13019 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13020 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13021 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13022 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13023 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13024 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13025
13026 /* 3DNow! extension as used in the Athlon CPU. */
13027 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13028 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13029 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13030 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13031 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13032 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13033
13034 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13035
13036 /* SSE2 */
13037 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13038 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13039
13040 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13041 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13042 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13043
13044 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
13045 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
13046 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
13047 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13048 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13049 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13050
13051 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13052 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13053 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13054 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13055
13056 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13057 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13058 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13059 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13060 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13061
13062 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13063 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13064 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13065 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13066
13067 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13068 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13069
13070 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13071
13072 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13073 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13074
13075 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13076 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13077 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13078 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13079 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13080
13081 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13082
13083 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13084 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13085
13086 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13087 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13088 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13089
13090 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13091 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13092 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13093
13094 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13095 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13096 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13097 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
13098 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
13099 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13100 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13101
13102 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
13103 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13104 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13105
13106 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
13107 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
13108 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
13109 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13110 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13111 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
13112 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13113
13114 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13115
13116 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13117 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13118 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13119
13120 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13121 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13122 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13123
13124 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13125 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13126
13127 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13128 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13129 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13130 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13131
13132 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13133 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13134 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13135 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13136
13137 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13138 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13139
13140 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13141 }
13142
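/* A minimal sketch of how the builtins registered above are normally
   reached from user code, assuming the usual <xmmintrin.h> wrappers
   (the wrapper body shown here is only illustrative):

       #include <xmmintrin.h>

       __m128
       add4 (__m128 a, __m128 b)
       {
         return _mm_add_ps (a, b);
       }

   _mm_add_ps is a thin inline around __builtin_ia32_addps, which the
   bdesc_2arg loop above registered with type v4sf_ftype_v4sf_v4sf and
   which ix86_expand_binop_builtin below turns into a single addps insn.  */
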
13143 /* Errors in the source file can cause expand_expr to return const0_rtx
13144 where we expect a vector. To avoid crashing, use one of the vector
13145 clear instructions. */
13146 static rtx
13147 safe_vector_operand (x, mode)
13148 rtx x;
13149 enum machine_mode mode;
13150 {
13151 if (x != const0_rtx)
13152 return x;
13153 x = gen_reg_rtx (mode);
13154
13155 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13156 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13157 : gen_rtx_SUBREG (DImode, x, 0)));
13158 else
13159 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13160 : gen_rtx_SUBREG (V4SFmode, x, 0),
13161 CONST0_RTX (V4SFmode)));
13162 return x;
13163 }
13164
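/* Sketch of the recovery path, using the names above: if a source error
   made expand_expr hand back const0_rtx for a V4SF argument, the call

       op0 = safe_vector_operand (op0, V4SFmode);

   substitutes a fresh register cleared via gen_sse_clrv4sf, so the
   expanders below can keep going instead of crashing.  */
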
13165 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13166
13167 static rtx
13168 ix86_expand_binop_builtin (icode, arglist, target)
13169 enum insn_code icode;
13170 tree arglist;
13171 rtx target;
13172 {
13173 rtx pat;
13174 tree arg0 = TREE_VALUE (arglist);
13175 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13176 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13177 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13178 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13179 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13180 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13181
13182 if (VECTOR_MODE_P (mode0))
13183 op0 = safe_vector_operand (op0, mode0);
13184 if (VECTOR_MODE_P (mode1))
13185 op1 = safe_vector_operand (op1, mode1);
13186
13187 if (! target
13188 || GET_MODE (target) != tmode
13189 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13190 target = gen_reg_rtx (tmode);
13191
13192 /* In case the insn wants input operands in modes different from
13193 the result, abort. */
13194 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13195 abort ();
13196
13197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13198 op0 = copy_to_mode_reg (mode0, op0);
13199 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13200 op1 = copy_to_mode_reg (mode1, op1);
13201
13202 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13203 yet one of the two must not be a memory operand. This is normally enforced
13204 by expanders, but we didn't bother to create one here. */
13205 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13206 op0 = copy_to_mode_reg (mode0, op0);
13207
13208 pat = GEN_FCN (icode) (target, op0, op1);
13209 if (! pat)
13210 return 0;
13211 emit_insn (pat);
13212 return target;
13213 }
13214
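/* Sketch of what this expander does for a representative two-operand
   builtin, assuming __builtin_ia32_addps is backed by CODE_FOR_addv4sf3
   as elsewhere in this file: both arguments are expanded, copied into
   registers that satisfy the insn predicates, and a single pattern of the
   shape

       (set (reg:V4SF target)
            (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   is generated through GEN_FCN and emitted.  */
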
13215 /* Subroutine of ix86_expand_builtin to take care of stores. */
13216
13217 static rtx
13218 ix86_expand_store_builtin (icode, arglist)
13219 enum insn_code icode;
13220 tree arglist;
13221 {
13222 rtx pat;
13223 tree arg0 = TREE_VALUE (arglist);
13224 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13225 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13226 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13227 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13228 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13229
13230 if (VECTOR_MODE_P (mode1))
13231 op1 = safe_vector_operand (op1, mode1);
13232
13233 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13234
13235 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13236 op1 = copy_to_mode_reg (mode1, op1);
13237
13238 pat = GEN_FCN (icode) (op0, op1);
13239 if (pat)
13240 emit_insn (pat);
13241 return 0;
13242 }
13243
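/* The store builtins take the destination pointer as their first argument
   and the value as their second, so e.g. __builtin_ia32_storeaps (p, v)
   ends up as one move of V into the MEM built from P above.  */
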
13244 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13245
13246 static rtx
13247 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13248 enum insn_code icode;
13249 tree arglist;
13250 rtx target;
13251 int do_load;
13252 {
13253 rtx pat;
13254 tree arg0 = TREE_VALUE (arglist);
13255 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13256 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13257 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13258
13259 if (! target
13260 || GET_MODE (target) != tmode
13261 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13262 target = gen_reg_rtx (tmode);
13263 if (do_load)
13264 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13265 else
13266 {
13267 if (VECTOR_MODE_P (mode0))
13268 op0 = safe_vector_operand (op0, mode0);
13269
13270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13271 op0 = copy_to_mode_reg (mode0, op0);
13272 }
13273
13274 pat = GEN_FCN (icode) (target, op0);
13275 if (! pat)
13276 return 0;
13277 emit_insn (pat);
13278 return target;
13279 }
13280
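/* The DO_LOAD flag distinguishes the two users of this expander:
   ix86_expand_builtin passes 1 for the load builtins such as
   __builtin_ia32_loadaps, whose single argument is a pointer that is
   wrapped in a MEM here, and 0 for plain unops such as
   __builtin_ia32_sqrtps, whose argument is the vector operand itself.  */
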
13281 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13282 sqrtss, rsqrtss, rcpss. */
13283
13284 static rtx
13285 ix86_expand_unop1_builtin (icode, arglist, target)
13286 enum insn_code icode;
13287 tree arglist;
13288 rtx target;
13289 {
13290 rtx pat;
13291 tree arg0 = TREE_VALUE (arglist);
13292 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13293 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13294 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13295
13296 if (! target
13297 || GET_MODE (target) != tmode
13298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13299 target = gen_reg_rtx (tmode);
13300
13301 if (VECTOR_MODE_P (mode0))
13302 op0 = safe_vector_operand (op0, mode0);
13303
13304 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13305 op0 = copy_to_mode_reg (mode0, op0);
13306
13307 op1 = op0;
13308 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13309 op1 = copy_to_mode_reg (mode0, op1);
13310
13311 pat = GEN_FCN (icode) (target, op0, op1);
13312 if (! pat)
13313 return 0;
13314 emit_insn (pat);
13315 return target;
13316 }
13317
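/* Rough sketch of why the single argument is passed twice above: the vm*
   patterns behind sqrtss, rsqrtss and rcpss operate on the low element of
   one operand while keeping the upper elements of another, and for these
   builtins both roles are played by the same user-visible argument.  */
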
13318 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13319
13320 static rtx
13321 ix86_expand_sse_compare (d, arglist, target)
13322 const struct builtin_description *d;
13323 tree arglist;
13324 rtx target;
13325 {
13326 rtx pat;
13327 tree arg0 = TREE_VALUE (arglist);
13328 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13329 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13330 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13331 rtx op2;
13332 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13333 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13334 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13335 enum rtx_code comparison = d->comparison;
13336
13337 if (VECTOR_MODE_P (mode0))
13338 op0 = safe_vector_operand (op0, mode0);
13339 if (VECTOR_MODE_P (mode1))
13340 op1 = safe_vector_operand (op1, mode1);
13341
13342 /* Swap operands if we have a comparison that isn't available in
13343 hardware. */
13344 if (d->flag)
13345 {
13346 rtx tmp = gen_reg_rtx (mode1);
13347 emit_move_insn (tmp, op1);
13348 op1 = op0;
13349 op0 = tmp;
13350 }
13351
13352 if (! target
13353 || GET_MODE (target) != tmode
13354 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13355 target = gen_reg_rtx (tmode);
13356
13357 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13358 op0 = copy_to_mode_reg (mode0, op0);
13359 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13360 op1 = copy_to_mode_reg (mode1, op1);
13361
13362 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13363 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13364 if (! pat)
13365 return 0;
13366 emit_insn (pat);
13367 return target;
13368 }
13369
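/* Example of the operand swap above: cmpps has no GT or GE predicate, so a
   builtin such as __builtin_ia32_cmpgtps is described with D->FLAG set and
   the reverse condition, and is emitted as cmpltps with its two operands
   exchanged.  */
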
13370 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13371
13372 static rtx
13373 ix86_expand_sse_comi (d, arglist, target)
13374 const struct builtin_description *d;
13375 tree arglist;
13376 rtx target;
13377 {
13378 rtx pat;
13379 tree arg0 = TREE_VALUE (arglist);
13380 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13381 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13382 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13383 rtx op2;
13384 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13385 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13386 enum rtx_code comparison = d->comparison;
13387
13388 if (VECTOR_MODE_P (mode0))
13389 op0 = safe_vector_operand (op0, mode0);
13390 if (VECTOR_MODE_P (mode1))
13391 op1 = safe_vector_operand (op1, mode1);
13392
13393 /* Swap operands if we have a comparison that isn't available in
13394 hardware. */
13395 if (d->flag)
13396 {
13397 rtx tmp = op1;
13398 op1 = op0;
13399 op0 = tmp;
13400 }
13401
13402 target = gen_reg_rtx (SImode);
13403 emit_move_insn (target, const0_rtx);
13404 target = gen_rtx_SUBREG (QImode, target, 0);
13405
13406 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13407 op0 = copy_to_mode_reg (mode0, op0);
13408 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13409 op1 = copy_to_mode_reg (mode1, op1);
13410
13411 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13412 pat = GEN_FCN (d->icode) (op0, op1);
13413 if (! pat)
13414 return 0;
13415 emit_insn (pat);
13416 emit_insn (gen_rtx_SET (VOIDmode,
13417 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13418 gen_rtx_fmt_ee (comparison, QImode,
13419 SET_DEST (pat),
13420 const0_rtx)));
13421
13422 return SUBREG_REG (target);
13423 }
13424
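/* Sketch of the sequence emitted above for, say, __builtin_ia32_comieq:
   the comi insn compares the two operands and sets the flags, the SImode
   result register is pre-cleared, and the SET of its QImode low part turns
   the flag state into the 0/1 value the builtin returns.  */
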
13425 /* Expand an expression EXP that calls a built-in function,
13426 with result going to TARGET if that's convenient
13427 (and in mode MODE if that's convenient).
13428 SUBTARGET may be used as the target for computing one of EXP's operands.
13429 IGNORE is nonzero if the value is to be ignored. */
13430
13431 rtx
13432 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13433 tree exp;
13434 rtx target;
13435 rtx subtarget ATTRIBUTE_UNUSED;
13436 enum machine_mode mode ATTRIBUTE_UNUSED;
13437 int ignore ATTRIBUTE_UNUSED;
13438 {
13439 const struct builtin_description *d;
13440 size_t i;
13441 enum insn_code icode;
13442 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13443 tree arglist = TREE_OPERAND (exp, 1);
13444 tree arg0, arg1, arg2;
13445 rtx op0, op1, op2, pat;
13446 enum machine_mode tmode, mode0, mode1, mode2;
13447 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13448
13449 switch (fcode)
13450 {
13451 case IX86_BUILTIN_EMMS:
13452 emit_insn (gen_emms ());
13453 return 0;
13454
13455 case IX86_BUILTIN_SFENCE:
13456 emit_insn (gen_sfence ());
13457 return 0;
13458
13459 case IX86_BUILTIN_PEXTRW:
13460 case IX86_BUILTIN_PEXTRW128:
13461 icode = (fcode == IX86_BUILTIN_PEXTRW
13462 ? CODE_FOR_mmx_pextrw
13463 : CODE_FOR_sse2_pextrw);
13464 arg0 = TREE_VALUE (arglist);
13465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13466 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13467 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13468 tmode = insn_data[icode].operand[0].mode;
13469 mode0 = insn_data[icode].operand[1].mode;
13470 mode1 = insn_data[icode].operand[2].mode;
13471
13472 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13473 op0 = copy_to_mode_reg (mode0, op0);
13474 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13475 {
13476 /* @@@ better error message */
13477 error ("selector must be an immediate");
13478 return gen_reg_rtx (tmode);
13479 }
13480 if (target == 0
13481 || GET_MODE (target) != tmode
13482 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13483 target = gen_reg_rtx (tmode);
13484 pat = GEN_FCN (icode) (target, op0, op1);
13485 if (! pat)
13486 return 0;
13487 emit_insn (pat);
13488 return target;
13489
13490 case IX86_BUILTIN_PINSRW:
13491 case IX86_BUILTIN_PINSRW128:
13492 icode = (fcode == IX86_BUILTIN_PINSRW
13493 ? CODE_FOR_mmx_pinsrw
13494 : CODE_FOR_sse2_pinsrw);
13495 arg0 = TREE_VALUE (arglist);
13496 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13497 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13498 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13499 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13500 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13501 tmode = insn_data[icode].operand[0].mode;
13502 mode0 = insn_data[icode].operand[1].mode;
13503 mode1 = insn_data[icode].operand[2].mode;
13504 mode2 = insn_data[icode].operand[3].mode;
13505
13506 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13507 op0 = copy_to_mode_reg (mode0, op0);
13508 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13509 op1 = copy_to_mode_reg (mode1, op1);
13510 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13511 {
13512 /* @@@ better error message */
13513 error ("selector must be an immediate");
13514 return const0_rtx;
13515 }
13516 if (target == 0
13517 || GET_MODE (target) != tmode
13518 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13519 target = gen_reg_rtx (tmode);
13520 pat = GEN_FCN (icode) (target, op0, op1, op2);
13521 if (! pat)
13522 return 0;
13523 emit_insn (pat);
13524 return target;
13525
13526 case IX86_BUILTIN_MASKMOVQ:
13527 case IX86_BUILTIN_MASKMOVDQU:
13528 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13529 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13530 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13531 : CODE_FOR_sse2_maskmovdqu));
13532 /* Note the arg order is different from the operand order. */
13533 arg1 = TREE_VALUE (arglist);
13534 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13535 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13536 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13537 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13538 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13539 mode0 = insn_data[icode].operand[0].mode;
13540 mode1 = insn_data[icode].operand[1].mode;
13541 mode2 = insn_data[icode].operand[2].mode;
13542
13543 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13544 op0 = copy_to_mode_reg (mode0, op0);
13545 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13546 op1 = copy_to_mode_reg (mode1, op1);
13547 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13548 op2 = copy_to_mode_reg (mode2, op2);
13549 pat = GEN_FCN (icode) (op0, op1, op2);
13550 if (! pat)
13551 return 0;
13552 emit_insn (pat);
13553 return 0;
13554
13555 case IX86_BUILTIN_SQRTSS:
13556 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13557 case IX86_BUILTIN_RSQRTSS:
13558 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13559 case IX86_BUILTIN_RCPSS:
13560 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13561
13562 case IX86_BUILTIN_LOADAPS:
13563 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13564
13565 case IX86_BUILTIN_LOADUPS:
13566 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13567
13568 case IX86_BUILTIN_STOREAPS:
13569 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13570
13571 case IX86_BUILTIN_STOREUPS:
13572 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13573
13574 case IX86_BUILTIN_LOADSS:
13575 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13576
13577 case IX86_BUILTIN_STORESS:
13578 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13579
13580 case IX86_BUILTIN_LOADHPS:
13581 case IX86_BUILTIN_LOADLPS:
13582 case IX86_BUILTIN_LOADHPD:
13583 case IX86_BUILTIN_LOADLPD:
13584 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13585 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13586 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13587 : CODE_FOR_sse2_movlpd);
13588 arg0 = TREE_VALUE (arglist);
13589 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13590 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13591 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13592 tmode = insn_data[icode].operand[0].mode;
13593 mode0 = insn_data[icode].operand[1].mode;
13594 mode1 = insn_data[icode].operand[2].mode;
13595
13596 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13597 op0 = copy_to_mode_reg (mode0, op0);
13598 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13599 if (target == 0
13600 || GET_MODE (target) != tmode
13601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13602 target = gen_reg_rtx (tmode);
13603 pat = GEN_FCN (icode) (target, op0, op1);
13604 if (! pat)
13605 return 0;
13606 emit_insn (pat);
13607 return target;
13608
13609 case IX86_BUILTIN_STOREHPS:
13610 case IX86_BUILTIN_STORELPS:
13611 case IX86_BUILTIN_STOREHPD:
13612 case IX86_BUILTIN_STORELPD:
13613 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13614 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13615 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13616 : CODE_FOR_sse2_movlpd);
13617 arg0 = TREE_VALUE (arglist);
13618 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13619 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13620 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13621 mode0 = insn_data[icode].operand[1].mode;
13622 mode1 = insn_data[icode].operand[2].mode;
13623
13624 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13625 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13626 op1 = copy_to_mode_reg (mode1, op1);
13627
13628 pat = GEN_FCN (icode) (op0, op0, op1);
13629 if (! pat)
13630 return 0;
13631 emit_insn (pat);
13632 return 0;
13633
13634 case IX86_BUILTIN_MOVNTPS:
13635 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13636 case IX86_BUILTIN_MOVNTQ:
13637 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13638
13639 case IX86_BUILTIN_LDMXCSR:
13640 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13641 target = assign_386_stack_local (SImode, 0);
13642 emit_move_insn (target, op0);
13643 emit_insn (gen_ldmxcsr (target));
13644 return 0;
13645
13646 case IX86_BUILTIN_STMXCSR:
13647 target = assign_386_stack_local (SImode, 0);
13648 emit_insn (gen_stmxcsr (target));
13649 return copy_to_mode_reg (SImode, target);
13650
13651 case IX86_BUILTIN_SHUFPS:
13652 case IX86_BUILTIN_SHUFPD:
13653 icode = (fcode == IX86_BUILTIN_SHUFPS
13654 ? CODE_FOR_sse_shufps
13655 : CODE_FOR_sse2_shufpd);
13656 arg0 = TREE_VALUE (arglist);
13657 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13658 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13659 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13660 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13661 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13662 tmode = insn_data[icode].operand[0].mode;
13663 mode0 = insn_data[icode].operand[1].mode;
13664 mode1 = insn_data[icode].operand[2].mode;
13665 mode2 = insn_data[icode].operand[3].mode;
13666
13667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13668 op0 = copy_to_mode_reg (mode0, op0);
13669 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13670 op1 = copy_to_mode_reg (mode1, op1);
13671 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13672 {
13673 /* @@@ better error message */
13674 error ("mask must be an immediate");
13675 return gen_reg_rtx (tmode);
13676 }
13677 if (target == 0
13678 || GET_MODE (target) != tmode
13679 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13680 target = gen_reg_rtx (tmode);
13681 pat = GEN_FCN (icode) (target, op0, op1, op2);
13682 if (! pat)
13683 return 0;
13684 emit_insn (pat);
13685 return target;
13686
13687 case IX86_BUILTIN_PSHUFW:
13688 case IX86_BUILTIN_PSHUFD:
13689 case IX86_BUILTIN_PSHUFHW:
13690 case IX86_BUILTIN_PSHUFLW:
13691 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13692 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13693 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13694 : CODE_FOR_mmx_pshufw);
13695 arg0 = TREE_VALUE (arglist);
13696 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13697 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13698 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13699 tmode = insn_data[icode].operand[0].mode;
13700 mode1 = insn_data[icode].operand[1].mode;
13701 mode2 = insn_data[icode].operand[2].mode;
13702
13703 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13704 op0 = copy_to_mode_reg (mode1, op0);
13705 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13706 {
13707 /* @@@ better error message */
13708 error ("mask must be an immediate");
13709 return const0_rtx;
13710 }
13711 if (target == 0
13712 || GET_MODE (target) != tmode
13713 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13714 target = gen_reg_rtx (tmode);
13715 pat = GEN_FCN (icode) (target, op0, op1);
13716 if (! pat)
13717 return 0;
13718 emit_insn (pat);
13719 return target;
13720
13721 case IX86_BUILTIN_PSLLDQI128:
13722 case IX86_BUILTIN_PSRLDQI128:
13723 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13724 : CODE_FOR_sse2_lshrti3);
13725 arg0 = TREE_VALUE (arglist);
13726 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13727 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13728 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13729 tmode = insn_data[icode].operand[0].mode;
13730 mode1 = insn_data[icode].operand[1].mode;
13731 mode2 = insn_data[icode].operand[2].mode;
13732
13733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13734 {
13735 op0 = copy_to_reg (op0);
13736 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13737 }
13738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13739 {
13740 error ("shift must be an immediate");
13741 return const0_rtx;
13742 }
13743 target = gen_reg_rtx (V2DImode);
13744 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13745 if (! pat)
13746 return 0;
13747 emit_insn (pat);
13748 return target;
13749
13750 case IX86_BUILTIN_FEMMS:
13751 emit_insn (gen_femms ());
13752 return NULL_RTX;
13753
13754 case IX86_BUILTIN_PAVGUSB:
13755 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13756
13757 case IX86_BUILTIN_PF2ID:
13758 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13759
13760 case IX86_BUILTIN_PFACC:
13761 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13762
13763 case IX86_BUILTIN_PFADD:
13764 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13765
13766 case IX86_BUILTIN_PFCMPEQ:
13767 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13768
13769 case IX86_BUILTIN_PFCMPGE:
13770 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13771
13772 case IX86_BUILTIN_PFCMPGT:
13773 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13774
13775 case IX86_BUILTIN_PFMAX:
13776 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13777
13778 case IX86_BUILTIN_PFMIN:
13779 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13780
13781 case IX86_BUILTIN_PFMUL:
13782 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13783
13784 case IX86_BUILTIN_PFRCP:
13785 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13786
13787 case IX86_BUILTIN_PFRCPIT1:
13788 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13789
13790 case IX86_BUILTIN_PFRCPIT2:
13791 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13792
13793 case IX86_BUILTIN_PFRSQIT1:
13794 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13795
13796 case IX86_BUILTIN_PFRSQRT:
13797 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13798
13799 case IX86_BUILTIN_PFSUB:
13800 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13801
13802 case IX86_BUILTIN_PFSUBR:
13803 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13804
13805 case IX86_BUILTIN_PI2FD:
13806 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13807
13808 case IX86_BUILTIN_PMULHRW:
13809 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13810
13811 case IX86_BUILTIN_PF2IW:
13812 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13813
13814 case IX86_BUILTIN_PFNACC:
13815 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13816
13817 case IX86_BUILTIN_PFPNACC:
13818 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13819
13820 case IX86_BUILTIN_PI2FW:
13821 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13822
13823 case IX86_BUILTIN_PSWAPDSI:
13824 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13825
13826 case IX86_BUILTIN_PSWAPDSF:
13827 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13828
13829 case IX86_BUILTIN_SSE_ZERO:
13830 target = gen_reg_rtx (V4SFmode);
13831 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13832 return target;
13833
13834 case IX86_BUILTIN_MMX_ZERO:
13835 target = gen_reg_rtx (DImode);
13836 emit_insn (gen_mmx_clrdi (target));
13837 return target;
13838
13839 case IX86_BUILTIN_CLRTI:
13840 target = gen_reg_rtx (V2DImode);
13841 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13842 return target;
13843
13844
13845 case IX86_BUILTIN_SQRTSD:
13846 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13847 case IX86_BUILTIN_LOADAPD:
13848 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13849 case IX86_BUILTIN_LOADUPD:
13850 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13851
13852 case IX86_BUILTIN_STOREAPD:
13853 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13854 case IX86_BUILTIN_STOREUPD:
13855 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13856
13857 case IX86_BUILTIN_LOADSD:
13858 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13859
13860 case IX86_BUILTIN_STORESD:
13861 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13862
13863 case IX86_BUILTIN_SETPD1:
13864 target = assign_386_stack_local (DFmode, 0);
13865 arg0 = TREE_VALUE (arglist);
13866 emit_move_insn (adjust_address (target, DFmode, 0),
13867 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13868 op0 = gen_reg_rtx (V2DFmode);
13869 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13870 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13871 return op0;
13872
13873 case IX86_BUILTIN_SETPD:
13874 target = assign_386_stack_local (V2DFmode, 0);
13875 arg0 = TREE_VALUE (arglist);
13876 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13877 emit_move_insn (adjust_address (target, DFmode, 0),
13878 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13879 emit_move_insn (adjust_address (target, DFmode, 8),
13880 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13881 op0 = gen_reg_rtx (V2DFmode);
13882 emit_insn (gen_sse2_movapd (op0, target));
13883 return op0;
13884
13885 case IX86_BUILTIN_LOADRPD:
13886 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13887 gen_reg_rtx (V2DFmode), 1);
13888 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13889 return target;
13890
13891 case IX86_BUILTIN_LOADPD1:
13892 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13893 gen_reg_rtx (V2DFmode), 1);
13894 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13895 return target;
13896
13897 case IX86_BUILTIN_STOREPD1:
13898 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13899 case IX86_BUILTIN_STORERPD:
13900 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13901
13902 case IX86_BUILTIN_CLRPD:
13903 target = gen_reg_rtx (V2DFmode);
13904 emit_insn (gen_sse_clrv2df (target));
13905 return target;
13906
13907 case IX86_BUILTIN_MFENCE:
13908 emit_insn (gen_sse2_mfence ());
13909 return 0;
13910 case IX86_BUILTIN_LFENCE:
13911 emit_insn (gen_sse2_lfence ());
13912 return 0;
13913
13914 case IX86_BUILTIN_CLFLUSH:
13915 arg0 = TREE_VALUE (arglist);
13916 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13917 icode = CODE_FOR_sse2_clflush;
13918 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13919 op0 = copy_to_mode_reg (Pmode, op0);
13920
13921 emit_insn (gen_sse2_clflush (op0));
13922 return 0;
13923
13924 case IX86_BUILTIN_MOVNTPD:
13925 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13926 case IX86_BUILTIN_MOVNTDQ:
13927 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13928 case IX86_BUILTIN_MOVNTI:
13929 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13930
13931 case IX86_BUILTIN_LOADDQA:
13932 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13933 case IX86_BUILTIN_LOADDQU:
13934 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13935 case IX86_BUILTIN_LOADD:
13936 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13937
13938 case IX86_BUILTIN_STOREDQA:
13939 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13940 case IX86_BUILTIN_STOREDQU:
13941 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13942 case IX86_BUILTIN_STORED:
13943 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13944
13945 default:
13946 break;
13947 }
13948
13949 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13950 if (d->code == fcode)
13951 {
13952 /* Compares are treated specially. */
13953 if (d->icode == CODE_FOR_maskcmpv4sf3
13954 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13955 || d->icode == CODE_FOR_maskncmpv4sf3
13956 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13957 || d->icode == CODE_FOR_maskcmpv2df3
13958 || d->icode == CODE_FOR_vmmaskcmpv2df3
13959 || d->icode == CODE_FOR_maskncmpv2df3
13960 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13961 return ix86_expand_sse_compare (d, arglist, target);
13962
13963 return ix86_expand_binop_builtin (d->icode, arglist, target);
13964 }
13965
13966 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13967 if (d->code == fcode)
13968 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13969
13970 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13971 if (d->code == fcode)
13972 return ix86_expand_sse_comi (d, arglist, target);
13973
13974 /* @@@ Should really do something sensible here. */
13975 return 0;
13976 }
13977
13978 /* Store OPERAND into memory after reload has completed. This means
13979 that we can't easily use assign_stack_local. */
13980 rtx
13981 ix86_force_to_memory (mode, operand)
13982 enum machine_mode mode;
13983 rtx operand;
13984 {
13985 rtx result;
13986 if (!reload_completed)
13987 abort ();
13988 if (TARGET_64BIT && TARGET_RED_ZONE)
13989 {
13990 result = gen_rtx_MEM (mode,
13991 gen_rtx_PLUS (Pmode,
13992 stack_pointer_rtx,
13993 GEN_INT (-RED_ZONE_SIZE)));
13994 emit_move_insn (result, operand);
13995 }
13996 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13997 {
13998 switch (mode)
13999 {
14000 case HImode:
14001 case SImode:
14002 operand = gen_lowpart (DImode, operand);
14003 /* FALLTHRU */
14004 case DImode:
14005 emit_insn (
14006 gen_rtx_SET (VOIDmode,
14007 gen_rtx_MEM (DImode,
14008 gen_rtx_PRE_DEC (DImode,
14009 stack_pointer_rtx)),
14010 operand));
14011 break;
14012 default:
14013 abort ();
14014 }
14015 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14016 }
14017 else
14018 {
14019 switch (mode)
14020 {
14021 case DImode:
14022 {
14023 rtx operands[2];
14024 split_di (&operand, 1, operands, operands + 1);
14025 emit_insn (
14026 gen_rtx_SET (VOIDmode,
14027 gen_rtx_MEM (SImode,
14028 gen_rtx_PRE_DEC (Pmode,
14029 stack_pointer_rtx)),
14030 operands[1]));
14031 emit_insn (
14032 gen_rtx_SET (VOIDmode,
14033 gen_rtx_MEM (SImode,
14034 gen_rtx_PRE_DEC (Pmode,
14035 stack_pointer_rtx)),
14036 operands[0]));
14037 }
14038 break;
14039 case HImode:
14040 /* It is better to store HImodes as SImodes. */
14041 if (!TARGET_PARTIAL_REG_STALL)
14042 operand = gen_lowpart (SImode, operand);
14043 /* FALLTHRU */
14044 case SImode:
14045 emit_insn (
14046 gen_rtx_SET (VOIDmode,
14047 gen_rtx_MEM (GET_MODE (operand),
14048 gen_rtx_PRE_DEC (SImode,
14049 stack_pointer_rtx)),
14050 operand));
14051 break;
14052 default:
14053 abort ();
14054 }
14055 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14056 }
14057 return result;
14058 }
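/* As an illustration (not from the original sources): with the 64-bit
   red zone the operand is simply stored below the stack pointer, e.g.
   "movq %rax, -128(%rsp)" for the default 128-byte red zone, and no
   pointer adjustment is needed; in the other cases the operand is
   pushed with a pre-decrement store and the slot is released again
   later by ix86_free_from_memory.  */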
14059
14060 /* Free operand from the memory. */
14061 void
14062 ix86_free_from_memory (mode)
14063 enum machine_mode mode;
14064 {
14065 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14066 {
14067 int size;
14068
14069 if (mode == DImode || TARGET_64BIT)
14070 size = 8;
14071 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14072 size = 2;
14073 else
14074 size = 4;
14075 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14076 to a pop or add instruction if registers are available. */
14077 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14078 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14079 GEN_INT (size))));
14080 }
14081 }
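/* For example, releasing a 4-byte slot emits "leal 4(%esp), %esp"
   (LEA, unlike ADD, leaves the condition flags untouched); peephole2
   may later rewrite it as "popl <scratch>" or "addl $4, %esp" when
   registers and flags allow.  */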
14082
14083 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14084 QImode must go into class Q_REGS.
14085 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14086 movdf to do mem-to-mem moves through integer regs. */
14087 enum reg_class
14088 ix86_preferred_reload_class (x, class)
14089 rtx x;
14090 enum reg_class class;
14091 {
14092 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14093 return NO_REGS;
14094 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14095 {
14096 /* SSE can't load any constant directly yet. */
14097 if (SSE_CLASS_P (class))
14098 return NO_REGS;
14099 /* Floats can load 0 and 1. */
14100 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14101 {
14102 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14103 if (MAYBE_SSE_CLASS_P (class))
14104 return (reg_class_subset_p (class, GENERAL_REGS)
14105 ? GENERAL_REGS : FLOAT_REGS);
14106 else
14107 return class;
14108 }
14109 /* General regs can load everything. */
14110 if (reg_class_subset_p (class, GENERAL_REGS))
14111 return GENERAL_REGS;
14112 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14113 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14114 return NO_REGS;
14115 }
14116 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14117 return NO_REGS;
14118 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14119 return Q_REGS;
14120 return class;
14121 }
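/* For example, reloading the DFmode constant 1.0 into an SSE class
   yields NO_REGS, forcing the value into the constant pool, while an
   x87 class may keep it because standard_80387_constant_p recognizes
   0.0 and 1.0 (fldz/fld1).  Other float constants headed for a float
   or SSE class likewise end up in memory.  */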
14122
14123 /* If we are copying between general and FP registers, we need a memory
14124 location. The same is true for SSE and MMX registers.
14125
14126 The macro can't work reliably when one of the CLASSES is a class containing
14127 registers from multiple units (SSE, MMX, integer). We avoid this by never
14128 combining those units in a single alternative in the machine description.
14129 Ensure that this constraint holds to avoid unexpected surprises.
14130
14131 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14132 enforce these sanity checks. */
14133 int
14134 ix86_secondary_memory_needed (class1, class2, mode, strict)
14135 enum reg_class class1, class2;
14136 enum machine_mode mode;
14137 int strict;
14138 {
14139 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14140 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14141 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14142 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14143 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14144 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14145 {
14146 if (strict)
14147 abort ();
14148 else
14149 return 1;
14150 }
14151 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14152 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14153 && (mode) != SImode)
14154 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14155 && (mode) != SImode));
14156 }
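/* For example, a DFmode copy between an SSE register and a general
   register must go through a stack slot (store followed by load),
   whereas an SImode copy between the same classes needs no memory
   since it can be done with a single movd.  */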
14157 /* Return the cost of moving data from a register in class CLASS1 to
14158 one in class CLASS2.
14159
14160 It is not required that the cost always equal 2 when FROM is the same as TO;
14161 on some machines it is expensive to move between registers if they are not
14162 general registers. */
14163 int
14164 ix86_register_move_cost (mode, class1, class2)
14165 enum machine_mode mode;
14166 enum reg_class class1, class2;
14167 {
14168 /* In case we require secondary memory, compute the cost of the store followed
14169 by a load. In order to avoid bad register allocation choices, we need
14170 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14171
14172 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14173 {
14174 int cost = 1;
14175
14176 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14177 MEMORY_MOVE_COST (mode, class1, 1));
14178 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14179 MEMORY_MOVE_COST (mode, class2, 1));
14180
14181 /* In case of copying from a general purpose register we may emit multiple
14182 stores followed by a single load, causing a memory size mismatch stall.
14183 Count this as an arbitrarily high cost of 20. */
14184 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14185 cost += 20;
14186
14187 /* In the case of FP/MMX moves, the registers actually overlap, and we
14188 have to switch modes in order to treat them differently. */
14189 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14190 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14191 cost += 20;
14192
14193 return cost;
14194 }
14195
14196 /* Moves between SSE/MMX and integer unit are expensive. */
14197 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14198 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14199 return ix86_cost->mmxsse_to_integer;
14200 if (MAYBE_FLOAT_CLASS_P (class1))
14201 return ix86_cost->fp_move;
14202 if (MAYBE_SSE_CLASS_P (class1))
14203 return ix86_cost->sse_move;
14204 if (MAYBE_MMX_CLASS_P (class1))
14205 return ix86_cost->mmx_move;
14206 return 2;
14207 }
14208
14209 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14210 int
14211 ix86_hard_regno_mode_ok (regno, mode)
14212 int regno;
14213 enum machine_mode mode;
14214 {
14215 /* Flags, and only flags, can hold CCmode values. */
14216 if (CC_REGNO_P (regno))
14217 return GET_MODE_CLASS (mode) == MODE_CC;
14218 if (GET_MODE_CLASS (mode) == MODE_CC
14219 || GET_MODE_CLASS (mode) == MODE_RANDOM
14220 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14221 return 0;
14222 if (FP_REGNO_P (regno))
14223 return VALID_FP_MODE_P (mode);
14224 if (SSE_REGNO_P (regno))
14225 return VALID_SSE_REG_MODE (mode);
14226 if (MMX_REGNO_P (regno))
14227 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14228 /* We handle both integers and floats in the general purpose registers.
14229 In the future we should be able to handle vector modes as well. */
14230 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14231 return 0;
14232 /* Take care with QImode values - they can be in non-QI regs, but then
14233 they do cause partial register stalls. */
14234 if (regno < 4 || mode != QImode || TARGET_64BIT)
14235 return 1;
14236 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14237 }
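/* For instance, the four byte-addressable registers (hard regs 0-3)
   accept QImode unconditionally, while higher general registers such
   as %esi may hold a QImode value in 32-bit code only when partial
   register stalls are not a concern, or during/after reload.  */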
14238
14239 /* Return the cost of moving data of mode M between a
14240 register and memory. A value of 2 is the default; this cost is
14241 relative to those in `REGISTER_MOVE_COST'.
14242
14243 If moving between registers and memory is more expensive than
14244 between two registers, you should define this macro to express the
14245 relative cost.
14246
14247 Also model the increased cost of moving QImode registers in non
14248 Q_REGS classes.
14249 */
14250 int
14251 ix86_memory_move_cost (mode, class, in)
14252 enum machine_mode mode;
14253 enum reg_class class;
14254 int in;
14255 {
14256 if (FLOAT_CLASS_P (class))
14257 {
14258 int index;
14259 switch (mode)
14260 {
14261 case SFmode:
14262 index = 0;
14263 break;
14264 case DFmode:
14265 index = 1;
14266 break;
14267 case XFmode:
14268 case TFmode:
14269 index = 2;
14270 break;
14271 default:
14272 return 100;
14273 }
14274 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14275 }
14276 if (SSE_CLASS_P (class))
14277 {
14278 int index;
14279 switch (GET_MODE_SIZE (mode))
14280 {
14281 case 4:
14282 index = 0;
14283 break;
14284 case 8:
14285 index = 1;
14286 break;
14287 case 16:
14288 index = 2;
14289 break;
14290 default:
14291 return 100;
14292 }
14293 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14294 }
14295 if (MMX_CLASS_P (class))
14296 {
14297 int index;
14298 switch (GET_MODE_SIZE (mode))
14299 {
14300 case 4:
14301 index = 0;
14302 break;
14303 case 8:
14304 index = 1;
14305 break;
14306 default:
14307 return 100;
14308 }
14309 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14310 }
14311 switch (GET_MODE_SIZE (mode))
14312 {
14313 case 1:
14314 if (in)
14315 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14316 : ix86_cost->movzbl_load);
14317 else
14318 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14319 : ix86_cost->int_store[0] + 4);
14320 break;
14321 case 2:
14322 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14323 default:
14324 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
14325 if (mode == TFmode)
14326 mode = XFmode;
14327 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14328 * ((int) GET_MODE_SIZE (mode)
14329 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14330 }
14331 }
14332
14333 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14334 static void
14335 ix86_svr3_asm_out_constructor (symbol, priority)
14336 rtx symbol;
14337 int priority ATTRIBUTE_UNUSED;
14338 {
14339 init_section ();
14340 fputs ("\tpushl $", asm_out_file);
14341 assemble_name (asm_out_file, XSTR (symbol, 0));
14342 fputc ('\n', asm_out_file);
14343 }
14344 #endif
14345
14346 #if TARGET_MACHO
14347
14348 static int current_machopic_label_num;
14349
14350 /* Given a symbol name and its associated stub, write out the
14351 definition of the stub. */
14352
14353 void
14354 machopic_output_stub (file, symb, stub)
14355 FILE *file;
14356 const char *symb, *stub;
14357 {
14358 unsigned int length;
14359 char *binder_name, *symbol_name, lazy_ptr_name[32];
14360 int label = ++current_machopic_label_num;
14361
14362 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14363 symb = (*targetm.strip_name_encoding) (symb);
14364
14365 length = strlen (stub);
14366 binder_name = alloca (length + 32);
14367 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14368
14369 length = strlen (symb);
14370 symbol_name = alloca (length + 32);
14371 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14372
14373 sprintf (lazy_ptr_name, "L%d$lz", label);
14374
14375 if (MACHOPIC_PURE)
14376 machopic_picsymbol_stub_section ();
14377 else
14378 machopic_symbol_stub_section ();
14379
14380 fprintf (file, "%s:\n", stub);
14381 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14382
14383 if (MACHOPIC_PURE)
14384 {
14385 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14386 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14387 fprintf (file, "\tjmp %%edx\n");
14388 }
14389 else
14390 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14391
14392 fprintf (file, "%s:\n", binder_name);
14393
14394 if (MACHOPIC_PURE)
14395 {
14396 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14397 fprintf (file, "\tpushl %%eax\n");
14398 }
14399 else
14400 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14401
14402 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14403
14404 machopic_lazy_symbol_ptr_section ();
14405 fprintf (file, "%s:\n", lazy_ptr_name);
14406 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14407 fprintf (file, "\t.long %s\n", binder_name);
14408 }
14409 #endif /* TARGET_MACHO */
14410
14411 /* Order the registers for the register allocator. */
14412
14413 void
14414 x86_order_regs_for_local_alloc ()
14415 {
14416 int pos = 0;
14417 int i;
14418
14419 /* First allocate the local general purpose registers. */
14420 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14421 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14422 reg_alloc_order [pos++] = i;
14423
14424 /* Global general purpose registers. */
14425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14426 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14427 reg_alloc_order [pos++] = i;
14428
14429 /* x87 registers come first in case we are doing FP math
14430 using them. */
14431 if (!TARGET_SSE_MATH)
14432 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14433 reg_alloc_order [pos++] = i;
14434
14435 /* SSE registers. */
14436 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14437 reg_alloc_order [pos++] = i;
14438 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14439 reg_alloc_order [pos++] = i;
14440
14441 /* x87 registers. */
14442 if (TARGET_SSE_MATH)
14443 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14444 reg_alloc_order [pos++] = i;
14445
14446 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14447 reg_alloc_order [pos++] = i;
14448
14449 /* Initialize the rest of the array, as we do not allocate some registers
14450 at all. */
14451 while (pos < FIRST_PSEUDO_REGISTER)
14452 reg_alloc_order [pos++] = 0;
14453 }
14454
14455 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14456 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14457 #endif
14458
14459 static bool
14460 ix86_ms_bitfield_layout_p (record_type)
14461 tree record_type ATTRIBUTE_UNUSED;
14462 {
14463 return TARGET_USE_MS_BITFIELD_LAYOUT;
14464 }
14465
14466 /* Returns an expression indicating where the this parameter is
14467 located on entry to the FUNCTION. */
14468
14469 static rtx
14470 x86_this_parameter (function)
14471 tree function;
14472 {
14473 tree type = TREE_TYPE (function);
14474
14475 if (TARGET_64BIT)
14476 {
14477 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14478 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14479 }
14480
14481 if (ix86_fntype_regparm (type) > 0)
14482 {
14483 tree parm;
14484
14485 parm = TYPE_ARG_TYPES (type);
14486 /* Figure out whether or not the function has a variable number of
14487 arguments. */
14488 for (; parm; parm = TREE_CHAIN (parm))
14489 if (TREE_VALUE (parm) == void_type_node)
14490 break;
14491 /* If not, the this parameter is in %eax. */
14492 if (parm)
14493 return gen_rtx_REG (SImode, 0);
14494 }
14495
14496 if (aggregate_value_p (TREE_TYPE (type)))
14497 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14498 else
14499 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14500 }
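/* For example: a prototyped, non-variadic method with a nonzero
   regparm receives `this' in %eax; an ordinary stack-args method
   finds it at 4(%esp) on entry, or at 8(%esp) when a hidden
   aggregate-return pointer is passed first.  In 64-bit mode `this'
   is in the first (or, with a hidden return pointer, second) integer
   argument register.  */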
14501
14502 /* Determine whether x86_output_mi_thunk can succeed. */
14503
14504 static bool
14505 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14506 tree thunk ATTRIBUTE_UNUSED;
14507 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14508 HOST_WIDE_INT vcall_offset;
14509 tree function;
14510 {
14511 /* 64-bit can handle anything. */
14512 if (TARGET_64BIT)
14513 return true;
14514
14515 /* For 32-bit, everything's fine if we have one free register. */
14516 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14517 return true;
14518
14519 /* Need a free register for vcall_offset. */
14520 if (vcall_offset)
14521 return false;
14522
14523 /* Need a free register for GOT references. */
14524 if (flag_pic && !(*targetm.binds_local_p) (function))
14525 return false;
14526
14527 /* Otherwise ok. */
14528 return true;
14529 }
14530
14531 /* Output the assembler code for a thunk function. THUNK_DECL is the
14532 declaration for the thunk function itself, FUNCTION is the decl for
14533 the target function. DELTA is an immediate constant offset to be
14534 added to THIS. If VCALL_OFFSET is nonzero, the word at
14535 *(*this + vcall_offset) should be added to THIS. */
14536
14537 static void
14538 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14539 FILE *file ATTRIBUTE_UNUSED;
14540 tree thunk ATTRIBUTE_UNUSED;
14541 HOST_WIDE_INT delta;
14542 HOST_WIDE_INT vcall_offset;
14543 tree function;
14544 {
14545 rtx xops[3];
14546 rtx this = x86_this_parameter (function);
14547 rtx this_reg, tmp;
14548
14549 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14550 pull it in now and let DELTA benefit. */
14551 if (REG_P (this))
14552 this_reg = this;
14553 else if (vcall_offset)
14554 {
14555 /* Put the this parameter into %eax. */
14556 xops[0] = this;
14557 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14558 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14559 }
14560 else
14561 this_reg = NULL_RTX;
14562
14563 /* Adjust the this parameter by a fixed constant. */
14564 if (delta)
14565 {
14566 xops[0] = GEN_INT (delta);
14567 xops[1] = this_reg ? this_reg : this;
14568 if (TARGET_64BIT)
14569 {
14570 if (!x86_64_general_operand (xops[0], DImode))
14571 {
14572 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14573 xops[1] = tmp;
14574 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14575 xops[0] = tmp;
14576 xops[1] = this;
14577 }
14578 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14579 }
14580 else
14581 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14582 }
14583
14584 /* Adjust the this parameter by a value stored in the vtable. */
14585 if (vcall_offset)
14586 {
14587 if (TARGET_64BIT)
14588 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14589 else
14590 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14591
14592 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14593 xops[1] = tmp;
14594 if (TARGET_64BIT)
14595 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14596 else
14597 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14598
14599 /* Adjust the this parameter. */
14600 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14601 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14602 {
14603 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14604 xops[0] = GEN_INT (vcall_offset);
14605 xops[1] = tmp2;
14606 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14607 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14608 }
14609 xops[1] = this_reg;
14610 if (TARGET_64BIT)
14611 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14612 else
14613 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14614 }
14615
14616 /* If necessary, drop THIS back to its stack slot. */
14617 if (this_reg && this_reg != this)
14618 {
14619 xops[0] = this_reg;
14620 xops[1] = this;
14621 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14622 }
14623
14624 xops[0] = DECL_RTL (function);
14625 if (TARGET_64BIT)
14626 {
14627 if (!flag_pic || (*targetm.binds_local_p) (function))
14628 output_asm_insn ("jmp\t%P0", xops);
14629 else
14630 {
14631 tmp = XEXP (xops[0], 0);
14632 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14633 tmp = gen_rtx_CONST (Pmode, tmp);
14634 tmp = gen_rtx_MEM (QImode, tmp);
14635 xops[0] = tmp;
14636 output_asm_insn ("jmp\t%A0", xops);
14637 }
14638 }
14639 else
14640 {
14641 if (!flag_pic || (*targetm.binds_local_p) (function))
14642 output_asm_insn ("jmp\t%P0", xops);
14643 else
14644 {
14645 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14646 output_set_got (tmp);
14647
14648 xops[1] = tmp;
14649 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14650 output_asm_insn ("jmp\t{*}%1", xops);
14651 }
14652 }
14653 }
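/* Roughly, a 32-bit thunk with only a DELTA adjustment comes out as

   addl $DELTA, 4(%esp)
   jmp  FUNCTION

   while a nonzero VCALL_OFFSET additionally loads `this' into %eax,
   fetches the vtable slot *(*this + VCALL_OFFSET) through %ecx, adds
   it to `this', and stores the result back before the tail jump.  */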
14654
14655 int
14656 x86_field_alignment (field, computed)
14657 tree field;
14658 int computed;
14659 {
14660 enum machine_mode mode;
14661 tree type = TREE_TYPE (field);
14662
14663 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14664 return computed;
14665 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14666 ? get_inner_array_type (type) : type);
14667 if (mode == DFmode || mode == DCmode
14668 || GET_MODE_CLASS (mode) == MODE_INT
14669 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14670 return MIN (32, computed);
14671 return computed;
14672 }
14673
14674 /* Output assembler code to FILE to increment profiler label # LABELNO
14675 for profiling a function entry. */
14676 void
14677 x86_function_profiler (file, labelno)
14678 FILE *file;
14679 int labelno;
14680 {
14681 if (TARGET_64BIT)
14682 if (flag_pic)
14683 {
14684 #ifndef NO_PROFILE_COUNTERS
14685 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14686 #endif
14687 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14688 }
14689 else
14690 {
14691 #ifndef NO_PROFILE_COUNTERS
14692 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14693 #endif
14694 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14695 }
14696 else if (flag_pic)
14697 {
14698 #ifndef NO_PROFILE_COUNTERS
14699 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14700 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14701 #endif
14702 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14703 }
14704 else
14705 {
14706 #ifndef NO_PROFILE_COUNTERS
14707 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14708 PROFILE_COUNT_REGISTER);
14709 #endif
14710 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14711 }
14712 }
14713
14714 /* Implement machine specific optimizations.
14715 At the moment we implement a single transformation: AMD Athlon works faster
14716 when RET is not the destination of a conditional jump or directly preceded
14717 by another jump instruction. We avoid the penalty by inserting a NOP just
14718 before the RET instruction in such cases. */
14719 void
14720 x86_machine_dependent_reorg (first)
14721 rtx first ATTRIBUTE_UNUSED;
14722 {
14723 edge e;
14724
14725 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
14726 return;
14727 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14728 {
14729 basic_block bb = e->src;
14730 rtx ret = bb->end;
14731 rtx prev;
14732 bool insert = false;
14733
14734 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14735 continue;
14736 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14737 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14738 break;
14739 if (prev && GET_CODE (prev) == CODE_LABEL)
14740 {
14741 edge e;
14742 for (e = bb->pred; e; e = e->pred_next)
14743 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14744 && !(e->flags & EDGE_FALLTHRU))
14745 insert = 1;
14746 }
14747 if (!insert)
14748 {
14749 prev = prev_active_insn (ret);
14750 if (prev && GET_CODE (prev) == JUMP_INSN
14751 && any_condjump_p (prev))
14752 insert = 1;
14753 /* Empty functions get a branch mispredict even when the jump destination
14754 is not visible to us. */
14755 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14756 insert = 1;
14757 }
14758 if (insert)
14759 emit_insn_before (gen_nop (), ret);
14760 }
14761 }
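/* For example, a hot return reached directly from a conditional jump,

   jne .L2
   .L2:
   ret

   becomes ".L2: nop; ret", so the RET is no longer the immediate
   target of the branch.  */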
14762
14763 /* Return nonzero when a QImode register that must be represented via a REX
14764 prefix is used. */
14765 bool
14766 x86_extended_QIreg_mentioned_p (insn)
14767 rtx insn;
14768 {
14769 int i;
14770 extract_insn_cached (insn);
14771 for (i = 0; i < recog_data.n_operands; i++)
14772 if (REG_P (recog_data.operand[i])
14773 && REGNO (recog_data.operand[i]) >= 4)
14774 return true;
14775 return false;
14776 }
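/* For example "movb %sil, (%rdi)" mentions such a register: the byte
   variant of %esi (hard register 4 and above) is only encodable with
   a REX prefix in 64-bit mode.  */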
14777
14778 /* Return nonzero when P points to a register encoded via a REX prefix.
14779 Called via for_each_rtx. */
14780 static int
14781 extended_reg_mentioned_1 (p, data)
14782 rtx *p;
14783 void *data ATTRIBUTE_UNUSED;
14784 {
14785 unsigned int regno;
14786 if (!REG_P (*p))
14787 return 0;
14788 regno = REGNO (*p);
14789 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14790 }
14791
14792 /* Return true when INSN mentions a register that must be encoded using a REX
14793 prefix. */
14794 bool
14795 x86_extended_reg_mentioned_p (insn)
14796 rtx insn;
14797 {
14798 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14799 }
14800
14801 #include "gt-i386.h"