1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
94
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of storing fp registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
138
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
181
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of storing fp registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
224
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of storing fp registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
267
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of storing fp registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
310
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
353
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
396
397 const struct processor_costs *ix86_cost = &pentium_cost;
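/* Illustrative sketch (added note, not part of the original source): the
   tables above are consulted through this pointer whenever the backend needs
   to price an operation.  The field name below is an assumption; the real
   struct processor_costs is declared in i386.h, and the pointer itself is
   redirected in override_options according to -mcpu= and -Os.  */
#if 0
static int
example_add_cost (void)
{
  /* Relative cost of an integer add on the currently selected CPU.  */
  return ix86_cost->add;
}
#endif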
398
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
407
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
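/* Illustrative sketch (added note, not part of the original source): i386.h
   turns each of the bitmasks above into a TARGET_* predicate by testing the
   bit that corresponds to the CPU being tuned for.  The macro names below
   are assumptions; see i386.h for the real definitions.  */
#if 0
#define EXAMPLE_CPUMASK (1 << ix86_cpu)
#define EXAMPLE_TARGET_USE_LEAVE (x86_use_leave & EXAMPLE_CPUMASK)
#endif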
448
449 /* If the average insn count for a single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452 #define FAST_PROLOGUE_INSN_COUNT 30
453
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
457
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
462
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
465
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
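/* Illustrative sketch (added note): as stated above, i386.h wraps this array
   in the REGNO_REG_CLASS macro, roughly as follows (the exact definition is
   an assumption; see i386.h).  */
#if 0
#define EXAMPLE_REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif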
488
489 /* The "default" register map used in 32bit mode. */
490
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
501
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
507
508 static int const x86_64_int_return_registers[4] =
509 {
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
512
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
524
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578 */
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
588 };
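/* Illustrative sketch (added note, not part of the original source): the
   target headers choose one of the maps above when a debug register number
   is emitted, roughly along these lines.  The macro below is an assumption;
   the real DBX_REGISTER_NUMBER definition lives in the i386 target headers
   and also selects svr4_dbx_register_map on SVR4-style targets.  */
#if 0
#define EXAMPLE_DBX_REGISTER_NUMBER(N) \
  (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
#endif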
589
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
595
596 /* The encoding characters for the four TLS models present in ELF. */
597
598 static char const tls_model_chars[] = " GLil";
599
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
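/* Worked example (added note): assuming the x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, the register save area is
   6 * 8 + 8 * 16 = 48 + 128 = 176 bytes, matching the x86-64 ABI.  */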
603
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
612
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
615
616 /* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650 };
651
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
656 /* Parsed value. */
657 enum cmodel ix86_cmodel;
658 /* Asm dialect. */
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
661 /* TLS dialect. */
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
664
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
667
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
672
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
677
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
680
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse;
683
684 /* ix86_regparm_string as a number */
685 int ix86_regparm;
686
687 /* Alignment to use for loops and jumps: */
688
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
691
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
694
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
697
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
700
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
704
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
707
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
711 \f
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
716 int, int, FILE *));
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753
754 struct ix86_address
755 {
756 rtx base, index, disp;
757 HOST_WIDE_INT scale;
758 };
759
760 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
761
762 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
763 static const char *ix86_strip_name_encoding PARAMS ((const char *))
764 ATTRIBUTE_UNUSED;
765
766 struct builtin_description;
767 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
768 tree, rtx));
769 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
770 tree, rtx));
771 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
772 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
773 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
774 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
775 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
776 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
777 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
778 enum rtx_code *,
779 enum rtx_code *,
780 enum rtx_code *));
781 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
782 rtx *, rtx *));
783 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
784 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
785 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
786 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
787 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
788 static int ix86_save_reg PARAMS ((unsigned int, int));
789 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
790 static int ix86_comp_type_attributes PARAMS ((tree, tree));
791 const struct attribute_spec ix86_attribute_table[];
792 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
793 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
794 static int ix86_value_regno PARAMS ((enum machine_mode));
795
796 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
797 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
798 #endif
799
800 /* Register class used for passing a given 64-bit part of the argument.
801 These represent classes as documented by the PS ABI, with the exception
802 of the SSESF and SSEDF classes, which are basically the SSE class;
803 gcc just uses an SFmode or DFmode move instead of DImode to avoid
804 reformatting penalties.
805 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
806 whenever possible (the upper half then contains only padding).
807 */
808 enum x86_64_reg_class
809 {
810 X86_64_NO_CLASS,
811 X86_64_INTEGER_CLASS,
812 X86_64_INTEGERSI_CLASS,
813 X86_64_SSE_CLASS,
814 X86_64_SSESF_CLASS,
815 X86_64_SSEDF_CLASS,
816 X86_64_SSEUP_CLASS,
817 X86_64_X87_CLASS,
818 X86_64_X87UP_CLASS,
819 X86_64_MEMORY_CLASS
820 };
821 static const char * const x86_64_reg_class_name[] =
822 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
823
824 #define MAX_CLASSES 4
825 static int classify_argument PARAMS ((enum machine_mode, tree,
826 enum x86_64_reg_class [MAX_CLASSES],
827 int));
828 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
829 int *));
830 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
831 const int *, int));
832 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
833 enum x86_64_reg_class));
834 \f
835 /* Initialize the GCC target structure. */
836 #undef TARGET_ATTRIBUTE_TABLE
837 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
838 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
839 # undef TARGET_MERGE_DECL_ATTRIBUTES
840 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
841 #endif
842
843 #undef TARGET_COMP_TYPE_ATTRIBUTES
844 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
845
846 #undef TARGET_INIT_BUILTINS
847 #define TARGET_INIT_BUILTINS ix86_init_builtins
848
849 #undef TARGET_EXPAND_BUILTIN
850 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
851
852 #undef TARGET_ASM_FUNCTION_EPILOGUE
853 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
854
855 #undef TARGET_ASM_OPEN_PAREN
856 #define TARGET_ASM_OPEN_PAREN ""
857 #undef TARGET_ASM_CLOSE_PAREN
858 #define TARGET_ASM_CLOSE_PAREN ""
859
860 #undef TARGET_ASM_ALIGNED_HI_OP
861 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
862 #undef TARGET_ASM_ALIGNED_SI_OP
863 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
864 #ifdef ASM_QUAD
865 #undef TARGET_ASM_ALIGNED_DI_OP
866 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
867 #endif
868
869 #undef TARGET_ASM_UNALIGNED_HI_OP
870 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
871 #undef TARGET_ASM_UNALIGNED_SI_OP
872 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
873 #undef TARGET_ASM_UNALIGNED_DI_OP
874 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
875
876 #undef TARGET_SCHED_ADJUST_COST
877 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
878 #undef TARGET_SCHED_ISSUE_RATE
879 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
880 #undef TARGET_SCHED_VARIABLE_ISSUE
881 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
882 #undef TARGET_SCHED_INIT
883 #define TARGET_SCHED_INIT ix86_sched_init
884 #undef TARGET_SCHED_REORDER
885 #define TARGET_SCHED_REORDER ix86_sched_reorder
886 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
887 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
888 ia32_use_dfa_pipeline_interface
889 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
890 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
891 ia32_multipass_dfa_lookahead
892
893 #ifdef HAVE_AS_TLS
894 #undef TARGET_HAVE_TLS
895 #define TARGET_HAVE_TLS true
896 #endif
897
898 struct gcc_target targetm = TARGET_INITIALIZER;
899 \f
900 /* Sometimes certain combinations of command options do not make
901 sense on a particular target machine. You can define a macro
902 `OVERRIDE_OPTIONS' to take account of this. This macro, if
903 defined, is executed once just after all the command options have
904 been parsed.
905
906 Don't use this macro to turn on various extra optimizations for
907 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
908
909 void
910 override_options ()
911 {
912 int i;
913 /* Comes from final.c -- no real reason to change it. */
914 #define MAX_CODE_ALIGN 16
915
916 static struct ptt
917 {
918 const struct processor_costs *cost; /* Processor costs */
919 const int target_enable; /* Target flags to enable. */
920 const int target_disable; /* Target flags to disable. */
921 const int align_loop; /* Default alignments. */
922 const int align_loop_max_skip;
923 const int align_jump;
924 const int align_jump_max_skip;
925 const int align_func;
926 const int branch_cost;
927 }
928 const processor_target_table[PROCESSOR_max] =
929 {
930 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
931 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
932 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
933 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
934 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
935 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
936 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
937 };
938
939 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
940 static struct pta
941 {
942 const char *const name; /* processor name or nickname. */
943 const enum processor_type processor;
944 const enum pta_flags
945 {
946 PTA_SSE = 1,
947 PTA_SSE2 = 2,
948 PTA_MMX = 4,
949 PTA_PREFETCH_SSE = 8,
950 PTA_3DNOW = 16,
951 PTA_3DNOW_A = 64
952 } flags;
953 }
954 const processor_alias_table[] =
955 {
956 {"i386", PROCESSOR_I386, 0},
957 {"i486", PROCESSOR_I486, 0},
958 {"i586", PROCESSOR_PENTIUM, 0},
959 {"pentium", PROCESSOR_PENTIUM, 0},
960 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
961 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
962 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
963 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
964 {"i686", PROCESSOR_PENTIUMPRO, 0},
965 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
966 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
967 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
968 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
969 PTA_MMX | PTA_PREFETCH_SSE},
970 {"k6", PROCESSOR_K6, PTA_MMX},
971 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
972 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
973 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
974 | PTA_3DNOW_A},
975 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
976 | PTA_3DNOW | PTA_3DNOW_A},
977 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
978 | PTA_3DNOW_A | PTA_SSE},
979 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
980 | PTA_3DNOW_A | PTA_SSE},
981 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
982 | PTA_3DNOW_A | PTA_SSE},
983 };
984
985 int const pta_size = ARRAY_SIZE (processor_alias_table);
986
987 /* By default our XFmode is the 80-bit extended format. If we use
988 TFmode instead, it's also the 80-bit format, but with padding. */
989 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
990 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
991
992 #ifdef SUBTARGET_OVERRIDE_OPTIONS
993 SUBTARGET_OVERRIDE_OPTIONS;
994 #endif
995
996 if (!ix86_cpu_string && ix86_arch_string)
997 ix86_cpu_string = ix86_arch_string;
998 if (!ix86_cpu_string)
999 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1000 if (!ix86_arch_string)
1001 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1002
1003 if (ix86_cmodel_string != 0)
1004 {
1005 if (!strcmp (ix86_cmodel_string, "small"))
1006 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1007 else if (flag_pic)
1008 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1009 else if (!strcmp (ix86_cmodel_string, "32"))
1010 ix86_cmodel = CM_32;
1011 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1012 ix86_cmodel = CM_KERNEL;
1013 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1014 ix86_cmodel = CM_MEDIUM;
1015 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1016 ix86_cmodel = CM_LARGE;
1017 else
1018 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1019 }
1020 else
1021 {
1022 ix86_cmodel = CM_32;
1023 if (TARGET_64BIT)
1024 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1025 }
1026 if (ix86_asm_string != 0)
1027 {
1028 if (!strcmp (ix86_asm_string, "intel"))
1029 ix86_asm_dialect = ASM_INTEL;
1030 else if (!strcmp (ix86_asm_string, "att"))
1031 ix86_asm_dialect = ASM_ATT;
1032 else
1033 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1034 }
1035 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1036 error ("code model `%s' not supported in the %s bit mode",
1037 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1038 if (ix86_cmodel == CM_LARGE)
1039 sorry ("code model `large' not supported yet");
1040 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1041 sorry ("%i-bit mode not compiled in",
1042 (target_flags & MASK_64BIT) ? 64 : 32);
1043
1044 for (i = 0; i < pta_size; i++)
1045 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1046 {
1047 ix86_arch = processor_alias_table[i].processor;
1048 /* Default cpu tuning to the architecture. */
1049 ix86_cpu = ix86_arch;
1050 if (processor_alias_table[i].flags & PTA_MMX
1051 && !(target_flags_explicit & MASK_MMX))
1052 target_flags |= MASK_MMX;
1053 if (processor_alias_table[i].flags & PTA_3DNOW
1054 && !(target_flags_explicit & MASK_3DNOW))
1055 target_flags |= MASK_3DNOW;
1056 if (processor_alias_table[i].flags & PTA_3DNOW_A
1057 && !(target_flags_explicit & MASK_3DNOW_A))
1058 target_flags |= MASK_3DNOW_A;
1059 if (processor_alias_table[i].flags & PTA_SSE
1060 && !(target_flags_explicit & MASK_SSE))
1061 target_flags |= MASK_SSE;
1062 if (processor_alias_table[i].flags & PTA_SSE2
1063 && !(target_flags_explicit & MASK_SSE2))
1064 target_flags |= MASK_SSE2;
1065 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1066 x86_prefetch_sse = true;
1067 break;
1068 }
1069
1070 if (i == pta_size)
1071 error ("bad value (%s) for -march= switch", ix86_arch_string);
1072
1073 for (i = 0; i < pta_size; i++)
1074 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1075 {
1076 ix86_cpu = processor_alias_table[i].processor;
1077 break;
1078 }
1079 if (i != pta_size && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1080 x86_prefetch_sse = true;
1081 if (i == pta_size)
1082 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1083
1084 if (optimize_size)
1085 ix86_cost = &size_cost;
1086 else
1087 ix86_cost = processor_target_table[ix86_cpu].cost;
1088 target_flags |= processor_target_table[ix86_cpu].target_enable;
1089 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1090
1091 /* Arrange to set up i386_stack_locals for all functions. */
1092 init_machine_status = ix86_init_machine_status;
1093
1094 /* Validate -mregparm= value. */
1095 if (ix86_regparm_string)
1096 {
1097 i = atoi (ix86_regparm_string);
1098 if (i < 0 || i > REGPARM_MAX)
1099 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1100 else
1101 ix86_regparm = i;
1102 }
1103 else
1104 if (TARGET_64BIT)
1105 ix86_regparm = REGPARM_MAX;
1106
1107 /* If the user has provided any of the -malign-* options,
1108 warn and use that value only if -falign-* is not set.
1109 Remove this code in GCC 3.2 or later. */
1110 if (ix86_align_loops_string)
1111 {
1112 warning ("-malign-loops is obsolete, use -falign-loops");
1113 if (align_loops == 0)
1114 {
1115 i = atoi (ix86_align_loops_string);
1116 if (i < 0 || i > MAX_CODE_ALIGN)
1117 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1118 else
1119 align_loops = 1 << i;
1120 }
1121 }
1122
1123 if (ix86_align_jumps_string)
1124 {
1125 warning ("-malign-jumps is obsolete, use -falign-jumps");
1126 if (align_jumps == 0)
1127 {
1128 i = atoi (ix86_align_jumps_string);
1129 if (i < 0 || i > MAX_CODE_ALIGN)
1130 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1131 else
1132 align_jumps = 1 << i;
1133 }
1134 }
1135
1136 if (ix86_align_funcs_string)
1137 {
1138 warning ("-malign-functions is obsolete, use -falign-functions");
1139 if (align_functions == 0)
1140 {
1141 i = atoi (ix86_align_funcs_string);
1142 if (i < 0 || i > MAX_CODE_ALIGN)
1143 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1144 else
1145 align_functions = 1 << i;
1146 }
1147 }
1148
1149 /* Default align_* from the processor table. */
1150 if (align_loops == 0)
1151 {
1152 align_loops = processor_target_table[ix86_cpu].align_loop;
1153 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1154 }
1155 if (align_jumps == 0)
1156 {
1157 align_jumps = processor_target_table[ix86_cpu].align_jump;
1158 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1159 }
1160 if (align_functions == 0)
1161 {
1162 align_functions = processor_target_table[ix86_cpu].align_func;
1163 }
1164
1165 /* Validate -mpreferred-stack-boundary= value, or provide default.
1166 The default of 128 bits is for Pentium III's SSE __m128, but we
1167 don't want additional code to keep the stack aligned when
1168 optimizing for code size. */
1169 ix86_preferred_stack_boundary = (optimize_size
1170 ? TARGET_64BIT ? 128 : 32
1171 : 128);
1172 if (ix86_preferred_stack_boundary_string)
1173 {
1174 i = atoi (ix86_preferred_stack_boundary_string);
1175 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1176 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1177 TARGET_64BIT ? 4 : 2);
1178 else
1179 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1180 }
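/* Worked example (added note): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
   stack, which is also the default outside -Os.  */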
1181
1182 /* Validate -mbranch-cost= value, or provide default. */
1183 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1184 if (ix86_branch_cost_string)
1185 {
1186 i = atoi (ix86_branch_cost_string);
1187 if (i < 0 || i > 5)
1188 error ("-mbranch-cost=%d is not between 0 and 5", i);
1189 else
1190 ix86_branch_cost = i;
1191 }
1192
1193 if (ix86_tls_dialect_string)
1194 {
1195 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1196 ix86_tls_dialect = TLS_DIALECT_GNU;
1197 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1198 ix86_tls_dialect = TLS_DIALECT_SUN;
1199 else
1200 error ("bad value (%s) for -mtls-dialect= switch",
1201 ix86_tls_dialect_string);
1202 }
1203
1204 if (profile_flag)
1205 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1206
1207 /* Keep nonleaf frame pointers. */
1208 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1209 flag_omit_frame_pointer = 1;
1210
1211 /* If we're doing fast math, we don't care about comparison order
1212 wrt NaNs. This lets us use a shorter comparison sequence. */
1213 if (flag_unsafe_math_optimizations)
1214 target_flags &= ~MASK_IEEE_FP;
1215
1216 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1217 since the insns won't need emulation. */
1218 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1219 target_flags &= ~MASK_NO_FANCY_MATH_387;
1220
1221 if (TARGET_64BIT)
1222 {
1223 if (TARGET_ALIGN_DOUBLE)
1224 error ("-malign-double makes no sense in the 64bit mode");
1225 if (TARGET_RTD)
1226 error ("-mrtd calling convention not supported in the 64bit mode");
1227 /* Enable by default the SSE and MMX builtins. */
1228 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1229 ix86_fpmath = FPMATH_SSE;
1230 }
1231 else
1232 ix86_fpmath = FPMATH_387;
1233
1234 if (ix86_fpmath_string != 0)
1235 {
1236 if (! strcmp (ix86_fpmath_string, "387"))
1237 ix86_fpmath = FPMATH_387;
1238 else if (! strcmp (ix86_fpmath_string, "sse"))
1239 {
1240 if (!TARGET_SSE)
1241 {
1242 warning ("SSE instruction set disabled, using 387 arithmetics");
1243 ix86_fpmath = FPMATH_387;
1244 }
1245 else
1246 ix86_fpmath = FPMATH_SSE;
1247 }
1248 else if (! strcmp (ix86_fpmath_string, "387,sse")
1249 || ! strcmp (ix86_fpmath_string, "sse,387"))
1250 {
1251 if (!TARGET_SSE)
1252 {
1253 warning ("SSE instruction set disabled, using 387 arithmetics");
1254 ix86_fpmath = FPMATH_387;
1255 }
1256 else if (!TARGET_80387)
1257 {
1258 warning ("387 instruction set disabled, using SSE arithmetics");
1259 ix86_fpmath = FPMATH_SSE;
1260 }
1261 else
1262 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1263 }
1264 else
1265 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1266 }
1267
1268 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1269 on by -msse. */
1270 if (TARGET_SSE)
1271 {
1272 target_flags |= MASK_MMX;
1273 x86_prefetch_sse = true;
1274 }
1275
1276 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1277 if (TARGET_3DNOW)
1278 {
1279 target_flags |= MASK_MMX;
1280 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1281 extensions it adds. */
1282 if (x86_3dnow_a & (1 << ix86_arch))
1283 target_flags |= MASK_3DNOW_A;
1284 }
1285 if ((x86_accumulate_outgoing_args & CPUMASK)
1286 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1287 && !optimize_size)
1288 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1289
1290 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1291 {
1292 char *p;
1293 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1294 p = strchr (internal_label_prefix, 'X');
1295 internal_label_prefix_len = p - internal_label_prefix;
1296 *p = '\0';
1297 }
1298 }
1299 \f
1300 void
1301 optimization_options (level, size)
1302 int level;
1303 int size ATTRIBUTE_UNUSED;
1304 {
1305 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1306 make the problem with not enough registers even worse. */
1307 #ifdef INSN_SCHEDULING
1308 if (level > 1)
1309 flag_schedule_insns = 0;
1310 #endif
1311 if (TARGET_64BIT && optimize >= 1)
1312 flag_omit_frame_pointer = 1;
1313 if (TARGET_64BIT)
1314 {
1315 flag_pcc_struct_return = 0;
1316 flag_asynchronous_unwind_tables = 1;
1317 }
1318 if (profile_flag)
1319 flag_omit_frame_pointer = 0;
1320 }
1321 \f
1322 /* Table of valid machine attributes. */
1323 const struct attribute_spec ix86_attribute_table[] =
1324 {
1325 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1326 /* Stdcall attribute says callee is responsible for popping arguments
1327 if they are not variable. */
1328 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1329 /* Cdecl attribute says the callee is a normal C declaration */
1330 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1331 /* Regparm attribute specifies how many integer arguments are to be
1332 passed in registers. */
1333 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1334 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1335 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1336 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1337 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1338 #endif
1339 { NULL, 0, 0, false, false, false, NULL }
1340 };
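/* Illustrative usage (added note): these attributes appear on function types
   in user code, e.g.

       int __attribute__ ((stdcall)) f (int a, int b);
       int __attribute__ ((regparm (3))) g (int a, int b, int c);

   The handlers below validate such uses.  */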
1341
1342 /* Handle a "cdecl" or "stdcall" attribute;
1343 arguments as in struct attribute_spec.handler. */
1344 static tree
1345 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1346 tree *node;
1347 tree name;
1348 tree args ATTRIBUTE_UNUSED;
1349 int flags ATTRIBUTE_UNUSED;
1350 bool *no_add_attrs;
1351 {
1352 if (TREE_CODE (*node) != FUNCTION_TYPE
1353 && TREE_CODE (*node) != METHOD_TYPE
1354 && TREE_CODE (*node) != FIELD_DECL
1355 && TREE_CODE (*node) != TYPE_DECL)
1356 {
1357 warning ("`%s' attribute only applies to functions",
1358 IDENTIFIER_POINTER (name));
1359 *no_add_attrs = true;
1360 }
1361
1362 if (TARGET_64BIT)
1363 {
1364 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1365 *no_add_attrs = true;
1366 }
1367
1368 return NULL_TREE;
1369 }
1370
1371 /* Handle a "regparm" attribute;
1372 arguments as in struct attribute_spec.handler. */
1373 static tree
1374 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1375 tree *node;
1376 tree name;
1377 tree args;
1378 int flags ATTRIBUTE_UNUSED;
1379 bool *no_add_attrs;
1380 {
1381 if (TREE_CODE (*node) != FUNCTION_TYPE
1382 && TREE_CODE (*node) != METHOD_TYPE
1383 && TREE_CODE (*node) != FIELD_DECL
1384 && TREE_CODE (*node) != TYPE_DECL)
1385 {
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name));
1388 *no_add_attrs = true;
1389 }
1390 else
1391 {
1392 tree cst;
1393
1394 cst = TREE_VALUE (args);
1395 if (TREE_CODE (cst) != INTEGER_CST)
1396 {
1397 warning ("`%s' attribute requires an integer constant argument",
1398 IDENTIFIER_POINTER (name));
1399 *no_add_attrs = true;
1400 }
1401 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1402 {
1403 warning ("argument to `%s' attribute larger than %d",
1404 IDENTIFIER_POINTER (name), REGPARM_MAX);
1405 *no_add_attrs = true;
1406 }
1407 }
1408
1409 return NULL_TREE;
1410 }
1411
1412 /* Return 0 if the attributes for two types are incompatible, 1 if they
1413 are compatible, and 2 if they are nearly compatible (which causes a
1414 warning to be generated). */
1415
1416 static int
1417 ix86_comp_type_attributes (type1, type2)
1418 tree type1;
1419 tree type2;
1420 {
1421 /* Check for mismatch of non-default calling convention. */
1422 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1423
1424 if (TREE_CODE (type1) != FUNCTION_TYPE)
1425 return 1;
1426
1427 /* Check for mismatched return types (cdecl vs stdcall). */
1428 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1429 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1430 return 0;
1431 return 1;
1432 }
1433 \f
1434 /* Value is the number of bytes of arguments automatically
1435 popped when returning from a subroutine call.
1436 FUNDECL is the declaration node of the function (as a tree),
1437 FUNTYPE is the data type of the function (as a tree),
1438 or for a library call it is an identifier node for the subroutine name.
1439 SIZE is the number of bytes of arguments passed on the stack.
1440
1441 On the 80386, the RTD insn may be used to pop them if the number
1442 of args is fixed, but if the number is variable then the caller
1443 must pop them all. RTD can't be used for library calls now
1444 because the library is compiled with the Unix compiler.
1445 Use of RTD is a selectable option, since it is incompatible with
1446 standard Unix calling sequences. If the option is not selected,
1447 the caller must always pop the args.
1448
1449 The attribute stdcall is equivalent to RTD on a per module basis. */
1450
1451 int
1452 ix86_return_pops_args (fundecl, funtype, size)
1453 tree fundecl;
1454 tree funtype;
1455 int size;
1456 {
1457 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1458
1459 /* Cdecl functions override -mrtd, and never pop the stack. */
1460 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1461
1462 /* Stdcall functions will pop the stack if not variable args. */
1463 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1464 rtd = 1;
1465
1466 if (rtd
1467 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1468 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1469 == void_type_node)))
1470 return size;
1471 }
1472
1473 /* Lose any fake structure return argument if it is passed on the stack. */
1474 if (aggregate_value_p (TREE_TYPE (funtype))
1475 && !TARGET_64BIT)
1476 {
1477 int nregs = ix86_regparm;
1478
1479 if (funtype)
1480 {
1481 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1482
1483 if (attr)
1484 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1485 }
1486
1487 if (!nregs)
1488 return GET_MODE_SIZE (Pmode);
1489 }
1490
1491 return 0;
1492 }
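/* Worked example (added note): for a declaration such as

       int __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 and the argument list is fixed, so the function above returns 8
   and the callee pops its own arguments (a "ret 8"), whereas a plain cdecl
   function returns 0 and leaves the popping to the caller.  */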
1493 \f
1494 /* Argument support functions. */
1495
1496 /* Return true when register may be used to pass function parameters. */
1497 bool
1498 ix86_function_arg_regno_p (regno)
1499 int regno;
1500 {
1501 int i;
1502 if (!TARGET_64BIT)
1503 return (regno < REGPARM_MAX
1504 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1505 if (SSE_REGNO_P (regno) && TARGET_SSE)
1506 return true;
1507 /* RAX is used as hidden argument to va_arg functions. */
1508 if (!regno)
1509 return true;
1510 for (i = 0; i < REGPARM_MAX; i++)
1511 if (regno == x86_64_int_parameter_registers[i])
1512 return true;
1513 return false;
1514 }
1515
1516 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1517 for a call to a function whose data type is FNTYPE.
1518 For a library call, FNTYPE is 0. */
1519
1520 void
1521 init_cumulative_args (cum, fntype, libname)
1522 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1523 tree fntype; /* tree ptr for function decl */
1524 rtx libname; /* SYMBOL_REF of library name or 0 */
1525 {
1526 static CUMULATIVE_ARGS zero_cum;
1527 tree param, next_param;
1528
1529 if (TARGET_DEBUG_ARG)
1530 {
1531 fprintf (stderr, "\ninit_cumulative_args (");
1532 if (fntype)
1533 fprintf (stderr, "fntype code = %s, ret code = %s",
1534 tree_code_name[(int) TREE_CODE (fntype)],
1535 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1536 else
1537 fprintf (stderr, "no fntype");
1538
1539 if (libname)
1540 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1541 }
1542
1543 *cum = zero_cum;
1544
1545 /* Set up the number of registers to use for passing arguments. */
1546 cum->nregs = ix86_regparm;
1547 cum->sse_nregs = SSE_REGPARM_MAX;
1548 if (fntype && !TARGET_64BIT)
1549 {
1550 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1551
1552 if (attr)
1553 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1554 }
1555 cum->maybe_vaarg = false;
1556
1557 /* Determine if this function has variable arguments.  This is
1558 indicated by the last argument being 'void_type_node' if there
1559 are no variable arguments.  If there are variable arguments, then
1560 we won't pass anything in registers.  */
1561
1562 if (cum->nregs)
1563 {
1564 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1565 param != 0; param = next_param)
1566 {
1567 next_param = TREE_CHAIN (param);
1568 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1569 {
1570 if (!TARGET_64BIT)
1571 cum->nregs = 0;
1572 cum->maybe_vaarg = true;
1573 }
1574 }
1575 }
1576 if ((!fntype && !libname)
1577 || (fntype && !TYPE_ARG_TYPES (fntype)))
1578 cum->maybe_vaarg = 1;
1579
1580 if (TARGET_DEBUG_ARG)
1581 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1582
1583 return;
1584 }
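/* A minimal user-level example of the regparm attribute consulted above
   (hypothetical function name):

       int __attribute__((regparm (3))) add3 (int a, int b, int c);

   For calls through this type, cum->nregs starts at 3 instead of
   ix86_regparm, so the first three integer arguments travel in
   EAX, EDX and ECX rather than on the stack (32-bit only).  */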
1585
1586 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The
1587 goal of this code is to classify each 8-byte chunk of an incoming argument by
1588 register class and assign registers accordingly. */
1589
1590 /* Return the union class of CLASS1 and CLASS2.
1591 See the x86-64 PS ABI for details. */
1592
1593 static enum x86_64_reg_class
1594 merge_classes (class1, class2)
1595 enum x86_64_reg_class class1, class2;
1596 {
1597 /* Rule #1: If both classes are equal, this is the resulting class. */
1598 if (class1 == class2)
1599 return class1;
1600
1601 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1602 the other class. */
1603 if (class1 == X86_64_NO_CLASS)
1604 return class2;
1605 if (class2 == X86_64_NO_CLASS)
1606 return class1;
1607
1608 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1609 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1610 return X86_64_MEMORY_CLASS;
1611
1612 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1613 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1614 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1615 return X86_64_INTEGERSI_CLASS;
1616 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1617 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1618 return X86_64_INTEGER_CLASS;
1619
1620 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1621 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1622 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1623 return X86_64_MEMORY_CLASS;
1624
1625 /* Rule #6: Otherwise class SSE is used. */
1626 return X86_64_SSE_CLASS;
1627 }
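/* Worked examples of the rules above:

       merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)         -> X86_64_SSE_CLASS     (rule #2)
       merge_classes (X86_64_MEMORY_CLASS, X86_64_INTEGER_CLASS) -> X86_64_MEMORY_CLASS  (rule #3)
       merge_classes (X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS)  -> X86_64_INTEGER_CLASS (rule #4)
       merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)        -> X86_64_MEMORY_CLASS  (rule #5)  */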
1628
1629 /* Classify the argument of type TYPE and mode MODE.
1630 CLASSES will be filled by the register class used to pass each word
1631 of the operand. The number of words is returned. In case the parameter
1632 should be passed in memory, 0 is returned. As a special case for zero
1633 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1634
1635 BIT_OFFSET is used internally for handling records; it specifies the
1636 offset in bits, modulo 256, to avoid overflow cases.
1637
1638 See the x86-64 PS ABI for details.
1639 */
1640
1641 static int
1642 classify_argument (mode, type, classes, bit_offset)
1643 enum machine_mode mode;
1644 tree type;
1645 enum x86_64_reg_class classes[MAX_CLASSES];
1646 int bit_offset;
1647 {
1648 int bytes =
1649 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1650 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1651
1652 /* Variable sized entities are always passed/returned in memory. */
1653 if (bytes < 0)
1654 return 0;
1655
1656 if (type && AGGREGATE_TYPE_P (type))
1657 {
1658 int i;
1659 tree field;
1660 enum x86_64_reg_class subclasses[MAX_CLASSES];
1661
1662 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1663 if (bytes > 16)
1664 return 0;
1665
1666 for (i = 0; i < words; i++)
1667 classes[i] = X86_64_NO_CLASS;
1668
1669 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1670 signal the memory class, so handle this as a special case. */
1671 if (!words)
1672 {
1673 classes[0] = X86_64_NO_CLASS;
1674 return 1;
1675 }
1676
1677 /* Classify each field of record and merge classes. */
1678 if (TREE_CODE (type) == RECORD_TYPE)
1679 {
1680 /* For C++ classes, first merge in the fields of the base classes. */
1681 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1682 {
1683 tree bases = TYPE_BINFO_BASETYPES (type);
1684 int n_bases = TREE_VEC_LENGTH (bases);
1685 int i;
1686
1687 for (i = 0; i < n_bases; ++i)
1688 {
1689 tree binfo = TREE_VEC_ELT (bases, i);
1690 int num;
1691 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1692 tree type = BINFO_TYPE (binfo);
1693
1694 num = classify_argument (TYPE_MODE (type),
1695 type, subclasses,
1696 (offset + bit_offset) % 256);
1697 if (!num)
1698 return 0;
1699 for (i = 0; i < num; i++)
1700 {
1701 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1702 classes[i + pos] =
1703 merge_classes (subclasses[i], classes[i + pos]);
1704 }
1705 }
1706 }
1707 /* And now merge the fields of structure. */
1708 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1709 {
1710 if (TREE_CODE (field) == FIELD_DECL)
1711 {
1712 int num;
1713
1714 /* Bitfields are always classified as integer. Handle them
1715 early, since later code would consider them to be
1716 misaligned integers. */
1717 if (DECL_BIT_FIELD (field))
1718 {
1719 for (i = int_bit_position (field) / 8 / 8;
1720 i < (int_bit_position (field)
1721 + tree_low_cst (DECL_SIZE (field), 0)
1722 + 63) / 8 / 8; i++)
1723 classes[i] =
1724 merge_classes (X86_64_INTEGER_CLASS,
1725 classes[i]);
1726 }
1727 else
1728 {
1729 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1730 TREE_TYPE (field), subclasses,
1731 (int_bit_position (field)
1732 + bit_offset) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
1737 int pos =
1738 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1739 classes[i + pos] =
1740 merge_classes (subclasses[i], classes[i + pos]);
1741 }
1742 }
1743 }
1744 }
1745 }
1746 /* Arrays are handled as small records. */
1747 else if (TREE_CODE (type) == ARRAY_TYPE)
1748 {
1749 int num;
1750 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1751 TREE_TYPE (type), subclasses, bit_offset);
1752 if (!num)
1753 return 0;
1754
1755 /* The partial classes are now full classes. */
1756 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1757 subclasses[0] = X86_64_SSE_CLASS;
1758 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1759 subclasses[0] = X86_64_INTEGER_CLASS;
1760
1761 for (i = 0; i < words; i++)
1762 classes[i] = subclasses[i % num];
1763 }
1764 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1765 else if (TREE_CODE (type) == UNION_TYPE
1766 || TREE_CODE (type) == QUAL_UNION_TYPE)
1767 {
1768 /* For C++ classes, first merge in the fields of the base classes. */
1769 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1770 {
1771 tree bases = TYPE_BINFO_BASETYPES (type);
1772 int n_bases = TREE_VEC_LENGTH (bases);
1773 int i;
1774
1775 for (i = 0; i < n_bases; ++i)
1776 {
1777 tree binfo = TREE_VEC_ELT (bases, i);
1778 int num;
1779 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1780 tree type = BINFO_TYPE (binfo);
1781
1782 num = classify_argument (TYPE_MODE (type),
1783 type, subclasses,
1784 (offset + (bit_offset % 64)) % 256);
1785 if (!num)
1786 return 0;
1787 for (i = 0; i < num; i++)
1788 {
1789 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1790 classes[i + pos] =
1791 merge_classes (subclasses[i], classes[i + pos]);
1792 }
1793 }
1794 }
1795 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1796 {
1797 if (TREE_CODE (field) == FIELD_DECL)
1798 {
1799 int num;
1800 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1801 TREE_TYPE (field), subclasses,
1802 bit_offset);
1803 if (!num)
1804 return 0;
1805 for (i = 0; i < num; i++)
1806 classes[i] = merge_classes (subclasses[i], classes[i]);
1807 }
1808 }
1809 }
1810 else
1811 abort ();
1812
1813 /* Final merger cleanup. */
1814 for (i = 0; i < words; i++)
1815 {
1816 /* If one class is MEMORY, everything should be passed in
1817 memory. */
1818 if (classes[i] == X86_64_MEMORY_CLASS)
1819 return 0;
1820
1821 /* The X86_64_SSEUP_CLASS should always be preceded by
1822 X86_64_SSE_CLASS. */
1823 if (classes[i] == X86_64_SSEUP_CLASS
1824 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1825 classes[i] = X86_64_SSE_CLASS;
1826
1827 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1828 if (classes[i] == X86_64_X87UP_CLASS
1829 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1830 classes[i] = X86_64_SSE_CLASS;
1831 }
1832 return words;
1833 }
1834
1835 /* Compute the alignment needed.  We align all types to their natural boundaries,
1836 with the exception of XFmode, which is aligned to 128 bits. */
1837 if (mode != VOIDmode && mode != BLKmode)
1838 {
1839 int mode_alignment = GET_MODE_BITSIZE (mode);
1840
1841 if (mode == XFmode)
1842 mode_alignment = 128;
1843 else if (mode == XCmode)
1844 mode_alignment = 256;
1845 /* Misaligned fields are always returned in memory. */
1846 if (bit_offset % mode_alignment)
1847 return 0;
1848 }
1849
1850 /* Classification of atomic types. */
1851 switch (mode)
1852 {
1853 case DImode:
1854 case SImode:
1855 case HImode:
1856 case QImode:
1857 case CSImode:
1858 case CHImode:
1859 case CQImode:
1860 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1861 classes[0] = X86_64_INTEGERSI_CLASS;
1862 else
1863 classes[0] = X86_64_INTEGER_CLASS;
1864 return 1;
1865 case CDImode:
1866 case TImode:
1867 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1868 return 2;
1869 case CTImode:
1870 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1871 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1872 return 4;
1873 case SFmode:
1874 if (!(bit_offset % 64))
1875 classes[0] = X86_64_SSESF_CLASS;
1876 else
1877 classes[0] = X86_64_SSE_CLASS;
1878 return 1;
1879 case DFmode:
1880 classes[0] = X86_64_SSEDF_CLASS;
1881 return 1;
1882 case TFmode:
1883 classes[0] = X86_64_X87_CLASS;
1884 classes[1] = X86_64_X87UP_CLASS;
1885 return 2;
1886 case TCmode:
1887 classes[0] = X86_64_X87_CLASS;
1888 classes[1] = X86_64_X87UP_CLASS;
1889 classes[2] = X86_64_X87_CLASS;
1890 classes[3] = X86_64_X87UP_CLASS;
1891 return 4;
1892 case DCmode:
1893 classes[0] = X86_64_SSEDF_CLASS;
1894 classes[1] = X86_64_SSEDF_CLASS;
1895 return 2;
1896 case SCmode:
1897 classes[0] = X86_64_SSE_CLASS;
1898 return 1;
1899 case V4SFmode:
1900 case V4SImode:
1901 case V16QImode:
1902 case V8HImode:
1903 case V2DFmode:
1904 case V2DImode:
1905 classes[0] = X86_64_SSE_CLASS;
1906 classes[1] = X86_64_SSEUP_CLASS;
1907 return 2;
1908 case V2SFmode:
1909 case V2SImode:
1910 case V4HImode:
1911 case V8QImode:
1912 classes[0] = X86_64_SSE_CLASS;
1913 return 1;
1914 case BLKmode:
1915 case VOIDmode:
1916 return 0;
1917 default:
1918 abort ();
1919 }
1920 }
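/* Illustrative classifications produced by the code above for a few
   user-level aggregates (hypothetical type names):

       struct s1 { double d; long l; };        16 bytes -> { SSEDF, INTEGER }
                                               (one SSE and one integer eightbyte)
       struct s2 { long a; long b; long c; };  24 bytes -> returns 0, passed in memory
       struct s3 { long double x; };           -> { X87, X87UP }; examine_argument then
                                               forces memory when used as an argument
       struct s4 { };                          zero sized -> classes[0] = NO_CLASS, returns 1  */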
1921
1922 /* Examine the argument and set the number of registers required for each
1923 class.  Return 0 iff the parameter should be passed in memory. */
1924 static int
1925 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1926 enum machine_mode mode;
1927 tree type;
1928 int *int_nregs, *sse_nregs;
1929 int in_return;
1930 {
1931 enum x86_64_reg_class class[MAX_CLASSES];
1932 int n = classify_argument (mode, type, class, 0);
1933
1934 *int_nregs = 0;
1935 *sse_nregs = 0;
1936 if (!n)
1937 return 0;
1938 for (n--; n >= 0; n--)
1939 switch (class[n])
1940 {
1941 case X86_64_INTEGER_CLASS:
1942 case X86_64_INTEGERSI_CLASS:
1943 (*int_nregs)++;
1944 break;
1945 case X86_64_SSE_CLASS:
1946 case X86_64_SSESF_CLASS:
1947 case X86_64_SSEDF_CLASS:
1948 (*sse_nregs)++;
1949 break;
1950 case X86_64_NO_CLASS:
1951 case X86_64_SSEUP_CLASS:
1952 break;
1953 case X86_64_X87_CLASS:
1954 case X86_64_X87UP_CLASS:
1955 if (!in_return)
1956 return 0;
1957 break;
1958 case X86_64_MEMORY_CLASS:
1959 abort ();
1960 }
1961 return 1;
1962 }
1963 /* Construct container for the argument used by GCC interface. See
1964 FUNCTION_ARG for the detailed description. */
1965 static rtx
1966 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1967 enum machine_mode mode;
1968 tree type;
1969 int in_return;
1970 int nintregs, nsseregs;
1971 const int * intreg;
1972 int sse_regno;
1973 {
1974 enum machine_mode tmpmode;
1975 int bytes =
1976 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1977 enum x86_64_reg_class class[MAX_CLASSES];
1978 int n;
1979 int i;
1980 int nexps = 0;
1981 int needed_sseregs, needed_intregs;
1982 rtx exp[MAX_CLASSES];
1983 rtx ret;
1984
1985 n = classify_argument (mode, type, class, 0);
1986 if (TARGET_DEBUG_ARG)
1987 {
1988 if (!n)
1989 fprintf (stderr, "Memory class\n");
1990 else
1991 {
1992 fprintf (stderr, "Classes:");
1993 for (i = 0; i < n; i++)
1994 {
1995 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1996 }
1997 fprintf (stderr, "\n");
1998 }
1999 }
2000 if (!n)
2001 return NULL;
2002 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2003 return NULL;
2004 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2005 return NULL;
2006
2007 /* First construct the simple cases.  Avoid SCmode, since we want to use
2008 a single register to pass this type. */
2009 if (n == 1 && mode != SCmode)
2010 switch (class[0])
2011 {
2012 case X86_64_INTEGER_CLASS:
2013 case X86_64_INTEGERSI_CLASS:
2014 return gen_rtx_REG (mode, intreg[0]);
2015 case X86_64_SSE_CLASS:
2016 case X86_64_SSESF_CLASS:
2017 case X86_64_SSEDF_CLASS:
2018 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2019 case X86_64_X87_CLASS:
2020 return gen_rtx_REG (mode, FIRST_STACK_REG);
2021 case X86_64_NO_CLASS:
2022 /* Zero sized array, struct or class. */
2023 return NULL;
2024 default:
2025 abort ();
2026 }
2027 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2028 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2029 if (n == 2
2030 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2031 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2032 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2033 && class[1] == X86_64_INTEGER_CLASS
2034 && (mode == CDImode || mode == TImode)
2035 && intreg[0] + 1 == intreg[1])
2036 return gen_rtx_REG (mode, intreg[0]);
2037 if (n == 4
2038 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2039 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2040 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2041
2042 /* Otherwise figure out the entries of the PARALLEL. */
2043 for (i = 0; i < n; i++)
2044 {
2045 switch (class[i])
2046 {
2047 case X86_64_NO_CLASS:
2048 break;
2049 case X86_64_INTEGER_CLASS:
2050 case X86_64_INTEGERSI_CLASS:
2051 /* Merge TImodes on aligned occasions here too. */
2052 if (i * 8 + 8 > bytes)
2053 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2054 else if (class[i] == X86_64_INTEGERSI_CLASS)
2055 tmpmode = SImode;
2056 else
2057 tmpmode = DImode;
2058 /* We've requested a size (such as 24 bits) for which no integer mode exists.  Use DImode. */
2059 if (tmpmode == BLKmode)
2060 tmpmode = DImode;
2061 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2062 gen_rtx_REG (tmpmode, *intreg),
2063 GEN_INT (i*8));
2064 intreg++;
2065 break;
2066 case X86_64_SSESF_CLASS:
2067 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2068 gen_rtx_REG (SFmode,
2069 SSE_REGNO (sse_regno)),
2070 GEN_INT (i*8));
2071 sse_regno++;
2072 break;
2073 case X86_64_SSEDF_CLASS:
2074 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2075 gen_rtx_REG (DFmode,
2076 SSE_REGNO (sse_regno)),
2077 GEN_INT (i*8));
2078 sse_regno++;
2079 break;
2080 case X86_64_SSE_CLASS:
2081 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2082 tmpmode = TImode, i++;
2083 else
2084 tmpmode = DImode;
2085 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2086 gen_rtx_REG (tmpmode,
2087 SSE_REGNO (sse_regno)),
2088 GEN_INT (i*8));
2089 sse_regno++;
2090 break;
2091 default:
2092 abort ();
2093 }
2094 }
2095 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2096 for (i = 0; i < nexps; i++)
2097 XVECEXP (ret, 0, i) = exp [i];
2098 return ret;
2099 }
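/* For instance, for the { SSEDF, INTEGER } classification of
   struct { double d; long l; } mentioned earlier, the loop above builds
   roughly (a sketch of the resulting RTL):

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. the first eightbyte travels in an SSE register and the second in
   an integer register, each expr_list carrying its byte offset.  */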
2100
2101 /* Update the data in CUM to advance over an argument
2102 of mode MODE and data type TYPE.
2103 (TYPE is null for libcalls where that information may not be available.) */
2104
2105 void
2106 function_arg_advance (cum, mode, type, named)
2107 CUMULATIVE_ARGS *cum; /* current arg information */
2108 enum machine_mode mode; /* current arg mode */
2109 tree type; /* type of the argument or 0 if lib support */
2110 int named; /* whether or not the argument was named */
2111 {
2112 int bytes =
2113 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2114 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2115
2116 if (TARGET_DEBUG_ARG)
2117 fprintf (stderr,
2118 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2119 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2120 if (TARGET_64BIT)
2121 {
2122 int int_nregs, sse_nregs;
2123 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2124 cum->words += words;
2125 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2126 {
2127 cum->nregs -= int_nregs;
2128 cum->sse_nregs -= sse_nregs;
2129 cum->regno += int_nregs;
2130 cum->sse_regno += sse_nregs;
2131 }
2132 else
2133 cum->words += words;
2134 }
2135 else
2136 {
2137 if (TARGET_SSE && mode == TImode)
2138 {
2139 cum->sse_words += words;
2140 cum->sse_nregs -= 1;
2141 cum->sse_regno += 1;
2142 if (cum->sse_nregs <= 0)
2143 {
2144 cum->sse_nregs = 0;
2145 cum->sse_regno = 0;
2146 }
2147 }
2148 else
2149 {
2150 cum->words += words;
2151 cum->nregs -= words;
2152 cum->regno += words;
2153
2154 if (cum->nregs <= 0)
2155 {
2156 cum->nregs = 0;
2157 cum->regno = 0;
2158 }
2159 }
2160 }
2161 return;
2162 }
2163
2164 /* Define where to put the arguments to a function.
2165 Value is zero to push the argument on the stack,
2166 or a hard register in which to store the argument.
2167
2168 MODE is the argument's machine mode.
2169 TYPE is the data type of the argument (as a tree).
2170 This is null for libcalls where that information may
2171 not be available.
2172 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2173 the preceding args and about the function being called.
2174 NAMED is nonzero if this argument is a named parameter
2175 (otherwise it is an extra parameter matching an ellipsis). */
2176
2177 rtx
2178 function_arg (cum, mode, type, named)
2179 CUMULATIVE_ARGS *cum; /* current arg information */
2180 enum machine_mode mode; /* current arg mode */
2181 tree type; /* type of the argument or 0 if lib support */
2182 int named; /* != 0 for normal args, == 0 for ... args */
2183 {
2184 rtx ret = NULL_RTX;
2185 int bytes =
2186 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2187 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2188
2189 /* Handle a hidden AL argument containing the number of SSE registers used by
2190 varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2191 any AL settings. */
2192 if (mode == VOIDmode)
2193 {
2194 if (TARGET_64BIT)
2195 return GEN_INT (cum->maybe_vaarg
2196 ? (cum->sse_nregs < 0
2197 ? SSE_REGPARM_MAX
2198 : cum->sse_regno)
2199 : -1);
2200 else
2201 return constm1_rtx;
2202 }
2203 if (TARGET_64BIT)
2204 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2205 &x86_64_int_parameter_registers [cum->regno],
2206 cum->sse_regno);
2207 else
2208 switch (mode)
2209 {
2210 /* For now, pass fp/complex values on the stack. */
2211 default:
2212 break;
2213
2214 case BLKmode:
2215 case DImode:
2216 case SImode:
2217 case HImode:
2218 case QImode:
2219 if (words <= cum->nregs)
2220 ret = gen_rtx_REG (mode, cum->regno);
2221 break;
2222 case TImode:
2223 if (cum->sse_nregs)
2224 ret = gen_rtx_REG (mode, cum->sse_regno);
2225 break;
2226 }
2227
2228 if (TARGET_DEBUG_ARG)
2229 {
2230 fprintf (stderr,
2231 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2232 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2233
2234 if (ret)
2235 print_simple_rtl (stderr, ret);
2236 else
2237 fprintf (stderr, ", stack");
2238
2239 fprintf (stderr, " )\n");
2240 }
2241
2242 return ret;
2243 }
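/* User-level illustration of the hidden AL value handled above for
   x86-64 varargs calls:

       printf ("%f\n", 3.14);

   The double travels in %xmm0, so before the call the compiler sets AL
   to 1; the callee's prologue uses that count to decide how many SSE
   registers to dump into the register save area.  */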
2244
2245 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2246 and type. */
2247
2248 int
2249 ix86_function_arg_boundary (mode, type)
2250 enum machine_mode mode;
2251 tree type;
2252 {
2253 int align;
2254 if (!TARGET_64BIT)
2255 return PARM_BOUNDARY;
2256 if (type)
2257 align = TYPE_ALIGN (type);
2258 else
2259 align = GET_MODE_ALIGNMENT (mode);
2260 if (align < PARM_BOUNDARY)
2261 align = PARM_BOUNDARY;
2262 if (align > 128)
2263 align = 128;
2264 return align;
2265 }
2266
2267 /* Return true if REGNO is a possible register number for a function value. */
2268 bool
2269 ix86_function_value_regno_p (regno)
2270 int regno;
2271 {
2272 if (!TARGET_64BIT)
2273 {
2274 return ((regno) == 0
2275 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2276 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2277 }
2278 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2279 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2280 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2281 }
2282
2283 /* Define how to find the value returned by a function.
2284 VALTYPE is the data type of the value (as a tree).
2285 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2286 otherwise, FUNC is 0. */
2287 rtx
2288 ix86_function_value (valtype)
2289 tree valtype;
2290 {
2291 if (TARGET_64BIT)
2292 {
2293 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2294 REGPARM_MAX, SSE_REGPARM_MAX,
2295 x86_64_int_return_registers, 0);
2296 /* For zero sized structures, construct_container returns NULL, but we need
2297 to keep the rest of the compiler happy by returning a meaningful value. */
2298 if (!ret)
2299 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2300 return ret;
2301 }
2302 else
2303 return gen_rtx_REG (TYPE_MODE (valtype),
2304 ix86_value_regno (TYPE_MODE (valtype)));
2305 }
2306
2307 /* Return nonzero iff TYPE is returned in memory. */
2308 int
2309 ix86_return_in_memory (type)
2310 tree type;
2311 {
2312 int needed_intregs, needed_sseregs;
2313 if (TARGET_64BIT)
2314 {
2315 return !examine_argument (TYPE_MODE (type), type, 1,
2316 &needed_intregs, &needed_sseregs);
2317 }
2318 else
2319 {
2320 if (TYPE_MODE (type) == BLKmode
2321 || (VECTOR_MODE_P (TYPE_MODE (type))
2322 && int_size_in_bytes (type) == 8)
2323 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2324 && TYPE_MODE (type) != TFmode
2325 && !VECTOR_MODE_P (TYPE_MODE (type))))
2326 return 1;
2327 return 0;
2328 }
2329 }
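/* Reading the 32-bit conditions above, for example:

       __m64 / V2SImode      8-byte vector          -> returned in memory
       double _Complex       DCmode, 16 bytes (>12) -> returned in memory
       long double           XFmode, 12 bytes       -> not forced to memory
                                                       (returned in st(0))  */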
2330
2331 /* Define how to find the value returned by a library function
2332 assuming the value has mode MODE. */
2333 rtx
2334 ix86_libcall_value (mode)
2335 enum machine_mode mode;
2336 {
2337 if (TARGET_64BIT)
2338 {
2339 switch (mode)
2340 {
2341 case SFmode:
2342 case SCmode:
2343 case DFmode:
2344 case DCmode:
2345 return gen_rtx_REG (mode, FIRST_SSE_REG);
2346 case TFmode:
2347 case TCmode:
2348 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2349 default:
2350 return gen_rtx_REG (mode, 0);
2351 }
2352 }
2353 else
2354 return gen_rtx_REG (mode, ix86_value_regno (mode));
2355 }
2356
2357 /* Given a mode, return the register to use for a return value. */
2358
2359 static int
2360 ix86_value_regno (mode)
2361 enum machine_mode mode;
2362 {
2363 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2364 return FIRST_FLOAT_REG;
2365 if (mode == TImode || VECTOR_MODE_P (mode))
2366 return FIRST_SSE_REG;
2367 return 0;
2368 }
2369 \f
2370 /* Create the va_list data type. */
2371
2372 tree
2373 ix86_build_va_list ()
2374 {
2375 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2376
2377 /* For i386 we use plain pointer to argument area. */
2378 if (!TARGET_64BIT)
2379 return build_pointer_type (char_type_node);
2380
2381 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2382 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2383
2384 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2385 unsigned_type_node);
2386 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2387 unsigned_type_node);
2388 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2389 ptr_type_node);
2390 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2391 ptr_type_node);
2392
2393 DECL_FIELD_CONTEXT (f_gpr) = record;
2394 DECL_FIELD_CONTEXT (f_fpr) = record;
2395 DECL_FIELD_CONTEXT (f_ovf) = record;
2396 DECL_FIELD_CONTEXT (f_sav) = record;
2397
2398 TREE_CHAIN (record) = type_decl;
2399 TYPE_NAME (record) = type_decl;
2400 TYPE_FIELDS (record) = f_gpr;
2401 TREE_CHAIN (f_gpr) = f_fpr;
2402 TREE_CHAIN (f_fpr) = f_ovf;
2403 TREE_CHAIN (f_ovf) = f_sav;
2404
2405 layout_type (record);
2406
2407 /* The correct type is an array type of one element. */
2408 return build_array_type (record, build_index_type (size_zero_node));
2409 }
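/* In user-level terms the record built above matches the familiar x86-64
   va_list layout (a sketch, not the exact internal trees):

       typedef struct __va_list_tag {
         unsigned int gp_offset;         next general-purpose register slot
         unsigned int fp_offset;         next SSE register slot
         void *overflow_arg_area;        arguments passed on the stack
         void *reg_save_area;            area filled by the prologue
       } __builtin_va_list[1];

   Being an array of one element, a va_list decays to a pointer when
   passed on to functions such as vprintf.  */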
2410
2411 /* Perform any actions needed for a function that is receiving a
2412 variable number of arguments.
2413
2414 CUM is as above.
2415
2416 MODE and TYPE are the mode and type of the current parameter.
2417
2418 PRETEND_SIZE is a variable that should be set to the amount of stack
2419 that must be pushed by the prolog to pretend that our caller pushed
2420 it.
2421
2422 Normally, this macro will push all remaining incoming registers on the
2423 stack and set PRETEND_SIZE to the length of the registers pushed. */
2424
2425 void
2426 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2427 CUMULATIVE_ARGS *cum;
2428 enum machine_mode mode;
2429 tree type;
2430 int *pretend_size ATTRIBUTE_UNUSED;
2431 int no_rtl;
2432
2433 {
2434 CUMULATIVE_ARGS next_cum;
2435 rtx save_area = NULL_RTX, mem;
2436 rtx label;
2437 rtx label_ref;
2438 rtx tmp_reg;
2439 rtx nsse_reg;
2440 int set;
2441 tree fntype;
2442 int stdarg_p;
2443 int i;
2444
2445 if (!TARGET_64BIT)
2446 return;
2447
2448 /* Indicate to allocate space on the stack for varargs save area. */
2449 ix86_save_varrargs_registers = 1;
2450
2451 fntype = TREE_TYPE (current_function_decl);
2452 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2453 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2454 != void_type_node));
2455
2456 /* For varargs, we do not want to skip the dummy va_dcl argument.
2457 For stdargs, we do want to skip the last named argument. */
2458 next_cum = *cum;
2459 if (stdarg_p)
2460 function_arg_advance (&next_cum, mode, type, 1);
2461
2462 if (!no_rtl)
2463 save_area = frame_pointer_rtx;
2464
2465 set = get_varargs_alias_set ();
2466
2467 for (i = next_cum.regno; i < ix86_regparm; i++)
2468 {
2469 mem = gen_rtx_MEM (Pmode,
2470 plus_constant (save_area, i * UNITS_PER_WORD));
2471 set_mem_alias_set (mem, set);
2472 emit_move_insn (mem, gen_rtx_REG (Pmode,
2473 x86_64_int_parameter_registers[i]));
2474 }
2475
2476 if (next_cum.sse_nregs)
2477 {
2478 /* Now emit code to save the SSE registers.  The AX parameter contains the
2479 number of SSE parameter registers used to call this function.  We use the
2480 sse_prologue_save insn template, which produces a computed jump across the
2481 SSE saves.  We need some preparation work to get this working. */
2482
2483 label = gen_label_rtx ();
2484 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2485
2486 /* Compute the address to jump to:
2487 label - eax*4 + nnamed_sse_arguments*4  */
2488 tmp_reg = gen_reg_rtx (Pmode);
2489 nsse_reg = gen_reg_rtx (Pmode);
2490 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2491 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2492 gen_rtx_MULT (Pmode, nsse_reg,
2493 GEN_INT (4))));
2494 if (next_cum.sse_regno)
2495 emit_move_insn
2496 (nsse_reg,
2497 gen_rtx_CONST (DImode,
2498 gen_rtx_PLUS (DImode,
2499 label_ref,
2500 GEN_INT (next_cum.sse_regno * 4))));
2501 else
2502 emit_move_insn (nsse_reg, label_ref);
2503 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2504
2505 /* Compute the address of the memory block we save into.  We always use a
2506 pointer pointing 127 bytes after the first byte to store to - this is
2507 needed to keep each save instruction limited to 4 bytes in size. */
2508 tmp_reg = gen_reg_rtx (Pmode);
2509 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2510 plus_constant (save_area,
2511 8 * REGPARM_MAX + 127)));
2512 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2513 set_mem_alias_set (mem, set);
2514 set_mem_align (mem, BITS_PER_WORD);
2515
2516 /* And finally do the dirty job! */
2517 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2518 GEN_INT (next_cum.sse_regno), label));
2519 }
2520
2521 }
2522
2523 /* Implement va_start. */
2524
2525 void
2526 ix86_va_start (valist, nextarg)
2527 tree valist;
2528 rtx nextarg;
2529 {
2530 HOST_WIDE_INT words, n_gpr, n_fpr;
2531 tree f_gpr, f_fpr, f_ovf, f_sav;
2532 tree gpr, fpr, ovf, sav, t;
2533
2534 /* Only 64bit target needs something special. */
2535 if (!TARGET_64BIT)
2536 {
2537 std_expand_builtin_va_start (valist, nextarg);
2538 return;
2539 }
2540
2541 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2542 f_fpr = TREE_CHAIN (f_gpr);
2543 f_ovf = TREE_CHAIN (f_fpr);
2544 f_sav = TREE_CHAIN (f_ovf);
2545
2546 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2547 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2548 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2549 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2550 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2551
2552 /* Count number of gp and fp argument registers used. */
2553 words = current_function_args_info.words;
2554 n_gpr = current_function_args_info.regno;
2555 n_fpr = current_function_args_info.sse_regno;
2556
2557 if (TARGET_DEBUG_ARG)
2558 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2559 (int) words, (int) n_gpr, (int) n_fpr);
2560
2561 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2562 build_int_2 (n_gpr * 8, 0));
2563 TREE_SIDE_EFFECTS (t) = 1;
2564 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2565
2566 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2567 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2568 TREE_SIDE_EFFECTS (t) = 1;
2569 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2570
2571 /* Find the overflow area. */
2572 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2573 if (words != 0)
2574 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2575 build_int_2 (words * UNITS_PER_WORD, 0));
2576 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2577 TREE_SIDE_EFFECTS (t) = 1;
2578 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2579
2580 /* Find the register save area.
2581 The function prologue saves it right above the stack frame. */
2582 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2583 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2584 TREE_SIDE_EFFECTS (t) = 1;
2585 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2586 }
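/* Worked example of the stores above (REGPARM_MAX is 6 on x86-64):

       void f (int a, double d, ...)

   consumes one GP and one SSE register for the named arguments, so
   va_start sets gp_offset = 1*8 = 8 and fp_offset = 6*8 + 1*16 = 64,
   with overflow_arg_area pointing just past any named stack arguments
   (none here) and reg_save_area at the block saved by the prologue.  */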
2587
2588 /* Implement va_arg. */
2589 rtx
2590 ix86_va_arg (valist, type)
2591 tree valist, type;
2592 {
2593 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2594 tree f_gpr, f_fpr, f_ovf, f_sav;
2595 tree gpr, fpr, ovf, sav, t;
2596 int size, rsize;
2597 rtx lab_false, lab_over = NULL_RTX;
2598 rtx addr_rtx, r;
2599 rtx container;
2600
2601 /* Only 64bit target needs something special. */
2602 if (!TARGET_64BIT)
2603 {
2604 return std_expand_builtin_va_arg (valist, type);
2605 }
2606
2607 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2608 f_fpr = TREE_CHAIN (f_gpr);
2609 f_ovf = TREE_CHAIN (f_fpr);
2610 f_sav = TREE_CHAIN (f_ovf);
2611
2612 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2613 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2614 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2615 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2616 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2617
2618 size = int_size_in_bytes (type);
2619 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2620
2621 container = construct_container (TYPE_MODE (type), type, 0,
2622 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2623 /*
2624 * Pull the value out of the saved registers ...
2625 */
2626
2627 addr_rtx = gen_reg_rtx (Pmode);
2628
2629 if (container)
2630 {
2631 rtx int_addr_rtx, sse_addr_rtx;
2632 int needed_intregs, needed_sseregs;
2633 int need_temp;
2634
2635 lab_over = gen_label_rtx ();
2636 lab_false = gen_label_rtx ();
2637
2638 examine_argument (TYPE_MODE (type), type, 0,
2639 &needed_intregs, &needed_sseregs);
2640
2641
2642 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2643 || TYPE_ALIGN (type) > 128);
2644
2645 /* If we are passing a structure, verify that it occupies a consecutive block
2646 of the register save area.  If not, we need to do moves. */
2647 if (!need_temp && !REG_P (container))
2648 {
2649 /* Verify that all registers are strictly consecutive.  */
2650 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2651 {
2652 int i;
2653
2654 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2655 {
2656 rtx slot = XVECEXP (container, 0, i);
2657 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2658 || INTVAL (XEXP (slot, 1)) != i * 16)
2659 need_temp = 1;
2660 }
2661 }
2662 else
2663 {
2664 int i;
2665
2666 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2667 {
2668 rtx slot = XVECEXP (container, 0, i);
2669 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2670 || INTVAL (XEXP (slot, 1)) != i * 8)
2671 need_temp = 1;
2672 }
2673 }
2674 }
2675 if (!need_temp)
2676 {
2677 int_addr_rtx = addr_rtx;
2678 sse_addr_rtx = addr_rtx;
2679 }
2680 else
2681 {
2682 int_addr_rtx = gen_reg_rtx (Pmode);
2683 sse_addr_rtx = gen_reg_rtx (Pmode);
2684 }
2685 /* First ensure that we fit completely in registers. */
2686 if (needed_intregs)
2687 {
2688 emit_cmp_and_jump_insns (expand_expr
2689 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2690 GEN_INT ((REGPARM_MAX - needed_intregs +
2691 1) * 8), GE, const1_rtx, SImode,
2692 1, lab_false);
2693 }
2694 if (needed_sseregs)
2695 {
2696 emit_cmp_and_jump_insns (expand_expr
2697 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2698 GEN_INT ((SSE_REGPARM_MAX -
2699 needed_sseregs + 1) * 16 +
2700 REGPARM_MAX * 8), GE, const1_rtx,
2701 SImode, 1, lab_false);
2702 }
2703
2704 /* Compute index to start of area used for integer regs. */
2705 if (needed_intregs)
2706 {
2707 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2708 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2709 if (r != int_addr_rtx)
2710 emit_move_insn (int_addr_rtx, r);
2711 }
2712 if (needed_sseregs)
2713 {
2714 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2715 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2716 if (r != sse_addr_rtx)
2717 emit_move_insn (sse_addr_rtx, r);
2718 }
2719 if (need_temp)
2720 {
2721 int i;
2722 rtx mem;
2723
2724 /* Never use the memory itself, as it has the alias set. */
2725 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2726 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2727 set_mem_alias_set (mem, get_varargs_alias_set ());
2728 set_mem_align (mem, BITS_PER_UNIT);
2729
2730 for (i = 0; i < XVECLEN (container, 0); i++)
2731 {
2732 rtx slot = XVECEXP (container, 0, i);
2733 rtx reg = XEXP (slot, 0);
2734 enum machine_mode mode = GET_MODE (reg);
2735 rtx src_addr;
2736 rtx src_mem;
2737 int src_offset;
2738 rtx dest_mem;
2739
2740 if (SSE_REGNO_P (REGNO (reg)))
2741 {
2742 src_addr = sse_addr_rtx;
2743 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2744 }
2745 else
2746 {
2747 src_addr = int_addr_rtx;
2748 src_offset = REGNO (reg) * 8;
2749 }
2750 src_mem = gen_rtx_MEM (mode, src_addr);
2751 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2752 src_mem = adjust_address (src_mem, mode, src_offset);
2753 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2754 emit_move_insn (dest_mem, src_mem);
2755 }
2756 }
2757
2758 if (needed_intregs)
2759 {
2760 t =
2761 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2762 build_int_2 (needed_intregs * 8, 0));
2763 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2764 TREE_SIDE_EFFECTS (t) = 1;
2765 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2766 }
2767 if (needed_sseregs)
2768 {
2769 t =
2770 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2771 build_int_2 (needed_sseregs * 16, 0));
2772 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2773 TREE_SIDE_EFFECTS (t) = 1;
2774 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2775 }
2776
2777 emit_jump_insn (gen_jump (lab_over));
2778 emit_barrier ();
2779 emit_label (lab_false);
2780 }
2781
2782 /* ... otherwise out of the overflow area. */
2783
2784 /* Care for on-stack alignment if needed. */
2785 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2786 t = ovf;
2787 else
2788 {
2789 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2790 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2791 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2792 }
2793 t = save_expr (t);
2794
2795 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2796 if (r != addr_rtx)
2797 emit_move_insn (addr_rtx, r);
2798
2799 t =
2800 build (PLUS_EXPR, TREE_TYPE (t), t,
2801 build_int_2 (rsize * UNITS_PER_WORD, 0));
2802 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2803 TREE_SIDE_EFFECTS (t) = 1;
2804 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2805
2806 if (container)
2807 emit_label (lab_over);
2808
2809 return addr_rtx;
2810 }
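/* For example, va_arg (ap, double) expands along the lines of this
   pseudo-C (the real expansion works on trees and RTL):

       if (ap->fp_offset >= 6*8 + 8*16)      all eight SSE save slots used
         {
           result = *(double *) ap->overflow_arg_area;
           ap->overflow_arg_area += 8;
         }
       else
         {
           result = *(double *) ((char *) ap->reg_save_area + ap->fp_offset);
           ap->fp_offset += 16;
         }
*/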
2811 \f
2812 /* Return nonzero if OP is either an i387 or an SSE fp register. */
2813 int
2814 any_fp_register_operand (op, mode)
2815 rtx op;
2816 enum machine_mode mode ATTRIBUTE_UNUSED;
2817 {
2818 return ANY_FP_REG_P (op);
2819 }
2820
2821 /* Return nonzero if OP is an i387 fp register. */
2822 int
2823 fp_register_operand (op, mode)
2824 rtx op;
2825 enum machine_mode mode ATTRIBUTE_UNUSED;
2826 {
2827 return FP_REG_P (op);
2828 }
2829
2830 /* Return nonzero if OP is a non-fp register_operand. */
2831 int
2832 register_and_not_any_fp_reg_operand (op, mode)
2833 rtx op;
2834 enum machine_mode mode;
2835 {
2836 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2837 }
2838
2839 /* Return nonzero if OP is a register operand other than an
2840 i387 fp register. */
2841 int
2842 register_and_not_fp_reg_operand (op, mode)
2843 rtx op;
2844 enum machine_mode mode;
2845 {
2846 return register_operand (op, mode) && !FP_REG_P (op);
2847 }
2848
2849 /* Return nonzero if OP is general operand representable on x86_64. */
2850
2851 int
2852 x86_64_general_operand (op, mode)
2853 rtx op;
2854 enum machine_mode mode;
2855 {
2856 if (!TARGET_64BIT)
2857 return general_operand (op, mode);
2858 if (nonimmediate_operand (op, mode))
2859 return 1;
2860 return x86_64_sign_extended_value (op);
2861 }
2862
2863 /* Return nonzero if OP is general operand representable on x86_64
2864 as either sign extended or zero extended constant. */
2865
2866 int
2867 x86_64_szext_general_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode;
2870 {
2871 if (!TARGET_64BIT)
2872 return general_operand (op, mode);
2873 if (nonimmediate_operand (op, mode))
2874 return 1;
2875 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2876 }
2877
2878 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2879
2880 int
2881 x86_64_nonmemory_operand (op, mode)
2882 rtx op;
2883 enum machine_mode mode;
2884 {
2885 if (!TARGET_64BIT)
2886 return nonmemory_operand (op, mode);
2887 if (register_operand (op, mode))
2888 return 1;
2889 return x86_64_sign_extended_value (op);
2890 }
2891
2892 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2893
2894 int
2895 x86_64_movabs_operand (op, mode)
2896 rtx op;
2897 enum machine_mode mode;
2898 {
2899 if (!TARGET_64BIT || !flag_pic)
2900 return nonmemory_operand (op, mode);
2901 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2902 return 1;
2903 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2904 return 1;
2905 return 0;
2906 }
2907
2908 /* Return nonzero if OP is a nonmemory operand representable on x86_64
2909 as either a sign extended or zero extended constant. */
2909
2910 int
2911 x86_64_szext_nonmemory_operand (op, mode)
2912 rtx op;
2913 enum machine_mode mode;
2914 {
2915 if (!TARGET_64BIT)
2916 return nonmemory_operand (op, mode);
2917 if (register_operand (op, mode))
2918 return 1;
2919 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2920 }
2921
2922 /* Return nonzero if OP is immediate operand representable on x86_64. */
2923
2924 int
2925 x86_64_immediate_operand (op, mode)
2926 rtx op;
2927 enum machine_mode mode;
2928 {
2929 if (!TARGET_64BIT)
2930 return immediate_operand (op, mode);
2931 return x86_64_sign_extended_value (op);
2932 }
2933
2934 /* Return nonzero if OP is an immediate operand representable on x86_64
2935 as a zero extended constant. */
2935
2936 int
2937 x86_64_zext_immediate_operand (op, mode)
2938 rtx op;
2939 enum machine_mode mode ATTRIBUTE_UNUSED;
2940 {
2941 return x86_64_zero_extended_value (op);
2942 }
2943
2944 /* Return nonzero if OP is (const_int 1), else return zero. */
2945
2946 int
2947 const_int_1_operand (op, mode)
2948 rtx op;
2949 enum machine_mode mode ATTRIBUTE_UNUSED;
2950 {
2951 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2952 }
2953
2954 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2955 for shift & compare patterns, as shifting by 0 does not change flags),
2956 else return zero. */
2957
2958 int
2959 const_int_1_31_operand (op, mode)
2960 rtx op;
2961 enum machine_mode mode ATTRIBUTE_UNUSED;
2962 {
2963 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2964 }
2965
2966 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2967 reference and a constant. */
2968
2969 int
2970 symbolic_operand (op, mode)
2971 register rtx op;
2972 enum machine_mode mode ATTRIBUTE_UNUSED;
2973 {
2974 switch (GET_CODE (op))
2975 {
2976 case SYMBOL_REF:
2977 case LABEL_REF:
2978 return 1;
2979
2980 case CONST:
2981 op = XEXP (op, 0);
2982 if (GET_CODE (op) == SYMBOL_REF
2983 || GET_CODE (op) == LABEL_REF
2984 || (GET_CODE (op) == UNSPEC
2985 && (XINT (op, 1) == UNSPEC_GOT
2986 || XINT (op, 1) == UNSPEC_GOTOFF
2987 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2988 return 1;
2989 if (GET_CODE (op) != PLUS
2990 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2991 return 0;
2992
2993 op = XEXP (op, 0);
2994 if (GET_CODE (op) == SYMBOL_REF
2995 || GET_CODE (op) == LABEL_REF)
2996 return 1;
2997 /* Only @GOTOFF gets offsets. */
2998 if (GET_CODE (op) != UNSPEC
2999 || XINT (op, 1) != UNSPEC_GOTOFF)
3000 return 0;
3001
3002 op = XVECEXP (op, 0, 0);
3003 if (GET_CODE (op) == SYMBOL_REF
3004 || GET_CODE (op) == LABEL_REF)
3005 return 1;
3006 return 0;
3007
3008 default:
3009 return 0;
3010 }
3011 }
3012
3013 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3014
3015 int
3016 pic_symbolic_operand (op, mode)
3017 register rtx op;
3018 enum machine_mode mode ATTRIBUTE_UNUSED;
3019 {
3020 if (GET_CODE (op) != CONST)
3021 return 0;
3022 op = XEXP (op, 0);
3023 if (TARGET_64BIT)
3024 {
3025 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3026 return 1;
3027 }
3028 else
3029 {
3030 if (GET_CODE (op) == UNSPEC)
3031 return 1;
3032 if (GET_CODE (op) != PLUS
3033 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3034 return 0;
3035 op = XEXP (op, 0);
3036 if (GET_CODE (op) == UNSPEC)
3037 return 1;
3038 }
3039 return 0;
3040 }
3041
3042 /* Return true if OP is a symbolic operand that resolves locally. */
3043
3044 static int
3045 local_symbolic_operand (op, mode)
3046 rtx op;
3047 enum machine_mode mode ATTRIBUTE_UNUSED;
3048 {
3049 if (GET_CODE (op) == LABEL_REF)
3050 return 1;
3051
3052 if (GET_CODE (op) == CONST
3053 && GET_CODE (XEXP (op, 0)) == PLUS
3054 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3055 op = XEXP (XEXP (op, 0), 0);
3056
3057 if (GET_CODE (op) != SYMBOL_REF)
3058 return 0;
3059
3060 /* These we've been told are local by varasm and encode_section_info
3061 respectively. */
3062 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3063 return 1;
3064
3065 /* There is, however, a not insubstantial body of code in the rest of
3066 the compiler that assumes it can just stick the results of
3067 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3068 /* ??? This is a hack.  Should update the body of the compiler to
3069 always create a DECL and invoke targetm.encode_section_info. */
3070 if (strncmp (XSTR (op, 0), internal_label_prefix,
3071 internal_label_prefix_len) == 0)
3072 return 1;
3073
3074 return 0;
3075 }
3076
3077 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3078
3079 int
3080 tls_symbolic_operand (op, mode)
3081 register rtx op;
3082 enum machine_mode mode ATTRIBUTE_UNUSED;
3083 {
3084 const char *symbol_str;
3085
3086 if (GET_CODE (op) != SYMBOL_REF)
3087 return 0;
3088 symbol_str = XSTR (op, 0);
3089
3090 if (symbol_str[0] != '%')
3091 return 0;
3092 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3093 }
3094
3095 static int
3096 tls_symbolic_operand_1 (op, kind)
3097 rtx op;
3098 enum tls_model kind;
3099 {
3100 const char *symbol_str;
3101
3102 if (GET_CODE (op) != SYMBOL_REF)
3103 return 0;
3104 symbol_str = XSTR (op, 0);
3105
3106 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3107 }
3108
3109 int
3110 global_dynamic_symbolic_operand (op, mode)
3111 register rtx op;
3112 enum machine_mode mode ATTRIBUTE_UNUSED;
3113 {
3114 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3115 }
3116
3117 int
3118 local_dynamic_symbolic_operand (op, mode)
3119 register rtx op;
3120 enum machine_mode mode ATTRIBUTE_UNUSED;
3121 {
3122 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3123 }
3124
3125 int
3126 initial_exec_symbolic_operand (op, mode)
3127 register rtx op;
3128 enum machine_mode mode ATTRIBUTE_UNUSED;
3129 {
3130 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3131 }
3132
3133 int
3134 local_exec_symbolic_operand (op, mode)
3135 register rtx op;
3136 enum machine_mode mode ATTRIBUTE_UNUSED;
3137 {
3138 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3139 }
3140
3141 /* Test for a valid operand for a call instruction. Don't allow the
3142 arg pointer register or virtual regs since they may decay into
3143 reg + const, which the patterns can't handle. */
3144
3145 int
3146 call_insn_operand (op, mode)
3147 rtx op;
3148 enum machine_mode mode ATTRIBUTE_UNUSED;
3149 {
3150 /* Disallow indirect through a virtual register. This leads to
3151 compiler aborts when trying to eliminate them. */
3152 if (GET_CODE (op) == REG
3153 && (op == arg_pointer_rtx
3154 || op == frame_pointer_rtx
3155 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3156 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3157 return 0;
3158
3159 /* Disallow `call 1234'. Due to varying assembler lameness this
3160 gets either rejected or translated to `call .+1234'. */
3161 if (GET_CODE (op) == CONST_INT)
3162 return 0;
3163
3164 /* Explicitly allow SYMBOL_REF even if pic. */
3165 if (GET_CODE (op) == SYMBOL_REF)
3166 return 1;
3167
3168 /* Otherwise we can allow any general_operand in the address. */
3169 return general_operand (op, Pmode);
3170 }
3171
3172 int
3173 constant_call_address_operand (op, mode)
3174 rtx op;
3175 enum machine_mode mode ATTRIBUTE_UNUSED;
3176 {
3177 if (GET_CODE (op) == CONST
3178 && GET_CODE (XEXP (op, 0)) == PLUS
3179 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3180 op = XEXP (XEXP (op, 0), 0);
3181 return GET_CODE (op) == SYMBOL_REF;
3182 }
3183
3184 /* Match exactly zero and one. */
3185
3186 int
3187 const0_operand (op, mode)
3188 register rtx op;
3189 enum machine_mode mode;
3190 {
3191 return op == CONST0_RTX (mode);
3192 }
3193
3194 int
3195 const1_operand (op, mode)
3196 register rtx op;
3197 enum machine_mode mode ATTRIBUTE_UNUSED;
3198 {
3199 return op == const1_rtx;
3200 }
3201
3202 /* Match 2, 4, or 8. Used for leal multiplicands. */
3203
3204 int
3205 const248_operand (op, mode)
3206 register rtx op;
3207 enum machine_mode mode ATTRIBUTE_UNUSED;
3208 {
3209 return (GET_CODE (op) == CONST_INT
3210 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3211 }
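/* The constants 2, 4 and 8 are the legal scale factors of an address, so
   for instance (illustrative source and typical output):

       int scale5 (int x) { return x * 5; }

   can be emitted as a single "leal (%eax,%eax,4), %eax" instead of an
   imul.  */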
3212
3213 /* True if this is a constant appropriate for an increment or decrement. */
3214
3215 int
3216 incdec_operand (op, mode)
3217 register rtx op;
3218 enum machine_mode mode ATTRIBUTE_UNUSED;
3219 {
3220 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3221 the flags register, since the carry flag is not set. */
3222 if (TARGET_PENTIUM4 && !optimize_size)
3223 return 0;
3224 return op == const1_rtx || op == constm1_rtx;
3225 }
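/* Consequence of the check above: when tuning for the Pentium 4 and not
   optimizing for size, "x++" is emitted as "addl $1, %eax" rather than
   "incl %eax", since add rewrites the whole flags register and avoids
   the partial flag stall that inc/dec can cause.  */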
3226
3227 /* Return nonzero if OP is acceptable as operand of DImode shift
3228 expander. */
3229
3230 int
3231 shiftdi_operand (op, mode)
3232 rtx op;
3233 enum machine_mode mode ATTRIBUTE_UNUSED;
3234 {
3235 if (TARGET_64BIT)
3236 return nonimmediate_operand (op, mode);
3237 else
3238 return register_operand (op, mode);
3239 }
3240
3241 /* Return false if this is the stack pointer, or any other fake
3242 register eliminable to the stack pointer. Otherwise, this is
3243 a register operand.
3244
3245 This is used to prevent esp from being used as an index reg,
3246 which would only happen in pathological cases. */
3247
3248 int
3249 reg_no_sp_operand (op, mode)
3250 register rtx op;
3251 enum machine_mode mode;
3252 {
3253 rtx t = op;
3254 if (GET_CODE (t) == SUBREG)
3255 t = SUBREG_REG (t);
3256 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3257 return 0;
3258
3259 return register_operand (op, mode);
3260 }
3261
3262 int
3263 mmx_reg_operand (op, mode)
3264 register rtx op;
3265 enum machine_mode mode ATTRIBUTE_UNUSED;
3266 {
3267 return MMX_REG_P (op);
3268 }
3269
3270 /* Return false if this is any eliminable register. Otherwise
3271 general_operand. */
3272
3273 int
3274 general_no_elim_operand (op, mode)
3275 register rtx op;
3276 enum machine_mode mode;
3277 {
3278 rtx t = op;
3279 if (GET_CODE (t) == SUBREG)
3280 t = SUBREG_REG (t);
3281 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3282 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3283 || t == virtual_stack_dynamic_rtx)
3284 return 0;
3285 if (REG_P (t)
3286 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3287 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3288 return 0;
3289
3290 return general_operand (op, mode);
3291 }
3292
3293 /* Return false if this is any eliminable register. Otherwise
3294 register_operand or const_int. */
3295
3296 int
3297 nonmemory_no_elim_operand (op, mode)
3298 register rtx op;
3299 enum machine_mode mode;
3300 {
3301 rtx t = op;
3302 if (GET_CODE (t) == SUBREG)
3303 t = SUBREG_REG (t);
3304 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3305 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3306 || t == virtual_stack_dynamic_rtx)
3307 return 0;
3308
3309 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3310 }
3311
3312 /* Return false if this is any eliminable register or stack register,
3313 otherwise work like register_operand. */
3314
3315 int
3316 index_register_operand (op, mode)
3317 register rtx op;
3318 enum machine_mode mode;
3319 {
3320 rtx t = op;
3321 if (GET_CODE (t) == SUBREG)
3322 t = SUBREG_REG (t);
3323 if (!REG_P (t))
3324 return 0;
3325 if (t == arg_pointer_rtx
3326 || t == frame_pointer_rtx
3327 || t == virtual_incoming_args_rtx
3328 || t == virtual_stack_vars_rtx
3329 || t == virtual_stack_dynamic_rtx
3330 || REGNO (t) == STACK_POINTER_REGNUM)
3331 return 0;
3332
3333 return general_operand (op, mode);
3334 }
3335
3336 /* Return true if op is a Q_REGS class register. */
3337
3338 int
3339 q_regs_operand (op, mode)
3340 register rtx op;
3341 enum machine_mode mode;
3342 {
3343 if (mode != VOIDmode && GET_MODE (op) != mode)
3344 return 0;
3345 if (GET_CODE (op) == SUBREG)
3346 op = SUBREG_REG (op);
3347 return ANY_QI_REG_P (op);
3348 }
3349
3350 /* Return true if op is a NON_Q_REGS class register. */
3351
3352 int
3353 non_q_regs_operand (op, mode)
3354 register rtx op;
3355 enum machine_mode mode;
3356 {
3357 if (mode != VOIDmode && GET_MODE (op) != mode)
3358 return 0;
3359 if (GET_CODE (op) == SUBREG)
3360 op = SUBREG_REG (op);
3361 return NON_QI_REG_P (op);
3362 }
3363
3364 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3365 insns. */
3366 int
3367 sse_comparison_operator (op, mode)
3368 rtx op;
3369 enum machine_mode mode ATTRIBUTE_UNUSED;
3370 {
3371 enum rtx_code code = GET_CODE (op);
3372 switch (code)
3373 {
3374 /* Operations supported directly. */
3375 case EQ:
3376 case LT:
3377 case LE:
3378 case UNORDERED:
3379 case NE:
3380 case UNGE:
3381 case UNGT:
3382 case ORDERED:
3383 return 1;
3384 /* These are equivalent to ones above in non-IEEE comparisons. */
3385 case UNEQ:
3386 case UNLT:
3387 case UNLE:
3388 case LTGT:
3389 case GE:
3390 case GT:
3391 return !TARGET_IEEE_FP;
3392 default:
3393 return 0;
3394 }
3395 }
3396 /* Return 1 if OP is a valid comparison operator in valid mode. */
3397 int
3398 ix86_comparison_operator (op, mode)
3399 register rtx op;
3400 enum machine_mode mode;
3401 {
3402 enum machine_mode inmode;
3403 enum rtx_code code = GET_CODE (op);
3404 if (mode != VOIDmode && GET_MODE (op) != mode)
3405 return 0;
3406 if (GET_RTX_CLASS (code) != '<')
3407 return 0;
3408 inmode = GET_MODE (XEXP (op, 0));
3409
3410 if (inmode == CCFPmode || inmode == CCFPUmode)
3411 {
3412 enum rtx_code second_code, bypass_code;
3413 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3414 return (bypass_code == NIL && second_code == NIL);
3415 }
3416 switch (code)
3417 {
3418 case EQ: case NE:
3419 return 1;
3420 case LT: case GE:
3421 if (inmode == CCmode || inmode == CCGCmode
3422 || inmode == CCGOCmode || inmode == CCNOmode)
3423 return 1;
3424 return 0;
3425 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3426 if (inmode == CCmode)
3427 return 1;
3428 return 0;
3429 case GT: case LE:
3430 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3431 return 1;
3432 return 0;
3433 default:
3434 return 0;
3435 }
3436 }
3437
3438 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3439
3440 int
3441 fcmov_comparison_operator (op, mode)
3442 register rtx op;
3443 enum machine_mode mode;
3444 {
3445 enum machine_mode inmode;
3446 enum rtx_code code = GET_CODE (op);
3447 if (mode != VOIDmode && GET_MODE (op) != mode)
3448 return 0;
3449 if (GET_RTX_CLASS (code) != '<')
3450 return 0;
3451 inmode = GET_MODE (XEXP (op, 0));
3452 if (inmode == CCFPmode || inmode == CCFPUmode)
3453 {
3454 enum rtx_code second_code, bypass_code;
3455 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3456 if (bypass_code != NIL || second_code != NIL)
3457 return 0;
3458 code = ix86_fp_compare_code_to_integer (code);
3459 }
3460 /* The i387 supports only a limited set of condition codes. */
3461 switch (code)
3462 {
3463 case LTU: case GTU: case LEU: case GEU:
3464 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3465 return 1;
3466 return 0;
3467 case ORDERED: case UNORDERED:
3468 case EQ: case NE:
3469 return 1;
3470 default:
3471 return 0;
3472 }
3473 }
3474
3475 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3476
3477 int
3478 promotable_binary_operator (op, mode)
3479 register rtx op;
3480 enum machine_mode mode ATTRIBUTE_UNUSED;
3481 {
3482 switch (GET_CODE (op))
3483 {
3484 case MULT:
3485 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3486 but the 386 and 486 do HImode multiplies faster. */
3487 return ix86_cpu > PROCESSOR_I486;
3488 case PLUS:
3489 case AND:
3490 case IOR:
3491 case XOR:
3492 case ASHIFT:
3493 return 1;
3494 default:
3495 return 0;
3496 }
3497 }
3498
3499 /* Nearly general operand, but accept any const_double, since we wish
3500 to be able to drop them into memory rather than have them get pulled
3501 into registers. */
3502
3503 int
3504 cmp_fp_expander_operand (op, mode)
3505 register rtx op;
3506 enum machine_mode mode;
3507 {
3508 if (mode != VOIDmode && mode != GET_MODE (op))
3509 return 0;
3510 if (GET_CODE (op) == CONST_DOUBLE)
3511 return 1;
3512 return general_operand (op, mode);
3513 }
3514
3515 /* Match an SI or HImode register for a zero_extract. */
3516
3517 int
3518 ext_register_operand (op, mode)
3519 register rtx op;
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3521 {
3522 int regno;
3523 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3524 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3525 return 0;
3526
3527 if (!register_operand (op, VOIDmode))
3528 return 0;
3529
3530 /* Be careful to accept only registers having upper parts. */
3531 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3532 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3533 }
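/* Illustrative note (not in the original source): in the i386 register
   numbering used here, hard registers 0..3 are %eax, %edx, %ecx and %ebx,
   the only registers whose low word has separately addressable high and
   low bytes (%ah/%al etc.).  Pseudo registers (regno > LAST_VIRTUAL_REGISTER)
   are also accepted, since reload can still assign them to a suitable
   hard register.  */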
3534
3535 /* Return 1 if this is a valid binary floating-point operation.
3536 OP is the expression matched, and MODE is its mode. */
3537
3538 int
3539 binary_fp_operator (op, mode)
3540 register rtx op;
3541 enum machine_mode mode;
3542 {
3543 if (mode != VOIDmode && mode != GET_MODE (op))
3544 return 0;
3545
3546 switch (GET_CODE (op))
3547 {
3548 case PLUS:
3549 case MINUS:
3550 case MULT:
3551 case DIV:
3552 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3553
3554 default:
3555 return 0;
3556 }
3557 }
3558
3559 int
3560 mult_operator (op, mode)
3561 register rtx op;
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3563 {
3564 return GET_CODE (op) == MULT;
3565 }
3566
3567 int
3568 div_operator (op, mode)
3569 register rtx op;
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3571 {
3572 return GET_CODE (op) == DIV;
3573 }
3574
3575 int
3576 arith_or_logical_operator (op, mode)
3577 rtx op;
3578 enum machine_mode mode;
3579 {
3580 return ((mode == VOIDmode || GET_MODE (op) == mode)
3581 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3582 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3583 }
3584
3585 /* Returns 1 if OP is a memory operand with a displacement. */
3586
3587 int
3588 memory_displacement_operand (op, mode)
3589 register rtx op;
3590 enum machine_mode mode;
3591 {
3592 struct ix86_address parts;
3593
3594 if (! memory_operand (op, mode))
3595 return 0;
3596
3597 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3598 abort ();
3599
3600 return parts.disp != NULL_RTX;
3601 }
3602
3603 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3604 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3605
3606 ??? It seems likely that this will only work because cmpsi is an
3607 expander, and no actual insns use this. */
3608
3609 int
3610 cmpsi_operand (op, mode)
3611 rtx op;
3612 enum machine_mode mode;
3613 {
3614 if (nonimmediate_operand (op, mode))
3615 return 1;
3616
3617 if (GET_CODE (op) == AND
3618 && GET_MODE (op) == SImode
3619 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3620 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3621 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3622 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3623 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3624 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3625 return 1;
3626
3627 return 0;
3628 }
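/* Illustrative note (not in the original source): the extra form accepted
   above matches RTL of the shape
       (and:SI (zero_extract:SI (reg) (const_int 8) (const_int 8))
               (const_int N))
   i.e. a test of bits 8..15 of a register -- the %ah/%bh/%ch/%dh byte --
   which is the shape the testqi_ext_ccno_0 pattern re-emits.  */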
3629
3630 /* Returns 1 if OP is a memory operand that cannot be represented by the
3631 modRM array. */
3632
3633 int
3634 long_memory_operand (op, mode)
3635 register rtx op;
3636 enum machine_mode mode;
3637 {
3638 if (! memory_operand (op, mode))
3639 return 0;
3640
3641 return memory_address_length (op) != 0;
3642 }
3643
3644 /* Return nonzero if the rtx is known to be aligned. */
3645
3646 int
3647 aligned_operand (op, mode)
3648 rtx op;
3649 enum machine_mode mode;
3650 {
3651 struct ix86_address parts;
3652
3653 if (!general_operand (op, mode))
3654 return 0;
3655
3656 /* Registers and immediate operands are always "aligned". */
3657 if (GET_CODE (op) != MEM)
3658 return 1;
3659
3660 /* Don't even try to do any aligned optimizations with volatiles. */
3661 if (MEM_VOLATILE_P (op))
3662 return 0;
3663
3664 op = XEXP (op, 0);
3665
3666 /* Pushes and pops are only valid on the stack pointer. */
3667 if (GET_CODE (op) == PRE_DEC
3668 || GET_CODE (op) == POST_INC)
3669 return 1;
3670
3671 /* Decode the address. */
3672 if (! ix86_decompose_address (op, &parts))
3673 abort ();
3674
3675 if (parts.base && GET_CODE (parts.base) == SUBREG)
3676 parts.base = SUBREG_REG (parts.base);
3677 if (parts.index && GET_CODE (parts.index) == SUBREG)
3678 parts.index = SUBREG_REG (parts.index);
3679
3680 /* Look for some component that isn't known to be aligned. */
3681 if (parts.index)
3682 {
3683 if (parts.scale < 4
3684 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3685 return 0;
3686 }
3687 if (parts.base)
3688 {
3689 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3690 return 0;
3691 }
3692 if (parts.disp)
3693 {
3694 if (GET_CODE (parts.disp) != CONST_INT
3695 || (INTVAL (parts.disp) & 3) != 0)
3696 return 0;
3697 }
3698
3699 /* Didn't find one -- this must be an aligned address. */
3700 return 1;
3701 }
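/* Illustrative examples (not in the original source): an address such as
   8(%esp) passes this test when %esp is known to be at least 32-bit
   aligned, since the displacement is a multiple of four; 2(%eax) is
   rejected because its displacement is not, and (%eax) alone is rejected
   unless %eax is known to be 32-bit aligned.  */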
3702 \f
3703 /* Return true if the constant is something that can be loaded with
3704 a special instruction. Only handle 0.0 and 1.0; others are less
3705 worthwhile. */
3706
3707 int
3708 standard_80387_constant_p (x)
3709 rtx x;
3710 {
3711 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3712 return -1;
3713 /* Note that the 80387 supports other constants, such as pi, that we should
3714 support too. On some machines, these are much slower to load as a standard
3715 constant than to load from doubles in memory. */
3716 if (x == CONST0_RTX (GET_MODE (x)))
3717 return 1;
3718 if (x == CONST1_RTX (GET_MODE (x)))
3719 return 2;
3720 return 0;
3721 }
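/* Illustrative note (not in the original source): the return values 1 and 2
   correspond to constants the 387 can materialize with a single
   instruction -- fldz for 0.0 and fld1 for 1.0 -- which is why only these
   two values are recognized here.  */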
3722
3723 /* Return 1 if X is an FP constant we can load into an SSE register
3724 without using memory. */
3725 int
3726 standard_sse_constant_p (x)
3727 rtx x;
3728 {
3729 if (GET_CODE (x) != CONST_DOUBLE)
3730 return -1;
3731 return (x == CONST0_RTX (GET_MODE (x)));
3732 }
3733
3734 /* Returns 1 if OP contains a symbol reference */
3735
3736 int
3737 symbolic_reference_mentioned_p (op)
3738 rtx op;
3739 {
3740 register const char *fmt;
3741 register int i;
3742
3743 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3744 return 1;
3745
3746 fmt = GET_RTX_FORMAT (GET_CODE (op));
3747 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3748 {
3749 if (fmt[i] == 'E')
3750 {
3751 register int j;
3752
3753 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3754 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3755 return 1;
3756 }
3757
3758 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3759 return 1;
3760 }
3761
3762 return 0;
3763 }
3764
3765 /* Return 1 if it is appropriate to emit `ret' instructions in the
3766 body of a function. Do this only if the epilogue is simple, needing a
3767 couple of insns. Prior to reloading, we can't tell how many registers
3768 must be saved, so return 0 then. Return 0 if there is no frame
3769 marker to de-allocate.
3770
3771 If NON_SAVING_SETJMP is defined and true, then it is not possible
3772 for the epilogue to be simple, so return 0. This is a special case
3773 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3774 until final, but jump_optimize may need to know sooner if a
3775 `return' is OK. */
3776
3777 int
3778 ix86_can_use_return_insn_p ()
3779 {
3780 struct ix86_frame frame;
3781
3782 #ifdef NON_SAVING_SETJMP
3783 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3784 return 0;
3785 #endif
3786
3787 if (! reload_completed || frame_pointer_needed)
3788 return 0;
3789
3790 /* Don't allow more than 32768 bytes of pop, since that's all we can do
3791 with one instruction. */
3792 if (current_function_pops_args
3793 && current_function_args_size >= 32768)
3794 return 0;
3795
3796 ix86_compute_frame_layout (&frame);
3797 return frame.to_allocate == 0 && frame.nregs == 0;
3798 }
3799 \f
3800 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3801 int
3802 x86_64_sign_extended_value (value)
3803 rtx value;
3804 {
3805 switch (GET_CODE (value))
3806 {
3807 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3808 to be at least 32, so all acceptable constants are
3809 represented as CONST_INT. */
3810 case CONST_INT:
3811 if (HOST_BITS_PER_WIDE_INT == 32)
3812 return 1;
3813 else
3814 {
3815 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3816 return trunc_int_for_mode (val, SImode) == val;
3817 }
3818 break;
3819
3820 /* For certain code models, the symbolic references are known to fit. */
3821 case SYMBOL_REF:
3822 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3823
3824 /* For certain code models, the code is near as well. */
3825 case LABEL_REF:
3826 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3827
3828 /* We also may accept the offsetted memory references in certain special
3829 cases. */
3830 case CONST:
3831 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3832 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3833 return 1;
3834 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3835 {
3836 rtx op1 = XEXP (XEXP (value, 0), 0);
3837 rtx op2 = XEXP (XEXP (value, 0), 1);
3838 HOST_WIDE_INT offset;
3839
3840 if (ix86_cmodel == CM_LARGE)
3841 return 0;
3842 if (GET_CODE (op2) != CONST_INT)
3843 return 0;
3844 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3845 switch (GET_CODE (op1))
3846 {
3847 case SYMBOL_REF:
3848 /* For CM_SMALL assume that the latest object is 1MB before the
3849 end of the 31-bit boundary. We may also accept pretty
3850 large negative constants, knowing that all objects are
3851 in the positive half of the address space. */
3852 if (ix86_cmodel == CM_SMALL
3853 && offset < 1024*1024*1024
3854 && trunc_int_for_mode (offset, SImode) == offset)
3855 return 1;
3856 /* For CM_KERNEL we know that all objects reside in the
3857 negative half of the 32-bit address space. We may not
3858 accept negative offsets, since they may be just off,
3859 but we may accept pretty large positive ones. */
3860 if (ix86_cmodel == CM_KERNEL
3861 && offset > 0
3862 && trunc_int_for_mode (offset, SImode) == offset)
3863 return 1;
3864 break;
3865 case LABEL_REF:
3866 /* These conditions are similar to SYMBOL_REF ones, just the
3867 constraints for code models differ. */
3868 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3869 && offset < 1024*1024*1024
3870 && trunc_int_for_mode (offset, SImode) == offset)
3871 return 1;
3872 if (ix86_cmodel == CM_KERNEL
3873 && offset > 0
3874 && trunc_int_for_mode (offset, SImode) == offset)
3875 return 1;
3876 break;
3877 default:
3878 return 0;
3879 }
3880 }
3881 return 0;
3882 default:
3883 return 0;
3884 }
3885 }
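/* Illustrative examples (not in the original source): 0x7fffffff and
   -0x80000000 are accepted here, since sign-extending the 32-bit
   immediate reproduces the 64-bit value, while 0x80000000 is rejected
   because sign extension would turn it into 0xffffffff80000000.  */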
3886
3887 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3888 int
3889 x86_64_zero_extended_value (value)
3890 rtx value;
3891 {
3892 switch (GET_CODE (value))
3893 {
3894 case CONST_DOUBLE:
3895 if (HOST_BITS_PER_WIDE_INT == 32)
3896 return (GET_MODE (value) == VOIDmode
3897 && !CONST_DOUBLE_HIGH (value));
3898 else
3899 return 0;
3900 case CONST_INT:
3901 if (HOST_BITS_PER_WIDE_INT == 32)
3902 return INTVAL (value) >= 0;
3903 else
3904 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3905 break;
3906
3907 /* For certain code models, the symbolic references are known to fit. */
3908 case SYMBOL_REF:
3909 return ix86_cmodel == CM_SMALL;
3910
3911 /* For certain code models, the code is near as well. */
3912 case LABEL_REF:
3913 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3914
3915 /* We also may accept the offsetted memory references in certain special
3916 cases. */
3917 case CONST:
3918 if (GET_CODE (XEXP (value, 0)) == PLUS)
3919 {
3920 rtx op1 = XEXP (XEXP (value, 0), 0);
3921 rtx op2 = XEXP (XEXP (value, 0), 1);
3922
3923 if (ix86_cmodel == CM_LARGE)
3924 return 0;
3925 switch (GET_CODE (op1))
3926 {
3927 case SYMBOL_REF:
3928 return 0;
3929 /* For the small code model we may accept pretty large positive
3930 offsets, since one bit is available for free. Negative
3931 offsets are limited by the size of the NULL pointer area
3932 specified by the ABI. */
3933 if (ix86_cmodel == CM_SMALL
3934 && GET_CODE (op2) == CONST_INT
3935 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3936 && (trunc_int_for_mode (INTVAL (op2), SImode)
3937 == INTVAL (op2)))
3938 return 1;
3939 /* ??? For the kernel, we may accept adjustment of
3940 -0x10000000, since we know that it will just convert
3941 negative address space to positive, but perhaps this
3942 is not worthwhile. */
3943 break;
3944 case LABEL_REF:
3945 /* These conditions are similar to SYMBOL_REF ones, just the
3946 constraints for code models differ. */
3947 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3948 && GET_CODE (op2) == CONST_INT
3949 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3950 && (trunc_int_for_mode (INTVAL (op2), SImode)
3951 == INTVAL (op2)))
3952 return 1;
3953 break;
3954 default:
3955 return 0;
3956 }
3957 }
3958 return 0;
3959 default:
3960 return 0;
3961 }
3962 }
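/* Illustrative examples (not in the original source): a constant with only
   the low 32 bits set, such as 0xffffffff, is accepted here, since a 32-bit
   move like movl zero-extends into the full 64-bit register, while a
   negative value such as -1 is rejected because its upper 32 bits would
   have to be all ones.  */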
3963
3964 /* Value should be nonzero if functions must have frame pointers.
3965 Zero means the frame pointer need not be set up (and parms may
3966 be accessed via the stack pointer) in functions that seem suitable. */
3967
3968 int
3969 ix86_frame_pointer_required ()
3970 {
3971 /* If we accessed previous frames, then the generated code expects
3972 to be able to access the saved ebp value in our frame. */
3973 if (cfun->machine->accesses_prev_frame)
3974 return 1;
3975
3976 /* Several x86 OSes need a frame pointer for other reasons,
3977 usually pertaining to setjmp. */
3978 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3979 return 1;
3980
3981 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3982 the frame pointer by default. Turn it back on now if we've not
3983 got a leaf function. */
3984 if (TARGET_OMIT_LEAF_FRAME_POINTER
3985 && (!current_function_is_leaf || current_function_profile))
3986 return 1;
3987
3988 return 0;
3989 }
3990
3991 /* Record that the current function accesses previous call frames. */
3992
3993 void
3994 ix86_setup_frame_addresses ()
3995 {
3996 cfun->machine->accesses_prev_frame = 1;
3997 }
3998 \f
3999 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4000 # define USE_HIDDEN_LINKONCE 1
4001 #else
4002 # define USE_HIDDEN_LINKONCE 0
4003 #endif
4004
4005 static int pic_labels_used;
4006
4007 /* Fills in the label name that should be used for a pc thunk for
4008 the given register. */
4009
4010 static void
4011 get_pc_thunk_name (name, regno)
4012 char name[32];
4013 unsigned int regno;
4014 {
4015 if (USE_HIDDEN_LINKONCE)
4016 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4017 else
4018 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4019 }
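/* Illustrative note (not in the original source): with USE_HIDDEN_LINKONCE
   the thunk for %ebx, for example, is named "__i686.get_pc_thunk.bx";
   otherwise an internal label built from the prefix "LPR" and the register
   number is used.  */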
4020
4021
4022 /* This function generates code for -fpic that loads %ebx with
4023 the return address of the caller and then returns. */
4024
4025 void
4026 ix86_asm_file_end (file)
4027 FILE *file;
4028 {
4029 rtx xops[2];
4030 int regno;
4031
4032 for (regno = 0; regno < 8; ++regno)
4033 {
4034 char name[32];
4035
4036 if (! ((pic_labels_used >> regno) & 1))
4037 continue;
4038
4039 get_pc_thunk_name (name, regno);
4040
4041 if (USE_HIDDEN_LINKONCE)
4042 {
4043 tree decl;
4044
4045 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4046 error_mark_node);
4047 TREE_PUBLIC (decl) = 1;
4048 TREE_STATIC (decl) = 1;
4049 DECL_ONE_ONLY (decl) = 1;
4050
4051 (*targetm.asm_out.unique_section) (decl, 0);
4052 named_section (decl, NULL, 0);
4053
4054 (*targetm.asm_out.globalize_label) (file, name);
4055 fputs ("\t.hidden\t", file);
4056 assemble_name (file, name);
4057 fputc ('\n', file);
4058 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4059 }
4060 else
4061 {
4062 text_section ();
4063 ASM_OUTPUT_LABEL (file, name);
4064 }
4065
4066 xops[0] = gen_rtx_REG (SImode, regno);
4067 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4068 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4069 output_asm_insn ("ret", xops);
4070 }
4071 }
4072
4073 /* Emit code for the SET_GOT patterns. */
4074
4075 const char *
4076 output_set_got (dest)
4077 rtx dest;
4078 {
4079 rtx xops[3];
4080
4081 xops[0] = dest;
4082 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4083
4084 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4085 {
4086 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4087
4088 if (!flag_pic)
4089 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4090 else
4091 output_asm_insn ("call\t%a2", xops);
4092
4093 #if TARGET_MACHO
4094 /* Output the "canonical" label name ("Lxx$pb") here too. This
4095 is what will be referred to by the Mach-O PIC subsystem. */
4096 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4097 #endif
4098 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4099 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4100
4101 if (flag_pic)
4102 output_asm_insn ("pop{l}\t%0", xops);
4103 }
4104 else
4105 {
4106 char name[32];
4107 get_pc_thunk_name (name, REGNO (dest));
4108 pic_labels_used |= 1 << REGNO (dest);
4109
4110 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4111 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4112 output_asm_insn ("call\t%X2", xops);
4113 }
4114
4115 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4116 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4117 else if (!TARGET_MACHO)
4118 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4119
4120 return "";
4121 }
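/* Illustrative note (not in the original source): without deep branch
   prediction the PIC register setup emitted above looks roughly like

       call  .L2
   .L2: popl  %ebx
        addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   whereas with TARGET_DEEP_BRANCH_PREDICTION the call goes to the matching
   __i686.get_pc_thunk.* thunk instead, which keeps the call/return stack
   predictor balanced.  */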
4122
4123 /* Generate a "push" pattern for input ARG. */
4124
4125 static rtx
4126 gen_push (arg)
4127 rtx arg;
4128 {
4129 return gen_rtx_SET (VOIDmode,
4130 gen_rtx_MEM (Pmode,
4131 gen_rtx_PRE_DEC (Pmode,
4132 stack_pointer_rtx)),
4133 arg);
4134 }
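/* Illustrative note (not in the original source): the RTL built above,
   (set (mem:Pmode (pre_dec:Pmode (reg sp))) arg), is the canonical form
   recognized by the push patterns in i386.md, so emitting it results in a
   single push instruction.  */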
4135
4136 /* Return >= 0 if there is an unused call-clobbered register available
4137 for the entire function. */
4138
4139 static unsigned int
4140 ix86_select_alt_pic_regnum ()
4141 {
4142 if (current_function_is_leaf && !current_function_profile)
4143 {
4144 int i;
4145 for (i = 2; i >= 0; --i)
4146 if (!regs_ever_live[i])
4147 return i;
4148 }
4149
4150 return INVALID_REGNUM;
4151 }
4152
4153 /* Return 1 if we need to save REGNO. */
4154 static int
4155 ix86_save_reg (regno, maybe_eh_return)
4156 unsigned int regno;
4157 int maybe_eh_return;
4158 {
4159 if (pic_offset_table_rtx
4160 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4161 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4162 || current_function_profile
4163 || current_function_calls_eh_return))
4164 {
4165 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4166 return 0;
4167 return 1;
4168 }
4169
4170 if (current_function_calls_eh_return && maybe_eh_return)
4171 {
4172 unsigned i;
4173 for (i = 0; ; i++)
4174 {
4175 unsigned test = EH_RETURN_DATA_REGNO (i);
4176 if (test == INVALID_REGNUM)
4177 break;
4178 if (test == regno)
4179 return 1;
4180 }
4181 }
4182
4183 return (regs_ever_live[regno]
4184 && !call_used_regs[regno]
4185 && !fixed_regs[regno]
4186 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4187 }
4188
4189 /* Return number of registers to be saved on the stack. */
4190
4191 static int
4192 ix86_nsaved_regs ()
4193 {
4194 int nregs = 0;
4195 int regno;
4196
4197 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4198 if (ix86_save_reg (regno, true))
4199 nregs++;
4200 return nregs;
4201 }
4202
4203 /* Return the offset between two registers, one to be eliminated, and the other
4204 its replacement, at the start of a routine. */
4205
4206 HOST_WIDE_INT
4207 ix86_initial_elimination_offset (from, to)
4208 int from;
4209 int to;
4210 {
4211 struct ix86_frame frame;
4212 ix86_compute_frame_layout (&frame);
4213
4214 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4215 return frame.hard_frame_pointer_offset;
4216 else if (from == FRAME_POINTER_REGNUM
4217 && to == HARD_FRAME_POINTER_REGNUM)
4218 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4219 else
4220 {
4221 if (to != STACK_POINTER_REGNUM)
4222 abort ();
4223 else if (from == ARG_POINTER_REGNUM)
4224 return frame.stack_pointer_offset;
4225 else if (from != FRAME_POINTER_REGNUM)
4226 abort ();
4227 else
4228 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4229 }
4230 }
4231
4232 /* Fill structure ix86_frame about frame of currently computed function. */
4233
4234 static void
4235 ix86_compute_frame_layout (frame)
4236 struct ix86_frame *frame;
4237 {
4238 HOST_WIDE_INT total_size;
4239 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4240 int offset;
4241 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4242 HOST_WIDE_INT size = get_frame_size ();
4243
4244 frame->nregs = ix86_nsaved_regs ();
4245 total_size = size;
4246
4247 /* Skip return address and saved base pointer. */
4248 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4249
4250 frame->hard_frame_pointer_offset = offset;
4251
4252 /* Do some sanity checking of stack_alignment_needed and
4253 preferred_alignment, since the i386 port is the only one using these
4254 features, and they may break easily. */
4255
4256 if (size && !stack_alignment_needed)
4257 abort ();
4258 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4259 abort ();
4260 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4261 abort ();
4262 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4263 abort ();
4264
4265 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4266 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4267
4268 /* Register save area */
4269 offset += frame->nregs * UNITS_PER_WORD;
4270
4271 /* Va-arg area */
4272 if (ix86_save_varrargs_registers)
4273 {
4274 offset += X86_64_VARARGS_SIZE;
4275 frame->va_arg_size = X86_64_VARARGS_SIZE;
4276 }
4277 else
4278 frame->va_arg_size = 0;
4279
4280 /* Align start of frame for local function. */
4281 frame->padding1 = ((offset + stack_alignment_needed - 1)
4282 & -stack_alignment_needed) - offset;
4283
4284 offset += frame->padding1;
4285
4286 /* Frame pointer points here. */
4287 frame->frame_pointer_offset = offset;
4288
4289 offset += size;
4290
4291 /* Add outgoing arguments area. Can be skipped if we eliminated
4292 all the function calls as dead code. */
4293 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4294 {
4295 offset += current_function_outgoing_args_size;
4296 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4297 }
4298 else
4299 frame->outgoing_arguments_size = 0;
4300
4301 /* Align stack boundary. Only needed if we're calling another function
4302 or using alloca. */
4303 if (!current_function_is_leaf || current_function_calls_alloca)
4304 frame->padding2 = ((offset + preferred_alignment - 1)
4305 & -preferred_alignment) - offset;
4306 else
4307 frame->padding2 = 0;
4308
4309 offset += frame->padding2;
4310
4311 /* We've reached end of stack frame. */
4312 frame->stack_pointer_offset = offset;
4313
4314 /* Size prologue needs to allocate. */
4315 frame->to_allocate =
4316 (size + frame->padding1 + frame->padding2
4317 + frame->outgoing_arguments_size + frame->va_arg_size);
4318
4319 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4320 && current_function_is_leaf)
4321 {
4322 frame->red_zone_size = frame->to_allocate;
4323 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4324 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4325 }
4326 else
4327 frame->red_zone_size = 0;
4328 frame->to_allocate -= frame->red_zone_size;
4329 frame->stack_pointer_offset -= frame->red_zone_size;
4330 #if 0
4331 fprintf (stderr, "nregs: %i\n", frame->nregs);
4332 fprintf (stderr, "size: %i\n", size);
4333 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4334 fprintf (stderr, "padding1: %i\n", frame->padding1);
4335 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4336 fprintf (stderr, "padding2: %i\n", frame->padding2);
4337 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4338 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4339 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4340 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4341 frame->hard_frame_pointer_offset);
4342 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4343 #endif
4344 }
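/* Illustrative sketch (not in the original source) of the frame layout
   computed above, from higher to lower addresses:

       return address
       saved %ebp                 (only if frame_pointer_needed)
       saved registers            (frame->nregs words)
       va-arg register save area  (frame->va_arg_size)
       padding1                   (aligns the local frame)
       local variables            (get_frame_size () bytes)
       outgoing argument area     (frame->outgoing_arguments_size)
       padding2                   (aligns the stack boundary)

   frame->to_allocate covers everything below the register save area,
   minus any red zone used for x86-64 leaf functions.  */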
4345
4346 /* Emit code to save registers in the prologue. */
4347
4348 static void
4349 ix86_emit_save_regs ()
4350 {
4351 register int regno;
4352 rtx insn;
4353
4354 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4355 if (ix86_save_reg (regno, true))
4356 {
4357 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4358 RTX_FRAME_RELATED_P (insn) = 1;
4359 }
4360 }
4361
4362 /* Emit code to save registers using MOV insns. The first register
4363 is stored at POINTER + OFFSET. */
4364 static void
4365 ix86_emit_save_regs_using_mov (pointer, offset)
4366 rtx pointer;
4367 HOST_WIDE_INT offset;
4368 {
4369 int regno;
4370 rtx insn;
4371
4372 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4373 if (ix86_save_reg (regno, true))
4374 {
4375 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4376 Pmode, offset),
4377 gen_rtx_REG (Pmode, regno));
4378 RTX_FRAME_RELATED_P (insn) = 1;
4379 offset += UNITS_PER_WORD;
4380 }
4381 }
4382
4383 /* Expand the prologue into a bunch of separate insns. */
4384
4385 void
4386 ix86_expand_prologue ()
4387 {
4388 rtx insn;
4389 bool pic_reg_used;
4390 struct ix86_frame frame;
4391 int use_mov = 0;
4392 HOST_WIDE_INT allocate;
4393
4394 if (!optimize_size)
4395 {
4396 use_fast_prologue_epilogue
4397 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4398 if (TARGET_PROLOGUE_USING_MOVE)
4399 use_mov = use_fast_prologue_epilogue;
4400 }
4401 ix86_compute_frame_layout (&frame);
4402
4403 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4404 slower on all targets. Also sdb doesn't like it. */
4405
4406 if (frame_pointer_needed)
4407 {
4408 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4409 RTX_FRAME_RELATED_P (insn) = 1;
4410
4411 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4412 RTX_FRAME_RELATED_P (insn) = 1;
4413 }
4414
4415 allocate = frame.to_allocate;
4416 /* In case we are dealing with only a single register and an empty frame,
4417 push is equivalent to the mov+add sequence. */
4418 if (allocate == 0 && frame.nregs <= 1)
4419 use_mov = 0;
4420
4421 if (!use_mov)
4422 ix86_emit_save_regs ();
4423 else
4424 allocate += frame.nregs * UNITS_PER_WORD;
4425
4426 if (allocate == 0)
4427 ;
4428 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4429 {
4430 insn = emit_insn (gen_pro_epilogue_adjust_stack
4431 (stack_pointer_rtx, stack_pointer_rtx,
4432 GEN_INT (-allocate)));
4433 RTX_FRAME_RELATED_P (insn) = 1;
4434 }
4435 else
4436 {
4437 /* ??? Is this only valid for Win32? */
4438
4439 rtx arg0, sym;
4440
4441 if (TARGET_64BIT)
4442 abort ();
4443
4444 arg0 = gen_rtx_REG (SImode, 0);
4445 emit_move_insn (arg0, GEN_INT (allocate));
4446
4447 sym = gen_rtx_MEM (FUNCTION_MODE,
4448 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4449 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4450
4451 CALL_INSN_FUNCTION_USAGE (insn)
4452 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4453 CALL_INSN_FUNCTION_USAGE (insn));
4454 }
4455 if (use_mov)
4456 {
4457 if (!frame_pointer_needed || !frame.to_allocate)
4458 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4459 else
4460 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4461 -frame.nregs * UNITS_PER_WORD);
4462 }
4463
4464 #ifdef SUBTARGET_PROLOGUE
4465 SUBTARGET_PROLOGUE;
4466 #endif
4467
4468 pic_reg_used = false;
4469 if (pic_offset_table_rtx
4470 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4471 || current_function_profile))
4472 {
4473 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4474
4475 if (alt_pic_reg_used != INVALID_REGNUM)
4476 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4477
4478 pic_reg_used = true;
4479 }
4480
4481 if (pic_reg_used)
4482 {
4483 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4484
4485 /* Even with accurate pre-reload life analysis, we can wind up
4486 deleting all references to the pic register after reload.
4487 Consider cross-jumping unifying two sides of a branch
4488 controlled by a comparison vs. the only read from a global.
4489 In that case, allow the set_got to be deleted, though we're
4490 too late to do anything about the ebx save in the prologue. */
4491 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4492 }
4493
4494 /* Prevent function calls from being scheduled before the call to mcount.
4495 In the pic_reg_used case, make sure that the got load isn't deleted. */
4496 if (current_function_profile)
4497 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4498 }
4499
4500 /* Emit code to restore saved registers using MOV insns. First register
4501 is restored from POINTER + OFFSET. */
4502 static void
4503 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4504 rtx pointer;
4505 int offset;
4506 int maybe_eh_return;
4507 {
4508 int regno;
4509
4510 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4511 if (ix86_save_reg (regno, maybe_eh_return))
4512 {
4513 emit_move_insn (gen_rtx_REG (Pmode, regno),
4514 adjust_address (gen_rtx_MEM (Pmode, pointer),
4515 Pmode, offset));
4516 offset += UNITS_PER_WORD;
4517 }
4518 }
4519
4520 /* Restore function stack, frame, and registers. */
4521
4522 void
4523 ix86_expand_epilogue (style)
4524 int style;
4525 {
4526 int regno;
4527 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4528 struct ix86_frame frame;
4529 HOST_WIDE_INT offset;
4530
4531 ix86_compute_frame_layout (&frame);
4532
4533 /* Calculate start of saved registers relative to ebp. Special care
4534 must be taken for the normal return case of a function using
4535 eh_return: the eax and edx registers are marked as saved, but not
4536 restored along this path. */
4537 offset = frame.nregs;
4538 if (current_function_calls_eh_return && style != 2)
4539 offset -= 2;
4540 offset *= -UNITS_PER_WORD;
4541
4542 /* If we're only restoring one register and sp is not valid then
4543 use a move instruction to restore the register, since it's
4544 less work than reloading sp and popping the register.
4545
4546 The default code results in a stack adjustment using an add/lea instruction,
4547 while this code results in a LEAVE instruction (or its discrete equivalent),
4548 so it is profitable in some other cases as well, especially when there
4549 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4550 and there is exactly one register to pop. This heuristic may need some
4551 tuning in the future. */
4552 if ((!sp_valid && frame.nregs <= 1)
4553 || (TARGET_EPILOGUE_USING_MOVE
4554 && use_fast_prologue_epilogue
4555 && (frame.nregs > 1 || frame.to_allocate))
4556 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4557 || (frame_pointer_needed && TARGET_USE_LEAVE
4558 && use_fast_prologue_epilogue && frame.nregs == 1)
4559 || current_function_calls_eh_return)
4560 {
4561 /* Restore registers. We can use ebp or esp to address the memory
4562 locations. If both are available, default to ebp, since offsets
4563 are known to be small. The only exception is esp pointing directly
4564 to the end of the block of saved registers, where we may simplify
4565 the addressing mode. */
4566
4567 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4568 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4569 frame.to_allocate, style == 2);
4570 else
4571 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4572 offset, style == 2);
4573
4574 /* eh_return epilogues need %ecx added to the stack pointer. */
4575 if (style == 2)
4576 {
4577 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4578
4579 if (frame_pointer_needed)
4580 {
4581 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4582 tmp = plus_constant (tmp, UNITS_PER_WORD);
4583 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4584
4585 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4586 emit_move_insn (hard_frame_pointer_rtx, tmp);
4587
4588 emit_insn (gen_pro_epilogue_adjust_stack
4589 (stack_pointer_rtx, sa, const0_rtx));
4590 }
4591 else
4592 {
4593 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4594 tmp = plus_constant (tmp, (frame.to_allocate
4595 + frame.nregs * UNITS_PER_WORD));
4596 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4597 }
4598 }
4599 else if (!frame_pointer_needed)
4600 emit_insn (gen_pro_epilogue_adjust_stack
4601 (stack_pointer_rtx, stack_pointer_rtx,
4602 GEN_INT (frame.to_allocate
4603 + frame.nregs * UNITS_PER_WORD)));
4604 /* If not an i386, mov & pop is faster than "leave". */
4605 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4606 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4607 else
4608 {
4609 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4610 hard_frame_pointer_rtx,
4611 const0_rtx));
4612 if (TARGET_64BIT)
4613 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4614 else
4615 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4616 }
4617 }
4618 else
4619 {
4620 /* First step is to deallocate the stack frame so that we can
4621 pop the registers. */
4622 if (!sp_valid)
4623 {
4624 if (!frame_pointer_needed)
4625 abort ();
4626 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4627 hard_frame_pointer_rtx,
4628 GEN_INT (offset)));
4629 }
4630 else if (frame.to_allocate)
4631 emit_insn (gen_pro_epilogue_adjust_stack
4632 (stack_pointer_rtx, stack_pointer_rtx,
4633 GEN_INT (frame.to_allocate)));
4634
4635 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4636 if (ix86_save_reg (regno, false))
4637 {
4638 if (TARGET_64BIT)
4639 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4640 else
4641 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4642 }
4643 if (frame_pointer_needed)
4644 {
4645 /* Leave results in shorter dependency chains on CPUs that are
4646 able to grok it fast. */
4647 if (TARGET_USE_LEAVE)
4648 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4649 else if (TARGET_64BIT)
4650 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4651 else
4652 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4653 }
4654 }
4655
4656 /* Sibcall epilogues don't want a return instruction. */
4657 if (style == 0)
4658 return;
4659
4660 if (current_function_pops_args && current_function_args_size)
4661 {
4662 rtx popc = GEN_INT (current_function_pops_args);
4663
4664 /* i386 can only pop 64K bytes. If asked to pop more, pop
4665 return address, do explicit add, and jump indirectly to the
4666 caller. */
4667
4668 if (current_function_pops_args >= 65536)
4669 {
4670 rtx ecx = gen_rtx_REG (SImode, 2);
4671
4672 /* There is no "pascal" calling convention in the 64-bit ABI. */
4673 if (TARGET_64BIT)
4674 abort ();
4675
4676 emit_insn (gen_popsi1 (ecx));
4677 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4678 emit_jump_insn (gen_return_indirect_internal (ecx));
4679 }
4680 else
4681 emit_jump_insn (gen_return_pop_internal (popc));
4682 }
4683 else
4684 emit_jump_insn (gen_return_internal ());
4685 }
4686
4687 /* Reset from the function's potential modifications. */
4688
4689 static void
4690 ix86_output_function_epilogue (file, size)
4691 FILE *file ATTRIBUTE_UNUSED;
4692 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4693 {
4694 if (pic_offset_table_rtx)
4695 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4696 }
4697 \f
4698 /* Extract the parts of an RTL expression that is a valid memory address
4699 for an instruction. Return 0 if the structure of the address is
4700 grossly off. Return -1 if the address contains ASHIFT, so it is not
4701 strictly valid, but is still used for computing the length of a lea
4702 instruction. */
4703
4704 static int
4705 ix86_decompose_address (addr, out)
4706 register rtx addr;
4707 struct ix86_address *out;
4708 {
4709 rtx base = NULL_RTX;
4710 rtx index = NULL_RTX;
4711 rtx disp = NULL_RTX;
4712 HOST_WIDE_INT scale = 1;
4713 rtx scale_rtx = NULL_RTX;
4714 int retval = 1;
4715
4716 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4717 base = addr;
4718 else if (GET_CODE (addr) == PLUS)
4719 {
4720 rtx op0 = XEXP (addr, 0);
4721 rtx op1 = XEXP (addr, 1);
4722 enum rtx_code code0 = GET_CODE (op0);
4723 enum rtx_code code1 = GET_CODE (op1);
4724
4725 if (code0 == REG || code0 == SUBREG)
4726 {
4727 if (code1 == REG || code1 == SUBREG)
4728 index = op0, base = op1; /* index + base */
4729 else
4730 base = op0, disp = op1; /* base + displacement */
4731 }
4732 else if (code0 == MULT)
4733 {
4734 index = XEXP (op0, 0);
4735 scale_rtx = XEXP (op0, 1);
4736 if (code1 == REG || code1 == SUBREG)
4737 base = op1; /* index*scale + base */
4738 else
4739 disp = op1; /* index*scale + disp */
4740 }
4741 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4742 {
4743 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4744 scale_rtx = XEXP (XEXP (op0, 0), 1);
4745 base = XEXP (op0, 1);
4746 disp = op1;
4747 }
4748 else if (code0 == PLUS)
4749 {
4750 index = XEXP (op0, 0); /* index + base + disp */
4751 base = XEXP (op0, 1);
4752 disp = op1;
4753 }
4754 else
4755 return 0;
4756 }
4757 else if (GET_CODE (addr) == MULT)
4758 {
4759 index = XEXP (addr, 0); /* index*scale */
4760 scale_rtx = XEXP (addr, 1);
4761 }
4762 else if (GET_CODE (addr) == ASHIFT)
4763 {
4764 rtx tmp;
4765
4766 /* We're called for lea too, which implements ashift on occasion. */
4767 index = XEXP (addr, 0);
4768 tmp = XEXP (addr, 1);
4769 if (GET_CODE (tmp) != CONST_INT)
4770 return 0;
4771 scale = INTVAL (tmp);
4772 if ((unsigned HOST_WIDE_INT) scale > 3)
4773 return 0;
4774 scale = 1 << scale;
4775 retval = -1;
4776 }
4777 else
4778 disp = addr; /* displacement */
4779
4780 /* Extract the integral value of scale. */
4781 if (scale_rtx)
4782 {
4783 if (GET_CODE (scale_rtx) != CONST_INT)
4784 return 0;
4785 scale = INTVAL (scale_rtx);
4786 }
4787
4788 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4789 if (base && index && scale == 1
4790 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4791 || index == stack_pointer_rtx))
4792 {
4793 rtx tmp = base;
4794 base = index;
4795 index = tmp;
4796 }
4797
4798 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4799 if ((base == hard_frame_pointer_rtx
4800 || base == frame_pointer_rtx
4801 || base == arg_pointer_rtx) && !disp)
4802 disp = const0_rtx;
4803
4804 /* Special case: on the K6, [%esi] forces the instruction to be vector
4805 decoded. Avoid this by transforming it to [%esi+0]. */
4806 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4807 && base && !index && !disp
4808 && REG_P (base)
4809 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4810 disp = const0_rtx;
4811
4812 /* Special case: encode reg+reg instead of reg*2. */
4813 if (!base && index && scale && scale == 2)
4814 base = index, scale = 1;
4815
4816 /* Special case: scaling cannot be encoded without base or displacement. */
4817 if (!base && !disp && index && scale != 1)
4818 disp = const0_rtx;
4819
4820 out->base = base;
4821 out->index = index;
4822 out->disp = disp;
4823 out->scale = scale;
4824
4825 return retval;
4826 }
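/* Illustrative example (not in the original source): the address RTL

       (plus:SI (plus:SI (mult:SI (reg:SI A) (const_int 4)) (reg:SI B))
                (const_int 12))

   is decomposed above into index = A, scale = 4, base = B and disp = 12,
   i.e. the operand printed as 12(%B,%A,4) in AT&T syntax.  */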
4827 \f
4828 /* Return the cost of the memory address X.
4829 For i386, it is better to use a complex address than let gcc copy
4830 the address into a reg and make a new pseudo. But not if the address
4831 requires two regs - that would mean more pseudos with longer
4832 lifetimes. */
4833 int
4834 ix86_address_cost (x)
4835 rtx x;
4836 {
4837 struct ix86_address parts;
4838 int cost = 1;
4839
4840 if (!ix86_decompose_address (x, &parts))
4841 abort ();
4842
4843 if (parts.base && GET_CODE (parts.base) == SUBREG)
4844 parts.base = SUBREG_REG (parts.base);
4845 if (parts.index && GET_CODE (parts.index) == SUBREG)
4846 parts.index = SUBREG_REG (parts.index);
4847
4848 /* More complex memory references are better. */
4849 if (parts.disp && parts.disp != const0_rtx)
4850 cost--;
4851
4852 /* Attempt to minimize number of registers in the address. */
4853 if ((parts.base
4854 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4855 || (parts.index
4856 && (!REG_P (parts.index)
4857 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4858 cost++;
4859
4860 if (parts.base
4861 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4862 && parts.index
4863 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4864 && parts.base != parts.index)
4865 cost++;
4866
4867 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4868 since its predecode logic can't detect the length of such instructions
4869 and decoding degenerates to vector decode. Increase the cost of such
4870 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4871 to split such addresses or even to refuse them at all.
4872
4873 The following addressing modes are affected:
4874 [base+scale*index]
4875 [scale*index+disp]
4876 [base+index]
4877
4878 The first and last cases may be avoidable by explicitly coding the zero in
4879 the memory address, but I don't have an AMD-K6 machine handy to check this
4880 theory. */
4881
4882 if (TARGET_K6
4883 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4884 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4885 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4886 cost += 10;
4887
4888 return cost;
4889 }
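/* Illustrative examples (not in the original source): a plain symbolic
   address such as (symbol_ref "x") ends up with cost 0 (the nonzero
   displacement is rewarded), a single pseudo-register address costs 2
   (base cost 1 plus the pseudo penalty), and on the K6 a base+index
   address without a displacement additionally pays the +10 penalty.  */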
4890 \f
4891 /* If X is a machine specific address (i.e. a symbol or label being
4892 referenced as a displacement from the GOT implemented using an
4893 UNSPEC), then return the base term. Otherwise return X. */
4894
4895 rtx
4896 ix86_find_base_term (x)
4897 rtx x;
4898 {
4899 rtx term;
4900
4901 if (TARGET_64BIT)
4902 {
4903 if (GET_CODE (x) != CONST)
4904 return x;
4905 term = XEXP (x, 0);
4906 if (GET_CODE (term) == PLUS
4907 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4908 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4909 term = XEXP (term, 0);
4910 if (GET_CODE (term) != UNSPEC
4911 || XINT (term, 1) != UNSPEC_GOTPCREL)
4912 return x;
4913
4914 term = XVECEXP (term, 0, 0);
4915
4916 if (GET_CODE (term) != SYMBOL_REF
4917 && GET_CODE (term) != LABEL_REF)
4918 return x;
4919
4920 return term;
4921 }
4922
4923 if (GET_CODE (x) != PLUS
4924 || XEXP (x, 0) != pic_offset_table_rtx
4925 || GET_CODE (XEXP (x, 1)) != CONST)
4926 return x;
4927
4928 term = XEXP (XEXP (x, 1), 0);
4929
4930 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4931 term = XEXP (term, 0);
4932
4933 if (GET_CODE (term) != UNSPEC
4934 || XINT (term, 1) != UNSPEC_GOTOFF)
4935 return x;
4936
4937 term = XVECEXP (term, 0, 0);
4938
4939 if (GET_CODE (term) != SYMBOL_REF
4940 && GET_CODE (term) != LABEL_REF)
4941 return x;
4942
4943 return term;
4944 }
4945 \f
4946 /* Determine if a given RTX is a valid constant. We already know this
4947 satisfies CONSTANT_P. */
4948
4949 bool
4950 legitimate_constant_p (x)
4951 rtx x;
4952 {
4953 rtx inner;
4954
4955 switch (GET_CODE (x))
4956 {
4957 case SYMBOL_REF:
4958 /* TLS symbols are not constant. */
4959 if (tls_symbolic_operand (x, Pmode))
4960 return false;
4961 break;
4962
4963 case CONST:
4964 inner = XEXP (x, 0);
4965
4966 /* Offsets of TLS symbols are never valid.
4967 Discourage CSE from creating them. */
4968 if (GET_CODE (inner) == PLUS
4969 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4970 return false;
4971
4972 /* Only some unspecs are valid as "constants". */
4973 if (GET_CODE (inner) == UNSPEC)
4974 switch (XINT (inner, 1))
4975 {
4976 case UNSPEC_TPOFF:
4977 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4978 default:
4979 return false;
4980 }
4981 break;
4982
4983 default:
4984 break;
4985 }
4986
4987 /* Otherwise we handle everything else in the move patterns. */
4988 return true;
4989 }
4990
4991 /* Determine if a given RTX is a valid constant address. */
4992
4993 bool
4994 constant_address_p (x)
4995 rtx x;
4996 {
4997 switch (GET_CODE (x))
4998 {
4999 case LABEL_REF:
5000 case CONST_INT:
5001 return true;
5002
5003 case CONST_DOUBLE:
5004 return TARGET_64BIT;
5005
5006 case CONST:
5007 /* For Mach-O, really believe the CONST. */
5008 if (TARGET_MACHO)
5009 return true;
5010 /* Otherwise fall through. */
5011 case SYMBOL_REF:
5012 return !flag_pic && legitimate_constant_p (x);
5013
5014 default:
5015 return false;
5016 }
5017 }
5018
5019 /* Nonzero if the constant value X is a legitimate general operand
5020 when generating PIC code. It is given that flag_pic is on and
5021 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5022
5023 bool
5024 legitimate_pic_operand_p (x)
5025 rtx x;
5026 {
5027 rtx inner;
5028
5029 switch (GET_CODE (x))
5030 {
5031 case CONST:
5032 inner = XEXP (x, 0);
5033
5034 /* Only some unspecs are valid as "constants". */
5035 if (GET_CODE (inner) == UNSPEC)
5036 switch (XINT (inner, 1))
5037 {
5038 case UNSPEC_TPOFF:
5039 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5040 default:
5041 return false;
5042 }
5043 /* FALLTHRU */
5044
5045 case SYMBOL_REF:
5046 case LABEL_REF:
5047 return legitimate_pic_address_disp_p (x);
5048
5049 default:
5050 return true;
5051 }
5052 }
5053
5054 /* Determine if a given CONST RTX is a valid memory displacement
5055 in PIC mode. */
5056
5057 int
5058 legitimate_pic_address_disp_p (disp)
5059 register rtx disp;
5060 {
5061 bool saw_plus;
5062
5063 /* In 64bit mode we can allow direct addresses of symbols and labels
5064 when they are not dynamic symbols. */
5065 if (TARGET_64BIT)
5066 {
5067 rtx x = disp;
5068 if (GET_CODE (disp) == CONST)
5069 x = XEXP (disp, 0);
5070 /* ??? Handle PIC code models */
5071 if (GET_CODE (x) == PLUS
5072 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5073 && ix86_cmodel == CM_SMALL_PIC
5074 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5075 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5076 x = XEXP (x, 0);
5077 if (local_symbolic_operand (x, Pmode))
5078 return 1;
5079 }
5080 if (GET_CODE (disp) != CONST)
5081 return 0;
5082 disp = XEXP (disp, 0);
5083
5084 if (TARGET_64BIT)
5085 {
5086 /* It is unsafe to allow PLUS expressions here. This limits the allowed
5087 distance of GOT table entries. We should not need these anyway. */
5088 if (GET_CODE (disp) != UNSPEC
5089 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5090 return 0;
5091
5092 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5093 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5094 return 0;
5095 return 1;
5096 }
5097
5098 saw_plus = false;
5099 if (GET_CODE (disp) == PLUS)
5100 {
5101 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5102 return 0;
5103 disp = XEXP (disp, 0);
5104 saw_plus = true;
5105 }
5106
5107 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5108 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5109 {
5110 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5111 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5112 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5113 {
5114 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5115 if (strstr (sym_name, "$pb") != 0)
5116 return 1;
5117 }
5118 }
5119
5120 if (GET_CODE (disp) != UNSPEC)
5121 return 0;
5122
5123 switch (XINT (disp, 1))
5124 {
5125 case UNSPEC_GOT:
5126 if (saw_plus)
5127 return false;
5128 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5129 case UNSPEC_GOTOFF:
5130 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5131 case UNSPEC_GOTTPOFF:
5132 case UNSPEC_GOTNTPOFF:
5133 case UNSPEC_INDNTPOFF:
5134 if (saw_plus)
5135 return false;
5136 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5137 case UNSPEC_NTPOFF:
5138 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5139 case UNSPEC_DTPOFF:
5140 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5141 }
5142
5143 return 0;
5144 }
5145
5146 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5147 memory address for an instruction. The MODE argument is the machine mode
5148 for the MEM expression that wants to use this address.
5149
5150 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5151 convert common non-canonical forms to canonical form so that they will
5152 be recognized. */
5153
5154 int
5155 legitimate_address_p (mode, addr, strict)
5156 enum machine_mode mode;
5157 register rtx addr;
5158 int strict;
5159 {
5160 struct ix86_address parts;
5161 rtx base, index, disp;
5162 HOST_WIDE_INT scale;
5163 const char *reason = NULL;
5164 rtx reason_rtx = NULL_RTX;
5165
5166 if (TARGET_DEBUG_ADDR)
5167 {
5168 fprintf (stderr,
5169 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5170 GET_MODE_NAME (mode), strict);
5171 debug_rtx (addr);
5172 }
5173
5174 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5175 {
5176 if (TARGET_DEBUG_ADDR)
5177 fprintf (stderr, "Success.\n");
5178 return TRUE;
5179 }
5180
5181 if (ix86_decompose_address (addr, &parts) <= 0)
5182 {
5183 reason = "decomposition failed";
5184 goto report_error;
5185 }
5186
5187 base = parts.base;
5188 index = parts.index;
5189 disp = parts.disp;
5190 scale = parts.scale;
5191
5192 /* Validate base register.
5193
5194 Don't allow SUBREG's here, it can lead to spill failures when the base
5195 is one word out of a two word structure, which is represented internally
5196 as a DImode int. */
5197
5198 if (base)
5199 {
5200 rtx reg;
5201 reason_rtx = base;
5202
5203 if (GET_CODE (base) == SUBREG)
5204 reg = SUBREG_REG (base);
5205 else
5206 reg = base;
5207
5208 if (GET_CODE (reg) != REG)
5209 {
5210 reason = "base is not a register";
5211 goto report_error;
5212 }
5213
5214 if (GET_MODE (base) != Pmode)
5215 {
5216 reason = "base is not in Pmode";
5217 goto report_error;
5218 }
5219
5220 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5221 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5222 {
5223 reason = "base is not valid";
5224 goto report_error;
5225 }
5226 }
5227
5228 /* Validate index register.
5229
5230 Don't allow SUBREG's here, it can lead to spill failures when the index
5231 is one word out of a two word structure, which is represented internally
5232 as a DImode int. */
5233
5234 if (index)
5235 {
5236 rtx reg;
5237 reason_rtx = index;
5238
5239 if (GET_CODE (index) == SUBREG)
5240 reg = SUBREG_REG (index);
5241 else
5242 reg = index;
5243
5244 if (GET_CODE (reg) != REG)
5245 {
5246 reason = "index is not a register";
5247 goto report_error;
5248 }
5249
5250 if (GET_MODE (index) != Pmode)
5251 {
5252 reason = "index is not in Pmode";
5253 goto report_error;
5254 }
5255
5256 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5257 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5258 {
5259 reason = "index is not valid";
5260 goto report_error;
5261 }
5262 }
5263
5264 /* Validate scale factor. */
5265 if (scale != 1)
5266 {
5267 reason_rtx = GEN_INT (scale);
5268 if (!index)
5269 {
5270 reason = "scale without index";
5271 goto report_error;
5272 }
5273
5274 if (scale != 2 && scale != 4 && scale != 8)
5275 {
5276 reason = "scale is not a valid multiplier";
5277 goto report_error;
5278 }
5279 }
5280
5281 /* Validate displacement. */
5282 if (disp)
5283 {
5284 reason_rtx = disp;
5285
5286 if (TARGET_64BIT)
5287 {
5288 if (!x86_64_sign_extended_value (disp))
5289 {
5290 reason = "displacement is out of range";
5291 goto report_error;
5292 }
5293 }
5294 else
5295 {
5296 if (GET_CODE (disp) == CONST_DOUBLE)
5297 {
5298 reason = "displacement is a const_double";
5299 goto report_error;
5300 }
5301 }
5302
5303 if (GET_CODE (disp) == CONST
5304 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5305 switch (XINT (XEXP (disp, 0), 1))
5306 {
5307 case UNSPEC_GOT:
5308 case UNSPEC_GOTOFF:
5309 case UNSPEC_GOTPCREL:
5310 if (!flag_pic)
5311 abort ();
5312 goto is_legitimate_pic;
5313
5314 case UNSPEC_GOTTPOFF:
5315 case UNSPEC_GOTNTPOFF:
5316 case UNSPEC_INDNTPOFF:
5317 case UNSPEC_NTPOFF:
5318 case UNSPEC_DTPOFF:
5319 break;
5320
5321 default:
5322 reason = "invalid address unspec";
5323 goto report_error;
5324 }
5325
5326 else if (flag_pic && (SYMBOLIC_CONST (disp)
5327 #if TARGET_MACHO
5328 && !machopic_operand_p (disp)
5329 #endif
5330 ))
5331 {
5332 is_legitimate_pic:
5333 if (TARGET_64BIT && (index || base))
5334 {
5335 reason = "non-constant pic memory reference";
5336 goto report_error;
5337 }
5338 if (! legitimate_pic_address_disp_p (disp))
5339 {
5340 reason = "displacement is an invalid pic construct";
5341 goto report_error;
5342 }
5343
5344 /* This code used to verify that a symbolic pic displacement
5345 includes the pic_offset_table_rtx register.
5346
5347 While this is a good idea, unfortunately these constructs may
5348 be created by the "adds using lea" optimization for incorrect
5349 code like:
5350
5351 int a;
5352 int foo(int i)
5353 {
5354 return *(&a+i);
5355 }
5356
5357 This code is nonsensical, but results in addressing the
5358 GOT table with a pic_offset_table_rtx base. We can't
5359 just refuse it easily, since it gets matched by the
5360 "addsi3" pattern, which later gets split to lea when the
5361 output register differs from the input. While this
5362 could be handled by a separate addsi pattern for this case
5363 that never results in lea, disabling this test seems to be
5364 the easier and correct fix for the crash. */
5365 }
5366 else if (!CONSTANT_ADDRESS_P (disp))
5367 {
5368 reason = "displacement is not constant";
5369 goto report_error;
5370 }
5371 }
5372
5373 /* Everything looks valid. */
5374 if (TARGET_DEBUG_ADDR)
5375 fprintf (stderr, "Success.\n");
5376 return TRUE;
5377
5378 report_error:
5379 if (TARGET_DEBUG_ADDR)
5380 {
5381 fprintf (stderr, "Error: %s\n", reason);
5382 debug_rtx (reason_rtx);
5383 }
5384 return FALSE;
5385 }
5386 \f
5387 /* Return a unique alias set for the GOT. */
5388
5389 static HOST_WIDE_INT
5390 ix86_GOT_alias_set ()
5391 {
5392 static HOST_WIDE_INT set = -1;
5393 if (set == -1)
5394 set = new_alias_set ();
5395 return set;
5396 }
5397
5398 /* Return a legitimate reference for ORIG (an address) using the
5399 register REG. If REG is 0, a new pseudo is generated.
5400
5401 There are two types of references that must be handled:
5402
5403 1. Global data references must load the address from the GOT, via
5404 the PIC reg. An insn is emitted to do this load, and the reg is
5405 returned.
5406
5407 2. Static data references, constant pool addresses, and code labels
5408 compute the address as an offset from the GOT, whose base is in
5409 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5410 differentiate them from global data objects. The returned
5411 address is the PIC reg + an unspec constant.
5412
5413 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5414 reg also appears in the address. */
5415
5416 rtx
5417 legitimize_pic_address (orig, reg)
5418 rtx orig;
5419 rtx reg;
5420 {
5421 rtx addr = orig;
5422 rtx new = orig;
5423 rtx base;
5424
5425 #if TARGET_MACHO
5426 if (reg == 0)
5427 reg = gen_reg_rtx (Pmode);
5428 /* Use the generic Mach-O PIC machinery. */
5429 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5430 #endif
5431
5432 if (local_symbolic_operand (addr, Pmode))
5433 {
5434 /* In 64bit mode we can address such objects directly. */
5435 if (TARGET_64BIT)
5436 new = addr;
5437 else
5438 {
5439 /* This symbol may be referenced via a displacement from the PIC
5440 base address (@GOTOFF). */
5441
5442 if (reload_in_progress)
5443 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5444 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5445 new = gen_rtx_CONST (Pmode, new);
5446 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5447
5448 if (reg != 0)
5449 {
5450 emit_move_insn (reg, new);
5451 new = reg;
5452 }
5453 }
5454 }
5455 else if (GET_CODE (addr) == SYMBOL_REF)
5456 {
5457 if (TARGET_64BIT)
5458 {
5459 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5460 new = gen_rtx_CONST (Pmode, new);
5461 new = gen_rtx_MEM (Pmode, new);
5462 RTX_UNCHANGING_P (new) = 1;
5463 set_mem_alias_set (new, ix86_GOT_alias_set ());
5464
5465 if (reg == 0)
5466 reg = gen_reg_rtx (Pmode);
5467 /* Use gen_movsi directly, otherwise the address is loaded
5468 into a register for CSE. We don't want to CSE this address;
5469 instead we CSE addresses from the GOT table, so skip this. */
5470 emit_insn (gen_movsi (reg, new));
5471 new = reg;
5472 }
5473 else
5474 {
5475 /* This symbol must be referenced via a load from the
5476 Global Offset Table (@GOT). */
5477
5478 if (reload_in_progress)
5479 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5480 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5481 new = gen_rtx_CONST (Pmode, new);
5482 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5483 new = gen_rtx_MEM (Pmode, new);
5484 RTX_UNCHANGING_P (new) = 1;
5485 set_mem_alias_set (new, ix86_GOT_alias_set ());
5486
5487 if (reg == 0)
5488 reg = gen_reg_rtx (Pmode);
5489 emit_move_insn (reg, new);
5490 new = reg;
5491 }
5492 }
5493 else
5494 {
5495 if (GET_CODE (addr) == CONST)
5496 {
5497 addr = XEXP (addr, 0);
5498
5499 /* We must match stuff we have generated before.  Assume the only
5500 unspecs that can get here are ours. Not that we could do
5501 anything with them anyway... */
5502 if (GET_CODE (addr) == UNSPEC
5503 || (GET_CODE (addr) == PLUS
5504 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5505 return orig;
5506 if (GET_CODE (addr) != PLUS)
5507 abort ();
5508 }
5509 if (GET_CODE (addr) == PLUS)
5510 {
5511 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5512
5513 /* Check first to see if this is a constant offset from a @GOTOFF
5514 symbol reference. */
5515 if (local_symbolic_operand (op0, Pmode)
5516 && GET_CODE (op1) == CONST_INT)
5517 {
5518 if (!TARGET_64BIT)
5519 {
5520 if (reload_in_progress)
5521 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5522 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5523 UNSPEC_GOTOFF);
5524 new = gen_rtx_PLUS (Pmode, new, op1);
5525 new = gen_rtx_CONST (Pmode, new);
5526 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5527
5528 if (reg != 0)
5529 {
5530 emit_move_insn (reg, new);
5531 new = reg;
5532 }
5533 }
5534 else
5535 {
5536 /* ??? We need to limit offsets here. */
5537 }
5538 }
5539 else
5540 {
5541 base = legitimize_pic_address (XEXP (addr, 0), reg);
5542 new = legitimize_pic_address (XEXP (addr, 1),
5543 base == reg ? NULL_RTX : reg);
5544
5545 if (GET_CODE (new) == CONST_INT)
5546 new = plus_constant (base, INTVAL (new));
5547 else
5548 {
5549 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5550 {
5551 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5552 new = XEXP (new, 1);
5553 }
5554 new = gen_rtx_PLUS (Pmode, base, new);
5555 }
5556 }
5557 }
5558 }
5559 return new;
5560 }
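
/* For illustration: on ia32 the local-symbol case above typically ends up
   as a "leal sym@GOTOFF(%ebx), %reg", the global-symbol case loads the
   address from the GOT with "movl sym@GOT(%ebx), %reg", and the 64-bit
   case uses the RIP-relative "movq sym@GOTPCREL(%rip), %reg", assuming
   %ebx holds the PIC base.  */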
5561
5562 static void
5563 ix86_encode_section_info (decl, first)
5564 tree decl;
5565 int first ATTRIBUTE_UNUSED;
5566 {
5567 bool local_p = (*targetm.binds_local_p) (decl);
5568 rtx rtl, symbol;
5569
5570 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5571 if (GET_CODE (rtl) != MEM)
5572 return;
5573 symbol = XEXP (rtl, 0);
5574 if (GET_CODE (symbol) != SYMBOL_REF)
5575 return;
5576
5577 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5578 symbol so that we may access it directly in the GOT. */
5579
5580 if (flag_pic)
5581 SYMBOL_REF_FLAG (symbol) = local_p;
5582
5583 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5584 "local dynamic", "initial exec" or "local exec" TLS models
5585 respectively. */
5586
5587 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5588 {
5589 const char *symbol_str;
5590 char *newstr;
5591 size_t len;
5592 enum tls_model kind = decl_tls_model (decl);
5593
5594 symbol_str = XSTR (symbol, 0);
5595
5596 if (symbol_str[0] == '%')
5597 {
5598 if (symbol_str[1] == tls_model_chars[kind])
5599 return;
5600 symbol_str += 2;
5601 }
5602 len = strlen (symbol_str) + 1;
5603 newstr = alloca (len + 2);
5604
5605 newstr[0] = '%';
5606 newstr[1] = tls_model_chars[kind];
5607 memcpy (newstr + 2, symbol_str, len);
5608
5609 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5610 }
5611 }
5612
5613 /* Undo the above when printing symbol names. */
5614
5615 static const char *
5616 ix86_strip_name_encoding (str)
5617 const char *str;
5618 {
5619 if (str[0] == '%')
5620 str += 2;
5621 if (str [0] == '*')
5622 str += 1;
5623 return str;
5624 }
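
/* Example: a thread-local symbol whose name was rewritten above to "%Gfoo"
   (global dynamic model) is printed again as plain "foo", and a name
   stored as "*bar" loses its leading '*', giving "bar".  */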
5625 \f
5626 /* Load the thread pointer into a register. */
5627
5628 static rtx
5629 get_thread_pointer ()
5630 {
5631 rtx tp;
5632
5633 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5634 tp = gen_rtx_MEM (Pmode, tp);
5635 RTX_UNCHANGING_P (tp) = 1;
5636 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5637 tp = force_reg (Pmode, tp);
5638
5639 return tp;
5640 }
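
/* The UNSPEC_TP reference built here is printed by print_operand_address
   below as a %gs-relative access, so on GNU/Linux targets the thread
   pointer is typically fetched with something like "movl %gs:0, %reg".  */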
5641
5642 /* Try machine-dependent ways of modifying an illegitimate address
5643 to be legitimate. If we find one, return the new, valid address.
5644 This macro is used in only one place: `memory_address' in explow.c.
5645
5646 OLDX is the address as it was before break_out_memory_refs was called.
5647 In some cases it is useful to look at this to decide what needs to be done.
5648
5649 MODE and WIN are passed so that this macro can use
5650 GO_IF_LEGITIMATE_ADDRESS.
5651
5652 It is always safe for this macro to do nothing. It exists to recognize
5653 opportunities to optimize the output.
5654
5655 For the 80386, we handle X+REG by loading X into a register R and
5656 using R+REG. R will go in a general reg and indexing will be used.
5657 However, if REG is a broken-out memory address or multiplication,
5658 nothing needs to be done because REG can certainly go in a general reg.
5659
5660 When -fpic is used, special handling is needed for symbolic references.
5661 See comments by legitimize_pic_address in i386.c for details. */
5662
5663 rtx
5664 legitimize_address (x, oldx, mode)
5665 register rtx x;
5666 register rtx oldx ATTRIBUTE_UNUSED;
5667 enum machine_mode mode;
5668 {
5669 int changed = 0;
5670 unsigned log;
5671
5672 if (TARGET_DEBUG_ADDR)
5673 {
5674 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5675 GET_MODE_NAME (mode));
5676 debug_rtx (x);
5677 }
5678
5679 log = tls_symbolic_operand (x, mode);
5680 if (log)
5681 {
5682 rtx dest, base, off, pic;
5683
5684 switch (log)
5685 {
5686 case TLS_MODEL_GLOBAL_DYNAMIC:
5687 dest = gen_reg_rtx (Pmode);
5688 emit_insn (gen_tls_global_dynamic (dest, x));
5689 break;
5690
5691 case TLS_MODEL_LOCAL_DYNAMIC:
5692 base = gen_reg_rtx (Pmode);
5693 emit_insn (gen_tls_local_dynamic_base (base));
5694
5695 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5696 off = gen_rtx_CONST (Pmode, off);
5697
5698 return gen_rtx_PLUS (Pmode, base, off);
5699
5700 case TLS_MODEL_INITIAL_EXEC:
5701 if (flag_pic)
5702 {
5703 if (reload_in_progress)
5704 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5705 pic = pic_offset_table_rtx;
5706 }
5707 else if (!TARGET_GNU_TLS)
5708 {
5709 pic = gen_reg_rtx (Pmode);
5710 emit_insn (gen_set_got (pic));
5711 }
5712 else
5713 pic = NULL;
5714
5715 base = get_thread_pointer ();
5716
5717 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5718 !TARGET_GNU_TLS
5719 ? UNSPEC_GOTTPOFF
5720 : flag_pic ? UNSPEC_GOTNTPOFF
5721 : UNSPEC_INDNTPOFF);
5722 off = gen_rtx_CONST (Pmode, off);
5723 if (flag_pic || !TARGET_GNU_TLS)
5724 off = gen_rtx_PLUS (Pmode, pic, off);
5725 off = gen_rtx_MEM (Pmode, off);
5726 RTX_UNCHANGING_P (off) = 1;
5727 set_mem_alias_set (off, ix86_GOT_alias_set ());
5728 dest = gen_reg_rtx (Pmode);
5729
5730 if (TARGET_GNU_TLS)
5731 {
5732 emit_move_insn (dest, off);
5733 return gen_rtx_PLUS (Pmode, base, dest);
5734 }
5735 else
5736 emit_insn (gen_subsi3 (dest, base, off));
5737 break;
5738
5739 case TLS_MODEL_LOCAL_EXEC:
5740 base = get_thread_pointer ();
5741
5742 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5743 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5744 off = gen_rtx_CONST (Pmode, off);
5745
5746 if (TARGET_GNU_TLS)
5747 return gen_rtx_PLUS (Pmode, base, off);
5748 else
5749 {
5750 dest = gen_reg_rtx (Pmode);
5751 emit_insn (gen_subsi3 (dest, base, off));
5752 }
5753 break;
5754
5755 default:
5756 abort ();
5757 }
5758
5759 return dest;
5760 }
5761
5762 if (flag_pic && SYMBOLIC_CONST (x))
5763 return legitimize_pic_address (x, 0);
5764
5765 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5766 if (GET_CODE (x) == ASHIFT
5767 && GET_CODE (XEXP (x, 1)) == CONST_INT
5768 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
5769 {
5770 changed = 1;
5771 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5772 GEN_INT (1 << log));
5773 }
5774
5775 if (GET_CODE (x) == PLUS)
5776 {
5777 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5778
5779 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5780 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5781 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
5782 {
5783 changed = 1;
5784 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5785 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5786 GEN_INT (1 << log));
5787 }
5788
5789 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5790 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5791 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
5792 {
5793 changed = 1;
5794 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5795 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5796 GEN_INT (1 << log));
5797 }
5798
5799 /* Put multiply first if it isn't already. */
5800 if (GET_CODE (XEXP (x, 1)) == MULT)
5801 {
5802 rtx tmp = XEXP (x, 0);
5803 XEXP (x, 0) = XEXP (x, 1);
5804 XEXP (x, 1) = tmp;
5805 changed = 1;
5806 }
5807
5808 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5809 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5810 created by virtual register instantiation, register elimination, and
5811 similar optimizations. */
5812 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5813 {
5814 changed = 1;
5815 x = gen_rtx_PLUS (Pmode,
5816 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5817 XEXP (XEXP (x, 1), 0)),
5818 XEXP (XEXP (x, 1), 1));
5819 }
5820
5821 /* Canonicalize
5822 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5823 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5824 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5825 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5826 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5827 && CONSTANT_P (XEXP (x, 1)))
5828 {
5829 rtx constant;
5830 rtx other = NULL_RTX;
5831
5832 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5833 {
5834 constant = XEXP (x, 1);
5835 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5836 }
5837 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5838 {
5839 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5840 other = XEXP (x, 1);
5841 }
5842 else
5843 constant = 0;
5844
5845 if (constant)
5846 {
5847 changed = 1;
5848 x = gen_rtx_PLUS (Pmode,
5849 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5850 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5851 plus_constant (other, INTVAL (constant)));
5852 }
5853 }
5854
5855 if (changed && legitimate_address_p (mode, x, FALSE))
5856 return x;
5857
5858 if (GET_CODE (XEXP (x, 0)) == MULT)
5859 {
5860 changed = 1;
5861 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5862 }
5863
5864 if (GET_CODE (XEXP (x, 1)) == MULT)
5865 {
5866 changed = 1;
5867 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5868 }
5869
5870 if (changed
5871 && GET_CODE (XEXP (x, 1)) == REG
5872 && GET_CODE (XEXP (x, 0)) == REG)
5873 return x;
5874
5875 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5876 {
5877 changed = 1;
5878 x = legitimize_pic_address (x, 0);
5879 }
5880
5881 if (changed && legitimate_address_p (mode, x, FALSE))
5882 return x;
5883
5884 if (GET_CODE (XEXP (x, 0)) == REG)
5885 {
5886 register rtx temp = gen_reg_rtx (Pmode);
5887 register rtx val = force_operand (XEXP (x, 1), temp);
5888 if (val != temp)
5889 emit_move_insn (temp, val);
5890
5891 XEXP (x, 1) = temp;
5892 return x;
5893 }
5894
5895 else if (GET_CODE (XEXP (x, 1)) == REG)
5896 {
5897 register rtx temp = gen_reg_rtx (Pmode);
5898 register rtx val = force_operand (XEXP (x, 0), temp);
5899 if (val != temp)
5900 emit_move_insn (temp, val);
5901
5902 XEXP (x, 0) = temp;
5903 return x;
5904 }
5905 }
5906
5907 return x;
5908 }
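
/* A worked example of the PLUS canonicalization above: an address such as
       (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 12)))
   is reassociated into
       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))
   which matches the machine's scaled-index addressing form, e.g.
   "12(%B,%A,4)" in AT&T syntax.  */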
5909 \f
5910 /* Print an integer constant expression in assembler syntax. Addition
5911 and subtraction are the only arithmetic that may appear in these
5912 expressions. FILE is the stdio stream to write to, X is the rtx, and
5913 CODE is the operand print code from the output string. */
5914
5915 static void
5916 output_pic_addr_const (file, x, code)
5917 FILE *file;
5918 rtx x;
5919 int code;
5920 {
5921 char buf[256];
5922
5923 switch (GET_CODE (x))
5924 {
5925 case PC:
5926 if (flag_pic)
5927 putc ('.', file);
5928 else
5929 abort ();
5930 break;
5931
5932 case SYMBOL_REF:
5933 assemble_name (file, XSTR (x, 0));
5934 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5935 fputs ("@PLT", file);
5936 break;
5937
5938 case LABEL_REF:
5939 x = XEXP (x, 0);
5940 /* FALLTHRU */
5941 case CODE_LABEL:
5942 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5943 assemble_name (asm_out_file, buf);
5944 break;
5945
5946 case CONST_INT:
5947 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5948 break;
5949
5950 case CONST:
5951 /* This used to output parentheses around the expression,
5952 but that does not work on the 386 (either ATT or BSD assembler). */
5953 output_pic_addr_const (file, XEXP (x, 0), code);
5954 break;
5955
5956 case CONST_DOUBLE:
5957 if (GET_MODE (x) == VOIDmode)
5958 {
5959 /* We can use %d if the number is <32 bits and positive. */
5960 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5961 fprintf (file, "0x%lx%08lx",
5962 (unsigned long) CONST_DOUBLE_HIGH (x),
5963 (unsigned long) CONST_DOUBLE_LOW (x));
5964 else
5965 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5966 }
5967 else
5968 /* We can't handle floating point constants;
5969 PRINT_OPERAND must handle them. */
5970 output_operand_lossage ("floating constant misused");
5971 break;
5972
5973 case PLUS:
5974 /* Some assemblers need integer constants to appear first. */
5975 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5976 {
5977 output_pic_addr_const (file, XEXP (x, 0), code);
5978 putc ('+', file);
5979 output_pic_addr_const (file, XEXP (x, 1), code);
5980 }
5981 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5982 {
5983 output_pic_addr_const (file, XEXP (x, 1), code);
5984 putc ('+', file);
5985 output_pic_addr_const (file, XEXP (x, 0), code);
5986 }
5987 else
5988 abort ();
5989 break;
5990
5991 case MINUS:
5992 if (!TARGET_MACHO)
5993 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5994 output_pic_addr_const (file, XEXP (x, 0), code);
5995 putc ('-', file);
5996 output_pic_addr_const (file, XEXP (x, 1), code);
5997 if (!TARGET_MACHO)
5998 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5999 break;
6000
6001 case UNSPEC:
6002 if (XVECLEN (x, 0) != 1)
6003 abort ();
6004 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6005 switch (XINT (x, 1))
6006 {
6007 case UNSPEC_GOT:
6008 fputs ("@GOT", file);
6009 break;
6010 case UNSPEC_GOTOFF:
6011 fputs ("@GOTOFF", file);
6012 break;
6013 case UNSPEC_GOTPCREL:
6014 fputs ("@GOTPCREL(%rip)", file);
6015 break;
6016 case UNSPEC_GOTTPOFF:
6017 /* FIXME: This might be @TPOFF in Sun ld too. */
6018 fputs ("@GOTTPOFF", file);
6019 break;
6020 case UNSPEC_TPOFF:
6021 fputs ("@TPOFF", file);
6022 break;
6023 case UNSPEC_NTPOFF:
6024 fputs ("@NTPOFF", file);
6025 break;
6026 case UNSPEC_DTPOFF:
6027 fputs ("@DTPOFF", file);
6028 break;
6029 case UNSPEC_GOTNTPOFF:
6030 fputs ("@GOTNTPOFF", file);
6031 break;
6032 case UNSPEC_INDNTPOFF:
6033 fputs ("@INDNTPOFF", file);
6034 break;
6035 default:
6036 output_operand_lossage ("invalid UNSPEC as operand");
6037 break;
6038 }
6039 break;
6040
6041 default:
6042 output_operand_lossage ("invalid expression as operand");
6043 }
6044 }
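
/* Example outputs: (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   prints as "foo@GOTOFF", and a MINUS of two expressions is wrapped in
   brackets for AT&T syntax or parentheses for Intel syntax on non-Mach-O
   targets.  */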
6045
6046 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6047 We need to handle our special PIC relocations. */
6048
6049 void
6050 i386_dwarf_output_addr_const (file, x)
6051 FILE *file;
6052 rtx x;
6053 {
6054 #ifdef ASM_QUAD
6055 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6056 #else
6057 if (TARGET_64BIT)
6058 abort ();
6059 fprintf (file, "%s", ASM_LONG);
6060 #endif
6061 if (flag_pic)
6062 output_pic_addr_const (file, x, '\0');
6063 else
6064 output_addr_const (file, x);
6065 fputc ('\n', file);
6066 }
6067
6068 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6069 We need to emit DTP-relative relocations. */
6070
6071 void
6072 i386_output_dwarf_dtprel (file, size, x)
6073 FILE *file;
6074 int size;
6075 rtx x;
6076 {
6077 switch (size)
6078 {
6079 case 4:
6080 fputs (ASM_LONG, file);
6081 break;
6082 case 8:
6083 #ifdef ASM_QUAD
6084 fputs (ASM_QUAD, file);
6085 break;
6086 #endif
6087 default:
6088 abort ();
6089 }
6090
6091 output_addr_const (file, x);
6092 fputs ("@DTPOFF", file);
6093 }
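
/* Assuming the usual ASM_LONG definition, a 4-byte request for a symbol
   "x" emits something like ".long x@DTPOFF", i.e. the offset of "x"
   within its module's TLS block.  */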
6094
6095 /* In the name of slightly smaller debug output, and to cater to
6096 general assembler lossage, recognize PIC+GOTOFF and turn it back
6097 into a direct symbol reference. */
6098
6099 rtx
6100 i386_simplify_dwarf_addr (orig_x)
6101 rtx orig_x;
6102 {
6103 rtx x = orig_x, y;
6104
6105 if (GET_CODE (x) == MEM)
6106 x = XEXP (x, 0);
6107
6108 if (TARGET_64BIT)
6109 {
6110 if (GET_CODE (x) != CONST
6111 || GET_CODE (XEXP (x, 0)) != UNSPEC
6112 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6113 || GET_CODE (orig_x) != MEM)
6114 return orig_x;
6115 return XVECEXP (XEXP (x, 0), 0, 0);
6116 }
6117
6118 if (GET_CODE (x) != PLUS
6119 || GET_CODE (XEXP (x, 1)) != CONST)
6120 return orig_x;
6121
6122 if (GET_CODE (XEXP (x, 0)) == REG
6123 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6124 /* %ebx + GOT/GOTOFF */
6125 y = NULL;
6126 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6127 {
6128 /* %ebx + %reg * scale + GOT/GOTOFF */
6129 y = XEXP (x, 0);
6130 if (GET_CODE (XEXP (y, 0)) == REG
6131 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6132 y = XEXP (y, 1);
6133 else if (GET_CODE (XEXP (y, 1)) == REG
6134 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6135 y = XEXP (y, 0);
6136 else
6137 return orig_x;
6138 if (GET_CODE (y) != REG
6139 && GET_CODE (y) != MULT
6140 && GET_CODE (y) != ASHIFT)
6141 return orig_x;
6142 }
6143 else
6144 return orig_x;
6145
6146 x = XEXP (XEXP (x, 1), 0);
6147 if (GET_CODE (x) == UNSPEC
6148 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6149 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6150 {
6151 if (y)
6152 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6153 return XVECEXP (x, 0, 0);
6154 }
6155
6156 if (GET_CODE (x) == PLUS
6157 && GET_CODE (XEXP (x, 0)) == UNSPEC
6158 && GET_CODE (XEXP (x, 1)) == CONST_INT
6159 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6160 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6161 && GET_CODE (orig_x) != MEM)))
6162 {
6163 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6164 if (y)
6165 return gen_rtx_PLUS (Pmode, y, x);
6166 return x;
6167 }
6168
6169 return orig_x;
6170 }
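
/* Example: an address of the form
       (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "foo") here, so the debug info
   refers to the symbol directly rather than to the PIC expression.  */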
6171 \f
6172 static void
6173 put_condition_code (code, mode, reverse, fp, file)
6174 enum rtx_code code;
6175 enum machine_mode mode;
6176 int reverse, fp;
6177 FILE *file;
6178 {
6179 const char *suffix;
6180
6181 if (mode == CCFPmode || mode == CCFPUmode)
6182 {
6183 enum rtx_code second_code, bypass_code;
6184 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6185 if (bypass_code != NIL || second_code != NIL)
6186 abort ();
6187 code = ix86_fp_compare_code_to_integer (code);
6188 mode = CCmode;
6189 }
6190 if (reverse)
6191 code = reverse_condition (code);
6192
6193 switch (code)
6194 {
6195 case EQ:
6196 suffix = "e";
6197 break;
6198 case NE:
6199 suffix = "ne";
6200 break;
6201 case GT:
6202 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6203 abort ();
6204 suffix = "g";
6205 break;
6206 case GTU:
6207 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6208 Those same assemblers have the same but opposite lossage on cmov. */
6209 if (mode != CCmode)
6210 abort ();
6211 suffix = fp ? "nbe" : "a";
6212 break;
6213 case LT:
6214 if (mode == CCNOmode || mode == CCGOCmode)
6215 suffix = "s";
6216 else if (mode == CCmode || mode == CCGCmode)
6217 suffix = "l";
6218 else
6219 abort ();
6220 break;
6221 case LTU:
6222 if (mode != CCmode)
6223 abort ();
6224 suffix = "b";
6225 break;
6226 case GE:
6227 if (mode == CCNOmode || mode == CCGOCmode)
6228 suffix = "ns";
6229 else if (mode == CCmode || mode == CCGCmode)
6230 suffix = "ge";
6231 else
6232 abort ();
6233 break;
6234 case GEU:
6235 /* ??? As above. */
6236 if (mode != CCmode)
6237 abort ();
6238 suffix = fp ? "nb" : "ae";
6239 break;
6240 case LE:
6241 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6242 abort ();
6243 suffix = "le";
6244 break;
6245 case LEU:
6246 if (mode != CCmode)
6247 abort ();
6248 suffix = "be";
6249 break;
6250 case UNORDERED:
6251 suffix = fp ? "u" : "p";
6252 break;
6253 case ORDERED:
6254 suffix = fp ? "nu" : "np";
6255 break;
6256 default:
6257 abort ();
6258 }
6259 fputs (suffix, file);
6260 }
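
/* Examples: EQ yields the suffix "e" (as in je/sete/cmove); GTU yields
   "a", or "nbe" in the fp case to work around the assembler problem
   noted above; with REVERSE set, EQ is first turned into NE and "ne" is
   printed instead.  */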
6261
6262 void
6263 print_reg (x, code, file)
6264 rtx x;
6265 int code;
6266 FILE *file;
6267 {
6268 if (REGNO (x) == ARG_POINTER_REGNUM
6269 || REGNO (x) == FRAME_POINTER_REGNUM
6270 || REGNO (x) == FLAGS_REG
6271 || REGNO (x) == FPSR_REG)
6272 abort ();
6273
6274 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6275 putc ('%', file);
6276
6277 if (code == 'w' || MMX_REG_P (x))
6278 code = 2;
6279 else if (code == 'b')
6280 code = 1;
6281 else if (code == 'k')
6282 code = 4;
6283 else if (code == 'q')
6284 code = 8;
6285 else if (code == 'y')
6286 code = 3;
6287 else if (code == 'h')
6288 code = 0;
6289 else
6290 code = GET_MODE_SIZE (GET_MODE (x));
6291
6292 /* Irritatingly, the AMD extended registers use a different naming
6293 convention from the normal registers. */
6294 if (REX_INT_REG_P (x))
6295 {
6296 if (!TARGET_64BIT)
6297 abort ();
6298 switch (code)
6299 {
6300 case 0:
6301 error ("extended registers have no high halves");
6302 break;
6303 case 1:
6304 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6305 break;
6306 case 2:
6307 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6308 break;
6309 case 4:
6310 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6311 break;
6312 case 8:
6313 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6314 break;
6315 default:
6316 error ("unsupported operand size for extended register");
6317 break;
6318 }
6319 return;
6320 }
6321 switch (code)
6322 {
6323 case 3:
6324 if (STACK_TOP_P (x))
6325 {
6326 fputs ("st(0)", file);
6327 break;
6328 }
6329 /* FALLTHRU */
6330 case 8:
6331 case 4:
6332 case 12:
6333 if (! ANY_FP_REG_P (x))
6334 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6335 /* FALLTHRU */
6336 case 16:
6337 case 2:
6338 fputs (hi_reg_name[REGNO (x)], file);
6339 break;
6340 case 1:
6341 fputs (qi_reg_name[REGNO (x)], file);
6342 break;
6343 case 0:
6344 fputs (qi_high_reg_name[REGNO (x)], file);
6345 break;
6346 default:
6347 abort ();
6348 }
6349 }
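
/* Examples, AT&T syntax: for hard register 0 (the a-register), code 'b'
   prints "%al", 'w' prints "%ax", 'k' prints "%eax" and 'q' prints
   "%rax" (64-bit only); the first extended register printed with
   code 'k' comes out as "%r8d".  */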
6350
6351 /* Locate some local-dynamic symbol still in use by this function
6352 so that we can print its name in some tls_local_dynamic_base
6353 pattern. */
6354
6355 static const char *
6356 get_some_local_dynamic_name ()
6357 {
6358 rtx insn;
6359
6360 if (cfun->machine->some_ld_name)
6361 return cfun->machine->some_ld_name;
6362
6363 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6364 if (INSN_P (insn)
6365 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6366 return cfun->machine->some_ld_name;
6367
6368 abort ();
6369 }
6370
6371 static int
6372 get_some_local_dynamic_name_1 (px, data)
6373 rtx *px;
6374 void *data ATTRIBUTE_UNUSED;
6375 {
6376 rtx x = *px;
6377
6378 if (GET_CODE (x) == SYMBOL_REF
6379 && local_dynamic_symbolic_operand (x, Pmode))
6380 {
6381 cfun->machine->some_ld_name = XSTR (x, 0);
6382 return 1;
6383 }
6384
6385 return 0;
6386 }
6387
6388 /* Meaning of CODE:
6389 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6390 C -- print opcode suffix for set/cmov insn.
6391 c -- like C, but print reversed condition
6392 F,f -- likewise, but for floating-point.
6393 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6394 nothing
6395 R -- print the prefix for register names.
6396 z -- print the opcode suffix for the size of the current operand.
6397 * -- print a star (in certain assembler syntax)
6398 A -- print an absolute memory reference.
6399 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6400 s -- print a shift double count, followed by the assembler's argument
6401 delimiter.
6402 b -- print the QImode name of the register for the indicated operand.
6403 %b0 would print %al if operands[0] is reg 0.
6404 w -- likewise, print the HImode name of the register.
6405 k -- likewise, print the SImode name of the register.
6406 q -- likewise, print the DImode name of the register.
6407 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6408 y -- print "st(0)" instead of "st" as a register.
6409 D -- print condition for SSE cmp instruction.
6410 P -- if PIC, print an @PLT suffix.
6411 X -- don't print any sort of PIC '@' suffix for a symbol.
6412 & -- print some in-use local-dynamic symbol name.
6413 */
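
/* For instance, the template "fistp%z0\t%0" used later in this file
   relies on the 'z' code: with an SImode memory destination it expands
   to "fistpl", and with a DImode destination to "fistpll" (or "fistpq"
   with GAS mnemonics).  */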
6414
6415 void
6416 print_operand (file, x, code)
6417 FILE *file;
6418 rtx x;
6419 int code;
6420 {
6421 if (code)
6422 {
6423 switch (code)
6424 {
6425 case '*':
6426 if (ASSEMBLER_DIALECT == ASM_ATT)
6427 putc ('*', file);
6428 return;
6429
6430 case '&':
6431 assemble_name (file, get_some_local_dynamic_name ());
6432 return;
6433
6434 case 'A':
6435 if (ASSEMBLER_DIALECT == ASM_ATT)
6436 putc ('*', file);
6437 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6438 {
6439 /* Intel syntax.  For absolute addresses, registers should not
6440 be surrounded by brackets. */
6441 if (GET_CODE (x) != REG)
6442 {
6443 putc ('[', file);
6444 PRINT_OPERAND (file, x, 0);
6445 putc (']', file);
6446 return;
6447 }
6448 }
6449 else
6450 abort ();
6451
6452 PRINT_OPERAND (file, x, 0);
6453 return;
6454
6455
6456 case 'L':
6457 if (ASSEMBLER_DIALECT == ASM_ATT)
6458 putc ('l', file);
6459 return;
6460
6461 case 'W':
6462 if (ASSEMBLER_DIALECT == ASM_ATT)
6463 putc ('w', file);
6464 return;
6465
6466 case 'B':
6467 if (ASSEMBLER_DIALECT == ASM_ATT)
6468 putc ('b', file);
6469 return;
6470
6471 case 'Q':
6472 if (ASSEMBLER_DIALECT == ASM_ATT)
6473 putc ('l', file);
6474 return;
6475
6476 case 'S':
6477 if (ASSEMBLER_DIALECT == ASM_ATT)
6478 putc ('s', file);
6479 return;
6480
6481 case 'T':
6482 if (ASSEMBLER_DIALECT == ASM_ATT)
6483 putc ('t', file);
6484 return;
6485
6486 case 'z':
6487 /* 387 opcodes don't get size suffixes if the operands are
6488 registers. */
6489 if (STACK_REG_P (x))
6490 return;
6491
6492 /* Likewise if using Intel opcodes. */
6493 if (ASSEMBLER_DIALECT == ASM_INTEL)
6494 return;
6495
6496 /* This is the size of op from size of operand. */
6497 switch (GET_MODE_SIZE (GET_MODE (x)))
6498 {
6499 case 2:
6500 #ifdef HAVE_GAS_FILDS_FISTS
6501 putc ('s', file);
6502 #endif
6503 return;
6504
6505 case 4:
6506 if (GET_MODE (x) == SFmode)
6507 {
6508 putc ('s', file);
6509 return;
6510 }
6511 else
6512 putc ('l', file);
6513 return;
6514
6515 case 12:
6516 case 16:
6517 putc ('t', file);
6518 return;
6519
6520 case 8:
6521 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6522 {
6523 #ifdef GAS_MNEMONICS
6524 putc ('q', file);
6525 #else
6526 putc ('l', file);
6527 putc ('l', file);
6528 #endif
6529 }
6530 else
6531 putc ('l', file);
6532 return;
6533
6534 default:
6535 abort ();
6536 }
6537
6538 case 'b':
6539 case 'w':
6540 case 'k':
6541 case 'q':
6542 case 'h':
6543 case 'y':
6544 case 'X':
6545 case 'P':
6546 break;
6547
6548 case 's':
6549 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6550 {
6551 PRINT_OPERAND (file, x, 0);
6552 putc (',', file);
6553 }
6554 return;
6555
6556 case 'D':
6557 /* Little bit of braindamage here.  The SSE compare instructions
6558 use completely different names for the comparisons than the
6559 fp conditional moves do. */
6560 switch (GET_CODE (x))
6561 {
6562 case EQ:
6563 case UNEQ:
6564 fputs ("eq", file);
6565 break;
6566 case LT:
6567 case UNLT:
6568 fputs ("lt", file);
6569 break;
6570 case LE:
6571 case UNLE:
6572 fputs ("le", file);
6573 break;
6574 case UNORDERED:
6575 fputs ("unord", file);
6576 break;
6577 case NE:
6578 case LTGT:
6579 fputs ("neq", file);
6580 break;
6581 case UNGE:
6582 case GE:
6583 fputs ("nlt", file);
6584 break;
6585 case UNGT:
6586 case GT:
6587 fputs ("nle", file);
6588 break;
6589 case ORDERED:
6590 fputs ("ord", file);
6591 break;
6592 default:
6593 abort ();
6594 break;
6595 }
6596 return;
6597 case 'O':
6598 #ifdef CMOV_SUN_AS_SYNTAX
6599 if (ASSEMBLER_DIALECT == ASM_ATT)
6600 {
6601 switch (GET_MODE (x))
6602 {
6603 case HImode: putc ('w', file); break;
6604 case SImode:
6605 case SFmode: putc ('l', file); break;
6606 case DImode:
6607 case DFmode: putc ('q', file); break;
6608 default: abort ();
6609 }
6610 putc ('.', file);
6611 }
6612 #endif
6613 return;
6614 case 'C':
6615 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6616 return;
6617 case 'F':
6618 #ifdef CMOV_SUN_AS_SYNTAX
6619 if (ASSEMBLER_DIALECT == ASM_ATT)
6620 putc ('.', file);
6621 #endif
6622 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6623 return;
6624
6625 /* Like above, but reverse condition */
6626 case 'c':
6627 /* Check to see if argument to %c is really a constant
6628 and not a condition code which needs to be reversed. */
6629 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6630 {
6631 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6632 return;
6633 }
6634 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6635 return;
6636 case 'f':
6637 #ifdef CMOV_SUN_AS_SYNTAX
6638 if (ASSEMBLER_DIALECT == ASM_ATT)
6639 putc ('.', file);
6640 #endif
6641 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6642 return;
6643 case '+':
6644 {
6645 rtx x;
6646
6647 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6648 return;
6649
6650 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6651 if (x)
6652 {
6653 int pred_val = INTVAL (XEXP (x, 0));
6654
6655 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6656 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6657 {
6658 int taken = pred_val > REG_BR_PROB_BASE / 2;
6659 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6660
6661 /* Emit hints only in the case the default branch prediction
6662 heuristics would fail. */
6663 if (taken != cputaken)
6664 {
6665 /* We use 3e (DS) prefix for taken branches and
6666 2e (CS) prefix for not taken branches. */
6667 if (taken)
6668 fputs ("ds ; ", file);
6669 else
6670 fputs ("cs ; ", file);
6671 }
6672 }
6673 }
6674 return;
6675 }
6676 default:
6677 output_operand_lossage ("invalid operand code `%c'", code);
6678 }
6679 }
6680
6681 if (GET_CODE (x) == REG)
6682 {
6683 PRINT_REG (x, code, file);
6684 }
6685
6686 else if (GET_CODE (x) == MEM)
6687 {
6688 /* No `byte ptr' prefix for call instructions. */
6689 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6690 {
6691 const char * size;
6692 switch (GET_MODE_SIZE (GET_MODE (x)))
6693 {
6694 case 1: size = "BYTE"; break;
6695 case 2: size = "WORD"; break;
6696 case 4: size = "DWORD"; break;
6697 case 8: size = "QWORD"; break;
6698 case 12: size = "XWORD"; break;
6699 case 16: size = "XMMWORD"; break;
6700 default:
6701 abort ();
6702 }
6703
6704 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6705 if (code == 'b')
6706 size = "BYTE";
6707 else if (code == 'w')
6708 size = "WORD";
6709 else if (code == 'k')
6710 size = "DWORD";
6711
6712 fputs (size, file);
6713 fputs (" PTR ", file);
6714 }
6715
6716 x = XEXP (x, 0);
6717 if (flag_pic && CONSTANT_ADDRESS_P (x))
6718 output_pic_addr_const (file, x, code);
6719 /* Avoid (%rip) for call operands. */
6720 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6721 && GET_CODE (x) != CONST_INT)
6722 output_addr_const (file, x);
6723 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6724 output_operand_lossage ("invalid constraints for operand");
6725 else
6726 output_address (x);
6727 }
6728
6729 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6730 {
6731 REAL_VALUE_TYPE r;
6732 long l;
6733
6734 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6735 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6736
6737 if (ASSEMBLER_DIALECT == ASM_ATT)
6738 putc ('$', file);
6739 fprintf (file, "0x%lx", l);
6740 }
6741
6742 /* These float cases don't actually occur as immediate operands. */
6743 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6744 {
6745 REAL_VALUE_TYPE r;
6746 char dstr[30];
6747
6748 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6749 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6750 fprintf (file, "%s", dstr);
6751 }
6752
6753 else if (GET_CODE (x) == CONST_DOUBLE
6754 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6755 {
6756 REAL_VALUE_TYPE r;
6757 char dstr[30];
6758
6759 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6760 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6761 fprintf (file, "%s", dstr);
6762 }
6763
6764 else
6765 {
6766 if (code != 'P')
6767 {
6768 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6769 {
6770 if (ASSEMBLER_DIALECT == ASM_ATT)
6771 putc ('$', file);
6772 }
6773 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6774 || GET_CODE (x) == LABEL_REF)
6775 {
6776 if (ASSEMBLER_DIALECT == ASM_ATT)
6777 putc ('$', file);
6778 else
6779 fputs ("OFFSET FLAT:", file);
6780 }
6781 }
6782 if (GET_CODE (x) == CONST_INT)
6783 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6784 else if (flag_pic)
6785 output_pic_addr_const (file, x, code);
6786 else
6787 output_addr_const (file, x);
6788 }
6789 }
6790 \f
6791 /* Print a memory operand whose address is ADDR. */
6792
6793 void
6794 print_operand_address (file, addr)
6795 FILE *file;
6796 register rtx addr;
6797 {
6798 struct ix86_address parts;
6799 rtx base, index, disp;
6800 int scale;
6801
6802 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6803 {
6804 if (ASSEMBLER_DIALECT == ASM_INTEL)
6805 fputs ("DWORD PTR ", file);
6806 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6807 putc ('%', file);
6808 fputs ("gs:0", file);
6809 return;
6810 }
6811
6812 if (! ix86_decompose_address (addr, &parts))
6813 abort ();
6814
6815 base = parts.base;
6816 index = parts.index;
6817 disp = parts.disp;
6818 scale = parts.scale;
6819
6820 if (!base && !index)
6821 {
6822 /* Displacement only requires special attention. */
6823
6824 if (GET_CODE (disp) == CONST_INT)
6825 {
6826 if (ASSEMBLER_DIALECT == ASM_INTEL)
6827 {
6828 if (USER_LABEL_PREFIX[0] == 0)
6829 putc ('%', file);
6830 fputs ("ds:", file);
6831 }
6832 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6833 }
6834 else if (flag_pic)
6835 output_pic_addr_const (file, addr, 0);
6836 else
6837 output_addr_const (file, addr);
6838
6839 /* Use the one-byte-shorter RIP-relative addressing for 64bit mode. */
6840 if (TARGET_64BIT
6841 && (GET_CODE (addr) == SYMBOL_REF
6842 || GET_CODE (addr) == LABEL_REF
6843 || (GET_CODE (addr) == CONST
6844 && GET_CODE (XEXP (addr, 0)) == PLUS
6845 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6846 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6847 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6848 fputs ("(%rip)", file);
6849 }
6850 else
6851 {
6852 if (ASSEMBLER_DIALECT == ASM_ATT)
6853 {
6854 if (disp)
6855 {
6856 if (flag_pic)
6857 output_pic_addr_const (file, disp, 0);
6858 else if (GET_CODE (disp) == LABEL_REF)
6859 output_asm_label (disp);
6860 else
6861 output_addr_const (file, disp);
6862 }
6863
6864 putc ('(', file);
6865 if (base)
6866 PRINT_REG (base, 0, file);
6867 if (index)
6868 {
6869 putc (',', file);
6870 PRINT_REG (index, 0, file);
6871 if (scale != 1)
6872 fprintf (file, ",%d", scale);
6873 }
6874 putc (')', file);
6875 }
6876 else
6877 {
6878 rtx offset = NULL_RTX;
6879
6880 if (disp)
6881 {
6882 /* Pull out the offset of a symbol; print any symbol itself. */
6883 if (GET_CODE (disp) == CONST
6884 && GET_CODE (XEXP (disp, 0)) == PLUS
6885 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6886 {
6887 offset = XEXP (XEXP (disp, 0), 1);
6888 disp = gen_rtx_CONST (VOIDmode,
6889 XEXP (XEXP (disp, 0), 0));
6890 }
6891
6892 if (flag_pic)
6893 output_pic_addr_const (file, disp, 0);
6894 else if (GET_CODE (disp) == LABEL_REF)
6895 output_asm_label (disp);
6896 else if (GET_CODE (disp) == CONST_INT)
6897 offset = disp;
6898 else
6899 output_addr_const (file, disp);
6900 }
6901
6902 putc ('[', file);
6903 if (base)
6904 {
6905 PRINT_REG (base, 0, file);
6906 if (offset)
6907 {
6908 if (INTVAL (offset) >= 0)
6909 putc ('+', file);
6910 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6911 }
6912 }
6913 else if (offset)
6914 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6915 else
6916 putc ('0', file);
6917
6918 if (index)
6919 {
6920 putc ('+', file);
6921 PRINT_REG (index, 0, file);
6922 if (scale != 1)
6923 fprintf (file, "*%d", scale);
6924 }
6925 putc (']', file);
6926 }
6927 }
6928 }
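
/* Example: a base of %ebx, an index of %eax, scale 4 and displacement 8
   comes out as "8(%ebx,%eax,4)" in AT&T syntax, or as the equivalent
   bracketed base+disp+index*scale form in Intel syntax; a bare symbolic
   displacement on x86-64 additionally gets the "(%rip)" suffix.  */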
6929
6930 bool
6931 output_addr_const_extra (file, x)
6932 FILE *file;
6933 rtx x;
6934 {
6935 rtx op;
6936
6937 if (GET_CODE (x) != UNSPEC)
6938 return false;
6939
6940 op = XVECEXP (x, 0, 0);
6941 switch (XINT (x, 1))
6942 {
6943 case UNSPEC_GOTTPOFF:
6944 output_addr_const (file, op);
6945 /* FIXME: This might be @TPOFF in Sun ld. */
6946 fputs ("@GOTTPOFF", file);
6947 break;
6948 case UNSPEC_TPOFF:
6949 output_addr_const (file, op);
6950 fputs ("@TPOFF", file);
6951 break;
6952 case UNSPEC_NTPOFF:
6953 output_addr_const (file, op);
6954 fputs ("@NTPOFF", file);
6955 break;
6956 case UNSPEC_DTPOFF:
6957 output_addr_const (file, op);
6958 fputs ("@DTPOFF", file);
6959 break;
6960 case UNSPEC_GOTNTPOFF:
6961 output_addr_const (file, op);
6962 fputs ("@GOTNTPOFF", file);
6963 break;
6964 case UNSPEC_INDNTPOFF:
6965 output_addr_const (file, op);
6966 fputs ("@INDNTPOFF", file);
6967 break;
6968
6969 default:
6970 return false;
6971 }
6972
6973 return true;
6974 }
6975 \f
6976 /* Split one or more DImode RTL references into pairs of SImode
6977 references. The RTL can be REG, offsettable MEM, integer constant, or
6978 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6979 split and "num" is its length. lo_half and hi_half are output arrays
6980 that parallel "operands". */
6981
6982 void
6983 split_di (operands, num, lo_half, hi_half)
6984 rtx operands[];
6985 int num;
6986 rtx lo_half[], hi_half[];
6987 {
6988 while (num--)
6989 {
6990 rtx op = operands[num];
6991
6992 /* simplify_subreg refuses to split volatile memory addresses,
6993 but we still have to handle them. */
6994 if (GET_CODE (op) == MEM)
6995 {
6996 lo_half[num] = adjust_address (op, SImode, 0);
6997 hi_half[num] = adjust_address (op, SImode, 4);
6998 }
6999 else
7000 {
7001 lo_half[num] = simplify_gen_subreg (SImode, op,
7002 GET_MODE (op) == VOIDmode
7003 ? DImode : GET_MODE (op), 0);
7004 hi_half[num] = simplify_gen_subreg (SImode, op,
7005 GET_MODE (op) == VOIDmode
7006 ? DImode : GET_MODE (op), 4);
7007 }
7008 }
7009 }
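
/* Example: a DImode integer constant is split into two SImode constants
   holding its low and high 32 bits, a DImode register into its two
   SImode subregs, and an offsettable MEM into the MEMs at offsets 0
   and 4.  */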
7010 /* Split one or more TImode RTL references into pairs of SImode
7011 references. The RTL can be REG, offsettable MEM, integer constant, or
7012 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7013 split and "num" is its length. lo_half and hi_half are output arrays
7014 that parallel "operands". */
7015
7016 void
7017 split_ti (operands, num, lo_half, hi_half)
7018 rtx operands[];
7019 int num;
7020 rtx lo_half[], hi_half[];
7021 {
7022 while (num--)
7023 {
7024 rtx op = operands[num];
7025
7026 /* simplify_subreg refuses to split volatile memory addresses, but we
7027 still have to handle them. */
7028 if (GET_CODE (op) == MEM)
7029 {
7030 lo_half[num] = adjust_address (op, DImode, 0);
7031 hi_half[num] = adjust_address (op, DImode, 8);
7032 }
7033 else
7034 {
7035 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7036 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7037 }
7038 }
7039 }
7040 \f
7041 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7042 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7043 is the expression of the binary operation. The output may either be
7044 emitted here, or returned to the caller, like all output_* functions.
7045
7046 There is no guarantee that the operands are the same mode, as they
7047 might be within FLOAT or FLOAT_EXTEND expressions. */
7048
7049 #ifndef SYSV386_COMPAT
7050 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7051 wants to fix the assemblers because that causes incompatibility
7052 with gcc. No-one wants to fix gcc because that causes
7053 incompatibility with assemblers... You can use the option of
7054 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7055 #define SYSV386_COMPAT 1
7056 #endif
7057
7058 const char *
7059 output_387_binary_op (insn, operands)
7060 rtx insn;
7061 rtx *operands;
7062 {
7063 static char buf[30];
7064 const char *p;
7065 const char *ssep;
7066 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7067
7068 #ifdef ENABLE_CHECKING
7069 /* Even if we do not want to check the inputs, this documents the input
7070 constraints, which helps in understanding the following code. */
7071 if (STACK_REG_P (operands[0])
7072 && ((REG_P (operands[1])
7073 && REGNO (operands[0]) == REGNO (operands[1])
7074 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7075 || (REG_P (operands[2])
7076 && REGNO (operands[0]) == REGNO (operands[2])
7077 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7078 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7079 ; /* ok */
7080 else if (!is_sse)
7081 abort ();
7082 #endif
7083
7084 switch (GET_CODE (operands[3]))
7085 {
7086 case PLUS:
7087 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7088 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7089 p = "fiadd";
7090 else
7091 p = "fadd";
7092 ssep = "add";
7093 break;
7094
7095 case MINUS:
7096 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7097 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7098 p = "fisub";
7099 else
7100 p = "fsub";
7101 ssep = "sub";
7102 break;
7103
7104 case MULT:
7105 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7106 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7107 p = "fimul";
7108 else
7109 p = "fmul";
7110 ssep = "mul";
7111 break;
7112
7113 case DIV:
7114 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7115 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7116 p = "fidiv";
7117 else
7118 p = "fdiv";
7119 ssep = "div";
7120 break;
7121
7122 default:
7123 abort ();
7124 }
7125
7126 if (is_sse)
7127 {
7128 strcpy (buf, ssep);
7129 if (GET_MODE (operands[0]) == SFmode)
7130 strcat (buf, "ss\t{%2, %0|%0, %2}");
7131 else
7132 strcat (buf, "sd\t{%2, %0|%0, %2}");
7133 return buf;
7134 }
7135 strcpy (buf, p);
7136
7137 switch (GET_CODE (operands[3]))
7138 {
7139 case MULT:
7140 case PLUS:
7141 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7142 {
7143 rtx temp = operands[2];
7144 operands[2] = operands[1];
7145 operands[1] = temp;
7146 }
7147
7148 /* We know operands[0] == operands[1]. */
7149
7150 if (GET_CODE (operands[2]) == MEM)
7151 {
7152 p = "%z2\t%2";
7153 break;
7154 }
7155
7156 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7157 {
7158 if (STACK_TOP_P (operands[0]))
7159 /* How is it that we are storing to a dead operand[2]?
7160 Well, presumably operands[1] is dead too. We can't
7161 store the result to st(0) as st(0) gets popped on this
7162 instruction. Instead store to operands[2] (which I
7163 think has to be st(1)). st(1) will be popped later.
7164 gcc <= 2.8.1 didn't have this check and generated
7165 assembly code that the Unixware assembler rejected. */
7166 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7167 else
7168 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7169 break;
7170 }
7171
7172 if (STACK_TOP_P (operands[0]))
7173 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7174 else
7175 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7176 break;
7177
7178 case MINUS:
7179 case DIV:
7180 if (GET_CODE (operands[1]) == MEM)
7181 {
7182 p = "r%z1\t%1";
7183 break;
7184 }
7185
7186 if (GET_CODE (operands[2]) == MEM)
7187 {
7188 p = "%z2\t%2";
7189 break;
7190 }
7191
7192 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7193 {
7194 #if SYSV386_COMPAT
7195 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7196 derived assemblers, confusingly reverse the direction of
7197 the operation for fsub{r} and fdiv{r} when the
7198 destination register is not st(0). The Intel assembler
7199 doesn't have this brain damage. Read !SYSV386_COMPAT to
7200 figure out what the hardware really does. */
7201 if (STACK_TOP_P (operands[0]))
7202 p = "{p\t%0, %2|rp\t%2, %0}";
7203 else
7204 p = "{rp\t%2, %0|p\t%0, %2}";
7205 #else
7206 if (STACK_TOP_P (operands[0]))
7207 /* As above for fmul/fadd, we can't store to st(0). */
7208 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7209 else
7210 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7211 #endif
7212 break;
7213 }
7214
7215 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7216 {
7217 #if SYSV386_COMPAT
7218 if (STACK_TOP_P (operands[0]))
7219 p = "{rp\t%0, %1|p\t%1, %0}";
7220 else
7221 p = "{p\t%1, %0|rp\t%0, %1}";
7222 #else
7223 if (STACK_TOP_P (operands[0]))
7224 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7225 else
7226 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7227 #endif
7228 break;
7229 }
7230
7231 if (STACK_TOP_P (operands[0]))
7232 {
7233 if (STACK_TOP_P (operands[1]))
7234 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7235 else
7236 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7237 break;
7238 }
7239 else if (STACK_TOP_P (operands[1]))
7240 {
7241 #if SYSV386_COMPAT
7242 p = "{\t%1, %0|r\t%0, %1}";
7243 #else
7244 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7245 #endif
7246 }
7247 else
7248 {
7249 #if SYSV386_COMPAT
7250 p = "{r\t%2, %0|\t%0, %2}";
7251 #else
7252 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7253 #endif
7254 }
7255 break;
7256
7257 default:
7258 abort ();
7259 }
7260
7261 strcat (buf, p);
7262 return buf;
7263 }
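
/* Example: for st(0) = st(0) + <DFmode memory>, the PLUS case above
   selects p = "%z2\t%2", so the returned template is "fadd%z2\t%2",
   which prints as "faddl <mem>" once the size suffix for the 8-byte
   operand is substituted.  */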
7264
7265 /* Output code to initialize the control word copies used by the
7266 trunc?f?i patterns.  NORMAL is set to the current control word, while
7267 ROUND_DOWN is set to a control word that rounds toward zero (truncation). */
7268 void
7269 emit_i387_cw_initialization (normal, round_down)
7270 rtx normal, round_down;
7271 {
7272 rtx reg = gen_reg_rtx (HImode);
7273
7274 emit_insn (gen_x86_fnstcw_1 (normal));
7275 emit_move_insn (reg, normal);
7276 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7277 && !TARGET_64BIT)
7278 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7279 else
7280 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7281 emit_move_insn (round_down, reg);
7282 }
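
/* The 0xc00 mask (or the 0xc written into the high byte by the insv
   variant) sets the RC rounding-control field of the 387 control word
   to 11, i.e. truncation toward zero, which is what the trunc?f?i
   patterns need.  */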
7283
7284 /* Output code for INSN to convert a float to a signed int. OPERANDS
7285 are the insn operands. The output may be [HSD]Imode and the input
7286 operand may be [SDX]Fmode. */
7287
7288 const char *
7289 output_fix_trunc (insn, operands)
7290 rtx insn;
7291 rtx *operands;
7292 {
7293 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7294 int dimode_p = GET_MODE (operands[0]) == DImode;
7295
7296 /* Jump through a hoop or two for DImode, since the hardware has no
7297 non-popping instruction. We used to do this a different way, but
7298 that was somewhat fragile and broke with post-reload splitters. */
7299 if (dimode_p && !stack_top_dies)
7300 output_asm_insn ("fld\t%y1", operands);
7301
7302 if (!STACK_TOP_P (operands[1]))
7303 abort ();
7304
7305 if (GET_CODE (operands[0]) != MEM)
7306 abort ();
7307
7308 output_asm_insn ("fldcw\t%3", operands);
7309 if (stack_top_dies || dimode_p)
7310 output_asm_insn ("fistp%z0\t%0", operands);
7311 else
7312 output_asm_insn ("fist%z0\t%0", operands);
7313 output_asm_insn ("fldcw\t%2", operands);
7314
7315 return "";
7316 }
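
/* A typical emitted sequence for an SImode result is therefore
       fldcw   <truncating control word>
       fistpl  <dest>
       fldcw   <saved control word>
   with an extra "fld %st(0)" up front in the DImode case so that the
   value survives the popping fistpll.  */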
7317
7318 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7319 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7320 when fucom should be used. */
7321
7322 const char *
7323 output_fp_compare (insn, operands, eflags_p, unordered_p)
7324 rtx insn;
7325 rtx *operands;
7326 int eflags_p, unordered_p;
7327 {
7328 int stack_top_dies;
7329 rtx cmp_op0 = operands[0];
7330 rtx cmp_op1 = operands[1];
7331 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7332
7333 if (eflags_p == 2)
7334 {
7335 cmp_op0 = cmp_op1;
7336 cmp_op1 = operands[2];
7337 }
7338 if (is_sse)
7339 {
7340 if (GET_MODE (operands[0]) == SFmode)
7341 if (unordered_p)
7342 return "ucomiss\t{%1, %0|%0, %1}";
7343 else
7344 return "comiss\t{%1, %0|%0, %y}";
7345 else
7346 if (unordered_p)
7347 return "ucomisd\t{%1, %0|%0, %1}";
7348 else
7349 return "comisd\t{%1, %0|%0, %y}";
7350 }
7351
7352 if (! STACK_TOP_P (cmp_op0))
7353 abort ();
7354
7355 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7356
7357 if (STACK_REG_P (cmp_op1)
7358 && stack_top_dies
7359 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7360 && REGNO (cmp_op1) != FIRST_STACK_REG)
7361 {
7362 /* If both the top of the 387 stack and the other operand (also a
7363 stack register) die, then this must be an
7364 `fcompp' float compare. */
7365
7366 if (eflags_p == 1)
7367 {
7368 /* There is no double popping fcomi variant. Fortunately,
7369 eflags is immune from the fstp's cc clobbering. */
7370 if (unordered_p)
7371 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7372 else
7373 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7374 return "fstp\t%y0";
7375 }
7376 else
7377 {
7378 if (eflags_p == 2)
7379 {
7380 if (unordered_p)
7381 return "fucompp\n\tfnstsw\t%0";
7382 else
7383 return "fcompp\n\tfnstsw\t%0";
7384 }
7385 else
7386 {
7387 if (unordered_p)
7388 return "fucompp";
7389 else
7390 return "fcompp";
7391 }
7392 }
7393 }
7394 else
7395 {
7396 /* The table is indexed by (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies. */
7397
7398 static const char * const alt[24] =
7399 {
7400 "fcom%z1\t%y1",
7401 "fcomp%z1\t%y1",
7402 "fucom%z1\t%y1",
7403 "fucomp%z1\t%y1",
7404
7405 "ficom%z1\t%y1",
7406 "ficomp%z1\t%y1",
7407 NULL,
7408 NULL,
7409
7410 "fcomi\t{%y1, %0|%0, %y1}",
7411 "fcomip\t{%y1, %0|%0, %y1}",
7412 "fucomi\t{%y1, %0|%0, %y1}",
7413 "fucomip\t{%y1, %0|%0, %y1}",
7414
7415 NULL,
7416 NULL,
7417 NULL,
7418 NULL,
7419
7420 "fcom%z2\t%y2\n\tfnstsw\t%0",
7421 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7422 "fucom%z2\t%y2\n\tfnstsw\t%0",
7423 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7424
7425 "ficom%z2\t%y2\n\tfnstsw\t%0",
7426 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7427 NULL,
7428 NULL
7429 };
7430
7431 int mask;
7432 const char *ret;
7433
7434 mask = eflags_p << 3;
7435 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7436 mask |= unordered_p << 1;
7437 mask |= stack_top_dies;
7438
7439 if (mask >= 24)
7440 abort ();
7441 ret = alt[mask];
7442 if (ret == NULL)
7443 abort ();
7444
7445 return ret;
7446 }
7447 }
7448
7449 void
7450 ix86_output_addr_vec_elt (file, value)
7451 FILE *file;
7452 int value;
7453 {
7454 const char *directive = ASM_LONG;
7455
7456 if (TARGET_64BIT)
7457 {
7458 #ifdef ASM_QUAD
7459 directive = ASM_QUAD;
7460 #else
7461 abort ();
7462 #endif
7463 }
7464
7465 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7466 }
7467
7468 void
7469 ix86_output_addr_diff_elt (file, value, rel)
7470 FILE *file;
7471 int value, rel;
7472 {
7473 if (TARGET_64BIT)
7474 fprintf (file, "%s%s%d-%s%d\n",
7475 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7476 else if (HAVE_AS_GOTOFF_IN_DATA)
7477 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7478 #if TARGET_MACHO
7479 else if (TARGET_MACHO)
7480 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7481 machopic_function_base_name () + 1);
7482 #endif
7483 else
7484 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7485 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7486 }
7487 \f
7488 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7489 for the target. */
7490
7491 void
7492 ix86_expand_clear (dest)
7493 rtx dest;
7494 {
7495 rtx tmp;
7496
7497 /* We play register width games, which are only valid after reload. */
7498 if (!reload_completed)
7499 abort ();
7500
7501 /* Avoid HImode and its attendant prefix byte. */
7502 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7503 dest = gen_rtx_REG (SImode, REGNO (dest));
7504
7505 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7506
7507 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7508 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7509 {
7510 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7511 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7512 }
7513
7514 emit_insn (tmp);
7515 }
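
/* Example: a QImode or HImode destination is first widened to SImode,
   and when "mov $0" is not preferred (or when optimizing for size) the
   insn emitted is the flag-clobbering "xor %reg, %reg" form rather than
   "mov $0, %reg".  */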
7516
7517 /* X is an unchanging MEM. If it is a constant pool reference, return
7518 the constant pool rtx, else NULL. */
7519
7520 static rtx
7521 maybe_get_pool_constant (x)
7522 rtx x;
7523 {
7524 x = XEXP (x, 0);
7525
7526 if (flag_pic)
7527 {
7528 if (GET_CODE (x) != PLUS)
7529 return NULL_RTX;
7530 if (XEXP (x, 0) != pic_offset_table_rtx)
7531 return NULL_RTX;
7532 x = XEXP (x, 1);
7533 if (GET_CODE (x) != CONST)
7534 return NULL_RTX;
7535 x = XEXP (x, 0);
7536 if (GET_CODE (x) != UNSPEC)
7537 return NULL_RTX;
7538 if (XINT (x, 1) != UNSPEC_GOTOFF)
7539 return NULL_RTX;
7540 x = XVECEXP (x, 0, 0);
7541 }
7542
7543 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7544 return get_pool_constant (x);
7545
7546 return NULL_RTX;
7547 }
7548
7549 void
7550 ix86_expand_move (mode, operands)
7551 enum machine_mode mode;
7552 rtx operands[];
7553 {
7554 int strict = (reload_in_progress || reload_completed);
7555 rtx insn, op0, op1, tmp;
7556
7557 op0 = operands[0];
7558 op1 = operands[1];
7559
7560 /* ??? We have a slight problem. We need to say that tls symbols are
7561 not legitimate constants so that reload does not helpfully reload
7562 these constants from a REG_EQUIV, which we cannot handle. (Recall
7563 that general- and local-dynamic address resolution requires a
7564 function call.)
7565
7566 However, if we say that tls symbols are not legitimate constants,
7567 then emit_move_insn will helpfully drop them into the constant pool.
7568
7569 It is far easier to work around emit_move_insn than reload. Recognize
7570 the MEM that we would have created and extract the symbol_ref. */
7571
7572 if (mode == Pmode
7573 && GET_CODE (op1) == MEM
7574 && RTX_UNCHANGING_P (op1))
7575 {
7576 tmp = maybe_get_pool_constant (op1);
7577 /* Note that we only care about symbolic constants here, which
7578 unlike CONST_INT will always have a proper mode. */
7579 if (tmp && GET_MODE (tmp) == Pmode)
7580 op1 = tmp;
7581 }
7582
7583 if (tls_symbolic_operand (op1, Pmode))
7584 {
7585 op1 = legitimize_address (op1, op1, VOIDmode);
7586 if (GET_CODE (op0) == MEM)
7587 {
7588 tmp = gen_reg_rtx (mode);
7589 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7590 op1 = tmp;
7591 }
7592 }
7593 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7594 {
7595 #if TARGET_MACHO
7596 if (MACHOPIC_PURE)
7597 {
7598 rtx temp = ((reload_in_progress
7599 || ((op0 && GET_CODE (op0) == REG)
7600 && mode == Pmode))
7601 ? op0 : gen_reg_rtx (Pmode));
7602 op1 = machopic_indirect_data_reference (op1, temp);
7603 op1 = machopic_legitimize_pic_address (op1, mode,
7604 temp == op1 ? 0 : temp);
7605 }
7606 else
7607 {
7608 if (MACHOPIC_INDIRECT)
7609 op1 = machopic_indirect_data_reference (op1, 0);
7610 }
7611 if (op0 != op1)
7612 {
7613 insn = gen_rtx_SET (VOIDmode, op0, op1);
7614 emit_insn (insn);
7615 }
7616 return;
7617 #endif /* TARGET_MACHO */
7618 if (GET_CODE (op0) == MEM)
7619 op1 = force_reg (Pmode, op1);
7620 else
7621 {
7622 rtx temp = op0;
7623 if (GET_CODE (temp) != REG)
7624 temp = gen_reg_rtx (Pmode);
7625 temp = legitimize_pic_address (op1, temp);
7626 if (temp == op0)
7627 return;
7628 op1 = temp;
7629 }
7630 }
7631 else
7632 {
7633 if (GET_CODE (op0) == MEM
7634 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7635 || !push_operand (op0, mode))
7636 && GET_CODE (op1) == MEM)
7637 op1 = force_reg (mode, op1);
7638
7639 if (push_operand (op0, mode)
7640 && ! general_no_elim_operand (op1, mode))
7641 op1 = copy_to_mode_reg (mode, op1);
7642
7643 /* Force large constants in 64bit compilation into a register
7644 to get them CSEed. */
7645 if (TARGET_64BIT && mode == DImode
7646 && immediate_operand (op1, mode)
7647 && !x86_64_zero_extended_value (op1)
7648 && !register_operand (op0, mode)
7649 && optimize && !reload_completed && !reload_in_progress)
7650 op1 = copy_to_mode_reg (mode, op1);
7651
7652 if (FLOAT_MODE_P (mode))
7653 {
7654 /* If we are loading a floating point constant to a register,
7655 force the value to memory now, since we'll get better code
7656 out of the back end. */
7657
7658 if (strict)
7659 ;
7660 else if (GET_CODE (op1) == CONST_DOUBLE
7661 && register_operand (op0, mode))
7662 op1 = validize_mem (force_const_mem (mode, op1));
7663 }
7664 }
7665
7666 insn = gen_rtx_SET (VOIDmode, op0, op1);
7667
7668 emit_insn (insn);
7669 }
7670
7671 void
7672 ix86_expand_vector_move (mode, operands)
7673 enum machine_mode mode;
7674 rtx operands[];
7675 {
7676 /* Force constants other than zero into memory. We do not know how
7677 the instructions used to build constants modify the upper 64 bits
7678 of the register; once we have that information, we may be able
7679 to handle some of them more efficiently. */
7680 if ((reload_in_progress | reload_completed) == 0
7681 && register_operand (operands[0], mode)
7682 && CONSTANT_P (operands[1]))
7683 {
7684 rtx addr = gen_reg_rtx (Pmode);
7685 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7686 operands[1] = gen_rtx_MEM (mode, addr);
7687 }
7688
7689 /* Make operand1 a register if neither operand is a register already. */
7690 if ((reload_in_progress | reload_completed) == 0
7691 && !register_operand (operands[0], mode)
7692 && !register_operand (operands[1], mode))
7693 {
7694 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7695 emit_move_insn (operands[0], temp);
7696 return;
7697 }
7698
7699 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7700 }
7701
7702 /* Attempt to expand a binary operator. Make the expansion closer to the
7703 actual machine than just general_operand, which would allow 3 separate
7704 memory references (one output, two input) in a single insn. */
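/* A rough illustration (not a claim about any particular md pattern): for
   (set (mem C) (plus (mem A) (mem B))) the expander below loads one source
   into a register, computes the result into a register when the destination
   does not match a source, and stores it to C afterwards, since the x86 ALU
   forms allow at most one memory operand and require the destination to
   match one of the sources.  */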
7705
7706 void
7707 ix86_expand_binary_operator (code, mode, operands)
7708 enum rtx_code code;
7709 enum machine_mode mode;
7710 rtx operands[];
7711 {
7712 int matching_memory;
7713 rtx src1, src2, dst, op, clob;
7714
7715 dst = operands[0];
7716 src1 = operands[1];
7717 src2 = operands[2];
7718
7719 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7720 if (GET_RTX_CLASS (code) == 'c'
7721 && (rtx_equal_p (dst, src2)
7722 || immediate_operand (src1, mode)))
7723 {
7724 rtx temp = src1;
7725 src1 = src2;
7726 src2 = temp;
7727 }
7728
7729 /* If the destination is memory, and we do not have matching source
7730 operands, do things in registers. */
7731 matching_memory = 0;
7732 if (GET_CODE (dst) == MEM)
7733 {
7734 if (rtx_equal_p (dst, src1))
7735 matching_memory = 1;
7736 else if (GET_RTX_CLASS (code) == 'c'
7737 && rtx_equal_p (dst, src2))
7738 matching_memory = 2;
7739 else
7740 dst = gen_reg_rtx (mode);
7741 }
7742
7743 /* Both source operands cannot be in memory. */
7744 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7745 {
7746 if (matching_memory != 2)
7747 src2 = force_reg (mode, src2);
7748 else
7749 src1 = force_reg (mode, src1);
7750 }
7751
7752 /* If the operation is not commutative, source 1 cannot be a constant
7753 or non-matching memory. */
7754 if ((CONSTANT_P (src1)
7755 || (!matching_memory && GET_CODE (src1) == MEM))
7756 && GET_RTX_CLASS (code) != 'c')
7757 src1 = force_reg (mode, src1);
7758
7759 /* If optimizing, copy to regs to improve CSE */
7760 if (optimize && ! no_new_pseudos)
7761 {
7762 if (GET_CODE (dst) == MEM)
7763 dst = gen_reg_rtx (mode);
7764 if (GET_CODE (src1) == MEM)
7765 src1 = force_reg (mode, src1);
7766 if (GET_CODE (src2) == MEM)
7767 src2 = force_reg (mode, src2);
7768 }
7769
7770 /* Emit the instruction. */
7771
7772 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7773 if (reload_in_progress)
7774 {
7775 /* Reload doesn't know about the flags register, and doesn't know that
7776 it doesn't want to clobber it. We can only do this with PLUS. */
7777 if (code != PLUS)
7778 abort ();
7779 emit_insn (op);
7780 }
7781 else
7782 {
7783 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7784 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7785 }
7786
7787 /* Fix up the destination if needed. */
7788 if (dst != operands[0])
7789 emit_move_insn (operands[0], dst);
7790 }
7791
7792 /* Return TRUE or FALSE depending on whether the binary operator meets the
7793 appropriate constraints. */
7794
7795 int
7796 ix86_binary_operator_ok (code, mode, operands)
7797 enum rtx_code code;
7798 enum machine_mode mode ATTRIBUTE_UNUSED;
7799 rtx operands[3];
7800 {
7801 /* Both source operands cannot be in memory. */
7802 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7803 return 0;
7804 /* If the operation is not commutative, source 1 cannot be a constant. */
7805 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7806 return 0;
7807 /* If the destination is memory, we must have a matching source operand. */
7808 if (GET_CODE (operands[0]) == MEM
7809 && ! (rtx_equal_p (operands[0], operands[1])
7810 || (GET_RTX_CLASS (code) == 'c'
7811 && rtx_equal_p (operands[0], operands[2]))))
7812 return 0;
7813 /* If the operation is not commutative and source 1 is memory, we must
7814 have a matching destination. */
7815 if (GET_CODE (operands[1]) == MEM
7816 && GET_RTX_CLASS (code) != 'c'
7817 && ! rtx_equal_p (operands[0], operands[1]))
7818 return 0;
7819 return 1;
7820 }
7821
7822 /* Attempt to expand a unary operator. Make the expansion closer to the
7823 actual machine than just general_operand, which would allow 2 separate
7824 memory references (one output, one input) in a single insn. */
7825
7826 void
7827 ix86_expand_unary_operator (code, mode, operands)
7828 enum rtx_code code;
7829 enum machine_mode mode;
7830 rtx operands[];
7831 {
7832 int matching_memory;
7833 rtx src, dst, op, clob;
7834
7835 dst = operands[0];
7836 src = operands[1];
7837
7838 /* If the destination is memory, and we do not have matching source
7839 operands, do things in registers. */
7840 matching_memory = 0;
7841 if (GET_CODE (dst) == MEM)
7842 {
7843 if (rtx_equal_p (dst, src))
7844 matching_memory = 1;
7845 else
7846 dst = gen_reg_rtx (mode);
7847 }
7848
7849 /* When source operand is memory, destination must match. */
7850 if (!matching_memory && GET_CODE (src) == MEM)
7851 src = force_reg (mode, src);
7852
7853 /* If optimizing, copy to regs to improve CSE */
7854 if (optimize && ! no_new_pseudos)
7855 {
7856 if (GET_CODE (dst) == MEM)
7857 dst = gen_reg_rtx (mode);
7858 if (GET_CODE (src) == MEM)
7859 src = force_reg (mode, src);
7860 }
7861
7862 /* Emit the instruction. */
7863
7864 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7865 if (reload_in_progress || code == NOT)
7866 {
7867 /* Reload doesn't know about the flags register, and doesn't know that
7868 it doesn't want to clobber it. */
7869 if (code != NOT)
7870 abort ();
7871 emit_insn (op);
7872 }
7873 else
7874 {
7875 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7876 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7877 }
7878
7879 /* Fix up the destination if needed. */
7880 if (dst != operands[0])
7881 emit_move_insn (operands[0], dst);
7882 }
7883
7884 /* Return TRUE or FALSE depending on whether the unary operator meets the
7885 appropriate constraints. */
7886
7887 int
7888 ix86_unary_operator_ok (code, mode, operands)
7889 enum rtx_code code ATTRIBUTE_UNUSED;
7890 enum machine_mode mode ATTRIBUTE_UNUSED;
7891 rtx operands[2] ATTRIBUTE_UNUSED;
7892 {
7893 /* If one of operands is memory, source and destination must match. */
7894 if ((GET_CODE (operands[0]) == MEM
7895 || GET_CODE (operands[1]) == MEM)
7896 && ! rtx_equal_p (operands[0], operands[1]))
7897 return FALSE;
7898 return TRUE;
7899 }
7900
7901 /* Return TRUE or FALSE depending on whether the first SET in INSN
7902 has source and destination with matching CC modes, and whether the
7903 CC mode is at least as constrained as REQ_MODE. */
7904
7905 int
7906 ix86_match_ccmode (insn, req_mode)
7907 rtx insn;
7908 enum machine_mode req_mode;
7909 {
7910 rtx set;
7911 enum machine_mode set_mode;
7912
7913 set = PATTERN (insn);
7914 if (GET_CODE (set) == PARALLEL)
7915 set = XVECEXP (set, 0, 0);
7916 if (GET_CODE (set) != SET)
7917 abort ();
7918 if (GET_CODE (SET_SRC (set)) != COMPARE)
7919 abort ();
7920
7921 set_mode = GET_MODE (SET_DEST (set));
7922 switch (set_mode)
7923 {
7924 case CCNOmode:
7925 if (req_mode != CCNOmode
7926 && (req_mode != CCmode
7927 || XEXP (SET_SRC (set), 1) != const0_rtx))
7928 return 0;
7929 break;
7930 case CCmode:
7931 if (req_mode == CCGCmode)
7932 return 0;
7933 /* FALLTHRU */
7934 case CCGCmode:
7935 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7936 return 0;
7937 /* FALLTHRU */
7938 case CCGOCmode:
7939 if (req_mode == CCZmode)
7940 return 0;
7941 /* FALLTHRU */
7942 case CCZmode:
7943 break;
7944
7945 default:
7946 abort ();
7947 }
7948
7949 return (GET_MODE (SET_SRC (set)) == set_mode);
7950 }
7951
7952 /* Generate insn patterns to do an integer compare of OPERANDS. */
7953
7954 static rtx
7955 ix86_expand_int_compare (code, op0, op1)
7956 enum rtx_code code;
7957 rtx op0, op1;
7958 {
7959 enum machine_mode cmpmode;
7960 rtx tmp, flags;
7961
7962 cmpmode = SELECT_CC_MODE (code, op0, op1);
7963 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7964
7965 /* This is very simple, but making the interface the same as in the
7966 FP case makes the rest of the code easier. */
7967 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7968 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7969
7970 /* Return the test that should be put into the flags user, i.e.
7971 the bcc, scc, or cmov instruction. */
7972 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7973 }
7974
7975 /* Figure out whether to use ordered or unordered fp comparisons.
7976 Return the appropriate mode to use. */
7977
7978 enum machine_mode
7979 ix86_fp_compare_mode (code)
7980 enum rtx_code code ATTRIBUTE_UNUSED;
7981 {
7982 /* ??? In order to make all comparisons reversible, we do all comparisons
7983 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7984 all forms of trapping and nontrapping comparisons, we can make inequality
7985 comparisons trapping again, since it results in better code when using
7986 FCOM based compares. */
7987 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7988 }
7989
7990 enum machine_mode
7991 ix86_cc_mode (code, op0, op1)
7992 enum rtx_code code;
7993 rtx op0, op1;
7994 {
7995 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7996 return ix86_fp_compare_mode (code);
7997 switch (code)
7998 {
7999 /* Only zero flag is needed. */
8000 case EQ: /* ZF=0 */
8001 case NE: /* ZF!=0 */
8002 return CCZmode;
8003 /* Codes needing carry flag. */
8004 case GEU: /* CF=0 */
8005 case GTU: /* CF=0 & ZF=0 */
8006 case LTU: /* CF=1 */
8007 case LEU: /* CF=1 | ZF=1 */
8008 return CCmode;
8009 /* Codes possibly doable only with sign flag when
8010 comparing against zero. */
8011 case GE: /* SF=OF or SF=0 */
8012 case LT: /* SF<>OF or SF=1 */
8013 if (op1 == const0_rtx)
8014 return CCGOCmode;
8015 else
8016 /* For other cases the carry flag is not required. */
8017 return CCGCmode;
8018 /* Codes doable only with the sign flag when comparing
8019 against zero, but we have no jump instruction for it,
8020 so we need to use relational tests against the overflow
8021 flag, which thus needs to be zero. */
8022 case GT: /* ZF=0 & SF=OF */
8023 case LE: /* ZF=1 | SF<>OF */
8024 if (op1 == const0_rtx)
8025 return CCNOmode;
8026 else
8027 return CCGCmode;
8028 /* The strcmp pattern does (use flags), and combine may ask us for the
8029 proper mode. */
8030 case USE:
8031 return CCmode;
8032 default:
8033 abort ();
8034 }
8035 }
8036
8037 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8038
8039 int
8040 ix86_use_fcomi_compare (code)
8041 enum rtx_code code ATTRIBUTE_UNUSED;
8042 {
8043 enum rtx_code swapped_code = swap_condition (code);
8044 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8045 || (ix86_fp_comparison_cost (swapped_code)
8046 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8047 }
8048
8049 /* Swap, force into registers, or otherwise massage the two operands
8050 to a fp comparison. The operands are updated in place; the new
8051 comparison code is returned. */
8052
8053 static enum rtx_code
8054 ix86_prepare_fp_compare_args (code, pop0, pop1)
8055 enum rtx_code code;
8056 rtx *pop0, *pop1;
8057 {
8058 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8059 rtx op0 = *pop0, op1 = *pop1;
8060 enum machine_mode op_mode = GET_MODE (op0);
8061 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8062
8063 /* All of the unordered compare instructions only work on registers.
8064 The same is true of the XFmode compare instructions and of the
8065 fcomi compare instructions. */
8066
8067 if (!is_sse
8068 && (fpcmp_mode == CCFPUmode
8069 || op_mode == XFmode
8070 || op_mode == TFmode
8071 || ix86_use_fcomi_compare (code)))
8072 {
8073 op0 = force_reg (op_mode, op0);
8074 op1 = force_reg (op_mode, op1);
8075 }
8076 else
8077 {
8078 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8079 things around if they appear profitable, otherwise force op0
8080 into a register. */
8081
8082 if (standard_80387_constant_p (op0) == 0
8083 || (GET_CODE (op0) == MEM
8084 && ! (standard_80387_constant_p (op1) == 0
8085 || GET_CODE (op1) == MEM)))
8086 {
8087 rtx tmp;
8088 tmp = op0, op0 = op1, op1 = tmp;
8089 code = swap_condition (code);
8090 }
8091
8092 if (GET_CODE (op0) != REG)
8093 op0 = force_reg (op_mode, op0);
8094
8095 if (CONSTANT_P (op1))
8096 {
8097 if (standard_80387_constant_p (op1))
8098 op1 = force_reg (op_mode, op1);
8099 else
8100 op1 = validize_mem (force_const_mem (op_mode, op1));
8101 }
8102 }
8103
8104 /* Try to rearrange the comparison to make it cheaper. */
8105 if (ix86_fp_comparison_cost (code)
8106 > ix86_fp_comparison_cost (swap_condition (code))
8107 && (GET_CODE (op1) == REG || !no_new_pseudos))
8108 {
8109 rtx tmp;
8110 tmp = op0, op0 = op1, op1 = tmp;
8111 code = swap_condition (code);
8112 if (GET_CODE (op0) != REG)
8113 op0 = force_reg (op_mode, op0);
8114 }
8115
8116 *pop0 = op0;
8117 *pop1 = op1;
8118 return code;
8119 }
8120
8121 /* Convert the comparison codes we use to represent FP comparisons to the
8122 integer codes that will result in a proper branch. Return UNKNOWN if no
8123 such code is available. */
8124 static enum rtx_code
8125 ix86_fp_compare_code_to_integer (code)
8126 enum rtx_code code;
8127 {
8128 switch (code)
8129 {
8130 case GT:
8131 return GTU;
8132 case GE:
8133 return GEU;
8134 case ORDERED:
8135 case UNORDERED:
8136 return code;
8137 break;
8138 case UNEQ:
8139 return EQ;
8140 break;
8141 case UNLT:
8142 return LTU;
8143 break;
8144 case UNLE:
8145 return LEU;
8146 break;
8147 case LTGT:
8148 return NE;
8149 break;
8150 default:
8151 return UNKNOWN;
8152 }
8153 }
8154
8155 /* Split comparison code CODE into comparisons we can do using branch
8156 instructions. BYPASS_CODE is the comparison code for the branch that
8157 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8158 is not required, its value is set to NIL.
8159 We never require more than two branches. */
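/* For illustration: under TARGET_IEEE_FP an EQ compare is split into
   FIRST_CODE = UNEQ guarded by BYPASS_CODE = UNORDERED (branch around the
   equality test when the operands are unordered), while NE becomes
   FIRST_CODE = LTGT with SECOND_CODE = UNORDERED (a second branch that also
   takes the target).  Without IEEE math both extra branches are dropped.  */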
8160 static void
8161 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8162 enum rtx_code code, *bypass_code, *first_code, *second_code;
8163 {
8164 *first_code = code;
8165 *bypass_code = NIL;
8166 *second_code = NIL;
8167
8168 /* The fcomi comparison sets flags as follows:
8169
8170 cmp ZF PF CF
8171 > 0 0 0
8172 < 0 0 1
8173 = 1 0 0
8174 un 1 1 1 */
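/* Reading the table: the unsigned branch conditions map directly onto these
   flags -- e.g. "ja" (CF=0 & ZF=0) implements GT and "jae" (CF=0) implements
   GE -- while PF=1 marks the unordered case, which is why the codes below
   that must fail on unordered need a bypass or second branch when IEEE
   compliance is wanted.  */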
8175
8176 switch (code)
8177 {
8178 case GT: /* GTU - CF=0 & ZF=0 */
8179 case GE: /* GEU - CF=0 */
8180 case ORDERED: /* PF=0 */
8181 case UNORDERED: /* PF=1 */
8182 case UNEQ: /* EQ - ZF=1 */
8183 case UNLT: /* LTU - CF=1 */
8184 case UNLE: /* LEU - CF=1 | ZF=1 */
8185 case LTGT: /* EQ - ZF=0 */
8186 break;
8187 case LT: /* LTU - CF=1 - fails on unordered */
8188 *first_code = UNLT;
8189 *bypass_code = UNORDERED;
8190 break;
8191 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8192 *first_code = UNLE;
8193 *bypass_code = UNORDERED;
8194 break;
8195 case EQ: /* EQ - ZF=1 - fails on unordered */
8196 *first_code = UNEQ;
8197 *bypass_code = UNORDERED;
8198 break;
8199 case NE: /* NE - ZF=0 - fails on unordered */
8200 *first_code = LTGT;
8201 *second_code = UNORDERED;
8202 break;
8203 case UNGE: /* GEU - CF=0 - fails on unordered */
8204 *first_code = GE;
8205 *second_code = UNORDERED;
8206 break;
8207 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8208 *first_code = GT;
8209 *second_code = UNORDERED;
8210 break;
8211 default:
8212 abort ();
8213 }
8214 if (!TARGET_IEEE_FP)
8215 {
8216 *second_code = NIL;
8217 *bypass_code = NIL;
8218 }
8219 }
8220
8221 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8222 All of the following functions use the number of instructions as a cost metric.
8223 In the future this should be tweaked to compute bytes for optimize_size and to
8224 take into account the performance of various instructions on various CPUs. */
8225 static int
8226 ix86_fp_comparison_arithmetics_cost (code)
8227 enum rtx_code code;
8228 {
8229 if (!TARGET_IEEE_FP)
8230 return 4;
8231 /* The cost of code output by ix86_expand_fp_compare. */
8232 switch (code)
8233 {
8234 case UNLE:
8235 case UNLT:
8236 case LTGT:
8237 case GT:
8238 case GE:
8239 case UNORDERED:
8240 case ORDERED:
8241 case UNEQ:
8242 return 4;
8243 break;
8244 case LT:
8245 case NE:
8246 case EQ:
8247 case UNGE:
8248 return 5;
8249 break;
8250 case LE:
8251 case UNGT:
8252 return 6;
8253 break;
8254 default:
8255 abort ();
8256 }
8257 }
8258
8259 /* Return cost of comparison done using fcomi operation.
8260 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8261 static int
8262 ix86_fp_comparison_fcomi_cost (code)
8263 enum rtx_code code;
8264 {
8265 enum rtx_code bypass_code, first_code, second_code;
8266 /* Return an arbitrarily high cost when the instruction is not supported;
8267 this prevents gcc from using it. */
8268 if (!TARGET_CMOVE)
8269 return 1024;
8270 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8271 return (bypass_code != NIL || second_code != NIL) + 2;
8272 }
8273
8274 /* Return cost of comparison done using sahf operation.
8275 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8276 static int
8277 ix86_fp_comparison_sahf_cost (code)
8278 enum rtx_code code;
8279 {
8280 enum rtx_code bypass_code, first_code, second_code;
8281 /* Return an arbitrarily high cost when the instruction is not preferred;
8282 this keeps gcc from using it. */
8283 if (!TARGET_USE_SAHF && !optimize_size)
8284 return 1024;
8285 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8286 return (bypass_code != NIL || second_code != NIL) + 3;
8287 }
8288
8289 /* Compute cost of the comparison done using any method.
8290 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8291 static int
8292 ix86_fp_comparison_cost (code)
8293 enum rtx_code code;
8294 {
8295 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8296 int min;
8297
8298 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8299 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8300
8301 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8302 if (min > sahf_cost)
8303 min = sahf_cost;
8304 if (min > fcomi_cost)
8305 min = fcomi_cost;
8306 return min;
8307 }
8308
8309 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8310
8311 static rtx
8312 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8313 enum rtx_code code;
8314 rtx op0, op1, scratch;
8315 rtx *second_test;
8316 rtx *bypass_test;
8317 {
8318 enum machine_mode fpcmp_mode, intcmp_mode;
8319 rtx tmp, tmp2;
8320 int cost = ix86_fp_comparison_cost (code);
8321 enum rtx_code bypass_code, first_code, second_code;
8322
8323 fpcmp_mode = ix86_fp_compare_mode (code);
8324 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8325
8326 if (second_test)
8327 *second_test = NULL_RTX;
8328 if (bypass_test)
8329 *bypass_test = NULL_RTX;
8330
8331 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8332
8333 /* Do fcomi/sahf based test when profitable. */
8334 if ((bypass_code == NIL || bypass_test)
8335 && (second_code == NIL || second_test)
8336 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8337 {
8338 if (TARGET_CMOVE)
8339 {
8340 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8341 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8342 tmp);
8343 emit_insn (tmp);
8344 }
8345 else
8346 {
8347 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8348 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8349 if (!scratch)
8350 scratch = gen_reg_rtx (HImode);
8351 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8352 emit_insn (gen_x86_sahf_1 (scratch));
8353 }
8354
8355 /* The FP codes work out to act like unsigned. */
8356 intcmp_mode = fpcmp_mode;
8357 code = first_code;
8358 if (bypass_code != NIL)
8359 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8360 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8361 const0_rtx);
8362 if (second_code != NIL)
8363 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8364 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8365 const0_rtx);
8366 }
8367 else
8368 {
8369 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8370 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8371 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8372 if (!scratch)
8373 scratch = gen_reg_rtx (HImode);
8374 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8375
8376 /* In the unordered case, we have to check C2 for NaNs, which
8377 doesn't happen to work out to anything nice combination-wise.
8378 So do some bit twiddling on the value we've got in AH to come
8379 up with an appropriate set of condition codes. */
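/* As a reading aid: fnstsw copies the FPU status word into AX, so in AH the
   condition bits land as C0 -> bit 0, C2 -> bit 2 and C3 -> bit 6.  Hence the
   masks used below: 0x45 selects C0|C2|C3, 0x40 selects C3 (equality), 0x01
   selects C0 (below) and 0x04 selects C2 (unordered).  */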
8380
8381 intcmp_mode = CCNOmode;
8382 switch (code)
8383 {
8384 case GT:
8385 case UNGT:
8386 if (code == GT || !TARGET_IEEE_FP)
8387 {
8388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8389 code = EQ;
8390 }
8391 else
8392 {
8393 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8394 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8395 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8396 intcmp_mode = CCmode;
8397 code = GEU;
8398 }
8399 break;
8400 case LT:
8401 case UNLT:
8402 if (code == LT && TARGET_IEEE_FP)
8403 {
8404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8405 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8406 intcmp_mode = CCmode;
8407 code = EQ;
8408 }
8409 else
8410 {
8411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8412 code = NE;
8413 }
8414 break;
8415 case GE:
8416 case UNGE:
8417 if (code == GE || !TARGET_IEEE_FP)
8418 {
8419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8420 code = EQ;
8421 }
8422 else
8423 {
8424 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8425 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8426 GEN_INT (0x01)));
8427 code = NE;
8428 }
8429 break;
8430 case LE:
8431 case UNLE:
8432 if (code == LE && TARGET_IEEE_FP)
8433 {
8434 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8435 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8436 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8437 intcmp_mode = CCmode;
8438 code = LTU;
8439 }
8440 else
8441 {
8442 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8443 code = NE;
8444 }
8445 break;
8446 case EQ:
8447 case UNEQ:
8448 if (code == EQ && TARGET_IEEE_FP)
8449 {
8450 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8451 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8452 intcmp_mode = CCmode;
8453 code = EQ;
8454 }
8455 else
8456 {
8457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8458 code = NE;
8459 break;
8460 }
8461 break;
8462 case NE:
8463 case LTGT:
8464 if (code == NE && TARGET_IEEE_FP)
8465 {
8466 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8467 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8468 GEN_INT (0x40)));
8469 code = NE;
8470 }
8471 else
8472 {
8473 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8474 code = EQ;
8475 }
8476 break;
8477
8478 case UNORDERED:
8479 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8480 code = NE;
8481 break;
8482 case ORDERED:
8483 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8484 code = EQ;
8485 break;
8486
8487 default:
8488 abort ();
8489 }
8490 }
8491
8492 /* Return the test that should be put into the flags user, i.e.
8493 the bcc, scc, or cmov instruction. */
8494 return gen_rtx_fmt_ee (code, VOIDmode,
8495 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8496 const0_rtx);
8497 }
8498
8499 rtx
8500 ix86_expand_compare (code, second_test, bypass_test)
8501 enum rtx_code code;
8502 rtx *second_test, *bypass_test;
8503 {
8504 rtx op0, op1, ret;
8505 op0 = ix86_compare_op0;
8506 op1 = ix86_compare_op1;
8507
8508 if (second_test)
8509 *second_test = NULL_RTX;
8510 if (bypass_test)
8511 *bypass_test = NULL_RTX;
8512
8513 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8514 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8515 second_test, bypass_test);
8516 else
8517 ret = ix86_expand_int_compare (code, op0, op1);
8518
8519 return ret;
8520 }
8521
8522 /* Return true if the CODE will result in a nontrivial jump sequence. */
8523 bool
8524 ix86_fp_jump_nontrivial_p (code)
8525 enum rtx_code code;
8526 {
8527 enum rtx_code bypass_code, first_code, second_code;
8528 if (!TARGET_CMOVE)
8529 return true;
8530 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8531 return bypass_code != NIL || second_code != NIL;
8532 }
8533
8534 void
8535 ix86_expand_branch (code, label)
8536 enum rtx_code code;
8537 rtx label;
8538 {
8539 rtx tmp;
8540
8541 switch (GET_MODE (ix86_compare_op0))
8542 {
8543 case QImode:
8544 case HImode:
8545 case SImode:
8546 simple:
8547 tmp = ix86_expand_compare (code, NULL, NULL);
8548 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8549 gen_rtx_LABEL_REF (VOIDmode, label),
8550 pc_rtx);
8551 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8552 return;
8553
8554 case SFmode:
8555 case DFmode:
8556 case XFmode:
8557 case TFmode:
8558 {
8559 rtvec vec;
8560 int use_fcomi;
8561 enum rtx_code bypass_code, first_code, second_code;
8562
8563 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8564 &ix86_compare_op1);
8565
8566 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8567
8568 /* Check whether we will use the natural sequence with one jump. If
8569 so, we can expand the jump early. Otherwise delay expansion by
8570 creating a compound insn so as not to confuse the optimizers. */
8571 if (bypass_code == NIL && second_code == NIL
8572 && TARGET_CMOVE)
8573 {
8574 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8575 gen_rtx_LABEL_REF (VOIDmode, label),
8576 pc_rtx, NULL_RTX);
8577 }
8578 else
8579 {
8580 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8581 ix86_compare_op0, ix86_compare_op1);
8582 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8583 gen_rtx_LABEL_REF (VOIDmode, label),
8584 pc_rtx);
8585 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8586
8587 use_fcomi = ix86_use_fcomi_compare (code);
8588 vec = rtvec_alloc (3 + !use_fcomi);
8589 RTVEC_ELT (vec, 0) = tmp;
8590 RTVEC_ELT (vec, 1)
8591 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8592 RTVEC_ELT (vec, 2)
8593 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8594 if (! use_fcomi)
8595 RTVEC_ELT (vec, 3)
8596 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8597
8598 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8599 }
8600 return;
8601 }
8602
8603 case DImode:
8604 if (TARGET_64BIT)
8605 goto simple;
8606 /* Expand DImode branch into multiple compare+branch. */
8607 {
8608 rtx lo[2], hi[2], label2;
8609 enum rtx_code code1, code2, code3;
8610
8611 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8612 {
8613 tmp = ix86_compare_op0;
8614 ix86_compare_op0 = ix86_compare_op1;
8615 ix86_compare_op1 = tmp;
8616 code = swap_condition (code);
8617 }
8618 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8619 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8620
8621 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8622 avoid two branches. This costs one extra insn, so disable when
8623 optimizing for size. */
8624
8625 if ((code == EQ || code == NE)
8626 && (!optimize_size
8627 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8628 {
8629 rtx xor0, xor1;
8630
8631 xor1 = hi[0];
8632 if (hi[1] != const0_rtx)
8633 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8634 NULL_RTX, 0, OPTAB_WIDEN);
8635
8636 xor0 = lo[0];
8637 if (lo[1] != const0_rtx)
8638 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8639 NULL_RTX, 0, OPTAB_WIDEN);
8640
8641 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8642 NULL_RTX, 0, OPTAB_WIDEN);
8643
8644 ix86_compare_op0 = tmp;
8645 ix86_compare_op1 = const0_rtx;
8646 ix86_expand_branch (code, label);
8647 return;
8648 }
8649
8650 /* Otherwise, if we are doing a less-than or greater-or-equal-than
8651 comparison, op1 is a constant, and the low word is zero, then we
8652 can just examine the high word. */
8653
8654 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8655 switch (code)
8656 {
8657 case LT: case LTU: case GE: case GEU:
8658 ix86_compare_op0 = hi[0];
8659 ix86_compare_op1 = hi[1];
8660 ix86_expand_branch (code, label);
8661 return;
8662 default:
8663 break;
8664 }
8665
8666 /* Otherwise, we need two or three jumps. */
8667
8668 label2 = gen_label_rtx ();
8669
8670 code1 = code;
8671 code2 = swap_condition (code);
8672 code3 = unsigned_condition (code);
8673
8674 switch (code)
8675 {
8676 case LT: case GT: case LTU: case GTU:
8677 break;
8678
8679 case LE: code1 = LT; code2 = GT; break;
8680 case GE: code1 = GT; code2 = LT; break;
8681 case LEU: code1 = LTU; code2 = GTU; break;
8682 case GEU: code1 = GTU; code2 = LTU; break;
8683
8684 case EQ: code1 = NIL; code2 = NE; break;
8685 case NE: code2 = NIL; break;
8686
8687 default:
8688 abort ();
8689 }
8690
8691 /*
8692 * a < b =>
8693 * if (hi(a) < hi(b)) goto true;
8694 * if (hi(a) > hi(b)) goto false;
8695 * if (lo(a) < lo(b)) goto true;
8696 * false:
8697 */
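/* For example, a signed "a <= b" on a 32-bit target uses code1 = LT and
   code2 = GT on the high words, and falls through to code3 = LEU (the
   unsigned form of the original code) on the low words when the high
   words are equal.  */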
8698
8699 ix86_compare_op0 = hi[0];
8700 ix86_compare_op1 = hi[1];
8701
8702 if (code1 != NIL)
8703 ix86_expand_branch (code1, label);
8704 if (code2 != NIL)
8705 ix86_expand_branch (code2, label2);
8706
8707 ix86_compare_op0 = lo[0];
8708 ix86_compare_op1 = lo[1];
8709 ix86_expand_branch (code3, label);
8710
8711 if (code2 != NIL)
8712 emit_label (label2);
8713 return;
8714 }
8715
8716 default:
8717 abort ();
8718 }
8719 }
8720
8721 /* Split branch based on floating point condition. */
8722 void
8723 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8724 enum rtx_code code;
8725 rtx op1, op2, target1, target2, tmp;
8726 {
8727 rtx second, bypass;
8728 rtx label = NULL_RTX;
8729 rtx condition;
8730 int bypass_probability = -1, second_probability = -1, probability = -1;
8731 rtx i;
8732
8733 if (target2 != pc_rtx)
8734 {
8735 rtx tmp = target2;
8736 code = reverse_condition_maybe_unordered (code);
8737 target2 = target1;
8738 target1 = tmp;
8739 }
8740
8741 condition = ix86_expand_fp_compare (code, op1, op2,
8742 tmp, &second, &bypass);
8743
8744 if (split_branch_probability >= 0)
8745 {
8746 /* Distribute the probabilities across the jumps.
8747 Assume that BYPASS and SECOND always test
8748 for UNORDERED. */
8749 probability = split_branch_probability;
8750
8751 /* A value of 1 is low enough that the probability does not
8752 need to be updated. Later we may run some experiments and see
8753 if unordered values are more frequent in practice. */
8754 if (bypass)
8755 bypass_probability = 1;
8756 if (second)
8757 second_probability = 1;
8758 }
8759 if (bypass != NULL_RTX)
8760 {
8761 label = gen_label_rtx ();
8762 i = emit_jump_insn (gen_rtx_SET
8763 (VOIDmode, pc_rtx,
8764 gen_rtx_IF_THEN_ELSE (VOIDmode,
8765 bypass,
8766 gen_rtx_LABEL_REF (VOIDmode,
8767 label),
8768 pc_rtx)));
8769 if (bypass_probability >= 0)
8770 REG_NOTES (i)
8771 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8772 GEN_INT (bypass_probability),
8773 REG_NOTES (i));
8774 }
8775 i = emit_jump_insn (gen_rtx_SET
8776 (VOIDmode, pc_rtx,
8777 gen_rtx_IF_THEN_ELSE (VOIDmode,
8778 condition, target1, target2)));
8779 if (probability >= 0)
8780 REG_NOTES (i)
8781 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8782 GEN_INT (probability),
8783 REG_NOTES (i));
8784 if (second != NULL_RTX)
8785 {
8786 i = emit_jump_insn (gen_rtx_SET
8787 (VOIDmode, pc_rtx,
8788 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8789 target2)));
8790 if (second_probability >= 0)
8791 REG_NOTES (i)
8792 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8793 GEN_INT (second_probability),
8794 REG_NOTES (i));
8795 }
8796 if (label != NULL_RTX)
8797 emit_label (label);
8798 }
8799
8800 int
8801 ix86_expand_setcc (code, dest)
8802 enum rtx_code code;
8803 rtx dest;
8804 {
8805 rtx ret, tmp, tmpreg;
8806 rtx second_test, bypass_test;
8807
8808 if (GET_MODE (ix86_compare_op0) == DImode
8809 && !TARGET_64BIT)
8810 return 0; /* FAIL */
8811
8812 if (GET_MODE (dest) != QImode)
8813 abort ();
8814
8815 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8816 PUT_MODE (ret, QImode);
8817
8818 tmp = dest;
8819 tmpreg = dest;
8820
8821 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8822 if (bypass_test || second_test)
8823 {
8824 rtx test = second_test;
8825 int bypass = 0;
8826 rtx tmp2 = gen_reg_rtx (QImode);
8827 if (bypass_test)
8828 {
8829 if (second_test)
8830 abort ();
8831 test = bypass_test;
8832 bypass = 1;
8833 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8834 }
8835 PUT_MODE (test, QImode);
8836 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8837
8838 if (bypass)
8839 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8840 else
8841 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8842 }
8843
8844 return 1; /* DONE */
8845 }
8846
8847 int
8848 ix86_expand_int_movcc (operands)
8849 rtx operands[];
8850 {
8851 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8852 rtx compare_seq, compare_op;
8853 rtx second_test, bypass_test;
8854 enum machine_mode mode = GET_MODE (operands[0]);
8855
8856 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8857 When the comparison is done against an immediate, we can convert it to
8858 LTU or GEU by altering the integer. */
8859
8860 if ((code == LEU || code == GTU)
8861 && GET_CODE (ix86_compare_op1) == CONST_INT
8862 && mode != HImode
8863 && INTVAL (ix86_compare_op1) != -1
8864 /* For x86-64, the immediate field in the instruction is 32-bit
8865 signed, so we can't increment a DImode value above 0x7fffffff. */
8866 && (!TARGET_64BIT
8867 || GET_MODE (ix86_compare_op0) != DImode
8868 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8869 && GET_CODE (operands[2]) == CONST_INT
8870 && GET_CODE (operands[3]) == CONST_INT)
8871 {
8872 if (code == LEU)
8873 code = LTU;
8874 else
8875 code = GEU;
8876 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8877 GET_MODE (ix86_compare_op0));
8878 }
8879
8880 start_sequence ();
8881 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8882 compare_seq = get_insns ();
8883 end_sequence ();
8884
8885 compare_code = GET_CODE (compare_op);
8886
8887 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8888 HImode insns, we'd be swallowed in word prefix ops. */
8889
8890 if (mode != HImode
8891 && (mode != DImode || TARGET_64BIT)
8892 && GET_CODE (operands[2]) == CONST_INT
8893 && GET_CODE (operands[3]) == CONST_INT)
8894 {
8895 rtx out = operands[0];
8896 HOST_WIDE_INT ct = INTVAL (operands[2]);
8897 HOST_WIDE_INT cf = INTVAL (operands[3]);
8898 HOST_WIDE_INT diff;
8899
8900 if ((compare_code == LTU || compare_code == GEU)
8901 && !second_test && !bypass_test)
8902 {
8903 /* Detect overlap between destination and compare sources. */
8904 rtx tmp = out;
8905
8906 /* To simplify rest of code, restrict to the GEU case. */
8907 if (compare_code == LTU)
8908 {
8909 int tmp = ct;
8910 ct = cf;
8911 cf = tmp;
8912 compare_code = reverse_condition (compare_code);
8913 code = reverse_condition (code);
8914 }
8915 diff = ct - cf;
8916
8917 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8918 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8919 tmp = gen_reg_rtx (mode);
8920
8921 emit_insn (compare_seq);
8922 if (mode == DImode)
8923 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8924 else
8925 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8926
8927 if (diff == 1)
8928 {
8929 /*
8930 * cmpl op0,op1
8931 * sbbl dest,dest
8932 * [addl dest, ct]
8933 *
8934 * Size 5 - 8.
8935 */
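/* Note on the trick above: "sbb reg,reg" computes reg - reg - CF, i.e. -CF,
   so after the compare the register holds the all-ones mask -1 when the
   carry is set and 0 otherwise; the optional add then maps that mask onto
   the two constants.  */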
8936 if (ct)
8937 tmp = expand_simple_binop (mode, PLUS,
8938 tmp, GEN_INT (ct),
8939 tmp, 1, OPTAB_DIRECT);
8940 }
8941 else if (cf == -1)
8942 {
8943 /*
8944 * cmpl op0,op1
8945 * sbbl dest,dest
8946 * orl $ct, dest
8947 *
8948 * Size 8.
8949 */
8950 tmp = expand_simple_binop (mode, IOR,
8951 tmp, GEN_INT (ct),
8952 tmp, 1, OPTAB_DIRECT);
8953 }
8954 else if (diff == -1 && ct)
8955 {
8956 /*
8957 * cmpl op0,op1
8958 * sbbl dest,dest
8959 * notl dest
8960 * [addl dest, cf]
8961 *
8962 * Size 8 - 11.
8963 */
8964 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8965 if (cf)
8966 tmp = expand_simple_binop (mode, PLUS,
8967 tmp, GEN_INT (cf),
8968 tmp, 1, OPTAB_DIRECT);
8969 }
8970 else
8971 {
8972 /*
8973 * cmpl op0,op1
8974 * sbbl dest,dest
8975 * [notl dest]
8976 * andl cf - ct, dest
8977 * [addl dest, ct]
8978 *
8979 * Size 8 - 11.
8980 */
8981
8982 if (cf == 0)
8983 {
8984 cf = ct;
8985 ct = 0;
8986 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8987 }
8988
8989 tmp = expand_simple_binop (mode, AND,
8990 tmp,
8991 gen_int_mode (cf - ct, mode),
8992 tmp, 1, OPTAB_DIRECT);
8993 if (ct)
8994 tmp = expand_simple_binop (mode, PLUS,
8995 tmp, GEN_INT (ct),
8996 tmp, 1, OPTAB_DIRECT);
8997 }
8998
8999 if (tmp != out)
9000 emit_move_insn (out, tmp);
9001
9002 return 1; /* DONE */
9003 }
9004
9005 diff = ct - cf;
9006 if (diff < 0)
9007 {
9008 HOST_WIDE_INT tmp;
9009 tmp = ct, ct = cf, cf = tmp;
9010 diff = -diff;
9011 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9012 {
9013 /* We may be reversing an unordered compare to a normal compare, which
9014 is not valid in general (we may convert a non-trapping condition
9015 to a trapping one); however, on i386 we currently emit all
9016 comparisons unordered. */
9017 compare_code = reverse_condition_maybe_unordered (compare_code);
9018 code = reverse_condition_maybe_unordered (code);
9019 }
9020 else
9021 {
9022 compare_code = reverse_condition (compare_code);
9023 code = reverse_condition (code);
9024 }
9025 }
9026
9027 compare_code = NIL;
9028 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9029 && GET_CODE (ix86_compare_op1) == CONST_INT)
9030 {
9031 if (ix86_compare_op1 == const0_rtx
9032 && (code == LT || code == GE))
9033 compare_code = code;
9034 else if (ix86_compare_op1 == constm1_rtx)
9035 {
9036 if (code == LE)
9037 compare_code = LT;
9038 else if (code == GT)
9039 compare_code = GE;
9040 }
9041 }
9042
9043 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9044 if (compare_code != NIL
9045 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9046 && (cf == -1 || ct == -1))
9047 {
9048 /* If the lea code below could be used, only optimize this case
9049 if it results in a 2 insn sequence. */
9050
9051 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9052 || diff == 3 || diff == 5 || diff == 9)
9053 || (compare_code == LT && ct == -1)
9054 || (compare_code == GE && cf == -1))
9055 {
9056 /*
9057 * notl op1 (if necessary)
9058 * sarl $31, op1
9059 * orl cf, op1
9060 */
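/* Here "sarl $31" replicates the sign bit across the register, yielding the
   mask -1 for negative inputs and 0 otherwise, so or-ing in cf produces
   -1 or cf without a branch.  */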
9061 if (ct != -1)
9062 {
9063 cf = ct;
9064 ct = -1;
9065 code = reverse_condition (code);
9066 }
9067
9068 out = emit_store_flag (out, code, ix86_compare_op0,
9069 ix86_compare_op1, VOIDmode, 0, -1);
9070
9071 out = expand_simple_binop (mode, IOR,
9072 out, GEN_INT (cf),
9073 out, 1, OPTAB_DIRECT);
9074 if (out != operands[0])
9075 emit_move_insn (operands[0], out);
9076
9077 return 1; /* DONE */
9078 }
9079 }
9080
9081 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9082 || diff == 3 || diff == 5 || diff == 9)
9083 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9084 {
9085 /*
9086 * xorl dest,dest
9087 * cmpl op1,op2
9088 * setcc dest
9089 * lea cf(dest*(ct-cf)),dest
9090 *
9091 * Size 14.
9092 *
9093 * This also catches the degenerate setcc-only case.
9094 */
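/* A concrete instance (illustrative): with ct = 5 and cf = 2, diff = 3, so
   setcc leaves dest = 0 or 1 and the lea computes 2 + dest*3, e.g.
   "leal 2(%eax,%eax,2), %eax" -- 2 when the condition is false and 5 when
   it is true, with no branch.  */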
9095
9096 rtx tmp;
9097 int nops;
9098
9099 out = emit_store_flag (out, code, ix86_compare_op0,
9100 ix86_compare_op1, VOIDmode, 0, 1);
9101
9102 nops = 0;
9103 /* On x86_64 the lea instruction operates on Pmode, so we need
9104 to get the arithmetic done in the proper mode to match. */
9105 if (diff == 1)
9106 tmp = out;
9107 else
9108 {
9109 rtx out1;
9110 out1 = out;
9111 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9112 nops++;
9113 if (diff & 1)
9114 {
9115 tmp = gen_rtx_PLUS (mode, tmp, out1);
9116 nops++;
9117 }
9118 }
9119 if (cf != 0)
9120 {
9121 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9122 nops++;
9123 }
9124 if (tmp != out
9125 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9126 {
9127 if (nops == 1)
9128 {
9129 rtx clob;
9130
9131 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9132 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9133
9134 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9135 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9136 emit_insn (tmp);
9137 }
9138 else
9139 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9140 }
9141 if (out != operands[0])
9142 emit_move_insn (operands[0], copy_rtx (out));
9143
9144 return 1; /* DONE */
9145 }
9146
9147 /*
9148 * General case: Jumpful:
9149 * xorl dest,dest cmpl op1, op2
9150 * cmpl op1, op2 movl ct, dest
9151 * setcc dest jcc 1f
9152 * decl dest movl cf, dest
9153 * andl (cf-ct),dest 1:
9154 * addl ct,dest
9155 *
9156 * Size 20. Size 14.
9157 *
9158 * This is reasonably steep, but branch mispredict costs are
9159 * high on modern cpus, so consider failing only if optimizing
9160 * for space.
9161 *
9162 * %%% Parameterize branch_cost on the tuning architecture, then
9163 * use that. The 80386 couldn't care less about mispredicts.
9164 */
9165
9166 if (!optimize_size && !TARGET_CMOVE)
9167 {
9168 if (cf == 0)
9169 {
9170 cf = ct;
9171 ct = 0;
9172 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9173 /* We may be reversing an unordered compare to a normal compare,
9174 which is not valid in general (we may convert a non-trapping
9175 condition to a trapping one); however, on i386 we currently
9176 emit all comparisons unordered. */
9177 code = reverse_condition_maybe_unordered (code);
9178 else
9179 {
9180 code = reverse_condition (code);
9181 if (compare_code != NIL)
9182 compare_code = reverse_condition (compare_code);
9183 }
9184 }
9185
9186 if (compare_code != NIL)
9187 {
9188 /* notl op1 (if needed)
9189 sarl $31, op1
9190 andl (cf-ct), op1
9191 addl ct, op1
9192
9193 For x < 0 (resp. x <= -1) there will be no notl,
9194 so if possible swap the constants to get rid of the
9195 complement.
9196 True/false will be -1/0 while code below (store flag
9197 followed by decrement) is 0/-1, so the constants need
9198 to be exchanged once more. */
9199
9200 if (compare_code == GE || !cf)
9201 {
9202 code = reverse_condition (code);
9203 compare_code = LT;
9204 }
9205 else
9206 {
9207 HOST_WIDE_INT tmp = cf;
9208 cf = ct;
9209 ct = tmp;
9210 }
9211
9212 out = emit_store_flag (out, code, ix86_compare_op0,
9213 ix86_compare_op1, VOIDmode, 0, -1);
9214 }
9215 else
9216 {
9217 out = emit_store_flag (out, code, ix86_compare_op0,
9218 ix86_compare_op1, VOIDmode, 0, 1);
9219
9220 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9221 out, 1, OPTAB_DIRECT);
9222 }
9223
9224 out = expand_simple_binop (mode, AND, out,
9225 gen_int_mode (cf - ct, mode),
9226 out, 1, OPTAB_DIRECT);
9227 if (ct)
9228 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9229 out, 1, OPTAB_DIRECT);
9230 if (out != operands[0])
9231 emit_move_insn (operands[0], out);
9232
9233 return 1; /* DONE */
9234 }
9235 }
9236
9237 if (!TARGET_CMOVE)
9238 {
9239 /* Try a few things more with specific constants and a variable. */
9240
9241 optab op;
9242 rtx var, orig_out, out, tmp;
9243
9244 if (optimize_size)
9245 return 0; /* FAIL */
9246
9247 /* If one of the two operands is an interesting constant, load a
9248 constant with the above and mask it in with a logical operation. */
9249
9250 if (GET_CODE (operands[2]) == CONST_INT)
9251 {
9252 var = operands[3];
9253 if (INTVAL (operands[2]) == 0)
9254 operands[3] = constm1_rtx, op = and_optab;
9255 else if (INTVAL (operands[2]) == -1)
9256 operands[3] = const0_rtx, op = ior_optab;
9257 else
9258 return 0; /* FAIL */
9259 }
9260 else if (GET_CODE (operands[3]) == CONST_INT)
9261 {
9262 var = operands[2];
9263 if (INTVAL (operands[3]) == 0)
9264 operands[2] = constm1_rtx, op = and_optab;
9265 else if (INTVAL (operands[3]) == -1)
9266 operands[2] = const0_rtx, op = ior_optab;
9267 else
9268 return 0; /* FAIL */
9269 }
9270 else
9271 return 0; /* FAIL */
9272
9273 orig_out = operands[0];
9274 tmp = gen_reg_rtx (mode);
9275 operands[0] = tmp;
9276
9277 /* Recurse to get the constant loaded. */
9278 if (ix86_expand_int_movcc (operands) == 0)
9279 return 0; /* FAIL */
9280
9281 /* Mask in the interesting variable. */
9282 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9283 OPTAB_WIDEN);
9284 if (out != orig_out)
9285 emit_move_insn (orig_out, out);
9286
9287 return 1; /* DONE */
9288 }
9289
9290 /*
9291 * For comparison with above,
9292 *
9293 * movl cf,dest
9294 * movl ct,tmp
9295 * cmpl op1,op2
9296 * cmovcc tmp,dest
9297 *
9298 * Size 15.
9299 */
9300
9301 if (! nonimmediate_operand (operands[2], mode))
9302 operands[2] = force_reg (mode, operands[2]);
9303 if (! nonimmediate_operand (operands[3], mode))
9304 operands[3] = force_reg (mode, operands[3]);
9305
9306 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9307 {
9308 rtx tmp = gen_reg_rtx (mode);
9309 emit_move_insn (tmp, operands[3]);
9310 operands[3] = tmp;
9311 }
9312 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9313 {
9314 rtx tmp = gen_reg_rtx (mode);
9315 emit_move_insn (tmp, operands[2]);
9316 operands[2] = tmp;
9317 }
9318 if (! register_operand (operands[2], VOIDmode)
9319 && ! register_operand (operands[3], VOIDmode))
9320 operands[2] = force_reg (mode, operands[2]);
9321
9322 emit_insn (compare_seq);
9323 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9324 gen_rtx_IF_THEN_ELSE (mode,
9325 compare_op, operands[2],
9326 operands[3])));
9327 if (bypass_test)
9328 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9329 gen_rtx_IF_THEN_ELSE (mode,
9330 bypass_test,
9331 operands[3],
9332 operands[0])));
9333 if (second_test)
9334 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9335 gen_rtx_IF_THEN_ELSE (mode,
9336 second_test,
9337 operands[2],
9338 operands[0])));
9339
9340 return 1; /* DONE */
9341 }
9342
9343 int
9344 ix86_expand_fp_movcc (operands)
9345 rtx operands[];
9346 {
9347 enum rtx_code code;
9348 rtx tmp;
9349 rtx compare_op, second_test, bypass_test;
9350
9351 /* For SF/DFmode conditional moves based on comparisons
9352 in the same mode, we may want to use SSE min/max instructions. */
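/* For instance (illustrative): with SFmode operands, "r = a < b ? a : b"
   can be emitted as a single minss and "r = a > b ? a : b" as a single
   maxss, instead of a compare followed by a conditional move.  */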
9353 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9354 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9355 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9356 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9357 && (!TARGET_IEEE_FP
9358 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9359 /* We may be called from the post-reload splitter. */
9360 && (!REG_P (operands[0])
9361 || SSE_REG_P (operands[0])
9362 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9363 {
9364 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9365 code = GET_CODE (operands[1]);
9366
9367 /* See if we have a (cross) match between the comparison operands and
9368 conditional move operands. */
9369 if (rtx_equal_p (operands[2], op1))
9370 {
9371 rtx tmp = op0;
9372 op0 = op1;
9373 op1 = tmp;
9374 code = reverse_condition_maybe_unordered (code);
9375 }
9376 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9377 {
9378 /* Check for min operation. */
9379 if (code == LT)
9380 {
9381 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9382 if (memory_operand (op0, VOIDmode))
9383 op0 = force_reg (GET_MODE (operands[0]), op0);
9384 if (GET_MODE (operands[0]) == SFmode)
9385 emit_insn (gen_minsf3 (operands[0], op0, op1));
9386 else
9387 emit_insn (gen_mindf3 (operands[0], op0, op1));
9388 return 1;
9389 }
9390 /* Check for max operation. */
9391 if (code == GT)
9392 {
9393 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9394 if (memory_operand (op0, VOIDmode))
9395 op0 = force_reg (GET_MODE (operands[0]), op0);
9396 if (GET_MODE (operands[0]) == SFmode)
9397 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9398 else
9399 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9400 return 1;
9401 }
9402 }
9403 /* Arrange for the condition to be an sse_comparison_operator. In case
9404 we are in non-IEEE mode, try to canonicalize the destination operand
9405 to be first in the comparison - this helps reload to avoid extra
9406 moves. */
9407 if (!sse_comparison_operator (operands[1], VOIDmode)
9408 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9409 {
9410 rtx tmp = ix86_compare_op0;
9411 ix86_compare_op0 = ix86_compare_op1;
9412 ix86_compare_op1 = tmp;
9413 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9414 VOIDmode, ix86_compare_op0,
9415 ix86_compare_op1);
9416 }
9417 /* Similarly, try to arrange for the result to be the first operand of the
9418 conditional move. We also don't support the NE comparison on SSE, so try
9419 to avoid it. */
9420 if ((rtx_equal_p (operands[0], operands[3])
9421 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9422 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9423 {
9424 rtx tmp = operands[2];
9425 operands[2] = operands[3];
9426 operands[3] = tmp;
9427 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9428 (GET_CODE (operands[1])),
9429 VOIDmode, ix86_compare_op0,
9430 ix86_compare_op1);
9431 }
9432 if (GET_MODE (operands[0]) == SFmode)
9433 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9434 operands[2], operands[3],
9435 ix86_compare_op0, ix86_compare_op1));
9436 else
9437 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9438 operands[2], operands[3],
9439 ix86_compare_op0, ix86_compare_op1));
9440 return 1;
9441 }
9442
9446 code = GET_CODE (operands[1]);
9447 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9448
9449 /* The floating point conditional move instructions don't directly
9450 support signed integer comparisons. */
9451
9452 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9453 {
9454 if (second_test != NULL || bypass_test != NULL)
9455 abort ();
9456 tmp = gen_reg_rtx (QImode);
9457 ix86_expand_setcc (code, tmp);
9458 code = NE;
9459 ix86_compare_op0 = tmp;
9460 ix86_compare_op1 = const0_rtx;
9461 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9462 }
9463 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9464 {
9465 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9466 emit_move_insn (tmp, operands[3]);
9467 operands[3] = tmp;
9468 }
9469 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9470 {
9471 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9472 emit_move_insn (tmp, operands[2]);
9473 operands[2] = tmp;
9474 }
9475
9476 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9477 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9478 compare_op,
9479 operands[2],
9480 operands[3])));
9481 if (bypass_test)
9482 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9483 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9484 bypass_test,
9485 operands[3],
9486 operands[0])));
9487 if (second_test)
9488 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9489 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9490 second_test,
9491 operands[2],
9492 operands[0])));
9493
9494 return 1;
9495 }
9496
9497 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9498 works for floating point parameters and non-offsettable memories.
9499 For pushes, it returns just stack offsets; the values will be saved
9500 in the right order. At most three parts are generated. */
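/* For illustration: on a 32-bit target a DImode or DFmode operand splits
   into two SImode parts and an XFmode/TFmode operand into three, while on
   a 64-bit target XFmode/TFmode splits into a DImode part plus an SImode
   part, matching the size computation below.  */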
9501
9502 static int
9503 ix86_split_to_parts (operand, parts, mode)
9504 rtx operand;
9505 rtx *parts;
9506 enum machine_mode mode;
9507 {
9508 int size;
9509
9510 if (!TARGET_64BIT)
9511 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9512 else
9513 size = (GET_MODE_SIZE (mode) + 4) / 8;
9514
9515 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9516 abort ();
9517 if (size < 2 || size > 3)
9518 abort ();
9519
9520 /* Optimize constant pool references to immediates. This is used by fp
9521 moves, which force all constants to memory to allow combining. */
9522 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9523 {
9524 rtx tmp = maybe_get_pool_constant (operand);
9525 if (tmp)
9526 operand = tmp;
9527 }
9528
9529 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9530 {
9531 /* The only non-offsettable memories we handle are pushes. */
9532 if (! push_operand (operand, VOIDmode))
9533 abort ();
9534
9535 operand = copy_rtx (operand);
9536 PUT_MODE (operand, Pmode);
9537 parts[0] = parts[1] = parts[2] = operand;
9538 }
9539 else if (!TARGET_64BIT)
9540 {
9541 if (mode == DImode)
9542 split_di (&operand, 1, &parts[0], &parts[1]);
9543 else
9544 {
9545 if (REG_P (operand))
9546 {
9547 if (!reload_completed)
9548 abort ();
9549 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9550 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9551 if (size == 3)
9552 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9553 }
9554 else if (offsettable_memref_p (operand))
9555 {
9556 operand = adjust_address (operand, SImode, 0);
9557 parts[0] = operand;
9558 parts[1] = adjust_address (operand, SImode, 4);
9559 if (size == 3)
9560 parts[2] = adjust_address (operand, SImode, 8);
9561 }
9562 else if (GET_CODE (operand) == CONST_DOUBLE)
9563 {
9564 REAL_VALUE_TYPE r;
9565 long l[4];
9566
9567 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9568 switch (mode)
9569 {
9570 case XFmode:
9571 case TFmode:
9572 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9573 parts[2] = gen_int_mode (l[2], SImode);
9574 break;
9575 case DFmode:
9576 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9577 break;
9578 default:
9579 abort ();
9580 }
9581 parts[1] = gen_int_mode (l[1], SImode);
9582 parts[0] = gen_int_mode (l[0], SImode);
9583 }
9584 else
9585 abort ();
9586 }
9587 }
9588 else
9589 {
9590 if (mode == TImode)
9591 split_ti (&operand, 1, &parts[0], &parts[1]);
9592 if (mode == XFmode || mode == TFmode)
9593 {
9594 if (REG_P (operand))
9595 {
9596 if (!reload_completed)
9597 abort ();
9598 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9599 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9600 }
9601 else if (offsettable_memref_p (operand))
9602 {
9603 operand = adjust_address (operand, DImode, 0);
9604 parts[0] = operand;
9605 parts[1] = adjust_address (operand, SImode, 8);
9606 }
9607 else if (GET_CODE (operand) == CONST_DOUBLE)
9608 {
9609 REAL_VALUE_TYPE r;
9610 long l[3];
9611
9612 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9613 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9614 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9615 if (HOST_BITS_PER_WIDE_INT >= 64)
9616 parts[0]
9617 = gen_int_mode
9618 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9619 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9620 DImode);
9621 else
9622 parts[0] = immed_double_const (l[0], l[1], DImode);
9623 parts[1] = gen_int_mode (l[2], SImode);
9624 }
9625 else
9626 abort ();
9627 }
9628 }
9629
9630 return size;
9631 }
9632
9633 /* Emit insns to perform a move or push of DI, DF, and XF values.
9634 Return false when normal moves are needed; true when all required
9635 insns have been emitted. Operands 2-4 contain the input values
9636 in the correct order; operands 5-7 contain the output values. */
9637
9638 void
9639 ix86_split_long_move (operands)
9640 rtx operands[];
9641 {
9642 rtx part[2][3];
9643 int nparts;
9644 int push = 0;
9645 int collisions = 0;
9646 enum machine_mode mode = GET_MODE (operands[0]);
9647
9648 /* The DFmode expanders may ask us to move a double.
9649 For a 64-bit target this is a single move; by handling it here
9650 we simplify the i386.md splitters. */
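 /* Illustrative note (a sketch, not taken from the original sources): on
    x86-64 an 8-byte DFmode value fits in one general-register move, so a
    constant-pool load of, say, 1.0 can be rewritten as loading its bit
    pattern directly:

	movabsq $0x3ff0000000000000, %rax	# IEEE-754 image of 1.0

    instead of being split into two 32-bit moves as on ia32.  */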
9651 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9652 {
9653 /* Optimize a constant pool reference into an immediate. This is used by
9654 FP moves, which force all constants to memory to allow combining. */
9655
9656 if (GET_CODE (operands[1]) == MEM
9657 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9658 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9659 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9660 if (push_operand (operands[0], VOIDmode))
9661 {
9662 operands[0] = copy_rtx (operands[0]);
9663 PUT_MODE (operands[0], Pmode);
9664 }
9665 else
9666 operands[0] = gen_lowpart (DImode, operands[0]);
9667 operands[1] = gen_lowpart (DImode, operands[1]);
9668 emit_move_insn (operands[0], operands[1]);
9669 return;
9670 }
9671
9672 /* The only non-offsettable memory we handle is a push. */
9673 if (push_operand (operands[0], VOIDmode))
9674 push = 1;
9675 else if (GET_CODE (operands[0]) == MEM
9676 && ! offsettable_memref_p (operands[0]))
9677 abort ();
9678
9679 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9680 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9681
9682 /* When emitting a push, take care of source operands on the stack. */
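 /* Each push of a word moves %esp down, so an esp-relative source address
    would no longer point at the intended data by the time a later part is
    read.  Re-using the address expression of the part K words above (for
    the K pushes already emitted) compensates for that movement.  */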
9683 if (push && GET_CODE (operands[1]) == MEM
9684 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9685 {
9686 if (nparts == 3)
9687 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9688 XEXP (part[1][2], 0));
9689 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9690 XEXP (part[1][1], 0));
9691 }
9692
9693 /* We need to do the copy in the right order in case an address register
9694 of the source overlaps the destination. */
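 /* Example: moving (mem:DI (reg:SI %eax)) into %eax/%edx.  The low
    destination register %eax is also the address register, so the high
    word must be loaded first and the colliding (low) move done last.
    When more than one destination register appears in the address, an
    lea into the last part is emitted instead, so that only one colliding
    move remains.  */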
9695 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9696 {
9697 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9698 collisions++;
9699 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9700 collisions++;
9701 if (nparts == 3
9702 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9703 collisions++;
9704
9705 /* Collision in the middle part can be handled by reordering. */
9706 if (collisions == 1 && nparts == 3
9707 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9708 {
9709 rtx tmp;
9710 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9711 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9712 }
9713
9714 /* If there are more collisions, we can't handle it by reordering.
9715 Do an lea to the last part and use only one colliding move. */
9716 else if (collisions > 1)
9717 {
9718 collisions = 1;
9719 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9720 XEXP (part[1][0], 0)));
9721 part[1][0] = change_address (part[1][0],
9722 TARGET_64BIT ? DImode : SImode,
9723 part[0][nparts - 1]);
9724 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9725 if (nparts == 3)
9726 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9727 }
9728 }
9729
9730 if (push)
9731 {
9732 if (!TARGET_64BIT)
9733 {
9734 if (nparts == 3)
9735 {
9736 /* We use only the first 12 bytes of a TFmode value, but pushing
9737 requires us to adjust the stack as if we were pushing a real
9738 16-byte value. */
9739 if (mode == TFmode && !TARGET_64BIT)
9740 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9741 GEN_INT (-4)));
9742 emit_move_insn (part[0][2], part[1][2]);
9743 }
9744 }
9745 else
9746 {
9747 /* In 64-bit mode there is no 32-bit push. If the operand is a
9748 register, that is fine - we just use the larger counterpart. We also
9749 retype memory operands - these come from the attempt to avoid a REX
9750 prefix when moving the second half of a TFmode value. */
9751 if (GET_MODE (part[1][1]) == SImode)
9752 {
9753 if (GET_CODE (part[1][1]) == MEM)
9754 part[1][1] = adjust_address (part[1][1], DImode, 0);
9755 else if (REG_P (part[1][1]))
9756 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9757 else
9758 abort ();
9759 if (GET_MODE (part[1][0]) == SImode)
9760 part[1][0] = part[1][1];
9761 }
9762 }
9763 emit_move_insn (part[0][1], part[1][1]);
9764 emit_move_insn (part[0][0], part[1][0]);
9765 return;
9766 }
9767
9768 /* Choose correct order to not overwrite the source before it is copied. */
9769 if ((REG_P (part[0][0])
9770 && REG_P (part[1][1])
9771 && (REGNO (part[0][0]) == REGNO (part[1][1])
9772 || (nparts == 3
9773 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9774 || (collisions > 0
9775 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9776 {
9777 if (nparts == 3)
9778 {
9779 operands[2] = part[0][2];
9780 operands[3] = part[0][1];
9781 operands[4] = part[0][0];
9782 operands[5] = part[1][2];
9783 operands[6] = part[1][1];
9784 operands[7] = part[1][0];
9785 }
9786 else
9787 {
9788 operands[2] = part[0][1];
9789 operands[3] = part[0][0];
9790 operands[5] = part[1][1];
9791 operands[6] = part[1][0];
9792 }
9793 }
9794 else
9795 {
9796 if (nparts == 3)
9797 {
9798 operands[2] = part[0][0];
9799 operands[3] = part[0][1];
9800 operands[4] = part[0][2];
9801 operands[5] = part[1][0];
9802 operands[6] = part[1][1];
9803 operands[7] = part[1][2];
9804 }
9805 else
9806 {
9807 operands[2] = part[0][0];
9808 operands[3] = part[0][1];
9809 operands[5] = part[1][0];
9810 operands[6] = part[1][1];
9811 }
9812 }
9813 emit_move_insn (operands[2], operands[5]);
9814 emit_move_insn (operands[3], operands[6]);
9815 if (nparts == 3)
9816 emit_move_insn (operands[4], operands[7]);
9817
9818 return;
9819 }
9820
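/* Split a 64-bit shift left into 32-bit operations on the low and high
   words.  A rough sketch of the variable-count output on a CMOVE target,
   assuming the value lives in %edx:%eax, the count in %cl and a zeroed
   scratch register (the register choice is illustrative only):

	shldl %cl, %eax, %edx	; high = high:low << (count & 31)
	sall  %cl, %eax		; low <<= (count & 31)
	testb $32, %cl		; the hardware masks the count to 5 bits,
	cmovne %eax, %edx	; so counts 32..63 need a fixup afterwards:
	cmovne scratch, %eax	;   high = low, low = 0

   Constant counts and non-CMOVE targets use simpler or branching variants
   below; ix86_split_ashrdi and ix86_split_lshrdi are the analogous
   arithmetic and logical right-shift splitters.  */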
9821 void
9822 ix86_split_ashldi (operands, scratch)
9823 rtx *operands, scratch;
9824 {
9825 rtx low[2], high[2];
9826 int count;
9827
9828 if (GET_CODE (operands[2]) == CONST_INT)
9829 {
9830 split_di (operands, 2, low, high);
9831 count = INTVAL (operands[2]) & 63;
9832
9833 if (count >= 32)
9834 {
9835 emit_move_insn (high[0], low[1]);
9836 emit_move_insn (low[0], const0_rtx);
9837
9838 if (count > 32)
9839 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9840 }
9841 else
9842 {
9843 if (!rtx_equal_p (operands[0], operands[1]))
9844 emit_move_insn (operands[0], operands[1]);
9845 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9846 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9847 }
9848 }
9849 else
9850 {
9851 if (!rtx_equal_p (operands[0], operands[1]))
9852 emit_move_insn (operands[0], operands[1]);
9853
9854 split_di (operands, 1, low, high);
9855
9856 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9857 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9858
9859 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9860 {
9861 if (! no_new_pseudos)
9862 scratch = force_reg (SImode, const0_rtx);
9863 else
9864 emit_move_insn (scratch, const0_rtx);
9865
9866 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9867 scratch));
9868 }
9869 else
9870 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9871 }
9872 }
9873
9874 void
9875 ix86_split_ashrdi (operands, scratch)
9876 rtx *operands, scratch;
9877 {
9878 rtx low[2], high[2];
9879 int count;
9880
9881 if (GET_CODE (operands[2]) == CONST_INT)
9882 {
9883 split_di (operands, 2, low, high);
9884 count = INTVAL (operands[2]) & 63;
9885
9886 if (count >= 32)
9887 {
9888 emit_move_insn (low[0], high[1]);
9889
9890 if (! reload_completed)
9891 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9892 else
9893 {
9894 emit_move_insn (high[0], low[0]);
9895 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9896 }
9897
9898 if (count > 32)
9899 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9900 }
9901 else
9902 {
9903 if (!rtx_equal_p (operands[0], operands[1]))
9904 emit_move_insn (operands[0], operands[1]);
9905 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9906 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9907 }
9908 }
9909 else
9910 {
9911 if (!rtx_equal_p (operands[0], operands[1]))
9912 emit_move_insn (operands[0], operands[1]);
9913
9914 split_di (operands, 1, low, high);
9915
9916 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9917 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9918
9919 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9920 {
9921 if (! no_new_pseudos)
9922 scratch = gen_reg_rtx (SImode);
9923 emit_move_insn (scratch, high[0]);
9924 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9925 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9926 scratch));
9927 }
9928 else
9929 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9930 }
9931 }
9932
9933 void
9934 ix86_split_lshrdi (operands, scratch)
9935 rtx *operands, scratch;
9936 {
9937 rtx low[2], high[2];
9938 int count;
9939
9940 if (GET_CODE (operands[2]) == CONST_INT)
9941 {
9942 split_di (operands, 2, low, high);
9943 count = INTVAL (operands[2]) & 63;
9944
9945 if (count >= 32)
9946 {
9947 emit_move_insn (low[0], high[1]);
9948 emit_move_insn (high[0], const0_rtx);
9949
9950 if (count > 32)
9951 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9952 }
9953 else
9954 {
9955 if (!rtx_equal_p (operands[0], operands[1]))
9956 emit_move_insn (operands[0], operands[1]);
9957 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9958 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9959 }
9960 }
9961 else
9962 {
9963 if (!rtx_equal_p (operands[0], operands[1]))
9964 emit_move_insn (operands[0], operands[1]);
9965
9966 split_di (operands, 1, low, high);
9967
9968 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9969 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9970
9971 /* Heh. By reversing the arguments, we can reuse this pattern. */
9972 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9973 {
9974 if (! no_new_pseudos)
9975 scratch = force_reg (SImode, const0_rtx);
9976 else
9977 emit_move_insn (scratch, const0_rtx);
9978
9979 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9980 scratch));
9981 }
9982 else
9983 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9984 }
9985 }
9986
9987 /* Helper function for the string operations below. Test VARIABLE for
9988 the alignment bits given by VALUE; if they are clear, jump to the label. */
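/* For example, ix86_expand_aligntest (destreg, 1) emits the RTL
   equivalent of

	if ((destreg & 1) == 0) goto label;

   so the caller can skip its single-byte copy when the destination is
   already 2-byte aligned.  */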
9989 static rtx
9990 ix86_expand_aligntest (variable, value)
9991 rtx variable;
9992 int value;
9993 {
9994 rtx label = gen_label_rtx ();
9995 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9996 if (GET_MODE (variable) == DImode)
9997 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9998 else
9999 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10000 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10001 1, label);
10002 return label;
10003 }
10004
10005 /* Adjust COUNTER by the VALUE. */
10006 static void
10007 ix86_adjust_counter (countreg, value)
10008 rtx countreg;
10009 HOST_WIDE_INT value;
10010 {
10011 if (GET_MODE (countreg) == DImode)
10012 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10013 else
10014 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10015 }
10016
10017 /* Zero extend possibly SImode EXP to Pmode register. */
10018 rtx
10019 ix86_zero_extend_to_Pmode (exp)
10020 rtx exp;
10021 {
10022 rtx r;
10023 if (GET_MODE (exp) == VOIDmode)
10024 return force_reg (Pmode, exp);
10025 if (GET_MODE (exp) == Pmode)
10026 return copy_to_mode_reg (Pmode, exp);
10027 r = gen_reg_rtx (Pmode);
10028 emit_insn (gen_zero_extendsidi2 (r, exp));
10029 return r;
10030 }
10031
10032 /* Expand string move (memcpy) operation. Use i386 string operations when
10033 profitable. expand_clrstr contains similar code. */
10034 int
10035 ix86_expand_movstr (dst, src, count_exp, align_exp)
10036 rtx dst, src, count_exp, align_exp;
10037 {
10038 rtx srcreg, destreg, countreg;
10039 enum machine_mode counter_mode;
10040 HOST_WIDE_INT align = 0;
10041 unsigned HOST_WIDE_INT count = 0;
10042 rtx insns;
10043
10044 start_sequence ();
10045
10046 if (GET_CODE (align_exp) == CONST_INT)
10047 align = INTVAL (align_exp);
10048
10049 /* This simple hack avoids all inlining code and simplifies code below. */
10050 if (!TARGET_ALIGN_STRINGOPS)
10051 align = 64;
10052
10053 if (GET_CODE (count_exp) == CONST_INT)
10054 count = INTVAL (count_exp);
10055
10056 /* Figure out proper mode for counter. For 32bits it is always SImode,
10057 for 64bits use SImode when possible, otherwise DImode.
10058 Set count to number of bytes copied when known at compile time. */
10059 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10060 || x86_64_zero_extended_value (count_exp))
10061 counter_mode = SImode;
10062 else
10063 counter_mode = DImode;
10064
10065 if (counter_mode != SImode && counter_mode != DImode)
10066 abort ();
10067
10068 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10069 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10070
10071 emit_insn (gen_cld ());
10072
10073 /* When optimizing for size emit simple rep ; movsb instruction for
10074 counts not divisible by 4. */
10075
10076 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10077 {
10078 countreg = ix86_zero_extend_to_Pmode (count_exp);
10079 if (TARGET_64BIT)
10080 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10081 destreg, srcreg, countreg));
10082 else
10083 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10084 destreg, srcreg, countreg));
10085 }
10086
10087 /* For constant aligned (or small unaligned) copies use rep movsl
10088 followed by code copying the rest. For PentiumPro ensure 8 byte
10089 alignment to allow rep movsl acceleration. */
10090
10091 else if (count != 0
10092 && (align >= 8
10093 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10094 || optimize_size || count < (unsigned int) 64))
10095 {
10096 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10097 if (count & ~(size - 1))
10098 {
10099 countreg = copy_to_mode_reg (counter_mode,
10100 GEN_INT ((count >> (size == 4 ? 2 : 3))
10101 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10102 countreg = ix86_zero_extend_to_Pmode (countreg);
10103 if (size == 4)
10104 {
10105 if (TARGET_64BIT)
10106 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10107 destreg, srcreg, countreg));
10108 else
10109 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10110 destreg, srcreg, countreg));
10111 }
10112 else
10113 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10114 destreg, srcreg, countreg));
10115 }
10116 if (size == 8 && (count & 0x04))
10117 emit_insn (gen_strmovsi (destreg, srcreg));
10118 if (count & 0x02)
10119 emit_insn (gen_strmovhi (destreg, srcreg));
10120 if (count & 0x01)
10121 emit_insn (gen_strmovqi (destreg, srcreg));
10122 }
10123 /* The generic code based on the glibc implementation:
10124 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10125 allowing accelerated copying there)
10126 - copy the data using rep movsl
10127 - copy the rest. */
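 /* Roughly, on ia32 with an unknown count this branch produces (register
    names are only illustrative; the real registers are fixed by the
    rep_movsi pattern and by reload):

		; if count < alignment goal, skip the alignment fixups
		; test the low bits of the destination, copy 1/2 bytes
		cld
		movl  count, %ecx
		shrl  $2, %ecx
		rep movsl		; copy %ecx dwords from (%esi) to (%edi)
		; copy the remaining 0-3 bytes with movsw/movsb, guarded
		; by tests of the low bits of the original count.  */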
10128 else
10129 {
10130 rtx countreg2;
10131 rtx label = NULL;
10132 int desired_alignment = (TARGET_PENTIUMPRO
10133 && (count == 0 || count >= (unsigned int) 260)
10134 ? 8 : UNITS_PER_WORD);
10135
10136 /* In case we don't know anything about the alignment, default to
10137 the library version, since it is usually equally fast and results in
10138 shorter code. */
10139 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10140 {
10141 end_sequence ();
10142 return 0;
10143 }
10144
10145 if (TARGET_SINGLE_STRINGOP)
10146 emit_insn (gen_cld ());
10147
10148 countreg2 = gen_reg_rtx (Pmode);
10149 countreg = copy_to_mode_reg (counter_mode, count_exp);
10150
10151 /* We don't use loops to align destination and to copy parts smaller
10152 than 4 bytes, because gcc is able to optimize such code better (in
10153 the case the destination or the count really is aligned, gcc is often
10154 able to predict the branches) and also it is friendlier to the
10155 hardware branch prediction.
10156
10157 Using loops is beneficial for the generic case, because we can
10158 handle small counts with them. Many CPUs (such as Athlon)
10159 have large REP prefix setup costs.
10160
10161 This is quite costly. Maybe we can revisit this decision later or
10162 add some customizability to this code. */
10163
10164 if (count == 0 && align < desired_alignment)
10165 {
10166 label = gen_label_rtx ();
10167 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10168 LEU, 0, counter_mode, 1, label);
10169 }
10170 if (align <= 1)
10171 {
10172 rtx label = ix86_expand_aligntest (destreg, 1);
10173 emit_insn (gen_strmovqi (destreg, srcreg));
10174 ix86_adjust_counter (countreg, 1);
10175 emit_label (label);
10176 LABEL_NUSES (label) = 1;
10177 }
10178 if (align <= 2)
10179 {
10180 rtx label = ix86_expand_aligntest (destreg, 2);
10181 emit_insn (gen_strmovhi (destreg, srcreg));
10182 ix86_adjust_counter (countreg, 2);
10183 emit_label (label);
10184 LABEL_NUSES (label) = 1;
10185 }
10186 if (align <= 4 && desired_alignment > 4)
10187 {
10188 rtx label = ix86_expand_aligntest (destreg, 4);
10189 emit_insn (gen_strmovsi (destreg, srcreg));
10190 ix86_adjust_counter (countreg, 4);
10191 emit_label (label);
10192 LABEL_NUSES (label) = 1;
10193 }
10194
10195 if (label && desired_alignment > 4 && !TARGET_64BIT)
10196 {
10197 emit_label (label);
10198 LABEL_NUSES (label) = 1;
10199 label = NULL_RTX;
10200 }
10201 if (!TARGET_SINGLE_STRINGOP)
10202 emit_insn (gen_cld ());
10203 if (TARGET_64BIT)
10204 {
10205 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10206 GEN_INT (3)));
10207 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10208 destreg, srcreg, countreg2));
10209 }
10210 else
10211 {
10212 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10213 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10214 destreg, srcreg, countreg2));
10215 }
10216
10217 if (label)
10218 {
10219 emit_label (label);
10220 LABEL_NUSES (label) = 1;
10221 }
10222 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10223 emit_insn (gen_strmovsi (destreg, srcreg));
10224 if ((align <= 4 || count == 0) && TARGET_64BIT)
10225 {
10226 rtx label = ix86_expand_aligntest (countreg, 4);
10227 emit_insn (gen_strmovsi (destreg, srcreg));
10228 emit_label (label);
10229 LABEL_NUSES (label) = 1;
10230 }
10231 if (align > 2 && count != 0 && (count & 2))
10232 emit_insn (gen_strmovhi (destreg, srcreg));
10233 if (align <= 2 || count == 0)
10234 {
10235 rtx label = ix86_expand_aligntest (countreg, 2);
10236 emit_insn (gen_strmovhi (destreg, srcreg));
10237 emit_label (label);
10238 LABEL_NUSES (label) = 1;
10239 }
10240 if (align > 1 && count != 0 && (count & 1))
10241 emit_insn (gen_strmovqi (destreg, srcreg));
10242 if (align <= 1 || count == 0)
10243 {
10244 rtx label = ix86_expand_aligntest (countreg, 1);
10245 emit_insn (gen_strmovqi (destreg, srcreg));
10246 emit_label (label);
10247 LABEL_NUSES (label) = 1;
10248 }
10249 }
10250
10251 insns = get_insns ();
10252 end_sequence ();
10253
10254 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10255 emit_insn (insns);
10256 return 1;
10257 }
10258
10259 /* Expand string clear operation (bzero). Use i386 string operations when
10260 profitable. expand_movstr contains similar code. */
10261 int
10262 ix86_expand_clrstr (src, count_exp, align_exp)
10263 rtx src, count_exp, align_exp;
10264 {
10265 rtx destreg, zeroreg, countreg;
10266 enum machine_mode counter_mode;
10267 HOST_WIDE_INT align = 0;
10268 unsigned HOST_WIDE_INT count = 0;
10269
10270 if (GET_CODE (align_exp) == CONST_INT)
10271 align = INTVAL (align_exp);
10272
10273 /* This simple hack avoids all inlining code and simplifies code below. */
10274 if (!TARGET_ALIGN_STRINGOPS)
10275 align = 32;
10276
10277 if (GET_CODE (count_exp) == CONST_INT)
10278 count = INTVAL (count_exp);
10279 /* Figure out proper mode for counter. For 32bits it is always SImode,
10280 for 64bits use SImode when possible, otherwise DImode.
10281 Set count to number of bytes copied when known at compile time. */
10282 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10283 || x86_64_zero_extended_value (count_exp))
10284 counter_mode = SImode;
10285 else
10286 counter_mode = DImode;
10287
10288 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10289
10290 emit_insn (gen_cld ());
10291
10292 /* When optimizing for size emit simple rep ; movsb instruction for
10293 counts not divisible by 4. */
10294
10295 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10296 {
10297 countreg = ix86_zero_extend_to_Pmode (count_exp);
10298 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10299 if (TARGET_64BIT)
10300 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10301 destreg, countreg));
10302 else
10303 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10304 destreg, countreg));
10305 }
10306 else if (count != 0
10307 && (align >= 8
10308 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10309 || optimize_size || count < (unsigned int) 64))
10310 {
10311 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10312 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10313 if (count & ~(size - 1))
10314 {
10315 countreg = copy_to_mode_reg (counter_mode,
10316 GEN_INT ((count >> (size == 4 ? 2 : 3))
10317 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10318 countreg = ix86_zero_extend_to_Pmode (countreg);
10319 if (size == 4)
10320 {
10321 if (TARGET_64BIT)
10322 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10323 destreg, countreg));
10324 else
10325 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10326 destreg, countreg));
10327 }
10328 else
10329 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10330 destreg, countreg));
10331 }
10332 if (size == 8 && (count & 0x04))
10333 emit_insn (gen_strsetsi (destreg,
10334 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10335 if (count & 0x02)
10336 emit_insn (gen_strsethi (destreg,
10337 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10338 if (count & 0x01)
10339 emit_insn (gen_strsetqi (destreg,
10340 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10341 }
10342 else
10343 {
10344 rtx countreg2;
10345 rtx label = NULL;
10346 /* Compute desired alignment of the string operation. */
10347 int desired_alignment = (TARGET_PENTIUMPRO
10348 && (count == 0 || count >= (unsigned int) 260)
10349 ? 8 : UNITS_PER_WORD);
10350
10351 /* In case we don't know anything about the alignment, default to
10352 the library version, since it is usually equally fast and results in
10353 shorter code. */
10354 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10355 return 0;
10356
10357 if (TARGET_SINGLE_STRINGOP)
10358 emit_insn (gen_cld ());
10359
10360 countreg2 = gen_reg_rtx (Pmode);
10361 countreg = copy_to_mode_reg (counter_mode, count_exp);
10362 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10363
10364 if (count == 0 && align < desired_alignment)
10365 {
10366 label = gen_label_rtx ();
10367 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10368 LEU, 0, counter_mode, 1, label);
10369 }
10370 if (align <= 1)
10371 {
10372 rtx label = ix86_expand_aligntest (destreg, 1);
10373 emit_insn (gen_strsetqi (destreg,
10374 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10375 ix86_adjust_counter (countreg, 1);
10376 emit_label (label);
10377 LABEL_NUSES (label) = 1;
10378 }
10379 if (align <= 2)
10380 {
10381 rtx label = ix86_expand_aligntest (destreg, 2);
10382 emit_insn (gen_strsethi (destreg,
10383 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10384 ix86_adjust_counter (countreg, 2);
10385 emit_label (label);
10386 LABEL_NUSES (label) = 1;
10387 }
10388 if (align <= 4 && desired_alignment > 4)
10389 {
10390 rtx label = ix86_expand_aligntest (destreg, 4);
10391 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10392 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10393 : zeroreg)));
10394 ix86_adjust_counter (countreg, 4);
10395 emit_label (label);
10396 LABEL_NUSES (label) = 1;
10397 }
10398
10399 if (label && desired_alignment > 4 && !TARGET_64BIT)
10400 {
10401 emit_label (label);
10402 LABEL_NUSES (label) = 1;
10403 label = NULL_RTX;
10404 }
10405
10406 if (!TARGET_SINGLE_STRINGOP)
10407 emit_insn (gen_cld ());
10408 if (TARGET_64BIT)
10409 {
10410 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10411 GEN_INT (3)));
10412 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10413 destreg, countreg2));
10414 }
10415 else
10416 {
10417 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10418 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10419 destreg, countreg2));
10420 }
10421 if (label)
10422 {
10423 emit_label (label);
10424 LABEL_NUSES (label) = 1;
10425 }
10426
10427 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10428 emit_insn (gen_strsetsi (destreg,
10429 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10430 if (TARGET_64BIT && (align <= 4 || count == 0))
10431 {
10432 rtx label = ix86_expand_aligntest (countreg, 4);
10433 emit_insn (gen_strsetsi (destreg,
10434 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10435 emit_label (label);
10436 LABEL_NUSES (label) = 1;
10437 }
10438 if (align > 2 && count != 0 && (count & 2))
10439 emit_insn (gen_strsethi (destreg,
10440 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10441 if (align <= 2 || count == 0)
10442 {
10443 rtx label = ix86_expand_aligntest (countreg, 2);
10444 emit_insn (gen_strsethi (destreg,
10445 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10446 emit_label (label);
10447 LABEL_NUSES (label) = 1;
10448 }
10449 if (align > 1 && count != 0 && (count & 1))
10450 emit_insn (gen_strsetqi (destreg,
10451 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10452 if (align <= 1 || count == 0)
10453 {
10454 rtx label = ix86_expand_aligntest (countreg, 1);
10455 emit_insn (gen_strsetqi (destreg,
10456 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10457 emit_label (label);
10458 LABEL_NUSES (label) = 1;
10459 }
10460 }
10461 return 1;
10462 }
10463 /* Expand strlen. */
10464 int
10465 ix86_expand_strlen (out, src, eoschar, align)
10466 rtx out, src, eoschar, align;
10467 {
10468 rtx addr, scratch1, scratch2, scratch3, scratch4;
10469
10470 /* The generic case of the strlen expander is long. Avoid expanding it
10471 unless TARGET_INLINE_ALL_STRINGOPS. */
10472
10473 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10474 && !TARGET_INLINE_ALL_STRINGOPS
10475 && !optimize_size
10476 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10477 return 0;
10478
10479 addr = force_reg (Pmode, XEXP (src, 0));
10480 scratch1 = gen_reg_rtx (Pmode);
10481
10482 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10483 && !optimize_size)
10484 {
10485 /* Well it seems that some optimizer does not combine a call like
10486 foo(strlen(bar), strlen(bar));
10487 when the move and the subtraction is done here. It does calculate
10488 the length just once when these instructions are done inside of
10489 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10490 often used and I use one fewer register for the lifetime of
10491 output_strlen_unroll() this is better. */
10492
10493 emit_move_insn (out, addr);
10494
10495 ix86_expand_strlensi_unroll_1 (out, align);
10496
10497 /* strlensi_unroll_1 returns the address of the zero at the end of
10498 the string, like memchr(), so compute the length by subtracting
10499 the start address. */
10500 if (TARGET_64BIT)
10501 emit_insn (gen_subdi3 (out, out, addr));
10502 else
10503 emit_insn (gen_subsi3 (out, out, addr));
10504 }
10505 else
10506 {
10507 scratch2 = gen_reg_rtx (Pmode);
10508 scratch3 = gen_reg_rtx (Pmode);
10509 scratch4 = force_reg (Pmode, constm1_rtx);
10510
10511 emit_move_insn (scratch3, addr);
10512 eoschar = force_reg (QImode, eoschar);
10513
10514 emit_insn (gen_cld ());
10515 if (TARGET_64BIT)
10516 {
10517 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10518 align, scratch4, scratch3));
10519 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10520 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10521 }
10522 else
10523 {
10524 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10525 align, scratch4, scratch3));
10526 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10527 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10528 }
10529 }
10530 return 1;
10531 }
10532
10533 /* Expand the appropriate insns for doing strlen if not just doing
10534 repnz; scasb
10535
10536 out = result, initialized with the start address
10537 align_rtx = alignment of the address.
10538 scratch = scratch register, initialized with the start address when
10539 not aligned, otherwise undefined
10540
10541 This is just the body. It needs the initialisations mentioned above and
10542 some address computing at the end. These things are done in i386.md. */
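/* In outline, the code emitted below is:

     1. If the pointer is not known to be 4-byte aligned, compare up to
	three single bytes against zero (jumping to end_0_label on a hit)
	until OUT is aligned.
     2. Loop: load a word, advance OUT by 4, and test all four bytes for
	zero at once using the (x - 0x01010101) & ~x & 0x80808080 trick.
     3. After the loop, locate the zero byte within the word and adjust
	OUT back so that it points at it.  */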
10543
10544 static void
10545 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10546 rtx out, align_rtx;
10547 {
10548 int align;
10549 rtx tmp;
10550 rtx align_2_label = NULL_RTX;
10551 rtx align_3_label = NULL_RTX;
10552 rtx align_4_label = gen_label_rtx ();
10553 rtx end_0_label = gen_label_rtx ();
10554 rtx mem;
10555 rtx tmpreg = gen_reg_rtx (SImode);
10556 rtx scratch = gen_reg_rtx (SImode);
10557
10558 align = 0;
10559 if (GET_CODE (align_rtx) == CONST_INT)
10560 align = INTVAL (align_rtx);
10561
10562 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10563
10564 /* Is there a known alignment and is it less than 4? */
10565 if (align < 4)
10566 {
10567 rtx scratch1 = gen_reg_rtx (Pmode);
10568 emit_move_insn (scratch1, out);
10569 /* Is there a known alignment and is it not 2? */
10570 if (align != 2)
10571 {
10572 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10573 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10574
10575 /* Leave just the 3 lower bits. */
10576 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10577 NULL_RTX, 0, OPTAB_WIDEN);
10578
10579 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10580 Pmode, 1, align_4_label);
10581 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10582 Pmode, 1, align_2_label);
10583 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10584 Pmode, 1, align_3_label);
10585 }
10586 else
10587 {
10588 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10589 check whether it is aligned to a 4-byte boundary. */
10590
10591 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10592 NULL_RTX, 0, OPTAB_WIDEN);
10593
10594 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10595 Pmode, 1, align_4_label);
10596 }
10597
10598 mem = gen_rtx_MEM (QImode, out);
10599
10600 /* Now compare the bytes. */
10601
10602 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10603 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10604 QImode, 1, end_0_label);
10605
10606 /* Increment the address. */
10607 if (TARGET_64BIT)
10608 emit_insn (gen_adddi3 (out, out, const1_rtx));
10609 else
10610 emit_insn (gen_addsi3 (out, out, const1_rtx));
10611
10612 /* Not needed with an alignment of 2 */
10613 if (align != 2)
10614 {
10615 emit_label (align_2_label);
10616
10617 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10618 end_0_label);
10619
10620 if (TARGET_64BIT)
10621 emit_insn (gen_adddi3 (out, out, const1_rtx));
10622 else
10623 emit_insn (gen_addsi3 (out, out, const1_rtx));
10624
10625 emit_label (align_3_label);
10626 }
10627
10628 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10629 end_0_label);
10630
10631 if (TARGET_64BIT)
10632 emit_insn (gen_adddi3 (out, out, const1_rtx));
10633 else
10634 emit_insn (gen_addsi3 (out, out, const1_rtx));
10635 }
10636
10637 /* Generate a loop to check 4 bytes at a time. It is not a good idea
10638 to align this loop; that only makes the program larger and does not
10639 help to speed it up. */
10640 emit_label (align_4_label);
10641
10642 mem = gen_rtx_MEM (SImode, out);
10643 emit_move_insn (scratch, mem);
10644 if (TARGET_64BIT)
10645 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10646 else
10647 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10648
10649 /* This formula yields a nonzero result iff one of the bytes is zero.
10650 This saves three branches inside the loop and many cycles. */
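 /* The computation below is ((x - 0x01010101) & ~x) & 0x80808080.  A byte
    of x can contribute a set 0x80 bit only if it is zero (or if a borrow
    from a lower zero byte ran through it), so the result is nonzero
    exactly when the word contains a zero byte, and the least significant
    set 0x80 bit marks the first zero byte.  Worked example for
    x = 0x12003456:

	x - 0x01010101  = 0x10ff3355
	~x              = 0xedffcba9
	AND of the two  = 0x00ff0301
	& 0x80808080    = 0x00800000   -> the zero byte is in bits 16..23.  */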
10651
10652 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10653 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10654 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10655 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10656 gen_int_mode (0x80808080, SImode)));
10657 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10658 align_4_label);
10659
10660 if (TARGET_CMOVE)
10661 {
10662 rtx reg = gen_reg_rtx (SImode);
10663 rtx reg2 = gen_reg_rtx (Pmode);
10664 emit_move_insn (reg, tmpreg);
10665 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10666
10667 /* If zero is not in the first two bytes, move two bytes forward. */
10668 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10669 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10670 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10671 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10672 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10673 reg,
10674 tmpreg)));
10675 /* Emit lea manually to avoid clobbering of flags. */
10676 emit_insn (gen_rtx_SET (SImode, reg2,
10677 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10678
10679 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10680 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10681 emit_insn (gen_rtx_SET (VOIDmode, out,
10682 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10683 reg2,
10684 out)));
10685
10686 }
10687 else
10688 {
10689 rtx end_2_label = gen_label_rtx ();
10690 /* Is zero in the first two bytes? */
10691
10692 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10693 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10694 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10695 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10696 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10697 pc_rtx);
10698 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10699 JUMP_LABEL (tmp) = end_2_label;
10700
10701 /* Not in the first two. Move two bytes forward. */
10702 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10703 if (TARGET_64BIT)
10704 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10705 else
10706 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10707
10708 emit_label (end_2_label);
10709
10710 }
10711
10712 /* Avoid a branch when fixing up the final byte position. */
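 /* At this point OUT is four bytes past the start of the byte pair known
    to contain the terminating zero, and the low byte of TMPREG is 0x80
    if the zero is the first byte of the pair and 0x00 if it is the
    second.  Adding that byte to itself moves the 0x80 bit into the carry
    flag, and the subtract-with-borrow of 3 then moves OUT back by 4 or 3,
    leaving it pointing exactly at the zero byte without any branch.  */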
10713 tmpreg = gen_lowpart (QImode, tmpreg);
10714 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10715 if (TARGET_64BIT)
10716 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10717 else
10718 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10719
10720 emit_label (end_0_label);
10721 }
10722
10723 void
10724 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10725 rtx retval, fnaddr, callarg1, callarg2, pop;
10726 {
10727 rtx use = NULL, call;
10728
10729 if (pop == const0_rtx)
10730 pop = NULL;
10731 if (TARGET_64BIT && pop)
10732 abort ();
10733
10734 #if TARGET_MACHO
10735 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10736 fnaddr = machopic_indirect_call_target (fnaddr);
10737 #else
10738 /* Static functions and indirect calls don't need the pic register. */
10739 if (! TARGET_64BIT && flag_pic
10740 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10741 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10742 use_reg (&use, pic_offset_table_rtx);
10743
10744 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10745 {
10746 rtx al = gen_rtx_REG (QImode, 0);
10747 emit_move_insn (al, callarg2);
10748 use_reg (&use, al);
10749 }
10750 #endif /* TARGET_MACHO */
10751
10752 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10753 {
10754 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10755 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10756 }
10757
10758 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10759 if (retval)
10760 call = gen_rtx_SET (VOIDmode, retval, call);
10761 if (pop)
10762 {
10763 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10764 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10765 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10766 }
10767
10768 call = emit_call_insn (call);
10769 if (use)
10770 CALL_INSN_FUNCTION_USAGE (call) = use;
10771 }
10772
10773 \f
10774 /* Clear stack slot assignments remembered from previous functions.
10775 This is called from INIT_EXPANDERS once before RTL is emitted for each
10776 function. */
10777
10778 static struct machine_function *
10779 ix86_init_machine_status ()
10780 {
10781 return ggc_alloc_cleared (sizeof (struct machine_function));
10782 }
10783
10784 /* Return a MEM corresponding to a stack slot with mode MODE.
10785 Allocate a new slot if necessary.
10786
10787 The RTL for a function can have several slots available: N is
10788 which slot to use. */
10789
10790 rtx
10791 assign_386_stack_local (mode, n)
10792 enum machine_mode mode;
10793 int n;
10794 {
10795 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10796 abort ();
10797
10798 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10799 ix86_stack_locals[(int) mode][n]
10800 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10801
10802 return ix86_stack_locals[(int) mode][n];
10803 }
10804
10805 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10806
10807 static GTY(()) rtx ix86_tls_symbol;
10808 rtx
10809 ix86_tls_get_addr ()
10810 {
10811
10812 if (!ix86_tls_symbol)
10813 {
10814 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10815 ? "___tls_get_addr"
10816 : "__tls_get_addr"));
10817 }
10818
10819 return ix86_tls_symbol;
10820 }
10821 \f
10822 /* Calculate the length of the memory address in the instruction
10823 encoding. Does not include the one-byte modrm, opcode, or prefix. */
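/* For example, the lengths returned (excluding the modrm byte itself) are:

	(%eax)			-> 0	register indirect
	(%esp), (%ebp)		-> 1	SIB byte / zero disp8 required
	12(%eax)		-> 1	disp8
	1234(%eax)		-> 4	disp32
	symbol			-> 4	absolute disp32
	12(%eax,%ebx,4)		-> 2	SIB byte + disp8  */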
10824
10825 static int
10826 memory_address_length (addr)
10827 rtx addr;
10828 {
10829 struct ix86_address parts;
10830 rtx base, index, disp;
10831 int len;
10832
10833 if (GET_CODE (addr) == PRE_DEC
10834 || GET_CODE (addr) == POST_INC
10835 || GET_CODE (addr) == PRE_MODIFY
10836 || GET_CODE (addr) == POST_MODIFY)
10837 return 0;
10838
10839 if (! ix86_decompose_address (addr, &parts))
10840 abort ();
10841
10842 base = parts.base;
10843 index = parts.index;
10844 disp = parts.disp;
10845 len = 0;
10846
10847 /* Register Indirect. */
10848 if (base && !index && !disp)
10849 {
10850 /* Special cases: ebp and esp need the two-byte modrm form. */
10851 if (addr == stack_pointer_rtx
10852 || addr == arg_pointer_rtx
10853 || addr == frame_pointer_rtx
10854 || addr == hard_frame_pointer_rtx)
10855 len = 1;
10856 }
10857
10858 /* Direct Addressing. */
10859 else if (disp && !base && !index)
10860 len = 4;
10861
10862 else
10863 {
10864 /* Find the length of the displacement constant. */
10865 if (disp)
10866 {
10867 if (GET_CODE (disp) == CONST_INT
10868 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10869 len = 1;
10870 else
10871 len = 4;
10872 }
10873
10874 /* An index requires the two-byte modrm form. */
10875 if (index)
10876 len += 1;
10877 }
10878
10879 return len;
10880 }
10881
10882 /* Compute the default value for the "length_immediate" attribute. When
10883 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
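/* For example, "addl $3, %eax" can use the sign-extended 8-bit immediate
   form, so with SHORTFORM it contributes 1 byte, while "addl $300, %eax"
   needs a full 32-bit immediate and contributes 4.  The 'K' constraint
   check below is what accepts the signed 8-bit range.  */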
10884 int
10885 ix86_attr_length_immediate_default (insn, shortform)
10886 rtx insn;
10887 int shortform;
10888 {
10889 int len = 0;
10890 int i;
10891 extract_insn_cached (insn);
10892 for (i = recog_data.n_operands - 1; i >= 0; --i)
10893 if (CONSTANT_P (recog_data.operand[i]))
10894 {
10895 if (len)
10896 abort ();
10897 if (shortform
10898 && GET_CODE (recog_data.operand[i]) == CONST_INT
10899 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10900 len = 1;
10901 else
10902 {
10903 switch (get_attr_mode (insn))
10904 {
10905 case MODE_QI:
10906 len+=1;
10907 break;
10908 case MODE_HI:
10909 len+=2;
10910 break;
10911 case MODE_SI:
10912 len+=4;
10913 break;
10914 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10915 case MODE_DI:
10916 len+=4;
10917 break;
10918 default:
10919 fatal_insn ("unknown insn mode", insn);
10920 }
10921 }
10922 }
10923 return len;
10924 }
10925 /* Compute default value for "length_address" attribute. */
10926 int
10927 ix86_attr_length_address_default (insn)
10928 rtx insn;
10929 {
10930 int i;
10931 extract_insn_cached (insn);
10932 for (i = recog_data.n_operands - 1; i >= 0; --i)
10933 if (GET_CODE (recog_data.operand[i]) == MEM)
10934 return memory_address_length (XEXP (recog_data.operand[i], 0));
10938 return 0;
10939 }
10940 \f
10941 /* Return the maximum number of instructions a cpu can issue. */
10942
10943 static int
10944 ix86_issue_rate ()
10945 {
10946 switch (ix86_cpu)
10947 {
10948 case PROCESSOR_PENTIUM:
10949 case PROCESSOR_K6:
10950 return 2;
10951
10952 case PROCESSOR_PENTIUMPRO:
10953 case PROCESSOR_PENTIUM4:
10954 case PROCESSOR_ATHLON:
10955 return 3;
10956
10957 default:
10958 return 1;
10959 }
10960 }
10961
10962 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
10963 set by DEP_INSN and nothing else set by DEP_INSN. */
10964
10965 static int
10966 ix86_flags_dependant (insn, dep_insn, insn_type)
10967 rtx insn, dep_insn;
10968 enum attr_type insn_type;
10969 {
10970 rtx set, set2;
10971
10972 /* Simplify the test for uninteresting insns. */
10973 if (insn_type != TYPE_SETCC
10974 && insn_type != TYPE_ICMOV
10975 && insn_type != TYPE_FCMOV
10976 && insn_type != TYPE_IBR)
10977 return 0;
10978
10979 if ((set = single_set (dep_insn)) != 0)
10980 {
10981 set = SET_DEST (set);
10982 set2 = NULL_RTX;
10983 }
10984 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10985 && XVECLEN (PATTERN (dep_insn), 0) == 2
10986 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10987 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10988 {
10989 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10990 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10991 }
10992 else
10993 return 0;
10994
10995 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10996 return 0;
10997
10998 /* This test is true if the dependent insn reads the flags but
10999 not any other potentially set register. */
11000 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11001 return 0;
11002
11003 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11004 return 0;
11005
11006 return 1;
11007 }
11008
11009 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11010 address with operands set by DEP_INSN. */
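/* For example, on the Pentium the sequence

	addl $4, %ebx
	movl (%ebx), %eax

   suffers an address-generation interlock: the load cannot compute its
   address in the same cycle in which %ebx is written, so ix86_adjust_cost
   adds one cycle of latency when this function returns true.  */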
11011
11012 static int
11013 ix86_agi_dependant (insn, dep_insn, insn_type)
11014 rtx insn, dep_insn;
11015 enum attr_type insn_type;
11016 {
11017 rtx addr;
11018
11019 if (insn_type == TYPE_LEA
11020 && TARGET_PENTIUM)
11021 {
11022 addr = PATTERN (insn);
11023 if (GET_CODE (addr) == SET)
11024 ;
11025 else if (GET_CODE (addr) == PARALLEL
11026 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11027 addr = XVECEXP (addr, 0, 0);
11028 else
11029 abort ();
11030 addr = SET_SRC (addr);
11031 }
11032 else
11033 {
11034 int i;
11035 extract_insn_cached (insn);
11036 for (i = recog_data.n_operands - 1; i >= 0; --i)
11037 if (GET_CODE (recog_data.operand[i]) == MEM)
11038 {
11039 addr = XEXP (recog_data.operand[i], 0);
11040 goto found;
11041 }
11042 return 0;
11043 found:;
11044 }
11045
11046 return modified_in_p (addr, dep_insn);
11047 }
11048
11049 static int
11050 ix86_adjust_cost (insn, link, dep_insn, cost)
11051 rtx insn, link, dep_insn;
11052 int cost;
11053 {
11054 enum attr_type insn_type, dep_insn_type;
11055 enum attr_memory memory, dep_memory;
11056 rtx set, set2;
11057 int dep_insn_code_number;
11058
11059 /* Anti and output dependencies have zero cost on all CPUs. */
11060 if (REG_NOTE_KIND (link) != 0)
11061 return 0;
11062
11063 dep_insn_code_number = recog_memoized (dep_insn);
11064
11065 /* If we can't recognize the insns, we can't really do anything. */
11066 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11067 return cost;
11068
11069 insn_type = get_attr_type (insn);
11070 dep_insn_type = get_attr_type (dep_insn);
11071
11072 switch (ix86_cpu)
11073 {
11074 case PROCESSOR_PENTIUM:
11075 /* Address Generation Interlock adds a cycle of latency. */
11076 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11077 cost += 1;
11078
11079 /* ??? Compares pair with jump/setcc. */
11080 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11081 cost = 0;
11082
11083 /* Floating point stores require the value to be ready one cycle earlier. */
11084 if (insn_type == TYPE_FMOV
11085 && get_attr_memory (insn) == MEMORY_STORE
11086 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11087 cost += 1;
11088 break;
11089
11090 case PROCESSOR_PENTIUMPRO:
11091 memory = get_attr_memory (insn);
11092 dep_memory = get_attr_memory (dep_insn);
11093
11094 /* Since we can't represent delayed latencies of load+operation,
11095 increase the cost here for non-imov insns. */
11096 if (dep_insn_type != TYPE_IMOV
11097 && dep_insn_type != TYPE_FMOV
11098 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11099 cost += 1;
11100
11101 /* INT->FP conversion is expensive. */
11102 if (get_attr_fp_int_src (dep_insn))
11103 cost += 5;
11104
11105 /* There is one cycle extra latency between an FP op and a store. */
11106 if (insn_type == TYPE_FMOV
11107 && (set = single_set (dep_insn)) != NULL_RTX
11108 && (set2 = single_set (insn)) != NULL_RTX
11109 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11110 && GET_CODE (SET_DEST (set2)) == MEM)
11111 cost += 1;
11112
11113 /* Show ability of reorder buffer to hide latency of load by executing
11114 in parallel with previous instruction in case
11115 previous instruction is not needed to compute the address. */
11116 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11117 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11118 {
11119 /* Claim moves to take one cycle, as the core can issue one load
11120 at a time and the next load can start a cycle later. */
11121 if (dep_insn_type == TYPE_IMOV
11122 || dep_insn_type == TYPE_FMOV)
11123 cost = 1;
11124 else if (cost > 1)
11125 cost--;
11126 }
11127 break;
11128
11129 case PROCESSOR_K6:
11130 memory = get_attr_memory (insn);
11131 dep_memory = get_attr_memory (dep_insn);
11132 /* The esp dependency is resolved before the instruction is really
11133 finished. */
11134 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11135 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11136 return 1;
11137
11138 /* Since we can't represent delayed latencies of load+operation,
11139 increase the cost here for non-imov insns. */
11140 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11141 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11142
11143 /* INT->FP conversion is expensive. */
11144 if (get_attr_fp_int_src (dep_insn))
11145 cost += 5;
11146
11147 /* Show ability of reorder buffer to hide latency of load by executing
11148 in parallel with previous instruction in case
11149 previous instruction is not needed to compute the address. */
11150 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11151 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11152 {
11153 /* Claim moves to take one cycle, as the core can issue one load
11154 at a time and the next load can start a cycle later. */
11155 if (dep_insn_type == TYPE_IMOV
11156 || dep_insn_type == TYPE_FMOV)
11157 cost = 1;
11158 else if (cost > 2)
11159 cost -= 2;
11160 else
11161 cost = 1;
11162 }
11163 break;
11164
11165 case PROCESSOR_ATHLON:
11166 memory = get_attr_memory (insn);
11167 dep_memory = get_attr_memory (dep_insn);
11168
11169 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11170 {
11171 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11172 cost += 2;
11173 else
11174 cost += 3;
11175 }
11176 /* Show ability of reorder buffer to hide latency of load by executing
11177 in parallel with previous instruction in case
11178 previous instruction is not needed to compute the address. */
11179 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11180 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11181 {
11182 /* Claim moves to take one cycle, as the core can issue one load
11183 at a time and the next load can start a cycle later. */
11184 if (dep_insn_type == TYPE_IMOV
11185 || dep_insn_type == TYPE_FMOV)
11186 cost = 0;
11187 else if (cost >= 3)
11188 cost -= 3;
11189 else
11190 cost = 0;
11191 }
11192
11193 default:
11194 break;
11195 }
11196
11197 return cost;
11198 }
11199
11200 static union
11201 {
11202 struct ppro_sched_data
11203 {
11204 rtx decode[3];
11205 int issued_this_cycle;
11206 } ppro;
11207 } ix86_sched_data;
11208
11209 static enum attr_ppro_uops
11210 ix86_safe_ppro_uops (insn)
11211 rtx insn;
11212 {
11213 if (recog_memoized (insn) >= 0)
11214 return get_attr_ppro_uops (insn);
11215 else
11216 return PPRO_UOPS_MANY;
11217 }
11218
11219 static void
11220 ix86_dump_ppro_packet (dump)
11221 FILE *dump;
11222 {
11223 if (ix86_sched_data.ppro.decode[0])
11224 {
11225 fprintf (dump, "PPRO packet: %d",
11226 INSN_UID (ix86_sched_data.ppro.decode[0]));
11227 if (ix86_sched_data.ppro.decode[1])
11228 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11229 if (ix86_sched_data.ppro.decode[2])
11230 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11231 fputc ('\n', dump);
11232 }
11233 }
11234
11235 /* We're beginning a new block. Initialize data structures as necessary. */
11236
11237 static void
11238 ix86_sched_init (dump, sched_verbose, veclen)
11239 FILE *dump ATTRIBUTE_UNUSED;
11240 int sched_verbose ATTRIBUTE_UNUSED;
11241 int veclen ATTRIBUTE_UNUSED;
11242 {
11243 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11244 }
11245
11246 /* Shift INSN to SLOT, and shift everything else down. */
11247
11248 static void
11249 ix86_reorder_insn (insnp, slot)
11250 rtx *insnp, *slot;
11251 {
11252 if (insnp != slot)
11253 {
11254 rtx insn = *insnp;
11255 do
11256 insnp[0] = insnp[1];
11257 while (++insnp != slot);
11258 *insnp = insn;
11259 }
11260 }
11261
11262 static void
11263 ix86_sched_reorder_ppro (ready, e_ready)
11264 rtx *ready;
11265 rtx *e_ready;
11266 {
11267 rtx decode[3];
11268 enum attr_ppro_uops cur_uops;
11269 int issued_this_cycle;
11270 rtx *insnp;
11271 int i;
11272
11273 /* At this point .ppro.decode contains the state of the three
11274 decoders from last "cycle". That is, those insns that were
11275 actually independent. But here we're scheduling for the
11276 decoder, and we may find things that are decodable in the
11277 same cycle. */
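  /* The PPro/PII front end has one complex decoder that can handle insns
     of up to four uops and two simple decoders restricted to single-uop
     insns (the 4-1-1 decode template).  That is why slot 0 below is filled
     with a MANY or FEW insn and the remaining two slots only with ONE-uop
     insns.  */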
11278
11279 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11280 issued_this_cycle = 0;
11281
11282 insnp = e_ready;
11283 cur_uops = ix86_safe_ppro_uops (*insnp);
11284
11285 /* If the decoders are empty, and we have a complex insn at the
11286 head of the priority queue, let it issue without complaint. */
11287 if (decode[0] == NULL)
11288 {
11289 if (cur_uops == PPRO_UOPS_MANY)
11290 {
11291 decode[0] = *insnp;
11292 goto ppro_done;
11293 }
11294
11295 /* Otherwise, search for a 2-4 uop insn to issue. */
11296 while (cur_uops != PPRO_UOPS_FEW)
11297 {
11298 if (insnp == ready)
11299 break;
11300 cur_uops = ix86_safe_ppro_uops (*--insnp);
11301 }
11302
11303 /* If so, move it to the head of the line. */
11304 if (cur_uops == PPRO_UOPS_FEW)
11305 ix86_reorder_insn (insnp, e_ready);
11306
11307 /* Issue the head of the queue. */
11308 issued_this_cycle = 1;
11309 decode[0] = *e_ready--;
11310 }
11311
11312 /* Look for simple insns to fill in the other two slots. */
11313 for (i = 1; i < 3; ++i)
11314 if (decode[i] == NULL)
11315 {
11316 if (ready > e_ready)
11317 goto ppro_done;
11318
11319 insnp = e_ready;
11320 cur_uops = ix86_safe_ppro_uops (*insnp);
11321 while (cur_uops != PPRO_UOPS_ONE)
11322 {
11323 if (insnp == ready)
11324 break;
11325 cur_uops = ix86_safe_ppro_uops (*--insnp);
11326 }
11327
11328 /* Found one. Move it to the head of the queue and issue it. */
11329 if (cur_uops == PPRO_UOPS_ONE)
11330 {
11331 ix86_reorder_insn (insnp, e_ready);
11332 decode[i] = *e_ready--;
11333 issued_this_cycle++;
11334 continue;
11335 }
11336
11337 /* ??? Didn't find one. Ideally, here we would do a lazy split
11338 of 2-uop insns, issue one and queue the other. */
11339 }
11340
11341 ppro_done:
11342 if (issued_this_cycle == 0)
11343 issued_this_cycle = 1;
11344 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11345 }
11346
11347 /* We are about to begin issuing insns for this clock cycle.
11348 Override the default sort algorithm to better slot instructions. */
11349 static int
11350 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11351 FILE *dump ATTRIBUTE_UNUSED;
11352 int sched_verbose ATTRIBUTE_UNUSED;
11353 rtx *ready;
11354 int *n_readyp;
11355 int clock_var ATTRIBUTE_UNUSED;
11356 {
11357 int n_ready = *n_readyp;
11358 rtx *e_ready = ready + n_ready - 1;
11359
11360 /* Make sure to go ahead and initialize key items in
11361 ix86_sched_data if we are not going to bother trying to
11362 reorder the ready queue. */
11363 if (n_ready < 2)
11364 {
11365 ix86_sched_data.ppro.issued_this_cycle = 1;
11366 goto out;
11367 }
11368
11369 switch (ix86_cpu)
11370 {
11371 default:
11372 break;
11373
11374 case PROCESSOR_PENTIUMPRO:
11375 ix86_sched_reorder_ppro (ready, e_ready);
11376 break;
11377 }
11378
11379 out:
11380 return ix86_issue_rate ();
11381 }
11382
11383 /* We are about to issue INSN. Return the number of insns left on the
11384 ready queue that can be issued this cycle. */
11385
11386 static int
11387 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11388 FILE *dump;
11389 int sched_verbose;
11390 rtx insn;
11391 int can_issue_more;
11392 {
11393 int i;
11394 switch (ix86_cpu)
11395 {
11396 default:
11397 return can_issue_more - 1;
11398
11399 case PROCESSOR_PENTIUMPRO:
11400 {
11401 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11402
11403 if (uops == PPRO_UOPS_MANY)
11404 {
11405 if (sched_verbose)
11406 ix86_dump_ppro_packet (dump);
11407 ix86_sched_data.ppro.decode[0] = insn;
11408 ix86_sched_data.ppro.decode[1] = NULL;
11409 ix86_sched_data.ppro.decode[2] = NULL;
11410 if (sched_verbose)
11411 ix86_dump_ppro_packet (dump);
11412 ix86_sched_data.ppro.decode[0] = NULL;
11413 }
11414 else if (uops == PPRO_UOPS_FEW)
11415 {
11416 if (sched_verbose)
11417 ix86_dump_ppro_packet (dump);
11418 ix86_sched_data.ppro.decode[0] = insn;
11419 ix86_sched_data.ppro.decode[1] = NULL;
11420 ix86_sched_data.ppro.decode[2] = NULL;
11421 }
11422 else
11423 {
11424 for (i = 0; i < 3; ++i)
11425 if (ix86_sched_data.ppro.decode[i] == NULL)
11426 {
11427 ix86_sched_data.ppro.decode[i] = insn;
11428 break;
11429 }
11430 if (i == 3)
11431 abort ();
11432 if (i == 2)
11433 {
11434 if (sched_verbose)
11435 ix86_dump_ppro_packet (dump);
11436 ix86_sched_data.ppro.decode[0] = NULL;
11437 ix86_sched_data.ppro.decode[1] = NULL;
11438 ix86_sched_data.ppro.decode[2] = NULL;
11439 }
11440 }
11441 }
11442 return --ix86_sched_data.ppro.issued_this_cycle;
11443 }
11444 }
11445
11446 static int
11447 ia32_use_dfa_pipeline_interface ()
11448 {
11449 if (ix86_cpu == PROCESSOR_PENTIUM)
11450 return 1;
11451 return 0;
11452 }
11453
11454 /* How many alternative schedules to try. This should be as wide as the
11455 scheduling freedom in the DFA, but no wider. Making this value too
11456 large results in extra work for the scheduler. */
11457
11458 static int
11459 ia32_multipass_dfa_lookahead ()
11460 {
11461 if (ix86_cpu == PROCESSOR_PENTIUM)
11462 return 2;
11463 else
11464 return 0;
11465 }
11466
11467 \f
11468 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11469 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11470 appropriate. */
11471
11472 void
11473 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11474 rtx insns;
11475 rtx dstref, srcref, dstreg, srcreg;
11476 {
11477 rtx insn;
11478
11479 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11480 if (INSN_P (insn))
11481 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11482 dstreg, srcreg);
11483 }
11484
11485 /* Subroutine of above to actually do the updating by recursively walking
11486 the rtx. */
11487
11488 static void
11489 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11490 rtx x;
11491 rtx dstref, srcref, dstreg, srcreg;
11492 {
11493 enum rtx_code code = GET_CODE (x);
11494 const char *format_ptr = GET_RTX_FORMAT (code);
11495 int i, j;
11496
11497 if (code == MEM && XEXP (x, 0) == dstreg)
11498 MEM_COPY_ATTRIBUTES (x, dstref);
11499 else if (code == MEM && XEXP (x, 0) == srcreg)
11500 MEM_COPY_ATTRIBUTES (x, srcref);
11501
11502 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11503 {
11504 if (*format_ptr == 'e')
11505 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11506 dstreg, srcreg);
11507 else if (*format_ptr == 'E')
11508 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11509 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11510 dstreg, srcreg);
11511 }
11512 }
11513 \f
11514 /* Compute the alignment given to a constant that is being placed in memory.
11515 EXP is the constant and ALIGN is the alignment that the object would
11516 ordinarily have.
11517 The value of this function is used instead of that alignment to align
11518 the object. */
11519
11520 int
11521 ix86_constant_alignment (exp, align)
11522 tree exp;
11523 int align;
11524 {
11525 if (TREE_CODE (exp) == REAL_CST)
11526 {
11527 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11528 return 64;
11529 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11530 return 128;
11531 }
11532 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11533 && align < 256)
11534 return 256;
11535
11536 return align;
11537 }
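/* Editorial note (added commentary, not in the original source):
   ix86_constant_alignment is presumably reached through the
   CONSTANT_ALIGNMENT target macro, so it governs constants the compiler
   itself places in memory.  A `double' literal spilled to the constant
   pool is therefore given 64-bit alignment even where the ABI only
   guarantees 32 bits, and a string literal whose TREE_STRING_LENGTH
   (which counts the terminating NUL) is at least 31 is placed on a
   256-bit boundary.  */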
11538
11539 /* Compute the alignment for a static variable.
11540 TYPE is the data type, and ALIGN is the alignment that
11541 the object would ordinarily have. The value of this function is used
11542 instead of that alignment to align the object. */
11543
11544 int
11545 ix86_data_alignment (type, align)
11546 tree type;
11547 int align;
11548 {
11549 if (AGGREGATE_TYPE_P (type)
11550 && TYPE_SIZE (type)
11551 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11552 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11553 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11554 return 256;
11555
11556 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
11557 to a 16-byte boundary. */
11558 if (TARGET_64BIT)
11559 {
11560 if (AGGREGATE_TYPE_P (type)
11561 && TYPE_SIZE (type)
11562 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11563 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11564 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11565 return 128;
11566 }
11567
11568 if (TREE_CODE (type) == ARRAY_TYPE)
11569 {
11570 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11571 return 64;
11572 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11573 return 128;
11574 }
11575 else if (TREE_CODE (type) == COMPLEX_TYPE)
11576 {
11577
11578 if (TYPE_MODE (type) == DCmode && align < 64)
11579 return 64;
11580 if (TYPE_MODE (type) == XCmode && align < 128)
11581 return 128;
11582 }
11583 else if ((TREE_CODE (type) == RECORD_TYPE
11584 || TREE_CODE (type) == UNION_TYPE
11585 || TREE_CODE (type) == QUAL_UNION_TYPE)
11586 && TYPE_FIELDS (type))
11587 {
11588 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11589 return 64;
11590 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11591 return 128;
11592 }
11593 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11594 || TREE_CODE (type) == INTEGER_TYPE)
11595 {
11596 if (TYPE_MODE (type) == DFmode && align < 64)
11597 return 64;
11598 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11599 return 128;
11600 }
11601
11602 return align;
11603 }
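#if 0	/* Editorial illustration only; not part of the original file.
	   The wiring through the DATA_ALIGNMENT target macro is an
	   assumption.  */
/* Effect of the rules above on two file-scope objects: an aggregate of
   256 bits or more is padded to a 256-bit boundary so block and vector
   accesses to it can be aligned, while a lone scalar only gets the
   64-bit DFmode alignment.  */
static double table[32];	/* 2048 bits -> 256-bit alignment */
static double one_value;	/* DFmode    ->  64-bit alignment */
#endif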
11604
11605 /* Compute the alignment for a local variable.
11606 TYPE is the data type, and ALIGN is the alignment that
11607 the object would ordinarily have. The value of this macro is used
11608 instead of that alignment to align the object. */
11609
11610 int
11611 ix86_local_alignment (type, align)
11612 tree type;
11613 int align;
11614 {
11615 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
11616 to a 16-byte boundary. */
11617 if (TARGET_64BIT)
11618 {
11619 if (AGGREGATE_TYPE_P (type)
11620 && TYPE_SIZE (type)
11621 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11622 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11623 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11624 return 128;
11625 }
11626 if (TREE_CODE (type) == ARRAY_TYPE)
11627 {
11628 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11629 return 64;
11630 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11631 return 128;
11632 }
11633 else if (TREE_CODE (type) == COMPLEX_TYPE)
11634 {
11635 if (TYPE_MODE (type) == DCmode && align < 64)
11636 return 64;
11637 if (TYPE_MODE (type) == XCmode && align < 128)
11638 return 128;
11639 }
11640 else if ((TREE_CODE (type) == RECORD_TYPE
11641 || TREE_CODE (type) == UNION_TYPE
11642 || TREE_CODE (type) == QUAL_UNION_TYPE)
11643 && TYPE_FIELDS (type))
11644 {
11645 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11646 return 64;
11647 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11648 return 128;
11649 }
11650 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11651 || TREE_CODE (type) == INTEGER_TYPE)
11652 {
11653
11654 if (TYPE_MODE (type) == DFmode && align < 64)
11655 return 64;
11656 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11657 return 128;
11658 }
11659 return align;
11660 }
11661 \f
11662 /* Emit RTL insns to initialize the variable parts of a trampoline.
11663 FNADDR is an RTX for the address of the function's pure code.
11664 CXT is an RTX for the static chain value for the function. */
11665 void
11666 x86_initialize_trampoline (tramp, fnaddr, cxt)
11667 rtx tramp, fnaddr, cxt;
11668 {
11669 if (!TARGET_64BIT)
11670 {
11671 /* Compute offset from the end of the jmp to the target function. */
11672 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11673 plus_constant (tramp, 10),
11674 NULL_RTX, 1, OPTAB_DIRECT);
11675 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11676 gen_int_mode (0xb9, QImode));
11677 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11678 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11679 gen_int_mode (0xe9, QImode));
11680 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11681 }
11682 else
11683 {
11684 int offset = 0;
11685 /* Try to load the address using the shorter movl instead of movabs.
11686 We may want to support movq for kernel mode, but the kernel does not use
11687 trampolines at the moment. */
11688 if (x86_64_zero_extended_value (fnaddr))
11689 {
11690 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11691 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11692 gen_int_mode (0xbb41, HImode));
11693 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11694 gen_lowpart (SImode, fnaddr));
11695 offset += 6;
11696 }
11697 else
11698 {
11699 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11700 gen_int_mode (0xbb49, HImode));
11701 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11702 fnaddr);
11703 offset += 10;
11704 }
11705 /* Load static chain using movabs to r10. */
11706 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11707 gen_int_mode (0xba49, HImode));
11708 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11709 cxt);
11710 offset += 10;
11711 /* Jump to r11. */
11712 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11713 gen_int_mode (0xff49, HImode));
11714 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11715 gen_int_mode (0xe3, QImode));
11716 offset += 3;
11717 if (offset > TRAMPOLINE_SIZE)
11718 abort ();
11719 }
11720 }
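/* Editorial note (a reconstruction of the bytes emitted above; not part
   of the original source).  The !TARGET_64BIT trampoline is ten bytes:

       b9 <cxt:4>		movl  $<static chain>, %ecx
       e9 <disp:4>		jmp   <fnaddr>

   which is why DISP is computed relative to tramp + 10: the jmp
   displacement is taken from the end of the five-byte jmp.  The 64-bit
   trampoline is

       41 bb <fnaddr:4>		movl    $<fnaddr>, %r11d  (zero-extended case)
     or
       49 bb <fnaddr:8>		movabsq $<fnaddr>, %r11
       49 ba <cxt:8>		movabsq $<static chain>, %r10
       49 ff e3			jmpq    *%r11  */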
11721 \f
11722 #define def_builtin(MASK, NAME, TYPE, CODE) \
11723 do { \
11724 if ((MASK) & target_flags) \
11725 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11726 NULL, NULL_TREE); \
11727 } while (0)
11728
11729 struct builtin_description
11730 {
11731 const unsigned int mask;
11732 const enum insn_code icode;
11733 const char *const name;
11734 const enum ix86_builtins code;
11735 const enum rtx_code comparison;
11736 const unsigned int flag;
11737 };
11738
11739 /* Used for builtins that are enabled by either -msse or -msse2. */
11740 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11741
11742 static const struct builtin_description bdesc_comi[] =
11743 {
11744 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11745 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11746 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11747 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11748 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11749 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11750 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11751 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11752 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11753 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11754 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11755 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11756 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11757 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11758 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11759 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11760 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11761 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11762 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11763 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11764 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11765 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11766 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11767 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11768 };
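/* Editorial note (illustration; the reading of the last two fields is an
   interpretation of how the expander uses them, not original
   commentary).  An entry such as

     { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt",
       IX86_BUILTIN_COMIGTSS, LT, 1 }

   means: declare the builtin whenever -msse or -msse2 is enabled
   (MASK_SSE1 is the union of the two masks), expand it through the
   sse_comi pattern, and, since there is no GT form of comiss, use the
   LT comparison with the final flag set so the expander swaps the
   operands.  */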
11769
11770 static const struct builtin_description bdesc_2arg[] =
11771 {
11772 /* SSE */
11773 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11774 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11775 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11776 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11779 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11780 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11781
11782 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11783 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11784 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11785 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11786 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11787 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11788 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11789 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11790 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11791 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11792 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11793 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11794 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11795 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11796 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11797 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11798 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11799 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11800 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11801 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11802 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11803 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11804 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11805 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11806
11807 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11808 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11809 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11810 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11811
11812 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11813 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11814 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11815 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11816
11817 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11818 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11819 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11820 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11821 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11822
11823 /* MMX */
11824 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11825 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11826 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11827 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11828 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11829 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11830
11831 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11832 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11833 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11834 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11835 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11836 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11837 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11838 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11839
11840 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11841 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11842 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11843
11844 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11845 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11846 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11847 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11848
11849 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11850 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11851
11852 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11853 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11854 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11855 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11856 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11857 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11858
11859 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11860 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11861 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11862 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11863
11864 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11865 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11866 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11867 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11868 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11869 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11870
11871 /* Special. */
11872 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11873 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11874 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11875
11876 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11877 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11878
11879 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11880 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11881 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11882 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11883 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11884 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11885
11886 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11887 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11888 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11889 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11890 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11891 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11892
11893 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11894 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11895 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11896 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11897
11898 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11899 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11900
11901 /* SSE2 */
11902 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11908 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11910
11911 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11912 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11913 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11914 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11915 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11916 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11917 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11918 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11919 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11920 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11921 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11922 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11923 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11924 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11925 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11926 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11927 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11928 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11929 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11930 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11931 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11932 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11933 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11934 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11935
11936 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11937 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11938 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11940
11941 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11942 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11945
11946 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11947 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11949
11950 /* SSE2 MMX */
11951 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11956 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11959
11960 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11967 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11968
11969 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11973
11974 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11977 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11978
11979 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11980 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11981
11982 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11986 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11988
11989 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11991 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11992 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11993
11994 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11997 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11998 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11999 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12000
12001 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12002 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12004
12005 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12006 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12007
12008 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12010 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12011 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12012 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12013 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12014
12015 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12016 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12020 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12021
12022 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12026
12027 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12028
12029 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12031 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12032 };
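#if 0	/* Editorial sketch, illustration only; not part of this file.
	   The vector_size typedef is an assumption for the sketch; the
	   contemporary <xmmintrin.h> spells the type with the mode
	   attribute instead.  */
/* How a two-operand entry from the table above is reached from user
   code: __builtin_ia32_addps is declared by the bdesc_2arg loop in
   ix86_init_mmx_sse_builtins below and expands through
   CODE_FOR_addv4sf3.  */
typedef float __v4sf __attribute__ ((vector_size (16)));

static __v4sf
add_ps (__v4sf a, __v4sf b)
{
  return __builtin_ia32_addps (a, b);
}
#endif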
12033
12034 static const struct builtin_description bdesc_1arg[] =
12035 {
12036 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12037 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12038
12039 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12040 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12041 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12042
12043 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12044 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12045 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12046 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12047
12048 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12050 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12051
12052 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12053
12054 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12056
12057 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12061 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12062
12063 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12064
12065 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12067
12068 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12069 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12070 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
12071 };
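/* Editorial note (added commentary, not in the original source): many
   entries in bdesc_1arg, and several in bdesc_2arg, carry a null name.
   The registration loop in ix86_init_mmx_sse_builtins below skips those
   (`if (d->name == 0) continue;'), so the corresponding builtins are
   declared through the explicit def_builtin calls further down, and the
   table entry is presumably consulted only when the builtin is
   expanded.  */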
12072
12073 void
12074 ix86_init_builtins ()
12075 {
12076 if (TARGET_MMX)
12077 ix86_init_mmx_sse_builtins ();
12078 }
12079
12080 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12081 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12082 builtins. */
12083 static void
12084 ix86_init_mmx_sse_builtins ()
12085 {
12086 const struct builtin_description * d;
12087 size_t i;
12088
12089 tree pchar_type_node = build_pointer_type (char_type_node);
12090 tree pfloat_type_node = build_pointer_type (float_type_node);
12091 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12092 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12093 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12094
12095 /* Comparisons. */
12096 tree int_ftype_v4sf_v4sf
12097 = build_function_type_list (integer_type_node,
12098 V4SF_type_node, V4SF_type_node, NULL_TREE);
12099 tree v4si_ftype_v4sf_v4sf
12100 = build_function_type_list (V4SI_type_node,
12101 V4SF_type_node, V4SF_type_node, NULL_TREE);
12102 /* MMX/SSE/integer conversions. */
12103 tree int_ftype_v4sf
12104 = build_function_type_list (integer_type_node,
12105 V4SF_type_node, NULL_TREE);
12106 tree int_ftype_v8qi
12107 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12108 tree v4sf_ftype_v4sf_int
12109 = build_function_type_list (V4SF_type_node,
12110 V4SF_type_node, integer_type_node, NULL_TREE);
12111 tree v4sf_ftype_v4sf_v2si
12112 = build_function_type_list (V4SF_type_node,
12113 V4SF_type_node, V2SI_type_node, NULL_TREE);
12114 tree int_ftype_v4hi_int
12115 = build_function_type_list (integer_type_node,
12116 V4HI_type_node, integer_type_node, NULL_TREE);
12117 tree v4hi_ftype_v4hi_int_int
12118 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12119 integer_type_node, integer_type_node,
12120 NULL_TREE);
12121 /* Miscellaneous. */
12122 tree v8qi_ftype_v4hi_v4hi
12123 = build_function_type_list (V8QI_type_node,
12124 V4HI_type_node, V4HI_type_node, NULL_TREE);
12125 tree v4hi_ftype_v2si_v2si
12126 = build_function_type_list (V4HI_type_node,
12127 V2SI_type_node, V2SI_type_node, NULL_TREE);
12128 tree v4sf_ftype_v4sf_v4sf_int
12129 = build_function_type_list (V4SF_type_node,
12130 V4SF_type_node, V4SF_type_node,
12131 integer_type_node, NULL_TREE);
12132 tree v2si_ftype_v4hi_v4hi
12133 = build_function_type_list (V2SI_type_node,
12134 V4HI_type_node, V4HI_type_node, NULL_TREE);
12135 tree v4hi_ftype_v4hi_int
12136 = build_function_type_list (V4HI_type_node,
12137 V4HI_type_node, integer_type_node, NULL_TREE);
12138 tree v4hi_ftype_v4hi_di
12139 = build_function_type_list (V4HI_type_node,
12140 V4HI_type_node, long_long_unsigned_type_node,
12141 NULL_TREE);
12142 tree v2si_ftype_v2si_di
12143 = build_function_type_list (V2SI_type_node,
12144 V2SI_type_node, long_long_unsigned_type_node,
12145 NULL_TREE);
12146 tree void_ftype_void
12147 = build_function_type (void_type_node, void_list_node);
12148 tree void_ftype_unsigned
12149 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12150 tree unsigned_ftype_void
12151 = build_function_type (unsigned_type_node, void_list_node);
12152 tree di_ftype_void
12153 = build_function_type (long_long_unsigned_type_node, void_list_node);
12154 tree v4sf_ftype_void
12155 = build_function_type (V4SF_type_node, void_list_node);
12156 tree v2si_ftype_v4sf
12157 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12158 /* Loads/stores. */
12159 tree void_ftype_v8qi_v8qi_pchar
12160 = build_function_type_list (void_type_node,
12161 V8QI_type_node, V8QI_type_node,
12162 pchar_type_node, NULL_TREE);
12163 tree v4sf_ftype_pfloat
12164 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12165 /* @@@ the type is bogus */
12166 tree v4sf_ftype_v4sf_pv2si
12167 = build_function_type_list (V4SF_type_node,
12168 V4SF_type_node, pv2di_type_node, NULL_TREE);
12169 tree void_ftype_pv2si_v4sf
12170 = build_function_type_list (void_type_node,
12171 pv2di_type_node, V4SF_type_node, NULL_TREE);
12172 tree void_ftype_pfloat_v4sf
12173 = build_function_type_list (void_type_node,
12174 pfloat_type_node, V4SF_type_node, NULL_TREE);
12175 tree void_ftype_pdi_di
12176 = build_function_type_list (void_type_node,
12177 pdi_type_node, long_long_unsigned_type_node,
12178 NULL_TREE);
12179 tree void_ftype_pv2di_v2di
12180 = build_function_type_list (void_type_node,
12181 pv2di_type_node, V2DI_type_node, NULL_TREE);
12182 /* Normal vector unops. */
12183 tree v4sf_ftype_v4sf
12184 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12185
12186 /* Normal vector binops. */
12187 tree v4sf_ftype_v4sf_v4sf
12188 = build_function_type_list (V4SF_type_node,
12189 V4SF_type_node, V4SF_type_node, NULL_TREE);
12190 tree v8qi_ftype_v8qi_v8qi
12191 = build_function_type_list (V8QI_type_node,
12192 V8QI_type_node, V8QI_type_node, NULL_TREE);
12193 tree v4hi_ftype_v4hi_v4hi
12194 = build_function_type_list (V4HI_type_node,
12195 V4HI_type_node, V4HI_type_node, NULL_TREE);
12196 tree v2si_ftype_v2si_v2si
12197 = build_function_type_list (V2SI_type_node,
12198 V2SI_type_node, V2SI_type_node, NULL_TREE);
12199 tree di_ftype_di_di
12200 = build_function_type_list (long_long_unsigned_type_node,
12201 long_long_unsigned_type_node,
12202 long_long_unsigned_type_node, NULL_TREE);
12203
12204 tree v2si_ftype_v2sf
12205 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12206 tree v2sf_ftype_v2si
12207 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12208 tree v2si_ftype_v2si
12209 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12210 tree v2sf_ftype_v2sf
12211 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12212 tree v2sf_ftype_v2sf_v2sf
12213 = build_function_type_list (V2SF_type_node,
12214 V2SF_type_node, V2SF_type_node, NULL_TREE);
12215 tree v2si_ftype_v2sf_v2sf
12216 = build_function_type_list (V2SI_type_node,
12217 V2SF_type_node, V2SF_type_node, NULL_TREE);
12218 tree pint_type_node = build_pointer_type (integer_type_node);
12219 tree pdouble_type_node = build_pointer_type (double_type_node);
12220 tree int_ftype_v2df_v2df
12221 = build_function_type_list (integer_type_node,
12222 V2DF_type_node, V2DF_type_node, NULL_TREE);
12223
12224 tree ti_ftype_void
12225 = build_function_type (intTI_type_node, void_list_node);
12226 tree ti_ftype_ti_ti
12227 = build_function_type_list (intTI_type_node,
12228 intTI_type_node, intTI_type_node, NULL_TREE);
12229 tree void_ftype_pvoid
12230 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12231 tree v2di_ftype_di
12232 = build_function_type_list (V2DI_type_node,
12233 long_long_unsigned_type_node, NULL_TREE);
12234 tree v4sf_ftype_v4si
12235 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12236 tree v4si_ftype_v4sf
12237 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12238 tree v2df_ftype_v4si
12239 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12240 tree v4si_ftype_v2df
12241 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12242 tree v2si_ftype_v2df
12243 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12244 tree v4sf_ftype_v2df
12245 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12246 tree v2df_ftype_v2si
12247 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12248 tree v2df_ftype_v4sf
12249 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12250 tree int_ftype_v2df
12251 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12252 tree v2df_ftype_v2df_int
12253 = build_function_type_list (V2DF_type_node,
12254 V2DF_type_node, integer_type_node, NULL_TREE);
12255 tree v4sf_ftype_v4sf_v2df
12256 = build_function_type_list (V4SF_type_node,
12257 V4SF_type_node, V2DF_type_node, NULL_TREE);
12258 tree v2df_ftype_v2df_v4sf
12259 = build_function_type_list (V2DF_type_node,
12260 V2DF_type_node, V4SF_type_node, NULL_TREE);
12261 tree v2df_ftype_v2df_v2df_int
12262 = build_function_type_list (V2DF_type_node,
12263 V2DF_type_node, V2DF_type_node,
12264 integer_type_node,
12265 NULL_TREE);
12266 tree v2df_ftype_v2df_pv2si
12267 = build_function_type_list (V2DF_type_node,
12268 V2DF_type_node, pv2si_type_node, NULL_TREE);
12269 tree void_ftype_pv2si_v2df
12270 = build_function_type_list (void_type_node,
12271 pv2si_type_node, V2DF_type_node, NULL_TREE);
12272 tree void_ftype_pdouble_v2df
12273 = build_function_type_list (void_type_node,
12274 pdouble_type_node, V2DF_type_node, NULL_TREE);
12275 tree void_ftype_pint_int
12276 = build_function_type_list (void_type_node,
12277 pint_type_node, integer_type_node, NULL_TREE);
12278 tree void_ftype_v16qi_v16qi_pchar
12279 = build_function_type_list (void_type_node,
12280 V16QI_type_node, V16QI_type_node,
12281 pchar_type_node, NULL_TREE);
12282 tree v2df_ftype_pdouble
12283 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12284 tree v2df_ftype_v2df_v2df
12285 = build_function_type_list (V2DF_type_node,
12286 V2DF_type_node, V2DF_type_node, NULL_TREE);
12287 tree v16qi_ftype_v16qi_v16qi
12288 = build_function_type_list (V16QI_type_node,
12289 V16QI_type_node, V16QI_type_node, NULL_TREE);
12290 tree v8hi_ftype_v8hi_v8hi
12291 = build_function_type_list (V8HI_type_node,
12292 V8HI_type_node, V8HI_type_node, NULL_TREE);
12293 tree v4si_ftype_v4si_v4si
12294 = build_function_type_list (V4SI_type_node,
12295 V4SI_type_node, V4SI_type_node, NULL_TREE);
12296 tree v2di_ftype_v2di_v2di
12297 = build_function_type_list (V2DI_type_node,
12298 V2DI_type_node, V2DI_type_node, NULL_TREE);
12299 tree v2di_ftype_v2df_v2df
12300 = build_function_type_list (V2DI_type_node,
12301 V2DF_type_node, V2DF_type_node, NULL_TREE);
12302 tree v2df_ftype_v2df
12303 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12304 tree v2df_ftype_double
12305 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12306 tree v2df_ftype_double_double
12307 = build_function_type_list (V2DF_type_node,
12308 double_type_node, double_type_node, NULL_TREE);
12309 tree int_ftype_v8hi_int
12310 = build_function_type_list (integer_type_node,
12311 V8HI_type_node, integer_type_node, NULL_TREE);
12312 tree v8hi_ftype_v8hi_int_int
12313 = build_function_type_list (V8HI_type_node,
12314 V8HI_type_node, integer_type_node,
12315 integer_type_node, NULL_TREE);
12316 tree v2di_ftype_v2di_int
12317 = build_function_type_list (V2DI_type_node,
12318 V2DI_type_node, integer_type_node, NULL_TREE);
12319 tree v4si_ftype_v4si_int
12320 = build_function_type_list (V4SI_type_node,
12321 V4SI_type_node, integer_type_node, NULL_TREE);
12322 tree v8hi_ftype_v8hi_int
12323 = build_function_type_list (V8HI_type_node,
12324 V8HI_type_node, integer_type_node, NULL_TREE);
12325 tree v8hi_ftype_v8hi_v2di
12326 = build_function_type_list (V8HI_type_node,
12327 V8HI_type_node, V2DI_type_node, NULL_TREE);
12328 tree v4si_ftype_v4si_v2di
12329 = build_function_type_list (V4SI_type_node,
12330 V4SI_type_node, V2DI_type_node, NULL_TREE);
12331 tree v4si_ftype_v8hi_v8hi
12332 = build_function_type_list (V4SI_type_node,
12333 V8HI_type_node, V8HI_type_node, NULL_TREE);
12334 tree di_ftype_v8qi_v8qi
12335 = build_function_type_list (long_long_unsigned_type_node,
12336 V8QI_type_node, V8QI_type_node, NULL_TREE);
12337 tree v2di_ftype_v16qi_v16qi
12338 = build_function_type_list (V2DI_type_node,
12339 V16QI_type_node, V16QI_type_node, NULL_TREE);
12340 tree int_ftype_v16qi
12341 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12342
12343 /* Add all builtins that are more or less simple operations on two
12344 operands. */
12345 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12346 {
12347 /* Use one of the operands; the target can have a different mode for
12348 mask-generating compares. */
12349 enum machine_mode mode;
12350 tree type;
12351
12352 if (d->name == 0)
12353 continue;
12354 mode = insn_data[d->icode].operand[1].mode;
12355
12356 switch (mode)
12357 {
12358 case V16QImode:
12359 type = v16qi_ftype_v16qi_v16qi;
12360 break;
12361 case V8HImode:
12362 type = v8hi_ftype_v8hi_v8hi;
12363 break;
12364 case V4SImode:
12365 type = v4si_ftype_v4si_v4si;
12366 break;
12367 case V2DImode:
12368 type = v2di_ftype_v2di_v2di;
12369 break;
12370 case V2DFmode:
12371 type = v2df_ftype_v2df_v2df;
12372 break;
12373 case TImode:
12374 type = ti_ftype_ti_ti;
12375 break;
12376 case V4SFmode:
12377 type = v4sf_ftype_v4sf_v4sf;
12378 break;
12379 case V8QImode:
12380 type = v8qi_ftype_v8qi_v8qi;
12381 break;
12382 case V4HImode:
12383 type = v4hi_ftype_v4hi_v4hi;
12384 break;
12385 case V2SImode:
12386 type = v2si_ftype_v2si_v2si;
12387 break;
12388 case DImode:
12389 type = di_ftype_di_di;
12390 break;
12391
12392 default:
12393 abort ();
12394 }
12395
12396 /* Override for comparisons. */
12397 if (d->icode == CODE_FOR_maskcmpv4sf3
12398 || d->icode == CODE_FOR_maskncmpv4sf3
12399 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12400 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12401 type = v4si_ftype_v4sf_v4sf;
12402
12403 if (d->icode == CODE_FOR_maskcmpv2df3
12404 || d->icode == CODE_FOR_maskncmpv2df3
12405 || d->icode == CODE_FOR_vmmaskcmpv2df3
12406 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12407 type = v2di_ftype_v2df_v2df;
12408
12409 def_builtin (d->mask, d->name, type, d->code);
12410 }
12411
12412 /* Add the remaining MMX insns with somewhat more complicated types. */
12413 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12414 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12415 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12416 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12417 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12418 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12419 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12420
12421 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12422 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12423 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12424
12425 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12426 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12427
12428 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12429 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12430
12431 /* comi/ucomi insns. */
12432 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12433 if (d->mask == MASK_SSE2)
12434 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12435 else
12436 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12437
12438 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12439 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12440 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12441
12442 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12443 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12444 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12445 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12446 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12447 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12448
12449 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12450 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12451
12452 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12453
12454 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12455 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12456 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12457 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12458 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12459 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12460
12461 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12462 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12463 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12464 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12465
12466 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12467 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12468 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12469 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12470
12471 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12472
12473 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12474
12475 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12476 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12477 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12478 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12479 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12480 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12481
12482 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12483
12484 /* Original 3DNow! */
12485 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12486 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12487 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12488 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12489 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12490 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12491 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12492 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12493 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12494 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12495 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12496 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12503 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12504 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12505
12506 /* 3DNow! extension as used in the Athlon CPU. */
12507 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12508 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12509 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12510 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12511 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12512 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12513
12514 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12515
12516 /* SSE2 */
12517 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12518 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12519
12520 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12522
12523 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12524 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12525 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12526 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12528 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12529
12530 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12534
12535 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12539 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12540
12541 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12542 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12545
12546 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12548
12549 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12550
12551 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12553
12554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12557 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12559
12560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12561
12562 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12563 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12564
12565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12568
12569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12572
12573 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12574 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12575 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12576 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12577 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12578 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12580
12581 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12582 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12583 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12584
12585 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12586 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12587 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12588
12589 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12590 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12592
12593 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12594 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12595
12596 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12597 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12598 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12599
12600 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12601 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12603
12604 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12605 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12606
12607 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12608 }
12609
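/* For illustration: these builtins are normally not called directly by user
   code; the intrinsics in <xmmintrin.h>/<emmintrin.h> are (presumably) thin
   wrappers around them, e.g. _mm_slli_epi16 (v, n) expanding to the
   __builtin_ia32_psllwi128 (v, n) defined just above.  */
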
12610 /* Errors in the source file can cause expand_expr to return const0_rtx
12611 where we expect a vector. To avoid crashing, use one of the vector
12612 clear instructions. */
12613 static rtx
12614 safe_vector_operand (x, mode)
12615 rtx x;
12616 enum machine_mode mode;
12617 {
12618 if (x != const0_rtx)
12619 return x;
12620 x = gen_reg_rtx (mode);
12621
12622 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12623 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12624 : gen_rtx_SUBREG (DImode, x, 0)));
12625 else
12626 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12627 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12628 return x;
12629 }
12630
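/* For illustration: if the user writes an SSE builtin call such as
   __builtin_ia32_addps (a, b) and one of the arguments is erroneous,
   expand_expr hands back const0_rtx instead of a V4SF value;
   safe_vector_operand substitutes a freshly cleared vector register so the
   insn predicates in the expanders below still match and compilation can
   continue after the error has been diagnosed.  */
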
12631 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12632
12633 static rtx
12634 ix86_expand_binop_builtin (icode, arglist, target)
12635 enum insn_code icode;
12636 tree arglist;
12637 rtx target;
12638 {
12639 rtx pat;
12640 tree arg0 = TREE_VALUE (arglist);
12641 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12642 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12643 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12644 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12645 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12646 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12647
12648 if (VECTOR_MODE_P (mode0))
12649 op0 = safe_vector_operand (op0, mode0);
12650 if (VECTOR_MODE_P (mode1))
12651 op1 = safe_vector_operand (op1, mode1);
12652
12653 if (! target
12654 || GET_MODE (target) != tmode
12655 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12656 target = gen_reg_rtx (tmode);
12657
12658 /* In case the insn wants input operands in modes different from
12659 the result, abort. */
12660 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12661 abort ();
12662
12663 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12664 op0 = copy_to_mode_reg (mode0, op0);
12665 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12666 op1 = copy_to_mode_reg (mode1, op1);
12667
12668 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12669 yet one of the two must not be a memory. This is normally enforced
12670 by expanders, but we didn't bother to create one here. */
12671 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12672 op0 = copy_to_mode_reg (mode0, op0);
12673
12674 pat = GEN_FCN (icode) (target, op0, op1);
12675 if (! pat)
12676 return 0;
12677 emit_insn (pat);
12678 return target;
12679 }
12680
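/* For illustration: a call like __builtin_ia32_pfadd (a, b) reaches
   ix86_expand_binop_builtin with icode == CODE_FOR_addv2sf3 (see the switch
   in ix86_expand_builtin below).  Assuming the usual named add pattern, the
   emitted insn is essentially

       (set (reg:V2SF target) (plus:V2SF (reg:V2SF a) (reg:V2SF b)))

   preceded by copies into fresh registers when a predicate rejects an
   operand or when both operands ended up in memory.  */
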
12681 /* Subroutine of ix86_expand_builtin to take care of stores. */
12682
12683 static rtx
12684 ix86_expand_store_builtin (icode, arglist)
12685 enum insn_code icode;
12686 tree arglist;
12687 {
12688 rtx pat;
12689 tree arg0 = TREE_VALUE (arglist);
12690 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12691 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12692 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12693 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12694 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12695
12696 if (VECTOR_MODE_P (mode1))
12697 op1 = safe_vector_operand (op1, mode1);
12698
12699 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12700
12701 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12702 op1 = copy_to_mode_reg (mode1, op1);
12703
12704 pat = GEN_FCN (icode) (op0, op1);
12705 if (pat)
12706 emit_insn (pat);
12707 return 0;
12708 }
12709
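/* For illustration: __builtin_ia32_storeaps (p, v) comes through here with
   icode == CODE_FOR_sse_movaps; the pointer argument is forced into a
   register and wrapped in a MEM, so the emitted insn is roughly

       (set (mem:V4SF (reg p)) (reg:V4SF v))

   and the function returns 0 because a store produces no value.  */
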
12710 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12711
12712 static rtx
12713 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12714 enum insn_code icode;
12715 tree arglist;
12716 rtx target;
12717 int do_load;
12718 {
12719 rtx pat;
12720 tree arg0 = TREE_VALUE (arglist);
12721 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12722 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12723 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12724
12725 if (! target
12726 || GET_MODE (target) != tmode
12727 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12728 target = gen_reg_rtx (tmode);
12729 if (do_load)
12730 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12731 else
12732 {
12733 if (VECTOR_MODE_P (mode0))
12734 op0 = safe_vector_operand (op0, mode0);
12735
12736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12737 op0 = copy_to_mode_reg (mode0, op0);
12738 }
12739
12740 pat = GEN_FCN (icode) (target, op0);
12741 if (! pat)
12742 return 0;
12743 emit_insn (pat);
12744 return target;
12745 }
12746
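/* For illustration: __builtin_ia32_loadaps (p) uses this helper with
   DO_LOAD set (see ix86_expand_builtin below), so the pointer argument is
   converted into (mem:V4SF (reg p)) and loaded via the sse_movaps pattern;
   with DO_LOAD clear the argument is already a vector value and is only
   copied into a register when the operand predicate demands it.  */
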
12747 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12748 sqrtss, rsqrtss, rcpss. */
12749
12750 static rtx
12751 ix86_expand_unop1_builtin (icode, arglist, target)
12752 enum insn_code icode;
12753 tree arglist;
12754 rtx target;
12755 {
12756 rtx pat;
12757 tree arg0 = TREE_VALUE (arglist);
12758 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12759 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12760 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12761
12762 if (! target
12763 || GET_MODE (target) != tmode
12764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12765 target = gen_reg_rtx (tmode);
12766
12767 if (VECTOR_MODE_P (mode0))
12768 op0 = safe_vector_operand (op0, mode0);
12769
12770 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12771 op0 = copy_to_mode_reg (mode0, op0);
12772
12773 op1 = op0;
12774 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12775 op1 = copy_to_mode_reg (mode0, op1);
12776
12777 pat = GEN_FCN (icode) (target, op0, op1);
12778 if (! pat)
12779 return 0;
12780 emit_insn (pat);
12781 return target;
12782 }
12783
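/* For illustration: __builtin_ia32_sqrtss (a) uses CODE_FOR_vmsqrtv4sf2.
   Assuming the vm* patterns merge the low element of the operation result
   with the upper elements of a second vector input, passing the same
   register for op0 and op1 as done above yields the usual sqrtss scalar
   semantics: the low element is replaced by its square root while the
   upper three elements pass through unchanged.  */
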
12784 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12785
12786 static rtx
12787 ix86_expand_sse_compare (d, arglist, target)
12788 const struct builtin_description *d;
12789 tree arglist;
12790 rtx target;
12791 {
12792 rtx pat;
12793 tree arg0 = TREE_VALUE (arglist);
12794 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12795 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12796 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12797 rtx op2;
12798 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12799 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12800 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12801 enum rtx_code comparison = d->comparison;
12802
12803 if (VECTOR_MODE_P (mode0))
12804 op0 = safe_vector_operand (op0, mode0);
12805 if (VECTOR_MODE_P (mode1))
12806 op1 = safe_vector_operand (op1, mode1);
12807
12808 /* Swap operands if we have a comparison that isn't available in
12809 hardware. */
12810 if (d->flag)
12811 {
12812 rtx tmp = gen_reg_rtx (mode1);
12813 emit_move_insn (tmp, op1);
12814 op1 = op0;
12815 op0 = tmp;
12816 }
12817
12818 if (! target
12819 || GET_MODE (target) != tmode
12820 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12821 target = gen_reg_rtx (tmode);
12822
12823 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12824 op0 = copy_to_mode_reg (mode0, op0);
12825 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12826 op1 = copy_to_mode_reg (mode1, op1);
12827
12828 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12829 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12830 if (! pat)
12831 return 0;
12832 emit_insn (pat);
12833 return target;
12834 }
12835
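/* For illustration: the SSE cmpps/cmpsd encodings provide EQ/LT/LE (and
   their negations) but no direct GT/GE, so a builtin like
   __builtin_ia32_cmpgtps is typically described in bdesc_2arg with the LT
   comparison and d->flag set; the swap above then computes a > b as b < a.  */
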
12836 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12837
12838 static rtx
12839 ix86_expand_sse_comi (d, arglist, target)
12840 const struct builtin_description *d;
12841 tree arglist;
12842 rtx target;
12843 {
12844 rtx pat;
12845 tree arg0 = TREE_VALUE (arglist);
12846 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12847 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12848 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12849 rtx op2;
12850 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12851 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12852 enum rtx_code comparison = d->comparison;
12853
12854 if (VECTOR_MODE_P (mode0))
12855 op0 = safe_vector_operand (op0, mode0);
12856 if (VECTOR_MODE_P (mode1))
12857 op1 = safe_vector_operand (op1, mode1);
12858
12859 /* Swap operands if we have a comparison that isn't available in
12860 hardware. */
12861 if (d->flag)
12862 {
12863 rtx tmp = op1;
12864 op1 = op0;
12865 op0 = tmp;
12866 }
12867
12868 target = gen_reg_rtx (SImode);
12869 emit_move_insn (target, const0_rtx);
12870 target = gen_rtx_SUBREG (QImode, target, 0);
12871
12872 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12873 op0 = copy_to_mode_reg (mode0, op0);
12874 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12875 op1 = copy_to_mode_reg (mode1, op1);
12876
12877 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12878 pat = GEN_FCN (d->icode) (op0, op1, op2);
12879 if (! pat)
12880 return 0;
12881 emit_insn (pat);
12882 emit_insn (gen_rtx_SET (VOIDmode,
12883 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12884 gen_rtx_fmt_ee (comparison, QImode,
12885 gen_rtx_REG (CCmode, FLAGS_REG),
12886 const0_rtx)));
12887
12888 return SUBREG_REG (target);
12889 }
12890
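/* For illustration: for __builtin_ia32_comieq the code above first emits
   the comi insn, which only sets the flags, and then materializes the
   boolean result by storing (eq (reg:CC flags) 0) into the low QImode part
   of a zeroed SImode register - the equivalent of a sete instruction - and
   returns that SImode register.  */
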
12891 /* Expand an expression EXP that calls a built-in function,
12892 with result going to TARGET if that's convenient
12893 (and in mode MODE if that's convenient).
12894 SUBTARGET may be used as the target for computing one of EXP's operands.
12895 IGNORE is nonzero if the value is to be ignored. */
12896
12897 rtx
12898 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12899 tree exp;
12900 rtx target;
12901 rtx subtarget ATTRIBUTE_UNUSED;
12902 enum machine_mode mode ATTRIBUTE_UNUSED;
12903 int ignore ATTRIBUTE_UNUSED;
12904 {
12905 const struct builtin_description *d;
12906 size_t i;
12907 enum insn_code icode;
12908 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12909 tree arglist = TREE_OPERAND (exp, 1);
12910 tree arg0, arg1, arg2;
12911 rtx op0, op1, op2, pat;
12912 enum machine_mode tmode, mode0, mode1, mode2;
12913 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12914
12915 switch (fcode)
12916 {
12917 case IX86_BUILTIN_EMMS:
12918 emit_insn (gen_emms ());
12919 return 0;
12920
12921 case IX86_BUILTIN_SFENCE:
12922 emit_insn (gen_sfence ());
12923 return 0;
12924
12925 case IX86_BUILTIN_PEXTRW:
12926 case IX86_BUILTIN_PEXTRW128:
12927 icode = (fcode == IX86_BUILTIN_PEXTRW
12928 ? CODE_FOR_mmx_pextrw
12929 : CODE_FOR_sse2_pextrw);
12930 arg0 = TREE_VALUE (arglist);
12931 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12932 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12933 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12934 tmode = insn_data[icode].operand[0].mode;
12935 mode0 = insn_data[icode].operand[1].mode;
12936 mode1 = insn_data[icode].operand[2].mode;
12937
12938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12939 op0 = copy_to_mode_reg (mode0, op0);
12940 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12941 {
12942 /* @@@ better error message */
12943 error ("selector must be an immediate");
12944 return gen_reg_rtx (tmode);
12945 }
12946 if (target == 0
12947 || GET_MODE (target) != tmode
12948 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12949 target = gen_reg_rtx (tmode);
12950 pat = GEN_FCN (icode) (target, op0, op1);
12951 if (! pat)
12952 return 0;
12953 emit_insn (pat);
12954 return target;
12955
12956 case IX86_BUILTIN_PINSRW:
12957 case IX86_BUILTIN_PINSRW128:
12958 icode = (fcode == IX86_BUILTIN_PINSRW
12959 ? CODE_FOR_mmx_pinsrw
12960 : CODE_FOR_sse2_pinsrw);
12961 arg0 = TREE_VALUE (arglist);
12962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12963 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12964 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12965 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12966 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12967 tmode = insn_data[icode].operand[0].mode;
12968 mode0 = insn_data[icode].operand[1].mode;
12969 mode1 = insn_data[icode].operand[2].mode;
12970 mode2 = insn_data[icode].operand[3].mode;
12971
12972 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12973 op0 = copy_to_mode_reg (mode0, op0);
12974 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12975 op1 = copy_to_mode_reg (mode1, op1);
12976 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12977 {
12978 /* @@@ better error message */
12979 error ("selector must be an immediate");
12980 return const0_rtx;
12981 }
12982 if (target == 0
12983 || GET_MODE (target) != tmode
12984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12985 target = gen_reg_rtx (tmode);
12986 pat = GEN_FCN (icode) (target, op0, op1, op2);
12987 if (! pat)
12988 return 0;
12989 emit_insn (pat);
12990 return target;
12991
12992 case IX86_BUILTIN_MASKMOVQ:
12993 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12994 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12995 : CODE_FOR_sse2_maskmovdqu);
12996 /* Note the arg order is different from the operand order. */
12997 arg1 = TREE_VALUE (arglist);
12998 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12999 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13002 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13003 mode0 = insn_data[icode].operand[0].mode;
13004 mode1 = insn_data[icode].operand[1].mode;
13005 mode2 = insn_data[icode].operand[2].mode;
13006
13007 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13008 op0 = copy_to_mode_reg (mode0, op0);
13009 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13010 op1 = copy_to_mode_reg (mode1, op1);
13011 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13012 op2 = copy_to_mode_reg (mode2, op2);
13013 pat = GEN_FCN (icode) (op0, op1, op2);
13014 if (! pat)
13015 return 0;
13016 emit_insn (pat);
13017 return 0;
13018
13019 case IX86_BUILTIN_SQRTSS:
13020 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13021 case IX86_BUILTIN_RSQRTSS:
13022 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13023 case IX86_BUILTIN_RCPSS:
13024 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13025
13026 case IX86_BUILTIN_LOADAPS:
13027 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13028
13029 case IX86_BUILTIN_LOADUPS:
13030 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13031
13032 case IX86_BUILTIN_STOREAPS:
13033 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13034 case IX86_BUILTIN_STOREUPS:
13035 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13036
13037 case IX86_BUILTIN_LOADSS:
13038 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13039
13040 case IX86_BUILTIN_STORESS:
13041 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13042
13043 case IX86_BUILTIN_LOADHPS:
13044 case IX86_BUILTIN_LOADLPS:
13045 case IX86_BUILTIN_LOADHPD:
13046 case IX86_BUILTIN_LOADLPD:
13047 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13048 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13049 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13050 : CODE_FOR_sse2_movlpd);
13051 arg0 = TREE_VALUE (arglist);
13052 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13053 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13054 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13055 tmode = insn_data[icode].operand[0].mode;
13056 mode0 = insn_data[icode].operand[1].mode;
13057 mode1 = insn_data[icode].operand[2].mode;
13058
13059 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13060 op0 = copy_to_mode_reg (mode0, op0);
13061 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13062 if (target == 0
13063 || GET_MODE (target) != tmode
13064 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13065 target = gen_reg_rtx (tmode);
13066 pat = GEN_FCN (icode) (target, op0, op1);
13067 if (! pat)
13068 return 0;
13069 emit_insn (pat);
13070 return target;
13071
13072 case IX86_BUILTIN_STOREHPS:
13073 case IX86_BUILTIN_STORELPS:
13074 case IX86_BUILTIN_STOREHPD:
13075 case IX86_BUILTIN_STORELPD:
13076 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13077 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13078 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13079 : CODE_FOR_sse2_movlpd);
13080 arg0 = TREE_VALUE (arglist);
13081 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13082 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13083 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13084 mode0 = insn_data[icode].operand[1].mode;
13085 mode1 = insn_data[icode].operand[2].mode;
13086
13087 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13088 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13089 op1 = copy_to_mode_reg (mode1, op1);
13090
13091 pat = GEN_FCN (icode) (op0, op0, op1);
13092 if (! pat)
13093 return 0;
13094 emit_insn (pat);
13095 return 0;
13096
13097 case IX86_BUILTIN_MOVNTPS:
13098 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13099 case IX86_BUILTIN_MOVNTQ:
13100 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13101
13102 case IX86_BUILTIN_LDMXCSR:
13103 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13104 target = assign_386_stack_local (SImode, 0);
13105 emit_move_insn (target, op0);
13106 emit_insn (gen_ldmxcsr (target));
13107 return 0;
13108
13109 case IX86_BUILTIN_STMXCSR:
13110 target = assign_386_stack_local (SImode, 0);
13111 emit_insn (gen_stmxcsr (target));
13112 return copy_to_mode_reg (SImode, target);
13113
13114 case IX86_BUILTIN_SHUFPS:
13115 case IX86_BUILTIN_SHUFPD:
13116 icode = (fcode == IX86_BUILTIN_SHUFPS
13117 ? CODE_FOR_sse_shufps
13118 : CODE_FOR_sse2_shufpd);
13119 arg0 = TREE_VALUE (arglist);
13120 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13121 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13122 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13123 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13124 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13125 tmode = insn_data[icode].operand[0].mode;
13126 mode0 = insn_data[icode].operand[1].mode;
13127 mode1 = insn_data[icode].operand[2].mode;
13128 mode2 = insn_data[icode].operand[3].mode;
13129
13130 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13131 op0 = copy_to_mode_reg (mode0, op0);
13132 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13133 op1 = copy_to_mode_reg (mode1, op1);
13134 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13135 {
13136 /* @@@ better error message */
13137 error ("mask must be an immediate");
13138 return gen_reg_rtx (tmode);
13139 }
13140 if (target == 0
13141 || GET_MODE (target) != tmode
13142 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13143 target = gen_reg_rtx (tmode);
13144 pat = GEN_FCN (icode) (target, op0, op1, op2);
13145 if (! pat)
13146 return 0;
13147 emit_insn (pat);
13148 return target;
13149
13150 case IX86_BUILTIN_PSHUFW:
13151 case IX86_BUILTIN_PSHUFD:
13152 case IX86_BUILTIN_PSHUFHW:
13153 case IX86_BUILTIN_PSHUFLW:
13154 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13155 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13156 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13157 : CODE_FOR_mmx_pshufw);
13158 arg0 = TREE_VALUE (arglist);
13159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13162 tmode = insn_data[icode].operand[0].mode;
13163 mode1 = insn_data[icode].operand[1].mode;
13164 mode2 = insn_data[icode].operand[2].mode;
13165
13166 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13167 op0 = copy_to_mode_reg (mode1, op0);
13168 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13169 {
13170 /* @@@ better error message */
13171 error ("mask must be an immediate");
13172 return const0_rtx;
13173 }
13174 if (target == 0
13175 || GET_MODE (target) != tmode
13176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13177 target = gen_reg_rtx (tmode);
13178 pat = GEN_FCN (icode) (target, op0, op1);
13179 if (! pat)
13180 return 0;
13181 emit_insn (pat);
13182 return target;
13183
13184 case IX86_BUILTIN_FEMMS:
13185 emit_insn (gen_femms ());
13186 return NULL_RTX;
13187
13188 case IX86_BUILTIN_PAVGUSB:
13189 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13190
13191 case IX86_BUILTIN_PF2ID:
13192 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13193
13194 case IX86_BUILTIN_PFACC:
13195 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13196
13197 case IX86_BUILTIN_PFADD:
13198 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13199
13200 case IX86_BUILTIN_PFCMPEQ:
13201 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13202
13203 case IX86_BUILTIN_PFCMPGE:
13204 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13205
13206 case IX86_BUILTIN_PFCMPGT:
13207 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13208
13209 case IX86_BUILTIN_PFMAX:
13210 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13211
13212 case IX86_BUILTIN_PFMIN:
13213 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13214
13215 case IX86_BUILTIN_PFMUL:
13216 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13217
13218 case IX86_BUILTIN_PFRCP:
13219 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13220
13221 case IX86_BUILTIN_PFRCPIT1:
13222 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13223
13224 case IX86_BUILTIN_PFRCPIT2:
13225 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13226
13227 case IX86_BUILTIN_PFRSQIT1:
13228 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13229
13230 case IX86_BUILTIN_PFRSQRT:
13231 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13232
13233 case IX86_BUILTIN_PFSUB:
13234 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13235
13236 case IX86_BUILTIN_PFSUBR:
13237 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13238
13239 case IX86_BUILTIN_PI2FD:
13240 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13241
13242 case IX86_BUILTIN_PMULHRW:
13243 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13244
13245 case IX86_BUILTIN_PF2IW:
13246 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13247
13248 case IX86_BUILTIN_PFNACC:
13249 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13250
13251 case IX86_BUILTIN_PFPNACC:
13252 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13253
13254 case IX86_BUILTIN_PI2FW:
13255 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13256
13257 case IX86_BUILTIN_PSWAPDSI:
13258 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13259
13260 case IX86_BUILTIN_PSWAPDSF:
13261 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13262
13263 case IX86_BUILTIN_SSE_ZERO:
13264 target = gen_reg_rtx (V4SFmode);
13265 emit_insn (gen_sse_clrv4sf (target));
13266 return target;
13267
13268 case IX86_BUILTIN_MMX_ZERO:
13269 target = gen_reg_rtx (DImode);
13270 emit_insn (gen_mmx_clrdi (target));
13271 return target;
13272
13273 case IX86_BUILTIN_SQRTSD:
13274 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13275 case IX86_BUILTIN_LOADAPD:
13276 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13277 case IX86_BUILTIN_LOADUPD:
13278 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13279
13280 case IX86_BUILTIN_STOREAPD:
13281 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13282 case IX86_BUILTIN_STOREUPD:
13283 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13284
13285 case IX86_BUILTIN_LOADSD:
13286 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13287
13288 case IX86_BUILTIN_STORESD:
13289 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13290
13291 case IX86_BUILTIN_SETPD1:
13292 target = assign_386_stack_local (DFmode, 0);
13293 arg0 = TREE_VALUE (arglist);
13294 emit_move_insn (adjust_address (target, DFmode, 0),
13295 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13296 op0 = gen_reg_rtx (V2DFmode);
13297 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13298 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13299 return op0;
13300
13301 case IX86_BUILTIN_SETPD:
13302 target = assign_386_stack_local (V2DFmode, 0);
13303 arg0 = TREE_VALUE (arglist);
13304 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13305 emit_move_insn (adjust_address (target, DFmode, 0),
13306 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13307 emit_move_insn (adjust_address (target, DFmode, 8),
13308 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13309 op0 = gen_reg_rtx (V2DFmode);
13310 emit_insn (gen_sse2_movapd (op0, target));
13311 return op0;
13312
13313 case IX86_BUILTIN_LOADRPD:
13314 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13315 gen_reg_rtx (V2DFmode), 1);
13316 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13317 return target;
13318
13319 case IX86_BUILTIN_LOADPD1:
13320 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13321 gen_reg_rtx (V2DFmode), 1);
13322 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13323 return target;
13324
13325 case IX86_BUILTIN_STOREPD1:
13326 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13327 case IX86_BUILTIN_STORERPD:
13328 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13329
13330 case IX86_BUILTIN_MFENCE:
13331 emit_insn (gen_sse2_mfence ());
13332 return 0;
13333 case IX86_BUILTIN_LFENCE:
13334 emit_insn (gen_sse2_lfence ());
13335 return 0;
13336
13337 case IX86_BUILTIN_CLFLUSH:
13338 arg0 = TREE_VALUE (arglist);
13339 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13340 icode = CODE_FOR_sse2_clflush;
13341 mode0 = insn_data[icode].operand[0].mode;
13342 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13343 op0 = copy_to_mode_reg (mode0, op0);
13344
13345 emit_insn (gen_sse2_clflush (op0));
13346 return 0;
13347
13348 case IX86_BUILTIN_MOVNTPD:
13349 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13350 case IX86_BUILTIN_MOVNTDQ:
13351 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13352 case IX86_BUILTIN_MOVNTI:
13353 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13354
13355 default:
13356 break;
13357 }
13358
13359 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13360 if (d->code == fcode)
13361 {
13362 /* Compares are treated specially. */
13363 if (d->icode == CODE_FOR_maskcmpv4sf3
13364 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13365 || d->icode == CODE_FOR_maskncmpv4sf3
13366 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13367 || d->icode == CODE_FOR_maskcmpv2df3
13368 || d->icode == CODE_FOR_vmmaskcmpv2df3
13369 || d->icode == CODE_FOR_maskncmpv2df3
13370 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13371 return ix86_expand_sse_compare (d, arglist, target);
13372
13373 return ix86_expand_binop_builtin (d->icode, arglist, target);
13374 }
13375
13376 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13377 if (d->code == fcode)
13378 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13379
13380 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13381 if (d->code == fcode)
13382 return ix86_expand_sse_comi (d, arglist, target);
13383
13384 /* @@@ Should really do something sensible here. */
13385 return 0;
13386 }
13387
13388 /* Store OPERAND to memory after reload is completed. This means
13389 that we can't easily use assign_stack_local. */
13390 rtx
13391 ix86_force_to_memory (mode, operand)
13392 enum machine_mode mode;
13393 rtx operand;
13394 {
13395 rtx result;
13396 if (!reload_completed)
13397 abort ();
13398 if (TARGET_64BIT && TARGET_RED_ZONE)
13399 {
13400 result = gen_rtx_MEM (mode,
13401 gen_rtx_PLUS (Pmode,
13402 stack_pointer_rtx,
13403 GEN_INT (-RED_ZONE_SIZE)));
13404 emit_move_insn (result, operand);
13405 }
13406 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13407 {
13408 switch (mode)
13409 {
13410 case HImode:
13411 case SImode:
13412 operand = gen_lowpart (DImode, operand);
13413 /* FALLTHRU */
13414 case DImode:
13415 emit_insn (
13416 gen_rtx_SET (VOIDmode,
13417 gen_rtx_MEM (DImode,
13418 gen_rtx_PRE_DEC (DImode,
13419 stack_pointer_rtx)),
13420 operand));
13421 break;
13422 default:
13423 abort ();
13424 }
13425 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13426 }
13427 else
13428 {
13429 switch (mode)
13430 {
13431 case DImode:
13432 {
13433 rtx operands[2];
13434 split_di (&operand, 1, operands, operands + 1);
13435 emit_insn (
13436 gen_rtx_SET (VOIDmode,
13437 gen_rtx_MEM (SImode,
13438 gen_rtx_PRE_DEC (Pmode,
13439 stack_pointer_rtx)),
13440 operands[1]));
13441 emit_insn (
13442 gen_rtx_SET (VOIDmode,
13443 gen_rtx_MEM (SImode,
13444 gen_rtx_PRE_DEC (Pmode,
13445 stack_pointer_rtx)),
13446 operands[0]));
13447 }
13448 break;
13449 case HImode:
13450 /* It is better to store HImodes as SImodes. */
13451 if (!TARGET_PARTIAL_REG_STALL)
13452 operand = gen_lowpart (SImode, operand);
13453 /* FALLTHRU */
13454 case SImode:
13455 emit_insn (
13456 gen_rtx_SET (VOIDmode,
13457 gen_rtx_MEM (GET_MODE (operand),
13458 gen_rtx_PRE_DEC (SImode,
13459 stack_pointer_rtx)),
13460 operand));
13461 break;
13462 default:
13463 abort ();
13464 }
13465 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13466 }
13467 return result;
13468 }
13469
13470 /* Free the operand from memory. */
13471 void
13472 ix86_free_from_memory (mode)
13473 enum machine_mode mode;
13474 {
13475 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13476 {
13477 int size;
13478
13479 if (mode == DImode || TARGET_64BIT)
13480 size = 8;
13481 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13482 size = 2;
13483 else
13484 size = 4;
13485 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13486 to a pop or add instruction if registers are available. */
13487 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13488 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13489 GEN_INT (size))));
13490 }
13491 }
13492
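/* For illustration: on a 32-bit target ix86_force_to_memory (SImode, reg)
   emits a push (a store through a PRE_DEC of the stack pointer) and returns
   (mem:SI (reg esp)); once the caller is done with that memory operand,
   ix86_free_from_memory (SImode) releases the slot with the equivalent of
   lea 4(%esp), %esp, which peephole2 may later turn into a pop or an add
   when a scratch register is available.  */
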
13493 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13494 QImode must go into class Q_REGS.
13495 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
13496 movdf to do mem-to-mem moves through integer regs. */
13497 enum reg_class
13498 ix86_preferred_reload_class (x, class)
13499 rtx x;
13500 enum reg_class class;
13501 {
13502 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13503 return NO_REGS;
13504 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13505 {
13506 /* SSE can't load any constant directly yet. */
13507 if (SSE_CLASS_P (class))
13508 return NO_REGS;
13509 /* Floats can load 0 and 1. */
13510 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13511 {
13512 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13513 if (MAYBE_SSE_CLASS_P (class))
13514 return (reg_class_subset_p (class, GENERAL_REGS)
13515 ? GENERAL_REGS : FLOAT_REGS);
13516 else
13517 return class;
13518 }
13519 /* General regs can load everything. */
13520 if (reg_class_subset_p (class, GENERAL_REGS))
13521 return GENERAL_REGS;
13522 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13523 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13524 return NO_REGS;
13525 }
13526 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13527 return NO_REGS;
13528 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13529 return Q_REGS;
13530 return class;
13531 }
13532
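/* For illustration: reloading the constant 1.0 into an x87 register is
   allowed because standard_80387_constant_p accepts it (fld1 exists), but
   reloading an arbitrary CONST_DOUBLE into FLOAT_REGS, or any constant into
   the SSE or MMX classes, is refused with NO_REGS, which forces the value
   into the constant pool and loads it from memory instead.  */
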
13533 /* If we are copying between general and FP registers, we need a memory
13534 location. The same is true for SSE and MMX registers.
13535
13536 The macro can't work reliably when one of the CLASSES is a class containing
13537 registers from multiple units (SSE, MMX, integer). We avoid this by never
13538 combining those units in a single alternative in the machine description.
13539 Ensure that this constraint holds to avoid unexpected surprises.
13540
13541 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13542 enforce these sanity checks. */
13543 int
13544 ix86_secondary_memory_needed (class1, class2, mode, strict)
13545 enum reg_class class1, class2;
13546 enum machine_mode mode;
13547 int strict;
13548 {
13549 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13550 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13551 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13552 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13553 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13554 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13555 {
13556 if (strict)
13557 abort ();
13558 else
13559 return 1;
13560 }
13561 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13562 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13563 && (mode) != SImode)
13564 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13565 && (mode) != SImode));
13566 }
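
/* For illustration: a DFmode move between an SSE register and an x87
   register has no direct instruction, so the test above requests a memory
   intermediate; moves between the integer unit and the MMX or SSE units
   likewise go through memory, except in SImode where movd is available.  */
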
13567 /* Return the cost of moving data from a register in class CLASS1 to
13568 one in class CLASS2.
13569
13570 It is not required that the cost always equal 2 when FROM is the same as TO;
13571 on some machines it is expensive to move between registers if they are not
13572 general registers. */
13573 int
13574 ix86_register_move_cost (mode, class1, class2)
13575 enum machine_mode mode;
13576 enum reg_class class1, class2;
13577 {
13578 /* In case we require secondary memory, compute the cost of the store followed
13579 by the load. In order to avoid bad register allocation choices, we need
13580 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13581
13582 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13583 {
13584 int cost = 1;
13585
13586 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13587 MEMORY_MOVE_COST (mode, class1, 1));
13588 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13589 MEMORY_MOVE_COST (mode, class2, 1));
13590
13591 /* In case of copying from a general purpose register we may emit multiple
13592 stores followed by a single load, causing a memory size mismatch stall.
13593 Count this as an arbitrarily high cost of 20. */
13594 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13595 cost += 20;
13596
13597 /* In the case of FP/MMX moves, the registers actually overlap, and we
13598 have to switch modes in order to treat them differently. */
13599 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13600 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13601 cost += 20;
13602
13603 return cost;
13604 }
13605
13606 /* Moves between SSE/MMX and integer unit are expensive. */
13607 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13608 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13609 return ix86_cost->mmxsse_to_integer;
13610 if (MAYBE_FLOAT_CLASS_P (class1))
13611 return ix86_cost->fp_move;
13612 if (MAYBE_SSE_CLASS_P (class1))
13613 return ix86_cost->sse_move;
13614 if (MAYBE_MMX_CLASS_P (class1))
13615 return ix86_cost->mmx_move;
13616 return 2;
13617 }
13618
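/* For illustration: a DImode move from GENERAL_REGS to SSE_REGS takes the
   secondary-memory path above, so its cost is 1 plus the larger of the
   load and store MEMORY_MOVE_COSTs for each class, plus another 20 because
   the general-register side needs two hard registers while the SSE side
   needs one (the store/load size mismatch case described above).  */
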
13619 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13620 int
13621 ix86_hard_regno_mode_ok (regno, mode)
13622 int regno;
13623 enum machine_mode mode;
13624 {
13625 /* Flags and only flags can only hold CCmode values. */
13626 if (CC_REGNO_P (regno))
13627 return GET_MODE_CLASS (mode) == MODE_CC;
13628 if (GET_MODE_CLASS (mode) == MODE_CC
13629 || GET_MODE_CLASS (mode) == MODE_RANDOM
13630 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13631 return 0;
13632 if (FP_REGNO_P (regno))
13633 return VALID_FP_MODE_P (mode);
13634 if (SSE_REGNO_P (regno))
13635 return VALID_SSE_REG_MODE (mode);
13636 if (MMX_REGNO_P (regno))
13637 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13638 /* We handle both integer and float values in the general purpose registers.
13639 In the future we should be able to handle vector modes as well. */
13640 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13641 return 0;
13642 /* Take care with QImode values - they can be in non-QI regs, but then
13643 they do cause partial register stalls. */
13644 if (regno < 4 || mode != QImode || TARGET_64BIT)
13645 return 1;
13646 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13647 }
13648
13649 /* Return the cost of moving data of mode M between a
13650 register and memory. A value of 2 is the default; this cost is
13651 relative to those in `REGISTER_MOVE_COST'.
13652
13653 If moving between registers and memory is more expensive than
13654 between two registers, you should define this macro to express the
13655 relative cost.
13656
13657 Also model the increased cost of moving QImode registers in non
13658 Q_REGS classes.
13659 */
13660 int
13661 ix86_memory_move_cost (mode, class, in)
13662 enum machine_mode mode;
13663 enum reg_class class;
13664 int in;
13665 {
13666 if (FLOAT_CLASS_P (class))
13667 {
13668 int index;
13669 switch (mode)
13670 {
13671 case SFmode:
13672 index = 0;
13673 break;
13674 case DFmode:
13675 index = 1;
13676 break;
13677 case XFmode:
13678 case TFmode:
13679 index = 2;
13680 break;
13681 default:
13682 return 100;
13683 }
13684 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13685 }
13686 if (SSE_CLASS_P (class))
13687 {
13688 int index;
13689 switch (GET_MODE_SIZE (mode))
13690 {
13691 case 4:
13692 index = 0;
13693 break;
13694 case 8:
13695 index = 1;
13696 break;
13697 case 16:
13698 index = 2;
13699 break;
13700 default:
13701 return 100;
13702 }
13703 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13704 }
13705 if (MMX_CLASS_P (class))
13706 {
13707 int index;
13708 switch (GET_MODE_SIZE (mode))
13709 {
13710 case 4:
13711 index = 0;
13712 break;
13713 case 8:
13714 index = 1;
13715 break;
13716 default:
13717 return 100;
13718 }
13719 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13720 }
13721 switch (GET_MODE_SIZE (mode))
13722 {
13723 case 1:
13724 if (in)
13725 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13726 : ix86_cost->movzbl_load);
13727 else
13728 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13729 : ix86_cost->int_store[0] + 4);
13730 break;
13731 case 2:
13732 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13733 default:
13734 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13735 if (mode == TFmode)
13736 mode = XFmode;
13737 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13738 * (int) GET_MODE_SIZE (mode) / 4);
13739 }
13740 }
13741
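/* For illustration: with the tables above, loading a QImode value into a
   register outside Q_REGS is charged the movzbl_load cost, while a plain
   DImode integer load or store is charged twice the SImode entry, i.e. two
   32-bit moves.  */
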
13742 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13743 static void
13744 ix86_svr3_asm_out_constructor (symbol, priority)
13745 rtx symbol;
13746 int priority ATTRIBUTE_UNUSED;
13747 {
13748 init_section ();
13749 fputs ("\tpushl $", asm_out_file);
13750 assemble_name (asm_out_file, XSTR (symbol, 0));
13751 fputc ('\n', asm_out_file);
13752 }
13753 #endif
13754
13755 #if TARGET_MACHO
13756
13757 static int current_machopic_label_num;
13758
13759 /* Given a symbol name and its associated stub, write out the
13760 definition of the stub. */
13761
13762 void
13763 machopic_output_stub (file, symb, stub)
13764 FILE *file;
13765 const char *symb, *stub;
13766 {
13767 unsigned int length;
13768 char *binder_name, *symbol_name, lazy_ptr_name[32];
13769 int label = ++current_machopic_label_num;
13770
13771 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13772 symb = (*targetm.strip_name_encoding) (symb);
13773
13774 length = strlen (stub);
13775 binder_name = alloca (length + 32);
13776 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13777
13778 length = strlen (symb);
13779 symbol_name = alloca (length + 32);
13780 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13781
13782 sprintf (lazy_ptr_name, "L%d$lz", label);
13783
13784 if (MACHOPIC_PURE)
13785 machopic_picsymbol_stub_section ();
13786 else
13787 machopic_symbol_stub_section ();
13788
13789 fprintf (file, "%s:\n", stub);
13790 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13791
13792 if (MACHOPIC_PURE)
13793 {
13794 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13795 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13796 fprintf (file, "\tjmp %%edx\n");
13797 }
13798 else
13799 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13800
13801 fprintf (file, "%s:\n", binder_name);
13802
13803 if (MACHOPIC_PURE)
13804 {
13805 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13806 fprintf (file, "\tpushl %%eax\n");
13807 }
13808 else
13809 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13810
13811 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13812
13813 machopic_lazy_symbol_ptr_section ();
13814 fprintf (file, "%s:\n", lazy_ptr_name);
13815 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13816 fprintf (file, "\t.long %s\n", binder_name);
13817 }
13818 #endif /* TARGET_MACHO */
13819
13820 /* Order the registers for the register allocator. */
13821
13822 void
13823 x86_order_regs_for_local_alloc ()
13824 {
13825 int pos = 0;
13826 int i;
13827
13828 /* First allocate the local general purpose registers. */
13829 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13830 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13831 reg_alloc_order [pos++] = i;
13832
13833 /* Global general purpose registers. */
13834 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13835 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13836 reg_alloc_order [pos++] = i;
13837
13838 /* x87 registers come first in case we are doing FP math
13839 using them. */
13840 if (!TARGET_SSE_MATH)
13841 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13842 reg_alloc_order [pos++] = i;
13843
13844 /* SSE registers. */
13845 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13846 reg_alloc_order [pos++] = i;
13847 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13848 reg_alloc_order [pos++] = i;
13849
13850 /* x87 registers. */
13851 if (TARGET_SSE_MATH)
13852 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13853 reg_alloc_order [pos++] = i;
13854
13855 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13856 reg_alloc_order [pos++] = i;
13857
13858 /* Initialize the rest of the array as we do not allocate some registers
13859 at all. */
13860 while (pos < FIRST_PSEUDO_REGISTER)
13861 reg_alloc_order [pos++] = 0;
13862 }
13863
13864 void
13865 x86_output_mi_thunk (file, delta, function)
13866 FILE *file;
13867 int delta;
13868 tree function;
13869 {
13870 tree parm;
13871 rtx xops[3];
13872
13873 if (ix86_regparm > 0)
13874 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13875 else
13876 parm = NULL_TREE;
13877 for (; parm; parm = TREE_CHAIN (parm))
13878 if (TREE_VALUE (parm) == void_type_node)
13879 break;
13880
13881 xops[0] = GEN_INT (delta);
13882 if (TARGET_64BIT)
13883 {
13884 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13885 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13886 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13887 if (flag_pic)
13888 {
13889 fprintf (file, "\tjmp *");
13890 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13891 fprintf (file, "@GOTPCREL(%%rip)\n");
13892 }
13893 else
13894 {
13895 fprintf (file, "\tjmp ");
13896 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13897 fprintf (file, "\n");
13898 }
13899 }
13900 else
13901 {
13902 if (parm)
13903 xops[1] = gen_rtx_REG (SImode, 0);
13904 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13905 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13906 else
13907 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13908 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13909
13910 if (flag_pic)
13911 {
13912 xops[0] = pic_offset_table_rtx;
13913 xops[1] = gen_label_rtx ();
13914 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13915
13916 if (ix86_regparm > 2)
13917 abort ();
13918 output_asm_insn ("push{l}\t%0", xops);
13919 output_asm_insn ("call\t%P1", xops);
13920 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13921 output_asm_insn ("pop{l}\t%0", xops);
13922 output_asm_insn
13923 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13924 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13925 output_asm_insn
13926 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13927 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13928 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13929 }
13930 else
13931 {
13932 fprintf (file, "\tjmp ");
13933 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13934 fprintf (file, "\n");
13935 }
13936 }
13937 }
13938
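/* For illustration: on ia32 without PIC, a thunk with a delta of 8 for a
   method whose `this' pointer is passed on the stack comes out roughly as

       addl $8, 4(%esp)
       jmp  f

   i.e. the `this' pointer is adjusted in place before jumping to the real
   function f.  */
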
13939 int
13940 x86_field_alignment (field, computed)
13941 tree field;
13942 int computed;
13943 {
13944 enum machine_mode mode;
13945 tree type = TREE_TYPE (field);
13946
13947 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13948 return computed;
13949 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13950 ? get_inner_array_type (type) : type);
13951 if (mode == DFmode || mode == DCmode
13952 || GET_MODE_CLASS (mode) == MODE_INT
13953 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13954 return MIN (32, computed);
13955 return computed;
13956 }
13957
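/* For illustration: in 32-bit code without -malign-double, a structure
   field of type double (DFmode) has its alignment capped at 32 bits by the
   MIN below, preserving the traditional ia32 struct layout; on 64-bit
   targets or with -malign-double the computed 64-bit alignment is kept.  */
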
13958 /* Implement machine specific optimizations.
13959 At the moment we implement a single transformation: AMD Athlon works faster
13960 when RET is not the destination of a conditional jump or directly preceded
13961 by another jump instruction. We avoid the penalty by inserting a NOP just
13962 before the RET instruction in such cases. */
13963 void
13964 x86_machine_dependent_reorg (first)
13965 rtx first ATTRIBUTE_UNUSED;
13966 {
13967 edge e;
13968
13969 if (!TARGET_ATHLON || !optimize || optimize_size)
13970 return;
13971 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13972 {
13973 basic_block bb = e->src;
13974 rtx ret = bb->end;
13975 rtx prev;
13976 bool insert = false;
13977
13978 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13979 continue;
13980 prev = prev_nonnote_insn (ret);
13981 if (prev && GET_CODE (prev) == CODE_LABEL)
13982 {
13983 edge e;
13984 for (e = bb->pred; e; e = e->pred_next)
13985 if (EDGE_FREQUENCY (e) && e->src->index > 0
13986 && !(e->flags & EDGE_FALLTHRU))
13987 insert = 1;
13988 }
13989 if (!insert)
13990 {
13991 prev = prev_real_insn (ret);
13992 if (prev && GET_CODE (prev) == JUMP_INSN
13993 && any_condjump_p (prev))
13994 insert = 1;
13995 }
13996 if (insert)
13997 emit_insn_before (gen_nop (), ret);
13998 }
13999 }
14000
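/* For illustration: on the Athlon the pass above turns

       jne  .L2
       ret

   into

       jne  .L2
       nop
       ret

   and likewise inserts a nop when the ret is the direct target of a jump,
   so that the return is neither immediately preceded by a branch nor a
   branch target.  */
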
14001 #include "gt-i386.h"