1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
48
49 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
50 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia32_use_dfa_pipeline_interface
51 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia32_multipass_dfa_lookahead
53
54 static int
55 ia32_use_dfa_pipeline_interface ()
56 {
57 if (ix86_cpu == PROCESSOR_PENTIUM)
58 return 1;
59 return 0;
60 }
61
62 /* How many alternative schedules to try. This should be as wide as the
63 scheduling freedom in the DFA, but no wider. Making this value too
64 large results in extra work for the scheduler. */
65
66 static int
67 ia32_multipass_dfa_lookahead ()
68 {
69 if (ix86_cpu == PROCESSOR_PENTIUM)
70 return 2;
71 else
72 return 0;
73 }
74
75 #ifndef CHECK_STACK_LIMIT
76 #define CHECK_STACK_LIMIT (-1)
77 #endif
78
79 /* Processor costs (relative to an add) */
80 static const
81 struct processor_costs size_cost = { /* costs for tuning for size */
82 2, /* cost of an add instruction */
83 3, /* cost of a lea instruction */
84 2, /* variable shift costs */
85 3, /* constant shift costs */
86 3, /* cost of starting a multiply */
87 0, /* cost of multiply per each bit set */
88 3, /* cost of a divide/mod */
89 3, /* cost of movsx */
90 3, /* cost of movzx */
91 0, /* "large" insn */
92 2, /* MOVE_RATIO */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
115 };
116 /* Processor costs (relative to an add) */
117 static const
118 struct processor_costs i386_cost = { /* 386 specific costs */
119 1, /* cost of an add instruction */
120 1, /* cost of a lea instruction */
121 3, /* variable shift costs */
122 2, /* constant shift costs */
123 6, /* cost of starting a multiply */
124 1, /* cost of multiply per each bit set */
125 23, /* cost of a divide/mod */
126 3, /* cost of movsx */
127 2, /* cost of movzx */
128 15, /* "large" insn */
129 3, /* MOVE_RATIO */
130 4, /* cost for loading QImode using movzbl */
131 {2, 4, 2}, /* cost of loading integer registers
132 in QImode, HImode and SImode.
133 Relative to reg-reg move (2). */
134 {2, 4, 2}, /* cost of storing integer registers */
135 2, /* cost of reg,reg fld/fst */
136 {8, 8, 8}, /* cost of loading fp registers
137 in SFmode, DFmode and XFmode */
138 {8, 8, 8}, /* cost of storing fp registers */
139 2, /* cost of moving MMX register */
140 {4, 8}, /* cost of loading MMX registers
141 in SImode and DImode */
142 {4, 8}, /* cost of storing MMX registers
143 in SImode and DImode */
144 2, /* cost of moving SSE register */
145 {4, 8, 16}, /* cost of loading SSE registers
146 in SImode, DImode and TImode */
147 {4, 8, 16}, /* cost of storing SSE registers
148 in SImode, DImode and TImode */
149 3, /* MMX or SSE register to integer */
150 0, /* size of prefetch block */
151 0, /* number of parallel prefetches */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 12, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 40, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 };
190
191 static const
192 struct processor_costs pentium_cost = {
193 1, /* cost of an add instruction */
194 1, /* cost of a lea instruction */
195 4, /* variable shift costs */
196 1, /* constant shift costs */
197 11, /* cost of starting a multiply */
198 0, /* cost of multiply per each bit set */
199 25, /* cost of a divide/mod */
200 3, /* cost of movsx */
201 2, /* cost of movzx */
202 8, /* "large" insn */
203 6, /* MOVE_RATIO */
204 6, /* cost for loading QImode using movzbl */
205 {2, 4, 2}, /* cost of loading integer registers
206 in QImode, HImode and SImode.
207 Relative to reg-reg move (2). */
208 {2, 4, 2}, /* cost of storing integer registers */
209 2, /* cost of reg,reg fld/fst */
210 {2, 2, 6}, /* cost of loading fp registers
211 in SFmode, DFmode and XFmode */
212 {4, 4, 6}, /* cost of storing fp registers */
213 8, /* cost of moving MMX register */
214 {8, 8}, /* cost of loading MMX registers
215 in SImode and DImode */
216 {8, 8}, /* cost of storing MMX registers
217 in SImode and DImode */
218 2, /* cost of moving SSE register */
219 {4, 8, 16}, /* cost of loading SSE registers
220 in SImode, DImode and TImode */
221 {4, 8, 16}, /* cost of storing SSE registers
222 in SImode, DImode and TImode */
223 3, /* MMX or SSE register to integer */
224 0, /* size of prefetch block */
225 0, /* number of parallel prefetches */
226 };
227
228 static const
229 struct processor_costs pentiumpro_cost = {
230 1, /* cost of an add instruction */
231 1, /* cost of a lea instruction */
232 1, /* variable shift costs */
233 1, /* constant shift costs */
234 4, /* cost of starting a multiply */
235 0, /* cost of multiply per each bit set */
236 17, /* cost of a divide/mod */
237 1, /* cost of movsx */
238 1, /* cost of movzx */
239 8, /* "large" insn */
240 6, /* MOVE_RATIO */
241 2, /* cost for loading QImode using movzbl */
242 {4, 4, 4}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 2, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {2, 2, 6}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {4, 4, 6}, /* cost of storing fp registers */
250 2, /* cost of moving MMX register */
251 {2, 2}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {2, 2}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {2, 2, 8}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {2, 2, 8}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 32, /* size of prefetch block */
262 6, /* number of parallel prefetches */
263 };
264
265 static const
266 struct processor_costs k6_cost = {
267 1, /* cost of an add instruction */
268 2, /* cost of a lea instruction */
269 1, /* variable shift costs */
270 1, /* constant shift costs */
271 3, /* cost of starting a multiply */
272 0, /* cost of multiply per each bit set */
273 18, /* cost of a divide/mod */
274 2, /* cost of movsx */
275 2, /* cost of movzx */
276 8, /* "large" insn */
277 4, /* MOVE_RATIO */
278 3, /* cost for loading QImode using movzbl */
279 {4, 5, 4}, /* cost of loading integer registers
280 in QImode, HImode and SImode.
281 Relative to reg-reg move (2). */
282 {2, 3, 2}, /* cost of storing integer registers */
283 4, /* cost of reg,reg fld/fst */
284 {6, 6, 6}, /* cost of loading fp registers
285 in SFmode, DFmode and XFmode */
286 {4, 4, 4}, /* cost of storing fp registers */
287 2, /* cost of moving MMX register */
288 {2, 2}, /* cost of loading MMX registers
289 in SImode and DImode */
290 {2, 2}, /* cost of storing MMX registers
291 in SImode and DImode */
292 2, /* cost of moving SSE register */
293 {2, 2, 8}, /* cost of loading SSE registers
294 in SImode, DImode and TImode */
295 {2, 2, 8}, /* cost of storing SSE registers
296 in SImode, DImode and TImode */
297 6, /* MMX or SSE register to integer */
298 32, /* size of prefetch block */
299 1, /* number of parallel prefetches */
300 };
301
302 static const
303 struct processor_costs athlon_cost = {
304 1, /* cost of an add instruction */
305 2, /* cost of a lea instruction */
306 1, /* variable shift costs */
307 1, /* constant shift costs */
308 5, /* cost of starting a multiply */
309 0, /* cost of multiply per each bit set */
310 42, /* cost of a divide/mod */
311 1, /* cost of movsx */
312 1, /* cost of movzx */
313 8, /* "large" insn */
314 9, /* MOVE_RATIO */
315 4, /* cost for loading QImode using movzbl */
316 {4, 5, 4}, /* cost of loading integer registers
317 in QImode, HImode and SImode.
318 Relative to reg-reg move (2). */
319 {2, 3, 2}, /* cost of storing integer registers */
320 4, /* cost of reg,reg fld/fst */
321 {6, 6, 20}, /* cost of loading fp registers
322 in SFmode, DFmode and XFmode */
323 {4, 4, 16}, /* cost of storing fp registers */
324 2, /* cost of moving MMX register */
325 {2, 2}, /* cost of loading MMX registers
326 in SImode and DImode */
327 {2, 2}, /* cost of storing MMX registers
328 in SImode and DImode */
329 2, /* cost of moving SSE register */
330 {2, 2, 8}, /* cost of loading SSE registers
331 in SImode, DImode and TImode */
332 {2, 2, 8}, /* cost of storing SSE registers
333 in SImode, DImode and TImode */
334 6, /* MMX or SSE register to integer */
335 64, /* size of prefetch block */
336 6, /* number of parallel prefetches */
337 };
338
339 static const
340 struct processor_costs pentium4_cost = {
341 1, /* cost of an add instruction */
342 1, /* cost of a lea instruction */
343 8, /* variable shift costs */
344 8, /* constant shift costs */
345 30, /* cost of starting a multiply */
346 0, /* cost of multiply per each bit set */
347 112, /* cost of a divide/mod */
348 1, /* cost of movsx */
349 1, /* cost of movzx */
350 16, /* "large" insn */
351 6, /* MOVE_RATIO */
352 2, /* cost for loading QImode using movzbl */
353 {4, 5, 4}, /* cost of loading integer registers
354 in QImode, HImode and SImode.
355 Relative to reg-reg move (2). */
356 {2, 3, 2}, /* cost of storing integer registers */
357 2, /* cost of reg,reg fld/fst */
358 {2, 2, 6}, /* cost of loading fp registers
359 in SFmode, DFmode and XFmode */
360 {4, 4, 6}, /* cost of storing fp registers */
361 2, /* cost of moving MMX register */
362 {2, 2}, /* cost of loading MMX registers
363 in SImode and DImode */
364 {2, 2}, /* cost of storing MMX registers
365 in SImode and DImode */
366 12, /* cost of moving SSE register */
367 {12, 12, 12}, /* cost of loading SSE registers
368 in SImode, DImode and TImode */
369 {2, 2, 8}, /* cost of storing SSE registers
370 in SImode, DImode and TImode */
371 10, /* MMX or SSE register to integer */
372 64, /* size of prefetch block */
373 6, /* number of parallel prefetches */
374 };
375
376 const struct processor_costs *ix86_cost = &pentium_cost;
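/* For illustration: the tables above are read through this pointer, so a
   cost query for, say, an add or lea would look roughly like
   `COSTS_N_INSNS (ix86_cost->add)' or `ix86_cost->lea'.  The exact cost
   macros live in i386.h; the form shown here is only a sketch.  */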
377
378 /* Processor feature/optimization bitmasks. */
379 #define m_386 (1<<PROCESSOR_I386)
380 #define m_486 (1<<PROCESSOR_I486)
381 #define m_PENT (1<<PROCESSOR_PENTIUM)
382 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
383 #define m_K6 (1<<PROCESSOR_K6)
384 #define m_ATHLON (1<<PROCESSOR_ATHLON)
385 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
386
387 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
388 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
389 const int x86_zero_extend_with_and = m_486 | m_PENT;
390 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
391 const int x86_double_with_add = ~m_386;
392 const int x86_use_bit_test = m_386;
393 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
394 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
395 const int x86_3dnow_a = m_ATHLON;
396 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
397 const int x86_branch_hints = m_PENT4;
398 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
399 const int x86_partial_reg_stall = m_PPRO;
400 const int x86_use_loop = m_K6;
401 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
402 const int x86_use_mov0 = m_K6;
403 const int x86_use_cltd = ~(m_PENT | m_K6);
404 const int x86_read_modify_write = ~m_PENT;
405 const int x86_read_modify = ~(m_PENT | m_PPRO);
406 const int x86_split_long_moves = m_PPRO;
407 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
408 const int x86_single_stringop = m_386 | m_PENT4;
409 const int x86_qimode_math = ~(0);
410 const int x86_promote_qi_regs = 0;
411 const int x86_himode_math = ~(m_PPRO);
412 const int x86_promote_hi_regs = m_PPRO;
413 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
414 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
415 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
416 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
417 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
418 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
419 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
420 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
421 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
422 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
423 const int x86_decompose_lea = m_PENT4;
424 const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;
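/* For illustration: each of these masks is meant to be tested against the
   bit of the CPU currently being tuned for, roughly

     if (x86_use_leave & (1 << ix86_cpu))
       ... generate "leave" in the epilogue ...

   The TARGET_* convenience macros in i386.h wrap this kind of test; the
   snippet above is only a sketch of the idiom.  */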
425
426 /* In case the average insn count for a single function invocation is
427 lower than this constant, emit fast (but longer) prologue and
428 epilogue code. */
429 #define FAST_PROLOGUE_INSN_COUNT 30
430 /* Set by prologue expander and used by epilogue expander to determine
431 the style used. */
432 static int use_fast_prologue_epilogue;
433
434 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
435
436 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
437 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
438 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
439
440 /* Array of the smallest class containing reg number REGNO, indexed by
441 REGNO. Used by REGNO_REG_CLASS in i386.h. */
442
443 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
444 {
445 /* ax, dx, cx, bx */
446 AREG, DREG, CREG, BREG,
447 /* si, di, bp, sp */
448 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
449 /* FP registers */
450 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
451 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
452 /* arg pointer */
453 NON_Q_REGS,
454 /* flags, fpsr, dirflag, frame */
455 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
456 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
457 SSE_REGS, SSE_REGS,
458 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
459 MMX_REGS, MMX_REGS,
460 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
461 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
462 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
463 SSE_REGS, SSE_REGS,
464 };
465
466 /* The "default" register map used in 32bit mode. */
467
468 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
469 {
470 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
471 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
472 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
473 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
474 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
475 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
476 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
477 };
478
479 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
480 1 /*RDX*/, 2 /*RCX*/,
481 FIRST_REX_INT_REG /*R8 */,
482 FIRST_REX_INT_REG + 1 /*R9 */};
483 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4};
484
485 /* The "default" register map used in 64bit mode. */
486 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
487 {
488 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
489 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
490 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
491 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
492 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
493 8,9,10,11,12,13,14,15, /* extended integer registers */
494 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
495 };
496
497 /* Define the register numbers to be used in Dwarf debugging information.
498 The SVR4 reference port C compiler uses the following register numbers
499 in its Dwarf output code:
500 0 for %eax (gcc regno = 0)
501 1 for %ecx (gcc regno = 2)
502 2 for %edx (gcc regno = 1)
503 3 for %ebx (gcc regno = 3)
504 4 for %esp (gcc regno = 7)
505 5 for %ebp (gcc regno = 6)
506 6 for %esi (gcc regno = 4)
507 7 for %edi (gcc regno = 5)
508 The following three DWARF register numbers are never generated by
509 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
510 believes these numbers have these meanings.
511 8 for %eip (no gcc equivalent)
512 9 for %eflags (gcc regno = 17)
513 10 for %trapno (no gcc equivalent)
514 It is not at all clear how we should number the FP stack registers
515 for the x86 architecture. If the version of SDB on x86/svr4 were
516 a bit less brain dead with respect to floating-point then we would
517 have a precedent to follow with respect to DWARF register numbers
518 for x86 FP registers, but the SDB on x86/svr4 is so completely
519 broken with respect to FP registers that it is hardly worth thinking
520 of it as something to strive for compatibility with.
521 The version of x86/svr4 SDB I have at the moment does (partially)
522 seem to believe that DWARF register number 11 is associated with
523 the x86 register %st(0), but that's about all. Higher DWARF
524 register numbers don't seem to be associated with anything in
525 particular, and even for DWARF regno 11, SDB only seems to under-
526 stand that it should say that a variable lives in %st(0) (when
527 asked via an `=' command) if we said it was in DWARF regno 11,
528 but SDB still prints garbage when asked for the value of the
529 variable in question (via a `/' command).
530 (Also note that the labels SDB prints for various FP stack regs
531 when doing an `x' command are all wrong.)
532 Note that these problems generally don't affect the native SVR4
533 C compiler because it doesn't allow the use of -O with -g and
534 because when it is *not* optimizing, it allocates a memory
535 location for each floating-point variable, and the memory
536 location is what gets described in the DWARF AT_location
537 attribute for the variable in question.
538 Regardless of the severe mental illness of the x86/svr4 SDB, we
539 do something sensible here and we use the following DWARF
540 register numbers. Note that these are all stack-top-relative
541 numbers.
542 11 for %st(0) (gcc regno = 8)
543 12 for %st(1) (gcc regno = 9)
544 13 for %st(2) (gcc regno = 10)
545 14 for %st(3) (gcc regno = 11)
546 15 for %st(4) (gcc regno = 12)
547 16 for %st(5) (gcc regno = 13)
548 17 for %st(6) (gcc regno = 14)
549 18 for %st(7) (gcc regno = 15)
550 */
551 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
552 {
553 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
554 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
555 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
556 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
557 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
558 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
559 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
560 };
561
562 /* Test and compare insns in i386.md store the information needed to
563 generate branch and scc insns here. */
564
565 rtx ix86_compare_op0 = NULL_RTX;
566 rtx ix86_compare_op1 = NULL_RTX;
567
568 #define MAX_386_STACK_LOCALS 3
569 /* Size of the register save area. */
570 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
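/* For the 64bit ABI this works out to 6 * 8 + 8 * 16 = 176 bytes of
   register save area (six integer argument registers plus eight SSE
   registers), assuming the usual REGPARM_MAX and SSE_REGPARM_MAX values
   from i386.h.  */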
571
572 /* Define the structure for the machine field in struct function. */
573 struct machine_function
574 {
575 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
576 int save_varrargs_registers;
577 int accesses_prev_frame;
578 };
579
580 #define ix86_stack_locals (cfun->machine->stack_locals)
581 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
582
583 /* Structure describing stack frame layout.
584 Stack grows downward:
585
586 [arguments]
587 <- ARG_POINTER
588 saved pc
589
590 saved frame pointer if frame_pointer_needed
591 <- HARD_FRAME_POINTER
592 [saved regs]
593
594 [padding1] \
595 )
596 [va_arg registers] (
597 > to_allocate <- FRAME_POINTER
598 [frame] (
599 )
600 [padding2] /
601 */
602 struct ix86_frame
603 {
604 int nregs;
605 int padding1;
606 int va_arg_size;
607 HOST_WIDE_INT frame;
608 int padding2;
609 int outgoing_arguments_size;
610 int red_zone_size;
611
612 HOST_WIDE_INT to_allocate;
613 /* The offsets relative to ARG_POINTER. */
614 HOST_WIDE_INT frame_pointer_offset;
615 HOST_WIDE_INT hard_frame_pointer_offset;
616 HOST_WIDE_INT stack_pointer_offset;
617 };
618
619 /* Used to enable/disable debugging features. */
620 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
621 /* Code model option as passed by user. */
622 const char *ix86_cmodel_string;
623 /* Parsed value. */
624 enum cmodel ix86_cmodel;
625 /* Asm dialect. */
626 const char *ix86_asm_string;
627 enum asm_dialect ix86_asm_dialect = ASM_ATT;
628
629 /* which cpu are we scheduling for */
630 enum processor_type ix86_cpu;
631
632 /* which unit we are generating floating point math for */
633 enum fpmath_unit ix86_fpmath;
634
635 /* which instruction set architecture to use. */
636 int ix86_arch;
637
638 /* Strings to hold which cpu and instruction set architecture to use. */
639 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
640 const char *ix86_arch_string; /* for -march=<xxx> */
641 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
642
643 /* # of registers to use to pass arguments. */
644 const char *ix86_regparm_string;
645
646 /* true if sse prefetch instruction is not NOOP. */
647 int x86_prefetch_sse;
648
649 /* ix86_regparm_string as a number */
650 int ix86_regparm;
651
652 /* Alignment to use for loops and jumps: */
653
654 /* Power of two alignment for loops. */
655 const char *ix86_align_loops_string;
656
657 /* Power of two alignment for non-loop jumps. */
658 const char *ix86_align_jumps_string;
659
660 /* Power of two alignment for stack boundary in bytes. */
661 const char *ix86_preferred_stack_boundary_string;
662
663 /* Preferred alignment for stack boundary in bits. */
664 int ix86_preferred_stack_boundary;
665
666 /* Values 1-5: see jump.c */
667 int ix86_branch_cost;
668 const char *ix86_branch_cost_string;
669
670 /* Power of two alignment for functions. */
671 const char *ix86_align_funcs_string;
672
673 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
674 static char internal_label_prefix[16];
675 static int internal_label_prefix_len;
676 \f
677 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
678 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
679 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
680 int, int, FILE *));
681 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
682 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
683 rtx *, rtx *));
684 static rtx gen_push PARAMS ((rtx));
685 static int memory_address_length PARAMS ((rtx addr));
686 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
687 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
688 static int ix86_safe_length PARAMS ((rtx));
689 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
690 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
691 static void ix86_dump_ppro_packet PARAMS ((FILE *));
692 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
693 static void ix86_init_machine_status PARAMS ((struct function *));
694 static void ix86_mark_machine_status PARAMS ((struct function *));
695 static void ix86_free_machine_status PARAMS ((struct function *));
696 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
697 static int ix86_safe_length_prefix PARAMS ((rtx));
698 static int ix86_nsaved_regs PARAMS ((void));
699 static void ix86_emit_save_regs PARAMS ((void));
700 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
701 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
702 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
703 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
704 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
705 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
706 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
707 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
708 static int ix86_issue_rate PARAMS ((void));
709 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
710 static void ix86_sched_init PARAMS ((FILE *, int, int));
711 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
712 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
713 static void ix86_init_mmx_sse_builtins PARAMS ((void));
714
715 struct ix86_address
716 {
717 rtx base, index, disp;
718 HOST_WIDE_INT scale;
719 };
720
721 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
722
723 struct builtin_description;
724 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
725 tree, rtx));
726 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
727 tree, rtx));
728 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
729 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
730 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
731 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
732 tree, rtx));
733 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
734 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
735 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
736 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
737 enum rtx_code *,
738 enum rtx_code *,
739 enum rtx_code *));
740 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
741 rtx *, rtx *));
742 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
743 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
744 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
745 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
746 static int ix86_save_reg PARAMS ((int, int));
747 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
748 static int ix86_comp_type_attributes PARAMS ((tree, tree));
749 const struct attribute_spec ix86_attribute_table[];
750 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
751 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
752
753 #ifdef DO_GLOBAL_CTORS_BODY
754 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
755 #endif
756
757 /* Register class used for passing a given 64bit part of the argument.
758 These represent classes as documented by the PS ABI, with the exception
759 of the SSESF and SSEDF classes, which are basically the SSE class, except
760 that gcc will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
761
762 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
763 whenever possible (the upper half does contain padding).
764 */
765 enum x86_64_reg_class
766 {
767 X86_64_NO_CLASS,
768 X86_64_INTEGER_CLASS,
769 X86_64_INTEGERSI_CLASS,
770 X86_64_SSE_CLASS,
771 X86_64_SSESF_CLASS,
772 X86_64_SSEDF_CLASS,
773 X86_64_SSEUP_CLASS,
774 X86_64_X87_CLASS,
775 X86_64_X87UP_CLASS,
776 X86_64_MEMORY_CLASS
777 };
778 static const char * const x86_64_reg_class_name[] =
779 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
780
781 #define MAX_CLASSES 4
782 static int classify_argument PARAMS ((enum machine_mode, tree,
783 enum x86_64_reg_class [MAX_CLASSES],
784 int));
785 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
786 int *));
787 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
788 const int *, int));
789 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
790 enum x86_64_reg_class));
791 \f
792 /* Initialize the GCC target structure. */
793 #undef TARGET_ATTRIBUTE_TABLE
794 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
795 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
796 # undef TARGET_MERGE_DECL_ATTRIBUTES
797 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
798 #endif
799
800 #undef TARGET_COMP_TYPE_ATTRIBUTES
801 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
802
803 #undef TARGET_INIT_BUILTINS
804 #define TARGET_INIT_BUILTINS ix86_init_builtins
805
806 #undef TARGET_EXPAND_BUILTIN
807 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
808
809 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
810 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
811 HOST_WIDE_INT));
812 # undef TARGET_ASM_FUNCTION_PROLOGUE
813 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
814 #endif
815
816 #undef TARGET_ASM_OPEN_PAREN
817 #define TARGET_ASM_OPEN_PAREN ""
818 #undef TARGET_ASM_CLOSE_PAREN
819 #define TARGET_ASM_CLOSE_PAREN ""
820
821 #undef TARGET_ASM_ALIGNED_HI_OP
822 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
823 #undef TARGET_ASM_ALIGNED_SI_OP
824 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
825 #ifdef ASM_QUAD
826 #undef TARGET_ASM_ALIGNED_DI_OP
827 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
828 #endif
829
830 #undef TARGET_ASM_UNALIGNED_HI_OP
831 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
832 #undef TARGET_ASM_UNALIGNED_SI_OP
833 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
834 #undef TARGET_ASM_UNALIGNED_DI_OP
835 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
836
837 #undef TARGET_SCHED_ADJUST_COST
838 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
839 #undef TARGET_SCHED_ISSUE_RATE
840 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
841 #undef TARGET_SCHED_VARIABLE_ISSUE
842 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
843 #undef TARGET_SCHED_INIT
844 #define TARGET_SCHED_INIT ix86_sched_init
845 #undef TARGET_SCHED_REORDER
846 #define TARGET_SCHED_REORDER ix86_sched_reorder
847
848 struct gcc_target targetm = TARGET_INITIALIZER;
849 \f
850 /* Sometimes certain combinations of command options do not make
851 sense on a particular target machine. You can define a macro
852 `OVERRIDE_OPTIONS' to take account of this. This macro, if
853 defined, is executed once just after all the command options have
854 been parsed.
855
856 Don't use this macro to turn on various extra optimizations for
857 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
858
859 void
860 override_options ()
861 {
862 int i;
863 /* Comes from final.c -- no real reason to change it. */
864 #define MAX_CODE_ALIGN 16
865
866 static struct ptt
867 {
868 const struct processor_costs *cost; /* Processor costs */
869 const int target_enable; /* Target flags to enable. */
870 const int target_disable; /* Target flags to disable. */
871 const int align_loop; /* Default alignments. */
872 const int align_loop_max_skip;
873 const int align_jump;
874 const int align_jump_max_skip;
875 const int align_func;
876 const int branch_cost;
877 }
878 const processor_target_table[PROCESSOR_max] =
879 {
880 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
881 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
882 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
883 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
884 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
885 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
886 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
887 };
888
889 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
890 static struct pta
891 {
892 const char *const name; /* processor name or nickname. */
893 const enum processor_type processor;
894 const enum pta_flags
895 {
896 PTA_SSE = 1,
897 PTA_SSE2 = 2,
898 PTA_MMX = 4,
899 PTA_PREFETCH_SSE = 8,
900 PTA_3DNOW = 16,
901 PTA_3DNOW_A = 64
902 } flags;
903 }
904 const processor_alias_table[] =
905 {
906 {"i386", PROCESSOR_I386, 0},
907 {"i486", PROCESSOR_I486, 0},
908 {"i586", PROCESSOR_PENTIUM, 0},
909 {"pentium", PROCESSOR_PENTIUM, 0},
910 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
911 {"i686", PROCESSOR_PENTIUMPRO, 0},
912 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
913 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
914 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
915 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
916 PTA_MMX | PTA_PREFETCH_SSE},
917 {"k6", PROCESSOR_K6, PTA_MMX},
918 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
919 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
920 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
921 | PTA_3DNOW_A},
922 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
923 | PTA_3DNOW | PTA_3DNOW_A},
924 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
925 | PTA_3DNOW_A | PTA_SSE},
926 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
927 | PTA_3DNOW_A | PTA_SSE},
928 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
929 | PTA_3DNOW_A | PTA_SSE},
930 };
931
932 int const pta_size = ARRAY_SIZE (processor_alias_table);
933
934 #ifdef SUBTARGET_OVERRIDE_OPTIONS
935 SUBTARGET_OVERRIDE_OPTIONS;
936 #endif
937
938 if (!ix86_cpu_string && ix86_arch_string)
939 ix86_cpu_string = ix86_arch_string;
940 if (!ix86_cpu_string)
941 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
942 if (!ix86_arch_string)
943 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
944
945 if (ix86_cmodel_string != 0)
946 {
947 if (!strcmp (ix86_cmodel_string, "small"))
948 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
949 else if (flag_pic)
950 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
951 else if (!strcmp (ix86_cmodel_string, "32"))
952 ix86_cmodel = CM_32;
953 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
954 ix86_cmodel = CM_KERNEL;
955 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
956 ix86_cmodel = CM_MEDIUM;
957 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
958 ix86_cmodel = CM_LARGE;
959 else
960 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
961 }
962 else
963 {
964 ix86_cmodel = CM_32;
965 if (TARGET_64BIT)
966 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
967 }
968 if (ix86_asm_string != 0)
969 {
970 if (!strcmp (ix86_asm_string, "intel"))
971 ix86_asm_dialect = ASM_INTEL;
972 else if (!strcmp (ix86_asm_string, "att"))
973 ix86_asm_dialect = ASM_ATT;
974 else
975 error ("bad value (%s) for -masm= switch", ix86_asm_string);
976 }
977 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
978 error ("code model `%s' not supported in the %s bit mode",
979 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
980 if (ix86_cmodel == CM_LARGE)
981 sorry ("code model `large' not supported yet");
982 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
983 sorry ("%i-bit mode not compiled in",
984 (target_flags & MASK_64BIT) ? 64 : 32);
985
986 for (i = 0; i < pta_size; i++)
987 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
988 {
989 ix86_arch = processor_alias_table[i].processor;
990 /* Default cpu tuning to the architecture. */
991 ix86_cpu = ix86_arch;
992 if (processor_alias_table[i].flags & PTA_MMX
993 && !(target_flags & MASK_MMX_SET))
994 target_flags |= MASK_MMX;
995 if (processor_alias_table[i].flags & PTA_3DNOW
996 && !(target_flags & MASK_3DNOW_SET))
997 target_flags |= MASK_3DNOW;
998 if (processor_alias_table[i].flags & PTA_3DNOW_A
999 && !(target_flags & MASK_3DNOW_A_SET))
1000 target_flags |= MASK_3DNOW_A;
1001 if (processor_alias_table[i].flags & PTA_SSE
1002 && !(target_flags & MASK_SSE_SET))
1003 target_flags |= MASK_SSE;
1004 if (processor_alias_table[i].flags & PTA_SSE2
1005 && !(target_flags & MASK_SSE2_SET))
1006 target_flags |= MASK_SSE2;
1007 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1008 x86_prefetch_sse = true;
1009 break;
1010 }
1011
1012 if (i == pta_size)
1013 error ("bad value (%s) for -march= switch", ix86_arch_string);
1014
1015 for (i = 0; i < pta_size; i++)
1016 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1017 {
1018 ix86_cpu = processor_alias_table[i].processor;
1019 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1020 x86_prefetch_sse = true;
1021 break;
1022 }
1023 if (i == pta_size)
1024 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1025
1026 if (optimize_size)
1027 ix86_cost = &size_cost;
1028 else
1029 ix86_cost = processor_target_table[ix86_cpu].cost;
1030 target_flags |= processor_target_table[ix86_cpu].target_enable;
1031 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1032
1033 /* Arrange to set up i386_stack_locals for all functions. */
1034 init_machine_status = ix86_init_machine_status;
1035 mark_machine_status = ix86_mark_machine_status;
1036 free_machine_status = ix86_free_machine_status;
1037
1038 /* Validate -mregparm= value. */
1039 if (ix86_regparm_string)
1040 {
1041 i = atoi (ix86_regparm_string);
1042 if (i < 0 || i > REGPARM_MAX)
1043 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1044 else
1045 ix86_regparm = i;
1046 }
1047 else
1048 if (TARGET_64BIT)
1049 ix86_regparm = REGPARM_MAX;
1050
1051 /* If the user has provided any of the -malign-* options,
1052 warn and use that value only if -falign-* is not set.
1053 Remove this code in GCC 3.2 or later. */
1054 if (ix86_align_loops_string)
1055 {
1056 warning ("-malign-loops is obsolete, use -falign-loops");
1057 if (align_loops == 0)
1058 {
1059 i = atoi (ix86_align_loops_string);
1060 if (i < 0 || i > MAX_CODE_ALIGN)
1061 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1062 else
1063 align_loops = 1 << i;
1064 }
1065 }
1066
1067 if (ix86_align_jumps_string)
1068 {
1069 warning ("-malign-jumps is obsolete, use -falign-jumps");
1070 if (align_jumps == 0)
1071 {
1072 i = atoi (ix86_align_jumps_string);
1073 if (i < 0 || i > MAX_CODE_ALIGN)
1074 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1075 else
1076 align_jumps = 1 << i;
1077 }
1078 }
1079
1080 if (ix86_align_funcs_string)
1081 {
1082 warning ("-malign-functions is obsolete, use -falign-functions");
1083 if (align_functions == 0)
1084 {
1085 i = atoi (ix86_align_funcs_string);
1086 if (i < 0 || i > MAX_CODE_ALIGN)
1087 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1088 else
1089 align_functions = 1 << i;
1090 }
1091 }
1092
1093 /* Default align_* from the processor table. */
1094 if (align_loops == 0)
1095 {
1096 align_loops = processor_target_table[ix86_cpu].align_loop;
1097 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1098 }
1099 if (align_jumps == 0)
1100 {
1101 align_jumps = processor_target_table[ix86_cpu].align_jump;
1102 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1103 }
1104 if (align_functions == 0)
1105 {
1106 align_functions = processor_target_table[ix86_cpu].align_func;
1107 }
1108
1109 /* Validate -mpreferred-stack-boundary= value, or provide default.
1110 The default of 128 bits is for Pentium III's SSE __m128, but we
1111 don't want additional code to keep the stack aligned when
1112 optimizing for code size. */
1113 ix86_preferred_stack_boundary = (optimize_size
1114 ? TARGET_64BIT ? 64 : 32
1115 : 128);
1116 if (ix86_preferred_stack_boundary_string)
1117 {
1118 i = atoi (ix86_preferred_stack_boundary_string);
1119 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1120 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1121 TARGET_64BIT ? 3 : 2);
1122 else
1123 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1124 }
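/* Worked example of the conversion above: -mpreferred-stack-boundary=4
   gives i == 4, so the boundary becomes (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, i.e. the 16-byte alignment SSE wants.  */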
1125
1126 /* Validate -mbranch-cost= value, or provide default. */
1127 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1128 if (ix86_branch_cost_string)
1129 {
1130 i = atoi (ix86_branch_cost_string);
1131 if (i < 0 || i > 5)
1132 error ("-mbranch-cost=%d is not between 0 and 5", i);
1133 else
1134 ix86_branch_cost = i;
1135 }
1136
1137 /* Keep nonleaf frame pointers. */
1138 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1139 flag_omit_frame_pointer = 1;
1140
1141 /* If we're doing fast math, we don't care about comparison order
1142 wrt NaNs. This lets us use a shorter comparison sequence. */
1143 if (flag_unsafe_math_optimizations)
1144 target_flags &= ~MASK_IEEE_FP;
1145
1146 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1147 since the insns won't need emulation. */
1148 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1149 target_flags &= ~MASK_NO_FANCY_MATH_387;
1150
1151 if (TARGET_64BIT)
1152 {
1153 if (TARGET_ALIGN_DOUBLE)
1154 error ("-malign-double makes no sense in the 64bit mode");
1155 if (TARGET_RTD)
1156 error ("-mrtd calling convention not supported in the 64bit mode");
1157 /* Enable by default the SSE and MMX builtins. */
1158 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1159 ix86_fpmath = FPMATH_SSE;
1160 }
1161 else
1162 ix86_fpmath = FPMATH_387;
1163
1164 if (ix86_fpmath_string != 0)
1165 {
1166 if (! strcmp (ix86_fpmath_string, "387"))
1167 ix86_fpmath = FPMATH_387;
1168 else if (! strcmp (ix86_fpmath_string, "sse"))
1169 {
1170 if (!TARGET_SSE)
1171 {
1172 warning ("SSE instruction set disabled, using 387 arithmetics");
1173 ix86_fpmath = FPMATH_387;
1174 }
1175 else
1176 ix86_fpmath = FPMATH_SSE;
1177 }
1178 else if (! strcmp (ix86_fpmath_string, "387,sse")
1179 || ! strcmp (ix86_fpmath_string, "sse,387"))
1180 {
1181 if (!TARGET_SSE)
1182 {
1183 warning ("SSE instruction set disabled, using 387 arithmetics");
1184 ix86_fpmath = FPMATH_387;
1185 }
1186 else if (!TARGET_80387)
1187 {
1188 warning ("387 instruction set disabled, using SSE arithmetics");
1189 ix86_fpmath = FPMATH_SSE;
1190 }
1191 else
1192 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1193 }
1194 else
1195 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1196 }
1197
1198 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1199 on by -msse. */
1200 if (TARGET_SSE)
1201 {
1202 target_flags |= MASK_MMX;
1203 x86_prefetch_sse = true;
1204 }
1205
1206 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1207 if (TARGET_3DNOW)
1208 {
1209 target_flags |= MASK_MMX;
1210 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1211 extensions it adds. */
1212 if (x86_3dnow_a & (1 << ix86_arch))
1213 target_flags |= MASK_3DNOW_A;
1214 }
1215 if ((x86_accumulate_outgoing_args & CPUMASK)
1216 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1217 && !optimize_size)
1218 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1219
1220 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1221 {
1222 char *p;
1223 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1224 p = strchr (internal_label_prefix, 'X');
1225 internal_label_prefix_len = p - internal_label_prefix;
1226 *p = '\0';
1227 }
1228 }
1229 \f
1230 void
1231 optimization_options (level, size)
1232 int level;
1233 int size ATTRIBUTE_UNUSED;
1234 {
1235 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1236 make the register shortage problem even worse. */
1237 #ifdef INSN_SCHEDULING
1238 if (level > 1)
1239 flag_schedule_insns = 0;
1240 #endif
1241 if (TARGET_64BIT && optimize >= 1)
1242 flag_omit_frame_pointer = 1;
1243 if (TARGET_64BIT)
1244 {
1245 flag_pcc_struct_return = 0;
1246 flag_asynchronous_unwind_tables = 1;
1247 }
1248 }
1249 \f
1250 /* Table of valid machine attributes. */
1251 const struct attribute_spec ix86_attribute_table[] =
1252 {
1253 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1254 /* Stdcall attribute says callee is responsible for popping arguments
1255 if they are not variable. */
1256 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1257 /* Cdecl attribute says the callee is a normal C declaration */
1258 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1259 /* Regparm attribute specifies how many integer arguments are to be
1260 passed in registers. */
1261 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1262 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1263 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1264 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1265 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1266 #endif
1267 { NULL, 0, 0, false, false, false, NULL }
1268 };
1269
1270 /* Handle a "cdecl" or "stdcall" attribute;
1271 arguments as in struct attribute_spec.handler. */
1272 static tree
1273 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1274 tree *node;
1275 tree name;
1276 tree args ATTRIBUTE_UNUSED;
1277 int flags ATTRIBUTE_UNUSED;
1278 bool *no_add_attrs;
1279 {
1280 if (TREE_CODE (*node) != FUNCTION_TYPE
1281 && TREE_CODE (*node) != METHOD_TYPE
1282 && TREE_CODE (*node) != FIELD_DECL
1283 && TREE_CODE (*node) != TYPE_DECL)
1284 {
1285 warning ("`%s' attribute only applies to functions",
1286 IDENTIFIER_POINTER (name));
1287 *no_add_attrs = true;
1288 }
1289
1290 if (TARGET_64BIT)
1291 {
1292 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1293 *no_add_attrs = true;
1294 }
1295
1296 return NULL_TREE;
1297 }
1298
1299 /* Handle a "regparm" attribute;
1300 arguments as in struct attribute_spec.handler. */
1301 static tree
1302 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1303 tree *node;
1304 tree name;
1305 tree args;
1306 int flags ATTRIBUTE_UNUSED;
1307 bool *no_add_attrs;
1308 {
1309 if (TREE_CODE (*node) != FUNCTION_TYPE
1310 && TREE_CODE (*node) != METHOD_TYPE
1311 && TREE_CODE (*node) != FIELD_DECL
1312 && TREE_CODE (*node) != TYPE_DECL)
1313 {
1314 warning ("`%s' attribute only applies to functions",
1315 IDENTIFIER_POINTER (name));
1316 *no_add_attrs = true;
1317 }
1318 else
1319 {
1320 tree cst;
1321
1322 cst = TREE_VALUE (args);
1323 if (TREE_CODE (cst) != INTEGER_CST)
1324 {
1325 warning ("`%s' attribute requires an integer constant argument",
1326 IDENTIFIER_POINTER (name));
1327 *no_add_attrs = true;
1328 }
1329 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1330 {
1331 warning ("argument to `%s' attribute larger than %d",
1332 IDENTIFIER_POINTER (name), REGPARM_MAX);
1333 *no_add_attrs = true;
1334 }
1335 }
1336
1337 return NULL_TREE;
1338 }
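/* For illustration, the attributes handled above appear in user code
   roughly as follows (hypothetical declarations, shown only as a usage
   sketch):

     int callee_pops (int a, int b) __attribute__ ((stdcall));
     int in_regs (int a, int b) __attribute__ ((regparm (2)));

   With regparm (2) the first two integer arguments travel in registers
   rather than on the stack; stdcall makes the callee pop its fixed
   arguments, which ix86_return_pops_args below implements.  */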
1339
1340 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1341
1342 /* Generate the assembly code for function entry. FILE is a stdio
1343 stream to output the code to. SIZE is an int: how many units of
1344 temporary storage to allocate.
1345
1346 Refer to the array `regs_ever_live' to determine which registers to
1347 save; `regs_ever_live[I]' is nonzero if register number I is ever
1348 used in the function. This function is responsible for knowing
1349 which registers should not be saved even if used.
1350
1351 We override it here to allow for the new profiling code to go before
1352 the prologue and the old mcount code to go after the prologue (and
1353 after %ebx has been set up for ELF shared library support). */
1354
1355 static void
1356 ix86_osf_output_function_prologue (file, size)
1357 FILE *file;
1358 HOST_WIDE_INT size;
1359 {
1360 const char *prefix = "";
1361 const char *const lprefix = LPREFIX;
1362 int labelno = current_function_profile_label_no;
1363
1364 #ifdef OSF_OS
1365
1366 if (TARGET_UNDERSCORES)
1367 prefix = "_";
1368
1369 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1370 {
1371 if (!flag_pic && !HALF_PIC_P ())
1372 {
1373 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1374 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1375 }
1376
1377 else if (HALF_PIC_P ())
1378 {
1379 rtx symref;
1380
1381 HALF_PIC_EXTERNAL ("_mcount_ptr");
1382 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1383 "_mcount_ptr"));
1384
1385 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1386 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1387 XSTR (symref, 0));
1388 fprintf (file, "\tcall *(%%eax)\n");
1389 }
1390
1391 else
1392 {
1393 static int call_no = 0;
1394
1395 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1396 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1397 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1398 lprefix, call_no++);
1399 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1400 lprefix, labelno);
1401 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1402 prefix);
1403 fprintf (file, "\tcall *(%%eax)\n");
1404 }
1405 }
1406
1407 #else /* !OSF_OS */
1408
1409 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1410 {
1411 if (!flag_pic)
1412 {
1413 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1414 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1415 }
1416
1417 else
1418 {
1419 static int call_no = 0;
1420
1421 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1422 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1423 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1424 lprefix, call_no++);
1425 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1426 lprefix, labelno);
1427 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1428 prefix);
1429 fprintf (file, "\tcall *(%%eax)\n");
1430 }
1431 }
1432 #endif /* !OSF_OS */
1433
1434 function_prologue (file, size);
1435 }
1436
1437 #endif /* OSF_OS || TARGET_OSF1ELF */
1438
1439 /* Return 0 if the attributes for two types are incompatible, 1 if they
1440 are compatible, and 2 if they are nearly compatible (which causes a
1441 warning to be generated). */
1442
1443 static int
1444 ix86_comp_type_attributes (type1, type2)
1445 tree type1;
1446 tree type2;
1447 {
1448 /* Check for mismatch of non-default calling convention. */
1449 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1450
1451 if (TREE_CODE (type1) != FUNCTION_TYPE)
1452 return 1;
1453
1454 /* Check for mismatched return types (cdecl vs stdcall). */
1455 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1456 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1457 return 0;
1458 return 1;
1459 }
1460 \f
1461 /* Value is the number of bytes of arguments automatically
1462 popped when returning from a subroutine call.
1463 FUNDECL is the declaration node of the function (as a tree),
1464 FUNTYPE is the data type of the function (as a tree),
1465 or for a library call it is an identifier node for the subroutine name.
1466 SIZE is the number of bytes of arguments passed on the stack.
1467
1468 On the 80386, the RTD insn may be used to pop them if the number
1469 of args is fixed, but if the number is variable then the caller
1470 must pop them all. RTD can't be used for library calls now
1471 because the library is compiled with the Unix compiler.
1472 Use of RTD is a selectable option, since it is incompatible with
1473 standard Unix calling sequences. If the option is not selected,
1474 the caller must always pop the args.
1475
1476 The attribute stdcall is equivalent to RTD on a per module basis. */
1477
1478 int
1479 ix86_return_pops_args (fundecl, funtype, size)
1480 tree fundecl;
1481 tree funtype;
1482 int size;
1483 {
1484 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1485
1486 /* Cdecl functions override -mrtd, and never pop the stack. */
1487 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1488
1489 /* Stdcall functions will pop the stack if not variable args. */
1490 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1491 rtd = 1;
1492
1493 if (rtd
1494 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1495 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1496 == void_type_node)))
1497 return size;
1498 }
1499
1500 /* Lose any fake structure return argument if it is passed on the stack. */
1501 if (aggregate_value_p (TREE_TYPE (funtype))
1502 && !TARGET_64BIT)
1503 {
1504 int nregs = ix86_regparm;
1505
1506 if (funtype)
1507 {
1508 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1509
1510 if (attr)
1511 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1512 }
1513
1514 if (!nregs)
1515 return GET_MODE_SIZE (Pmode);
1516 }
1517
1518 return 0;
1519 }
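/* Worked example: for a stdcall function taking two ints (8 bytes of
   fixed arguments, no varargs), the function above returns 8, so the
   callee pops its own arguments with `ret $8'; a cdecl or varargs
   function returns 0 and leaves the popping to the caller.  */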
1520 \f
1521 /* Argument support functions. */
1522
1523 /* Return true when register may be used to pass function parameters. */
1524 bool
1525 ix86_function_arg_regno_p (regno)
1526 int regno;
1527 {
1528 int i;
1529 if (!TARGET_64BIT)
1530 return (regno < REGPARM_MAX
1531 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1532 if (SSE_REGNO_P (regno) && TARGET_SSE)
1533 return true;
1534 /* RAX is used as hidden argument to va_arg functions. */
1535 if (!regno)
1536 return true;
1537 for (i = 0; i < REGPARM_MAX; i++)
1538 if (regno == x86_64_int_parameter_registers[i])
1539 return true;
1540 return false;
1541 }
1542
1543 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1544 for a call to a function whose data type is FNTYPE.
1545 For a library call, FNTYPE is 0. */
1546
1547 void
1548 init_cumulative_args (cum, fntype, libname)
1549 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1550 tree fntype; /* tree ptr for function decl */
1551 rtx libname; /* SYMBOL_REF of library name or 0 */
1552 {
1553 static CUMULATIVE_ARGS zero_cum;
1554 tree param, next_param;
1555
1556 if (TARGET_DEBUG_ARG)
1557 {
1558 fprintf (stderr, "\ninit_cumulative_args (");
1559 if (fntype)
1560 fprintf (stderr, "fntype code = %s, ret code = %s",
1561 tree_code_name[(int) TREE_CODE (fntype)],
1562 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1563 else
1564 fprintf (stderr, "no fntype");
1565
1566 if (libname)
1567 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1568 }
1569
1570 *cum = zero_cum;
1571
1572 /* Set up the number of registers to use for passing arguments. */
1573 cum->nregs = ix86_regparm;
1574 cum->sse_nregs = SSE_REGPARM_MAX;
1575 if (fntype && !TARGET_64BIT)
1576 {
1577 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1578
1579 if (attr)
1580 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1581 }
1582 cum->maybe_vaarg = false;
1583
1584 /* Determine if this function has variable arguments. This is
1585 indicated by the last argument being 'void_type_node' if there
1586 are no variable arguments. If there are variable arguments, then
1587 we won't pass anything in registers. */
1588
1589 if (cum->nregs)
1590 {
1591 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1592 param != 0; param = next_param)
1593 {
1594 next_param = TREE_CHAIN (param);
1595 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1596 {
1597 if (!TARGET_64BIT)
1598 cum->nregs = 0;
1599 cum->maybe_vaarg = true;
1600 }
1601 }
1602 }
1603 if ((!fntype && !libname)
1604 || (fntype && !TYPE_ARG_TYPES (fntype)))
1605 cum->maybe_vaarg = 1;
1606
1607 if (TARGET_DEBUG_ARG)
1608 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1609
1610 return;
1611 }
1612
1613 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1614 of this code is to classify each eightbyte of the incoming argument by register
1615 class and assign registers accordingly. */
1616
1617 /* Return the union class of CLASS1 and CLASS2.
1618 See the x86-64 PS ABI for details. */
1619
1620 static enum x86_64_reg_class
1621 merge_classes (class1, class2)
1622 enum x86_64_reg_class class1, class2;
1623 {
1624 /* Rule #1: If both classes are equal, this is the resulting class. */
1625 if (class1 == class2)
1626 return class1;
1627
1628 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1629 the other class. */
1630 if (class1 == X86_64_NO_CLASS)
1631 return class2;
1632 if (class2 == X86_64_NO_CLASS)
1633 return class1;
1634
1635 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1636 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1637 return X86_64_MEMORY_CLASS;
1638
1639 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1640 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1641 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1642 return X86_64_INTEGERSI_CLASS;
1643 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1644 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1645 return X86_64_INTEGER_CLASS;
1646
1647 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1648 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1649 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1650 return X86_64_MEMORY_CLASS;
1651
1652 /* Rule #6: Otherwise class SSE is used. */
1653 return X86_64_SSE_CLASS;
1654 }
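
/* Editor's illustration (not part of i386.c) of the merge rules above,
   assuming both members start at offset 0: `i' classifies as
   X86_64_INTEGERSI_CLASS and `f' as X86_64_SSESF_CLASS, so rule #4 merges
   the union to X86_64_INTEGERSI_CLASS and it is passed in a general purpose
   register.  */
union example_u { int i; float f; };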
1655
1656 /* Classify the argument of type TYPE and mode MODE.
1657 CLASSES will be filled by the register class used to pass each word
1658 of the operand. The number of words is returned. In case the parameter
1659 should be passed in memory, 0 is returned. As a special case for zero
1660 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1661
1662 BIT_OFFSET is used internally for handling records and specifies the
1663 offset in bits, modulo 256, to avoid overflow cases.
1664
1665 See the x86-64 PS ABI for details.
1666 */
1667
1668 static int
1669 classify_argument (mode, type, classes, bit_offset)
1670 enum machine_mode mode;
1671 tree type;
1672 enum x86_64_reg_class classes[MAX_CLASSES];
1673 int bit_offset;
1674 {
1675 int bytes =
1676 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1677 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1678
1679 if (type && AGGREGATE_TYPE_P (type))
1680 {
1681 int i;
1682 tree field;
1683 enum x86_64_reg_class subclasses[MAX_CLASSES];
1684
1685 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1686 if (bytes > 16)
1687 return 0;
1688
1689 for (i = 0; i < words; i++)
1690 classes[i] = X86_64_NO_CLASS;
1691
1692 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1693 signal the memory class, so handle it as a special case. */
1694 if (!words)
1695 {
1696 classes[0] = X86_64_NO_CLASS;
1697 return 1;
1698 }
1699
1700 /* Classify each field of record and merge classes. */
1701 if (TREE_CODE (type) == RECORD_TYPE)
1702 {
1703 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1704 {
1705 if (TREE_CODE (field) == FIELD_DECL)
1706 {
1707 int num;
1708
1709 /* Bitfields are always classified as integer. Handle them
1710 early, since later code would consider them to be
1711 misaligned integers. */
1712 if (DECL_BIT_FIELD (field))
1713 {
1714 for (i = int_bit_position (field) / 8 / 8;
1715 i < (int_bit_position (field)
1716 + tree_low_cst (DECL_SIZE (field), 0)
1717 + 63) / 8 / 8; i++)
1718 classes[i] =
1719 merge_classes (X86_64_INTEGER_CLASS,
1720 classes[i]);
1721 }
1722 else
1723 {
1724 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1725 TREE_TYPE (field), subclasses,
1726 (int_bit_position (field)
1727 + bit_offset) % 256);
1728 if (!num)
1729 return 0;
1730 for (i = 0; i < num; i++)
1731 {
1732 int pos =
1733 (int_bit_position (field) + bit_offset) / 8 / 8;
1734 classes[i + pos] =
1735 merge_classes (subclasses[i], classes[i + pos]);
1736 }
1737 }
1738 }
1739 }
1740 }
1741 /* Arrays are handled as small records. */
1742 else if (TREE_CODE (type) == ARRAY_TYPE)
1743 {
1744 int num;
1745 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1746 TREE_TYPE (type), subclasses, bit_offset);
1747 if (!num)
1748 return 0;
1749
1750 /* The partial classes are now full classes. */
1751 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1752 subclasses[0] = X86_64_SSE_CLASS;
1753 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1754 subclasses[0] = X86_64_INTEGER_CLASS;
1755
1756 for (i = 0; i < words; i++)
1757 classes[i] = subclasses[i % num];
1758 }
1759 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1760 else if (TREE_CODE (type) == UNION_TYPE
1761 || TREE_CODE (type) == QUAL_UNION_TYPE)
1762 {
1763 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1764 {
1765 if (TREE_CODE (field) == FIELD_DECL)
1766 {
1767 int num;
1768 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1769 TREE_TYPE (field), subclasses,
1770 bit_offset);
1771 if (!num)
1772 return 0;
1773 for (i = 0; i < num; i++)
1774 classes[i] = merge_classes (subclasses[i], classes[i]);
1775 }
1776 }
1777 }
1778 else
1779 abort ();
1780
1781 /* Final merger cleanup. */
1782 for (i = 0; i < words; i++)
1783 {
1784 /* If one class is MEMORY, everything should be passed in
1785 memory. */
1786 if (classes[i] == X86_64_MEMORY_CLASS)
1787 return 0;
1788
1789 /* The X86_64_SSEUP_CLASS should always be preceded by
1790 X86_64_SSE_CLASS. */
1791 if (classes[i] == X86_64_SSEUP_CLASS
1792 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1793 classes[i] = X86_64_SSE_CLASS;
1794
1795 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1796 if (classes[i] == X86_64_X87UP_CLASS
1797 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1798 classes[i] = X86_64_SSE_CLASS;
1799 }
1800 return words;
1801 }
1802
1803 /* Compute alignment needed. We align all types to natural boundaries with
1804 the exception of XFmode, which is aligned to 128 bits. */
1805 if (mode != VOIDmode && mode != BLKmode)
1806 {
1807 int mode_alignment = GET_MODE_BITSIZE (mode);
1808
1809 if (mode == XFmode)
1810 mode_alignment = 128;
1811 else if (mode == XCmode)
1812 mode_alignment = 256;
1813 /* Misaligned fields are always returned in memory. */
1814 if (bit_offset % mode_alignment)
1815 return 0;
1816 }
1817
1818 /* Classification of atomic types. */
1819 switch (mode)
1820 {
1821 case DImode:
1822 case SImode:
1823 case HImode:
1824 case QImode:
1825 case CSImode:
1826 case CHImode:
1827 case CQImode:
1828 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1829 classes[0] = X86_64_INTEGERSI_CLASS;
1830 else
1831 classes[0] = X86_64_INTEGER_CLASS;
1832 return 1;
1833 case CDImode:
1834 case TImode:
1835 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1836 return 2;
1837 case CTImode:
1838 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1839 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1840 return 4;
1841 case SFmode:
1842 if (!(bit_offset % 64))
1843 classes[0] = X86_64_SSESF_CLASS;
1844 else
1845 classes[0] = X86_64_SSE_CLASS;
1846 return 1;
1847 case DFmode:
1848 classes[0] = X86_64_SSEDF_CLASS;
1849 return 1;
1850 case TFmode:
1851 classes[0] = X86_64_X87_CLASS;
1852 classes[1] = X86_64_X87UP_CLASS;
1853 return 2;
1854 case TCmode:
1855 classes[0] = X86_64_X87_CLASS;
1856 classes[1] = X86_64_X87UP_CLASS;
1857 classes[2] = X86_64_X87_CLASS;
1858 classes[3] = X86_64_X87UP_CLASS;
1859 return 4;
1860 case DCmode:
1861 classes[0] = X86_64_SSEDF_CLASS;
1862 classes[1] = X86_64_SSEDF_CLASS;
1863 return 2;
1864 case SCmode:
1865 classes[0] = X86_64_SSE_CLASS;
1866 return 1;
1867 case V4SFmode:
1868 case V4SImode:
1869 classes[0] = X86_64_SSE_CLASS;
1870 classes[1] = X86_64_SSEUP_CLASS;
1871 return 2;
1872 case V2SFmode:
1873 case V2SImode:
1874 case V4HImode:
1875 case V8QImode:
1876 classes[0] = X86_64_SSE_CLASS;
1877 return 1;
1878 case BLKmode:
1879 case VOIDmode:
1880 return 0;
1881 default:
1882 abort ();
1883 }
1884 }
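
/* Editor's illustration (not part of i386.c) of the classification above.
   The 16 byte struct below occupies two eightbytes: the first (a, b)
   classifies as X86_64_INTEGER_CLASS, the second (d) as X86_64_SSEDF_CLASS,
   so the struct is passed in one general purpose register and one SSE
   register.  */
struct example_s { int a; int b; double d; };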
1885
1886 /* Examine the argument and set the number of registers required in each
1887 class. Return 0 iff the parameter should be passed in memory. */
1888 static int
1889 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1890 enum machine_mode mode;
1891 tree type;
1892 int *int_nregs, *sse_nregs;
1893 int in_return;
1894 {
1895 enum x86_64_reg_class class[MAX_CLASSES];
1896 int n = classify_argument (mode, type, class, 0);
1897
1898 *int_nregs = 0;
1899 *sse_nregs = 0;
1900 if (!n)
1901 return 0;
1902 for (n--; n >= 0; n--)
1903 switch (class[n])
1904 {
1905 case X86_64_INTEGER_CLASS:
1906 case X86_64_INTEGERSI_CLASS:
1907 (*int_nregs)++;
1908 break;
1909 case X86_64_SSE_CLASS:
1910 case X86_64_SSESF_CLASS:
1911 case X86_64_SSEDF_CLASS:
1912 (*sse_nregs)++;
1913 break;
1914 case X86_64_NO_CLASS:
1915 case X86_64_SSEUP_CLASS:
1916 break;
1917 case X86_64_X87_CLASS:
1918 case X86_64_X87UP_CLASS:
1919 if (!in_return)
1920 return 0;
1921 break;
1922 case X86_64_MEMORY_CLASS:
1923 abort ();
1924 }
1925 return 1;
1926 }
1927 /* Construct container for the argument used by GCC interface. See
1928 FUNCTION_ARG for the detailed description. */
1929 static rtx
1930 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1931 enum machine_mode mode;
1932 tree type;
1933 int in_return;
1934 int nintregs, nsseregs;
1935 const int * intreg;
1936 int sse_regno;
1937 {
1938 enum machine_mode tmpmode;
1939 int bytes =
1940 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1941 enum x86_64_reg_class class[MAX_CLASSES];
1942 int n;
1943 int i;
1944 int nexps = 0;
1945 int needed_sseregs, needed_intregs;
1946 rtx exp[MAX_CLASSES];
1947 rtx ret;
1948
1949 n = classify_argument (mode, type, class, 0);
1950 if (TARGET_DEBUG_ARG)
1951 {
1952 if (!n)
1953 fprintf (stderr, "Memory class\n");
1954 else
1955 {
1956 fprintf (stderr, "Classes:");
1957 for (i = 0; i < n; i++)
1958 {
1959 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1960 }
1961 fprintf (stderr, "\n");
1962 }
1963 }
1964 if (!n)
1965 return NULL;
1966 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1967 return NULL;
1968 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1969 return NULL;
1970
1971 /* First construct simple cases. Avoid SCmode, since we want to use
1972 a single register to pass this type. */
1973 if (n == 1 && mode != SCmode)
1974 switch (class[0])
1975 {
1976 case X86_64_INTEGER_CLASS:
1977 case X86_64_INTEGERSI_CLASS:
1978 return gen_rtx_REG (mode, intreg[0]);
1979 case X86_64_SSE_CLASS:
1980 case X86_64_SSESF_CLASS:
1981 case X86_64_SSEDF_CLASS:
1982 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1983 case X86_64_X87_CLASS:
1984 return gen_rtx_REG (mode, FIRST_STACK_REG);
1985 case X86_64_NO_CLASS:
1986 /* Zero sized array, struct or class. */
1987 return NULL;
1988 default:
1989 abort ();
1990 }
1991 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1992 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1993 if (n == 2
1994 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1995 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1996 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1997 && class[1] == X86_64_INTEGER_CLASS
1998 && (mode == CDImode || mode == TImode)
1999 && intreg[0] + 1 == intreg[1])
2000 return gen_rtx_REG (mode, intreg[0]);
2001 if (n == 4
2002 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2003 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2004 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2005
2006 /* Otherwise figure out the entries of the PARALLEL. */
2007 for (i = 0; i < n; i++)
2008 {
2009 switch (class[i])
2010 {
2011 case X86_64_NO_CLASS:
2012 break;
2013 case X86_64_INTEGER_CLASS:
2014 case X86_64_INTEGERSI_CLASS:
2015 /* Merge TImodes on aligned occasions here too. */
2016 if (i * 8 + 8 > bytes)
2017 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2018 else if (class[i] == X86_64_INTEGERSI_CLASS)
2019 tmpmode = SImode;
2020 else
2021 tmpmode = DImode;
2022 /* We've requested a size (e.g. 24 bits) for which there is no integer mode. Use DImode. */
2023 if (tmpmode == BLKmode)
2024 tmpmode = DImode;
2025 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2026 gen_rtx_REG (tmpmode, *intreg),
2027 GEN_INT (i*8));
2028 intreg++;
2029 break;
2030 case X86_64_SSESF_CLASS:
2031 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2032 gen_rtx_REG (SFmode,
2033 SSE_REGNO (sse_regno)),
2034 GEN_INT (i*8));
2035 sse_regno++;
2036 break;
2037 case X86_64_SSEDF_CLASS:
2038 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2039 gen_rtx_REG (DFmode,
2040 SSE_REGNO (sse_regno)),
2041 GEN_INT (i*8));
2042 sse_regno++;
2043 break;
2044 case X86_64_SSE_CLASS:
2045 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2046 tmpmode = TImode, i++;
2047 else
2048 tmpmode = DImode;
2049 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2050 gen_rtx_REG (tmpmode,
2051 SSE_REGNO (sse_regno)),
2052 GEN_INT (i*8));
2053 sse_regno++;
2054 break;
2055 default:
2056 abort ();
2057 }
2058 }
2059 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2060 for (i = 0; i < nexps; i++)
2061 XVECEXP (ret, 0, i) = exp [i];
2062 return ret;
2063 }
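
/* Editor's sketch (not part of i386.c) of the PARALLEL built above for the
   struct example_s { int a; int b; double d; } shown earlier, when it is the
   first argument and %rdi and %xmm0 are the next free registers:

     (parallel [(expr_list (reg:DI di)   (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. bytes 0..7 travel in %rdi and bytes 8..15 in %xmm0.  */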
2064
2065 /* Update the data in CUM to advance over an argument
2066 of mode MODE and data type TYPE.
2067 (TYPE is null for libcalls where that information may not be available.) */
2068
2069 void
2070 function_arg_advance (cum, mode, type, named)
2071 CUMULATIVE_ARGS *cum; /* current arg information */
2072 enum machine_mode mode; /* current arg mode */
2073 tree type; /* type of the argument or 0 if lib support */
2074 int named; /* whether or not the argument was named */
2075 {
2076 int bytes =
2077 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2078 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2079
2080 if (TARGET_DEBUG_ARG)
2081 fprintf (stderr,
2082 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2083 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2084 if (TARGET_64BIT)
2085 {
2086 int int_nregs, sse_nregs;
2087 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2088 cum->words += words;
2089 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2090 {
2091 cum->nregs -= int_nregs;
2092 cum->sse_nregs -= sse_nregs;
2093 cum->regno += int_nregs;
2094 cum->sse_regno += sse_nregs;
2095 }
2096 else
2097 cum->words += words;
2098 }
2099 else
2100 {
2101 if (TARGET_SSE && mode == TImode)
2102 {
2103 cum->sse_words += words;
2104 cum->sse_nregs -= 1;
2105 cum->sse_regno += 1;
2106 if (cum->sse_nregs <= 0)
2107 {
2108 cum->sse_nregs = 0;
2109 cum->sse_regno = 0;
2110 }
2111 }
2112 else
2113 {
2114 cum->words += words;
2115 cum->nregs -= words;
2116 cum->regno += words;
2117
2118 if (cum->nregs <= 0)
2119 {
2120 cum->nregs = 0;
2121 cum->regno = 0;
2122 }
2123 }
2124 }
2125 return;
2126 }
2127
2128 /* Define where to put the arguments to a function.
2129 Value is zero to push the argument on the stack,
2130 or a hard register in which to store the argument.
2131
2132 MODE is the argument's machine mode.
2133 TYPE is the data type of the argument (as a tree).
2134 This is null for libcalls where that information may
2135 not be available.
2136 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2137 the preceding args and about the function being called.
2138 NAMED is nonzero if this argument is a named parameter
2139 (otherwise it is an extra parameter matching an ellipsis). */
2140
2141 rtx
2142 function_arg (cum, mode, type, named)
2143 CUMULATIVE_ARGS *cum; /* current arg information */
2144 enum machine_mode mode; /* current arg mode */
2145 tree type; /* type of the argument or 0 if lib support */
2146 int named; /* != 0 for normal args, == 0 for ... args */
2147 {
2148 rtx ret = NULL_RTX;
2149 int bytes =
2150 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2151 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2152
2153 /* Handle a hidden AL argument containing the number of registers for varargs
2154 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2155 any AL settings. */
2156 if (mode == VOIDmode)
2157 {
2158 if (TARGET_64BIT)
2159 return GEN_INT (cum->maybe_vaarg
2160 ? (cum->sse_nregs < 0
2161 ? SSE_REGPARM_MAX
2162 : cum->sse_regno)
2163 : -1);
2164 else
2165 return constm1_rtx;
2166 }
2167 if (TARGET_64BIT)
2168 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2169 &x86_64_int_parameter_registers [cum->regno],
2170 cum->sse_regno);
2171 else
2172 switch (mode)
2173 {
2174 /* For now, pass fp/complex values on the stack. */
2175 default:
2176 break;
2177
2178 case BLKmode:
2179 case DImode:
2180 case SImode:
2181 case HImode:
2182 case QImode:
2183 if (words <= cum->nregs)
2184 ret = gen_rtx_REG (mode, cum->regno);
2185 break;
2186 case TImode:
2187 if (cum->sse_nregs)
2188 ret = gen_rtx_REG (mode, cum->sse_regno);
2189 break;
2190 }
2191
2192 if (TARGET_DEBUG_ARG)
2193 {
2194 fprintf (stderr,
2195 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2196 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2197
2198 if (ret)
2199 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2200 else
2201 fprintf (stderr, ", stack");
2202
2203 fprintf (stderr, " )\n");
2204 }
2205
2206 return ret;
2207 }
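
/* Editor's illustration (not part of i386.c): on ia32 the register-passing
   path above walks hard registers 0, 1, 2, so a declaration such as

     int __attribute__((regparm(3))) f (int a, int b, int c);

   receives a in %eax, b in %edx and c in %ecx; further arguments, and
   anything that does not fit in the remaining registers, go on the stack.  */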
2208
2209 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2210 and type. */
2211
2212 int
2213 ix86_function_arg_boundary (mode, type)
2214 enum machine_mode mode;
2215 tree type;
2216 {
2217 int align;
2218 if (!TARGET_64BIT)
2219 return PARM_BOUNDARY;
2220 if (type)
2221 align = TYPE_ALIGN (type);
2222 else
2223 align = GET_MODE_ALIGNMENT (mode);
2224 if (align < PARM_BOUNDARY)
2225 align = PARM_BOUNDARY;
2226 if (align > 128)
2227 align = 128;
2228 return align;
2229 }
2230
2231 /* Return true if N is a possible register number of function value. */
2232 bool
2233 ix86_function_value_regno_p (regno)
2234 int regno;
2235 {
2236 if (!TARGET_64BIT)
2237 {
2238 return ((regno) == 0
2239 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2240 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2241 }
2242 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2243 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2244 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2245 }
2246
2247 /* Define how to find the value returned by a function.
2248 VALTYPE is the data type of the value (as a tree).
2249 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2250 otherwise, FUNC is 0. */
2251 rtx
2252 ix86_function_value (valtype)
2253 tree valtype;
2254 {
2255 if (TARGET_64BIT)
2256 {
2257 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2258 REGPARM_MAX, SSE_REGPARM_MAX,
2259 x86_64_int_return_registers, 0);
2260 /* For zero sized structures, construct_container returns NULL, but we need
2261 to keep the rest of the compiler happy by returning a meaningful value. */
2262 if (!ret)
2263 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2264 return ret;
2265 }
2266 else
2267 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2268 }
2269
2270 /* Return nonzero iff type is returned in memory. */
2271 int
2272 ix86_return_in_memory (type)
2273 tree type;
2274 {
2275 int needed_intregs, needed_sseregs;
2276 if (TARGET_64BIT)
2277 {
2278 return !examine_argument (TYPE_MODE (type), type, 1,
2279 &needed_intregs, &needed_sseregs);
2280 }
2281 else
2282 {
2283 if (TYPE_MODE (type) == BLKmode
2284 || (VECTOR_MODE_P (TYPE_MODE (type))
2285 && int_size_in_bytes (type) == 8)
2286 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2287 && TYPE_MODE (type) != TFmode
2288 && !VECTOR_MODE_P (TYPE_MODE (type))))
2289 return 1;
2290 return 0;
2291 }
2292 }
2293
2294 /* Define how to find the value returned by a library function
2295 assuming the value has mode MODE. */
2296 rtx
2297 ix86_libcall_value (mode)
2298 enum machine_mode mode;
2299 {
2300 if (TARGET_64BIT)
2301 {
2302 switch (mode)
2303 {
2304 case SFmode:
2305 case SCmode:
2306 case DFmode:
2307 case DCmode:
2308 return gen_rtx_REG (mode, FIRST_SSE_REG);
2309 case TFmode:
2310 case TCmode:
2311 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2312 default:
2313 return gen_rtx_REG (mode, 0);
2314 }
2315 }
2316 else
2317 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2318 }
2319 \f
2320 /* Create the va_list data type. */
2321
2322 tree
2323 ix86_build_va_list ()
2324 {
2325 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2326
2327 /* For i386 we use plain pointer to argument area. */
2328 if (!TARGET_64BIT)
2329 return build_pointer_type (char_type_node);
2330
2331 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2332 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2333
2334 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2335 unsigned_type_node);
2336 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2337 unsigned_type_node);
2338 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2339 ptr_type_node);
2340 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2341 ptr_type_node);
2342
2343 DECL_FIELD_CONTEXT (f_gpr) = record;
2344 DECL_FIELD_CONTEXT (f_fpr) = record;
2345 DECL_FIELD_CONTEXT (f_ovf) = record;
2346 DECL_FIELD_CONTEXT (f_sav) = record;
2347
2348 TREE_CHAIN (record) = type_decl;
2349 TYPE_NAME (record) = type_decl;
2350 TYPE_FIELDS (record) = f_gpr;
2351 TREE_CHAIN (f_gpr) = f_fpr;
2352 TREE_CHAIN (f_fpr) = f_ovf;
2353 TREE_CHAIN (f_ovf) = f_sav;
2354
2355 layout_type (record);
2356
2357 /* The correct type is an array type of one element. */
2358 return build_array_type (record, build_index_type (size_zero_node));
2359 }
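
/* Editor's illustration (not part of i386.c): the record built above is the
   C level va_list of the x86-64 ABI; the array-of-one-element trick makes
   the type decay to a pointer when passed between functions.  Field names
   match the FIELD_DECLs created above.  */
typedef struct example_va_list_tag {
  unsigned int gp_offset;    /* byte offset of the next GP register in the save area */
  unsigned int fp_offset;    /* byte offset of the next SSE register in the save area */
  void *overflow_arg_area;   /* next argument passed on the stack */
  void *reg_save_area;       /* register save area set up by the prologue */
} example_va_list[1];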
2360
2361 /* Perform any actions needed for a function that is receiving a
2362 variable number of arguments.
2363
2364 CUM is as above.
2365
2366 MODE and TYPE are the mode and type of the current parameter.
2367
2368 PRETEND_SIZE is a variable that should be set to the amount of stack
2369 that must be pushed by the prologue to pretend that our caller pushed
2370 it.
2371
2372 Normally, this macro will push all remaining incoming registers on the
2373 stack and set PRETEND_SIZE to the length of the registers pushed. */
2374
2375 void
2376 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2377 CUMULATIVE_ARGS *cum;
2378 enum machine_mode mode;
2379 tree type;
2380 int *pretend_size ATTRIBUTE_UNUSED;
2381 int no_rtl;
2382
2383 {
2384 CUMULATIVE_ARGS next_cum;
2385 rtx save_area = NULL_RTX, mem;
2386 rtx label;
2387 rtx label_ref;
2388 rtx tmp_reg;
2389 rtx nsse_reg;
2390 int set;
2391 tree fntype;
2392 int stdarg_p;
2393 int i;
2394
2395 if (!TARGET_64BIT)
2396 return;
2397
2398 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2399 ix86_save_varrargs_registers = 1;
2400
2401 fntype = TREE_TYPE (current_function_decl);
2402 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2403 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2404 != void_type_node));
2405
2406 /* For varargs, we do not want to skip the dummy va_dcl argument.
2407 For stdargs, we do want to skip the last named argument. */
2408 next_cum = *cum;
2409 if (stdarg_p)
2410 function_arg_advance (&next_cum, mode, type, 1);
2411
2412 if (!no_rtl)
2413 save_area = frame_pointer_rtx;
2414
2415 set = get_varargs_alias_set ();
2416
2417 for (i = next_cum.regno; i < ix86_regparm; i++)
2418 {
2419 mem = gen_rtx_MEM (Pmode,
2420 plus_constant (save_area, i * UNITS_PER_WORD));
2421 set_mem_alias_set (mem, set);
2422 emit_move_insn (mem, gen_rtx_REG (Pmode,
2423 x86_64_int_parameter_registers[i]));
2424 }
2425
2426 if (next_cum.sse_nregs)
2427 {
2428 /* Now emit code to save SSE registers. The AX parameter contains the number
2429 of SSE parameter registers used to call this function. We use the
2430 sse_prologue_save insn template, which produces a computed jump across the
2431 SSE saves. We need some preparation work to get this working. */
2432
2433 label = gen_label_rtx ();
2434 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2435
2436 /* Compute address to jump to:
2437 label - 4*eax + nnamed_sse_arguments*4 */
2438 tmp_reg = gen_reg_rtx (Pmode);
2439 nsse_reg = gen_reg_rtx (Pmode);
2440 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2441 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2442 gen_rtx_MULT (Pmode, nsse_reg,
2443 GEN_INT (4))));
2444 if (next_cum.sse_regno)
2445 emit_move_insn
2446 (nsse_reg,
2447 gen_rtx_CONST (DImode,
2448 gen_rtx_PLUS (DImode,
2449 label_ref,
2450 GEN_INT (next_cum.sse_regno * 4))));
2451 else
2452 emit_move_insn (nsse_reg, label_ref);
2453 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2454
2455 /* Compute the address of the memory block we save into. We always use a pointer
2456 pointing 127 bytes after the first byte to store - this is needed to keep
2457 the instruction size limited to 4 bytes. */
2458 tmp_reg = gen_reg_rtx (Pmode);
2459 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2460 plus_constant (save_area,
2461 8 * REGPARM_MAX + 127)));
2462 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2463 set_mem_alias_set (mem, set);
2464 set_mem_align (mem, BITS_PER_WORD);
2465
2466 /* And finally do the dirty job! */
2467 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2468 GEN_INT (next_cum.sse_regno), label));
2469 }
2470
2471 }
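
/* Editor's sketch (not part of i386.c) of the register save area filled in
   above, assuming the x86-64 values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8:

     bytes   0 ..  47   %rdi %rsi %rdx %rcx %r8 %r9   (8 bytes each)
     bytes  48 .. 175   %xmm0 .. %xmm7                (16 bytes each)

   gp_offset and fp_offset in the va_list index into this block.  */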
2472
2473 /* Implement va_start. */
2474
2475 void
2476 ix86_va_start (stdarg_p, valist, nextarg)
2477 int stdarg_p;
2478 tree valist;
2479 rtx nextarg;
2480 {
2481 HOST_WIDE_INT words, n_gpr, n_fpr;
2482 tree f_gpr, f_fpr, f_ovf, f_sav;
2483 tree gpr, fpr, ovf, sav, t;
2484
2485 /* Only 64bit target needs something special. */
2486 if (!TARGET_64BIT)
2487 {
2488 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2489 return;
2490 }
2491
2492 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2493 f_fpr = TREE_CHAIN (f_gpr);
2494 f_ovf = TREE_CHAIN (f_fpr);
2495 f_sav = TREE_CHAIN (f_ovf);
2496
2497 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2498 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2499 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2500 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2501 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2502
2503 /* Count number of gp and fp argument registers used. */
2504 words = current_function_args_info.words;
2505 n_gpr = current_function_args_info.regno;
2506 n_fpr = current_function_args_info.sse_regno;
2507
2508 if (TARGET_DEBUG_ARG)
2509 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2510 (int) words, (int) n_gpr, (int) n_fpr);
2511
2512 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2513 build_int_2 (n_gpr * 8, 0));
2514 TREE_SIDE_EFFECTS (t) = 1;
2515 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2516
2517 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2518 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2519 TREE_SIDE_EFFECTS (t) = 1;
2520 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2521
2522 /* Find the overflow area. */
2523 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2524 if (words != 0)
2525 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2526 build_int_2 (words * UNITS_PER_WORD, 0));
2527 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2528 TREE_SIDE_EFFECTS (t) = 1;
2529 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2530
2531 /* Find the register save area.
2532 The function prologue saves it right above the stack frame. */
2533 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2534 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2535 TREE_SIDE_EFFECTS (t) = 1;
2536 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2537 }
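
/* Editor's sketch (not part of i386.c) of what the expansion above amounts
   to at the C level for a 64-bit function, using the va_list fields shown
   earlier; words, n_gpr and n_fpr come from current_function_args_info:

     ap->gp_offset         = n_gpr * 8;
     ap->fp_offset         = REGPARM_MAX * 8 + n_fpr * 16;
     ap->overflow_arg_area = incoming args + words * UNITS_PER_WORD;
     ap->reg_save_area     = save area set up by the prologue;
*/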
2538
2539 /* Implement va_arg. */
2540 rtx
2541 ix86_va_arg (valist, type)
2542 tree valist, type;
2543 {
2544 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2545 tree f_gpr, f_fpr, f_ovf, f_sav;
2546 tree gpr, fpr, ovf, sav, t;
2547 int size, rsize;
2548 rtx lab_false, lab_over = NULL_RTX;
2549 rtx addr_rtx, r;
2550 rtx container;
2551
2552 /* Only 64bit target needs something special. */
2553 if (!TARGET_64BIT)
2554 {
2555 return std_expand_builtin_va_arg (valist, type);
2556 }
2557
2558 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2559 f_fpr = TREE_CHAIN (f_gpr);
2560 f_ovf = TREE_CHAIN (f_fpr);
2561 f_sav = TREE_CHAIN (f_ovf);
2562
2563 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2564 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2565 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2566 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2567 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2568
2569 size = int_size_in_bytes (type);
2570 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2571
2572 container = construct_container (TYPE_MODE (type), type, 0,
2573 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2574 /*
2575 * Pull the value out of the saved registers ...
2576 */
2577
2578 addr_rtx = gen_reg_rtx (Pmode);
2579
2580 if (container)
2581 {
2582 rtx int_addr_rtx, sse_addr_rtx;
2583 int needed_intregs, needed_sseregs;
2584 int need_temp;
2585
2586 lab_over = gen_label_rtx ();
2587 lab_false = gen_label_rtx ();
2588
2589 examine_argument (TYPE_MODE (type), type, 0,
2590 &needed_intregs, &needed_sseregs);
2591
2592
2593 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2594 || TYPE_ALIGN (type) > 128);
2595
2596 /* If we are passing a structure, verify that it is a consecutive block
2597 in the register save area. If not, we need to do moves. */
2598 if (!need_temp && !REG_P (container))
2599 {
2600 /* Verify that all registers are strictly consecutive. */
2601 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2602 {
2603 int i;
2604
2605 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2606 {
2607 rtx slot = XVECEXP (container, 0, i);
2608 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2609 || INTVAL (XEXP (slot, 1)) != i * 16)
2610 need_temp = 1;
2611 }
2612 }
2613 else
2614 {
2615 int i;
2616
2617 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2618 {
2619 rtx slot = XVECEXP (container, 0, i);
2620 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2621 || INTVAL (XEXP (slot, 1)) != i * 8)
2622 need_temp = 1;
2623 }
2624 }
2625 }
2626 if (!need_temp)
2627 {
2628 int_addr_rtx = addr_rtx;
2629 sse_addr_rtx = addr_rtx;
2630 }
2631 else
2632 {
2633 int_addr_rtx = gen_reg_rtx (Pmode);
2634 sse_addr_rtx = gen_reg_rtx (Pmode);
2635 }
2636 /* First ensure that we fit completely in registers. */
2637 if (needed_intregs)
2638 {
2639 emit_cmp_and_jump_insns (expand_expr
2640 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2641 GEN_INT ((REGPARM_MAX - needed_intregs +
2642 1) * 8), GE, const1_rtx, SImode,
2643 1, lab_false);
2644 }
2645 if (needed_sseregs)
2646 {
2647 emit_cmp_and_jump_insns (expand_expr
2648 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2649 GEN_INT ((SSE_REGPARM_MAX -
2650 needed_sseregs + 1) * 16 +
2651 REGPARM_MAX * 8), GE, const1_rtx,
2652 SImode, 1, lab_false);
2653 }
2654
2655 /* Compute index to start of area used for integer regs. */
2656 if (needed_intregs)
2657 {
2658 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2659 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2660 if (r != int_addr_rtx)
2661 emit_move_insn (int_addr_rtx, r);
2662 }
2663 if (needed_sseregs)
2664 {
2665 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2666 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2667 if (r != sse_addr_rtx)
2668 emit_move_insn (sse_addr_rtx, r);
2669 }
2670 if (need_temp)
2671 {
2672 int i;
2673 rtx mem;
2674
2675 /* Never use the memory itself, as it has the alias set. */
2676 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2677 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2678 set_mem_alias_set (mem, get_varargs_alias_set ());
2679 set_mem_align (mem, BITS_PER_UNIT);
2680
2681 for (i = 0; i < XVECLEN (container, 0); i++)
2682 {
2683 rtx slot = XVECEXP (container, 0, i);
2684 rtx reg = XEXP (slot, 0);
2685 enum machine_mode mode = GET_MODE (reg);
2686 rtx src_addr;
2687 rtx src_mem;
2688 int src_offset;
2689 rtx dest_mem;
2690
2691 if (SSE_REGNO_P (REGNO (reg)))
2692 {
2693 src_addr = sse_addr_rtx;
2694 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2695 }
2696 else
2697 {
2698 src_addr = int_addr_rtx;
2699 src_offset = REGNO (reg) * 8;
2700 }
2701 src_mem = gen_rtx_MEM (mode, src_addr);
2702 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2703 src_mem = adjust_address (src_mem, mode, src_offset);
2704 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2705 emit_move_insn (dest_mem, src_mem);
2706 }
2707 }
2708
2709 if (needed_intregs)
2710 {
2711 t =
2712 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2713 build_int_2 (needed_intregs * 8, 0));
2714 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2715 TREE_SIDE_EFFECTS (t) = 1;
2716 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2717 }
2718 if (needed_sseregs)
2719 {
2720 t =
2721 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2722 build_int_2 (needed_sseregs * 16, 0));
2723 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2724 TREE_SIDE_EFFECTS (t) = 1;
2725 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2726 }
2727
2728 emit_jump_insn (gen_jump (lab_over));
2729 emit_barrier ();
2730 emit_label (lab_false);
2731 }
2732
2733 /* ... otherwise out of the overflow area. */
2734
2735 /* Care for on-stack alignment if needed. */
2736 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2737 t = ovf;
2738 else
2739 {
2740 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2741 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2742 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2743 }
2744 t = save_expr (t);
2745
2746 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2747 if (r != addr_rtx)
2748 emit_move_insn (addr_rtx, r);
2749
2750 t =
2751 build (PLUS_EXPR, TREE_TYPE (t), t,
2752 build_int_2 (rsize * UNITS_PER_WORD, 0));
2753 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2754 TREE_SIDE_EFFECTS (t) = 1;
2755 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2756
2757 if (container)
2758 emit_label (lab_over);
2759
2760 return addr_rtx;
2761 }
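
/* Editor's sketch (not part of i386.c) of the control flow emitted above for
   a type that may live in registers (offsets in bytes, assuming
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

     if (gp_offset + needed_intregs * 8 <= 48
         && fp_offset + needed_sseregs * 16 <= 48 + 128)
       {
         addr = reg_save_area + offset;   pieces are copied to a temporary
                                          when they are not consecutive
         gp_offset += needed_intregs * 8;
         fp_offset += needed_sseregs * 16;
       }
     else
       {
         addr = align (overflow_arg_area);
         overflow_arg_area = addr + rsize * UNITS_PER_WORD;
       }

   and the value is then read from *addr.  */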
2762 \f
2763 /* Return nonzero if OP is a general operand representable on x86_64. */
2764
2765 int
2766 x86_64_general_operand (op, mode)
2767 rtx op;
2768 enum machine_mode mode;
2769 {
2770 if (!TARGET_64BIT)
2771 return general_operand (op, mode);
2772 if (nonimmediate_operand (op, mode))
2773 return 1;
2774 return x86_64_sign_extended_value (op);
2775 }
2776
2777 /* Return nonzero if OP is a general operand representable on x86_64
2778 as either a sign extended or zero extended constant. */
2779
2780 int
2781 x86_64_szext_general_operand (op, mode)
2782 rtx op;
2783 enum machine_mode mode;
2784 {
2785 if (!TARGET_64BIT)
2786 return general_operand (op, mode);
2787 if (nonimmediate_operand (op, mode))
2788 return 1;
2789 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2790 }
2791
2792 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2793
2794 int
2795 x86_64_nonmemory_operand (op, mode)
2796 rtx op;
2797 enum machine_mode mode;
2798 {
2799 if (!TARGET_64BIT)
2800 return nonmemory_operand (op, mode);
2801 if (register_operand (op, mode))
2802 return 1;
2803 return x86_64_sign_extended_value (op);
2804 }
2805
2806 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2807
2808 int
2809 x86_64_movabs_operand (op, mode)
2810 rtx op;
2811 enum machine_mode mode;
2812 {
2813 if (!TARGET_64BIT || !flag_pic)
2814 return nonmemory_operand (op, mode);
2815 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2816 return 1;
2817 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2818 return 1;
2819 return 0;
2820 }
2821
2822 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as a sign or zero extended constant. */
2823
2824 int
2825 x86_64_szext_nonmemory_operand (op, mode)
2826 rtx op;
2827 enum machine_mode mode;
2828 {
2829 if (!TARGET_64BIT)
2830 return nonmemory_operand (op, mode);
2831 if (register_operand (op, mode))
2832 return 1;
2833 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2834 }
2835
2836 /* Return nonzero if OP is an immediate operand representable on x86_64. */
2837
2838 int
2839 x86_64_immediate_operand (op, mode)
2840 rtx op;
2841 enum machine_mode mode;
2842 {
2843 if (!TARGET_64BIT)
2844 return immediate_operand (op, mode);
2845 return x86_64_sign_extended_value (op);
2846 }
2847
2848 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value. */
2849
2850 int
2851 x86_64_zext_immediate_operand (op, mode)
2852 rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2854 {
2855 return x86_64_zero_extended_value (op);
2856 }
2857
2858 /* Return nonzero if OP is (const_int 1), else return zero. */
2859
2860 int
2861 const_int_1_operand (op, mode)
2862 rtx op;
2863 enum machine_mode mode ATTRIBUTE_UNUSED;
2864 {
2865 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2866 }
2867
2868 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2869 reference and a constant. */
2870
2871 int
2872 symbolic_operand (op, mode)
2873 register rtx op;
2874 enum machine_mode mode ATTRIBUTE_UNUSED;
2875 {
2876 switch (GET_CODE (op))
2877 {
2878 case SYMBOL_REF:
2879 case LABEL_REF:
2880 return 1;
2881
2882 case CONST:
2883 op = XEXP (op, 0);
2884 if (GET_CODE (op) == SYMBOL_REF
2885 || GET_CODE (op) == LABEL_REF
2886 || (GET_CODE (op) == UNSPEC
2887 && (XINT (op, 1) == 6
2888 || XINT (op, 1) == 7
2889 || XINT (op, 1) == 15)))
2890 return 1;
2891 if (GET_CODE (op) != PLUS
2892 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2893 return 0;
2894
2895 op = XEXP (op, 0);
2896 if (GET_CODE (op) == SYMBOL_REF
2897 || GET_CODE (op) == LABEL_REF)
2898 return 1;
2899 /* Only @GOTOFF gets offsets. */
2900 if (GET_CODE (op) != UNSPEC
2901 || XINT (op, 1) != 7)
2902 return 0;
2903
2904 op = XVECEXP (op, 0, 0);
2905 if (GET_CODE (op) == SYMBOL_REF
2906 || GET_CODE (op) == LABEL_REF)
2907 return 1;
2908 return 0;
2909
2910 default:
2911 return 0;
2912 }
2913 }
2914
2915 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2916
2917 int
2918 pic_symbolic_operand (op, mode)
2919 register rtx op;
2920 enum machine_mode mode ATTRIBUTE_UNUSED;
2921 {
2922 if (GET_CODE (op) != CONST)
2923 return 0;
2924 op = XEXP (op, 0);
2925 if (TARGET_64BIT)
2926 {
2927 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2928 return 1;
2929 }
2930 else
2931 {
2932 if (GET_CODE (op) == UNSPEC)
2933 return 1;
2934 if (GET_CODE (op) != PLUS
2935 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2936 return 0;
2937 op = XEXP (op, 0);
2938 if (GET_CODE (op) == UNSPEC)
2939 return 1;
2940 }
2941 return 0;
2942 }
2943
2944 /* Return true if OP is a symbolic operand that resolves locally. */
2945
2946 static int
2947 local_symbolic_operand (op, mode)
2948 rtx op;
2949 enum machine_mode mode ATTRIBUTE_UNUSED;
2950 {
2951 if (GET_CODE (op) == LABEL_REF)
2952 return 1;
2953
2954 if (GET_CODE (op) == CONST
2955 && GET_CODE (XEXP (op, 0)) == PLUS
2956 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2957 op = XEXP (XEXP (op, 0), 0);
2958
2959 if (GET_CODE (op) != SYMBOL_REF)
2960 return 0;
2961
2962 /* These we've been told are local by varasm and encode_section_info
2963 respectively. */
2964 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2965 return 1;
2966
2967 /* There is, however, a not insubstantial body of code in the rest of
2968 the compiler that assumes it can just stick the results of
2969 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2970 /* ??? This is a hack. Should update the body of the compiler to
2971 always create a DECL and invoke ENCODE_SECTION_INFO. */
2972 if (strncmp (XSTR (op, 0), internal_label_prefix,
2973 internal_label_prefix_len) == 0)
2974 return 1;
2975
2976 return 0;
2977 }
2978
2979 /* Test for a valid operand for a call instruction. Don't allow the
2980 arg pointer register or virtual regs since they may decay into
2981 reg + const, which the patterns can't handle. */
2982
2983 int
2984 call_insn_operand (op, mode)
2985 rtx op;
2986 enum machine_mode mode ATTRIBUTE_UNUSED;
2987 {
2988 /* Disallow indirect through a virtual register. This leads to
2989 compiler aborts when trying to eliminate them. */
2990 if (GET_CODE (op) == REG
2991 && (op == arg_pointer_rtx
2992 || op == frame_pointer_rtx
2993 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2994 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2995 return 0;
2996
2997 /* Disallow `call 1234'. Due to varying assembler lameness this
2998 gets either rejected or translated to `call .+1234'. */
2999 if (GET_CODE (op) == CONST_INT)
3000 return 0;
3001
3002 /* Explicitly allow SYMBOL_REF even if pic. */
3003 if (GET_CODE (op) == SYMBOL_REF)
3004 return 1;
3005
3006 /* Half-pic doesn't allow anything but registers and constants.
3007 We've just taken care of the latter. */
3008 if (HALF_PIC_P ())
3009 return register_operand (op, Pmode);
3010
3011 /* Otherwise we can allow any general_operand in the address. */
3012 return general_operand (op, Pmode);
3013 }
3014
3015 int
3016 constant_call_address_operand (op, mode)
3017 rtx op;
3018 enum machine_mode mode ATTRIBUTE_UNUSED;
3019 {
3020 if (GET_CODE (op) == CONST
3021 && GET_CODE (XEXP (op, 0)) == PLUS
3022 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3023 op = XEXP (XEXP (op, 0), 0);
3024 return GET_CODE (op) == SYMBOL_REF;
3025 }
3026
3027 /* Match exactly zero and one. */
3028
3029 int
3030 const0_operand (op, mode)
3031 register rtx op;
3032 enum machine_mode mode;
3033 {
3034 return op == CONST0_RTX (mode);
3035 }
3036
3037 int
3038 const1_operand (op, mode)
3039 register rtx op;
3040 enum machine_mode mode ATTRIBUTE_UNUSED;
3041 {
3042 return op == const1_rtx;
3043 }
3044
3045 /* Match 2, 4, or 8. Used for leal multiplicands. */
3046
3047 int
3048 const248_operand (op, mode)
3049 register rtx op;
3050 enum machine_mode mode ATTRIBUTE_UNUSED;
3051 {
3052 return (GET_CODE (op) == CONST_INT
3053 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3054 }
3055
3056 /* True if this is a constant appropriate for an increment or decrement. */
3057
3058 int
3059 incdec_operand (op, mode)
3060 register rtx op;
3061 enum machine_mode mode ATTRIBUTE_UNUSED;
3062 {
3063 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
3064 register, since the carry flag is not set. */
3065 if (TARGET_PENTIUM4 && !optimize_size)
3066 return 0;
3067 return op == const1_rtx || op == constm1_rtx;
3068 }
3069
3070 /* Return nonzero if OP is acceptable as operand of DImode shift
3071 expander. */
3072
3073 int
3074 shiftdi_operand (op, mode)
3075 rtx op;
3076 enum machine_mode mode ATTRIBUTE_UNUSED;
3077 {
3078 if (TARGET_64BIT)
3079 return nonimmediate_operand (op, mode);
3080 else
3081 return register_operand (op, mode);
3082 }
3083
3084 /* Return false if this is the stack pointer, or any other fake
3085 register eliminable to the stack pointer. Otherwise, this is
3086 a register operand.
3087
3088 This is used to prevent esp from being used as an index reg,
3089 which would only happen in pathological cases. */
3090
3091 int
3092 reg_no_sp_operand (op, mode)
3093 register rtx op;
3094 enum machine_mode mode;
3095 {
3096 rtx t = op;
3097 if (GET_CODE (t) == SUBREG)
3098 t = SUBREG_REG (t);
3099 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3100 return 0;
3101
3102 return register_operand (op, mode);
3103 }
3104
3105 int
3106 mmx_reg_operand (op, mode)
3107 register rtx op;
3108 enum machine_mode mode ATTRIBUTE_UNUSED;
3109 {
3110 return MMX_REG_P (op);
3111 }
3112
3113 /* Return false if this is any eliminable register. Otherwise
3114 general_operand. */
3115
3116 int
3117 general_no_elim_operand (op, mode)
3118 register rtx op;
3119 enum machine_mode mode;
3120 {
3121 rtx t = op;
3122 if (GET_CODE (t) == SUBREG)
3123 t = SUBREG_REG (t);
3124 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3125 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3126 || t == virtual_stack_dynamic_rtx)
3127 return 0;
3128 if (REG_P (t)
3129 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3130 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3131 return 0;
3132
3133 return general_operand (op, mode);
3134 }
3135
3136 /* Return false if this is any eliminable register. Otherwise
3137 register_operand or const_int. */
3138
3139 int
3140 nonmemory_no_elim_operand (op, mode)
3141 register rtx op;
3142 enum machine_mode mode;
3143 {
3144 rtx t = op;
3145 if (GET_CODE (t) == SUBREG)
3146 t = SUBREG_REG (t);
3147 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3148 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3149 || t == virtual_stack_dynamic_rtx)
3150 return 0;
3151
3152 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3153 }
3154
3155 /* Return true if op is a Q_REGS class register. */
3156
3157 int
3158 q_regs_operand (op, mode)
3159 register rtx op;
3160 enum machine_mode mode;
3161 {
3162 if (mode != VOIDmode && GET_MODE (op) != mode)
3163 return 0;
3164 if (GET_CODE (op) == SUBREG)
3165 op = SUBREG_REG (op);
3166 return ANY_QI_REG_P (op);
3167 }
3168
3169 /* Return true if op is a NON_Q_REGS class register. */
3170
3171 int
3172 non_q_regs_operand (op, mode)
3173 register rtx op;
3174 enum machine_mode mode;
3175 {
3176 if (mode != VOIDmode && GET_MODE (op) != mode)
3177 return 0;
3178 if (GET_CODE (op) == SUBREG)
3179 op = SUBREG_REG (op);
3180 return NON_QI_REG_P (op);
3181 }
3182
3183 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3184 insns. */
3185 int
3186 sse_comparison_operator (op, mode)
3187 rtx op;
3188 enum machine_mode mode ATTRIBUTE_UNUSED;
3189 {
3190 enum rtx_code code = GET_CODE (op);
3191 switch (code)
3192 {
3193 /* Operations supported directly. */
3194 case EQ:
3195 case LT:
3196 case LE:
3197 case UNORDERED:
3198 case NE:
3199 case UNGE:
3200 case UNGT:
3201 case ORDERED:
3202 return 1;
3203 /* These are equivalent to ones above in non-IEEE comparisons. */
3204 case UNEQ:
3205 case UNLT:
3206 case UNLE:
3207 case LTGT:
3208 case GE:
3209 case GT:
3210 return !TARGET_IEEE_FP;
3211 default:
3212 return 0;
3213 }
3214 }
3215 /* Return 1 if OP is a valid comparison operator in valid mode. */
3216 int
3217 ix86_comparison_operator (op, mode)
3218 register rtx op;
3219 enum machine_mode mode;
3220 {
3221 enum machine_mode inmode;
3222 enum rtx_code code = GET_CODE (op);
3223 if (mode != VOIDmode && GET_MODE (op) != mode)
3224 return 0;
3225 if (GET_RTX_CLASS (code) != '<')
3226 return 0;
3227 inmode = GET_MODE (XEXP (op, 0));
3228
3229 if (inmode == CCFPmode || inmode == CCFPUmode)
3230 {
3231 enum rtx_code second_code, bypass_code;
3232 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3233 return (bypass_code == NIL && second_code == NIL);
3234 }
3235 switch (code)
3236 {
3237 case EQ: case NE:
3238 return 1;
3239 case LT: case GE:
3240 if (inmode == CCmode || inmode == CCGCmode
3241 || inmode == CCGOCmode || inmode == CCNOmode)
3242 return 1;
3243 return 0;
3244 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3245 if (inmode == CCmode)
3246 return 1;
3247 return 0;
3248 case GT: case LE:
3249 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3250 return 1;
3251 return 0;
3252 default:
3253 return 0;
3254 }
3255 }
3256
3257 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3258
3259 int
3260 fcmov_comparison_operator (op, mode)
3261 register rtx op;
3262 enum machine_mode mode;
3263 {
3264 enum machine_mode inmode;
3265 enum rtx_code code = GET_CODE (op);
3266 if (mode != VOIDmode && GET_MODE (op) != mode)
3267 return 0;
3268 if (GET_RTX_CLASS (code) != '<')
3269 return 0;
3270 inmode = GET_MODE (XEXP (op, 0));
3271 if (inmode == CCFPmode || inmode == CCFPUmode)
3272 {
3273 enum rtx_code second_code, bypass_code;
3274 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3275 if (bypass_code != NIL || second_code != NIL)
3276 return 0;
3277 code = ix86_fp_compare_code_to_integer (code);
3278 }
3279 /* The i387 supports just a limited set of condition codes. */
3280 switch (code)
3281 {
3282 case LTU: case GTU: case LEU: case GEU:
3283 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3284 return 1;
3285 return 0;
3286 case ORDERED: case UNORDERED:
3287 case EQ: case NE:
3288 return 1;
3289 default:
3290 return 0;
3291 }
3292 }
3293
3294 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3295
3296 int
3297 promotable_binary_operator (op, mode)
3298 register rtx op;
3299 enum machine_mode mode ATTRIBUTE_UNUSED;
3300 {
3301 switch (GET_CODE (op))
3302 {
3303 case MULT:
3304 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3305 but the 386 and 486 do HImode multiplies faster. */
3306 return ix86_cpu > PROCESSOR_I486;
3307 case PLUS:
3308 case AND:
3309 case IOR:
3310 case XOR:
3311 case ASHIFT:
3312 return 1;
3313 default:
3314 return 0;
3315 }
3316 }
3317
3318 /* Nearly general operand, but accept any const_double, since we wish
3319 to be able to drop them into memory rather than have them get pulled
3320 into registers. */
3321
3322 int
3323 cmp_fp_expander_operand (op, mode)
3324 register rtx op;
3325 enum machine_mode mode;
3326 {
3327 if (mode != VOIDmode && mode != GET_MODE (op))
3328 return 0;
3329 if (GET_CODE (op) == CONST_DOUBLE)
3330 return 1;
3331 return general_operand (op, mode);
3332 }
3333
3334 /* Match an SI or HImode register for a zero_extract. */
3335
3336 int
3337 ext_register_operand (op, mode)
3338 register rtx op;
3339 enum machine_mode mode ATTRIBUTE_UNUSED;
3340 {
3341 int regno;
3342 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3343 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3344 return 0;
3345
3346 if (!register_operand (op, VOIDmode))
3347 return 0;
3348
3349 /* Be careful to accept only registers having upper parts. */
3350 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3351 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3352 }
3353
3354 /* Return 1 if this is a valid binary floating-point operation.
3355 OP is the expression matched, and MODE is its mode. */
3356
3357 int
3358 binary_fp_operator (op, mode)
3359 register rtx op;
3360 enum machine_mode mode;
3361 {
3362 if (mode != VOIDmode && mode != GET_MODE (op))
3363 return 0;
3364
3365 switch (GET_CODE (op))
3366 {
3367 case PLUS:
3368 case MINUS:
3369 case MULT:
3370 case DIV:
3371 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3372
3373 default:
3374 return 0;
3375 }
3376 }
3377
3378 int
3379 mult_operator (op, mode)
3380 register rtx op;
3381 enum machine_mode mode ATTRIBUTE_UNUSED;
3382 {
3383 return GET_CODE (op) == MULT;
3384 }
3385
3386 int
3387 div_operator (op, mode)
3388 register rtx op;
3389 enum machine_mode mode ATTRIBUTE_UNUSED;
3390 {
3391 return GET_CODE (op) == DIV;
3392 }
3393
3394 int
3395 arith_or_logical_operator (op, mode)
3396 rtx op;
3397 enum machine_mode mode;
3398 {
3399 return ((mode == VOIDmode || GET_MODE (op) == mode)
3400 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3401 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3402 }
3403
3404 /* Returns 1 if OP is a memory operand with a displacement. */
3405
3406 int
3407 memory_displacement_operand (op, mode)
3408 register rtx op;
3409 enum machine_mode mode;
3410 {
3411 struct ix86_address parts;
3412
3413 if (! memory_operand (op, mode))
3414 return 0;
3415
3416 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3417 abort ();
3418
3419 return parts.disp != NULL_RTX;
3420 }
3421
3422 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3423 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3424
3425 ??? It seems likely that this will only work because cmpsi is an
3426 expander, and no actual insns use this. */
3427
3428 int
3429 cmpsi_operand (op, mode)
3430 rtx op;
3431 enum machine_mode mode;
3432 {
3433 if (nonimmediate_operand (op, mode))
3434 return 1;
3435
3436 if (GET_CODE (op) == AND
3437 && GET_MODE (op) == SImode
3438 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3439 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3440 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3441 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3442 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3443 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3444 return 1;
3445
3446 return 0;
3447 }
3448
3449 /* Returns 1 if OP is a memory operand that cannot be represented by the
3450 modRM array. */
3451
3452 int
3453 long_memory_operand (op, mode)
3454 register rtx op;
3455 enum machine_mode mode;
3456 {
3457 if (! memory_operand (op, mode))
3458 return 0;
3459
3460 return memory_address_length (op) != 0;
3461 }
3462
3463 /* Return nonzero if the rtx is known aligned. */
3464
3465 int
3466 aligned_operand (op, mode)
3467 rtx op;
3468 enum machine_mode mode;
3469 {
3470 struct ix86_address parts;
3471
3472 if (!general_operand (op, mode))
3473 return 0;
3474
3475 /* Registers and immediate operands are always "aligned". */
3476 if (GET_CODE (op) != MEM)
3477 return 1;
3478
3479 /* Don't even try to do any aligned optimizations with volatiles. */
3480 if (MEM_VOLATILE_P (op))
3481 return 0;
3482
3483 op = XEXP (op, 0);
3484
3485 /* Pushes and pops are only valid on the stack pointer. */
3486 if (GET_CODE (op) == PRE_DEC
3487 || GET_CODE (op) == POST_INC)
3488 return 1;
3489
3490 /* Decode the address. */
3491 if (! ix86_decompose_address (op, &parts))
3492 abort ();
3493
3494 if (parts.base && GET_CODE (parts.base) == SUBREG)
3495 parts.base = SUBREG_REG (parts.base);
3496 if (parts.index && GET_CODE (parts.index) == SUBREG)
3497 parts.index = SUBREG_REG (parts.index);
3498
3499 /* Look for some component that isn't known to be aligned. */
3500 if (parts.index)
3501 {
3502 if (parts.scale < 4
3503 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3504 return 0;
3505 }
3506 if (parts.base)
3507 {
3508 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3509 return 0;
3510 }
3511 if (parts.disp)
3512 {
3513 if (GET_CODE (parts.disp) != CONST_INT
3514 || (INTVAL (parts.disp) & 3) != 0)
3515 return 0;
3516 }
3517
3518 /* Didn't find one -- this must be an aligned address. */
3519 return 1;
3520 }
3521 \f
3522 /* Return true if the constant is something that can be loaded with
3523 a special instruction. Only handle 0.0 and 1.0; others are less
3524 worthwhile. */
3525
3526 int
3527 standard_80387_constant_p (x)
3528 rtx x;
3529 {
3530 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3531 return -1;
3532 /* Note that the 80387 supports other constants, such as pi, that we should
3533 support too. On some machines, these are much slower to load as a standard
3534 constant than to load from doubles in memory. */
3535 if (x == CONST0_RTX (GET_MODE (x)))
3536 return 1;
3537 if (x == CONST1_RTX (GET_MODE (x)))
3538 return 2;
3539 return 0;
3540 }
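/* For example, (const_double 0.0) in DFmode yields 1 and (const_double 1.0)
   yields 2; callers in the machine description can then emit fldz or fld1
   respectively, while a return of 0 means the constant must come from memory
   and anything that is not a floating-point CONST_DOUBLE yields -1.  */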
3541
3542 /* Return 1 if X is an FP constant we can load into an SSE register without
3543 using memory. */
3544 int
3545 standard_sse_constant_p (x)
3546 rtx x;
3547 {
3548 if (GET_CODE (x) != CONST_DOUBLE)
3549 return -1;
3550 return (x == CONST0_RTX (GET_MODE (x)));
3551 }
3552
3553 /* Returns 1 if OP contains a symbol reference. */
3554
3555 int
3556 symbolic_reference_mentioned_p (op)
3557 rtx op;
3558 {
3559 register const char *fmt;
3560 register int i;
3561
3562 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3563 return 1;
3564
3565 fmt = GET_RTX_FORMAT (GET_CODE (op));
3566 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3567 {
3568 if (fmt[i] == 'E')
3569 {
3570 register int j;
3571
3572 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3573 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3574 return 1;
3575 }
3576
3577 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3578 return 1;
3579 }
3580
3581 return 0;
3582 }
3583
3584 /* Return 1 if it is appropriate to emit `ret' instructions in the
3585 body of a function. Do this only if the epilogue is simple, needing a
3586 couple of insns. Prior to reloading, we can't tell how many registers
3587 must be saved, so return 0 then. Return 0 if there is no frame
3588 marker to de-allocate.
3589
3590 If NON_SAVING_SETJMP is defined and true, then it is not possible
3591 for the epilogue to be simple, so return 0. This is a special case
3592 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3593 until final, but jump_optimize may need to know sooner if a
3594 `return' is OK. */
3595
3596 int
3597 ix86_can_use_return_insn_p ()
3598 {
3599 struct ix86_frame frame;
3600
3601 #ifdef NON_SAVING_SETJMP
3602 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3603 return 0;
3604 #endif
3605
3606 if (! reload_completed || frame_pointer_needed)
3607 return 0;
3608
3609 /* Don't allow more than 32K bytes to be popped, since that's all we can
3610 do with one instruction. */
3611 if (current_function_pops_args
3612 && current_function_args_size >= 32768)
3613 return 0;
3614
3615 ix86_compute_frame_layout (&frame);
3616 return frame.to_allocate == 0 && frame.nregs == 0;
3617 }
3618 \f
3619 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3620 int
3621 x86_64_sign_extended_value (value)
3622 rtx value;
3623 {
3624 switch (GET_CODE (value))
3625 {
3626 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3627 to be at least 32, and thus all acceptable constants are
3628 represented as CONST_INT. */
3629 case CONST_INT:
3630 if (HOST_BITS_PER_WIDE_INT == 32)
3631 return 1;
3632 else
3633 {
3634 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3635 return trunc_int_for_mode (val, SImode) == val;
3636 }
3637 break;
3638
3639 /* For certain code models, the symbolic references are known to fit. */
3640 case SYMBOL_REF:
3641 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3642
3643 /* For certain code models, the code is near as well. */
3644 case LABEL_REF:
3645 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3646
3647 /* We may also accept offsetted memory references in certain special
3648 cases. */
3649 case CONST:
3650 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3651 && XVECLEN (XEXP (value, 0), 0) == 1
3652 && XINT (XEXP (value, 0), 1) == 15)
3653 return 1;
3654 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3655 {
3656 rtx op1 = XEXP (XEXP (value, 0), 0);
3657 rtx op2 = XEXP (XEXP (value, 0), 1);
3658 HOST_WIDE_INT offset;
3659
3660 if (ix86_cmodel == CM_LARGE)
3661 return 0;
3662 if (GET_CODE (op2) != CONST_INT)
3663 return 0;
3664 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3665 switch (GET_CODE (op1))
3666 {
3667 case SYMBOL_REF:
3668 /* For CM_SMALL assume that the latest object is 1MB before the
3669 end of the 31-bit boundary. We may also accept pretty
3670 large negative constants, knowing that all objects are
3671 in the positive half of the address space. */
3672 if (ix86_cmodel == CM_SMALL
3673 && offset < 1024*1024*1024
3674 && trunc_int_for_mode (offset, SImode) == offset)
3675 return 1;
3676 /* For CM_KERNEL we know that all objects reside in the
3677 negative half of the 32-bit address space. We may not
3678 accept negative offsets, since they may be just off,
3679 but we may accept pretty large positive ones. */
3680 if (ix86_cmodel == CM_KERNEL
3681 && offset > 0
3682 && trunc_int_for_mode (offset, SImode) == offset)
3683 return 1;
3684 break;
3685 case LABEL_REF:
3686 /* These conditions are similar to SYMBOL_REF ones, just the
3687 constraints for code models differ. */
3688 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3689 && offset < 1024*1024*1024
3690 && trunc_int_for_mode (offset, SImode) == offset)
3691 return 1;
3692 if (ix86_cmodel == CM_KERNEL
3693 && offset > 0
3694 && trunc_int_for_mode (offset, SImode) == offset)
3695 return 1;
3696 break;
3697 default:
3698 return 0;
3699 }
3700 }
3701 return 0;
3702 default:
3703 return 0;
3704 }
3705 }
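/* A few illustrative cases for the function above, assuming a 64-bit host:
   (const_int 0x7fffffff) and (const_int -0x80000000) are accepted, since
   both survive the round trip through SImode sign extension, while
   (const_int 0x80000000) is rejected.  Symbols are accepted only for the
   small and kernel code models, as checked above.  */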
3706
3707 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3708 int
3709 x86_64_zero_extended_value (value)
3710 rtx value;
3711 {
3712 switch (GET_CODE (value))
3713 {
3714 case CONST_DOUBLE:
3715 if (HOST_BITS_PER_WIDE_INT == 32)
3716 return (GET_MODE (value) == VOIDmode
3717 && !CONST_DOUBLE_HIGH (value));
3718 else
3719 return 0;
3720 case CONST_INT:
3721 if (HOST_BITS_PER_WIDE_INT == 32)
3722 return INTVAL (value) >= 0;
3723 else
3724 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3725 break;
3726
3727 /* For certain code models, the symbolic references are known to fit. */
3728 case SYMBOL_REF:
3729 return ix86_cmodel == CM_SMALL;
3730
3731 /* For certain code models, the code is near as well. */
3732 case LABEL_REF:
3733 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3734
3735 /* We may also accept offsetted memory references in certain special
3736 cases. */
3737 case CONST:
3738 if (GET_CODE (XEXP (value, 0)) == PLUS)
3739 {
3740 rtx op1 = XEXP (XEXP (value, 0), 0);
3741 rtx op2 = XEXP (XEXP (value, 0), 1);
3742
3743 if (ix86_cmodel == CM_LARGE)
3744 return 0;
3745 switch (GET_CODE (op1))
3746 {
3747 case SYMBOL_REF:
3748 return 0;
3749 /* For small code model we may accept pretty large positive
3750 offsets, since one bit is available for free. Negative
3751 offsets are limited by the size of NULL pointer area
3752 specified by the ABI. */
3753 if (ix86_cmodel == CM_SMALL
3754 && GET_CODE (op2) == CONST_INT
3755 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3756 && (trunc_int_for_mode (INTVAL (op2), SImode)
3757 == INTVAL (op2)))
3758 return 1;
3759 /* ??? For the kernel, we may accept adjustment of
3760 -0x10000000, since we know that it will just convert
3761 negative address space to positive, but perhaps this
3762 is not worthwhile. */
3763 break;
3764 case LABEL_REF:
3765 /* These conditions are similar to SYMBOL_REF ones, just the
3766 constraints for code models differ. */
3767 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3768 && GET_CODE (op2) == CONST_INT
3769 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3770 && (trunc_int_for_mode (INTVAL (op2), SImode)
3771 == INTVAL (op2)))
3772 return 1;
3773 break;
3774 default:
3775 return 0;
3776 }
3777 }
3778 return 0;
3779 default:
3780 return 0;
3781 }
3782 }
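/* Correspondingly, on a 64-bit host the zero-extended test above accepts any
   value in the range [0, 0xffffffff]; e.g. (const_int 0x80000000) is accepted
   here even though the sign-extended test rejects it, while negative
   constants are rejected because their upper 32 bits are all ones.  */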
3783
3784 /* Value should be nonzero if functions must have frame pointers.
3785 Zero means the frame pointer need not be set up (and parms may
3786 be accessed via the stack pointer) in functions that seem suitable. */
3787
3788 int
3789 ix86_frame_pointer_required ()
3790 {
3791 /* If we accessed previous frames, then the generated code expects
3792 to be able to access the saved ebp value in our frame. */
3793 if (cfun->machine->accesses_prev_frame)
3794 return 1;
3795
3796 /* Several x86 OSes need a frame pointer for other reasons,
3797 usually pertaining to setjmp. */
3798 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3799 return 1;
3800
3801 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3802 the frame pointer by default. Turn it back on now if we've not
3803 got a leaf function. */
3804 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3805 return 1;
3806
3807 return 0;
3808 }
3809
3810 /* Record that the current function accesses previous call frames. */
3811
3812 void
3813 ix86_setup_frame_addresses ()
3814 {
3815 cfun->machine->accesses_prev_frame = 1;
3816 }
3817 \f
3818 static char pic_label_name[32];
3819
3820 /* This function generates code for -fpic that loads %ebx with
3821 the return address of the caller and then returns. */
3822
3823 void
3824 ix86_asm_file_end (file)
3825 FILE *file;
3826 {
3827 rtx xops[2];
3828
3829 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3830 return;
3831
3832 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3833 to updating relocations to a section being discarded such that this
3834 doesn't work. Ought to detect this at configure time. */
3835 #if 0
3836 /* The trick here is to create a linkonce section containing the
3837 pic label thunk, but to refer to it with an internal label.
3838 Because the label is internal, we don't have inter-dso name
3839 binding issues on hosts that don't support ".hidden".
3840
3841 In order to use these macros, however, we must create a fake
3842 function decl. */
3843 if (targetm.have_named_sections)
3844 {
3845 tree decl = build_decl (FUNCTION_DECL,
3846 get_identifier ("i686.get_pc_thunk"),
3847 error_mark_node);
3848 DECL_ONE_ONLY (decl) = 1;
3849 UNIQUE_SECTION (decl, 0);
3850 named_section (decl, NULL);
3851 }
3852 else
3853 #else
3854 text_section ();
3855 #endif
3856
3857 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3858 internal (non-global) label that's being emitted, it didn't make
3859 sense to have .type information for local labels. This caused
3860 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3861 me debug info for a label that you're declaring non-global?), so this
3862 was changed to call ASM_OUTPUT_LABEL() instead. */
3863
3864 ASM_OUTPUT_LABEL (file, pic_label_name);
3865
3866 xops[0] = pic_offset_table_rtx;
3867 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3868 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3869 output_asm_insn ("ret", xops);
3870 }
3871
3872 void
3873 load_pic_register ()
3874 {
3875 rtx gotsym, pclab;
3876
3877 if (TARGET_64BIT)
3878 abort ();
3879
3880 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3881
3882 if (TARGET_DEEP_BRANCH_PREDICTION)
3883 {
3884 if (! pic_label_name[0])
3885 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3886 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3887 }
3888 else
3889 {
3890 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3891 }
3892
3893 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3894
3895 if (! TARGET_DEEP_BRANCH_PREDICTION)
3896 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3897
3898 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3899 }
3900
3901 /* Generate a "push" pattern for input ARG. */
3902
3903 static rtx
3904 gen_push (arg)
3905 rtx arg;
3906 {
3907 return gen_rtx_SET (VOIDmode,
3908 gen_rtx_MEM (Pmode,
3909 gen_rtx_PRE_DEC (Pmode,
3910 stack_pointer_rtx)),
3911 arg);
3912 }
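/* The rtl produced above is, e.g. for 32-bit code,

     (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI ebx))

   which the push patterns in the machine description match; with
   TARGET_64BIT, Pmode is DImode and the push is eight bytes wide.  */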
3913
3914 /* Return 1 if we need to save REGNO. */
3915 static int
3916 ix86_save_reg (regno, maybe_eh_return)
3917 int regno;
3918 int maybe_eh_return;
3919 {
3920 if (regno == PIC_OFFSET_TABLE_REGNUM
3921 && (current_function_uses_pic_offset_table
3922 || current_function_uses_const_pool
3923 || current_function_calls_eh_return))
3924 return 1;
3925
3926 if (current_function_calls_eh_return && maybe_eh_return)
3927 {
3928 unsigned i;
3929 for (i = 0; ; i++)
3930 {
3931 unsigned test = EH_RETURN_DATA_REGNO (i);
3932 if (test == INVALID_REGNUM)
3933 break;
3934 if (test == (unsigned) regno)
3935 return 1;
3936 }
3937 }
3938
3939 return (regs_ever_live[regno]
3940 && !call_used_regs[regno]
3941 && !fixed_regs[regno]
3942 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3943 }
3944
3945 /* Return number of registers to be saved on the stack. */
3946
3947 static int
3948 ix86_nsaved_regs ()
3949 {
3950 int nregs = 0;
3951 int regno;
3952
3953 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3954 if (ix86_save_reg (regno, true))
3955 nregs++;
3956 return nregs;
3957 }
3958
3959 /* Return the offset between two registers, one to be eliminated, and the other
3960 its replacement, at the start of a routine. */
3961
3962 HOST_WIDE_INT
3963 ix86_initial_elimination_offset (from, to)
3964 int from;
3965 int to;
3966 {
3967 struct ix86_frame frame;
3968 ix86_compute_frame_layout (&frame);
3969
3970 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3971 return frame.hard_frame_pointer_offset;
3972 else if (from == FRAME_POINTER_REGNUM
3973 && to == HARD_FRAME_POINTER_REGNUM)
3974 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3975 else
3976 {
3977 if (to != STACK_POINTER_REGNUM)
3978 abort ();
3979 else if (from == ARG_POINTER_REGNUM)
3980 return frame.stack_pointer_offset;
3981 else if (from != FRAME_POINTER_REGNUM)
3982 abort ();
3983 else
3984 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3985 }
3986 }
3987
3988 /* Fill the ix86_frame structure describing the frame of the current function. */
3989
3990 static void
3991 ix86_compute_frame_layout (frame)
3992 struct ix86_frame *frame;
3993 {
3994 HOST_WIDE_INT total_size;
3995 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3996 int offset;
3997 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3998 HOST_WIDE_INT size = get_frame_size ();
3999
4000 frame->nregs = ix86_nsaved_regs ();
4001 total_size = size;
4002
4003 /* Skip return address and saved base pointer. */
4004 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4005
4006 frame->hard_frame_pointer_offset = offset;
4007
4008 /* Do some sanity checking of stack_alignment_needed and
4009 preferred_alignment, since the i386 port is the only one using these
4010 features, and they may break easily. */
4011
4012 if (size && !stack_alignment_needed)
4013 abort ();
4014 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4015 abort ();
4016 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4017 abort ();
4018 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4019 abort ();
4020
4021 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4022 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4023
4024 /* Register save area */
4025 offset += frame->nregs * UNITS_PER_WORD;
4026
4027 /* Va-arg area */
4028 if (ix86_save_varrargs_registers)
4029 {
4030 offset += X86_64_VARARGS_SIZE;
4031 frame->va_arg_size = X86_64_VARARGS_SIZE;
4032 }
4033 else
4034 frame->va_arg_size = 0;
4035
4036 /* Align start of frame for local function. */
4037 frame->padding1 = ((offset + stack_alignment_needed - 1)
4038 & -stack_alignment_needed) - offset;
4039
4040 offset += frame->padding1;
4041
4042 /* Frame pointer points here. */
4043 frame->frame_pointer_offset = offset;
4044
4045 offset += size;
4046
4047 /* Add outgoing arguments area. */
4048 if (ACCUMULATE_OUTGOING_ARGS)
4049 {
4050 offset += current_function_outgoing_args_size;
4051 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4052 }
4053 else
4054 frame->outgoing_arguments_size = 0;
4055
4056 /* Align stack boundary. */
4057 frame->padding2 = ((offset + preferred_alignment - 1)
4058 & -preferred_alignment) - offset;
4059
4060 offset += frame->padding2;
4061
4062 /* We've reached end of stack frame. */
4063 frame->stack_pointer_offset = offset;
4064
4065 /* Size prologue needs to allocate. */
4066 frame->to_allocate =
4067 (size + frame->padding1 + frame->padding2
4068 + frame->outgoing_arguments_size + frame->va_arg_size);
4069
4070 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4071 && current_function_is_leaf)
4072 {
4073 frame->red_zone_size = frame->to_allocate;
4074 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4075 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4076 }
4077 else
4078 frame->red_zone_size = 0;
4079 frame->to_allocate -= frame->red_zone_size;
4080 frame->stack_pointer_offset -= frame->red_zone_size;
4081 #if 0
4082 fprintf (stderr, "nregs: %i\n", frame->nregs);
4083 fprintf (stderr, "size: %i\n", size);
4084 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4085 fprintf (stderr, "padding1: %i\n", frame->padding1);
4086 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4087 fprintf (stderr, "padding2: %i\n", frame->padding2);
4088 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4089 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4090 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4091 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4092 frame->hard_frame_pointer_offset);
4093 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4094 #endif
4095 }
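/* A purely illustrative example of the layout computed above: 32-bit code,
   frame_pointer_needed, nregs == 2, get_frame_size () == 20, no va-arg or
   outgoing argument area, stack_alignment_needed == 4 and
   preferred_alignment == 16.  Then offset starts at 8 (return address plus
   saved ebp), the register save area brings it to 16 with padding1 == 0,
   frame_pointer_offset == 16, the locals bring it to 36, padding2 == 12
   rounds it up to stack_pointer_offset == 48, and to_allocate is
   20 + 0 + 12 == 32.  */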
4096
4097 /* Emit code to save registers in the prologue. */
4098
4099 static void
4100 ix86_emit_save_regs ()
4101 {
4102 register int regno;
4103 rtx insn;
4104
4105 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4106 if (ix86_save_reg (regno, true))
4107 {
4108 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4109 RTX_FRAME_RELATED_P (insn) = 1;
4110 }
4111 }
4112
4113 /* Emit code to save registers using MOV insns. The first register
4114 is saved at POINTER + OFFSET. */
4115 static void
4116 ix86_emit_save_regs_using_mov (pointer, offset)
4117 rtx pointer;
4118 HOST_WIDE_INT offset;
4119 {
4120 int regno;
4121 rtx insn;
4122
4123 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4124 if (ix86_save_reg (regno, true))
4125 {
4126 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4127 Pmode, offset),
4128 gen_rtx_REG (Pmode, regno));
4129 RTX_FRAME_RELATED_P (insn) = 1;
4130 offset += UNITS_PER_WORD;
4131 }
4132 }
4133
4134 /* Expand the prologue into a bunch of separate insns. */
4135
4136 void
4137 ix86_expand_prologue ()
4138 {
4139 rtx insn;
4140 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4141 || current_function_uses_const_pool)
4142 && !TARGET_64BIT);
4143 struct ix86_frame frame;
4144 int use_mov = 0;
4145 HOST_WIDE_INT allocate;
4146
4147 if (!optimize_size)
4148 {
4149 use_fast_prologue_epilogue
4150 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4151 if (TARGET_PROLOGUE_USING_MOVE)
4152 use_mov = use_fast_prologue_epilogue;
4153 }
4154 ix86_compute_frame_layout (&frame);
4155
4156 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4157 slower on all targets. Also sdb doesn't like it. */
4158
4159 if (frame_pointer_needed)
4160 {
4161 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4162 RTX_FRAME_RELATED_P (insn) = 1;
4163
4164 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4165 RTX_FRAME_RELATED_P (insn) = 1;
4166 }
4167
4168 allocate = frame.to_allocate;
4169 /* When dealing with only a single register and an empty frame,
4170 a push is equivalent to the mov+add sequence. */
4171 if (allocate == 0 && frame.nregs <= 1)
4172 use_mov = 0;
4173
4174 if (!use_mov)
4175 ix86_emit_save_regs ();
4176 else
4177 allocate += frame.nregs * UNITS_PER_WORD;
4178
4179 if (allocate == 0)
4180 ;
4181 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4182 {
4183 insn = emit_insn (gen_pro_epilogue_adjust_stack
4184 (stack_pointer_rtx, stack_pointer_rtx,
4185 GEN_INT (-allocate)));
4186 RTX_FRAME_RELATED_P (insn) = 1;
4187 }
4188 else
4189 {
4190 /* ??? Is this only valid for Win32? */
4191
4192 rtx arg0, sym;
4193
4194 if (TARGET_64BIT)
4195 abort ();
4196
4197 arg0 = gen_rtx_REG (SImode, 0);
4198 emit_move_insn (arg0, GEN_INT (allocate));
4199
4200 sym = gen_rtx_MEM (FUNCTION_MODE,
4201 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4202 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4203
4204 CALL_INSN_FUNCTION_USAGE (insn)
4205 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4206 CALL_INSN_FUNCTION_USAGE (insn));
4207 }
4208 if (use_mov)
4209 {
4210 if (!frame_pointer_needed || !frame.to_allocate)
4211 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4212 else
4213 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4214 -frame.nregs * UNITS_PER_WORD);
4215 }
4216
4217 #ifdef SUBTARGET_PROLOGUE
4218 SUBTARGET_PROLOGUE;
4219 #endif
4220
4221 if (pic_reg_used)
4222 load_pic_register ();
4223
4224 /* If we are profiling, make sure no instructions are scheduled before
4225 the call to mcount. However, if -fpic, the above call will have
4226 done that. */
4227 if (current_function_profile && ! pic_reg_used)
4228 emit_insn (gen_blockage ());
4229 }
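/* For a typical 32-bit function with a frame pointer and no special cases,
   the insns emitted above amount to something like

     push %ebp
     mov  %esp, %ebp
     push %ebx
     sub  $N, %esp

   where N is frame.to_allocate; when TARGET_PROLOGUE_USING_MOVE selects the
   mov-based variant, the register saves are done with mov insns into the
   newly allocated area instead of pushes.  */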
4230
4231 /* Emit code to restore saved registers using MOV insns. First register
4232 is restored from POINTER + OFFSET. */
4233 static void
4234 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4235 rtx pointer;
4236 int offset;
4237 int maybe_eh_return;
4238 {
4239 int regno;
4240
4241 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4242 if (ix86_save_reg (regno, maybe_eh_return))
4243 {
4244 emit_move_insn (gen_rtx_REG (Pmode, regno),
4245 adjust_address (gen_rtx_MEM (Pmode, pointer),
4246 Pmode, offset));
4247 offset += UNITS_PER_WORD;
4248 }
4249 }
4250
4251 /* Restore function stack, frame, and registers. */
4252
4253 void
4254 ix86_expand_epilogue (style)
4255 int style;
4256 {
4257 int regno;
4258 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4259 struct ix86_frame frame;
4260 HOST_WIDE_INT offset;
4261
4262 ix86_compute_frame_layout (&frame);
4263
4264 /* Calculate start of saved registers relative to ebp. Special care
4265 must be taken for the normal return case of a function using
4266 eh_return: the eax and edx registers are marked as saved, but not
4267 restored along this path. */
4268 offset = frame.nregs;
4269 if (current_function_calls_eh_return && style != 2)
4270 offset -= 2;
4271 offset *= -UNITS_PER_WORD;
4272
4273 /* If we're only restoring one register and sp is not valid, then
4274 use a move instruction to restore the register, since it's
4275 less work than reloading sp and popping the register.
4276
4277 The default code results in a stack adjustment using an add/lea instruction,
4278 while this code results in a LEAVE instruction (or discrete equivalent),
4279 so it is profitable in some other cases as well, especially when there
4280 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4281 and there is exactly one register to pop. This heuristic may need some
4282 tuning in the future. */
4283 if ((!sp_valid && frame.nregs <= 1)
4284 || (TARGET_EPILOGUE_USING_MOVE
4285 && use_fast_prologue_epilogue
4286 && (frame.nregs > 1 || frame.to_allocate))
4287 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4288 || (frame_pointer_needed && TARGET_USE_LEAVE
4289 && use_fast_prologue_epilogue && frame.nregs == 1)
4290 || current_function_calls_eh_return)
4291 {
4292 /* Restore registers. We can use ebp or esp to address the memory
4293 locations. If both are available, default to ebp, since offsets
4294 are known to be small. The only exception is esp pointing directly to the
4295 end of the block of saved registers, where we may simplify the addressing
4296 mode. */
4297
4298 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4299 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4300 frame.to_allocate, style == 2);
4301 else
4302 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4303 offset, style == 2);
4304
4305 /* eh_return epilogues need %ecx added to the stack pointer. */
4306 if (style == 2)
4307 {
4308 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4309
4310 if (frame_pointer_needed)
4311 {
4312 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4313 tmp = plus_constant (tmp, UNITS_PER_WORD);
4314 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4315
4316 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4317 emit_move_insn (hard_frame_pointer_rtx, tmp);
4318
4319 emit_insn (gen_pro_epilogue_adjust_stack
4320 (stack_pointer_rtx, sa, const0_rtx));
4321 }
4322 else
4323 {
4324 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4325 tmp = plus_constant (tmp, (frame.to_allocate
4326 + frame.nregs * UNITS_PER_WORD));
4327 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4328 }
4329 }
4330 else if (!frame_pointer_needed)
4331 emit_insn (gen_pro_epilogue_adjust_stack
4332 (stack_pointer_rtx, stack_pointer_rtx,
4333 GEN_INT (frame.to_allocate
4334 + frame.nregs * UNITS_PER_WORD)));
4335 /* If not an i386, mov & pop is faster than "leave". */
4336 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4337 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4338 else
4339 {
4340 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4341 hard_frame_pointer_rtx,
4342 const0_rtx));
4343 if (TARGET_64BIT)
4344 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4345 else
4346 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4347 }
4348 }
4349 else
4350 {
4351 /* First step is to deallocate the stack frame so that we can
4352 pop the registers. */
4353 if (!sp_valid)
4354 {
4355 if (!frame_pointer_needed)
4356 abort ();
4357 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4358 hard_frame_pointer_rtx,
4359 GEN_INT (offset)));
4360 }
4361 else if (frame.to_allocate)
4362 emit_insn (gen_pro_epilogue_adjust_stack
4363 (stack_pointer_rtx, stack_pointer_rtx,
4364 GEN_INT (frame.to_allocate)));
4365
4366 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4367 if (ix86_save_reg (regno, false))
4368 {
4369 if (TARGET_64BIT)
4370 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4371 else
4372 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4373 }
4374 if (frame_pointer_needed)
4375 {
4376 /* Leave results in shorter dependency chains on CPUs that are
4377 able to grok it fast. */
4378 if (TARGET_USE_LEAVE)
4379 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4380 else if (TARGET_64BIT)
4381 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4382 else
4383 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4384 }
4385 }
4386
4387 /* Sibcall epilogues don't want a return instruction. */
4388 if (style == 0)
4389 return;
4390
4391 if (current_function_pops_args && current_function_args_size)
4392 {
4393 rtx popc = GEN_INT (current_function_pops_args);
4394
4395 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4396 return address, do an explicit add, and jump indirectly to the
4397 caller. */
4398
4399 if (current_function_pops_args >= 65536)
4400 {
4401 rtx ecx = gen_rtx_REG (SImode, 2);
4402
4403 /* There is no "pascal" calling convention in the 64-bit ABI. */
4404 if (TARGET_64BIT)
4405 abort ();
4406
4407 emit_insn (gen_popsi1 (ecx));
4408 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4409 emit_jump_insn (gen_return_indirect_internal (ecx));
4410 }
4411 else
4412 emit_jump_insn (gen_return_pop_internal (popc));
4413 }
4414 else
4415 emit_jump_insn (gen_return_internal ());
4416 }
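/* Roughly, the code emitted above ends up as either

     add  $N, %esp
     pop  %ebx
     leave  (or pop %ebp)
     ret

   on the pop-based path, or as mov-based register restores followed by a
   stack adjustment (or leave) and ret on the move-based path; `ret $n' is
   used instead when the function pops its own arguments, and sibcall
   epilogues (style == 0) emit no return insn at all.  */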
4417 \f
4418 /* Extract the parts of an RTL expression that is a valid memory address
4419 for an instruction. Return 0 if the structure of the address is
4420 grossly off. Return -1 if the address contains ASHIFT, so it is not
4421 strictly valid, but is still used for computing the length of a lea
4422 instruction. */
4423
4424 static int
4425 ix86_decompose_address (addr, out)
4426 register rtx addr;
4427 struct ix86_address *out;
4428 {
4429 rtx base = NULL_RTX;
4430 rtx index = NULL_RTX;
4431 rtx disp = NULL_RTX;
4432 HOST_WIDE_INT scale = 1;
4433 rtx scale_rtx = NULL_RTX;
4434 int retval = 1;
4435
4436 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4437 base = addr;
4438 else if (GET_CODE (addr) == PLUS)
4439 {
4440 rtx op0 = XEXP (addr, 0);
4441 rtx op1 = XEXP (addr, 1);
4442 enum rtx_code code0 = GET_CODE (op0);
4443 enum rtx_code code1 = GET_CODE (op1);
4444
4445 if (code0 == REG || code0 == SUBREG)
4446 {
4447 if (code1 == REG || code1 == SUBREG)
4448 index = op0, base = op1; /* index + base */
4449 else
4450 base = op0, disp = op1; /* base + displacement */
4451 }
4452 else if (code0 == MULT)
4453 {
4454 index = XEXP (op0, 0);
4455 scale_rtx = XEXP (op0, 1);
4456 if (code1 == REG || code1 == SUBREG)
4457 base = op1; /* index*scale + base */
4458 else
4459 disp = op1; /* index*scale + disp */
4460 }
4461 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4462 {
4463 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4464 scale_rtx = XEXP (XEXP (op0, 0), 1);
4465 base = XEXP (op0, 1);
4466 disp = op1;
4467 }
4468 else if (code0 == PLUS)
4469 {
4470 index = XEXP (op0, 0); /* index + base + disp */
4471 base = XEXP (op0, 1);
4472 disp = op1;
4473 }
4474 else
4475 return 0;
4476 }
4477 else if (GET_CODE (addr) == MULT)
4478 {
4479 index = XEXP (addr, 0); /* index*scale */
4480 scale_rtx = XEXP (addr, 1);
4481 }
4482 else if (GET_CODE (addr) == ASHIFT)
4483 {
4484 rtx tmp;
4485
4486 /* We're called for lea too, which implements ashift on occasion. */
4487 index = XEXP (addr, 0);
4488 tmp = XEXP (addr, 1);
4489 if (GET_CODE (tmp) != CONST_INT)
4490 return 0;
4491 scale = INTVAL (tmp);
4492 if ((unsigned HOST_WIDE_INT) scale > 3)
4493 return 0;
4494 scale = 1 << scale;
4495 retval = -1;
4496 }
4497 else
4498 disp = addr; /* displacement */
4499
4500 /* Extract the integral value of scale. */
4501 if (scale_rtx)
4502 {
4503 if (GET_CODE (scale_rtx) != CONST_INT)
4504 return 0;
4505 scale = INTVAL (scale_rtx);
4506 }
4507
4508 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4509 if (base && index && scale == 1
4510 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4511 || index == stack_pointer_rtx))
4512 {
4513 rtx tmp = base;
4514 base = index;
4515 index = tmp;
4516 }
4517
4518 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4519 if ((base == hard_frame_pointer_rtx
4520 || base == frame_pointer_rtx
4521 || base == arg_pointer_rtx) && !disp)
4522 disp = const0_rtx;
4523
4524 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
4525 Avoid this by transforming it to [%esi+0]. */
4526 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4527 && base && !index && !disp
4528 && REG_P (base)
4529 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4530 disp = const0_rtx;
4531
4532 /* Special case: encode reg+reg instead of reg*2. */
4533 if (!base && index && scale && scale == 2)
4534 base = index, scale = 1;
4535
4536 /* Special case: scaling cannot be encoded without base or displacement. */
4537 if (!base && !disp && index && scale != 1)
4538 disp = const0_rtx;
4539
4540 out->base = base;
4541 out->index = index;
4542 out->disp = disp;
4543 out->scale = scale;
4544
4545 return retval;
4546 }
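/* Some example decompositions, for illustration:

     (reg bx)                                   base = bx
     (plus (reg bx) (const_int 16))             base = bx, disp = 16
     (plus (mult (reg si) (const_int 4))
           (const_int 16))                      index = si, scale = 4, disp = 16
     (ashift (reg si) (const_int 2))            index = si, scale = 4, return -1
     (mult (reg si) (const_int 2))              base = index = si, scale = 1

   The last rewrite is the reg+reg special case above; the ASHIFT form is
   only accepted for lea length computation, hence the -1 return value.  */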
4547 \f
4548 /* Return the cost of the memory address x.
4549 For i386, it is better to use a complex address than let gcc copy
4550 the address into a reg and make a new pseudo. But not if the address
4551 requires two regs - that would mean more pseudos with longer
4552 lifetimes. */
4553 int
4554 ix86_address_cost (x)
4555 rtx x;
4556 {
4557 struct ix86_address parts;
4558 int cost = 1;
4559
4560 if (!ix86_decompose_address (x, &parts))
4561 abort ();
4562
4563 if (parts.base && GET_CODE (parts.base) == SUBREG)
4564 parts.base = SUBREG_REG (parts.base);
4565 if (parts.index && GET_CODE (parts.index) == SUBREG)
4566 parts.index = SUBREG_REG (parts.index);
4567
4568 /* More complex memory references are better. */
4569 if (parts.disp && parts.disp != const0_rtx)
4570 cost--;
4571
4572 /* Attempt to minimize number of registers in the address. */
4573 if ((parts.base
4574 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4575 || (parts.index
4576 && (!REG_P (parts.index)
4577 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4578 cost++;
4579
4580 if (parts.base
4581 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4582 && parts.index
4583 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4584 && parts.base != parts.index)
4585 cost++;
4586
4587 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4588 since its predecode logic can't detect the length of instructions
4589 and it degenerates to vector decoding. Increase the cost of such
4590 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4591 to split such addresses or even refuse such addresses at all.
4592
4593 The following addressing modes are affected:
4594 [base+scale*index]
4595 [scale*index+disp]
4596 [base+index]
4597
4598 The first and last cases may be avoidable by explicitly coding the zero
4599 in the memory address, but I don't have an AMD-K6 machine handy to check
4600 this theory. */
4601
4602 if (TARGET_K6
4603 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4604 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4605 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4606 cost += 10;
4607
4608 return cost;
4609 }
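/* To illustrate the computation above: 4(%ebx) costs 0 (the nonzero
   displacement earns the bonus), plain (%ebx) costs 1, and an address built
   from two distinct pseudo registers costs 3 before reload, while the K6
   penalty adds 10 on top of whatever else applies.  */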
4610 \f
4611 /* If X is a machine specific address (i.e. a symbol or label being
4612 referenced as a displacement from the GOT implemented using an
4613 UNSPEC), then return the base term. Otherwise return X. */
4614
4615 rtx
4616 ix86_find_base_term (x)
4617 rtx x;
4618 {
4619 rtx term;
4620
4621 if (TARGET_64BIT)
4622 {
4623 if (GET_CODE (x) != CONST)
4624 return x;
4625 term = XEXP (x, 0);
4626 if (GET_CODE (term) == PLUS
4627 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4628 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4629 term = XEXP (term, 0);
4630 if (GET_CODE (term) != UNSPEC
4631 || XVECLEN (term, 0) != 1
4632 || XINT (term, 1) != 15)
4633 return x;
4634
4635 term = XVECEXP (term, 0, 0);
4636
4637 if (GET_CODE (term) != SYMBOL_REF
4638 && GET_CODE (term) != LABEL_REF)
4639 return x;
4640
4641 return term;
4642 }
4643
4644 if (GET_CODE (x) != PLUS
4645 || XEXP (x, 0) != pic_offset_table_rtx
4646 || GET_CODE (XEXP (x, 1)) != CONST)
4647 return x;
4648
4649 term = XEXP (XEXP (x, 1), 0);
4650
4651 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4652 term = XEXP (term, 0);
4653
4654 if (GET_CODE (term) != UNSPEC
4655 || XVECLEN (term, 0) != 1
4656 || XINT (term, 1) != 7)
4657 return x;
4658
4659 term = XVECEXP (term, 0, 0);
4660
4661 if (GET_CODE (term) != SYMBOL_REF
4662 && GET_CODE (term) != LABEL_REF)
4663 return x;
4664
4665 return term;
4666 }
4667 \f
4668 /* Determine if a given CONST RTX is a valid memory displacement
4669 in PIC mode. */
4670
4671 int
4672 legitimate_pic_address_disp_p (disp)
4673 register rtx disp;
4674 {
4675 /* In 64bit mode we can allow direct addresses of symbols and labels
4676 when they are not dynamic symbols. */
4677 if (TARGET_64BIT)
4678 {
4679 rtx x = disp;
4680 if (GET_CODE (disp) == CONST)
4681 x = XEXP (disp, 0);
4682 /* ??? Handle PIC code models */
4683 if (GET_CODE (x) == PLUS
4684 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4685 && ix86_cmodel == CM_SMALL_PIC
4686 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4687 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4688 x = XEXP (x, 0);
4689 if (local_symbolic_operand (x, Pmode))
4690 return 1;
4691 }
4692 if (GET_CODE (disp) != CONST)
4693 return 0;
4694 disp = XEXP (disp, 0);
4695
4696 if (TARGET_64BIT)
4697 {
4698 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
4699 of GOT table references. We should not need these anyway. */
4700 if (GET_CODE (disp) != UNSPEC
4701 || XVECLEN (disp, 0) != 1
4702 || XINT (disp, 1) != 15)
4703 return 0;
4704
4705 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4706 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4707 return 0;
4708 return 1;
4709 }
4710
4711 if (GET_CODE (disp) == PLUS)
4712 {
4713 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4714 return 0;
4715 disp = XEXP (disp, 0);
4716 }
4717
4718 if (GET_CODE (disp) != UNSPEC
4719 || XVECLEN (disp, 0) != 1)
4720 return 0;
4721
4722 /* Must be @GOT or @GOTOFF. */
4723 switch (XINT (disp, 1))
4724 {
4725 case 6: /* @GOT */
4726 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4727
4728 case 7: /* @GOTOFF */
4729 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4730 }
4731
4732 return 0;
4733 }
4734
4735 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4736 memory address for an instruction. The MODE argument is the machine mode
4737 for the MEM expression that wants to use this address.
4738
4739 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4740 convert common non-canonical forms to canonical form so that they will
4741 be recognized. */
4742
4743 int
4744 legitimate_address_p (mode, addr, strict)
4745 enum machine_mode mode;
4746 register rtx addr;
4747 int strict;
4748 {
4749 struct ix86_address parts;
4750 rtx base, index, disp;
4751 HOST_WIDE_INT scale;
4752 const char *reason = NULL;
4753 rtx reason_rtx = NULL_RTX;
4754
4755 if (TARGET_DEBUG_ADDR)
4756 {
4757 fprintf (stderr,
4758 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4759 GET_MODE_NAME (mode), strict);
4760 debug_rtx (addr);
4761 }
4762
4763 if (ix86_decompose_address (addr, &parts) <= 0)
4764 {
4765 reason = "decomposition failed";
4766 goto report_error;
4767 }
4768
4769 base = parts.base;
4770 index = parts.index;
4771 disp = parts.disp;
4772 scale = parts.scale;
4773
4774 /* Validate base register.
4775
4776 Don't allow SUBREG's here, it can lead to spill failures when the base
4777 is one word out of a two word structure, which is represented internally
4778 as a DImode int. */
4779
4780 if (base)
4781 {
4782 rtx reg;
4783 reason_rtx = base;
4784
4785 if (GET_CODE (base) == SUBREG)
4786 reg = SUBREG_REG (base);
4787 else
4788 reg = base;
4789
4790 if (GET_CODE (reg) != REG)
4791 {
4792 reason = "base is not a register";
4793 goto report_error;
4794 }
4795
4796 if (GET_MODE (base) != Pmode)
4797 {
4798 reason = "base is not in Pmode";
4799 goto report_error;
4800 }
4801
4802 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
4803 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
4804 {
4805 reason = "base is not valid";
4806 goto report_error;
4807 }
4808 }
4809
4810 /* Validate index register.
4811
4812 Don't allow SUBREG's here, it can lead to spill failures when the index
4813 is one word out of a two word structure, which is represented internally
4814 as a DImode int. */
4815
4816 if (index)
4817 {
4818 rtx reg;
4819 reason_rtx = index;
4820
4821 if (GET_CODE (index) == SUBREG)
4822 reg = SUBREG_REG (index);
4823 else
4824 reg = index;
4825
4826 if (GET_CODE (reg) != REG)
4827 {
4828 reason = "index is not a register";
4829 goto report_error;
4830 }
4831
4832 if (GET_MODE (index) != Pmode)
4833 {
4834 reason = "index is not in Pmode";
4835 goto report_error;
4836 }
4837
4838 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
4839 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
4840 {
4841 reason = "index is not valid";
4842 goto report_error;
4843 }
4844 }
4845
4846 /* Validate scale factor. */
4847 if (scale != 1)
4848 {
4849 reason_rtx = GEN_INT (scale);
4850 if (!index)
4851 {
4852 reason = "scale without index";
4853 goto report_error;
4854 }
4855
4856 if (scale != 2 && scale != 4 && scale != 8)
4857 {
4858 reason = "scale is not a valid multiplier";
4859 goto report_error;
4860 }
4861 }
4862
4863 /* Validate displacement. */
4864 if (disp)
4865 {
4866 reason_rtx = disp;
4867
4868 if (!CONSTANT_ADDRESS_P (disp))
4869 {
4870 reason = "displacement is not constant";
4871 goto report_error;
4872 }
4873
4874 if (TARGET_64BIT)
4875 {
4876 if (!x86_64_sign_extended_value (disp))
4877 {
4878 reason = "displacement is out of range";
4879 goto report_error;
4880 }
4881 }
4882 else
4883 {
4884 if (GET_CODE (disp) == CONST_DOUBLE)
4885 {
4886 reason = "displacement is a const_double";
4887 goto report_error;
4888 }
4889 }
4890
4891 if (flag_pic && SYMBOLIC_CONST (disp))
4892 {
4893 if (TARGET_64BIT && (index || base))
4894 {
4895 reason = "non-constant pic memory reference";
4896 goto report_error;
4897 }
4898 if (! legitimate_pic_address_disp_p (disp))
4899 {
4900 reason = "displacement is an invalid pic construct";
4901 goto report_error;
4902 }
4903
4904 /* This code used to verify that a symbolic pic displacement
4905 includes the pic_offset_table_rtx register.
4906
4907 While this is a good idea, unfortunately these constructs may
4908 be created by the "adds using lea" optimization for incorrect
4909 code like:
4910
4911 int a;
4912 int foo(int i)
4913 {
4914 return *(&a+i);
4915 }
4916
4917 This code is nonsensical, but results in addressing the
4918 GOT table with a pic_offset_table_rtx base. We can't
4919 just refuse it easily, since it gets matched by the
4920 "addsi3" pattern, which later gets split to lea in the
4921 case the output register differs from the input. While this
4922 can be handled by a separate addsi pattern for this case
4923 that never results in lea, disabling this test seems to be
4924 the easier and correct fix for the crash. */
4925 }
4926 else if (HALF_PIC_P ())
4927 {
4928 if (! HALF_PIC_ADDRESS_P (disp)
4929 || (base != NULL_RTX || index != NULL_RTX))
4930 {
4931 reason = "displacement is an invalid half-pic reference";
4932 goto report_error;
4933 }
4934 }
4935 }
4936
4937 /* Everything looks valid. */
4938 if (TARGET_DEBUG_ADDR)
4939 fprintf (stderr, "Success.\n");
4940 return TRUE;
4941
4942 report_error:
4943 if (TARGET_DEBUG_ADDR)
4944 {
4945 fprintf (stderr, "Error: %s\n", reason);
4946 debug_rtx (reason_rtx);
4947 }
4948 return FALSE;
4949 }
4950 \f
4951 /* Return a unique alias set for the GOT. */
4952
4953 static HOST_WIDE_INT
4954 ix86_GOT_alias_set ()
4955 {
4956 static HOST_WIDE_INT set = -1;
4957 if (set == -1)
4958 set = new_alias_set ();
4959 return set;
4960 }
4961
4962 /* Return a legitimate reference for ORIG (an address) using the
4963 register REG. If REG is 0, a new pseudo is generated.
4964
4965 There are two types of references that must be handled:
4966
4967 1. Global data references must load the address from the GOT, via
4968 the PIC reg. An insn is emitted to do this load, and the reg is
4969 returned.
4970
4971 2. Static data references, constant pool addresses, and code labels
4972 compute the address as an offset from the GOT, whose base is in
4973 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4974 differentiate them from global data objects. The returned
4975 address is the PIC reg + an unspec constant.
4976
4977 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4978 reg also appears in the address. */
4979
4980 rtx
4981 legitimize_pic_address (orig, reg)
4982 rtx orig;
4983 rtx reg;
4984 {
4985 rtx addr = orig;
4986 rtx new = orig;
4987 rtx base;
4988
4989 if (local_symbolic_operand (addr, Pmode))
4990 {
4991 /* In 64bit mode we can address such objects directly. */
4992 if (TARGET_64BIT)
4993 new = addr;
4994 else
4995 {
4996 /* This symbol may be referenced via a displacement from the PIC
4997 base address (@GOTOFF). */
4998
4999 current_function_uses_pic_offset_table = 1;
5000 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
5001 new = gen_rtx_CONST (Pmode, new);
5002 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5003
5004 if (reg != 0)
5005 {
5006 emit_move_insn (reg, new);
5007 new = reg;
5008 }
5009 }
5010 }
5011 else if (GET_CODE (addr) == SYMBOL_REF)
5012 {
5013 if (TARGET_64BIT)
5014 {
5015 current_function_uses_pic_offset_table = 1;
5016 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
5017 new = gen_rtx_CONST (Pmode, new);
5018 new = gen_rtx_MEM (Pmode, new);
5019 RTX_UNCHANGING_P (new) = 1;
5020 set_mem_alias_set (new, ix86_GOT_alias_set ());
5021
5022 if (reg == 0)
5023 reg = gen_reg_rtx (Pmode);
5024 /* Use gen_movsi directly; otherwise the address is loaded
5025 into a register for CSE. We don't want to CSE these addresses;
5026 instead we CSE addresses from the GOT table, so skip this. */
5027 emit_insn (gen_movsi (reg, new));
5028 new = reg;
5029 }
5030 else
5031 {
5032 /* This symbol must be referenced via a load from the
5033 Global Offset Table (@GOT). */
5034
5035 current_function_uses_pic_offset_table = 1;
5036 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
5037 new = gen_rtx_CONST (Pmode, new);
5038 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5039 new = gen_rtx_MEM (Pmode, new);
5040 RTX_UNCHANGING_P (new) = 1;
5041 set_mem_alias_set (new, ix86_GOT_alias_set ());
5042
5043 if (reg == 0)
5044 reg = gen_reg_rtx (Pmode);
5045 emit_move_insn (reg, new);
5046 new = reg;
5047 }
5048 }
5049 else
5050 {
5051 if (GET_CODE (addr) == CONST)
5052 {
5053 addr = XEXP (addr, 0);
5054
5055 /* We must match stuff we generate before. Assume the only
5056 unspecs that can get here are ours. Not that we could do
5057 anything with them anyway... */
5058 if (GET_CODE (addr) == UNSPEC
5059 || (GET_CODE (addr) == PLUS
5060 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5061 return orig;
5062 if (GET_CODE (addr) != PLUS)
5063 abort ();
5064 }
5065 if (GET_CODE (addr) == PLUS)
5066 {
5067 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5068
5069 /* Check first to see if this is a constant offset from a @GOTOFF
5070 symbol reference. */
5071 if (local_symbolic_operand (op0, Pmode)
5072 && GET_CODE (op1) == CONST_INT)
5073 {
5074 if (!TARGET_64BIT)
5075 {
5076 current_function_uses_pic_offset_table = 1;
5077 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5078 new = gen_rtx_PLUS (Pmode, new, op1);
5079 new = gen_rtx_CONST (Pmode, new);
5080 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5081
5082 if (reg != 0)
5083 {
5084 emit_move_insn (reg, new);
5085 new = reg;
5086 }
5087 }
5088 else
5089 {
5090 /* ??? We need to limit offsets here. */
5091 }
5092 }
5093 else
5094 {
5095 base = legitimize_pic_address (XEXP (addr, 0), reg);
5096 new = legitimize_pic_address (XEXP (addr, 1),
5097 base == reg ? NULL_RTX : reg);
5098
5099 if (GET_CODE (new) == CONST_INT)
5100 new = plus_constant (base, INTVAL (new));
5101 else
5102 {
5103 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5104 {
5105 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5106 new = XEXP (new, 1);
5107 }
5108 new = gen_rtx_PLUS (Pmode, base, new);
5109 }
5110 }
5111 }
5112 }
5113 return new;
5114 }
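/* To illustrate the cases above for 32-bit PIC: a local symbol becomes

     (plus (reg ebx) (const (unspec [(symbol_ref "sym")] 7)))

   i.e. sym@GOTOFF(%ebx), while a global symbol becomes a load

     (mem (plus (reg ebx) (const (unspec [(symbol_ref "sym")] 6))))

   i.e. sym@GOT(%ebx), with the unspec numbers matching the @GOTOFF and @GOT
   cases in output_pic_addr_const below.  */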
5115 \f
5116 /* Try machine-dependent ways of modifying an illegitimate address
5117 to be legitimate. If we find one, return the new, valid address.
5118 This macro is used in only one place: `memory_address' in explow.c.
5119
5120 OLDX is the address as it was before break_out_memory_refs was called.
5121 In some cases it is useful to look at this to decide what needs to be done.
5122
5123 MODE and WIN are passed so that this macro can use
5124 GO_IF_LEGITIMATE_ADDRESS.
5125
5126 It is always safe for this macro to do nothing. It exists to recognize
5127 opportunities to optimize the output.
5128
5129 For the 80386, we handle X+REG by loading X into a register R and
5130 using R+REG. R will go in a general reg and indexing will be used.
5131 However, if REG is a broken-out memory address or multiplication,
5132 nothing needs to be done because REG can certainly go in a general reg.
5133
5134 When -fpic is used, special handling is needed for symbolic references.
5135 See comments by legitimize_pic_address in i386.c for details. */
5136
5137 rtx
5138 legitimize_address (x, oldx, mode)
5139 register rtx x;
5140 register rtx oldx ATTRIBUTE_UNUSED;
5141 enum machine_mode mode;
5142 {
5143 int changed = 0;
5144 unsigned log;
5145
5146 if (TARGET_DEBUG_ADDR)
5147 {
5148 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5149 GET_MODE_NAME (mode));
5150 debug_rtx (x);
5151 }
5152
5153 if (flag_pic && SYMBOLIC_CONST (x))
5154 return legitimize_pic_address (x, 0);
5155
5156 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5157 if (GET_CODE (x) == ASHIFT
5158 && GET_CODE (XEXP (x, 1)) == CONST_INT
5159 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5160 {
5161 changed = 1;
5162 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5163 GEN_INT (1 << log));
5164 }
5165
5166 if (GET_CODE (x) == PLUS)
5167 {
5168 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5169
5170 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5171 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5172 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5173 {
5174 changed = 1;
5175 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5176 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5177 GEN_INT (1 << log));
5178 }
5179
5180 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5181 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5182 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5183 {
5184 changed = 1;
5185 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5186 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5187 GEN_INT (1 << log));
5188 }
5189
5190 /* Put multiply first if it isn't already. */
5191 if (GET_CODE (XEXP (x, 1)) == MULT)
5192 {
5193 rtx tmp = XEXP (x, 0);
5194 XEXP (x, 0) = XEXP (x, 1);
5195 XEXP (x, 1) = tmp;
5196 changed = 1;
5197 }
5198
5199 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5200 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5201 created by virtual register instantiation, register elimination, and
5202 similar optimizations. */
5203 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5204 {
5205 changed = 1;
5206 x = gen_rtx_PLUS (Pmode,
5207 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5208 XEXP (XEXP (x, 1), 0)),
5209 XEXP (XEXP (x, 1), 1));
5210 }
5211
5212 /* Canonicalize
5213 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5214 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5215 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5216 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5217 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5218 && CONSTANT_P (XEXP (x, 1)))
5219 {
5220 rtx constant;
5221 rtx other = NULL_RTX;
5222
5223 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5224 {
5225 constant = XEXP (x, 1);
5226 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5227 }
5228 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5229 {
5230 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5231 other = XEXP (x, 1);
5232 }
5233 else
5234 constant = 0;
5235
5236 if (constant)
5237 {
5238 changed = 1;
5239 x = gen_rtx_PLUS (Pmode,
5240 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5241 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5242 plus_constant (other, INTVAL (constant)));
5243 }
5244 }
5245
5246 if (changed && legitimate_address_p (mode, x, FALSE))
5247 return x;
5248
5249 if (GET_CODE (XEXP (x, 0)) == MULT)
5250 {
5251 changed = 1;
5252 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5253 }
5254
5255 if (GET_CODE (XEXP (x, 1)) == MULT)
5256 {
5257 changed = 1;
5258 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5259 }
5260
5261 if (changed
5262 && GET_CODE (XEXP (x, 1)) == REG
5263 && GET_CODE (XEXP (x, 0)) == REG)
5264 return x;
5265
5266 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5267 {
5268 changed = 1;
5269 x = legitimize_pic_address (x, 0);
5270 }
5271
5272 if (changed && legitimate_address_p (mode, x, FALSE))
5273 return x;
5274
5275 if (GET_CODE (XEXP (x, 0)) == REG)
5276 {
5277 register rtx temp = gen_reg_rtx (Pmode);
5278 register rtx val = force_operand (XEXP (x, 1), temp);
5279 if (val != temp)
5280 emit_move_insn (temp, val);
5281
5282 XEXP (x, 1) = temp;
5283 return x;
5284 }
5285
5286 else if (GET_CODE (XEXP (x, 1)) == REG)
5287 {
5288 register rtx temp = gen_reg_rtx (Pmode);
5289 register rtx val = force_operand (XEXP (x, 0), temp);
5290 if (val != temp)
5291 emit_move_insn (temp, val);
5292
5293 XEXP (x, 0) = temp;
5294 return x;
5295 }
5296 }
5297
5298 return x;
5299 }
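/* As a rough example of the canonicalizations above, an address such as

     (plus (reg A) (ashift (reg B) (const_int 2)))

   is first rewritten with the shift as a multiply and the multiply moved to
   the front, giving

     (plus (mult (reg B) (const_int 4)) (reg A))

   which ix86_decompose_address then accepts as a base + index*scale form.  */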
5300 \f
5301 /* Print an integer constant expression in assembler syntax. Addition
5302 and subtraction are the only arithmetic that may appear in these
5303 expressions. FILE is the stdio stream to write to, X is the rtx, and
5304 CODE is the operand print code from the output string. */
5305
5306 static void
5307 output_pic_addr_const (file, x, code)
5308 FILE *file;
5309 rtx x;
5310 int code;
5311 {
5312 char buf[256];
5313
5314 switch (GET_CODE (x))
5315 {
5316 case PC:
5317 if (flag_pic)
5318 putc ('.', file);
5319 else
5320 abort ();
5321 break;
5322
5323 case SYMBOL_REF:
5324 assemble_name (file, XSTR (x, 0));
5325 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5326 fputs ("@PLT", file);
5327 break;
5328
5329 case LABEL_REF:
5330 x = XEXP (x, 0);
5331 /* FALLTHRU */
5332 case CODE_LABEL:
5333 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5334 assemble_name (asm_out_file, buf);
5335 break;
5336
5337 case CONST_INT:
5338 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5339 break;
5340
5341 case CONST:
5342 /* This used to output parentheses around the expression,
5343 but that does not work on the 386 (either ATT or BSD assembler). */
5344 output_pic_addr_const (file, XEXP (x, 0), code);
5345 break;
5346
5347 case CONST_DOUBLE:
5348 if (GET_MODE (x) == VOIDmode)
5349 {
5350 /* We can use %d if the number is <32 bits and positive. */
5351 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5352 fprintf (file, "0x%lx%08lx",
5353 (unsigned long) CONST_DOUBLE_HIGH (x),
5354 (unsigned long) CONST_DOUBLE_LOW (x));
5355 else
5356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5357 }
5358 else
5359 /* We can't handle floating point constants;
5360 PRINT_OPERAND must handle them. */
5361 output_operand_lossage ("floating constant misused");
5362 break;
5363
5364 case PLUS:
5365 /* Some assemblers need integer constants to appear first. */
5366 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5367 {
5368 output_pic_addr_const (file, XEXP (x, 0), code);
5369 putc ('+', file);
5370 output_pic_addr_const (file, XEXP (x, 1), code);
5371 }
5372 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5373 {
5374 output_pic_addr_const (file, XEXP (x, 1), code);
5375 putc ('+', file);
5376 output_pic_addr_const (file, XEXP (x, 0), code);
5377 }
5378 else
5379 abort ();
5380 break;
5381
5382 case MINUS:
5383 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5384 output_pic_addr_const (file, XEXP (x, 0), code);
5385 putc ('-', file);
5386 output_pic_addr_const (file, XEXP (x, 1), code);
5387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5388 break;
5389
5390 case UNSPEC:
5391 if (XVECLEN (x, 0) != 1)
5392 abort ();
5393 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5394 switch (XINT (x, 1))
5395 {
5396 case 6:
5397 fputs ("@GOT", file);
5398 break;
5399 case 7:
5400 fputs ("@GOTOFF", file);
5401 break;
5402 case 8:
5403 fputs ("@PLT", file);
5404 break;
5405 case 15:
5406 fputs ("@GOTPCREL(%RIP)", file);
5407 break;
5408 default:
5409 output_operand_lossage ("invalid UNSPEC as operand");
5410 break;
5411 }
5412 break;
5413
5414 default:
5415 output_operand_lossage ("invalid expression as operand");
5416 }
5417 }
5418
5419 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5420 We need to handle our special PIC relocations. */
5421
5422 void
5423 i386_dwarf_output_addr_const (file, x)
5424 FILE *file;
5425 rtx x;
5426 {
5427 #ifdef ASM_QUAD
5428 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5429 #else
5430 if (TARGET_64BIT)
5431 abort ();
5432 fprintf (file, "%s", ASM_LONG);
5433 #endif
5434 if (flag_pic)
5435 output_pic_addr_const (file, x, '\0');
5436 else
5437 output_addr_const (file, x);
5438 fputc ('\n', file);
5439 }
5440
5441 /* In the name of slightly smaller debug output, and to cater to
5442 general assembler lossage, recognize PIC+GOTOFF and turn it back
5443 into a direct symbol reference. */
5444
5445 rtx
5446 i386_simplify_dwarf_addr (orig_x)
5447 rtx orig_x;
5448 {
5449 rtx x = orig_x, y;
5450
5451 if (GET_CODE (x) == MEM)
5452 x = XEXP (x, 0);
5453
5454 if (TARGET_64BIT)
5455 {
5456 if (GET_CODE (x) != CONST
5457 || GET_CODE (XEXP (x, 0)) != UNSPEC
5458 || XINT (XEXP (x, 0), 1) != 15
5459 || GET_CODE (orig_x) != MEM)
5460 return orig_x;
5461 return XVECEXP (XEXP (x, 0), 0, 0);
5462 }
5463
5464 if (GET_CODE (x) != PLUS
5465 || GET_CODE (XEXP (x, 1)) != CONST)
5466 return orig_x;
5467
5468 if (GET_CODE (XEXP (x, 0)) == REG
5469 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5470 /* %ebx + GOT/GOTOFF */
5471 y = NULL;
5472 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5473 {
5474 /* %ebx + %reg * scale + GOT/GOTOFF */
5475 y = XEXP (x, 0);
5476 if (GET_CODE (XEXP (y, 0)) == REG
5477 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5478 y = XEXP (y, 1);
5479 else if (GET_CODE (XEXP (y, 1)) == REG
5480 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5481 y = XEXP (y, 0);
5482 else
5483 return orig_x;
5484 if (GET_CODE (y) != REG
5485 && GET_CODE (y) != MULT
5486 && GET_CODE (y) != ASHIFT)
5487 return orig_x;
5488 }
5489 else
5490 return orig_x;
5491
5492 x = XEXP (XEXP (x, 1), 0);
5493 if (GET_CODE (x) == UNSPEC
5494 && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM)
5495 || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM)))
5496 {
5497 if (y)
5498 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5499 return XVECEXP (x, 0, 0);
5500 }
5501
5502 if (GET_CODE (x) == PLUS
5503 && GET_CODE (XEXP (x, 0)) == UNSPEC
5504 && GET_CODE (XEXP (x, 1)) == CONST_INT
5505 && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM)
5506 || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM)))
5507 {
5508 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5509 if (y)
5510 return gen_rtx_PLUS (Pmode, y, x);
5511 return x;
5512 }
5513
5514 return orig_x;
5515 }
5516 \f
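/* Print to FILE the one- or two-letter condition suffix for comparison CODE
in mode MODE. REVERSE nonzero reverses the condition; FP nonzero selects the
alternative spellings some assemblers need for fcmov. */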
5517 static void
5518 put_condition_code (code, mode, reverse, fp, file)
5519 enum rtx_code code;
5520 enum machine_mode mode;
5521 int reverse, fp;
5522 FILE *file;
5523 {
5524 const char *suffix;
5525
5526 if (mode == CCFPmode || mode == CCFPUmode)
5527 {
5528 enum rtx_code second_code, bypass_code;
5529 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5530 if (bypass_code != NIL || second_code != NIL)
5531 abort ();
5532 code = ix86_fp_compare_code_to_integer (code);
5533 mode = CCmode;
5534 }
5535 if (reverse)
5536 code = reverse_condition (code);
5537
5538 switch (code)
5539 {
5540 case EQ:
5541 suffix = "e";
5542 break;
5543 case NE:
5544 suffix = "ne";
5545 break;
5546 case GT:
5547 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5548 abort ();
5549 suffix = "g";
5550 break;
5551 case GTU:
5552 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5553 Those same assemblers have the same but opposite lossage on cmov. */
5554 if (mode != CCmode)
5555 abort ();
5556 suffix = fp ? "nbe" : "a";
5557 break;
5558 case LT:
5559 if (mode == CCNOmode || mode == CCGOCmode)
5560 suffix = "s";
5561 else if (mode == CCmode || mode == CCGCmode)
5562 suffix = "l";
5563 else
5564 abort ();
5565 break;
5566 case LTU:
5567 if (mode != CCmode)
5568 abort ();
5569 suffix = "b";
5570 break;
5571 case GE:
5572 if (mode == CCNOmode || mode == CCGOCmode)
5573 suffix = "ns";
5574 else if (mode == CCmode || mode == CCGCmode)
5575 suffix = "ge";
5576 else
5577 abort ();
5578 break;
5579 case GEU:
5580 /* ??? As above. */
5581 if (mode != CCmode)
5582 abort ();
5583 suffix = fp ? "nb" : "ae";
5584 break;
5585 case LE:
5586 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5587 abort ();
5588 suffix = "le";
5589 break;
5590 case LEU:
5591 if (mode != CCmode)
5592 abort ();
5593 suffix = "be";
5594 break;
5595 case UNORDERED:
5596 suffix = fp ? "u" : "p";
5597 break;
5598 case ORDERED:
5599 suffix = fp ? "nu" : "np";
5600 break;
5601 default:
5602 abort ();
5603 }
5604 fputs (suffix, file);
5605 }
5606
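/* Print to FILE the name of register X, using the size/letter CODE described
in the print_operand comment below. */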
5607 void
5608 print_reg (x, code, file)
5609 rtx x;
5610 int code;
5611 FILE *file;
5612 {
5613 if (REGNO (x) == ARG_POINTER_REGNUM
5614 || REGNO (x) == FRAME_POINTER_REGNUM
5615 || REGNO (x) == FLAGS_REG
5616 || REGNO (x) == FPSR_REG)
5617 abort ();
5618
5619 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5620 putc ('%', file);
5621
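/* Translate the size letter into an operand width in bytes (0 and 3 are the
special QImode-high and st(0) cases handled below). */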
5622 if (code == 'w' || MMX_REG_P (x))
5623 code = 2;
5624 else if (code == 'b')
5625 code = 1;
5626 else if (code == 'k')
5627 code = 4;
5628 else if (code == 'q')
5629 code = 8;
5630 else if (code == 'y')
5631 code = 3;
5632 else if (code == 'h')
5633 code = 0;
5634 else
5635 code = GET_MODE_SIZE (GET_MODE (x));
5636
5637 /* Irritatingly, the AMD extended registers use a different naming convention
5638 from the normal registers. */
5639 if (REX_INT_REG_P (x))
5640 {
5641 if (!TARGET_64BIT)
5642 abort ();
5643 switch (code)
5644 {
5645 case 0:
5646 error ("extended registers have no high halves");
5647 break;
5648 case 1:
5649 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5650 break;
5651 case 2:
5652 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5653 break;
5654 case 4:
5655 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5656 break;
5657 case 8:
5658 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5659 break;
5660 default:
5661 error ("unsupported operand size for extended register");
5662 break;
5663 }
5664 return;
5665 }
5666 switch (code)
5667 {
5668 case 3:
5669 if (STACK_TOP_P (x))
5670 {
5671 fputs ("st(0)", file);
5672 break;
5673 }
5674 /* FALLTHRU */
5675 case 8:
5676 case 4:
5677 case 12:
5678 if (! ANY_FP_REG_P (x))
5679 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5680 /* FALLTHRU */
5681 case 16:
5682 case 2:
5683 fputs (hi_reg_name[REGNO (x)], file);
5684 break;
5685 case 1:
5686 fputs (qi_reg_name[REGNO (x)], file);
5687 break;
5688 case 0:
5689 fputs (qi_high_reg_name[REGNO (x)], file);
5690 break;
5691 default:
5692 abort ();
5693 }
5694 }
5695
5696 /* Meaning of CODE:
5697 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5698 C -- print opcode suffix for set/cmov insn.
5699 c -- like C, but print reversed condition
5700 F,f -- likewise, but for floating-point.
5701 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5702 nothing
5703 R -- print the prefix for register names.
5704 z -- print the opcode suffix for the size of the current operand.
5705 * -- print a star (in certain assembler syntax)
5706 A -- print an absolute memory reference.
5707 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5708 s -- print a shift double count, followed by the assembler's argument
5709 delimiter.
5710 b -- print the QImode name of the register for the indicated operand.
5711 %b0 would print %al if operands[0] is reg 0.
5712 w -- likewise, print the HImode name of the register.
5713 k -- likewise, print the SImode name of the register.
5714 q -- likewise, print the DImode name of the register.
5715 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5716 y -- print "st(0)" instead of "st" as a register.
5717 D -- print condition for SSE cmp instruction.
5718 P -- if PIC, print an @PLT suffix.
5719 X -- don't print any sort of PIC '@' suffix for a symbol.
5720 */
5721
5722 void
5723 print_operand (file, x, code)
5724 FILE *file;
5725 rtx x;
5726 int code;
5727 {
5728 if (code)
5729 {
5730 switch (code)
5731 {
5732 case '*':
5733 if (ASSEMBLER_DIALECT == ASM_ATT)
5734 putc ('*', file);
5735 return;
5736
5737 case 'A':
5738 if (ASSEMBLER_DIALECT == ASM_ATT)
5739 putc ('*', file);
5740 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5741 {
5742 /* Intel syntax. For absolute addresses, registers should not
5743 be surrounded by brackets. */
5744 if (GET_CODE (x) != REG)
5745 {
5746 putc ('[', file);
5747 PRINT_OPERAND (file, x, 0);
5748 putc (']', file);
5749 return;
5750 }
5751 }
5752 else
5753 abort ();
5754
5755 PRINT_OPERAND (file, x, 0);
5756 return;
5757
5758
5759 case 'L':
5760 if (ASSEMBLER_DIALECT == ASM_ATT)
5761 putc ('l', file);
5762 return;
5763
5764 case 'W':
5765 if (ASSEMBLER_DIALECT == ASM_ATT)
5766 putc ('w', file);
5767 return;
5768
5769 case 'B':
5770 if (ASSEMBLER_DIALECT == ASM_ATT)
5771 putc ('b', file);
5772 return;
5773
5774 case 'Q':
5775 if (ASSEMBLER_DIALECT == ASM_ATT)
5776 putc ('l', file);
5777 return;
5778
5779 case 'S':
5780 if (ASSEMBLER_DIALECT == ASM_ATT)
5781 putc ('s', file);
5782 return;
5783
5784 case 'T':
5785 if (ASSEMBLER_DIALECT == ASM_ATT)
5786 putc ('t', file);
5787 return;
5788
5789 case 'z':
5790 /* 387 opcodes don't get size suffixes if the operands are
5791 registers. */
5792 if (STACK_REG_P (x))
5793 return;
5794
5795 /* Likewise if using Intel opcodes. */
5796 if (ASSEMBLER_DIALECT == ASM_INTEL)
5797 return;
5798
5799 /* This is the size of op from size of operand. */
5800 switch (GET_MODE_SIZE (GET_MODE (x)))
5801 {
5802 case 2:
5803 #ifdef HAVE_GAS_FILDS_FISTS
5804 putc ('s', file);
5805 #endif
5806 return;
5807
5808 case 4:
5809 if (GET_MODE (x) == SFmode)
5810 {
5811 putc ('s', file);
5812 return;
5813 }
5814 else
5815 putc ('l', file);
5816 return;
5817
5818 case 12:
5819 case 16:
5820 putc ('t', file);
5821 return;
5822
5823 case 8:
5824 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5825 {
5826 #ifdef GAS_MNEMONICS
5827 putc ('q', file);
5828 #else
5829 putc ('l', file);
5830 putc ('l', file);
5831 #endif
5832 }
5833 else
5834 putc ('l', file);
5835 return;
5836
5837 default:
5838 abort ();
5839 }
5840
5841 case 'b':
5842 case 'w':
5843 case 'k':
5844 case 'q':
5845 case 'h':
5846 case 'y':
5847 case 'X':
5848 case 'P':
5849 break;
5850
5851 case 's':
5852 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5853 {
5854 PRINT_OPERAND (file, x, 0);
5855 putc (',', file);
5856 }
5857 return;
5858
5859 case 'D':
5860 /* Little bit of braindamage here. The SSE compare instructions
5861 use completely different names for the comparisons than the
5862 fp conditional moves do. */
5863 switch (GET_CODE (x))
5864 {
5865 case EQ:
5866 case UNEQ:
5867 fputs ("eq", file);
5868 break;
5869 case LT:
5870 case UNLT:
5871 fputs ("lt", file);
5872 break;
5873 case LE:
5874 case UNLE:
5875 fputs ("le", file);
5876 break;
5877 case UNORDERED:
5878 fputs ("unord", file);
5879 break;
5880 case NE:
5881 case LTGT:
5882 fputs ("neq", file);
5883 break;
5884 case UNGE:
5885 case GE:
5886 fputs ("nlt", file);
5887 break;
5888 case UNGT:
5889 case GT:
5890 fputs ("nle", file);
5891 break;
5892 case ORDERED:
5893 fputs ("ord", file);
5894 break;
5895 default:
5896 abort ();
5897 break;
5898 }
5899 return;
5900 case 'O':
5901 #ifdef CMOV_SUN_AS_SYNTAX
5902 if (ASSEMBLER_DIALECT == ASM_ATT)
5903 {
5904 switch (GET_MODE (x))
5905 {
5906 case HImode: putc ('w', file); break;
5907 case SImode:
5908 case SFmode: putc ('l', file); break;
5909 case DImode:
5910 case DFmode: putc ('q', file); break;
5911 default: abort ();
5912 }
5913 putc ('.', file);
5914 }
5915 #endif
5916 return;
5917 case 'C':
5918 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5919 return;
5920 case 'F':
5921 #ifdef CMOV_SUN_AS_SYNTAX
5922 if (ASSEMBLER_DIALECT == ASM_ATT)
5923 putc ('.', file);
5924 #endif
5925 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5926 return;
5927
5928 /* Like above, but reverse condition */
5929 case 'c':
5930 /* Check to see if argument to %c is really a constant
5931 and not a condition code which needs to be reversed. */
5932 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5933 {
5934 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5935 return;
5936 }
5937 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5938 return;
5939 case 'f':
5940 #ifdef CMOV_SUN_AS_SYNTAX
5941 if (ASSEMBLER_DIALECT == ASM_ATT)
5942 putc ('.', file);
5943 #endif
5944 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5945 return;
5946 case '+':
5947 {
5948 rtx x;
5949
5950 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5951 return;
5952
5953 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5954 if (x)
5955 {
5956 int pred_val = INTVAL (XEXP (x, 0));
5957
5958 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5959 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5960 {
5961 int taken = pred_val > REG_BR_PROB_BASE / 2;
5962 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5963
5964 /* Emit hints only in the case the default branch prediction
5965 heuristics would fail. */
5966 if (taken != cputaken)
5967 {
5968 /* We use 3e (DS) prefix for taken branches and
5969 2e (CS) prefix for not taken branches. */
5970 if (taken)
5971 fputs ("ds ; ", file);
5972 else
5973 fputs ("cs ; ", file);
5974 }
5975 }
5976 }
5977 return;
5978 }
5979 default:
5980 output_operand_lossage ("invalid operand code `%c'", code);
5981 }
5982 }
5983
5984 if (GET_CODE (x) == REG)
5985 {
5986 PRINT_REG (x, code, file);
5987 }
5988
5989 else if (GET_CODE (x) == MEM)
5990 {
5991 /* No `byte ptr' prefix for call instructions. */
5992 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5993 {
5994 const char * size;
5995 switch (GET_MODE_SIZE (GET_MODE (x)))
5996 {
5997 case 1: size = "BYTE"; break;
5998 case 2: size = "WORD"; break;
5999 case 4: size = "DWORD"; break;
6000 case 8: size = "QWORD"; break;
6001 case 12: size = "XWORD"; break;
6002 case 16: size = "XMMWORD"; break;
6003 default:
6004 abort ();
6005 }
6006
6007 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6008 if (code == 'b')
6009 size = "BYTE";
6010 else if (code == 'w')
6011 size = "WORD";
6012 else if (code == 'k')
6013 size = "DWORD";
6014
6015 fputs (size, file);
6016 fputs (" PTR ", file);
6017 }
6018
6019 x = XEXP (x, 0);
6020 if (flag_pic && CONSTANT_ADDRESS_P (x))
6021 output_pic_addr_const (file, x, code);
6022 /* Avoid (%rip) for call operands. */
6023 else if (CONSTANT_ADDRESS_P (x) && code =='P'
6024 && GET_CODE (x) != CONST_INT)
6025 output_addr_const (file, x);
6026 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6027 output_operand_lossage ("invalid constraints for operand");
6028 else
6029 output_address (x);
6030 }
6031
6032 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6033 {
6034 REAL_VALUE_TYPE r;
6035 long l;
6036
6037 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6038 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6039
6040 if (ASSEMBLER_DIALECT == ASM_ATT)
6041 putc ('$', file);
6042 fprintf (file, "0x%lx", l);
6043 }
6044
6045 /* These float cases don't actually occur as immediate operands. */
6046 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6047 {
6048 REAL_VALUE_TYPE r;
6049 char dstr[30];
6050
6051 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6052 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6053 fprintf (file, "%s", dstr);
6054 }
6055
6056 else if (GET_CODE (x) == CONST_DOUBLE
6057 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6058 {
6059 REAL_VALUE_TYPE r;
6060 char dstr[30];
6061
6062 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6063 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6064 fprintf (file, "%s", dstr);
6065 }
6066 else
6067 {
6068 if (code != 'P')
6069 {
6070 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6071 {
6072 if (ASSEMBLER_DIALECT == ASM_ATT)
6073 putc ('$', file);
6074 }
6075 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6076 || GET_CODE (x) == LABEL_REF)
6077 {
6078 if (ASSEMBLER_DIALECT == ASM_ATT)
6079 putc ('$', file);
6080 else
6081 fputs ("OFFSET FLAT:", file);
6082 }
6083 }
6084 if (GET_CODE (x) == CONST_INT)
6085 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6086 else if (flag_pic)
6087 output_pic_addr_const (file, x, code);
6088 else
6089 output_addr_const (file, x);
6090 }
6091 }
6092 \f
6093 /* Print a memory operand whose address is ADDR. */
6094
6095 void
6096 print_operand_address (file, addr)
6097 FILE *file;
6098 register rtx addr;
6099 {
6100 struct ix86_address parts;
6101 rtx base, index, disp;
6102 int scale;
6103
6104 if (! ix86_decompose_address (addr, &parts))
6105 abort ();
6106
6107 base = parts.base;
6108 index = parts.index;
6109 disp = parts.disp;
6110 scale = parts.scale;
6111
6112 if (!base && !index)
6113 {
6114 /* A displacement-only address requires special attention. */
6115
6116 if (GET_CODE (disp) == CONST_INT)
6117 {
6118 if (ASSEMBLER_DIALECT == ASM_INTEL)
6119 {
6120 if (USER_LABEL_PREFIX[0] == 0)
6121 putc ('%', file);
6122 fputs ("ds:", file);
6123 }
6124 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6125 }
6126 else if (flag_pic)
6127 output_pic_addr_const (file, addr, 0);
6128 else
6129 output_addr_const (file, addr);
6130
6131 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
6132 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6133 fputs ("(%rip)", file);
6134 }
6135 else
6136 {
6137 if (ASSEMBLER_DIALECT == ASM_ATT)
6138 {
6139 if (disp)
6140 {
6141 if (flag_pic)
6142 output_pic_addr_const (file, disp, 0);
6143 else if (GET_CODE (disp) == LABEL_REF)
6144 output_asm_label (disp);
6145 else
6146 output_addr_const (file, disp);
6147 }
6148
6149 putc ('(', file);
6150 if (base)
6151 PRINT_REG (base, 0, file);
6152 if (index)
6153 {
6154 putc (',', file);
6155 PRINT_REG (index, 0, file);
6156 if (scale != 1)
6157 fprintf (file, ",%d", scale);
6158 }
6159 putc (')', file);
6160 }
6161 else
6162 {
6163 rtx offset = NULL_RTX;
6164
6165 if (disp)
6166 {
6167 /* Pull out the offset of a symbol; print any symbol itself. */
6168 if (GET_CODE (disp) == CONST
6169 && GET_CODE (XEXP (disp, 0)) == PLUS
6170 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6171 {
6172 offset = XEXP (XEXP (disp, 0), 1);
6173 disp = gen_rtx_CONST (VOIDmode,
6174 XEXP (XEXP (disp, 0), 0));
6175 }
6176
6177 if (flag_pic)
6178 output_pic_addr_const (file, disp, 0);
6179 else if (GET_CODE (disp) == LABEL_REF)
6180 output_asm_label (disp);
6181 else if (GET_CODE (disp) == CONST_INT)
6182 offset = disp;
6183 else
6184 output_addr_const (file, disp);
6185 }
6186
6187 putc ('[', file);
6188 if (base)
6189 {
6190 PRINT_REG (base, 0, file);
6191 if (offset)
6192 {
6193 if (INTVAL (offset) >= 0)
6194 putc ('+', file);
6195 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6196 }
6197 }
6198 else if (offset)
6199 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6200 else
6201 putc ('0', file);
6202
6203 if (index)
6204 {
6205 putc ('+', file);
6206 PRINT_REG (index, 0, file);
6207 if (scale != 1)
6208 fprintf (file, "*%d", scale);
6209 }
6210 putc (']', file);
6211 }
6212 }
6213 }
6214 \f
6215 /* Split one or more DImode RTL references into pairs of SImode
6216 references. The RTL can be REG, offsettable MEM, integer constant, or
6217 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6218 split and "num" is its length. lo_half and hi_half are output arrays
6219 that parallel "operands". */
6220
6221 void
6222 split_di (operands, num, lo_half, hi_half)
6223 rtx operands[];
6224 int num;
6225 rtx lo_half[], hi_half[];
6226 {
6227 while (num--)
6228 {
6229 rtx op = operands[num];
6230
6231 /* simplify_subreg refuses to split volatile memory addresses,
6232 but we still have to handle them. */
6233 if (GET_CODE (op) == MEM)
6234 {
6235 lo_half[num] = adjust_address (op, SImode, 0);
6236 hi_half[num] = adjust_address (op, SImode, 4);
6237 }
6238 else
6239 {
6240 lo_half[num] = simplify_gen_subreg (SImode, op,
6241 GET_MODE (op) == VOIDmode
6242 ? DImode : GET_MODE (op), 0);
6243 hi_half[num] = simplify_gen_subreg (SImode, op,
6244 GET_MODE (op) == VOIDmode
6245 ? DImode : GET_MODE (op), 4);
6246 }
6247 }
6248 }
6249 /* Split one or more TImode RTL references into pairs of DImode
6250 references. The RTL can be REG, offsettable MEM, integer constant, or
6251 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6252 split and "num" is its length. lo_half and hi_half are output arrays
6253 that parallel "operands". */
6254
6255 void
6256 split_ti (operands, num, lo_half, hi_half)
6257 rtx operands[];
6258 int num;
6259 rtx lo_half[], hi_half[];
6260 {
6261 while (num--)
6262 {
6263 rtx op = operands[num];
6264
6265 /* simplify_subreg refuses to split volatile memory addresses, but we
6266 still have to handle them. */
6267 if (GET_CODE (op) == MEM)
6268 {
6269 lo_half[num] = adjust_address (op, DImode, 0);
6270 hi_half[num] = adjust_address (op, DImode, 8);
6271 }
6272 else
6273 {
6274 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6275 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6276 }
6277 }
6278 }
6279 \f
6280 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6281 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6282 is the expression of the binary operation. The output may either be
6283 emitted here, or returned to the caller, like all output_* functions.
6284
6285 There is no guarantee that the operands are the same mode, as they
6286 might be within FLOAT or FLOAT_EXTEND expressions. */
6287
6288 #ifndef SYSV386_COMPAT
6289 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6290 wants to fix the assemblers because that causes incompatibility
6291 with gcc. No-one wants to fix gcc because that causes
6292 incompatibility with assemblers... You can use the option of
6293 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6294 #define SYSV386_COMPAT 1
6295 #endif
6296
6297 const char *
6298 output_387_binary_op (insn, operands)
6299 rtx insn;
6300 rtx *operands;
6301 {
6302 static char buf[30];
6303 const char *p;
6304 const char *ssep;
6305 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6306
6307 #ifdef ENABLE_CHECKING
6308 /* Even if we do not want to check the inputs, this documents the input
6309 constraints, which helps in understanding the following code. */
6310 if (STACK_REG_P (operands[0])
6311 && ((REG_P (operands[1])
6312 && REGNO (operands[0]) == REGNO (operands[1])
6313 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6314 || (REG_P (operands[2])
6315 && REGNO (operands[0]) == REGNO (operands[2])
6316 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6317 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6318 ; /* ok */
6319 else if (!is_sse)
6320 abort ();
6321 #endif
6322
6323 switch (GET_CODE (operands[3]))
6324 {
6325 case PLUS:
6326 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6327 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6328 p = "fiadd";
6329 else
6330 p = "fadd";
6331 ssep = "add";
6332 break;
6333
6334 case MINUS:
6335 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6336 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6337 p = "fisub";
6338 else
6339 p = "fsub";
6340 ssep = "sub";
6341 break;
6342
6343 case MULT:
6344 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6345 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6346 p = "fimul";
6347 else
6348 p = "fmul";
6349 ssep = "mul";
6350 break;
6351
6352 case DIV:
6353 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6354 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6355 p = "fidiv";
6356 else
6357 p = "fdiv";
6358 ssep = "div";
6359 break;
6360
6361 default:
6362 abort ();
6363 }
6364
6365 if (is_sse)
6366 {
6367 strcpy (buf, ssep);
6368 if (GET_MODE (operands[0]) == SFmode)
6369 strcat (buf, "ss\t{%2, %0|%0, %2}");
6370 else
6371 strcat (buf, "sd\t{%2, %0|%0, %2}");
6372 return buf;
6373 }
6374 strcpy (buf, p);
6375
6376 switch (GET_CODE (operands[3]))
6377 {
6378 case MULT:
6379 case PLUS:
6380 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6381 {
6382 rtx temp = operands[2];
6383 operands[2] = operands[1];
6384 operands[1] = temp;
6385 }
6386
6387 /* We now know that operands[0] == operands[1]. */
6388
6389 if (GET_CODE (operands[2]) == MEM)
6390 {
6391 p = "%z2\t%2";
6392 break;
6393 }
6394
6395 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6396 {
6397 if (STACK_TOP_P (operands[0]))
6398 /* How is it that we are storing to a dead operand[2]?
6399 Well, presumably operands[1] is dead too. We can't
6400 store the result to st(0) as st(0) gets popped on this
6401 instruction. Instead store to operands[2] (which I
6402 think has to be st(1)). st(1) will be popped later.
6403 gcc <= 2.8.1 didn't have this check and generated
6404 assembly code that the Unixware assembler rejected. */
6405 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6406 else
6407 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6408 break;
6409 }
6410
6411 if (STACK_TOP_P (operands[0]))
6412 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6413 else
6414 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6415 break;
6416
6417 case MINUS:
6418 case DIV:
6419 if (GET_CODE (operands[1]) == MEM)
6420 {
6421 p = "r%z1\t%1";
6422 break;
6423 }
6424
6425 if (GET_CODE (operands[2]) == MEM)
6426 {
6427 p = "%z2\t%2";
6428 break;
6429 }
6430
6431 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6432 {
6433 #if SYSV386_COMPAT
6434 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6435 derived assemblers, confusingly reverse the direction of
6436 the operation for fsub{r} and fdiv{r} when the
6437 destination register is not st(0). The Intel assembler
6438 doesn't have this brain damage. Read !SYSV386_COMPAT to
6439 figure out what the hardware really does. */
6440 if (STACK_TOP_P (operands[0]))
6441 p = "{p\t%0, %2|rp\t%2, %0}";
6442 else
6443 p = "{rp\t%2, %0|p\t%0, %2}";
6444 #else
6445 if (STACK_TOP_P (operands[0]))
6446 /* As above for fmul/fadd, we can't store to st(0). */
6447 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6448 else
6449 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6450 #endif
6451 break;
6452 }
6453
6454 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6455 {
6456 #if SYSV386_COMPAT
6457 if (STACK_TOP_P (operands[0]))
6458 p = "{rp\t%0, %1|p\t%1, %0}";
6459 else
6460 p = "{p\t%1, %0|rp\t%0, %1}";
6461 #else
6462 if (STACK_TOP_P (operands[0]))
6463 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6464 else
6465 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6466 #endif
6467 break;
6468 }
6469
6470 if (STACK_TOP_P (operands[0]))
6471 {
6472 if (STACK_TOP_P (operands[1]))
6473 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6474 else
6475 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6476 break;
6477 }
6478 else if (STACK_TOP_P (operands[1]))
6479 {
6480 #if SYSV386_COMPAT
6481 p = "{\t%1, %0|r\t%0, %1}";
6482 #else
6483 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6484 #endif
6485 }
6486 else
6487 {
6488 #if SYSV386_COMPAT
6489 p = "{r\t%2, %0|\t%0, %2}";
6490 #else
6491 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6492 #endif
6493 }
6494 break;
6495
6496 default:
6497 abort ();
6498 }
6499
6500 strcat (buf, p);
6501 return buf;
6502 }
6503
6504 /* Output code to initialize the control word copies used by
6505 the trunc?f?i patterns. NORMAL is set to the current control word, while
6506 ROUND_DOWN is set to a control word rounding downwards. */
6507 void
6508 emit_i387_cw_initialization (normal, round_down)
6509 rtx normal, round_down;
6510 {
6511 rtx reg = gen_reg_rtx (HImode);
6512
6513 emit_insn (gen_x86_fnstcw_1 (normal));
6514 emit_move_insn (reg, normal);
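/* Set the rounding control bits in the copied control word, either with a
partial-register insert or by ORing in 0xc00. */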
6515 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6516 && !TARGET_64BIT)
6517 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6518 else
6519 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6520 emit_move_insn (round_down, reg);
6521 }
6522
6523 /* Output code for INSN to convert a float to a signed int. OPERANDS
6524 are the insn operands. The output may be [HSD]Imode and the input
6525 operand may be [SDX]Fmode. */
6526
6527 const char *
6528 output_fix_trunc (insn, operands)
6529 rtx insn;
6530 rtx *operands;
6531 {
6532 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6533 int dimode_p = GET_MODE (operands[0]) == DImode;
6534
6535 /* Jump through a hoop or two for DImode, since the hardware has no
6536 non-popping instruction. We used to do this a different way, but
6537 that was somewhat fragile and broke with post-reload splitters. */
6538 if (dimode_p && !stack_top_dies)
6539 output_asm_insn ("fld\t%y1", operands);
6540
6541 if (!STACK_TOP_P (operands[1]))
6542 abort ();
6543
6544 if (GET_CODE (operands[0]) != MEM)
6545 abort ();
6546
6547 output_asm_insn ("fldcw\t%3", operands);
6548 if (stack_top_dies || dimode_p)
6549 output_asm_insn ("fistp%z0\t%0", operands);
6550 else
6551 output_asm_insn ("fist%z0\t%0", operands);
6552 output_asm_insn ("fldcw\t%2", operands);
6553
6554 return "";
6555 }
6556
6557 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6558 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6559 when fucom should be used. */
6560
6561 const char *
6562 output_fp_compare (insn, operands, eflags_p, unordered_p)
6563 rtx insn;
6564 rtx *operands;
6565 int eflags_p, unordered_p;
6566 {
6567 int stack_top_dies;
6568 rtx cmp_op0 = operands[0];
6569 rtx cmp_op1 = operands[1];
6570 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6571
6572 if (eflags_p == 2)
6573 {
6574 cmp_op0 = cmp_op1;
6575 cmp_op1 = operands[2];
6576 }
6577 if (is_sse)
6578 {
6579 if (GET_MODE (operands[0]) == SFmode)
6580 if (unordered_p)
6581 return "ucomiss\t{%1, %0|%0, %1}";
6582 else
6583 return "comiss\t{%1, %0|%0, %y}";
6584 else
6585 if (unordered_p)
6586 return "ucomisd\t{%1, %0|%0, %1}";
6587 else
6588 return "comisd\t{%1, %0|%0, %y}";
6589 }
6590
6591 if (! STACK_TOP_P (cmp_op0))
6592 abort ();
6593
6594 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6595
6596 if (STACK_REG_P (cmp_op1)
6597 && stack_top_dies
6598 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6599 && REGNO (cmp_op1) != FIRST_STACK_REG)
6600 {
6601 /* If the top of the 387 stack dies, and the other operand
6602 is also a stack register that dies, then this must be a
6603 `fcompp' float compare. */
6604
6605 if (eflags_p == 1)
6606 {
6607 /* There is no double popping fcomi variant. Fortunately,
6608 eflags is immune from the fstp's cc clobbering. */
6609 if (unordered_p)
6610 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6611 else
6612 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6613 return "fstp\t%y0";
6614 }
6615 else
6616 {
6617 if (eflags_p == 2)
6618 {
6619 if (unordered_p)
6620 return "fucompp\n\tfnstsw\t%0";
6621 else
6622 return "fcompp\n\tfnstsw\t%0";
6623 }
6624 else
6625 {
6626 if (unordered_p)
6627 return "fucompp";
6628 else
6629 return "fcompp";
6630 }
6631 }
6632 }
6633 else
6634 {
6635 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6636
6637 static const char * const alt[24] =
6638 {
6639 "fcom%z1\t%y1",
6640 "fcomp%z1\t%y1",
6641 "fucom%z1\t%y1",
6642 "fucomp%z1\t%y1",
6643
6644 "ficom%z1\t%y1",
6645 "ficomp%z1\t%y1",
6646 NULL,
6647 NULL,
6648
6649 "fcomi\t{%y1, %0|%0, %y1}",
6650 "fcomip\t{%y1, %0|%0, %y1}",
6651 "fucomi\t{%y1, %0|%0, %y1}",
6652 "fucomip\t{%y1, %0|%0, %y1}",
6653
6654 NULL,
6655 NULL,
6656 NULL,
6657 NULL,
6658
6659 "fcom%z2\t%y2\n\tfnstsw\t%0",
6660 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6661 "fucom%z2\t%y2\n\tfnstsw\t%0",
6662 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6663
6664 "ficom%z2\t%y2\n\tfnstsw\t%0",
6665 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6666 NULL,
6667 NULL
6668 };
6669
6670 int mask;
6671 const char *ret;
6672
6673 mask = eflags_p << 3;
6674 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6675 mask |= unordered_p << 1;
6676 mask |= stack_top_dies;
6677
6678 if (mask >= 24)
6679 abort ();
6680 ret = alt[mask];
6681 if (ret == NULL)
6682 abort ();
6683
6684 return ret;
6685 }
6686 }
6687
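/* Output one entry of an absolute address vector (jump table): the address
of local label VALUE, as a long or, for 64-bit targets, a quad. */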
6688 void
6689 ix86_output_addr_vec_elt (file, value)
6690 FILE *file;
6691 int value;
6692 {
6693 const char *directive = ASM_LONG;
6694
6695 if (TARGET_64BIT)
6696 {
6697 #ifdef ASM_QUAD
6698 directive = ASM_QUAD;
6699 #else
6700 abort ();
6701 #endif
6702 }
6703
6704 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6705 }
6706
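/* Output one entry of a relative (PIC) address vector: a difference of
labels for 64-bit targets, a @GOTOFF reference when the assembler supports
it, or an offset from _GLOBAL_OFFSET_TABLE_ otherwise. */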
6707 void
6708 ix86_output_addr_diff_elt (file, value, rel)
6709 FILE *file;
6710 int value, rel;
6711 {
6712 if (TARGET_64BIT)
6713 fprintf (file, "%s%s%d-%s%d\n",
6714 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6715 else if (HAVE_AS_GOTOFF_IN_DATA)
6716 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6717 else
6718 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6719 ASM_LONG, LPREFIX, value);
6720 }
6721 \f
6722 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6723 for the target. */
6724
6725 void
6726 ix86_expand_clear (dest)
6727 rtx dest;
6728 {
6729 rtx tmp;
6730
6731 /* We play register width games, which are only valid after reload. */
6732 if (!reload_completed)
6733 abort ();
6734
6735 /* Avoid HImode and its attendant prefix byte. */
6736 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6737 dest = gen_rtx_REG (SImode, REGNO (dest));
6738
6739 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6740
6741 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6742 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6743 {
6744 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6745 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6746 }
6747
6748 emit_insn (tmp);
6749 }
6750
6751 void
6752 ix86_expand_move (mode, operands)
6753 enum machine_mode mode;
6754 rtx operands[];
6755 {
6756 int strict = (reload_in_progress || reload_completed);
6757 rtx insn;
6758
6759 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6760 {
6761 /* Emit insns to move operands[1] into operands[0]. */
6762
6763 if (GET_CODE (operands[0]) == MEM)
6764 operands[1] = force_reg (Pmode, operands[1]);
6765 else
6766 {
6767 rtx temp = operands[0];
6768 if (GET_CODE (temp) != REG)
6769 temp = gen_reg_rtx (Pmode);
6770 temp = legitimize_pic_address (operands[1], temp);
6771 if (temp == operands[0])
6772 return;
6773 operands[1] = temp;
6774 }
6775 }
6776 else
6777 {
6778 if (GET_CODE (operands[0]) == MEM
6779 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6780 || !push_operand (operands[0], mode))
6781 && GET_CODE (operands[1]) == MEM)
6782 operands[1] = force_reg (mode, operands[1]);
6783
6784 if (push_operand (operands[0], mode)
6785 && ! general_no_elim_operand (operands[1], mode))
6786 operands[1] = copy_to_mode_reg (mode, operands[1]);
6787
6788 /* Force large constants in 64bit compilation into register
6789 to get them CSEed. */
6790 if (TARGET_64BIT && mode == DImode
6791 && immediate_operand (operands[1], mode)
6792 && !x86_64_zero_extended_value (operands[1])
6793 && !register_operand (operands[0], mode)
6794 && optimize && !reload_completed && !reload_in_progress)
6795 operands[1] = copy_to_mode_reg (mode, operands[1]);
6796
6797 if (FLOAT_MODE_P (mode))
6798 {
6799 /* If we are loading a floating point constant to a register,
6800 force the value to memory now, since we'll get better code
6801 out the back end. */
6802
6803 if (strict)
6804 ;
6805 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6806 && register_operand (operands[0], mode))
6807 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6808 }
6809 }
6810
6811 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6812
6813 emit_insn (insn);
6814 }
6815
6816 void
6817 ix86_expand_vector_move (mode, operands)
6818 enum machine_mode mode;
6819 rtx operands[];
6820 {
6821 /* Force constants other than zero into memory. We do not know how
6822 the instructions used to build constants modify the upper 64 bits
6823 of the register; once we have that information we may be able
6824 to handle some of them more efficiently. */
6825 if ((reload_in_progress | reload_completed) == 0
6826 && register_operand (operands[0], mode)
6827 && CONSTANT_P (operands[1]))
6828 {
6829 rtx addr = gen_reg_rtx (Pmode);
6830 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6831 operands[1] = gen_rtx_MEM (mode, addr);
6832 }
6833
6834 /* Make operand1 a register if it isn't already. */
6835 if ((reload_in_progress | reload_completed) == 0
6836 && !register_operand (operands[0], mode)
6837 && !register_operand (operands[1], mode)
6838 && operands[1] != CONST0_RTX (mode))
6839 {
6840 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6841 emit_move_insn (operands[0], temp);
6842 return;
6843 }
6844
6845 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6846 }
6847
6848 /* Attempt to expand a binary operator. Make the expansion closer to the
6849 actual machine than just general_operand, which would allow 3 separate
6850 memory references (one output, two inputs) in a single insn. */
6851
6852 void
6853 ix86_expand_binary_operator (code, mode, operands)
6854 enum rtx_code code;
6855 enum machine_mode mode;
6856 rtx operands[];
6857 {
6858 int matching_memory;
6859 rtx src1, src2, dst, op, clob;
6860
6861 dst = operands[0];
6862 src1 = operands[1];
6863 src2 = operands[2];
6864
6865 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6866 if (GET_RTX_CLASS (code) == 'c'
6867 && (rtx_equal_p (dst, src2)
6868 || immediate_operand (src1, mode)))
6869 {
6870 rtx temp = src1;
6871 src1 = src2;
6872 src2 = temp;
6873 }
6874
6875 /* If the destination is memory, and we do not have matching source
6876 operands, do things in registers. */
6877 matching_memory = 0;
6878 if (GET_CODE (dst) == MEM)
6879 {
6880 if (rtx_equal_p (dst, src1))
6881 matching_memory = 1;
6882 else if (GET_RTX_CLASS (code) == 'c'
6883 && rtx_equal_p (dst, src2))
6884 matching_memory = 2;
6885 else
6886 dst = gen_reg_rtx (mode);
6887 }
6888
6889 /* Both source operands cannot be in memory. */
6890 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6891 {
6892 if (matching_memory != 2)
6893 src2 = force_reg (mode, src2);
6894 else
6895 src1 = force_reg (mode, src1);
6896 }
6897
6898 /* If the operation is not commutable, source 1 cannot be a constant
6899 or non-matching memory. */
6900 if ((CONSTANT_P (src1)
6901 || (!matching_memory && GET_CODE (src1) == MEM))
6902 && GET_RTX_CLASS (code) != 'c')
6903 src1 = force_reg (mode, src1);
6904
6905 /* If optimizing, copy to regs to improve CSE */
6906 if (optimize && ! no_new_pseudos)
6907 {
6908 if (GET_CODE (dst) == MEM)
6909 dst = gen_reg_rtx (mode);
6910 if (GET_CODE (src1) == MEM)
6911 src1 = force_reg (mode, src1);
6912 if (GET_CODE (src2) == MEM)
6913 src2 = force_reg (mode, src2);
6914 }
6915
6916 /* Emit the instruction. */
6917
6918 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6919 if (reload_in_progress)
6920 {
6921 /* Reload doesn't know about the flags register, and doesn't know that
6922 it doesn't want to clobber it. We can only do this with PLUS. */
6923 if (code != PLUS)
6924 abort ();
6925 emit_insn (op);
6926 }
6927 else
6928 {
6929 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6930 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6931 }
6932
6933 /* Fix up the destination if needed. */
6934 if (dst != operands[0])
6935 emit_move_insn (operands[0], dst);
6936 }
6937
6938 /* Return TRUE or FALSE depending on whether the binary operator meets the
6939 appropriate constraints. */
6940
6941 int
6942 ix86_binary_operator_ok (code, mode, operands)
6943 enum rtx_code code;
6944 enum machine_mode mode ATTRIBUTE_UNUSED;
6945 rtx operands[3];
6946 {
6947 /* Both source operands cannot be in memory. */
6948 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6949 return 0;
6950 /* If the operation is not commutable, source 1 cannot be a constant. */
6951 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6952 return 0;
6953 /* If the destination is memory, we must have a matching source operand. */
6954 if (GET_CODE (operands[0]) == MEM
6955 && ! (rtx_equal_p (operands[0], operands[1])
6956 || (GET_RTX_CLASS (code) == 'c'
6957 && rtx_equal_p (operands[0], operands[2]))))
6958 return 0;
6959 /* If the operation is not commutable and the source 1 is memory, we must
6960 have a matching destination. */
6961 if (GET_CODE (operands[1]) == MEM
6962 && GET_RTX_CLASS (code) != 'c'
6963 && ! rtx_equal_p (operands[0], operands[1]))
6964 return 0;
6965 return 1;
6966 }
6967
6968 /* Attempt to expand a unary operator. Make the expansion closer to the
6969 actual machine than just general_operand, which would allow 2 separate
6970 memory references (one output, one input) in a single insn. */
6971
6972 void
6973 ix86_expand_unary_operator (code, mode, operands)
6974 enum rtx_code code;
6975 enum machine_mode mode;
6976 rtx operands[];
6977 {
6978 int matching_memory;
6979 rtx src, dst, op, clob;
6980
6981 dst = operands[0];
6982 src = operands[1];
6983
6984 /* If the destination is memory, and we do not have matching source
6985 operands, do things in registers. */
6986 matching_memory = 0;
6987 if (GET_CODE (dst) == MEM)
6988 {
6989 if (rtx_equal_p (dst, src))
6990 matching_memory = 1;
6991 else
6992 dst = gen_reg_rtx (mode);
6993 }
6994
6995 /* When source operand is memory, destination must match. */
6996 if (!matching_memory && GET_CODE (src) == MEM)
6997 src = force_reg (mode, src);
6998
6999 /* If optimizing, copy to regs to improve CSE */
7000 if (optimize && ! no_new_pseudos)
7001 {
7002 if (GET_CODE (dst) == MEM)
7003 dst = gen_reg_rtx (mode);
7004 if (GET_CODE (src) == MEM)
7005 src = force_reg (mode, src);
7006 }
7007
7008 /* Emit the instruction. */
7009
7010 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7011 if (reload_in_progress || code == NOT)
7012 {
7013 /* Reload doesn't know about the flags register, and doesn't know that
7014 it doesn't want to clobber it. */
7015 if (code != NOT)
7016 abort ();
7017 emit_insn (op);
7018 }
7019 else
7020 {
7021 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7022 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7023 }
7024
7025 /* Fix up the destination if needed. */
7026 if (dst != operands[0])
7027 emit_move_insn (operands[0], dst);
7028 }
7029
7030 /* Return TRUE or FALSE depending on whether the unary operator meets the
7031 appropriate constraints. */
7032
7033 int
7034 ix86_unary_operator_ok (code, mode, operands)
7035 enum rtx_code code ATTRIBUTE_UNUSED;
7036 enum machine_mode mode ATTRIBUTE_UNUSED;
7037 rtx operands[2] ATTRIBUTE_UNUSED;
7038 {
7039 /* If one of operands is memory, source and destination must match. */
7040 if ((GET_CODE (operands[0]) == MEM
7041 || GET_CODE (operands[1]) == MEM)
7042 && ! rtx_equal_p (operands[0], operands[1]))
7043 return FALSE;
7044 return TRUE;
7045 }
7046
7047 /* Return TRUE or FALSE depending on whether the first SET in INSN
7048 has source and destination with matching CC modes, and that the
7049 CC mode is at least as constrained as REQ_MODE. */
7050
7051 int
7052 ix86_match_ccmode (insn, req_mode)
7053 rtx insn;
7054 enum machine_mode req_mode;
7055 {
7056 rtx set;
7057 enum machine_mode set_mode;
7058
7059 set = PATTERN (insn);
7060 if (GET_CODE (set) == PARALLEL)
7061 set = XVECEXP (set, 0, 0);
7062 if (GET_CODE (set) != SET)
7063 abort ();
7064 if (GET_CODE (SET_SRC (set)) != COMPARE)
7065 abort ();
7066
7067 set_mode = GET_MODE (SET_DEST (set));
7068 switch (set_mode)
7069 {
7070 case CCNOmode:
7071 if (req_mode != CCNOmode
7072 && (req_mode != CCmode
7073 || XEXP (SET_SRC (set), 1) != const0_rtx))
7074 return 0;
7075 break;
7076 case CCmode:
7077 if (req_mode == CCGCmode)
7078 return 0;
7079 /* FALLTHRU */
7080 case CCGCmode:
7081 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7082 return 0;
7083 /* FALLTHRU */
7084 case CCGOCmode:
7085 if (req_mode == CCZmode)
7086 return 0;
7087 /* FALLTHRU */
7088 case CCZmode:
7089 break;
7090
7091 default:
7092 abort ();
7093 }
7094
7095 return (GET_MODE (SET_SRC (set)) == set_mode);
7096 }
7097
7098 /* Generate insn patterns to do an integer compare of OPERANDS. */
7099
7100 static rtx
7101 ix86_expand_int_compare (code, op0, op1)
7102 enum rtx_code code;
7103 rtx op0, op1;
7104 {
7105 enum machine_mode cmpmode;
7106 rtx tmp, flags;
7107
7108 cmpmode = SELECT_CC_MODE (code, op0, op1);
7109 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7110
7111 /* This is very simple, but making the interface the same as in the
7112 FP case makes the rest of the code easier. */
7113 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7114 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7115
7116 /* Return the test that should be put into the flags user, i.e.
7117 the bcc, scc, or cmov instruction. */
7118 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7119 }
7120
7121 /* Figure out whether to use ordered or unordered fp comparisons.
7122 Return the appropriate mode to use. */
7123
7124 enum machine_mode
7125 ix86_fp_compare_mode (code)
7126 enum rtx_code code ATTRIBUTE_UNUSED;
7127 {
7128 /* ??? In order to make all comparisons reversible, we do all comparisons
7129 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7130 all forms of trapping and nontrapping comparisons, we can make inequality
7131 comparisons trapping again, since that results in better code when using
7132 FCOM based compares. */
7133 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7134 }
7135
7136 enum machine_mode
7137 ix86_cc_mode (code, op0, op1)
7138 enum rtx_code code;
7139 rtx op0, op1;
7140 {
7141 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7142 return ix86_fp_compare_mode (code);
7143 switch (code)
7144 {
7145 /* Only zero flag is needed. */
7146 case EQ: /* ZF=0 */
7147 case NE: /* ZF!=0 */
7148 return CCZmode;
7149 /* Codes needing carry flag. */
7150 case GEU: /* CF=0 */
7151 case GTU: /* CF=0 & ZF=0 */
7152 case LTU: /* CF=1 */
7153 case LEU: /* CF=1 | ZF=1 */
7154 return CCmode;
7155 /* Codes possibly doable only with sign flag when
7156 comparing against zero. */
7157 case GE: /* SF=OF or SF=0 */
7158 case LT: /* SF<>OF or SF=1 */
7159 if (op1 == const0_rtx)
7160 return CCGOCmode;
7161 else
7162 /* For other cases Carry flag is not required. */
7163 return CCGCmode;
7164 /* Codes doable only with the sign flag when comparing
7165 against zero, but we miss the jump instruction for it,
7166 so we need to use relational tests against overflow,
7167 which thus needs to be zero. */
7168 case GT: /* ZF=0 & SF=OF */
7169 case LE: /* ZF=1 | SF<>OF */
7170 if (op1 == const0_rtx)
7171 return CCNOmode;
7172 else
7173 return CCGCmode;
7174 /* The strcmp pattern does a (use flags), and combine may ask us for the
7175 proper mode. */
7176 case USE:
7177 return CCmode;
7178 default:
7179 abort ();
7180 }
7181 }
7182
7183 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7184
7185 int
7186 ix86_use_fcomi_compare (code)
7187 enum rtx_code code ATTRIBUTE_UNUSED;
7188 {
7189 enum rtx_code swapped_code = swap_condition (code);
7190 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7191 || (ix86_fp_comparison_cost (swapped_code)
7192 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7193 }
7194
7195 /* Swap, force into registers, or otherwise massage the two operands
7196 to a fp comparison. The operands are updated in place; the new
7197 comparison code is returned. */
7198
7199 static enum rtx_code
7200 ix86_prepare_fp_compare_args (code, pop0, pop1)
7201 enum rtx_code code;
7202 rtx *pop0, *pop1;
7203 {
7204 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7205 rtx op0 = *pop0, op1 = *pop1;
7206 enum machine_mode op_mode = GET_MODE (op0);
7207 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7208
7209 /* All of the unordered compare instructions only work on registers.
7210 The same is true of the XFmode compare instructions. The same is
7211 true of the fcomi compare instructions. */
7212
7213 if (!is_sse
7214 && (fpcmp_mode == CCFPUmode
7215 || op_mode == XFmode
7216 || op_mode == TFmode
7217 || ix86_use_fcomi_compare (code)))
7218 {
7219 op0 = force_reg (op_mode, op0);
7220 op1 = force_reg (op_mode, op1);
7221 }
7222 else
7223 {
7224 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7225 things around if they appear profitable, otherwise force op0
7226 into a register. */
7227
7228 if (standard_80387_constant_p (op0) == 0
7229 || (GET_CODE (op0) == MEM
7230 && ! (standard_80387_constant_p (op1) == 0
7231 || GET_CODE (op1) == MEM)))
7232 {
7233 rtx tmp;
7234 tmp = op0, op0 = op1, op1 = tmp;
7235 code = swap_condition (code);
7236 }
7237
7238 if (GET_CODE (op0) != REG)
7239 op0 = force_reg (op_mode, op0);
7240
7241 if (CONSTANT_P (op1))
7242 {
7243 if (standard_80387_constant_p (op1))
7244 op1 = force_reg (op_mode, op1);
7245 else
7246 op1 = validize_mem (force_const_mem (op_mode, op1));
7247 }
7248 }
7249
7250 /* Try to rearrange the comparison to make it cheaper. */
7251 if (ix86_fp_comparison_cost (code)
7252 > ix86_fp_comparison_cost (swap_condition (code))
7253 && (GET_CODE (op1) == REG || !no_new_pseudos))
7254 {
7255 rtx tmp;
7256 tmp = op0, op0 = op1, op1 = tmp;
7257 code = swap_condition (code);
7258 if (GET_CODE (op0) != REG)
7259 op0 = force_reg (op_mode, op0);
7260 }
7261
7262 *pop0 = op0;
7263 *pop1 = op1;
7264 return code;
7265 }
7266
7267 /* Convert comparison codes we use to represent FP comparison to integer
7268 code that will result in proper branch. Return UNKNOWN if no such code
7269 is available. */
7270 static enum rtx_code
7271 ix86_fp_compare_code_to_integer (code)
7272 enum rtx_code code;
7273 {
7274 switch (code)
7275 {
7276 case GT:
7277 return GTU;
7278 case GE:
7279 return GEU;
7280 case ORDERED:
7281 case UNORDERED:
7282 return code;
7283 break;
7284 case UNEQ:
7285 return EQ;
7286 break;
7287 case UNLT:
7288 return LTU;
7289 break;
7290 case UNLE:
7291 return LEU;
7292 break;
7293 case LTGT:
7294 return NE;
7295 break;
7296 default:
7297 return UNKNOWN;
7298 }
7299 }
7300
7301 /* Split comparison code CODE into comparisons we can do using branch
7302 instructions. BYPASS_CODE is the comparison code for a branch that will
7303 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7304 is not required, its code is set to NIL.
7305 We never require more than two branches. */
7306 static void
7307 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7308 enum rtx_code code, *bypass_code, *first_code, *second_code;
7309 {
7310 *first_code = code;
7311 *bypass_code = NIL;
7312 *second_code = NIL;
7313
7314 /* The fcomi comparison sets flags as follows:
7315
7316 cmp ZF PF CF
7317 > 0 0 0
7318 < 0 0 1
7319 = 1 0 0
7320 un 1 1 1 */
7321
7322 switch (code)
7323 {
7324 case GT: /* GTU - CF=0 & ZF=0 */
7325 case GE: /* GEU - CF=0 */
7326 case ORDERED: /* PF=0 */
7327 case UNORDERED: /* PF=1 */
7328 case UNEQ: /* EQ - ZF=1 */
7329 case UNLT: /* LTU - CF=1 */
7330 case UNLE: /* LEU - CF=1 | ZF=1 */
7331 case LTGT: /* EQ - ZF=0 */
7332 break;
7333 case LT: /* LTU - CF=1 - fails on unordered */
7334 *first_code = UNLT;
7335 *bypass_code = UNORDERED;
7336 break;
7337 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7338 *first_code = UNLE;
7339 *bypass_code = UNORDERED;
7340 break;
7341 case EQ: /* EQ - ZF=1 - fails on unordered */
7342 *first_code = UNEQ;
7343 *bypass_code = UNORDERED;
7344 break;
7345 case NE: /* NE - ZF=0 - fails on unordered */
7346 *first_code = LTGT;
7347 *second_code = UNORDERED;
7348 break;
7349 case UNGE: /* GEU - CF=0 - fails on unordered */
7350 *first_code = GE;
7351 *second_code = UNORDERED;
7352 break;
7353 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7354 *first_code = GT;
7355 *second_code = UNORDERED;
7356 break;
7357 default:
7358 abort ();
7359 }
7360 if (!TARGET_IEEE_FP)
7361 {
7362 *second_code = NIL;
7363 *bypass_code = NIL;
7364 }
7365 }
7366
7367 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
7368 All of the following functions use the number of instructions as the cost metric.
7369 In the future this should be tweaked to compute bytes for optimize_size and
7370 take into account the performance of various instructions on various CPUs. */
7371 static int
7372 ix86_fp_comparison_arithmetics_cost (code)
7373 enum rtx_code code;
7374 {
7375 if (!TARGET_IEEE_FP)
7376 return 4;
7377 /* The cost of code output by ix86_expand_fp_compare. */
7378 switch (code)
7379 {
7380 case UNLE:
7381 case UNLT:
7382 case LTGT:
7383 case GT:
7384 case GE:
7385 case UNORDERED:
7386 case ORDERED:
7387 case UNEQ:
7388 return 4;
7389 break;
7390 case LT:
7391 case NE:
7392 case EQ:
7393 case UNGE:
7394 return 5;
7395 break;
7396 case LE:
7397 case UNGT:
7398 return 6;
7399 break;
7400 default:
7401 abort ();
7402 }
7403 }
7404
7405 /* Return cost of comparison done using fcomi operation.
7406 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7407 static int
7408 ix86_fp_comparison_fcomi_cost (code)
7409 enum rtx_code code;
7410 {
7411 enum rtx_code bypass_code, first_code, second_code;
7412 /* Return an arbitrarily high cost when the instruction is not supported - this
7413 prevents gcc from using it. */
7414 if (!TARGET_CMOVE)
7415 return 1024;
7416 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7417 return (bypass_code != NIL || second_code != NIL) + 2;
7418 }
7419
7420 /* Return cost of comparison done using sahf operation.
7421 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7422 static int
7423 ix86_fp_comparison_sahf_cost (code)
7424 enum rtx_code code;
7425 {
7426 enum rtx_code bypass_code, first_code, second_code;
7427 /* Return an arbitrarily high cost when the instruction is not preferred - this
7428 keeps gcc from using it. */
7429 if (!TARGET_USE_SAHF && !optimize_size)
7430 return 1024;
7431 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7432 return (bypass_code != NIL || second_code != NIL) + 3;
7433 }
7434
7435 /* Compute cost of the comparison done using any method.
7436 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7437 static int
7438 ix86_fp_comparison_cost (code)
7439 enum rtx_code code;
7440 {
7441 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7442 int min;
7443
7444 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7445 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7446
7447 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7448 if (min > sahf_cost)
7449 min = sahf_cost;
7450 if (min > fcomi_cost)
7451 min = fcomi_cost;
7452 return min;
7453 }
7454
7455 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7456
7457 static rtx
7458 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7459 enum rtx_code code;
7460 rtx op0, op1, scratch;
7461 rtx *second_test;
7462 rtx *bypass_test;
7463 {
7464 enum machine_mode fpcmp_mode, intcmp_mode;
7465 rtx tmp, tmp2;
7466 int cost = ix86_fp_comparison_cost (code);
7467 enum rtx_code bypass_code, first_code, second_code;
7468
7469 fpcmp_mode = ix86_fp_compare_mode (code);
7470 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7471
7472 if (second_test)
7473 *second_test = NULL_RTX;
7474 if (bypass_test)
7475 *bypass_test = NULL_RTX;
7476
7477 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7478
7479 /* Do fcomi/sahf based test when profitable. */
7480 if ((bypass_code == NIL || bypass_test)
7481 && (second_code == NIL || second_test)
7482 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7483 {
7484 if (TARGET_CMOVE)
7485 {
7486 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7487 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7488 tmp);
7489 emit_insn (tmp);
7490 }
7491 else
7492 {
7493 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
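/* Wrapping the comparison in an UNSPEC lets the insn match the pattern that
stores the FP status word into the scratch register (fnstsw) before the sahf. */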
7494 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7495 if (!scratch)
7496 scratch = gen_reg_rtx (HImode);
7497 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7498 emit_insn (gen_x86_sahf_1 (scratch));
7499 }
7500
7501 /* The FP codes work out to act like unsigned. */
7502 intcmp_mode = fpcmp_mode;
7503 code = first_code;
7504 if (bypass_code != NIL)
7505 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7506 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7507 const0_rtx);
7508 if (second_code != NIL)
7509 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7510 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7511 const0_rtx);
7512 }
7513 else
7514 {
7515 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7516 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7517 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7518 if (!scratch)
7519 scratch = gen_reg_rtx (HImode);
7520 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7521
7522 /* In the unordered case, we have to check C2 for NaN's, which
7523 doesn't happen to work out to anything nice combination-wise.
7524 So do some bit twiddling on the value we've got in AH to come
7525 up with an appropriate set of condition codes. */
7526
7527 intcmp_mode = CCNOmode;
7528 switch (code)
7529 {
7530 case GT:
7531 case UNGT:
7532 if (code == GT || !TARGET_IEEE_FP)
7533 {
7534 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7535 code = EQ;
7536 }
7537 else
7538 {
7539 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7540 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7541 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7542 intcmp_mode = CCmode;
7543 code = GEU;
7544 }
7545 break;
7546 case LT:
7547 case UNLT:
7548 if (code == LT && TARGET_IEEE_FP)
7549 {
7550 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7551 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7552 intcmp_mode = CCmode;
7553 code = EQ;
7554 }
7555 else
7556 {
7557 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7558 code = NE;
7559 }
7560 break;
7561 case GE:
7562 case UNGE:
7563 if (code == GE || !TARGET_IEEE_FP)
7564 {
7565 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7566 code = EQ;
7567 }
7568 else
7569 {
7570 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7571 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7572 GEN_INT (0x01)));
7573 code = NE;
7574 }
7575 break;
7576 case LE:
7577 case UNLE:
7578 if (code == LE && TARGET_IEEE_FP)
7579 {
7580 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7581 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7582 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7583 intcmp_mode = CCmode;
7584 code = LTU;
7585 }
7586 else
7587 {
7588 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7589 code = NE;
7590 }
7591 break;
7592 case EQ:
7593 case UNEQ:
7594 if (code == EQ && TARGET_IEEE_FP)
7595 {
7596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7597 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7598 intcmp_mode = CCmode;
7599 code = EQ;
7600 }
7601 else
7602 {
7603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7604 code = NE;
7605 break;
7606 }
7607 break;
7608 case NE:
7609 case LTGT:
7610 if (code == NE && TARGET_IEEE_FP)
7611 {
7612 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7613 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7614 GEN_INT (0x40)));
7615 code = NE;
7616 }
7617 else
7618 {
7619 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7620 code = EQ;
7621 }
7622 break;
7623
7624 case UNORDERED:
7625 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7626 code = NE;
7627 break;
7628 case ORDERED:
7629 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7630 code = EQ;
7631 break;
7632
7633 default:
7634 abort ();
7635 }
7636 }
7637
7638 /* Return the test that should be put into the flags user, i.e.
7639 the bcc, scc, or cmov instruction. */
7640 return gen_rtx_fmt_ee (code, VOIDmode,
7641 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7642 const0_rtx);
7643 }
7644
7645 rtx
7646 ix86_expand_compare (code, second_test, bypass_test)
7647 enum rtx_code code;
7648 rtx *second_test, *bypass_test;
7649 {
7650 rtx op0, op1, ret;
7651 op0 = ix86_compare_op0;
7652 op1 = ix86_compare_op1;
7653
7654 if (second_test)
7655 *second_test = NULL_RTX;
7656 if (bypass_test)
7657 *bypass_test = NULL_RTX;
7658
7659 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7660 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7661 second_test, bypass_test);
7662 else
7663 ret = ix86_expand_int_compare (code, op0, op1);
7664
7665 return ret;
7666 }
7667
7668 /* Return true if the CODE will result in a nontrivial jump sequence. */
7669 bool
7670 ix86_fp_jump_nontrivial_p (code)
7671 enum rtx_code code;
7672 {
7673 enum rtx_code bypass_code, first_code, second_code;
7674 if (!TARGET_CMOVE)
7675 return true;
7676 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7677 return bypass_code != NIL || second_code != NIL;
7678 }
7679
7680 void
7681 ix86_expand_branch (code, label)
7682 enum rtx_code code;
7683 rtx label;
7684 {
7685 rtx tmp;
7686
7687 switch (GET_MODE (ix86_compare_op0))
7688 {
7689 case QImode:
7690 case HImode:
7691 case SImode:
7692 simple:
7693 tmp = ix86_expand_compare (code, NULL, NULL);
7694 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7695 gen_rtx_LABEL_REF (VOIDmode, label),
7696 pc_rtx);
7697 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7698 return;
7699
7700 case SFmode:
7701 case DFmode:
7702 case XFmode:
7703 case TFmode:
7704 {
7705 rtvec vec;
7706 int use_fcomi;
7707 enum rtx_code bypass_code, first_code, second_code;
7708
7709 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7710 &ix86_compare_op1);
7711
7712 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7713
7714 /* Check whether we will use the natural sequence with one jump. If
7715 so, we can expand the jump early. Otherwise delay expansion by
7716 creating a compound insn so as not to confuse the optimizers. */
7717 if (bypass_code == NIL && second_code == NIL
7718 && TARGET_CMOVE)
7719 {
7720 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7721 gen_rtx_LABEL_REF (VOIDmode, label),
7722 pc_rtx, NULL_RTX);
7723 }
7724 else
7725 {
7726 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7727 ix86_compare_op0, ix86_compare_op1);
7728 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7729 gen_rtx_LABEL_REF (VOIDmode, label),
7730 pc_rtx);
7731 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7732
7733 use_fcomi = ix86_use_fcomi_compare (code);
7734 vec = rtvec_alloc (3 + !use_fcomi);
7735 RTVEC_ELT (vec, 0) = tmp;
7736 RTVEC_ELT (vec, 1)
7737 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7738 RTVEC_ELT (vec, 2)
7739 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7740 if (! use_fcomi)
7741 RTVEC_ELT (vec, 3)
7742 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7743
7744 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7745 }
7746 return;
7747 }
7748
7749 case DImode:
7750 if (TARGET_64BIT)
7751 goto simple;
7752 /* Expand DImode branch into multiple compare+branch. */
7753 {
7754 rtx lo[2], hi[2], label2;
7755 enum rtx_code code1, code2, code3;
7756
7757 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7758 {
7759 tmp = ix86_compare_op0;
7760 ix86_compare_op0 = ix86_compare_op1;
7761 ix86_compare_op1 = tmp;
7762 code = swap_condition (code);
7763 }
7764 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7765 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7766
7767 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7768 avoid two branches. This costs one extra insn, so disable when
7769 optimizing for size. */
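/* Illustrative example (added): when comparing a DImode value against
   the constant 0x100000000 on a 32-bit target, hi[1] is 1 and lo[1] is
   const0_rtx, so only hi[0] ^ 1 is computed and lo[0] feeds the orl
   directly.  */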
7770
7771 if ((code == EQ || code == NE)
7772 && (!optimize_size
7773 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7774 {
7775 rtx xor0, xor1;
7776
7777 xor1 = hi[0];
7778 if (hi[1] != const0_rtx)
7779 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7780 NULL_RTX, 0, OPTAB_WIDEN);
7781
7782 xor0 = lo[0];
7783 if (lo[1] != const0_rtx)
7784 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7785 NULL_RTX, 0, OPTAB_WIDEN);
7786
7787 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7788 NULL_RTX, 0, OPTAB_WIDEN);
7789
7790 ix86_compare_op0 = tmp;
7791 ix86_compare_op1 = const0_rtx;
7792 ix86_expand_branch (code, label);
7793 return;
7794 }
7795
7796 /* Otherwise, if we are doing a less-than or greater-or-equal-than
7797 comparison, op1 is a constant, and the low word is zero, then we
7798 can just examine the high word. */
7799
7800 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7801 switch (code)
7802 {
7803 case LT: case LTU: case GE: case GEU:
7804 ix86_compare_op0 = hi[0];
7805 ix86_compare_op1 = hi[1];
7806 ix86_expand_branch (code, label);
7807 return;
7808 default:
7809 break;
7810 }
7811
7812 /* Otherwise, we need two or three jumps. */
7813
7814 label2 = gen_label_rtx ();
7815
7816 code1 = code;
7817 code2 = swap_condition (code);
7818 code3 = unsigned_condition (code);
7819
7820 switch (code)
7821 {
7822 case LT: case GT: case LTU: case GTU:
7823 break;
7824
7825 case LE: code1 = LT; code2 = GT; break;
7826 case GE: code1 = GT; code2 = LT; break;
7827 case LEU: code1 = LTU; code2 = GTU; break;
7828 case GEU: code1 = GTU; code2 = LTU; break;
7829
7830 case EQ: code1 = NIL; code2 = NE; break;
7831 case NE: code2 = NIL; break;
7832
7833 default:
7834 abort ();
7835 }
7836
7837 /*
7838 * a < b =>
7839 * if (hi(a) < hi(b)) goto true;
7840 * if (hi(a) > hi(b)) goto false;
7841 * if (lo(a) < lo(b)) goto true;
7842 * false:
7843 */
7844
7845 ix86_compare_op0 = hi[0];
7846 ix86_compare_op1 = hi[1];
7847
7848 if (code1 != NIL)
7849 ix86_expand_branch (code1, label);
7850 if (code2 != NIL)
7851 ix86_expand_branch (code2, label2);
7852
7853 ix86_compare_op0 = lo[0];
7854 ix86_compare_op1 = lo[1];
7855 ix86_expand_branch (code3, label);
7856
7857 if (code2 != NIL)
7858 emit_label (label2);
7859 return;
7860 }
7861
7862 default:
7863 abort ();
7864 }
7865 }
7866
7867 /* Split branch based on floating point condition. */
7868 void
7869 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7870 enum rtx_code code;
7871 rtx op1, op2, target1, target2, tmp;
7872 {
7873 rtx second, bypass;
7874 rtx label = NULL_RTX;
7875 rtx condition;
7876 int bypass_probability = -1, second_probability = -1, probability = -1;
7877 rtx i;
7878
7879 if (target2 != pc_rtx)
7880 {
7881 rtx tmp = target2;
7882 code = reverse_condition_maybe_unordered (code);
7883 target2 = target1;
7884 target1 = tmp;
7885 }
7886
7887 condition = ix86_expand_fp_compare (code, op1, op2,
7888 tmp, &second, &bypass);
7889
7890 if (split_branch_probability >= 0)
7891 {
7892 /* Distribute the probabilities across the jumps.
7893 Assume that the BYPASS and SECOND conditions always
7894 test for UNORDERED. */
7895 probability = split_branch_probability;
7896
7897 /* A value of 1 is low enough that the probability does not
7898 need to be updated. Later we may run some experiments and see
7899 whether unordered values are more frequent in practice. */
7900 if (bypass)
7901 bypass_probability = 1;
7902 if (second)
7903 second_probability = 1;
7904 }
7905 if (bypass != NULL_RTX)
7906 {
7907 label = gen_label_rtx ();
7908 i = emit_jump_insn (gen_rtx_SET
7909 (VOIDmode, pc_rtx,
7910 gen_rtx_IF_THEN_ELSE (VOIDmode,
7911 bypass,
7912 gen_rtx_LABEL_REF (VOIDmode,
7913 label),
7914 pc_rtx)));
7915 if (bypass_probability >= 0)
7916 REG_NOTES (i)
7917 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7918 GEN_INT (bypass_probability),
7919 REG_NOTES (i));
7920 }
7921 i = emit_jump_insn (gen_rtx_SET
7922 (VOIDmode, pc_rtx,
7923 gen_rtx_IF_THEN_ELSE (VOIDmode,
7924 condition, target1, target2)));
7925 if (probability >= 0)
7926 REG_NOTES (i)
7927 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7928 GEN_INT (probability),
7929 REG_NOTES (i));
7930 if (second != NULL_RTX)
7931 {
7932 i = emit_jump_insn (gen_rtx_SET
7933 (VOIDmode, pc_rtx,
7934 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7935 target2)));
7936 if (second_probability >= 0)
7937 REG_NOTES (i)
7938 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7939 GEN_INT (second_probability),
7940 REG_NOTES (i));
7941 }
7942 if (label != NULL_RTX)
7943 emit_label (label);
7944 }
7945
7946 int
7947 ix86_expand_setcc (code, dest)
7948 enum rtx_code code;
7949 rtx dest;
7950 {
7951 rtx ret, tmp, tmpreg;
7952 rtx second_test, bypass_test;
7953
7954 if (GET_MODE (ix86_compare_op0) == DImode
7955 && !TARGET_64BIT)
7956 return 0; /* FAIL */
7957
7958 if (GET_MODE (dest) != QImode)
7959 abort ();
7960
7961 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7962 PUT_MODE (ret, QImode);
7963
7964 tmp = dest;
7965 tmpreg = dest;
7966
7967 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7968 if (bypass_test || second_test)
7969 {
7970 rtx test = second_test;
7971 int bypass = 0;
7972 rtx tmp2 = gen_reg_rtx (QImode);
7973 if (bypass_test)
7974 {
7975 if (second_test)
7976 abort ();
7977 test = bypass_test;
7978 bypass = 1;
7979 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7980 }
7981 PUT_MODE (test, QImode);
7982 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7983
7984 if (bypass)
7985 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7986 else
7987 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7988 }
7989
7990 return 1; /* DONE */
7991 }
7992
7993 int
7994 ix86_expand_int_movcc (operands)
7995 rtx operands[];
7996 {
7997 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7998 rtx compare_seq, compare_op;
7999 rtx second_test, bypass_test;
8000 enum machine_mode mode = GET_MODE (operands[0]);
8001
8002 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8003 When the comparison is done with an immediate, we can convert it to LTU
8004 or GEU by altering the integer (e.g. unsigned x <= 5 becomes x < 6). */
8005
8006 if ((code == LEU || code == GTU)
8007 && GET_CODE (ix86_compare_op1) == CONST_INT
8008 && mode != HImode
8009 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
8010 /* The operand must still be representable as a sign-extended value. */
8011 && (!TARGET_64BIT
8012 || GET_MODE (ix86_compare_op0) != DImode
8013 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
8014 && GET_CODE (operands[2]) == CONST_INT
8015 && GET_CODE (operands[3]) == CONST_INT)
8016 {
8017 if (code == LEU)
8018 code = LTU;
8019 else
8020 code = GEU;
8021 ix86_compare_op1
8022 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8023 GET_MODE (ix86_compare_op0));
8024 }
8025
8026 start_sequence ();
8027 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8028 compare_seq = gen_sequence ();
8029 end_sequence ();
8030
8031 compare_code = GET_CODE (compare_op);
8032
8033 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8034 HImode insns, we'd be swallowed in word prefix ops. */
8035
8036 if (mode != HImode
8037 && (mode != DImode || TARGET_64BIT)
8038 && GET_CODE (operands[2]) == CONST_INT
8039 && GET_CODE (operands[3]) == CONST_INT)
8040 {
8041 rtx out = operands[0];
8042 HOST_WIDE_INT ct = INTVAL (operands[2]);
8043 HOST_WIDE_INT cf = INTVAL (operands[3]);
8044 HOST_WIDE_INT diff;
8045
8046 if ((compare_code == LTU || compare_code == GEU)
8047 && !second_test && !bypass_test)
8048 {
8049
8050 /* Detect overlap between destination and compare sources. */
8051 rtx tmp = out;
8052
8053 /* To simplify the rest of the code, restrict to the GEU case. */
8054 if (compare_code == LTU)
8055 {
8056 HOST_WIDE_INT tmp = ct;
8057 ct = cf;
8058 cf = tmp;
8059 compare_code = reverse_condition (compare_code);
8060 code = reverse_condition (code);
8061 }
8062 diff = ct - cf;
8063
8064 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8065 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8066 tmp = gen_reg_rtx (mode);
8067
8068 emit_insn (compare_seq);
8069 if (mode == DImode)
8070 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8071 else
8072 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8073
8074 if (diff == 1)
8075 {
8076 /*
8077 * cmpl op0,op1
8078 * sbbl dest,dest
8079 * [addl dest, ct]
8080 *
8081 * Size 5 - 8.
8082 */
8083 if (ct)
8084 tmp = expand_simple_binop (mode, PLUS,
8085 tmp, GEN_INT (ct),
8086 tmp, 1, OPTAB_DIRECT);
8087 }
8088 else if (cf == -1)
8089 {
8090 /*
8091 * cmpl op0,op1
8092 * sbbl dest,dest
8093 * orl $ct, dest
8094 *
8095 * Size 8.
8096 */
8097 tmp = expand_simple_binop (mode, IOR,
8098 tmp, GEN_INT (ct),
8099 tmp, 1, OPTAB_DIRECT);
8100 }
8101 else if (diff == -1 && ct)
8102 {
8103 /*
8104 * cmpl op0,op1
8105 * sbbl dest,dest
8106 * xorl $-1, dest
8107 * [addl dest, cf]
8108 *
8109 * Size 8 - 11.
8110 */
8111 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8112 if (cf)
8113 tmp = expand_simple_binop (mode, PLUS,
8114 tmp, GEN_INT (cf),
8115 tmp, 1, OPTAB_DIRECT);
8116 }
8117 else
8118 {
8119 /*
8120 * cmpl op0,op1
8121 * sbbl dest,dest
8122 * andl cf - ct, dest
8123 * [addl dest, ct]
8124 *
8125 * Size 8 - 11.
8126 */
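/* Worked example (added for clarity): with ct = 7 and cf = 3 for a GEU
   test, the sbb leaves 0 when the condition holds and -1 otherwise, so
   "andl $-4" (cf - ct) followed by "addl $7" yields 7 or 3 with no
   branch.  */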
8127 tmp = expand_simple_binop (mode, AND,
8128 tmp,
8129 gen_int_mode (cf - ct, mode),
8130 tmp, 1, OPTAB_DIRECT);
8131 if (ct)
8132 tmp = expand_simple_binop (mode, PLUS,
8133 tmp, GEN_INT (ct),
8134 tmp, 1, OPTAB_DIRECT);
8135 }
8136
8137 if (tmp != out)
8138 emit_move_insn (out, tmp);
8139
8140 return 1; /* DONE */
8141 }
8142
8143 diff = ct - cf;
8144 if (diff < 0)
8145 {
8146 HOST_WIDE_INT tmp;
8147 tmp = ct, ct = cf, cf = tmp;
8148 diff = -diff;
8149 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8150 {
8151 /* We may be reversing an unordered compare to a normal compare, which
8152 is not valid in general (we may convert a non-trapping condition
8153 into a trapping one); however, on i386 we currently emit all
8154 comparisons unordered. */
8155 compare_code = reverse_condition_maybe_unordered (compare_code);
8156 code = reverse_condition_maybe_unordered (code);
8157 }
8158 else
8159 {
8160 compare_code = reverse_condition (compare_code);
8161 code = reverse_condition (code);
8162 }
8163 }
8164
8165 compare_code = NIL;
8166 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8167 && GET_CODE (ix86_compare_op1) == CONST_INT)
8168 {
8169 if (ix86_compare_op1 == const0_rtx
8170 && (code == LT || code == GE))
8171 compare_code = code;
8172 else if (ix86_compare_op1 == constm1_rtx)
8173 {
8174 if (code == LE)
8175 compare_code = LT;
8176 else if (code == GT)
8177 compare_code = GE;
8178 }
8179 }
8180
8181 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8182 if (compare_code != NIL
8183 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8184 && (cf == -1 || ct == -1))
8185 {
8186 /* If the lea code below could be used, only optimize
8187 if it results in a 2-insn sequence. */
8188
8189 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8190 || diff == 3 || diff == 5 || diff == 9)
8191 || (compare_code == LT && ct == -1)
8192 || (compare_code == GE && cf == -1))
8193 {
8194 /*
8195 * notl op1 (if necessary)
8196 * sarl $31, op1
8197 * orl cf, op1
8198 */
8199 if (ct != -1)
8200 {
8201 cf = ct;
8202 ct = -1;
8203 code = reverse_condition (code);
8204 }
8205
8206 out = emit_store_flag (out, code, ix86_compare_op0,
8207 ix86_compare_op1, VOIDmode, 0, -1);
8208
8209 out = expand_simple_binop (mode, IOR,
8210 out, GEN_INT (cf),
8211 out, 1, OPTAB_DIRECT);
8212 if (out != operands[0])
8213 emit_move_insn (operands[0], out);
8214
8215 return 1; /* DONE */
8216 }
8217 }
8218
8219 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8220 || diff == 3 || diff == 5 || diff == 9)
8221 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8222 {
8223 /*
8224 * xorl dest,dest
8225 * cmpl op1,op2
8226 * setcc dest
8227 * lea cf(dest*(ct-cf)),dest
8228 *
8229 * Size 14.
8230 *
8231 * This also catches the degenerate setcc-only case.
8232 */
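      /* Worked example (added for clarity): with ct = 5 and cf = 1, diff is
	 4, so the setcc leaves 0 or 1 in dest and the lea computes
	 1(,dest,4), i.e. 1 or 5, without a branch.  */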
8233
8234 rtx tmp;
8235 int nops;
8236
8237 out = emit_store_flag (out, code, ix86_compare_op0,
8238 ix86_compare_op1, VOIDmode, 0, 1);
8239
8240 nops = 0;
8241 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8242 arithmetic in the proper mode to match. */
8243 if (diff == 1)
8244 tmp = out;
8245 else
8246 {
8247 rtx out1;
8248 out1 = out;
8249 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8250 nops++;
8251 if (diff & 1)
8252 {
8253 tmp = gen_rtx_PLUS (mode, tmp, out1);
8254 nops++;
8255 }
8256 }
8257 if (cf != 0)
8258 {
8259 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8260 nops++;
8261 }
8262 if (tmp != out
8263 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8264 {
8265 if (nops == 1)
8266 {
8267 rtx clob;
8268
8269 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8270 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8271
8272 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8273 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8274 emit_insn (tmp);
8275 }
8276 else
8277 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8278 }
8279 if (out != operands[0])
8280 emit_move_insn (operands[0], out);
8281
8282 return 1; /* DONE */
8283 }
8284
8285 /*
8286 * General case: Jumpful:
8287 * xorl dest,dest cmpl op1, op2
8288 * cmpl op1, op2 movl ct, dest
8289 * setcc dest jcc 1f
8290 * decl dest movl cf, dest
8291 * andl (cf-ct),dest 1:
8292 * addl ct,dest
8293 *
8294 * Size 20. Size 14.
8295 *
8296 * This is reasonably steep, but branch mispredict costs are
8297 * high on modern cpus, so consider failing only if optimizing
8298 * for space.
8299 *
8300 * %%% Parameterize branch_cost on the tuning architecture, then
8301 * use that. The 80386 couldn't care less about mispredicts.
8302 */
8303
8304 if (!optimize_size && !TARGET_CMOVE)
8305 {
8306 if (ct == 0)
8307 {
8308 ct = cf;
8309 cf = 0;
8310 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8311 /* We may be reversing an unordered compare to a normal compare,
8312 which is not valid in general (we may convert a non-trapping
8313 condition into a trapping one); however, on i386 we currently
8314 emit all comparisons unordered. */
8315 code = reverse_condition_maybe_unordered (code);
8316 else
8317 {
8318 code = reverse_condition (code);
8319 if (compare_code != NIL)
8320 compare_code = reverse_condition (compare_code);
8321 }
8322 }
8323
8324 if (compare_code != NIL)
8325 {
8326 /* notl op1 (if needed)
8327 sarl $31, op1
8328 andl (cf-ct), op1
8329 addl ct, op1
8330
8331 For x < 0 (resp. x <= -1) there will be no notl,
8332 so if possible swap the constants to get rid of the
8333 complement.
8334 True/false will be -1/0 while code below (store flag
8335 followed by decrement) is 0/-1, so the constants need
8336 to be exchanged once more. */
8337
8338 if (compare_code == GE || !cf)
8339 {
8340 code = reverse_condition (code);
8341 compare_code = LT;
8342 }
8343 else
8344 {
8345 HOST_WIDE_INT tmp = cf;
8346 cf = ct;
8347 ct = tmp;
8348 }
8349
8350 out = emit_store_flag (out, code, ix86_compare_op0,
8351 ix86_compare_op1, VOIDmode, 0, -1);
8352 }
8353 else
8354 {
8355 out = emit_store_flag (out, code, ix86_compare_op0,
8356 ix86_compare_op1, VOIDmode, 0, 1);
8357
8358 out = expand_simple_binop (mode, PLUS,
8359 out, constm1_rtx,
8360 out, 1, OPTAB_DIRECT);
8361 }
8362
8363 out = expand_simple_binop (mode, AND,
8364 out,
8365 gen_int_mode (cf - ct, mode),
8366 out, 1, OPTAB_DIRECT);
8367 out = expand_simple_binop (mode, PLUS,
8368 out, GEN_INT (ct),
8369 out, 1, OPTAB_DIRECT);
8370 if (out != operands[0])
8371 emit_move_insn (operands[0], out);
8372
8373 return 1; /* DONE */
8374 }
8375 }
8376
8377 if (!TARGET_CMOVE)
8378 {
8379 /* Try a few things more with specific constants and a variable. */
8380
8381 optab op;
8382 rtx var, orig_out, out, tmp;
8383
8384 if (optimize_size)
8385 return 0; /* FAIL */
8386
8387 /* If one of the two operands is an interesting constant (0 or -1), load
8388 that constant using the code above and mask the variable in with a logical operation. */
8389
8390 if (GET_CODE (operands[2]) == CONST_INT)
8391 {
8392 var = operands[3];
8393 if (INTVAL (operands[2]) == 0)
8394 operands[3] = constm1_rtx, op = and_optab;
8395 else if (INTVAL (operands[2]) == -1)
8396 operands[3] = const0_rtx, op = ior_optab;
8397 else
8398 return 0; /* FAIL */
8399 }
8400 else if (GET_CODE (operands[3]) == CONST_INT)
8401 {
8402 var = operands[2];
8403 if (INTVAL (operands[3]) == 0)
8404 operands[2] = constm1_rtx, op = and_optab;
8405 else if (INTVAL (operands[3]) == -1)
8406 operands[2] = const0_rtx, op = ior_optab;
8407 else
8408 return 0; /* FAIL */
8409 }
8410 else
8411 return 0; /* FAIL */
8412
8413 orig_out = operands[0];
8414 tmp = gen_reg_rtx (mode);
8415 operands[0] = tmp;
8416
8417 /* Recurse to get the constant loaded. */
8418 if (ix86_expand_int_movcc (operands) == 0)
8419 return 0; /* FAIL */
8420
8421 /* Mask in the interesting variable. */
8422 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8423 OPTAB_WIDEN);
8424 if (out != orig_out)
8425 emit_move_insn (orig_out, out);
8426
8427 return 1; /* DONE */
8428 }
8429
8430 /*
8431 * For comparison with above,
8432 *
8433 * movl cf,dest
8434 * movl ct,tmp
8435 * cmpl op1,op2
8436 * cmovcc tmp,dest
8437 *
8438 * Size 15.
8439 */
8440
8441 if (! nonimmediate_operand (operands[2], mode))
8442 operands[2] = force_reg (mode, operands[2]);
8443 if (! nonimmediate_operand (operands[3], mode))
8444 operands[3] = force_reg (mode, operands[3]);
8445
8446 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8447 {
8448 rtx tmp = gen_reg_rtx (mode);
8449 emit_move_insn (tmp, operands[3]);
8450 operands[3] = tmp;
8451 }
8452 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8453 {
8454 rtx tmp = gen_reg_rtx (mode);
8455 emit_move_insn (tmp, operands[2]);
8456 operands[2] = tmp;
8457 }
8458 if (! register_operand (operands[2], VOIDmode)
8459 && ! register_operand (operands[3], VOIDmode))
8460 operands[2] = force_reg (mode, operands[2]);
8461
8462 emit_insn (compare_seq);
8463 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8464 gen_rtx_IF_THEN_ELSE (mode,
8465 compare_op, operands[2],
8466 operands[3])));
8467 if (bypass_test)
8468 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8469 gen_rtx_IF_THEN_ELSE (mode,
8470 bypass_test,
8471 operands[3],
8472 operands[0])));
8473 if (second_test)
8474 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8475 gen_rtx_IF_THEN_ELSE (mode,
8476 second_test,
8477 operands[2],
8478 operands[0])));
8479
8480 return 1; /* DONE */
8481 }
8482
8483 int
8484 ix86_expand_fp_movcc (operands)
8485 rtx operands[];
8486 {
8487 enum rtx_code code;
8488 rtx tmp;
8489 rtx compare_op, second_test, bypass_test;
8490
8491 /* For SF/DFmode conditional moves based on comparisons
8492 in the same mode, we may want to use SSE min/max instructions. */
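/* For instance (added note), "x = a < b ? a : b" in SFmode can be
   emitted as a single minss when the conditions below hold.  */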
8493 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8494 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8495 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8496 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8497 && (!TARGET_IEEE_FP
8498 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8499 /* We may be called from the post-reload splitter. */
8500 && (!REG_P (operands[0])
8501 || SSE_REG_P (operands[0])
8502 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8503 {
8504 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8505 code = GET_CODE (operands[1]);
8506
8507 /* See if we have a (cross) match between the comparison operands and
8508 the conditional move operands. */
8509 if (rtx_equal_p (operands[2], op1))
8510 {
8511 rtx tmp = op0;
8512 op0 = op1;
8513 op1 = tmp;
8514 code = reverse_condition_maybe_unordered (code);
8515 }
8516 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8517 {
8518 /* Check for min operation. */
8519 if (code == LT)
8520 {
8521 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8522 if (memory_operand (op0, VOIDmode))
8523 op0 = force_reg (GET_MODE (operands[0]), op0);
8524 if (GET_MODE (operands[0]) == SFmode)
8525 emit_insn (gen_minsf3 (operands[0], op0, op1));
8526 else
8527 emit_insn (gen_mindf3 (operands[0], op0, op1));
8528 return 1;
8529 }
8530 /* Check for max operation. */
8531 if (code == GT)
8532 {
8533 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8534 if (memory_operand (op0, VOIDmode))
8535 op0 = force_reg (GET_MODE (operands[0]), op0);
8536 if (GET_MODE (operands[0]) == SFmode)
8537 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8538 else
8539 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8540 return 1;
8541 }
8542 }
8543 /* Arrange for the condition to be an sse_comparison_operator. When we
8544 are in non-IEEE mode, try to canonicalize the destination operand
8545 to be first in the comparison - this helps reload avoid extra
8546 moves. */
8547 if (!sse_comparison_operator (operands[1], VOIDmode)
8548 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8549 {
8550 rtx tmp = ix86_compare_op0;
8551 ix86_compare_op0 = ix86_compare_op1;
8552 ix86_compare_op1 = tmp;
8553 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8554 VOIDmode, ix86_compare_op0,
8555 ix86_compare_op1);
8556 }
8557 /* Similarly, try to arrange for the result to be the first operand of
8558 the conditional move. We also don't support the NE comparison on SSE,
8559 so try to avoid it. */
8560 if ((rtx_equal_p (operands[0], operands[3])
8561 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8562 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8563 {
8564 rtx tmp = operands[2];
8565 operands[2] = operands[3];
8566 operands[3] = tmp;
8567 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8568 (GET_CODE (operands[1])),
8569 VOIDmode, ix86_compare_op0,
8570 ix86_compare_op1);
8571 }
8572 if (GET_MODE (operands[0]) == SFmode)
8573 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8574 operands[2], operands[3],
8575 ix86_compare_op0, ix86_compare_op1));
8576 else
8577 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8578 operands[2], operands[3],
8579 ix86_compare_op0, ix86_compare_op1));
8580 return 1;
8581 }
8582
8583 /* The floating point conditional move instructions don't directly
8584 support conditions resulting from a signed integer comparison. */
8585
8586 code = GET_CODE (operands[1]);
8587 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8588
8592 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8593 {
8594 if (second_test != NULL || bypass_test != NULL)
8595 abort ();
8596 tmp = gen_reg_rtx (QImode);
8597 ix86_expand_setcc (code, tmp);
8598 code = NE;
8599 ix86_compare_op0 = tmp;
8600 ix86_compare_op1 = const0_rtx;
8601 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8602 }
8603 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8604 {
8605 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8606 emit_move_insn (tmp, operands[3]);
8607 operands[3] = tmp;
8608 }
8609 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8610 {
8611 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8612 emit_move_insn (tmp, operands[2]);
8613 operands[2] = tmp;
8614 }
8615
8616 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8617 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8618 compare_op,
8619 operands[2],
8620 operands[3])));
8621 if (bypass_test)
8622 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8623 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8624 bypass_test,
8625 operands[3],
8626 operands[0])));
8627 if (second_test)
8628 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8629 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8630 second_test,
8631 operands[2],
8632 operands[0])));
8633
8634 return 1;
8635 }
8636
8637 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8638 works for floating point parameters and non-offsettable memories.
8639 For pushes, it returns just stack offsets; the values will be saved
8640 in the right order. At most three parts are generated. */
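/* Example (added for clarity): on a 32-bit target a DFmode constant such
   as 1.0 splits into two SImode immediates holding the low and high
   words of its IEEE representation, 0x00000000 and 0x3ff00000.  */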
8641
8642 static int
8643 ix86_split_to_parts (operand, parts, mode)
8644 rtx operand;
8645 rtx *parts;
8646 enum machine_mode mode;
8647 {
8648 int size;
8649
8650 if (!TARGET_64BIT)
8651 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8652 else
8653 size = (GET_MODE_SIZE (mode) + 4) / 8;
8654
8655 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8656 abort ();
8657 if (size < 2 || size > 3)
8658 abort ();
8659
8660 /* Optimize constant pool references into immediates. This is used by fp
8661 moves, which force all constants to memory to allow combining. */
8662
8663 if (GET_CODE (operand) == MEM
8664 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8665 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8666 operand = get_pool_constant (XEXP (operand, 0));
8667
8668 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8669 {
8670 /* The only non-offsettable memories we handle are pushes. */
8671 if (! push_operand (operand, VOIDmode))
8672 abort ();
8673
8674 operand = copy_rtx (operand);
8675 PUT_MODE (operand, Pmode);
8676 parts[0] = parts[1] = parts[2] = operand;
8677 }
8678 else if (!TARGET_64BIT)
8679 {
8680 if (mode == DImode)
8681 split_di (&operand, 1, &parts[0], &parts[1]);
8682 else
8683 {
8684 if (REG_P (operand))
8685 {
8686 if (!reload_completed)
8687 abort ();
8688 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8689 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8690 if (size == 3)
8691 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8692 }
8693 else if (offsettable_memref_p (operand))
8694 {
8695 operand = adjust_address (operand, SImode, 0);
8696 parts[0] = operand;
8697 parts[1] = adjust_address (operand, SImode, 4);
8698 if (size == 3)
8699 parts[2] = adjust_address (operand, SImode, 8);
8700 }
8701 else if (GET_CODE (operand) == CONST_DOUBLE)
8702 {
8703 REAL_VALUE_TYPE r;
8704 long l[4];
8705
8706 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8707 switch (mode)
8708 {
8709 case XFmode:
8710 case TFmode:
8711 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8712 parts[2] = gen_int_mode (l[2], SImode);
8713 break;
8714 case DFmode:
8715 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8716 break;
8717 default:
8718 abort ();
8719 }
8720 parts[1] = gen_int_mode (l[1], SImode);
8721 parts[0] = gen_int_mode (l[0], SImode);
8722 }
8723 else
8724 abort ();
8725 }
8726 }
8727 else
8728 {
8729 if (mode == TImode)
8730 split_ti (&operand, 1, &parts[0], &parts[1]);
8731 if (mode == XFmode || mode == TFmode)
8732 {
8733 if (REG_P (operand))
8734 {
8735 if (!reload_completed)
8736 abort ();
8737 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8738 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8739 }
8740 else if (offsettable_memref_p (operand))
8741 {
8742 operand = adjust_address (operand, DImode, 0);
8743 parts[0] = operand;
8744 parts[1] = adjust_address (operand, SImode, 8);
8745 }
8746 else if (GET_CODE (operand) == CONST_DOUBLE)
8747 {
8748 REAL_VALUE_TYPE r;
8749 long l[3];
8750
8751 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8752 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8753 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8754 if (HOST_BITS_PER_WIDE_INT >= 64)
8755 parts[0]
8756 = gen_int_mode
8757 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8758 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8759 DImode);
8760 else
8761 parts[0] = immed_double_const (l[0], l[1], DImode);
8762 parts[1] = gen_int_mode (l[2], SImode);
8763 }
8764 else
8765 abort ();
8766 }
8767 }
8768
8769 return size;
8770 }
8771
8772 /* Emit insns to perform a move or push of DI, DF, and XF values.
8773 All required insns are emitted by this function (there is no return
8774 value). Operands 2-4 contain the destination parts in the correct
8775 order; operands 5-7 contain the corresponding source parts. */
8776
8777 void
8778 ix86_split_long_move (operands)
8779 rtx operands[];
8780 {
8781 rtx part[2][3];
8782 int nparts;
8783 int push = 0;
8784 int collisions = 0;
8785 enum machine_mode mode = GET_MODE (operands[0]);
8786
8787 /* The DFmode expanders may ask us to move a double.
8788 For a 64-bit target this is a single move. By hiding that fact
8789 here we simplify the i386.md splitters. */
8790 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8791 {
8792 /* Optimize constant pool references into immediates. This is used by
8793 fp moves, which force all constants to memory to allow combining. */
8794
8795 if (GET_CODE (operands[1]) == MEM
8796 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8797 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8798 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8799 if (push_operand (operands[0], VOIDmode))
8800 {
8801 operands[0] = copy_rtx (operands[0]);
8802 PUT_MODE (operands[0], Pmode);
8803 }
8804 else
8805 operands[0] = gen_lowpart (DImode, operands[0]);
8806 operands[1] = gen_lowpart (DImode, operands[1]);
8807 emit_move_insn (operands[0], operands[1]);
8808 return;
8809 }
8810
8811 /* The only non-offsettable memory we handle is push. */
8812 if (push_operand (operands[0], VOIDmode))
8813 push = 1;
8814 else if (GET_CODE (operands[0]) == MEM
8815 && ! offsettable_memref_p (operands[0]))
8816 abort ();
8817
8818 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8819 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8820
8821 /* When emitting a push, take care of source operands on the stack. */
8822 if (push && GET_CODE (operands[1]) == MEM
8823 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8824 {
8825 if (nparts == 3)
8826 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8827 XEXP (part[1][2], 0));
8828 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8829 XEXP (part[1][1], 0));
8830 }
8831
8832 /* We need to do the copy in the right order in case an address register
8833 of the source overlaps the destination. */
8834 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8835 {
8836 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8837 collisions++;
8838 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8839 collisions++;
8840 if (nparts == 3
8841 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8842 collisions++;
8843
8844 /* Collision in the middle part can be handled by reordering. */
8845 if (collisions == 1 && nparts == 3
8846 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8847 {
8848 rtx tmp;
8849 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8850 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8851 }
8852
8853 /* If there are more collisions, we can't handle them by reordering.
8854 Do an lea to the last part and use only one colliding move. */
8855 else if (collisions > 1)
8856 {
8857 collisions = 1;
8858 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8859 XEXP (part[1][0], 0)));
8860 part[1][0] = change_address (part[1][0],
8861 TARGET_64BIT ? DImode : SImode,
8862 part[0][nparts - 1]);
8863 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8864 if (nparts == 3)
8865 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8866 }
8867 }
8868
8869 if (push)
8870 {
8871 if (!TARGET_64BIT)
8872 {
8873 if (nparts == 3)
8874 {
8875 /* We use only the first 12 bytes of a TFmode value, but for a push
8876 we are required to adjust the stack as if we were pushing a real
8877 16-byte value. */
8878 if (mode == TFmode && !TARGET_64BIT)
8879 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8880 GEN_INT (-4)));
8881 emit_move_insn (part[0][2], part[1][2]);
8882 }
8883 }
8884 else
8885 {
8886 /* In 64-bit mode we don't have a 32-bit push available. In case this
8887 is a register, that is OK - we will just use the larger counterpart.
8888 We also retype the memory - this comes from an attempt to avoid a
8889 REX prefix when moving the second half of a TFmode value. */
8890 if (GET_MODE (part[1][1]) == SImode)
8891 {
8892 if (GET_CODE (part[1][1]) == MEM)
8893 part[1][1] = adjust_address (part[1][1], DImode, 0);
8894 else if (REG_P (part[1][1]))
8895 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8896 else
8897 abort ();
8898 if (GET_MODE (part[1][0]) == SImode)
8899 part[1][0] = part[1][1];
8900 }
8901 }
8902 emit_move_insn (part[0][1], part[1][1]);
8903 emit_move_insn (part[0][0], part[1][0]);
8904 return;
8905 }
8906
8907 /* Choose the correct order so as not to overwrite the source before it is copied. */
8908 if ((REG_P (part[0][0])
8909 && REG_P (part[1][1])
8910 && (REGNO (part[0][0]) == REGNO (part[1][1])
8911 || (nparts == 3
8912 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8913 || (collisions > 0
8914 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8915 {
8916 if (nparts == 3)
8917 {
8918 operands[2] = part[0][2];
8919 operands[3] = part[0][1];
8920 operands[4] = part[0][0];
8921 operands[5] = part[1][2];
8922 operands[6] = part[1][1];
8923 operands[7] = part[1][0];
8924 }
8925 else
8926 {
8927 operands[2] = part[0][1];
8928 operands[3] = part[0][0];
8929 operands[5] = part[1][1];
8930 operands[6] = part[1][0];
8931 }
8932 }
8933 else
8934 {
8935 if (nparts == 3)
8936 {
8937 operands[2] = part[0][0];
8938 operands[3] = part[0][1];
8939 operands[4] = part[0][2];
8940 operands[5] = part[1][0];
8941 operands[6] = part[1][1];
8942 operands[7] = part[1][2];
8943 }
8944 else
8945 {
8946 operands[2] = part[0][0];
8947 operands[3] = part[0][1];
8948 operands[5] = part[1][0];
8949 operands[6] = part[1][1];
8950 }
8951 }
8952 emit_move_insn (operands[2], operands[5]);
8953 emit_move_insn (operands[3], operands[6]);
8954 if (nparts == 3)
8955 emit_move_insn (operands[4], operands[7]);
8956
8957 return;
8958 }
8959
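/* Split a DImode shift left by operands[2] into SImode operations
   (descriptive comment added).  For a constant count of 32 or more the
   low word is moved into the high word and the low word is cleared;
   smaller constant counts use an shld/shl pair.  For a variable count
   the shld/shl pair is followed by an adjustment for counts of 32 and
   above, using the x86_shift_adj_1 pattern (which needs TARGET_CMOVE
   and a scratch register) when possible and x86_shift_adj_2
   otherwise.  */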
8960 void
8961 ix86_split_ashldi (operands, scratch)
8962 rtx *operands, scratch;
8963 {
8964 rtx low[2], high[2];
8965 int count;
8966
8967 if (GET_CODE (operands[2]) == CONST_INT)
8968 {
8969 split_di (operands, 2, low, high);
8970 count = INTVAL (operands[2]) & 63;
8971
8972 if (count >= 32)
8973 {
8974 emit_move_insn (high[0], low[1]);
8975 emit_move_insn (low[0], const0_rtx);
8976
8977 if (count > 32)
8978 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8979 }
8980 else
8981 {
8982 if (!rtx_equal_p (operands[0], operands[1]))
8983 emit_move_insn (operands[0], operands[1]);
8984 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8985 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8986 }
8987 }
8988 else
8989 {
8990 if (!rtx_equal_p (operands[0], operands[1]))
8991 emit_move_insn (operands[0], operands[1]);
8992
8993 split_di (operands, 1, low, high);
8994
8995 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8996 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8997
8998 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8999 {
9000 if (! no_new_pseudos)
9001 scratch = force_reg (SImode, const0_rtx);
9002 else
9003 emit_move_insn (scratch, const0_rtx);
9004
9005 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9006 scratch));
9007 }
9008 else
9009 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9010 }
9011 }
9012
9013 void
9014 ix86_split_ashrdi (operands, scratch)
9015 rtx *operands, scratch;
9016 {
9017 rtx low[2], high[2];
9018 int count;
9019
9020 if (GET_CODE (operands[2]) == CONST_INT)
9021 {
9022 split_di (operands, 2, low, high);
9023 count = INTVAL (operands[2]) & 63;
9024
9025 if (count >= 32)
9026 {
9027 emit_move_insn (low[0], high[1]);
9028
9029 if (! reload_completed)
9030 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9031 else
9032 {
9033 emit_move_insn (high[0], low[0]);
9034 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9035 }
9036
9037 if (count > 32)
9038 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9039 }
9040 else
9041 {
9042 if (!rtx_equal_p (operands[0], operands[1]))
9043 emit_move_insn (operands[0], operands[1]);
9044 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9045 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9046 }
9047 }
9048 else
9049 {
9050 if (!rtx_equal_p (operands[0], operands[1]))
9051 emit_move_insn (operands[0], operands[1]);
9052
9053 split_di (operands, 1, low, high);
9054
9055 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9056 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9057
9058 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9059 {
9060 if (! no_new_pseudos)
9061 scratch = gen_reg_rtx (SImode);
9062 emit_move_insn (scratch, high[0]);
9063 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9064 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9065 scratch));
9066 }
9067 else
9068 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9069 }
9070 }
9071
9072 void
9073 ix86_split_lshrdi (operands, scratch)
9074 rtx *operands, scratch;
9075 {
9076 rtx low[2], high[2];
9077 int count;
9078
9079 if (GET_CODE (operands[2]) == CONST_INT)
9080 {
9081 split_di (operands, 2, low, high);
9082 count = INTVAL (operands[2]) & 63;
9083
9084 if (count >= 32)
9085 {
9086 emit_move_insn (low[0], high[1]);
9087 emit_move_insn (high[0], const0_rtx);
9088
9089 if (count > 32)
9090 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9091 }
9092 else
9093 {
9094 if (!rtx_equal_p (operands[0], operands[1]))
9095 emit_move_insn (operands[0], operands[1]);
9096 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9097 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9098 }
9099 }
9100 else
9101 {
9102 if (!rtx_equal_p (operands[0], operands[1]))
9103 emit_move_insn (operands[0], operands[1]);
9104
9105 split_di (operands, 1, low, high);
9106
9107 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9108 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9109
9110 /* Heh. By reversing the arguments, we can reuse this pattern. */
9111 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9112 {
9113 if (! no_new_pseudos)
9114 scratch = force_reg (SImode, const0_rtx);
9115 else
9116 emit_move_insn (scratch, const0_rtx);
9117
9118 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9119 scratch));
9120 }
9121 else
9122 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9123 }
9124 }
9125
9126 /* Helper function for the string operations below. Test whether
9127 VARIABLE & VALUE is zero and, if so, jump to the returned label. */
9128 static rtx
9129 ix86_expand_aligntest (variable, value)
9130 rtx variable;
9131 int value;
9132 {
9133 rtx label = gen_label_rtx ();
9134 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9135 if (GET_MODE (variable) == DImode)
9136 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9137 else
9138 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9139 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9140 1, label);
9141 return label;
9142 }
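/* Typical use (added note), as in the copy and clear expanders below:

     rtx label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);

   i.e. the one-byte fixup is skipped when the destination is already
   even.  */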
9143
9144 /* Decrease COUNTREG by VALUE. */
9145 static void
9146 ix86_adjust_counter (countreg, value)
9147 rtx countreg;
9148 HOST_WIDE_INT value;
9149 {
9150 if (GET_MODE (countreg) == DImode)
9151 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9152 else
9153 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9154 }
9155
9156 /* Zero extend the possibly-SImode EXP into a Pmode register. */
9157 rtx
9158 ix86_zero_extend_to_Pmode (exp)
9159 rtx exp;
9160 {
9161 rtx r;
9162 if (GET_MODE (exp) == VOIDmode)
9163 return force_reg (Pmode, exp);
9164 if (GET_MODE (exp) == Pmode)
9165 return copy_to_mode_reg (Pmode, exp);
9166 r = gen_reg_rtx (Pmode);
9167 emit_insn (gen_zero_extendsidi2 (r, exp));
9168 return r;
9169 }
9170
9171 /* Expand a string move (memcpy) operation. Use i386 string operations
9172 when profitable. ix86_expand_clrstr contains similar code. */
9173 int
9174 ix86_expand_movstr (dst, src, count_exp, align_exp)
9175 rtx dst, src, count_exp, align_exp;
9176 {
9177 rtx srcreg, destreg, countreg;
9178 enum machine_mode counter_mode;
9179 HOST_WIDE_INT align = 0;
9180 unsigned HOST_WIDE_INT count = 0;
9181 rtx insns;
9182
9183 start_sequence ();
9184
9185 if (GET_CODE (align_exp) == CONST_INT)
9186 align = INTVAL (align_exp);
9187
9188 /* This simple hack avoids all inlining code and simplifies code below. */
9189 if (!TARGET_ALIGN_STRINGOPS)
9190 align = 64;
9191
9192 if (GET_CODE (count_exp) == CONST_INT)
9193 count = INTVAL (count_exp);
9194
9195 /* Figure out the proper mode for the counter. For 32 bits it is always
9196 SImode; for 64 bits use SImode when possible, otherwise DImode.
9197 Set count to the number of bytes copied when known at compile time. */
9198 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9199 || x86_64_zero_extended_value (count_exp))
9200 counter_mode = SImode;
9201 else
9202 counter_mode = DImode;
9203
9204 if (counter_mode != SImode && counter_mode != DImode)
9205 abort ();
9206
9207 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9208 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9209
9210 emit_insn (gen_cld ());
9211
9212 /* When optimizing for size, emit a simple rep ; movsb instruction for
9213 counts not divisible by 4. */
9214
9215 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9216 {
9217 countreg = ix86_zero_extend_to_Pmode (count_exp);
9218 if (TARGET_64BIT)
9219 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9220 destreg, srcreg, countreg));
9221 else
9222 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9223 destreg, srcreg, countreg));
9224 }
9225
9226 /* For constant aligned (or small unaligned) copies use rep movsl
9227 followed by code copying the rest. For PentiumPro, ensure 8-byte
9228 alignment to allow rep movsl acceleration. */
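  /* For example (added): a 23-byte copy with 4-byte alignment on a 32-bit
     target becomes "rep movsl" with a count of 5, followed by a movsw and
     a movsb for the remaining 3 bytes.  */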
9229
9230 else if (count != 0
9231 && (align >= 8
9232 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9233 || optimize_size || count < (unsigned int) 64))
9234 {
9235 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9236 if (count & ~(size - 1))
9237 {
9238 countreg = copy_to_mode_reg (counter_mode,
9239 GEN_INT ((count >> (size == 4 ? 2 : 3))
9240 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9241 countreg = ix86_zero_extend_to_Pmode (countreg);
9242 if (size == 4)
9243 {
9244 if (TARGET_64BIT)
9245 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9246 destreg, srcreg, countreg));
9247 else
9248 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9249 destreg, srcreg, countreg));
9250 }
9251 else
9252 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9253 destreg, srcreg, countreg));
9254 }
9255 if (size == 8 && (count & 0x04))
9256 emit_insn (gen_strmovsi (destreg, srcreg));
9257 if (count & 0x02)
9258 emit_insn (gen_strmovhi (destreg, srcreg));
9259 if (count & 0x01)
9260 emit_insn (gen_strmovqi (destreg, srcreg));
9261 }
9262 /* The generic code based on the glibc implementation:
9263 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9264 allowing accelerated copying there)
9265 - copy the data using rep movsl
9266 - copy the rest. */
9267 else
9268 {
9269 rtx countreg2;
9270 rtx label = NULL;
9271 int desired_alignment = (TARGET_PENTIUMPRO
9272 && (count == 0 || count >= (unsigned int) 260)
9273 ? 8 : UNITS_PER_WORD);
9274
9275 /* In case we don't know anything about the alignment, default to
9276 the library version, since it is usually equally fast and results
9277 in shorter code. */
9278 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9279 {
9280 end_sequence ();
9281 return 0;
9282 }
9283
9284 if (TARGET_SINGLE_STRINGOP)
9285 emit_insn (gen_cld ());
9286
9287 countreg2 = gen_reg_rtx (Pmode);
9288 countreg = copy_to_mode_reg (counter_mode, count_exp);
9289
9290 /* We don't use loops to align the destination or to copy parts smaller
9291 than 4 bytes, because gcc is able to optimize such code better (in
9292 the case the destination or the count really is aligned, gcc is often
9293 able to predict the branches) and also it is friendlier to hardware
9294 branch prediction.
9295
9296 Using loops is beneficial for the generic case, because we can
9297 handle small counts using the loops. Many CPUs (such as Athlon)
9298 have large REP prefix setup costs.
9299
9300 This is quite costly. Maybe we can revisit this decision later or
9301 add some customizability to this code. */
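  /* In outline (added summary), the code below first copies 1, 2 and
     possibly 4 bytes guarded by alignment tests on the destination, then
     shifts the remaining count right by 2 (3 on 64-bit) and issues
     rep movsl / rep movsq for the bulk, and finally copies any 4/2/1-byte
     tail guarded by the known count bits or by tests on the count
     register.  */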
9302
9303 if (count == 0 && align < desired_alignment)
9304 {
9305 label = gen_label_rtx ();
9306 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9307 LEU, 0, counter_mode, 1, label);
9308 }
9309 if (align <= 1)
9310 {
9311 rtx label = ix86_expand_aligntest (destreg, 1);
9312 emit_insn (gen_strmovqi (destreg, srcreg));
9313 ix86_adjust_counter (countreg, 1);
9314 emit_label (label);
9315 LABEL_NUSES (label) = 1;
9316 }
9317 if (align <= 2)
9318 {
9319 rtx label = ix86_expand_aligntest (destreg, 2);
9320 emit_insn (gen_strmovhi (destreg, srcreg));
9321 ix86_adjust_counter (countreg, 2);
9322 emit_label (label);
9323 LABEL_NUSES (label) = 1;
9324 }
9325 if (align <= 4 && desired_alignment > 4)
9326 {
9327 rtx label = ix86_expand_aligntest (destreg, 4);
9328 emit_insn (gen_strmovsi (destreg, srcreg));
9329 ix86_adjust_counter (countreg, 4);
9330 emit_label (label);
9331 LABEL_NUSES (label) = 1;
9332 }
9333
9334 if (label && desired_alignment > 4 && !TARGET_64BIT)
9335 {
9336 emit_label (label);
9337 LABEL_NUSES (label) = 1;
9338 label = NULL_RTX;
9339 }
9340 if (!TARGET_SINGLE_STRINGOP)
9341 emit_insn (gen_cld ());
9342 if (TARGET_64BIT)
9343 {
9344 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9345 GEN_INT (3)));
9346 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9347 destreg, srcreg, countreg2));
9348 }
9349 else
9350 {
9351 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9352 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9353 destreg, srcreg, countreg2));
9354 }
9355
9356 if (label)
9357 {
9358 emit_label (label);
9359 LABEL_NUSES (label) = 1;
9360 }
9361 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9362 emit_insn (gen_strmovsi (destreg, srcreg));
9363 if ((align <= 4 || count == 0) && TARGET_64BIT)
9364 {
9365 rtx label = ix86_expand_aligntest (countreg, 4);
9366 emit_insn (gen_strmovsi (destreg, srcreg));
9367 emit_label (label);
9368 LABEL_NUSES (label) = 1;
9369 }
9370 if (align > 2 && count != 0 && (count & 2))
9371 emit_insn (gen_strmovhi (destreg, srcreg));
9372 if (align <= 2 || count == 0)
9373 {
9374 rtx label = ix86_expand_aligntest (countreg, 2);
9375 emit_insn (gen_strmovhi (destreg, srcreg));
9376 emit_label (label);
9377 LABEL_NUSES (label) = 1;
9378 }
9379 if (align > 1 && count != 0 && (count & 1))
9380 emit_insn (gen_strmovqi (destreg, srcreg));
9381 if (align <= 1 || count == 0)
9382 {
9383 rtx label = ix86_expand_aligntest (countreg, 1);
9384 emit_insn (gen_strmovqi (destreg, srcreg));
9385 emit_label (label);
9386 LABEL_NUSES (label) = 1;
9387 }
9388 }
9389
9390 insns = get_insns ();
9391 end_sequence ();
9392
9393 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9394 emit_insns (insns);
9395 return 1;
9396 }
9397
9398 /* Expand a string clear operation (bzero). Use i386 string operations
9399 when profitable. ix86_expand_movstr contains similar code. */
9400 int
9401 ix86_expand_clrstr (src, count_exp, align_exp)
9402 rtx src, count_exp, align_exp;
9403 {
9404 rtx destreg, zeroreg, countreg;
9405 enum machine_mode counter_mode;
9406 HOST_WIDE_INT align = 0;
9407 unsigned HOST_WIDE_INT count = 0;
9408
9409 if (GET_CODE (align_exp) == CONST_INT)
9410 align = INTVAL (align_exp);
9411
9412 /* This simple hack avoids all inlining code and simplifies code below. */
9413 if (!TARGET_ALIGN_STRINGOPS)
9414 align = 32;
9415
9416 if (GET_CODE (count_exp) == CONST_INT)
9417 count = INTVAL (count_exp);
9418 /* Figure out the proper mode for the counter. For 32 bits it is always
9419 SImode; for 64 bits use SImode when possible, otherwise DImode.
9420 Set count to the number of bytes to clear when known at compile time. */
9421 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9422 || x86_64_zero_extended_value (count_exp))
9423 counter_mode = SImode;
9424 else
9425 counter_mode = DImode;
9426
9427 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9428
9429 emit_insn (gen_cld ());
9430
9431 /* When optimizing for size, emit a simple rep ; stosb instruction for
9432 counts not divisible by 4. */
9433
9434 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9435 {
9436 countreg = ix86_zero_extend_to_Pmode (count_exp);
9437 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9438 if (TARGET_64BIT)
9439 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9440 destreg, countreg));
9441 else
9442 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9443 destreg, countreg));
9444 }
9445 else if (count != 0
9446 && (align >= 8
9447 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9448 || optimize_size || count < (unsigned int) 64))
9449 {
9450 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9451 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9452 if (count & ~(size - 1))
9453 {
9454 countreg = copy_to_mode_reg (counter_mode,
9455 GEN_INT ((count >> (size == 4 ? 2 : 3))
9456 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9457 countreg = ix86_zero_extend_to_Pmode (countreg);
9458 if (size == 4)
9459 {
9460 if (TARGET_64BIT)
9461 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9462 destreg, countreg));
9463 else
9464 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9465 destreg, countreg));
9466 }
9467 else
9468 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9469 destreg, countreg));
9470 }
9471 if (size == 8 && (count & 0x04))
9472 emit_insn (gen_strsetsi (destreg,
9473 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9474 if (count & 0x02)
9475 emit_insn (gen_strsethi (destreg,
9476 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9477 if (count & 0x01)
9478 emit_insn (gen_strsetqi (destreg,
9479 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9480 }
9481 else
9482 {
9483 rtx countreg2;
9484 rtx label = NULL;
9485 /* Compute desired alignment of the string operation. */
9486 int desired_alignment = (TARGET_PENTIUMPRO
9487 && (count == 0 || count >= (unsigned int) 260)
9488 ? 8 : UNITS_PER_WORD);
9489
9490 /* In case we don't know anything about the alignment, default to
9491 the library version, since it is usually equally fast and results
9492 in shorter code. */
9493 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9494 return 0;
9495
9496 if (TARGET_SINGLE_STRINGOP)
9497 emit_insn (gen_cld ());
9498
9499 countreg2 = gen_reg_rtx (Pmode);
9500 countreg = copy_to_mode_reg (counter_mode, count_exp);
9501 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9502
9503 if (count == 0 && align < desired_alignment)
9504 {
9505 label = gen_label_rtx ();
9506 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9507 LEU, 0, counter_mode, 1, label);
9508 }
9509 if (align <= 1)
9510 {
9511 rtx label = ix86_expand_aligntest (destreg, 1);
9512 emit_insn (gen_strsetqi (destreg,
9513 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9514 ix86_adjust_counter (countreg, 1);
9515 emit_label (label);
9516 LABEL_NUSES (label) = 1;
9517 }
9518 if (align <= 2)
9519 {
9520 rtx label = ix86_expand_aligntest (destreg, 2);
9521 emit_insn (gen_strsethi (destreg,
9522 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9523 ix86_adjust_counter (countreg, 2);
9524 emit_label (label);
9525 LABEL_NUSES (label) = 1;
9526 }
9527 if (align <= 4 && desired_alignment > 4)
9528 {
9529 rtx label = ix86_expand_aligntest (destreg, 4);
9530 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9531 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9532 : zeroreg)));
9533 ix86_adjust_counter (countreg, 4);
9534 emit_label (label);
9535 LABEL_NUSES (label) = 1;
9536 }
9537
9538 if (label && desired_alignment > 4 && !TARGET_64BIT)
9539 {
9540 emit_label (label);
9541 LABEL_NUSES (label) = 1;
9542 label = NULL_RTX;
9543 }
9544
9545 if (!TARGET_SINGLE_STRINGOP)
9546 emit_insn (gen_cld ());
9547 if (TARGET_64BIT)
9548 {
9549 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9550 GEN_INT (3)));
9551 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9552 destreg, countreg2));
9553 }
9554 else
9555 {
9556 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9557 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9558 destreg, countreg2));
9559 }
9560 if (label)
9561 {
9562 emit_label (label);
9563 LABEL_NUSES (label) = 1;
9564 }
9565
9566 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9567 emit_insn (gen_strsetsi (destreg,
9568 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9569 if (TARGET_64BIT && (align <= 4 || count == 0))
9570 {
9571 rtx label = ix86_expand_aligntest (countreg, 2);
9572 emit_insn (gen_strsetsi (destreg,
9573 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9574 emit_label (label);
9575 LABEL_NUSES (label) = 1;
9576 }
9577 if (align > 2 && count != 0 && (count & 2))
9578 emit_insn (gen_strsethi (destreg,
9579 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9580 if (align <= 2 || count == 0)
9581 {
9582 rtx label = ix86_expand_aligntest (countreg, 2);
9583 emit_insn (gen_strsethi (destreg,
9584 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9585 emit_label (label);
9586 LABEL_NUSES (label) = 1;
9587 }
9588 if (align > 1 && count != 0 && (count & 1))
9589 emit_insn (gen_strsetqi (destreg,
9590 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9591 if (align <= 1 || count == 0)
9592 {
9593 rtx label = ix86_expand_aligntest (countreg, 1);
9594 emit_insn (gen_strsetqi (destreg,
9595 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9596 emit_label (label);
9597 LABEL_NUSES (label) = 1;
9598 }
9599 }
9600 return 1;
9601 }
9602 /* Expand strlen. */
9603 int
9604 ix86_expand_strlen (out, src, eoschar, align)
9605 rtx out, src, eoschar, align;
9606 {
9607 rtx addr, scratch1, scratch2, scratch3, scratch4;
9608
9609   /* The generic case of the strlen expander is long.  Avoid expanding it
9610      unless TARGET_INLINE_ALL_STRINGOPS.  */
9611
9612 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9613 && !TARGET_INLINE_ALL_STRINGOPS
9614 && !optimize_size
9615 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9616 return 0;
9617
9618 addr = force_reg (Pmode, XEXP (src, 0));
9619 scratch1 = gen_reg_rtx (Pmode);
9620
9621 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9622 && !optimize_size)
9623 {
9624 /* Well it seems that some optimizer does not combine a call like
9625 foo(strlen(bar), strlen(bar));
9626          when the move and the subtraction are done here.  It does calculate
9627 the length just once when these instructions are done inside of
9628 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9629 often used and I use one fewer register for the lifetime of
9630 output_strlen_unroll() this is better. */
9631
9632 emit_move_insn (out, addr);
9633
9634 ix86_expand_strlensi_unroll_1 (out, align);
9635
9636 /* strlensi_unroll_1 returns the address of the zero at the end of
9637 the string, like memchr(), so compute the length by subtracting
9638 the start address. */
9639 if (TARGET_64BIT)
9640 emit_insn (gen_subdi3 (out, out, addr));
9641 else
9642 emit_insn (gen_subsi3 (out, out, addr));
9643 }
9644 else
9645 {
9646 scratch2 = gen_reg_rtx (Pmode);
9647 scratch3 = gen_reg_rtx (Pmode);
9648 scratch4 = force_reg (Pmode, constm1_rtx);
9649
9650 emit_move_insn (scratch3, addr);
9651 eoschar = force_reg (QImode, eoschar);
9652
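      /* A short walk-through of the arithmetic below (illustration only):
	 the strlenqi pattern performs  repnz ; scasb  with the count register
	 preloaded to -1 (scratch4) and the end-of-string character in %al.
	 scasb decrements the count once per byte scanned, including the
	 terminator, so for a string of length LEN the count ends up as
	 -(LEN + 2).  Its one's complement is LEN + 1, and adding -1 yields
	 LEN, which is what the two insns following the pattern compute
	 into OUT.  */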
9653 emit_insn (gen_cld ());
9654 if (TARGET_64BIT)
9655 {
9656 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9657 align, scratch4, scratch3));
9658 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9659 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9660 }
9661 else
9662 {
9663 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9664 align, scratch4, scratch3));
9665 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9666 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9667 }
9668 }
9669 return 1;
9670 }
9671
9672 /* Expand the appropriate insns for doing strlen if not just doing
9673 repnz; scasb
9674
9675 out = result, initialized with the start address
9676 align_rtx = alignment of the address.
9677    scratch = scratch register, initialized with the start address when
9678         not aligned, otherwise undefined
9679
9680    This is just the body.  It needs the initializations mentioned above and
9681    some address computation at the end.  These things are done in i386.md.  */
9682
9683 static void
9684 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9685 rtx out, align_rtx;
9686 {
9687 int align;
9688 rtx tmp;
9689 rtx align_2_label = NULL_RTX;
9690 rtx align_3_label = NULL_RTX;
9691 rtx align_4_label = gen_label_rtx ();
9692 rtx end_0_label = gen_label_rtx ();
9693 rtx mem;
9694 rtx tmpreg = gen_reg_rtx (SImode);
9695 rtx scratch = gen_reg_rtx (SImode);
9696
9697 align = 0;
9698 if (GET_CODE (align_rtx) == CONST_INT)
9699 align = INTVAL (align_rtx);
9700
9701 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9702
9703 /* Is there a known alignment and is it less than 4? */
9704 if (align < 4)
9705 {
9706 rtx scratch1 = gen_reg_rtx (Pmode);
9707 emit_move_insn (scratch1, out);
9708 /* Is there a known alignment and is it not 2? */
9709 if (align != 2)
9710 {
9711 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9712 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9713
9714 /* Leave just the 3 lower bits. */
9715 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9716 NULL_RTX, 0, OPTAB_WIDEN);
9717
9718 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9719 Pmode, 1, align_4_label);
9720 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9721 Pmode, 1, align_2_label);
9722 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9723 Pmode, 1, align_3_label);
9724 }
9725 else
9726 {
9727 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9728       check whether the pointer is aligned to a 4-byte boundary.  */
9729
9730 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9731 NULL_RTX, 0, OPTAB_WIDEN);
9732
9733 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9734 Pmode, 1, align_4_label);
9735 }
9736
9737 mem = gen_rtx_MEM (QImode, out);
9738
9739 /* Now compare the bytes. */
9740
9741   /* Compare the first few unaligned bytes on a byte-by-byte basis.  */
9742 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9743 QImode, 1, end_0_label);
9744
9745 /* Increment the address. */
9746 if (TARGET_64BIT)
9747 emit_insn (gen_adddi3 (out, out, const1_rtx));
9748 else
9749 emit_insn (gen_addsi3 (out, out, const1_rtx));
9750
9751 /* Not needed with an alignment of 2 */
9752 if (align != 2)
9753 {
9754 emit_label (align_2_label);
9755
9756 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9757 end_0_label);
9758
9759 if (TARGET_64BIT)
9760 emit_insn (gen_adddi3 (out, out, const1_rtx));
9761 else
9762 emit_insn (gen_addsi3 (out, out, const1_rtx));
9763
9764 emit_label (align_3_label);
9765 }
9766
9767 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9768 end_0_label);
9769
9770 if (TARGET_64BIT)
9771 emit_insn (gen_adddi3 (out, out, const1_rtx));
9772 else
9773 emit_insn (gen_addsi3 (out, out, const1_rtx));
9774 }
9775
9776   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
9777      align this loop; doing so only bloats the program and does not help
9778      speed.  */
9779 emit_label (align_4_label);
9780
9781 mem = gen_rtx_MEM (SImode, out);
9782 emit_move_insn (scratch, mem);
9783 if (TARGET_64BIT)
9784 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9785 else
9786 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9787
9788   /* This formula yields a nonzero result iff one of the bytes is zero.
9789      This saves three branches inside the loop and many cycles.  */
9790
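  /* A self-contained illustration of the trick (not part of GCC):

	static int
	has_zero_byte (unsigned int w)
	{
	  return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
	}

     Subtracting 1 from every byte can only carry a borrow into bit 7 of a
     byte that was zero, and the "& ~w" term masks out bytes whose top bit
     was already set.  For instance w = 0x41420043 gives
     w - 0x01010101 = 0x4040ff42, and 0x4040ff42 & ~w & 0x80808080
     = 0x00008000, which is nonzero because byte 1 of w is zero.  */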
9791 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9792 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9793 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9794 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9795 gen_int_mode (0x80808080, SImode)));
9796 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9797 align_4_label);
9798
9799 if (TARGET_CMOVE)
9800 {
9801 rtx reg = gen_reg_rtx (SImode);
9802 rtx reg2 = gen_reg_rtx (Pmode);
9803 emit_move_insn (reg, tmpreg);
9804 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9805
9806 /* If zero is not in the first two bytes, move two bytes forward. */
9807 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9808 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9809 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9810 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9811 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9812 reg,
9813 tmpreg)));
9814       /* Emit the lea manually to avoid clobbering the flags.  */
9815 emit_insn (gen_rtx_SET (SImode, reg2,
9816 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9817
9818 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9819 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9820 emit_insn (gen_rtx_SET (VOIDmode, out,
9821 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9822 reg2,
9823 out)));
9824
9825 }
9826 else
9827 {
9828 rtx end_2_label = gen_label_rtx ();
9829 /* Is zero in the first two bytes? */
9830
9831 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9832 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9833 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9834 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9835 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9836 pc_rtx);
9837 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9838 JUMP_LABEL (tmp) = end_2_label;
9839
9840 /* Not in the first two. Move two bytes forward. */
9841 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9842 if (TARGET_64BIT)
9843 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9844 else
9845 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9846
9847 emit_label (end_2_label);
9848
9849 }
9850
9851   /* Avoid a branch when fixing up the final byte position.  */
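  /* Walk-through (illustration only): at this point OUT is 4 bytes (6 after
     the two-byte adjustment above) past the start of the word containing the
     zero byte, and bit 7 of the low byte of TMPREG is set iff the zero byte
     was the first of the two bytes still in question.  Adding TMPREG to
     itself copies that bit into the carry flag, and the subtract-with-borrow
     below computes OUT - 3 - CF, which lands exactly on the terminating
     zero.  */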
9852 tmpreg = gen_lowpart (QImode, tmpreg);
9853 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9854 if (TARGET_64BIT)
9855 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9856 else
9857 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9858
9859 emit_label (end_0_label);
9860 }
9861 \f
9862 /* Clear stack slot assignments remembered from previous functions.
9863 This is called from INIT_EXPANDERS once before RTL is emitted for each
9864 function. */
9865
9866 static void
9867 ix86_init_machine_status (p)
9868 struct function *p;
9869 {
9870 p->machine = (struct machine_function *)
9871 xcalloc (1, sizeof (struct machine_function));
9872 }
9873
9874 /* Mark machine specific bits of P for GC. */
9875 static void
9876 ix86_mark_machine_status (p)
9877 struct function *p;
9878 {
9879 struct machine_function *machine = p->machine;
9880 enum machine_mode mode;
9881 int n;
9882
9883 if (! machine)
9884 return;
9885
9886 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9887 mode = (enum machine_mode) ((int) mode + 1))
9888 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9889 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9890 }
9891
9892 static void
9893 ix86_free_machine_status (p)
9894 struct function *p;
9895 {
9896 free (p->machine);
9897 p->machine = NULL;
9898 }
9899
9900 /* Return a MEM corresponding to a stack slot with mode MODE.
9901 Allocate a new slot if necessary.
9902
9903 The RTL for a function can have several slots available: N is
9904 which slot to use. */
9905
9906 rtx
9907 assign_386_stack_local (mode, n)
9908 enum machine_mode mode;
9909 int n;
9910 {
9911 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9912 abort ();
9913
9914 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9915 ix86_stack_locals[(int) mode][n]
9916 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9917
9918 return ix86_stack_locals[(int) mode][n];
9919 }
9920 \f
9921 /* Calculate the length of the memory address in the instruction
9922 encoding. Does not include the one-byte modrm, opcode, or prefix. */
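/* Some illustrative return values (assuming the usual operand forms): for
   (%eax) the length is 0, for (%esp) or 8(%ebp) it is 1 (a SIB byte resp. an
   8-bit displacement), for a bare symbolic address it is 4 (32-bit
   displacement), and for 8(%ebx,%esi,4) it is 2 (disp8 plus SIB byte).  */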
9923
9924 static int
9925 memory_address_length (addr)
9926 rtx addr;
9927 {
9928 struct ix86_address parts;
9929 rtx base, index, disp;
9930 int len;
9931
9932 if (GET_CODE (addr) == PRE_DEC
9933 || GET_CODE (addr) == POST_INC
9934 || GET_CODE (addr) == PRE_MODIFY
9935 || GET_CODE (addr) == POST_MODIFY)
9936 return 0;
9937
9938 if (! ix86_decompose_address (addr, &parts))
9939 abort ();
9940
9941 base = parts.base;
9942 index = parts.index;
9943 disp = parts.disp;
9944 len = 0;
9945
9946 /* Register Indirect. */
9947 if (base && !index && !disp)
9948 {
9949 /* Special cases: ebp and esp need the two-byte modrm form. */
9950 if (addr == stack_pointer_rtx
9951 || addr == arg_pointer_rtx
9952 || addr == frame_pointer_rtx
9953 || addr == hard_frame_pointer_rtx)
9954 len = 1;
9955 }
9956
9957 /* Direct Addressing. */
9958 else if (disp && !base && !index)
9959 len = 4;
9960
9961 else
9962 {
9963 /* Find the length of the displacement constant. */
9964 if (disp)
9965 {
9966 if (GET_CODE (disp) == CONST_INT
9967 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9968 len = 1;
9969 else
9970 len = 4;
9971 }
9972
9973 /* An index requires the two-byte modrm form. */
9974 if (index)
9975 len += 1;
9976 }
9977
9978 return len;
9979 }
9980
9981 /* Compute the default value for the "length_immediate" attribute.  When
9982    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
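/* For example (illustration only): "addl $4, %eax" has an 8-bit immediate
   alternative, so with SHORTFORM nonzero this returns 1, while
   "addl $1000, %eax" does not fit in a sign-extended 8-bit immediate and
   yields 4 for an SImode insn.  */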
9983 int
9984 ix86_attr_length_immediate_default (insn, shortform)
9985 rtx insn;
9986 int shortform;
9987 {
9988 int len = 0;
9989 int i;
9990 extract_insn_cached (insn);
9991 for (i = recog_data.n_operands - 1; i >= 0; --i)
9992 if (CONSTANT_P (recog_data.operand[i]))
9993 {
9994 if (len)
9995 abort ();
9996 if (shortform
9997 && GET_CODE (recog_data.operand[i]) == CONST_INT
9998 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9999 len = 1;
10000 else
10001 {
10002 switch (get_attr_mode (insn))
10003 {
10004 case MODE_QI:
10005 len+=1;
10006 break;
10007 case MODE_HI:
10008 len+=2;
10009 break;
10010 case MODE_SI:
10011 len+=4;
10012 break;
10013 	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
10014 case MODE_DI:
10015 len+=4;
10016 break;
10017 default:
10018 fatal_insn ("unknown insn mode", insn);
10019 }
10020 }
10021 }
10022 return len;
10023 }
10024 /* Compute the default value for the "length_address" attribute.  */
10025 int
10026 ix86_attr_length_address_default (insn)
10027 rtx insn;
10028 {
10029 int i;
10030 extract_insn_cached (insn);
10031 for (i = recog_data.n_operands - 1; i >= 0; --i)
10032 if (GET_CODE (recog_data.operand[i]) == MEM)
10033 {
10034 return memory_address_length (XEXP (recog_data.operand[i], 0));
10035 break;
10036 }
10037 return 0;
10038 }
10039 \f
10040 /* Return the maximum number of instructions a cpu can issue. */
10041
10042 static int
10043 ix86_issue_rate ()
10044 {
10045 switch (ix86_cpu)
10046 {
10047 case PROCESSOR_PENTIUM:
10048 case PROCESSOR_K6:
10049 return 2;
10050
10051 case PROCESSOR_PENTIUMPRO:
10052 case PROCESSOR_PENTIUM4:
10053 case PROCESSOR_ATHLON:
10054 return 3;
10055
10056 default:
10057 return 1;
10058 }
10059 }
10060
10061 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
10062    by DEP_INSN and nothing else set by DEP_INSN.  */
10063
10064 static int
10065 ix86_flags_dependant (insn, dep_insn, insn_type)
10066 rtx insn, dep_insn;
10067 enum attr_type insn_type;
10068 {
10069 rtx set, set2;
10070
10071 /* Simplify the test for uninteresting insns. */
10072 if (insn_type != TYPE_SETCC
10073 && insn_type != TYPE_ICMOV
10074 && insn_type != TYPE_FCMOV
10075 && insn_type != TYPE_IBR)
10076 return 0;
10077
10078 if ((set = single_set (dep_insn)) != 0)
10079 {
10080 set = SET_DEST (set);
10081 set2 = NULL_RTX;
10082 }
10083 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10084 && XVECLEN (PATTERN (dep_insn), 0) == 2
10085 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10086 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10087 {
10088 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10089       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10090 }
10091 else
10092 return 0;
10093
10094 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10095 return 0;
10096
10097 /* This test is true if the dependent insn reads the flags but
10098 not any other potentially set register. */
10099 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10100 return 0;
10101
10102 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10103 return 0;
10104
10105 return 1;
10106 }
10107
10108 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10109 address with operands set by DEP_INSN. */
10110
10111 static int
10112 ix86_agi_dependant (insn, dep_insn, insn_type)
10113 rtx insn, dep_insn;
10114 enum attr_type insn_type;
10115 {
10116 rtx addr;
10117
10118 if (insn_type == TYPE_LEA
10119 && TARGET_PENTIUM)
10120 {
10121 addr = PATTERN (insn);
10122 if (GET_CODE (addr) == SET)
10123 ;
10124 else if (GET_CODE (addr) == PARALLEL
10125 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10126 addr = XVECEXP (addr, 0, 0);
10127 else
10128 abort ();
10129 addr = SET_SRC (addr);
10130 }
10131 else
10132 {
10133 int i;
10134 extract_insn_cached (insn);
10135 for (i = recog_data.n_operands - 1; i >= 0; --i)
10136 if (GET_CODE (recog_data.operand[i]) == MEM)
10137 {
10138 addr = XEXP (recog_data.operand[i], 0);
10139 goto found;
10140 }
10141 return 0;
10142 found:;
10143 }
10144
10145 return modified_in_p (addr, dep_insn);
10146 }
10147
10148 static int
10149 ix86_adjust_cost (insn, link, dep_insn, cost)
10150 rtx insn, link, dep_insn;
10151 int cost;
10152 {
10153 enum attr_type insn_type, dep_insn_type;
10154 enum attr_memory memory, dep_memory;
10155 rtx set, set2;
10156 int dep_insn_code_number;
10157
10158   /* Anti and output dependencies have zero cost on all CPUs.  */
10159 if (REG_NOTE_KIND (link) != 0)
10160 return 0;
10161
10162 dep_insn_code_number = recog_memoized (dep_insn);
10163
10164 /* If we can't recognize the insns, we can't really do anything. */
10165 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10166 return cost;
10167
10168 insn_type = get_attr_type (insn);
10169 dep_insn_type = get_attr_type (dep_insn);
10170
10171 switch (ix86_cpu)
10172 {
10173 case PROCESSOR_PENTIUM:
10174 /* Address Generation Interlock adds a cycle of latency. */
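      /* For example (illustration only), on the original Pentium a sequence
	 such as

	    addl	%ebx, %esi
	    movl	(%esi), %eax

	 pays the extra cycle because %esi is used for address generation
	 immediately after being written.  */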
10175 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10176 cost += 1;
10177
10178 /* ??? Compares pair with jump/setcc. */
10179 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10180 cost = 0;
10181
10182       /* Floating point stores require the value to be ready one cycle earlier.  */
10183 if (insn_type == TYPE_FMOV
10184 && get_attr_memory (insn) == MEMORY_STORE
10185 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10186 cost += 1;
10187 break;
10188
10189 case PROCESSOR_PENTIUMPRO:
10190 memory = get_attr_memory (insn);
10191 dep_memory = get_attr_memory (dep_insn);
10192
10193 /* Since we can't represent delayed latencies of load+operation,
10194 increase the cost here for non-imov insns. */
10195 if (dep_insn_type != TYPE_IMOV
10196 && dep_insn_type != TYPE_FMOV
10197 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10198 cost += 1;
10199
10200 /* INT->FP conversion is expensive. */
10201 if (get_attr_fp_int_src (dep_insn))
10202 cost += 5;
10203
10204 /* There is one cycle extra latency between an FP op and a store. */
10205 if (insn_type == TYPE_FMOV
10206 && (set = single_set (dep_insn)) != NULL_RTX
10207 && (set2 = single_set (insn)) != NULL_RTX
10208 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10209 && GET_CODE (SET_DEST (set2)) == MEM)
10210 cost += 1;
10211
10212       /* Show the ability of the reorder buffer to hide the latency of a load
10213 	 by executing it in parallel with the previous instruction, provided
10214 	 the previous instruction is not needed to compute the address.  */
10215 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10216 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10217 {
10218 	  /* Claim that moves take one cycle, as the core can issue one load
10219 	     at a time and the next load can start a cycle later.  */
10220 if (dep_insn_type == TYPE_IMOV
10221 || dep_insn_type == TYPE_FMOV)
10222 cost = 1;
10223 else if (cost > 1)
10224 cost--;
10225 }
10226 break;
10227
10228 case PROCESSOR_K6:
10229 memory = get_attr_memory (insn);
10230 dep_memory = get_attr_memory (dep_insn);
10231 /* The esp dependency is resolved before the instruction is really
10232 finished. */
10233 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10234 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10235 return 1;
10236
10237 /* Since we can't represent delayed latencies of load+operation,
10238 increase the cost here for non-imov insns. */
10239 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10240 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10241
10242 /* INT->FP conversion is expensive. */
10243 if (get_attr_fp_int_src (dep_insn))
10244 cost += 5;
10245
10246       /* Show the ability of the reorder buffer to hide the latency of a load
10247 	 by executing it in parallel with the previous instruction, provided
10248 	 the previous instruction is not needed to compute the address.  */
10249 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10250 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10251 {
10252 	  /* Claim that moves take one cycle, as the core can issue one load
10253 	     at a time and the next load can start a cycle later.  */
10254 if (dep_insn_type == TYPE_IMOV
10255 || dep_insn_type == TYPE_FMOV)
10256 cost = 1;
10257 else if (cost > 2)
10258 cost -= 2;
10259 else
10260 cost = 1;
10261 }
10262 break;
10263
10264 case PROCESSOR_ATHLON:
10265 memory = get_attr_memory (insn);
10266 dep_memory = get_attr_memory (dep_insn);
10267
10268 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10269 {
10270 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10271 cost += 2;
10272 else
10273 cost += 3;
10274 }
10275       /* Show the ability of the reorder buffer to hide the latency of a load
10276 	 by executing it in parallel with the previous instruction, provided
10277 	 the previous instruction is not needed to compute the address.  */
10278 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10279 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10280 {
10281 	  /* Claim that moves take one cycle, as the core can issue one load
10282 	     at a time and the next load can start a cycle later.  */
10283 if (dep_insn_type == TYPE_IMOV
10284 || dep_insn_type == TYPE_FMOV)
10285 cost = 0;
10286 else if (cost >= 3)
10287 cost -= 3;
10288 else
10289 cost = 0;
10290 }
10291
10292 default:
10293 break;
10294 }
10295
10296 return cost;
10297 }
10298
10299 static union
10300 {
10301 struct ppro_sched_data
10302 {
10303 rtx decode[3];
10304 int issued_this_cycle;
10305 } ppro;
10306 } ix86_sched_data;
10307
10308 static int
10309 ix86_safe_length (insn)
10310 rtx insn;
10311 {
10312 if (recog_memoized (insn) >= 0)
10313 return get_attr_length (insn);
10314 else
10315 return 128;
10316 }
10317
10318 static int
10319 ix86_safe_length_prefix (insn)
10320 rtx insn;
10321 {
10322 if (recog_memoized (insn) >= 0)
10323 return get_attr_length (insn);
10324 else
10325 return 0;
10326 }
10327
10328 static enum attr_memory
10329 ix86_safe_memory (insn)
10330 rtx insn;
10331 {
10332 if (recog_memoized (insn) >= 0)
10333 return get_attr_memory (insn);
10334 else
10335 return MEMORY_UNKNOWN;
10336 }
10337
10338 static enum attr_ppro_uops
10339 ix86_safe_ppro_uops (insn)
10340 rtx insn;
10341 {
10342 if (recog_memoized (insn) >= 0)
10343 return get_attr_ppro_uops (insn);
10344 else
10345 return PPRO_UOPS_MANY;
10346 }
10347
10348 static void
10349 ix86_dump_ppro_packet (dump)
10350 FILE *dump;
10351 {
10352 if (ix86_sched_data.ppro.decode[0])
10353 {
10354 fprintf (dump, "PPRO packet: %d",
10355 INSN_UID (ix86_sched_data.ppro.decode[0]));
10356 if (ix86_sched_data.ppro.decode[1])
10357 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10358 if (ix86_sched_data.ppro.decode[2])
10359 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10360 fputc ('\n', dump);
10361 }
10362 }
10363
10364 /* We're beginning a new block. Initialize data structures as necessary. */
10365
10366 static void
10367 ix86_sched_init (dump, sched_verbose, veclen)
10368 FILE *dump ATTRIBUTE_UNUSED;
10369 int sched_verbose ATTRIBUTE_UNUSED;
10370 int veclen ATTRIBUTE_UNUSED;
10371 {
10372 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10373 }
10374
10375 /* Shift INSN to SLOT, and shift everything else down. */
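/* For instance (illustration only): if the ready list is {A, B, C, D} with
   INSNP pointing at B and SLOT pointing at D, the result is {A, C, D, B};
   B moves into D's slot and C and D each shift down one position.  */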
10376
10377 static void
10378 ix86_reorder_insn (insnp, slot)
10379 rtx *insnp, *slot;
10380 {
10381 if (insnp != slot)
10382 {
10383 rtx insn = *insnp;
10384 do
10385 insnp[0] = insnp[1];
10386 while (++insnp != slot);
10387 *insnp = insn;
10388 }
10389 }
10390
10391 static void
10392 ix86_sched_reorder_ppro (ready, e_ready)
10393 rtx *ready;
10394 rtx *e_ready;
10395 {
10396 rtx decode[3];
10397 enum attr_ppro_uops cur_uops;
10398 int issued_this_cycle;
10399 rtx *insnp;
10400 int i;
10401
10402 /* At this point .ppro.decode contains the state of the three
10403      decoders from the last "cycle".  That is, those insns that were
10404 actually independent. But here we're scheduling for the
10405 decoder, and we may find things that are decodable in the
10406 same cycle. */
10407
10408 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10409 issued_this_cycle = 0;
10410
10411 insnp = e_ready;
10412 cur_uops = ix86_safe_ppro_uops (*insnp);
10413
10414   /* If the decoders are empty, and we have a complex insn at the
10415 head of the priority queue, let it issue without complaint. */
10416 if (decode[0] == NULL)
10417 {
10418 if (cur_uops == PPRO_UOPS_MANY)
10419 {
10420 decode[0] = *insnp;
10421 goto ppro_done;
10422 }
10423
10424       /* Otherwise, search for a 2-4 uop insn to issue.  */
10425 while (cur_uops != PPRO_UOPS_FEW)
10426 {
10427 if (insnp == ready)
10428 break;
10429 cur_uops = ix86_safe_ppro_uops (*--insnp);
10430 }
10431
10432 /* If so, move it to the head of the line. */
10433 if (cur_uops == PPRO_UOPS_FEW)
10434 ix86_reorder_insn (insnp, e_ready);
10435
10436 /* Issue the head of the queue. */
10437 issued_this_cycle = 1;
10438 decode[0] = *e_ready--;
10439 }
10440
10441 /* Look for simple insns to fill in the other two slots. */
10442 for (i = 1; i < 3; ++i)
10443 if (decode[i] == NULL)
10444 {
10445 if (ready >= e_ready)
10446 goto ppro_done;
10447
10448 insnp = e_ready;
10449 cur_uops = ix86_safe_ppro_uops (*insnp);
10450 while (cur_uops != PPRO_UOPS_ONE)
10451 {
10452 if (insnp == ready)
10453 break;
10454 cur_uops = ix86_safe_ppro_uops (*--insnp);
10455 }
10456
10457 /* Found one. Move it to the head of the queue and issue it. */
10458 if (cur_uops == PPRO_UOPS_ONE)
10459 {
10460 ix86_reorder_insn (insnp, e_ready);
10461 decode[i] = *e_ready--;
10462 issued_this_cycle++;
10463 continue;
10464 }
10465
10466 /* ??? Didn't find one. Ideally, here we would do a lazy split
10467 of 2-uop insns, issue one and queue the other. */
10468 }
10469
10470 ppro_done:
10471 if (issued_this_cycle == 0)
10472 issued_this_cycle = 1;
10473 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10474 }
10475
10476 /* We are about to begin issuing insns for this clock cycle.
10477 Override the default sort algorithm to better slot instructions. */
10478 static int
10479 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10480 FILE *dump ATTRIBUTE_UNUSED;
10481 int sched_verbose ATTRIBUTE_UNUSED;
10482 rtx *ready;
10483 int *n_readyp;
10484 int clock_var ATTRIBUTE_UNUSED;
10485 {
10486 int n_ready = *n_readyp;
10487 rtx *e_ready = ready + n_ready - 1;
10488
10489 if (n_ready < 2)
10490 goto out;
10491
10492 switch (ix86_cpu)
10493 {
10494 default:
10495 break;
10496
10497 case PROCESSOR_PENTIUMPRO:
10498 ix86_sched_reorder_ppro (ready, e_ready);
10499 break;
10500 }
10501
10502 out:
10503 return ix86_issue_rate ();
10504 }
10505
10506 /* We are about to issue INSN. Return the number of insns left on the
10507 ready queue that can be issued this cycle. */
10508
10509 static int
10510 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10511 FILE *dump;
10512 int sched_verbose;
10513 rtx insn;
10514 int can_issue_more;
10515 {
10516 int i;
10517 switch (ix86_cpu)
10518 {
10519 default:
10520 return can_issue_more - 1;
10521
10522 case PROCESSOR_PENTIUMPRO:
10523 {
10524 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10525
10526 if (uops == PPRO_UOPS_MANY)
10527 {
10528 if (sched_verbose)
10529 ix86_dump_ppro_packet (dump);
10530 ix86_sched_data.ppro.decode[0] = insn;
10531 ix86_sched_data.ppro.decode[1] = NULL;
10532 ix86_sched_data.ppro.decode[2] = NULL;
10533 if (sched_verbose)
10534 ix86_dump_ppro_packet (dump);
10535 ix86_sched_data.ppro.decode[0] = NULL;
10536 }
10537 else if (uops == PPRO_UOPS_FEW)
10538 {
10539 if (sched_verbose)
10540 ix86_dump_ppro_packet (dump);
10541 ix86_sched_data.ppro.decode[0] = insn;
10542 ix86_sched_data.ppro.decode[1] = NULL;
10543 ix86_sched_data.ppro.decode[2] = NULL;
10544 }
10545 else
10546 {
10547 for (i = 0; i < 3; ++i)
10548 if (ix86_sched_data.ppro.decode[i] == NULL)
10549 {
10550 ix86_sched_data.ppro.decode[i] = insn;
10551 break;
10552 }
10553 if (i == 3)
10554 abort ();
10555 if (i == 2)
10556 {
10557 if (sched_verbose)
10558 ix86_dump_ppro_packet (dump);
10559 ix86_sched_data.ppro.decode[0] = NULL;
10560 ix86_sched_data.ppro.decode[1] = NULL;
10561 ix86_sched_data.ppro.decode[2] = NULL;
10562 }
10563 }
10564 }
10565 return --ix86_sched_data.ppro.issued_this_cycle;
10566 }
10567 }
10568 \f
10569 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10570    SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10571 appropriate. */
10572
10573 void
10574 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10575 rtx insns;
10576 rtx dstref, srcref, dstreg, srcreg;
10577 {
10578 rtx insn;
10579
10580 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10581 if (INSN_P (insn))
10582 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10583 dstreg, srcreg);
10584 }
10585
10586 /* Subroutine of above to actually do the updating by recursively walking
10587 the rtx. */
10588
10589 static void
10590 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10591 rtx x;
10592 rtx dstref, srcref, dstreg, srcreg;
10593 {
10594 enum rtx_code code = GET_CODE (x);
10595 const char *format_ptr = GET_RTX_FORMAT (code);
10596 int i, j;
10597
10598 if (code == MEM && XEXP (x, 0) == dstreg)
10599 MEM_COPY_ATTRIBUTES (x, dstref);
10600 else if (code == MEM && XEXP (x, 0) == srcreg)
10601 MEM_COPY_ATTRIBUTES (x, srcref);
10602
10603 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10604 {
10605 if (*format_ptr == 'e')
10606 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10607 dstreg, srcreg);
10608 else if (*format_ptr == 'E')
10609 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10610 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10611 dstreg, srcreg);
10612 }
10613 }
10614 \f
10615 /* Compute the alignment given to a constant that is being placed in memory.
10616 EXP is the constant and ALIGN is the alignment that the object would
10617 ordinarily have.
10618 The value of this function is used instead of that alignment to align
10619 the object. */
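/* For example (illustration only): a DFmode constant that would otherwise be
   less strictly aligned is promoted to 64-bit alignment, and a sufficiently
   long string constant (TREE_STRING_LENGTH of 31 or more) is given 256-bit
   alignment so the inlined string operations can use aligned accesses.  */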
10620
10621 int
10622 ix86_constant_alignment (exp, align)
10623 tree exp;
10624 int align;
10625 {
10626 if (TREE_CODE (exp) == REAL_CST)
10627 {
10628 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10629 return 64;
10630 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10631 return 128;
10632 }
10633 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10634 && align < 256)
10635 return 256;
10636
10637 return align;
10638 }
10639
10640 /* Compute the alignment for a static variable.
10641 TYPE is the data type, and ALIGN is the alignment that
10642 the object would ordinarily have. The value of this function is used
10643 instead of that alignment to align the object. */
10644
10645 int
10646 ix86_data_alignment (type, align)
10647 tree type;
10648 int align;
10649 {
10650 if (AGGREGATE_TYPE_P (type)
10651 && TYPE_SIZE (type)
10652 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10653 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10654 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10655 return 256;
10656
10657 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10658      to a 16-byte boundary.  */
10659 if (TARGET_64BIT)
10660 {
10661 if (AGGREGATE_TYPE_P (type)
10662 && TYPE_SIZE (type)
10663 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10664 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10665 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10666 return 128;
10667 }
10668
10669 if (TREE_CODE (type) == ARRAY_TYPE)
10670 {
10671 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10672 return 64;
10673 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10674 return 128;
10675 }
10676 else if (TREE_CODE (type) == COMPLEX_TYPE)
10677 {
10678
10679 if (TYPE_MODE (type) == DCmode && align < 64)
10680 return 64;
10681 if (TYPE_MODE (type) == XCmode && align < 128)
10682 return 128;
10683 }
10684 else if ((TREE_CODE (type) == RECORD_TYPE
10685 || TREE_CODE (type) == UNION_TYPE
10686 || TREE_CODE (type) == QUAL_UNION_TYPE)
10687 && TYPE_FIELDS (type))
10688 {
10689 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10690 return 64;
10691 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10692 return 128;
10693 }
10694 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10695 || TREE_CODE (type) == INTEGER_TYPE)
10696 {
10697 if (TYPE_MODE (type) == DFmode && align < 64)
10698 return 64;
10699 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10700 return 128;
10701 }
10702
10703 return align;
10704 }
10705
10706 /* Compute the alignment for a local variable.
10707 TYPE is the data type, and ALIGN is the alignment that
10708    the object would ordinarily have.  The value of this function is used
10709 instead of that alignment to align the object. */
10710
10711 int
10712 ix86_local_alignment (type, align)
10713 tree type;
10714 int align;
10715 {
10716 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10717      to a 16-byte boundary.  */
10718 if (TARGET_64BIT)
10719 {
10720 if (AGGREGATE_TYPE_P (type)
10721 && TYPE_SIZE (type)
10722 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10723 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10724 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10725 return 128;
10726 }
10727 if (TREE_CODE (type) == ARRAY_TYPE)
10728 {
10729 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10730 return 64;
10731 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10732 return 128;
10733 }
10734 else if (TREE_CODE (type) == COMPLEX_TYPE)
10735 {
10736 if (TYPE_MODE (type) == DCmode && align < 64)
10737 return 64;
10738 if (TYPE_MODE (type) == XCmode && align < 128)
10739 return 128;
10740 }
10741 else if ((TREE_CODE (type) == RECORD_TYPE
10742 || TREE_CODE (type) == UNION_TYPE
10743 || TREE_CODE (type) == QUAL_UNION_TYPE)
10744 && TYPE_FIELDS (type))
10745 {
10746 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10747 return 64;
10748 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10749 return 128;
10750 }
10751 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10752 || TREE_CODE (type) == INTEGER_TYPE)
10753 {
10754
10755 if (TYPE_MODE (type) == DFmode && align < 64)
10756 return 64;
10757 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10758 return 128;
10759 }
10760 return align;
10761 }
10762 \f
10763 /* Emit RTL insns to initialize the variable parts of a trampoline.
10764 FNADDR is an RTX for the address of the function's pure code.
10765 CXT is an RTX for the static chain value for the function. */
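/* A rough sketch of the emitted bytes (illustration only; the values come
   from the constants written below):

     32-bit:   b9 <cxt:4>		movl   $CXT, %ecx
	       e9 <disp:4>		jmp    FNADDR	(pc-relative)

     64-bit:   41 bb <fnaddr:4>		movl   $FNADDR, %r11d
	   or  49 bb <fnaddr:8>		movabs $FNADDR, %r11
	       49 ba <cxt:8>		movabs $CXT, %r10
	       49 ff e3			jmp    *%r11  */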
10766 void
10767 x86_initialize_trampoline (tramp, fnaddr, cxt)
10768 rtx tramp, fnaddr, cxt;
10769 {
10770 if (!TARGET_64BIT)
10771 {
10772 /* Compute offset from the end of the jmp to the target function. */
10773 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10774 plus_constant (tramp, 10),
10775 NULL_RTX, 1, OPTAB_DIRECT);
10776 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10777 gen_int_mode (0xb9, QImode));
10778 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10779 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10780 gen_int_mode (0xe9, QImode));
10781 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10782 }
10783 else
10784 {
10785 int offset = 0;
10786 /* Try to load address using shorter movl instead of movabs.
10787 	 We may want to support movq for kernel mode, but the kernel does not use
10788 trampolines at the moment. */
10789 if (x86_64_zero_extended_value (fnaddr))
10790 {
10791 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10792 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10793 gen_int_mode (0xbb41, HImode));
10794 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10795 gen_lowpart (SImode, fnaddr));
10796 offset += 6;
10797 }
10798 else
10799 {
10800 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10801 gen_int_mode (0xbb49, HImode));
10802 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10803 fnaddr);
10804 offset += 10;
10805 }
10806 /* Load static chain using movabs to r10. */
10807 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10808 gen_int_mode (0xba49, HImode));
10809 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10810 cxt);
10811 offset += 10;
10812       /* Jump to r11.  */
10813 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10814 gen_int_mode (0xff49, HImode));
10815 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10816 gen_int_mode (0xe3, QImode));
10817 offset += 3;
10818 if (offset > TRAMPOLINE_SIZE)
10819 abort ();
10820 }
10821 }
10822 \f
10823 #define def_builtin(MASK, NAME, TYPE, CODE) \
10824 do { \
10825 if ((MASK) & target_flags) \
10826 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10827 } while (0)
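/* A hypothetical use (the real calls live in the builtin initialization code
   later in this file, not shown here; the type name below is only an
   assumption for illustration):

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The builtin is registered only when the given mask bits are present in
   target_flags, e.g. only with -msse or -msse2 for MASK_SSE1.  */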
10828
10829 struct builtin_description
10830 {
10831 const unsigned int mask;
10832 const enum insn_code icode;
10833 const char *const name;
10834 const enum ix86_builtins code;
10835 const enum rtx_code comparison;
10836 const unsigned int flag;
10837 };
10838
10839 /* Used for builtins that are enabled both by -msse and -msse2. */
10840 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
10841
10842 static const struct builtin_description bdesc_comi[] =
10843 {
10844 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10845 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10846 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10847 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10848 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10849 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10850 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10851 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10852 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10853 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10854 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10855 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
10856 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
10857 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
10858 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
10859 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
10860 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
10861 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
10862 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
10863 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
10864 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
10865 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
10866 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
10867 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
10868 };
10869
10870 static const struct builtin_description bdesc_2arg[] =
10871 {
10872 /* SSE */
10873 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10874 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10875 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10876 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10877 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10878 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10879 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10880 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10881
10882 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10883 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10884 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10885 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10886 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10887 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10888 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10889 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10890 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10891 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10892 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10893 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10894 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10895 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10896 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10897 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10898 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10899 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10900 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10901 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10902 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10903 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10904 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10905 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10906
10907 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10908 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10909 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10910 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10911
10912 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10913 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10914 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10915 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10916 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10917
10918 /* MMX */
10919 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10920 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10921 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10922 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10923 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10924 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10925
10926 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10927 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10928 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10929 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10930 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10931 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10932 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10933 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10934
10935 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10936 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10937 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10938
10939 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10940 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10941 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10942 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10943
10944 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10945 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10946
10947 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10948 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10949 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10950 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10951 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10952 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10953
10954 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10955 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10956 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10957 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10958
10959 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10960 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10961 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10962 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10963 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10964 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10965
10966 /* Special. */
10967 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10968 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10969 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10970
10971 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10972 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10973
10974 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10975 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10976 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10977 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10978 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10979 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10980
10981 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10982 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10983 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10984 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10985 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10986 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10987
10988 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10989 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10990 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10991 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10992
10993 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10994 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
10995
10996 /* SSE2 */
10997 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
10998 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
10999 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11000 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11001 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11002 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11003 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11004 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11005
11006 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11007 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11008 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11009 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11010 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11011 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11012 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11013 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11014 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11015 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11016 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11017 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11018 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11019 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11020 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11021 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11022 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11023 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11024 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11025 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11026 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11027 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11028 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11029 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11030
11031 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11032 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11033 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11034 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11035
11036 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11037 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11038 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11039 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11040
11041 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11042 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11043 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11044
11045 /* SSE2 MMX */
11046 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11047 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11048 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11049 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11050 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11051 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11052 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11053 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11054
11055 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11056 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11057 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11058 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11059 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11060 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11061 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11062 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11063
11064 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11065 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11066 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11067 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11068
11069 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11070 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11071 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11072 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11073
11074 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11075 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11076
11077 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11078 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11079 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11080 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11081 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11082 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11083
11084 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11085 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11086 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11087 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11088
11089 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11090 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11091 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11092 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11093 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11094 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11095
11096 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11097 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11098 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11099
11100 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11101 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11102
11103 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11104 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11105 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11106 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11107 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11108 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11109
11110 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11111 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11112 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11113 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11114 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11115 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11116
11117 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11118 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11119 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11120 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11121
11122 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11123
11124 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11125 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11126 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11127 };
11128
11129 static const struct builtin_description bdesc_1arg[] =
11130 {
11131 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11132 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11133
11134 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11135 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11136 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11137
11138 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11139 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11140 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11141 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11142
11143 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11144 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11145 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11146
11147 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11148
11149 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11150 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11151
11152 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11153 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11154 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11155 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11156 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11157
11158 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11159
11160 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11161 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11162
11163 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11164 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11165 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11166 };
11167
11168 void
11169 ix86_init_builtins ()
11170 {
11171 if (TARGET_MMX)
11172 ix86_init_mmx_sse_builtins ();
11173 }
11174
11175 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11176    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
11177    builtins are defined.  */
11178 static void
11179 ix86_init_mmx_sse_builtins ()
11180 {
11181 const struct builtin_description * d;
11182 size_t i;
11183 tree endlink = void_list_node;
11184
11185 tree pchar_type_node = build_pointer_type (char_type_node);
11186 tree pfloat_type_node = build_pointer_type (float_type_node);
11187 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11188 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11189 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11190
11191 /* Comparisons. */
11192 tree int_ftype_v4sf_v4sf
11193 = build_function_type (integer_type_node,
11194 tree_cons (NULL_TREE, V4SF_type_node,
11195 tree_cons (NULL_TREE,
11196 V4SF_type_node,
11197 endlink)));
11198 tree v4si_ftype_v4sf_v4sf
11199 = build_function_type (V4SI_type_node,
11200 tree_cons (NULL_TREE, V4SF_type_node,
11201 tree_cons (NULL_TREE,
11202 V4SF_type_node,
11203 endlink)));
11204 /* MMX/SSE/integer conversions. */
11205 tree int_ftype_v4sf
11206 = build_function_type (integer_type_node,
11207 tree_cons (NULL_TREE, V4SF_type_node,
11208 endlink));
11209 tree int_ftype_v8qi
11210 = build_function_type (integer_type_node,
11211 tree_cons (NULL_TREE, V8QI_type_node,
11212 endlink));
11213 tree v4sf_ftype_v4sf_int
11214 = build_function_type (V4SF_type_node,
11215 tree_cons (NULL_TREE, V4SF_type_node,
11216 tree_cons (NULL_TREE, integer_type_node,
11217 endlink)));
11218 tree v4sf_ftype_v4sf_v2si
11219 = build_function_type (V4SF_type_node,
11220 tree_cons (NULL_TREE, V4SF_type_node,
11221 tree_cons (NULL_TREE, V2SI_type_node,
11222 endlink)));
11223 tree int_ftype_v4hi_int
11224 = build_function_type (integer_type_node,
11225 tree_cons (NULL_TREE, V4HI_type_node,
11226 tree_cons (NULL_TREE, integer_type_node,
11227 endlink)));
11228 tree v4hi_ftype_v4hi_int_int
11229 = build_function_type (V4HI_type_node,
11230 tree_cons (NULL_TREE, V4HI_type_node,
11231 tree_cons (NULL_TREE, integer_type_node,
11232 tree_cons (NULL_TREE,
11233 integer_type_node,
11234 endlink))));
11235 /* Miscellaneous. */
11236 tree v8qi_ftype_v4hi_v4hi
11237 = build_function_type (V8QI_type_node,
11238 tree_cons (NULL_TREE, V4HI_type_node,
11239 tree_cons (NULL_TREE, V4HI_type_node,
11240 endlink)));
11241 tree v4hi_ftype_v2si_v2si
11242 = build_function_type (V4HI_type_node,
11243 tree_cons (NULL_TREE, V2SI_type_node,
11244 tree_cons (NULL_TREE, V2SI_type_node,
11245 endlink)));
11246 tree v4sf_ftype_v4sf_v4sf_int
11247 = build_function_type (V4SF_type_node,
11248 tree_cons (NULL_TREE, V4SF_type_node,
11249 tree_cons (NULL_TREE, V4SF_type_node,
11250 tree_cons (NULL_TREE,
11251 integer_type_node,
11252 endlink))));
11253 tree v2si_ftype_v4hi_v4hi
11254 = build_function_type (V2SI_type_node,
11255 tree_cons (NULL_TREE, V4HI_type_node,
11256 tree_cons (NULL_TREE, V4HI_type_node,
11257 endlink)));
11258 tree v4hi_ftype_v4hi_int
11259 = build_function_type (V4HI_type_node,
11260 tree_cons (NULL_TREE, V4HI_type_node,
11261 tree_cons (NULL_TREE, integer_type_node,
11262 endlink)));
11263 tree v4hi_ftype_v4hi_di
11264 = build_function_type (V4HI_type_node,
11265 tree_cons (NULL_TREE, V4HI_type_node,
11266 tree_cons (NULL_TREE,
11267 long_long_integer_type_node,
11268 endlink)));
11269 tree v2si_ftype_v2si_di
11270 = build_function_type (V2SI_type_node,
11271 tree_cons (NULL_TREE, V2SI_type_node,
11272 tree_cons (NULL_TREE,
11273 long_long_integer_type_node,
11274 endlink)));
11275 tree void_ftype_void
11276 = build_function_type (void_type_node, endlink);
11277 tree void_ftype_unsigned
11278 = build_function_type (void_type_node,
11279 tree_cons (NULL_TREE, unsigned_type_node,
11280 endlink));
11281 tree unsigned_ftype_void
11282 = build_function_type (unsigned_type_node, endlink);
11283 tree di_ftype_void
11284 = build_function_type (long_long_unsigned_type_node, endlink);
11285 tree v4sf_ftype_void
11286 = build_function_type (V4SF_type_node, endlink);
11287 tree v2si_ftype_v4sf
11288 = build_function_type (V2SI_type_node,
11289 tree_cons (NULL_TREE, V4SF_type_node,
11290 endlink));
11291 /* Loads/stores. */
11292 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11293 tree_cons (NULL_TREE, V8QI_type_node,
11294 tree_cons (NULL_TREE,
11295 pchar_type_node,
11296 endlink)));
11297 tree void_ftype_v8qi_v8qi_pchar
11298 = build_function_type (void_type_node, maskmovq_args);
11299 tree v4sf_ftype_pfloat
11300 = build_function_type (V4SF_type_node,
11301 tree_cons (NULL_TREE, pfloat_type_node,
11302 endlink));
11303 /* @@@ the type is bogus */
11304 tree v4sf_ftype_v4sf_pv2si
11305 = build_function_type (V4SF_type_node,
11306 tree_cons (NULL_TREE, V4SF_type_node,
11307 tree_cons (NULL_TREE, pv2si_type_node,
11308 endlink)));
11309 tree void_ftype_pv2si_v4sf
11310 = build_function_type (void_type_node,
11311 tree_cons (NULL_TREE, pv2si_type_node,
11312 tree_cons (NULL_TREE, V4SF_type_node,
11313 endlink)));
11314 tree void_ftype_pfloat_v4sf
11315 = build_function_type (void_type_node,
11316 tree_cons (NULL_TREE, pfloat_type_node,
11317 tree_cons (NULL_TREE, V4SF_type_node,
11318 endlink)));
11319 tree void_ftype_pdi_di
11320 = build_function_type (void_type_node,
11321 tree_cons (NULL_TREE, pdi_type_node,
11322 tree_cons (NULL_TREE,
11323 long_long_unsigned_type_node,
11324 endlink)));
11325 tree void_ftype_pv2di_v2di
11326 = build_function_type (void_type_node,
11327 tree_cons (NULL_TREE, pv2di_type_node,
11328 tree_cons (NULL_TREE,
11329 V2DI_type_node,
11330 endlink)));
11331 /* Normal vector unops. */
11332 tree v4sf_ftype_v4sf
11333 = build_function_type (V4SF_type_node,
11334 tree_cons (NULL_TREE, V4SF_type_node,
11335 endlink));
11336
11337 /* Normal vector binops. */
11338 tree v4sf_ftype_v4sf_v4sf
11339 = build_function_type (V4SF_type_node,
11340 tree_cons (NULL_TREE, V4SF_type_node,
11341 tree_cons (NULL_TREE, V4SF_type_node,
11342 endlink)));
11343 tree v8qi_ftype_v8qi_v8qi
11344 = build_function_type (V8QI_type_node,
11345 tree_cons (NULL_TREE, V8QI_type_node,
11346 tree_cons (NULL_TREE, V8QI_type_node,
11347 endlink)));
11348 tree v4hi_ftype_v4hi_v4hi
11349 = build_function_type (V4HI_type_node,
11350 tree_cons (NULL_TREE, V4HI_type_node,
11351 tree_cons (NULL_TREE, V4HI_type_node,
11352 endlink)));
11353 tree v2si_ftype_v2si_v2si
11354 = build_function_type (V2SI_type_node,
11355 tree_cons (NULL_TREE, V2SI_type_node,
11356 tree_cons (NULL_TREE, V2SI_type_node,
11357 endlink)));
11358 tree di_ftype_di_di
11359 = build_function_type (long_long_unsigned_type_node,
11360 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11361 tree_cons (NULL_TREE,
11362 long_long_unsigned_type_node,
11363 endlink)));
11364
11365 tree v2si_ftype_v2sf
11366 = build_function_type (V2SI_type_node,
11367 tree_cons (NULL_TREE, V2SF_type_node,
11368 endlink));
11369 tree v2sf_ftype_v2si
11370 = build_function_type (V2SF_type_node,
11371 tree_cons (NULL_TREE, V2SI_type_node,
11372 endlink));
11373 tree v2si_ftype_v2si
11374 = build_function_type (V2SI_type_node,
11375 tree_cons (NULL_TREE, V2SI_type_node,
11376 endlink));
11377 tree v2sf_ftype_v2sf
11378 = build_function_type (V2SF_type_node,
11379 tree_cons (NULL_TREE, V2SF_type_node,
11380 endlink));
11381 tree v2sf_ftype_v2sf_v2sf
11382 = build_function_type (V2SF_type_node,
11383 tree_cons (NULL_TREE, V2SF_type_node,
11384 tree_cons (NULL_TREE,
11385 V2SF_type_node,
11386 endlink)));
11387 tree v2si_ftype_v2sf_v2sf
11388 = build_function_type (V2SI_type_node,
11389 tree_cons (NULL_TREE, V2SF_type_node,
11390 tree_cons (NULL_TREE,
11391 V2SF_type_node,
11392 endlink)));
11393 tree pint_type_node = build_pointer_type (integer_type_node);
11394 tree pdouble_type_node = build_pointer_type (double_type_node);
11395 tree int_ftype_v2df_v2df
11396 = build_function_type (integer_type_node,
11397 tree_cons (NULL_TREE, V2DF_type_node,
11398 tree_cons (NULL_TREE, V2DF_type_node, endlink)));
11399
11400 tree ti_ftype_void
11401 = build_function_type (intTI_type_node, endlink);
11402 tree ti_ftype_ti_ti
11403 = build_function_type (intTI_type_node,
11404 tree_cons (NULL_TREE, intTI_type_node,
11405 tree_cons (NULL_TREE, intTI_type_node,
11406 endlink)));
11407 tree void_ftype_pvoid
11408 = build_function_type (void_type_node,
11409 tree_cons (NULL_TREE, ptr_type_node, endlink));
11410 tree v2di_ftype_di
11411 = build_function_type (V2DI_type_node,
11412 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11413 endlink));
11414 tree v4sf_ftype_v4si
11415 = build_function_type (V4SF_type_node,
11416 tree_cons (NULL_TREE, V4SI_type_node, endlink));
11417 tree v4si_ftype_v4sf
11418 = build_function_type (V4SI_type_node,
11419 tree_cons (NULL_TREE, V4SF_type_node, endlink));
11420 tree v2df_ftype_v4si
11421 = build_function_type (V2DF_type_node,
11422 tree_cons (NULL_TREE, V4SI_type_node, endlink));
11423 tree v4si_ftype_v2df
11424 = build_function_type (V4SI_type_node,
11425 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11426 tree v2si_ftype_v2df
11427 = build_function_type (V2SI_type_node,
11428 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11429 tree v4sf_ftype_v2df
11430 = build_function_type (V4SF_type_node,
11431 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11432 tree v2df_ftype_v2si
11433 = build_function_type (V2DF_type_node,
11434 tree_cons (NULL_TREE, V2SI_type_node, endlink));
11435 tree v2df_ftype_v4sf
11436 = build_function_type (V2DF_type_node,
11437 tree_cons (NULL_TREE, V4SF_type_node, endlink));
11438 tree int_ftype_v2df
11439 = build_function_type (integer_type_node,
11440 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11441 tree v2df_ftype_v2df_int
11442 = build_function_type (V2DF_type_node,
11443 tree_cons (NULL_TREE, V2DF_type_node,
11444 tree_cons (NULL_TREE, integer_type_node,
11445 endlink)));
11446 tree v4sf_ftype_v4sf_v2df
11447 = build_function_type (V4SF_type_node,
11448 tree_cons (NULL_TREE, V4SF_type_node,
11449 tree_cons (NULL_TREE, V2DF_type_node,
11450 endlink)));
11451 tree v2df_ftype_v2df_v4sf
11452 = build_function_type (V2DF_type_node,
11453 tree_cons (NULL_TREE, V2DF_type_node,
11454 tree_cons (NULL_TREE, V4SF_type_node,
11455 endlink)));
11456 tree v2df_ftype_v2df_v2df_int
11457 = build_function_type (V2DF_type_node,
11458 tree_cons (NULL_TREE, V2DF_type_node,
11459 tree_cons (NULL_TREE, V2DF_type_node,
11460 tree_cons (NULL_TREE,
11461 integer_type_node,
11462 endlink))));
11463 tree v2df_ftype_v2df_pv2si
11464 = build_function_type (V2DF_type_node,
11465 tree_cons (NULL_TREE, V2DF_type_node,
11466 tree_cons (NULL_TREE, pv2si_type_node,
11467 endlink)));
11468 tree void_ftype_pv2si_v2df
11469 = build_function_type (void_type_node,
11470 tree_cons (NULL_TREE, pv2si_type_node,
11471 tree_cons (NULL_TREE, V2DF_type_node,
11472 endlink)));
11473 tree void_ftype_pdouble_v2df
11474 = build_function_type (void_type_node,
11475 tree_cons (NULL_TREE, pdouble_type_node,
11476 tree_cons (NULL_TREE, V2DF_type_node,
11477 endlink)));
11478 tree void_ftype_pint_int
11479 = build_function_type (void_type_node,
11480 tree_cons (NULL_TREE, pint_type_node,
11481 tree_cons (NULL_TREE, integer_type_node,
11482 endlink)));
11483 tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
11484 tree_cons (NULL_TREE, V16QI_type_node,
11485 tree_cons (NULL_TREE,
11486 pchar_type_node,
11487 endlink)));
11488 tree void_ftype_v16qi_v16qi_pchar
11489 = build_function_type (void_type_node, maskmovdqu_args);
11490 tree v2df_ftype_pdouble
11491 = build_function_type (V2DF_type_node,
11492 tree_cons (NULL_TREE, pdouble_type_node,
11493 endlink));
11494 tree v2df_ftype_v2df_v2df
11495 = build_function_type (V2DF_type_node,
11496 tree_cons (NULL_TREE, V2DF_type_node,
11497 tree_cons (NULL_TREE, V2DF_type_node,
11498 endlink)));
11499 tree v16qi_ftype_v16qi_v16qi
11500 = build_function_type (V16QI_type_node,
11501 tree_cons (NULL_TREE, V16QI_type_node,
11502 tree_cons (NULL_TREE, V16QI_type_node,
11503 endlink)));
11504 tree v8hi_ftype_v8hi_v8hi
11505 = build_function_type (V8HI_type_node,
11506 tree_cons (NULL_TREE, V8HI_type_node,
11507 tree_cons (NULL_TREE, V8HI_type_node,
11508 endlink)));
11509 tree v4si_ftype_v4si_v4si
11510 = build_function_type (V4SI_type_node,
11511 tree_cons (NULL_TREE, V4SI_type_node,
11512 tree_cons (NULL_TREE, V4SI_type_node,
11513 endlink)));
11514 tree v2di_ftype_v2di_v2di
11515 = build_function_type (V2DI_type_node,
11516 tree_cons (NULL_TREE, V2DI_type_node,
11517 tree_cons (NULL_TREE, V2DI_type_node,
11518 endlink)));
11519 tree v2di_ftype_v2df_v2df
11520 = build_function_type (V2DI_type_node,
11521 tree_cons (NULL_TREE, V2DF_type_node,
11522 tree_cons (NULL_TREE, V2DF_type_node,
11523 endlink)));
11524 tree v2df_ftype_v2df
11525 = build_function_type (V2DF_type_node,
11526 tree_cons (NULL_TREE, V2DF_type_node,
11527 endlink));
11528 tree v2df_ftype_double
11529 = build_function_type (V2DF_type_node,
11530 tree_cons (NULL_TREE, double_type_node,
11531 endlink));
11532 tree v2df_ftype_double_double
11533 = build_function_type (V2DF_type_node,
11534 tree_cons (NULL_TREE, double_type_node,
11535 tree_cons (NULL_TREE, double_type_node,
11536 endlink)));
11537 tree int_ftype_v8hi_int
11538 = build_function_type (integer_type_node,
11539 tree_cons (NULL_TREE, V8HI_type_node,
11540 tree_cons (NULL_TREE, integer_type_node,
11541 endlink)));
11542 tree v8hi_ftype_v8hi_int_int
11543 = build_function_type (V8HI_type_node,
11544 tree_cons (NULL_TREE, V8HI_type_node,
11545 tree_cons (NULL_TREE, integer_type_node,
11546 tree_cons (NULL_TREE,
11547 integer_type_node,
11548 endlink))));
11549 tree v2di_ftype_v2di_int
11550 = build_function_type (V2DI_type_node,
11551 tree_cons (NULL_TREE, V2DI_type_node,
11552 tree_cons (NULL_TREE, integer_type_node,
11553 endlink)));
11554 tree v4si_ftype_v4si_int
11555 = build_function_type (V4SI_type_node,
11556 tree_cons (NULL_TREE, V4SI_type_node,
11557 tree_cons (NULL_TREE, integer_type_node,
11558 endlink)));
11559 tree v8hi_ftype_v8hi_int
11560 = build_function_type (V8HI_type_node,
11561 tree_cons (NULL_TREE, V8HI_type_node,
11562 tree_cons (NULL_TREE, integer_type_node,
11563 endlink)));
11564 tree v8hi_ftype_v8hi_v2di
11565 = build_function_type (V8HI_type_node,
11566 tree_cons (NULL_TREE, V8HI_type_node,
11567 tree_cons (NULL_TREE, V2DI_type_node,
11568 endlink)));
11569 tree v4si_ftype_v4si_v2di
11570 = build_function_type (V4SI_type_node,
11571 tree_cons (NULL_TREE, V4SI_type_node,
11572 tree_cons (NULL_TREE, V2DI_type_node,
11573 endlink)));
11574 tree v4si_ftype_v8hi_v8hi
11575 = build_function_type (V4SI_type_node,
11576 tree_cons (NULL_TREE, V8HI_type_node,
11577 tree_cons (NULL_TREE, V8HI_type_node,
11578 endlink)));
11579 tree di_ftype_v8qi_v8qi
11580 = build_function_type (long_long_unsigned_type_node,
11581 tree_cons (NULL_TREE, V8QI_type_node,
11582 tree_cons (NULL_TREE, V8QI_type_node,
11583 endlink)));
11584 tree v2di_ftype_v16qi_v16qi
11585 = build_function_type (V2DI_type_node,
11586 tree_cons (NULL_TREE, V16QI_type_node,
11587 tree_cons (NULL_TREE, V16QI_type_node,
11588 endlink)));
11589 tree int_ftype_v16qi
11590 = build_function_type (integer_type_node,
11591 tree_cons (NULL_TREE, V16QI_type_node, endlink));
11592
11593 /* Add all builtins that are more or less simple operations on two
11594 operands. */
11595 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11596 {
11597 /* Use one of the operands; the target can have a different mode for
11598 mask-generating compares. */
11599 enum machine_mode mode;
11600 tree type;
11601
11602 if (d->name == 0)
11603 continue;
11604 mode = insn_data[d->icode].operand[1].mode;
11605
11606 switch (mode)
11607 {
11608 case V16QImode:
11609 type = v16qi_ftype_v16qi_v16qi;
11610 break;
11611 case V8HImode:
11612 type = v8hi_ftype_v8hi_v8hi;
11613 break;
11614 case V4SImode:
11615 type = v4si_ftype_v4si_v4si;
11616 break;
11617 case V2DImode:
11618 type = v2di_ftype_v2di_v2di;
11619 break;
11620 case V2DFmode:
11621 type = v2df_ftype_v2df_v2df;
11622 break;
11623 case TImode:
11624 type = ti_ftype_ti_ti;
11625 break;
11626 case V4SFmode:
11627 type = v4sf_ftype_v4sf_v4sf;
11628 break;
11629 case V8QImode:
11630 type = v8qi_ftype_v8qi_v8qi;
11631 break;
11632 case V4HImode:
11633 type = v4hi_ftype_v4hi_v4hi;
11634 break;
11635 case V2SImode:
11636 type = v2si_ftype_v2si_v2si;
11637 break;
11638 case DImode:
11639 type = di_ftype_di_di;
11640 break;
11641
11642 default:
11643 abort ();
11644 }
11645
11646 /* Override for comparisons. */
11647 if (d->icode == CODE_FOR_maskcmpv4sf3
11648 || d->icode == CODE_FOR_maskncmpv4sf3
11649 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11650 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11651 type = v4si_ftype_v4sf_v4sf;
11652
11653 if (d->icode == CODE_FOR_maskcmpv2df3
11654 || d->icode == CODE_FOR_maskncmpv2df3
11655 || d->icode == CODE_FOR_vmmaskcmpv2df3
11656 || d->icode == CODE_FOR_vmmaskncmpv2df3)
11657 type = v2di_ftype_v2df_v2df;
11658
11659 def_builtin (d->mask, d->name, type, d->code);
11660 }
11661
11662 /* Add the remaining MMX insns with somewhat more complicated types. */
11663 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11664 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11665 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11666 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11667 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11668 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11669 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11670
11671 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11672 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11673 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11674
11675 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11676 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11677
11678 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11679 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11680
11681 /* comi/ucomi insns. */
11682 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
11683 if (d->mask == MASK_SSE2)
11684 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
11685 else
11686 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11687
11688 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11689 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11690 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11691
11692 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11693 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11694 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11695 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11696 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11697 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11698
11699 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11700 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11701 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11702 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11703
11704 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11705 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11706
11707 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11708
11709 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11710 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11711 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11712 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11713 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11714 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11715
11716 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11717 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11718 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11719 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11720
11721 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11722 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11723 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11724 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11725
11726 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11727
11728 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11729
11730 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11731 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11732 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11733 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11734 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11735 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11736
11737 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11738
11739 /* Original 3DNow! */
11740 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11741 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11742 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11743 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11744 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11745 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11746 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11747 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11748 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11749 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11750 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11751 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11752 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11753 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11754 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11755 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11756 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11757 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11758 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11759 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11760
11761 /* 3DNow! extension as used in the Athlon CPU. */
11762 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11763 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11764 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11765 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11766 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11767 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11768
11769 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11770
11771 /* SSE2 */
11772 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
11773 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
11774
11775 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
11776 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
11777
11778 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
11779 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
11780 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
11781 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
11782 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
11783 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
11784
11785 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
11786 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
11787 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
11788 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
11789
11790 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
11791 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
11792 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
11793 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
11794 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
11795
11796 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
11797 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
11798 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
11799 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
11800
11801 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
11802 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
11803
11804 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
11805
11806 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
11807 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
11808
11809 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
11810 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
11811 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
11812 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
11813 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
11814
11815 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
11816
11817 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
11818 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
11819
11820 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
11821 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
11822 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
11823
11824 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
11825 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
11826 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
11827
11828 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
11829 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
11830 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
11831 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
11832 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
11833 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
11834 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
11835
11836 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
11837 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
11838 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
11839
11840 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
11841 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
11842 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
11843
11844 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
11845 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
11846 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
11847
11848 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
11849 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
11850
11851 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
11852 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
11853 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
11854
11855 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
11856 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
11857 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
11858
11859 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
11860 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
11861
11862 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
11863 }
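
/* Illustrative note (not part of the original file): user code normally
   reaches the builtins registered above through the intrinsic wrappers in
   xmmintrin.h / emmintrin.h rather than by calling them directly.  A
   wrapper along these lines, assumed from the companion headers, maps
   _mm_add_epi16 onto the addv8hi3 entry in bdesc_2arg:

       static __inline __m128i
       _mm_add_epi16 (__m128i __A, __m128i __B)
       {
         return (__m128i) __builtin_ia32_paddw128 ((__v8hi) __A, (__v8hi) __B);
       }

   Only the builtin name comes from this file; the wrapper text itself is
   an assumption.  */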
11864
11865 /* Errors in the source file can cause expand_expr to return const0_rtx
11866 where we expect a vector. To avoid crashing, use one of the vector
11867 clear instructions. */
11868 static rtx
11869 safe_vector_operand (x, mode)
11870 rtx x;
11871 enum machine_mode mode;
11872 {
11873 if (x != const0_rtx)
11874 return x;
11875 x = gen_reg_rtx (mode);
11876
11877 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11878 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11879 : gen_rtx_SUBREG (DImode, x, 0)));
11880 else
11881 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11882 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11883 return x;
11884 }
11885
11886 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11887
11888 static rtx
11889 ix86_expand_binop_builtin (icode, arglist, target)
11890 enum insn_code icode;
11891 tree arglist;
11892 rtx target;
11893 {
11894 rtx pat;
11895 tree arg0 = TREE_VALUE (arglist);
11896 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11897 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11898 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11899 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11900 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11901 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11902
11903 if (VECTOR_MODE_P (mode0))
11904 op0 = safe_vector_operand (op0, mode0);
11905 if (VECTOR_MODE_P (mode1))
11906 op1 = safe_vector_operand (op1, mode1);
11907
11908 if (! target
11909 || GET_MODE (target) != tmode
11910 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11911 target = gen_reg_rtx (tmode);
11912
11913 /* In case the insn wants input operands in modes different from
11914 the result, abort. */
11915 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11916 abort ();
11917
11918 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11919 op0 = copy_to_mode_reg (mode0, op0);
11920 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11921 op1 = copy_to_mode_reg (mode1, op1);
11922
11923 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11924 yet one of the two must not be a memory. This is normally enforced
11925 by expanders, but we didn't bother to create one here. */
11926 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11927 op0 = copy_to_mode_reg (mode0, op0);
11928
11929 pat = GEN_FCN (icode) (target, op0, op1);
11930 if (! pat)
11931 return 0;
11932 emit_insn (pat);
11933 return target;
11934 }
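
/* Sketch (illustration, not original source): for a typical two-operand
   builtin such as __builtin_ia32_paddw128 the routine above boils down to
   forcing both expanded arguments into registers that satisfy the insn
   predicates and emitting the named pattern, roughly

       (set (reg:V8HI target)
            (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))

   with the modes taken from insn_data for the chosen icode.  */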
11935
11936 /* In type_for_mode we restrict the ability to create TImode types
11937 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11938 to have a V4SFmode signature. Convert them in-place to TImode. */
11939
11940 static rtx
11941 ix86_expand_timode_binop_builtin (icode, arglist, target)
11942 enum insn_code icode;
11943 tree arglist;
11944 rtx target;
11945 {
11946 rtx pat;
11947 tree arg0 = TREE_VALUE (arglist);
11948 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11949 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11950 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11951
11952 op0 = gen_lowpart (TImode, op0);
11953 op1 = gen_lowpart (TImode, op1);
11954 target = gen_reg_rtx (TImode);
11955
11956 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11957 op0 = copy_to_mode_reg (TImode, op0);
11958 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11959 op1 = copy_to_mode_reg (TImode, op1);
11960
11961 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11962 yet one of the two must not be a memory. This is normally enforced
11963 by expanders, but we didn't bother to create one here. */
11964 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11965 op0 = copy_to_mode_reg (TImode, op0);
11966
11967 pat = GEN_FCN (icode) (target, op0, op1);
11968 if (! pat)
11969 return 0;
11970 emit_insn (pat);
11971
11972 return gen_lowpart (V4SFmode, target);
11973 }
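
/* Sketch (illustration only): __builtin_ia32_andps is declared with a
   V4SFmode signature, but the routine above rewrites both operands as
   TImode and emits the TImode logical pattern, something like

       (set (reg:TI tmp) (and:TI (reg:TI op0) (reg:TI op1)))

   before handing gen_lowpart (V4SFmode, tmp) back to the caller.  The
   and:TI shape is assumed from the name CODE_FOR_sse_andti3; the
   conversion steps themselves are the ones coded above.  */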
11974
11975 /* Subroutine of ix86_expand_builtin to take care of stores. */
11976
11977 static rtx
11978 ix86_expand_store_builtin (icode, arglist)
11979 enum insn_code icode;
11980 tree arglist;
11981 {
11982 rtx pat;
11983 tree arg0 = TREE_VALUE (arglist);
11984 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11985 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11986 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11987 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11988 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11989
11990 if (VECTOR_MODE_P (mode1))
11991 op1 = safe_vector_operand (op1, mode1);
11992
11993 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11994
11995 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11996 op1 = copy_to_mode_reg (mode1, op1);
11997
11998 pat = GEN_FCN (icode) (op0, op1);
11999 if (pat)
12000 emit_insn (pat);
12001 return 0;
12002 }
12003
12004 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12005
12006 static rtx
12007 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12008 enum insn_code icode;
12009 tree arglist;
12010 rtx target;
12011 int do_load;
12012 {
12013 rtx pat;
12014 tree arg0 = TREE_VALUE (arglist);
12015 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12016 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12017 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12018
12019 if (! target
12020 || GET_MODE (target) != tmode
12021 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12022 target = gen_reg_rtx (tmode);
12023 if (do_load)
12024 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12025 else
12026 {
12027 if (VECTOR_MODE_P (mode0))
12028 op0 = safe_vector_operand (op0, mode0);
12029
12030 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12031 op0 = copy_to_mode_reg (mode0, op0);
12032 }
12033
12034 pat = GEN_FCN (icode) (target, op0);
12035 if (! pat)
12036 return 0;
12037 emit_insn (pat);
12038 return target;
12039 }
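
/* Usage note (not in the original): DO_LOAD distinguishes plain unary
   operations from load-style builtins.  When DO_LOAD is nonzero the
   single argument is treated as an address and wrapped in a MEM of the
   insn's input mode, which is how, for example,

       ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1)

   expands __builtin_ia32_loadaps in the switch further below.  */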
12040
12041 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12042 sqrtss, rsqrtss, rcpss. */
12043
12044 static rtx
12045 ix86_expand_unop1_builtin (icode, arglist, target)
12046 enum insn_code icode;
12047 tree arglist;
12048 rtx target;
12049 {
12050 rtx pat;
12051 tree arg0 = TREE_VALUE (arglist);
12052 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12053 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12054 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12055
12056 if (! target
12057 || GET_MODE (target) != tmode
12058 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12059 target = gen_reg_rtx (tmode);
12060
12061 if (VECTOR_MODE_P (mode0))
12062 op0 = safe_vector_operand (op0, mode0);
12063
12064 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12065 op0 = copy_to_mode_reg (mode0, op0);
12066
12067 op1 = op0;
12068 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12069 op1 = copy_to_mode_reg (mode0, op1);
12070
12071 pat = GEN_FCN (icode) (target, op0, op1);
12072 if (! pat)
12073 return 0;
12074 emit_insn (pat);
12075 return target;
12076 }
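
/* Sketch (illustration only): the vm* scalar patterns take two vector
   inputs because element 0 comes from the operation while the remaining
   elements are copied through.  Passing OP0 for both operands, as above,
   gives the usual scalar-intrinsic semantics; for __builtin_ia32_sqrtss
   the effect is roughly

       result[0]    = sqrt (op0[0]);
       result[1..3] = op0[1..3];

   where the bracketed element copy is only a description, not code.  */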
12077
12078 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12079
12080 static rtx
12081 ix86_expand_sse_compare (d, arglist, target)
12082 const struct builtin_description *d;
12083 tree arglist;
12084 rtx target;
12085 {
12086 rtx pat;
12087 tree arg0 = TREE_VALUE (arglist);
12088 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12089 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12090 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12091 rtx op2;
12092 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12093 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12094 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12095 enum rtx_code comparison = d->comparison;
12096
12097 if (VECTOR_MODE_P (mode0))
12098 op0 = safe_vector_operand (op0, mode0);
12099 if (VECTOR_MODE_P (mode1))
12100 op1 = safe_vector_operand (op1, mode1);
12101
12102 /* Swap operands if we have a comparison that isn't available in
12103 hardware. */
12104 if (d->flag)
12105 {
12106 rtx tmp = gen_reg_rtx (mode1);
12107 emit_move_insn (tmp, op1);
12108 op1 = op0;
12109 op0 = tmp;
12110 }
12111
12112 if (! target
12113 || GET_MODE (target) != tmode
12114 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12115 target = gen_reg_rtx (tmode);
12116
12117 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12118 op0 = copy_to_mode_reg (mode0, op0);
12119 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12120 op1 = copy_to_mode_reg (mode1, op1);
12121
12122 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12123 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12124 if (! pat)
12125 return 0;
12126 emit_insn (pat);
12127 return target;
12128 }
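
/* Sketch (illustration only): the D->FLAG swap implements comparisons the
   hardware does not encode directly.  In the tables above,
   __builtin_ia32_cmpgtsd is listed with comparison LT and flag 1, so a
   call such as

       r = __builtin_ia32_cmpgtsd (a, b);

   is expanded as the LT pattern with A and B exchanged, i.e. effectively
   a "compare less-than" with reversed operands.  */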
12129
12130 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12131
12132 static rtx
12133 ix86_expand_sse_comi (d, arglist, target)
12134 const struct builtin_description *d;
12135 tree arglist;
12136 rtx target;
12137 {
12138 rtx pat;
12139 tree arg0 = TREE_VALUE (arglist);
12140 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12141 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12142 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12143 rtx op2;
12144 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12145 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12146 enum rtx_code comparison = d->comparison;
12147
12148 if (VECTOR_MODE_P (mode0))
12149 op0 = safe_vector_operand (op0, mode0);
12150 if (VECTOR_MODE_P (mode1))
12151 op1 = safe_vector_operand (op1, mode1);
12152
12153 /* Swap operands if we have a comparison that isn't available in
12154 hardware. */
12155 if (d->flag)
12156 {
12157 rtx tmp = op1;
12158 op1 = op0;
12159 op0 = tmp;
12160 }
12161
12162 target = gen_reg_rtx (SImode);
12163 emit_move_insn (target, const0_rtx);
12164 target = gen_rtx_SUBREG (QImode, target, 0);
12165
12166 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12167 op0 = copy_to_mode_reg (mode0, op0);
12168 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12169 op1 = copy_to_mode_reg (mode1, op1);
12170
12171 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12172 pat = GEN_FCN (d->icode) (op0, op1, op2);
12173 if (! pat)
12174 return 0;
12175 emit_insn (pat);
12176 emit_insn (gen_rtx_SET (VOIDmode,
12177 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12178 gen_rtx_fmt_ee (comparison, QImode,
12179 gen_rtx_REG (CCmode, FLAGS_REG),
12180 const0_rtx)));
12181
12182 return SUBREG_REG (target);
12183 }
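
/* Sketch (illustration only): the comi/ucomi expansion above clears an
   SImode pseudo, emits the flag-setting compare, and then writes just the
   low byte of that pseudo from the flags, roughly

       target = 0                -- SImode pseudo, cleared first
       (u)comiss  op0, op1       -- compare, sets the flags register
       setCC      low byte       -- CC derived from D->COMPARISON

   The mnemonics are only assumed for illustration; the RTL steps are the
   ones in the code above.  */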
12184
12185 /* Expand an expression EXP that calls a built-in function,
12186 with result going to TARGET if that's convenient
12187 (and in mode MODE if that's convenient).
12188 SUBTARGET may be used as the target for computing one of EXP's operands.
12189 IGNORE is nonzero if the value is to be ignored. */
12190
12191 rtx
12192 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12193 tree exp;
12194 rtx target;
12195 rtx subtarget ATTRIBUTE_UNUSED;
12196 enum machine_mode mode ATTRIBUTE_UNUSED;
12197 int ignore ATTRIBUTE_UNUSED;
12198 {
12199 const struct builtin_description *d;
12200 size_t i;
12201 enum insn_code icode;
12202 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12203 tree arglist = TREE_OPERAND (exp, 1);
12204 tree arg0, arg1, arg2;
12205 rtx op0, op1, op2, pat;
12206 enum machine_mode tmode, mode0, mode1, mode2;
12207 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12208
12209 switch (fcode)
12210 {
12211 case IX86_BUILTIN_EMMS:
12212 emit_insn (gen_emms ());
12213 return 0;
12214
12215 case IX86_BUILTIN_SFENCE:
12216 emit_insn (gen_sfence ());
12217 return 0;
12218
12219 case IX86_BUILTIN_PEXTRW:
12220 case IX86_BUILTIN_PEXTRW128:
12221 icode = (fcode == IX86_BUILTIN_PEXTRW
12222 ? CODE_FOR_mmx_pextrw
12223 : CODE_FOR_sse2_pextrw);
12224 arg0 = TREE_VALUE (arglist);
12225 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12226 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12227 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12228 tmode = insn_data[icode].operand[0].mode;
12229 mode0 = insn_data[icode].operand[1].mode;
12230 mode1 = insn_data[icode].operand[2].mode;
12231
12232 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12233 op0 = copy_to_mode_reg (mode0, op0);
12234 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12235 {
12236 /* @@@ better error message */
12237 error ("selector must be an immediate");
12238 return gen_reg_rtx (tmode);
12239 }
12240 if (target == 0
12241 || GET_MODE (target) != tmode
12242 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12243 target = gen_reg_rtx (tmode);
12244 pat = GEN_FCN (icode) (target, op0, op1);
12245 if (! pat)
12246 return 0;
12247 emit_insn (pat);
12248 return target;
12249
12250 case IX86_BUILTIN_PINSRW:
12251 case IX86_BUILTIN_PINSRW128:
12252 icode = (fcode == IX86_BUILTIN_PINSRW
12253 ? CODE_FOR_mmx_pinsrw
12254 : CODE_FOR_sse2_pinsrw);
12255 arg0 = TREE_VALUE (arglist);
12256 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12257 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12258 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12259 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12260 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12261 tmode = insn_data[icode].operand[0].mode;
12262 mode0 = insn_data[icode].operand[1].mode;
12263 mode1 = insn_data[icode].operand[2].mode;
12264 mode2 = insn_data[icode].operand[3].mode;
12265
12266 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12267 op0 = copy_to_mode_reg (mode0, op0);
12268 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12269 op1 = copy_to_mode_reg (mode1, op1);
12270 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12271 {
12272 /* @@@ better error message */
12273 error ("selector must be an immediate");
12274 return const0_rtx;
12275 }
12276 if (target == 0
12277 || GET_MODE (target) != tmode
12278 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12279 target = gen_reg_rtx (tmode);
12280 pat = GEN_FCN (icode) (target, op0, op1, op2);
12281 if (! pat)
12282 return 0;
12283 emit_insn (pat);
12284 return target;
12285
12286 case IX86_BUILTIN_MASKMOVQ:
      case IX86_BUILTIN_MASKMOVDQU:
12287 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12288 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12289 : CODE_FOR_sse2_maskmovdqu);
12290 /* Note the arg order is different from the operand order. */
12291 arg1 = TREE_VALUE (arglist);
12292 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12293 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12294 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12295 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12296 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12297 mode0 = insn_data[icode].operand[0].mode;
12298 mode1 = insn_data[icode].operand[1].mode;
12299 mode2 = insn_data[icode].operand[2].mode;
12300
12301 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12302 op0 = copy_to_mode_reg (mode0, op0);
12303 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12304 op1 = copy_to_mode_reg (mode1, op1);
12305 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12306 op2 = copy_to_mode_reg (mode2, op2);
12307 pat = GEN_FCN (icode) (op0, op1, op2);
12308 if (! pat)
12309 return 0;
12310 emit_insn (pat);
12311 return 0;
12312
12313 case IX86_BUILTIN_SQRTSS:
12314 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12315 case IX86_BUILTIN_RSQRTSS:
12316 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12317 case IX86_BUILTIN_RCPSS:
12318 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12319
12320 case IX86_BUILTIN_ANDPS:
12321 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12322 arglist, target);
12323 case IX86_BUILTIN_ANDNPS:
12324 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12325 arglist, target);
12326 case IX86_BUILTIN_ORPS:
12327 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12328 arglist, target);
12329 case IX86_BUILTIN_XORPS:
12330 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12331 arglist, target);
12332
12333 case IX86_BUILTIN_LOADAPS:
12334 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12335
12336 case IX86_BUILTIN_LOADUPS:
12337 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12338
12339 case IX86_BUILTIN_STOREAPS:
12340 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12341 case IX86_BUILTIN_STOREUPS:
12342 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12343
12344 case IX86_BUILTIN_LOADSS:
12345 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12346
12347 case IX86_BUILTIN_STORESS:
12348 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12349
12350 case IX86_BUILTIN_LOADHPS:
12351 case IX86_BUILTIN_LOADLPS:
12352 case IX86_BUILTIN_LOADHPD:
12353 case IX86_BUILTIN_LOADLPD:
12354 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12355 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12356 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12357 : CODE_FOR_sse2_movlpd);
12358 arg0 = TREE_VALUE (arglist);
12359 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12360 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12361 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12362 tmode = insn_data[icode].operand[0].mode;
12363 mode0 = insn_data[icode].operand[1].mode;
12364 mode1 = insn_data[icode].operand[2].mode;
12365
12366 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12367 op0 = copy_to_mode_reg (mode0, op0);
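      /* The second argument is a pointer; wrap it in a MEM of the mode the
         insn pattern expects for that operand.  */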
12368 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12369 if (target == 0
12370 || GET_MODE (target) != tmode
12371 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12372 target = gen_reg_rtx (tmode);
12373 pat = GEN_FCN (icode) (target, op0, op1);
12374 if (! pat)
12375 return 0;
12376 emit_insn (pat);
12377 return target;
12378
12379 case IX86_BUILTIN_STOREHPS:
12380 case IX86_BUILTIN_STORELPS:
12381 case IX86_BUILTIN_STOREHPD:
12382 case IX86_BUILTIN_STORELPD:
12383 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12384 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12385 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12386 : CODE_FOR_sse2_movlpd);
12387 arg0 = TREE_VALUE (arglist);
12388 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12389 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12390 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12391 mode0 = insn_data[icode].operand[1].mode;
12392 mode1 = insn_data[icode].operand[2].mode;
12393
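      /* The first argument is the destination address; form a MEM for it.  */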
12394 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12395 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12396 op1 = copy_to_mode_reg (mode1, op1);
12397
12398 pat = GEN_FCN (icode) (op0, op0, op1);
12399 if (! pat)
12400 return 0;
12401 emit_insn (pat);
12402 return 0;
12403
12404 case IX86_BUILTIN_MOVNTPS:
12405 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12406 case IX86_BUILTIN_MOVNTQ:
12407 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12408
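      /* The ldmxcsr and stmxcsr patterns take a memory operand, so the MXCSR
         value is moved through a stack temporary.  */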
12409 case IX86_BUILTIN_LDMXCSR:
12410 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12411 target = assign_386_stack_local (SImode, 0);
12412 emit_move_insn (target, op0);
12413 emit_insn (gen_ldmxcsr (target));
12414 return 0;
12415
12416 case IX86_BUILTIN_STMXCSR:
12417 target = assign_386_stack_local (SImode, 0);
12418 emit_insn (gen_stmxcsr (target));
12419 return copy_to_mode_reg (SImode, target);
12420
12421 case IX86_BUILTIN_SHUFPS:
12422 case IX86_BUILTIN_SHUFPD:
12423 icode = (fcode == IX86_BUILTIN_SHUFPS
12424 ? CODE_FOR_sse_shufps
12425 : CODE_FOR_sse2_shufpd);
12426 arg0 = TREE_VALUE (arglist);
12427 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12428 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12429 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12430 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12431 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12432 tmode = insn_data[icode].operand[0].mode;
12433 mode0 = insn_data[icode].operand[1].mode;
12434 mode1 = insn_data[icode].operand[2].mode;
12435 mode2 = insn_data[icode].operand[3].mode;
12436
12437 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12438 op0 = copy_to_mode_reg (mode0, op0);
12439 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12440 op1 = copy_to_mode_reg (mode1, op1);
12441 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12442 {
12443 /* @@@ better error message */
12444 error ("mask must be an immediate");
12445 return gen_reg_rtx (tmode);
12446 }
12447 if (target == 0
12448 || GET_MODE (target) != tmode
12449 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12450 target = gen_reg_rtx (tmode);
12451 pat = GEN_FCN (icode) (target, op0, op1, op2);
12452 if (! pat)
12453 return 0;
12454 emit_insn (pat);
12455 return target;
12456
12457 case IX86_BUILTIN_PSHUFW:
12458 case IX86_BUILTIN_PSHUFD:
12459 case IX86_BUILTIN_PSHUFHW:
12460 case IX86_BUILTIN_PSHUFLW:
12461 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12462 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12463 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12464 : CODE_FOR_mmx_pshufw);
12465 arg0 = TREE_VALUE (arglist);
12466 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12467 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12468 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12469 tmode = insn_data[icode].operand[0].mode;
12470 mode1 = insn_data[icode].operand[1].mode;
12471 mode2 = insn_data[icode].operand[2].mode;
12472
12473 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12474 op0 = copy_to_mode_reg (mode1, op0);
12475 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12476 {
12477 /* @@@ better error message */
12478 error ("mask must be an immediate");
12479 return const0_rtx;
12480 }
12481 if (target == 0
12482 || GET_MODE (target) != tmode
12483 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12484 target = gen_reg_rtx (tmode);
12485 pat = GEN_FCN (icode) (target, op0, op1);
12486 if (! pat)
12487 return 0;
12488 emit_insn (pat);
12489 return target;
12490
12491 case IX86_BUILTIN_FEMMS:
12492 emit_insn (gen_femms ());
12493 return NULL_RTX;
12494
12495 case IX86_BUILTIN_PAVGUSB:
12496 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12497
12498 case IX86_BUILTIN_PF2ID:
12499 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
12500
12501 case IX86_BUILTIN_PFACC:
12502 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
12503
12504 case IX86_BUILTIN_PFADD:
12505 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
12506
12507 case IX86_BUILTIN_PFCMPEQ:
12508 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
12509
12510 case IX86_BUILTIN_PFCMPGE:
12511 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
12512
12513 case IX86_BUILTIN_PFCMPGT:
12514 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
12515
12516 case IX86_BUILTIN_PFMAX:
12517 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
12518
12519 case IX86_BUILTIN_PFMIN:
12520 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
12521
12522 case IX86_BUILTIN_PFMUL:
12523 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
12524
12525 case IX86_BUILTIN_PFRCP:
12526 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
12527
12528 case IX86_BUILTIN_PFRCPIT1:
12529 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
12530
12531 case IX86_BUILTIN_PFRCPIT2:
12532 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
12533
12534 case IX86_BUILTIN_PFRSQIT1:
12535 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
12536
12537 case IX86_BUILTIN_PFRSQRT:
12538 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12539
12540 case IX86_BUILTIN_PFSUB:
12541 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12542
12543 case IX86_BUILTIN_PFSUBR:
12544 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12545
12546 case IX86_BUILTIN_PI2FD:
12547 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12548
12549 case IX86_BUILTIN_PMULHRW:
12550 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12551
12552 case IX86_BUILTIN_PF2IW:
12553 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12554
12555 case IX86_BUILTIN_PFNACC:
12556 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12557
12558 case IX86_BUILTIN_PFPNACC:
12559 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12560
12561 case IX86_BUILTIN_PI2FW:
12562 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12563
12564 case IX86_BUILTIN_PSWAPDSI:
12565 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12566
12567 case IX86_BUILTIN_PSWAPDSF:
12568 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12569
12570 case IX86_BUILTIN_SSE_ZERO:
12571 target = gen_reg_rtx (V4SFmode);
12572 emit_insn (gen_sse_clrv4sf (target));
12573 return target;
12574
12575 case IX86_BUILTIN_MMX_ZERO:
12576 target = gen_reg_rtx (DImode);
12577 emit_insn (gen_mmx_clrdi (target));
12578 return target;
12579
12580 case IX86_BUILTIN_SQRTSD:
12581 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
12582 case IX86_BUILTIN_LOADAPD:
12583 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
12584 case IX86_BUILTIN_LOADUPD:
12585 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
12586
12587 case IX86_BUILTIN_STOREAPD:
12588 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12589 case IX86_BUILTIN_STOREUPD:
12590 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
12591
12592 case IX86_BUILTIN_LOADSD:
12593 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
12594
12595 case IX86_BUILTIN_STORESD:
12596 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
12597
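      /* SETPD1 duplicates a scalar double into both elements of a V2DF:
         spill the argument to the stack, load it into the low element, then
         shufpd with a zero selector to copy it into the high element.  */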
12598 case IX86_BUILTIN_SETPD1:
12599 target = assign_386_stack_local (DFmode, 0);
12600 arg0 = TREE_VALUE (arglist);
12601 emit_move_insn (adjust_address (target, DFmode, 0),
12602 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12603 op0 = gen_reg_rtx (V2DFmode);
12604 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
12605 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
12606 return op0;
12607
12608 case IX86_BUILTIN_SETPD:
12609 target = assign_386_stack_local (V2DFmode, 0);
12610 arg0 = TREE_VALUE (arglist);
12611 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12612 emit_move_insn (adjust_address (target, DFmode, 0),
12613 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12614 emit_move_insn (adjust_address (target, DFmode, 8),
12615 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
12616 op0 = gen_reg_rtx (V2DFmode);
12617 emit_insn (gen_sse2_movapd (op0, target));
12618 return op0;
12619
12620 case IX86_BUILTIN_LOADRPD:
12621 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
12622 gen_reg_rtx (V2DFmode), 1);
12623 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
12624 return target;
12625
12626 case IX86_BUILTIN_LOADPD1:
12627 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
12628 gen_reg_rtx (V2DFmode), 1);
12629 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
12630 return target;
12631
12632 case IX86_BUILTIN_STOREPD1:
12633 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12634 case IX86_BUILTIN_STORERPD:
12635 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12636
12637 case IX86_BUILTIN_MFENCE:
12638 emit_insn (gen_sse2_mfence ());
12639 return 0;
12640 case IX86_BUILTIN_LFENCE:
12641 emit_insn (gen_sse2_lfence ());
12642 return 0;
12643
12644 case IX86_BUILTIN_CLFLUSH:
12645 arg0 = TREE_VALUE (arglist);
12646 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12647 icode = CODE_FOR_sse2_clflush;
12648 mode0 = insn_data[icode].operand[0].mode;
12649 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12650 op0 = copy_to_mode_reg (mode0, op0);
12651
12652 emit_insn (gen_sse2_clflush (op0));
12653 return 0;
12654
12655 case IX86_BUILTIN_MOVNTPD:
12656 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
12657 case IX86_BUILTIN_MOVNTDQ:
12658 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
12659 case IX86_BUILTIN_MOVNTI:
12660 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
12661
12662 default:
12663 break;
12664 }
12665
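  /* Builtins with no special-case handling above are expanded through the
     generic two-operand, one-operand and comparison description tables.  */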
12666 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12667 if (d->code == fcode)
12668 {
12669 /* Compares are treated specially. */
12670 if (d->icode == CODE_FOR_maskcmpv4sf3
12671 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12672 || d->icode == CODE_FOR_maskncmpv4sf3
12673 || d->icode == CODE_FOR_vmmaskncmpv4sf3
12674 || d->icode == CODE_FOR_maskcmpv2df3
12675 || d->icode == CODE_FOR_vmmaskcmpv2df3
12676 || d->icode == CODE_FOR_maskncmpv2df3
12677 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12678 return ix86_expand_sse_compare (d, arglist, target);
12679
12680 return ix86_expand_binop_builtin (d->icode, arglist, target);
12681 }
12682
12683 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12684 if (d->code == fcode)
12685 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12686
12687 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12688 if (d->code == fcode)
12689 return ix86_expand_sse_comi (d, arglist, target);
12690
12691 /* @@@ Should really do something sensible here. */
12692 return 0;
12693 }
12694
12695 /* Store OPERAND to memory after reload has completed.  This means
12696 that we can't easily use assign_stack_local.  */
12697 rtx
12698 ix86_force_to_memory (mode, operand)
12699 enum machine_mode mode;
12700 rtx operand;
12701 {
12702 rtx result;
12703 if (!reload_completed)
12704 abort ();
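  /* With the 64-bit red zone the area just below the stack pointer may be
     used directly; otherwise the operand is pushed and the new top of the
     stack is returned.  */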
12705 if (TARGET_64BIT && TARGET_RED_ZONE)
12706 {
12707 result = gen_rtx_MEM (mode,
12708 gen_rtx_PLUS (Pmode,
12709 stack_pointer_rtx,
12710 GEN_INT (-RED_ZONE_SIZE)));
12711 emit_move_insn (result, operand);
12712 }
12713 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12714 {
12715 switch (mode)
12716 {
12717 case HImode:
12718 case SImode:
12719 operand = gen_lowpart (DImode, operand);
12720 /* FALLTHRU */
12721 case DImode:
12722 emit_insn (
12723 gen_rtx_SET (VOIDmode,
12724 gen_rtx_MEM (DImode,
12725 gen_rtx_PRE_DEC (DImode,
12726 stack_pointer_rtx)),
12727 operand));
12728 break;
12729 default:
12730 abort ();
12731 }
12732 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12733 }
12734 else
12735 {
12736 switch (mode)
12737 {
12738 case DImode:
12739 {
12740 rtx operands[2];
12741 split_di (&operand, 1, operands, operands + 1);
12742 emit_insn (
12743 gen_rtx_SET (VOIDmode,
12744 gen_rtx_MEM (SImode,
12745 gen_rtx_PRE_DEC (Pmode,
12746 stack_pointer_rtx)),
12747 operands[1]));
12748 emit_insn (
12749 gen_rtx_SET (VOIDmode,
12750 gen_rtx_MEM (SImode,
12751 gen_rtx_PRE_DEC (Pmode,
12752 stack_pointer_rtx)),
12753 operands[0]));
12754 }
12755 break;
12756 case HImode:
12757 /* It is better to store HImode values as SImode.  */
12758 if (!TARGET_PARTIAL_REG_STALL)
12759 operand = gen_lowpart (SImode, operand);
12760 /* FALLTHRU */
12761 case SImode:
12762 emit_insn (
12763 gen_rtx_SET (VOIDmode,
12764 gen_rtx_MEM (GET_MODE (operand),
12765 gen_rtx_PRE_DEC (SImode,
12766 stack_pointer_rtx)),
12767 operand));
12768 break;
12769 default:
12770 abort ();
12771 }
12772 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12773 }
12774 return result;
12775 }
12776
12777 /* Free the stack space allocated by ix86_force_to_memory for MODE.  */
12778 void
12779 ix86_free_from_memory (mode)
12780 enum machine_mode mode;
12781 {
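  /* Nothing to do when the red zone was used; otherwise deallocate the
     space that ix86_force_to_memory pushed.  */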
12782 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12783 {
12784 int size;
12785
12786 if (mode == DImode || TARGET_64BIT)
12787 size = 8;
12788 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12789 size = 2;
12790 else
12791 size = 4;
12792 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
12793 to a pop or add instruction if registers are available.  */
12794 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12795 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12796 GEN_INT (size))));
12797 }
12798 }
12799
12800 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12801 QImode must go into class Q_REGS.
12802 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12803 movdf to do mem-to-mem moves through integer regs. */
12804 enum reg_class
12805 ix86_preferred_reload_class (x, class)
12806 rtx x;
12807 enum reg_class class;
12808 {
12809 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12810 {
12811 /* SSE can't load any constant directly yet. */
12812 if (SSE_CLASS_P (class))
12813 return NO_REGS;
12814 /* Floats can load 0 and 1. */
12815 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12816 {
12817 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12818 if (MAYBE_SSE_CLASS_P (class))
12819 return (reg_class_subset_p (class, GENERAL_REGS)
12820 ? GENERAL_REGS : FLOAT_REGS);
12821 else
12822 return class;
12823 }
12824 /* General regs can load everything. */
12825 if (reg_class_subset_p (class, GENERAL_REGS))
12826 return GENERAL_REGS;
12827 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12828 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12829 return NO_REGS;
12830 }
12831 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12832 return NO_REGS;
12833 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12834 return Q_REGS;
12835 return class;
12836 }
12837
12838 /* If we are copying between general and FP registers, we need a memory
12839 location. The same is true for SSE and MMX registers.
12840
12841 The macro can't work reliably when one of the CLASSES is a class containing
12842 registers from multiple units (SSE, MMX, integer).  We avoid this by never
12843 combining those units in a single alternative in the machine description.
12844 Ensure that this constraint holds to avoid unexpected surprises.
12845
12846 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12847 enforce these sanity checks. */
12848 int
12849 ix86_secondary_memory_needed (class1, class2, mode, strict)
12850 enum reg_class class1, class2;
12851 enum machine_mode mode;
12852 int strict;
12853 {
12854 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12855 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12856 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12857 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12858 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12859 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12860 {
12861 if (strict)
12862 abort ();
12863 else
12864 return 1;
12865 }
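  /* SImode values can be copied directly between the SSE or MMX unit and
     the integer registers (presumably via movd), so only those moves avoid
     the memory intermediary.  */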
12866 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12867 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12868 && (mode) != SImode)
12869 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12870 && (mode) != SImode));
12871 }
12872 /* Return the cost of moving data from a register in class CLASS1 to
12873 one in class CLASS2.
12874
12875 It is not required that the cost always equal 2 when FROM is the same as TO;
12876 on some machines it is expensive to move between registers if they are not
12877 general registers. */
12878 int
12879 ix86_register_move_cost (mode, class1, class2)
12880 enum machine_mode mode;
12881 enum reg_class class1, class2;
12882 {
12883 /* If secondary memory is required, compute the cost of the store followed
12884 by the load.  When copying from a general purpose register we may emit
12885 multiple stores followed by a single load, causing a memory size mismatch
12886 stall.  Count this as an arbitrarily high cost of 20.  */
12887 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12888 {
12889 int add_cost = 0;
12890 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12891 add_cost = 20;
12892 return (MEMORY_MOVE_COST (mode, class1, 0)
12893 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12894 }
12895 /* Moves between SSE/MMX and integer unit are expensive. */
12896 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12897 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12898 return ix86_cost->mmxsse_to_integer;
12899 if (MAYBE_FLOAT_CLASS_P (class1))
12900 return ix86_cost->fp_move;
12901 if (MAYBE_SSE_CLASS_P (class1))
12902 return ix86_cost->sse_move;
12903 if (MAYBE_MMX_CLASS_P (class1))
12904 return ix86_cost->mmx_move;
12905 return 2;
12906 }
12907
12908 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12909 int
12910 ix86_hard_regno_mode_ok (regno, mode)
12911 int regno;
12912 enum machine_mode mode;
12913 {
12914 /* CCmode values live only in the flags register, and the flags register holds nothing else.  */
12915 if (CC_REGNO_P (regno))
12916 return GET_MODE_CLASS (mode) == MODE_CC;
12917 if (GET_MODE_CLASS (mode) == MODE_CC
12918 || GET_MODE_CLASS (mode) == MODE_RANDOM
12919 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12920 return 0;
12921 if (FP_REGNO_P (regno))
12922 return VALID_FP_MODE_P (mode);
12923 if (SSE_REGNO_P (regno))
12924 return VALID_SSE_REG_MODE (mode);
12925 if (MMX_REGNO_P (regno))
12926 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12927 /* We handle both integers and floats in the general purpose registers.
12928 In the future we should be able to handle vector modes as well.  */
12929 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12930 return 0;
12931 /* Take care with QImode values - they can live in non-QI regs, but then
12932 they cause partial register stalls.  */
12933 if (regno < 4 || mode != QImode || TARGET_64BIT)
12934 return 1;
12935 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12936 }
12937
12938 /* Return the cost of moving data of mode M between a
12939 register and memory. A value of 2 is the default; this cost is
12940 relative to those in `REGISTER_MOVE_COST'.
12941
12942 If moving between registers and memory is more expensive than
12943 between two registers, you should define this macro to express the
12944 relative cost.
12945
12946 Also model the increased cost of moving QImode registers in
12947 non-Q_REGS classes.
12948 */
12949 int
12950 ix86_memory_move_cost (mode, class, in)
12951 enum machine_mode mode;
12952 enum reg_class class;
12953 int in;
12954 {
12955 if (FLOAT_CLASS_P (class))
12956 {
12957 int index;
12958 switch (mode)
12959 {
12960 case SFmode:
12961 index = 0;
12962 break;
12963 case DFmode:
12964 index = 1;
12965 break;
12966 case XFmode:
12967 case TFmode:
12968 index = 2;
12969 break;
12970 default:
12971 return 100;
12972 }
12973 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12974 }
12975 if (SSE_CLASS_P (class))
12976 {
12977 int index;
12978 switch (GET_MODE_SIZE (mode))
12979 {
12980 case 4:
12981 index = 0;
12982 break;
12983 case 8:
12984 index = 1;
12985 break;
12986 case 16:
12987 index = 2;
12988 break;
12989 default:
12990 return 100;
12991 }
12992 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12993 }
12994 if (MMX_CLASS_P (class))
12995 {
12996 int index;
12997 switch (GET_MODE_SIZE (mode))
12998 {
12999 case 4:
13000 index = 0;
13001 break;
13002 case 8:
13003 index = 1;
13004 break;
13005 default:
13006 return 100;
13007 }
13008 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13009 }
13010 switch (GET_MODE_SIZE (mode))
13011 {
13012 case 1:
13013 if (in)
13014 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13015 : ix86_cost->movzbl_load);
13016 else
13017 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13018 : ix86_cost->int_store[0] + 4);
13019 break;
13020 case 2:
13021 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13022 default:
13023 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
13024 if (mode == TFmode)
13025 mode = XFmode;
13026 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13027 * (int) GET_MODE_SIZE (mode) / 4);
13028 }
13029 }
13030
13031 #ifdef DO_GLOBAL_CTORS_BODY
13032 static void
13033 ix86_svr3_asm_out_constructor (symbol, priority)
13034 rtx symbol;
13035 int priority ATTRIBUTE_UNUSED;
13036 {
13037 init_section ();
13038 fputs ("\tpushl $", asm_out_file);
13039 assemble_name (asm_out_file, XSTR (symbol, 0));
13040 fputc ('\n', asm_out_file);
13041 }
13042 #endif
13043
13044 /* Order the registers for the register allocator.  */
13045
13046 void
13047 x86_order_regs_for_local_alloc ()
13048 {
13049 int pos = 0;
13050 int i;
13051
13052 /* First allocate the local general purpose registers. */
13053 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13054 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13055 reg_alloc_order [pos++] = i;
13056
13057 /* Global general purpose registers. */
13058 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13059 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13060 reg_alloc_order [pos++] = i;
13061
13062 /* x87 registers come first in case we are doing FP math
13063 using them. */
13064 if (!TARGET_SSE_MATH)
13065 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13066 reg_alloc_order [pos++] = i;
13067
13068 /* SSE registers. */
13069 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13070 reg_alloc_order [pos++] = i;
13071 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13072 reg_alloc_order [pos++] = i;
13073
13074 /* x87 registers.  */
13075 if (TARGET_SSE_MATH)
13076 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13077 reg_alloc_order [pos++] = i;
13078
13079 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13080 reg_alloc_order [pos++] = i;
13081
13082 /* Initialize the rest of the array, as we do not allocate some registers
13083 at all.  */
13084 while (pos < FIRST_PSEUDO_REGISTER)
13085 reg_alloc_order [pos++] = 0;
13086 }