1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
380 const int x86_single_stringop = m_386 | m_PENT4;
381 const int x86_qimode_math = ~(0);
382 const int x86_promote_qi_regs = 0;
383 const int x86_himode_math = ~(m_PPRO);
384 const int x86_promote_hi_regs = m_PPRO;
385 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
386 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
387 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
388 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
389 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
390 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
391 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
392 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_decompose_lea = m_PENT4;
396 const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;
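/* Each mask above is a bitmap over the PROCESSOR_* values; a feature is
   considered enabled for the current CPU or architecture by testing the
   corresponding bit, as in checks of the form
     x86_arch_always_fancy_math_387 & (1 << ix86_arch)
   that appear later in this file.  */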
397
398 /* In case the average insn count for a single function invocation is
399 lower than this constant, emit fast (but longer) prologue and
400 epilogue code. */
401 #define FAST_PROLOGUE_INSN_COUNT 30
402 /* Set by prologue expander and used by epilogue expander to determine
403 the style used. */
404 static int use_fast_prologue_epilogue;
405
406 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
407
408 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
409 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
410 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
411
412 /* Array of the smallest class containing reg number REGNO, indexed by
413 REGNO. Used by REGNO_REG_CLASS in i386.h. */
414
415 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
416 {
417 /* ax, dx, cx, bx */
418 AREG, DREG, CREG, BREG,
419 /* si, di, bp, sp */
420 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
421 /* FP registers */
422 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
423 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
424 /* arg pointer */
425 NON_Q_REGS,
426 /* flags, fpsr, dirflag, frame */
427 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
428 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
429 SSE_REGS, SSE_REGS,
430 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
431 MMX_REGS, MMX_REGS,
432 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
433 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
434 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
435 SSE_REGS, SSE_REGS,
436 };
437
438 /* The "default" register map used in 32bit mode. */
439
440 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
441 {
442 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
443 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
445 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
446 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
447 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
448 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
449 };
450
451 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
452 1 /*RDX*/, 2 /*RCX*/,
453 FIRST_REX_INT_REG /*R8 */,
454 FIRST_REX_INT_REG + 1 /*R9 */};
455 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
456
457 /* The "default" register map used in 64bit mode. */
458 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
459 {
460 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
461 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
462 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
463 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
464 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
465 8,9,10,11,12,13,14,15, /* extended integer registers */
466 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
467 };
468
469 /* Define the register numbers to be used in Dwarf debugging information.
470 The SVR4 reference port C compiler uses the following register numbers
471 in its Dwarf output code:
472 0 for %eax (gcc regno = 0)
473 1 for %ecx (gcc regno = 2)
474 2 for %edx (gcc regno = 1)
475 3 for %ebx (gcc regno = 3)
476 4 for %esp (gcc regno = 7)
477 5 for %ebp (gcc regno = 6)
478 6 for %esi (gcc regno = 4)
479 7 for %edi (gcc regno = 5)
480 The following three DWARF register numbers are never generated by
481 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
482 believes these numbers have these meanings.
483 8 for %eip (no gcc equivalent)
484 9 for %eflags (gcc regno = 17)
485 10 for %trapno (no gcc equivalent)
486 It is not at all clear how we should number the FP stack registers
487 for the x86 architecture. If the version of SDB on x86/svr4 were
488 a bit less brain dead with respect to floating-point then we would
489 have a precedent to follow with respect to DWARF register numbers
490 for x86 FP registers, but the SDB on x86/svr4 is so completely
491 broken with respect to FP registers that it is hardly worth thinking
492 of it as something to strive for compatibility with.
493 The version of x86/svr4 SDB I have at the moment does (partially)
494 seem to believe that DWARF register number 11 is associated with
495 the x86 register %st(0), but that's about all. Higher DWARF
496 register numbers don't seem to be associated with anything in
497 particular, and even for DWARF regno 11, SDB only seems to under-
498 stand that it should say that a variable lives in %st(0) (when
499 asked via an `=' command) if we said it was in DWARF regno 11,
500 but SDB still prints garbage when asked for the value of the
501 variable in question (via a `/' command).
502 (Also note that the labels SDB prints for various FP stack regs
503 when doing an `x' command are all wrong.)
504 Note that these problems generally don't affect the native SVR4
505 C compiler because it doesn't allow the use of -O with -g and
506 because when it is *not* optimizing, it allocates a memory
507 location for each floating-point variable, and the memory
508 location is what gets described in the DWARF AT_location
509 attribute for the variable in question.
510 Regardless of the severe mental illness of the x86/svr4 SDB, we
511 do something sensible here and we use the following DWARF
512 register numbers. Note that these are all stack-top-relative
513 numbers.
514 11 for %st(0) (gcc regno = 8)
515 12 for %st(1) (gcc regno = 9)
516 13 for %st(2) (gcc regno = 10)
517 14 for %st(3) (gcc regno = 11)
518 15 for %st(4) (gcc regno = 12)
519 16 for %st(5) (gcc regno = 13)
520 17 for %st(6) (gcc regno = 14)
521 18 for %st(7) (gcc regno = 15)
522 */
523 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
524 {
525 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
526 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
527 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
528 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
529 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
531 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
532 };
533
534 /* Test and compare insns in i386.md store the information needed to
535 generate branch and scc insns here. */
536
537 rtx ix86_compare_op0 = NULL_RTX;
538 rtx ix86_compare_op1 = NULL_RTX;
539
540 #define MAX_386_STACK_LOCALS 3
541 /* Size of the register save area. */
542 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
543
544 /* Define the structure for the machine field in struct function. */
545 struct machine_function
546 {
547 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
548 int save_varrargs_registers;
549 int accesses_prev_frame;
550 };
551
552 #define ix86_stack_locals (cfun->machine->stack_locals)
553 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
554
555 /* Structure describing stack frame layout.
556 Stack grows downward:
557
558 [arguments]
559 <- ARG_POINTER
560 saved pc
561
562 saved frame pointer if frame_pointer_needed
563 <- HARD_FRAME_POINTER
564 [saved regs]
565
566 [padding1] \
567 )
568 [va_arg registers] (
569 > to_allocate <- FRAME_POINTER
570 [frame] (
571 )
572 [padding2] /
573 */
574 struct ix86_frame
575 {
576 int nregs;
577 int padding1;
578 int va_arg_size;
579 HOST_WIDE_INT frame;
580 int padding2;
581 int outgoing_arguments_size;
582 int red_zone_size;
583
584 HOST_WIDE_INT to_allocate;
585 /* The offsets relative to ARG_POINTER. */
586 HOST_WIDE_INT frame_pointer_offset;
587 HOST_WIDE_INT hard_frame_pointer_offset;
588 HOST_WIDE_INT stack_pointer_offset;
589 };
590
591 /* Used to enable/disable debugging features. */
592 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
593 /* Code model option as passed by user. */
594 const char *ix86_cmodel_string;
595 /* Parsed value. */
596 enum cmodel ix86_cmodel;
597 /* Asm dialect. */
598 const char *ix86_asm_string;
599 enum asm_dialect ix86_asm_dialect = ASM_ATT;
600
601 /* which cpu are we scheduling for */
602 enum processor_type ix86_cpu;
603
604 /* which unit we are generating floating point math for */
605 enum fpmath_unit ix86_fpmath;
606
607 /* which instruction set architecture to use. */
608 int ix86_arch;
609
610 /* Strings to hold which cpu and instruction set architecture to use. */
611 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
612 const char *ix86_arch_string; /* for -march=<xxx> */
613 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
614
615 /* # of registers to use to pass arguments. */
616 const char *ix86_regparm_string;
617
618 /* True if the SSE prefetch instruction is not a NOP. */
619 int x86_prefetch_sse;
620
621 /* ix86_regparm_string as a number */
622 int ix86_regparm;
623
624 /* Alignment to use for loops and jumps: */
625
626 /* Power of two alignment for loops. */
627 const char *ix86_align_loops_string;
628
629 /* Power of two alignment for non-loop jumps. */
630 const char *ix86_align_jumps_string;
631
632 /* Power of two alignment for stack boundary in bytes. */
633 const char *ix86_preferred_stack_boundary_string;
634
635 /* Preferred alignment for stack boundary in bits. */
636 int ix86_preferred_stack_boundary;
637
638 /* Values 1-5: see jump.c */
639 int ix86_branch_cost;
640 const char *ix86_branch_cost_string;
641
642 /* Power of two alignment for functions. */
643 const char *ix86_align_funcs_string;
644
645 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
646 static char internal_label_prefix[16];
647 static int internal_label_prefix_len;
648 \f
649 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
650 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
651 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
652 int, int, FILE *));
653 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
654 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
655 rtx *, rtx *));
656 static rtx gen_push PARAMS ((rtx));
657 static int memory_address_length PARAMS ((rtx addr));
658 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
659 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
660 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
661 static void ix86_dump_ppro_packet PARAMS ((FILE *));
662 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
663 static void ix86_init_machine_status PARAMS ((struct function *));
664 static void ix86_mark_machine_status PARAMS ((struct function *));
665 static void ix86_free_machine_status PARAMS ((struct function *));
666 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
667 static int ix86_nsaved_regs PARAMS ((void));
668 static void ix86_emit_save_regs PARAMS ((void));
669 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
670 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
671 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
672 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
673 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
674 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
675 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
676 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
677 static int ix86_issue_rate PARAMS ((void));
678 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
679 static void ix86_sched_init PARAMS ((FILE *, int, int));
680 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
681 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
682 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
683 static int ia32_multipass_dfa_lookahead PARAMS ((void));
684 static void ix86_init_mmx_sse_builtins PARAMS ((void));
685
686 struct ix86_address
687 {
688 rtx base, index, disp;
689 HOST_WIDE_INT scale;
690 };
691
692 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
693
694 static void i386_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
695
696 struct builtin_description;
697 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
698 tree, rtx));
699 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
700 tree, rtx));
701 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
702 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
703 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
704 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
705 tree, rtx));
706 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
707 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
708 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
709 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
710 enum rtx_code *,
711 enum rtx_code *,
712 enum rtx_code *));
713 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
714 rtx *, rtx *));
715 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
716 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
717 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
718 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
719 static int ix86_save_reg PARAMS ((unsigned int, int));
720 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
721 static int ix86_comp_type_attributes PARAMS ((tree, tree));
722 const struct attribute_spec ix86_attribute_table[];
723 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
724 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
725
726 #ifdef DO_GLOBAL_CTORS_BODY
727 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
728 #endif
729
730 /* Register class used for passing a given 64bit part of the argument.
731 These represent classes as documented by the PS ABI, with the exception of
732 the SSESF and SSEDF classes, which are basically the SSE class except that
733 gcc uses SFmode or DFmode moves instead of DImode moves to avoid
734 reformatting penalties.
735 
736 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
737 whenever possible (the upper half contains only padding). */
738 enum x86_64_reg_class
739 {
740 X86_64_NO_CLASS,
741 X86_64_INTEGER_CLASS,
742 X86_64_INTEGERSI_CLASS,
743 X86_64_SSE_CLASS,
744 X86_64_SSESF_CLASS,
745 X86_64_SSEDF_CLASS,
746 X86_64_SSEUP_CLASS,
747 X86_64_X87_CLASS,
748 X86_64_X87UP_CLASS,
749 X86_64_MEMORY_CLASS
750 };
751 static const char * const x86_64_reg_class_name[] =
752 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
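/* As a rough illustration of this classification (see classify_argument
   below): a 16 byte aggregate such as
     struct example { int i; double d; };
   is split into two eightbytes; the first (the int plus padding) gets
   INTEGERSI class and the second (the double) gets SSEDF class, so the
   argument is passed in one integer register and one SSE register.  */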
753
754 #define MAX_CLASSES 4
755 static int classify_argument PARAMS ((enum machine_mode, tree,
756 enum x86_64_reg_class [MAX_CLASSES],
757 int));
758 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
759 int *));
760 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
761 const int *, int));
762 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
763 enum x86_64_reg_class));
764 \f
765 /* Initialize the GCC target structure. */
766 #undef TARGET_ATTRIBUTE_TABLE
767 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
768 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
769 # undef TARGET_MERGE_DECL_ATTRIBUTES
770 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
771 #endif
772
773 #undef TARGET_COMP_TYPE_ATTRIBUTES
774 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
775
776 #undef TARGET_INIT_BUILTINS
777 #define TARGET_INIT_BUILTINS ix86_init_builtins
778
779 #undef TARGET_EXPAND_BUILTIN
780 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
781
782 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
783 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
784 HOST_WIDE_INT));
785 # undef TARGET_ASM_FUNCTION_PROLOGUE
786 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
787 #endif
788
789 #undef TARGET_ASM_OPEN_PAREN
790 #define TARGET_ASM_OPEN_PAREN ""
791 #undef TARGET_ASM_CLOSE_PAREN
792 #define TARGET_ASM_CLOSE_PAREN ""
793
794 #undef TARGET_ASM_ALIGNED_HI_OP
795 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
796 #undef TARGET_ASM_ALIGNED_SI_OP
797 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
798 #ifdef ASM_QUAD
799 #undef TARGET_ASM_ALIGNED_DI_OP
800 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
801 #endif
802
803 #undef TARGET_ASM_UNALIGNED_HI_OP
804 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
805 #undef TARGET_ASM_UNALIGNED_SI_OP
806 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
807 #undef TARGET_ASM_UNALIGNED_DI_OP
808 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
809
810 #undef TARGET_SCHED_ADJUST_COST
811 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
812 #undef TARGET_SCHED_ISSUE_RATE
813 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
814 #undef TARGET_SCHED_VARIABLE_ISSUE
815 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
816 #undef TARGET_SCHED_INIT
817 #define TARGET_SCHED_INIT ix86_sched_init
818 #undef TARGET_SCHED_REORDER
819 #define TARGET_SCHED_REORDER ix86_sched_reorder
820 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
821 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
822 ia32_use_dfa_pipeline_interface
823 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
824 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
825 ia32_multipass_dfa_lookahead
826
827 struct gcc_target targetm = TARGET_INITIALIZER;
828 \f
829 /* Sometimes certain combinations of command options do not make
830 sense on a particular target machine. You can define a macro
831 `OVERRIDE_OPTIONS' to take account of this. This macro, if
832 defined, is executed once just after all the command options have
833 been parsed.
834
835 Don't use this macro to turn on various extra optimizations for
836 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
837
838 void
839 override_options ()
840 {
841 int i;
842 /* Comes from final.c -- no real reason to change it. */
843 #define MAX_CODE_ALIGN 16
844
845 static struct ptt
846 {
847 const struct processor_costs *cost; /* Processor costs */
848 const int target_enable; /* Target flags to enable. */
849 const int target_disable; /* Target flags to disable. */
850 const int align_loop; /* Default alignments. */
851 const int align_loop_max_skip;
852 const int align_jump;
853 const int align_jump_max_skip;
854 const int align_func;
855 const int branch_cost;
856 }
857 const processor_target_table[PROCESSOR_max] =
858 {
859 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
860 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
861 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
862 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
863 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
864 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
865 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
866 };
867
868 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
869 static struct pta
870 {
871 const char *const name; /* processor name or nickname. */
872 const enum processor_type processor;
873 const enum pta_flags
874 {
875 PTA_SSE = 1,
876 PTA_SSE2 = 2,
877 PTA_MMX = 4,
878 PTA_PREFETCH_SSE = 8,
879 PTA_3DNOW = 16,
880 PTA_3DNOW_A = 64
881 } flags;
882 }
883 const processor_alias_table[] =
884 {
885 {"i386", PROCESSOR_I386, 0},
886 {"i486", PROCESSOR_I486, 0},
887 {"i586", PROCESSOR_PENTIUM, 0},
888 {"pentium", PROCESSOR_PENTIUM, 0},
889 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
890 {"i686", PROCESSOR_PENTIUMPRO, 0},
891 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
892 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
893 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
894 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
895 PTA_MMX | PTA_PREFETCH_SSE},
896 {"k6", PROCESSOR_K6, PTA_MMX},
897 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
898 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
899 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
900 | PTA_3DNOW_A},
901 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
902 | PTA_3DNOW | PTA_3DNOW_A},
903 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
904 | PTA_3DNOW_A | PTA_SSE},
905 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
906 | PTA_3DNOW_A | PTA_SSE},
907 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
908 | PTA_3DNOW_A | PTA_SSE},
909 };
910
911 int const pta_size = ARRAY_SIZE (processor_alias_table);
912
913 #ifdef SUBTARGET_OVERRIDE_OPTIONS
914 SUBTARGET_OVERRIDE_OPTIONS;
915 #endif
916
917 if (!ix86_cpu_string && ix86_arch_string)
918 ix86_cpu_string = ix86_arch_string;
919 if (!ix86_cpu_string)
920 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
921 if (!ix86_arch_string)
922 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
923
924 if (ix86_cmodel_string != 0)
925 {
926 if (!strcmp (ix86_cmodel_string, "small"))
927 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
928 else if (flag_pic)
929 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
930 else if (!strcmp (ix86_cmodel_string, "32"))
931 ix86_cmodel = CM_32;
932 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
933 ix86_cmodel = CM_KERNEL;
934 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
935 ix86_cmodel = CM_MEDIUM;
936 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
937 ix86_cmodel = CM_LARGE;
938 else
939 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
940 }
941 else
942 {
943 ix86_cmodel = CM_32;
944 if (TARGET_64BIT)
945 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
946 }
947 if (ix86_asm_string != 0)
948 {
949 if (!strcmp (ix86_asm_string, "intel"))
950 ix86_asm_dialect = ASM_INTEL;
951 else if (!strcmp (ix86_asm_string, "att"))
952 ix86_asm_dialect = ASM_ATT;
953 else
954 error ("bad value (%s) for -masm= switch", ix86_asm_string);
955 }
956 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
957 error ("code model `%s' not supported in the %s bit mode",
958 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
959 if (ix86_cmodel == CM_LARGE)
960 sorry ("code model `large' not supported yet");
961 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
962 sorry ("%i-bit mode not compiled in",
963 (target_flags & MASK_64BIT) ? 64 : 32);
964
965 for (i = 0; i < pta_size; i++)
966 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
967 {
968 ix86_arch = processor_alias_table[i].processor;
969 /* Default cpu tuning to the architecture. */
970 ix86_cpu = ix86_arch;
971 if (processor_alias_table[i].flags & PTA_MMX
972 && !(target_flags & MASK_MMX_SET))
973 target_flags |= MASK_MMX;
974 if (processor_alias_table[i].flags & PTA_3DNOW
975 && !(target_flags & MASK_3DNOW_SET))
976 target_flags |= MASK_3DNOW;
977 if (processor_alias_table[i].flags & PTA_3DNOW_A
978 && !(target_flags & MASK_3DNOW_A_SET))
979 target_flags |= MASK_3DNOW_A;
980 if (processor_alias_table[i].flags & PTA_SSE
981 && !(target_flags & MASK_SSE_SET))
982 target_flags |= MASK_SSE;
983 if (processor_alias_table[i].flags & PTA_SSE2
984 && !(target_flags & MASK_SSE2_SET))
985 target_flags |= MASK_SSE2;
986 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
987 x86_prefetch_sse = true;
988 break;
989 }
990
991 if (i == pta_size)
992 error ("bad value (%s) for -march= switch", ix86_arch_string);
993
994 for (i = 0; i < pta_size; i++)
995 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
996 {
997 ix86_cpu = processor_alias_table[i].processor;
998 break;
999 }
1000 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1001 x86_prefetch_sse = true;
1002 if (i == pta_size)
1003 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1004
1005 if (optimize_size)
1006 ix86_cost = &size_cost;
1007 else
1008 ix86_cost = processor_target_table[ix86_cpu].cost;
1009 target_flags |= processor_target_table[ix86_cpu].target_enable;
1010 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1011
1012 /* Arrange to set up ix86_stack_locals for all functions. */
1013 init_machine_status = ix86_init_machine_status;
1014 mark_machine_status = ix86_mark_machine_status;
1015 free_machine_status = ix86_free_machine_status;
1016
1017 /* Validate -mregparm= value. */
1018 if (ix86_regparm_string)
1019 {
1020 i = atoi (ix86_regparm_string);
1021 if (i < 0 || i > REGPARM_MAX)
1022 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1023 else
1024 ix86_regparm = i;
1025 }
1026 else
1027 if (TARGET_64BIT)
1028 ix86_regparm = REGPARM_MAX;
1029
1030 /* If the user has provided any of the -malign-* options,
1031 warn and use that value only if -falign-* is not set.
1032 Remove this code in GCC 3.2 or later. */
1033 if (ix86_align_loops_string)
1034 {
1035 warning ("-malign-loops is obsolete, use -falign-loops");
1036 if (align_loops == 0)
1037 {
1038 i = atoi (ix86_align_loops_string);
1039 if (i < 0 || i > MAX_CODE_ALIGN)
1040 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1041 else
1042 align_loops = 1 << i;
1043 }
1044 }
1045
1046 if (ix86_align_jumps_string)
1047 {
1048 warning ("-malign-jumps is obsolete, use -falign-jumps");
1049 if (align_jumps == 0)
1050 {
1051 i = atoi (ix86_align_jumps_string);
1052 if (i < 0 || i > MAX_CODE_ALIGN)
1053 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1054 else
1055 align_jumps = 1 << i;
1056 }
1057 }
1058
1059 if (ix86_align_funcs_string)
1060 {
1061 warning ("-malign-functions is obsolete, use -falign-functions");
1062 if (align_functions == 0)
1063 {
1064 i = atoi (ix86_align_funcs_string);
1065 if (i < 0 || i > MAX_CODE_ALIGN)
1066 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1067 else
1068 align_functions = 1 << i;
1069 }
1070 }
1071
1072 /* Default align_* from the processor table. */
1073 if (align_loops == 0)
1074 {
1075 align_loops = processor_target_table[ix86_cpu].align_loop;
1076 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1077 }
1078 if (align_jumps == 0)
1079 {
1080 align_jumps = processor_target_table[ix86_cpu].align_jump;
1081 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1082 }
1083 if (align_functions == 0)
1084 {
1085 align_functions = processor_target_table[ix86_cpu].align_func;
1086 }
1087
1088 /* Validate -mpreferred-stack-boundary= value, or provide default.
1089 The default of 128 bits is for Pentium III's SSE __m128, but we
1090 don't want additional code to keep the stack aligned when
1091 optimizing for code size. */
1092 ix86_preferred_stack_boundary = (optimize_size
1093 ? TARGET_64BIT ? 64 : 32
1094 : 128);
1095 if (ix86_preferred_stack_boundary_string)
1096 {
1097 i = atoi (ix86_preferred_stack_boundary_string);
1098 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1099 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1100 TARGET_64BIT ? 3 : 2);
1101 else
1102 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1103 }
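/* E.g. -mpreferred-stack-boundary=4 requests a (1 << 4) * BITS_PER_UNIT
   = 128 bit (16 byte) boundary, the same as the non-size-optimized
   default above.  */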
1104
1105 /* Validate -mbranch-cost= value, or provide default. */
1106 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1107 if (ix86_branch_cost_string)
1108 {
1109 i = atoi (ix86_branch_cost_string);
1110 if (i < 0 || i > 5)
1111 error ("-mbranch-cost=%d is not between 0 and 5", i);
1112 else
1113 ix86_branch_cost = i;
1114 }
1115
1116 /* Keep nonleaf frame pointers. */
1117 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1118 flag_omit_frame_pointer = 1;
1119
1120 /* If we're doing fast math, we don't care about comparison order
1121 wrt NaNs. This lets us use a shorter comparison sequence. */
1122 if (flag_unsafe_math_optimizations)
1123 target_flags &= ~MASK_IEEE_FP;
1124
1125 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1126 since the insns won't need emulation. */
1127 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1128 target_flags &= ~MASK_NO_FANCY_MATH_387;
1129
1130 if (TARGET_64BIT)
1131 {
1132 if (TARGET_ALIGN_DOUBLE)
1133 error ("-malign-double makes no sense in the 64bit mode");
1134 if (TARGET_RTD)
1135 error ("-mrtd calling convention not supported in the 64bit mode");
1136 /* Enable by default the SSE and MMX builtins. */
1137 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1138 ix86_fpmath = FPMATH_SSE;
1139 }
1140 else
1141 ix86_fpmath = FPMATH_387;
1142
1143 if (ix86_fpmath_string != 0)
1144 {
1145 if (! strcmp (ix86_fpmath_string, "387"))
1146 ix86_fpmath = FPMATH_387;
1147 else if (! strcmp (ix86_fpmath_string, "sse"))
1148 {
1149 if (!TARGET_SSE)
1150 {
1151 warning ("SSE instruction set disabled, using 387 arithmetics");
1152 ix86_fpmath = FPMATH_387;
1153 }
1154 else
1155 ix86_fpmath = FPMATH_SSE;
1156 }
1157 else if (! strcmp (ix86_fpmath_string, "387,sse")
1158 || ! strcmp (ix86_fpmath_string, "sse,387"))
1159 {
1160 if (!TARGET_SSE)
1161 {
1162 warning ("SSE instruction set disabled, using 387 arithmetics");
1163 ix86_fpmath = FPMATH_387;
1164 }
1165 else if (!TARGET_80387)
1166 {
1167 warning ("387 instruction set disabled, using SSE arithmetics");
1168 ix86_fpmath = FPMATH_SSE;
1169 }
1170 else
1171 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1172 }
1173 else
1174 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1175 }
1176
1177 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1178 on by -msse. */
1179 if (TARGET_SSE)
1180 {
1181 target_flags |= MASK_MMX;
1182 x86_prefetch_sse = true;
1183 }
1184
1185 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1186 if (TARGET_3DNOW)
1187 {
1188 target_flags |= MASK_MMX;
1189 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1190 extensions it adds. */
1191 if (x86_3dnow_a & (1 << ix86_arch))
1192 target_flags |= MASK_3DNOW_A;
1193 }
1194 if ((x86_accumulate_outgoing_args & CPUMASK)
1195 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1196 && !optimize_size)
1197 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1198
1199 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1200 {
1201 char *p;
1202 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1203 p = strchr (internal_label_prefix, 'X');
1204 internal_label_prefix_len = p - internal_label_prefix;
1205 *p = '\0';
1206 }
1207 }
1208 \f
1209 void
1210 optimization_options (level, size)
1211 int level;
1212 int size ATTRIBUTE_UNUSED;
1213 {
1214 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1215 make the problem with not enough registers even worse. */
1216 #ifdef INSN_SCHEDULING
1217 if (level > 1)
1218 flag_schedule_insns = 0;
1219 #endif
1220 if (TARGET_64BIT && optimize >= 1)
1221 flag_omit_frame_pointer = 1;
1222 if (TARGET_64BIT)
1223 {
1224 flag_pcc_struct_return = 0;
1225 flag_asynchronous_unwind_tables = 1;
1226 }
1227 }
1228 \f
1229 /* Table of valid machine attributes. */
1230 const struct attribute_spec ix86_attribute_table[] =
1231 {
1232 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1233 /* Stdcall attribute says callee is responsible for popping arguments
1234 if they are not variable. */
1235 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1236 /* Cdecl attribute says the callee is a normal C declaration */
1237 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1238 /* Regparm attribute specifies how many integer arguments are to be
1239 passed in registers. */
1240 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1241 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1242 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1243 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1244 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1245 #endif
1246 { NULL, 0, 0, false, false, false, NULL }
1247 };
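/* In user code these attributes look roughly like:
     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);
   where stdcall makes the callee pop its fixed arguments and regparm (3)
   asks for up to three integer arguments in registers (%eax, %edx and
   %ecx).  */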
1248
1249 /* Handle a "cdecl" or "stdcall" attribute;
1250 arguments as in struct attribute_spec.handler. */
1251 static tree
1252 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1253 tree *node;
1254 tree name;
1255 tree args ATTRIBUTE_UNUSED;
1256 int flags ATTRIBUTE_UNUSED;
1257 bool *no_add_attrs;
1258 {
1259 if (TREE_CODE (*node) != FUNCTION_TYPE
1260 && TREE_CODE (*node) != METHOD_TYPE
1261 && TREE_CODE (*node) != FIELD_DECL
1262 && TREE_CODE (*node) != TYPE_DECL)
1263 {
1264 warning ("`%s' attribute only applies to functions",
1265 IDENTIFIER_POINTER (name));
1266 *no_add_attrs = true;
1267 }
1268
1269 if (TARGET_64BIT)
1270 {
1271 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1272 *no_add_attrs = true;
1273 }
1274
1275 return NULL_TREE;
1276 }
1277
1278 /* Handle a "regparm" attribute;
1279 arguments as in struct attribute_spec.handler. */
1280 static tree
1281 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1282 tree *node;
1283 tree name;
1284 tree args;
1285 int flags ATTRIBUTE_UNUSED;
1286 bool *no_add_attrs;
1287 {
1288 if (TREE_CODE (*node) != FUNCTION_TYPE
1289 && TREE_CODE (*node) != METHOD_TYPE
1290 && TREE_CODE (*node) != FIELD_DECL
1291 && TREE_CODE (*node) != TYPE_DECL)
1292 {
1293 warning ("`%s' attribute only applies to functions",
1294 IDENTIFIER_POINTER (name));
1295 *no_add_attrs = true;
1296 }
1297 else
1298 {
1299 tree cst;
1300
1301 cst = TREE_VALUE (args);
1302 if (TREE_CODE (cst) != INTEGER_CST)
1303 {
1304 warning ("`%s' attribute requires an integer constant argument",
1305 IDENTIFIER_POINTER (name));
1306 *no_add_attrs = true;
1307 }
1308 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1309 {
1310 warning ("argument to `%s' attribute larger than %d",
1311 IDENTIFIER_POINTER (name), REGPARM_MAX);
1312 *no_add_attrs = true;
1313 }
1314 }
1315
1316 return NULL_TREE;
1317 }
1318
1319 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1320
1321 /* Generate the assembly code for function entry. FILE is a stdio
1322 stream to output the code to. SIZE is an int: how many units of
1323 temporary storage to allocate.
1324
1325 Refer to the array `regs_ever_live' to determine which registers to
1326 save; `regs_ever_live[I]' is nonzero if register number I is ever
1327 used in the function. This function is responsible for knowing
1328 which registers should not be saved even if used.
1329
1330 We override it here to allow for the new profiling code to go before
1331 the prologue and the old mcount code to go after the prologue (and
1332 after %ebx has been set up for ELF shared library support). */
1333
1334 static void
1335 ix86_osf_output_function_prologue (file, size)
1336 FILE *file;
1337 HOST_WIDE_INT size;
1338 {
1339 const char *prefix = "";
1340 const char *const lprefix = LPREFIX;
1341 int labelno = current_function_profile_label_no;
1342
1343 #ifdef OSF_OS
1344
1345 if (TARGET_UNDERSCORES)
1346 prefix = "_";
1347
1348 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1349 {
1350 if (!flag_pic && !HALF_PIC_P ())
1351 {
1352 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1353 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1354 }
1355
1356 else if (HALF_PIC_P ())
1357 {
1358 rtx symref;
1359
1360 HALF_PIC_EXTERNAL ("_mcount_ptr");
1361 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1362 "_mcount_ptr"));
1363
1364 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1365 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1366 XSTR (symref, 0));
1367 fprintf (file, "\tcall *(%%eax)\n");
1368 }
1369
1370 else
1371 {
1372 static int call_no = 0;
1373
1374 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1375 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1376 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1377 lprefix, call_no++);
1378 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1379 lprefix, labelno);
1380 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1381 prefix);
1382 fprintf (file, "\tcall *(%%eax)\n");
1383 }
1384 }
1385
1386 #else /* !OSF_OS */
1387
1388 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1389 {
1390 if (!flag_pic)
1391 {
1392 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1393 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1394 }
1395
1396 else
1397 {
1398 static int call_no = 0;
1399
1400 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1401 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1402 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1403 lprefix, call_no++);
1404 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1405 lprefix, labelno);
1406 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1407 prefix);
1408 fprintf (file, "\tcall *(%%eax)\n");
1409 }
1410 }
1411 #endif /* !OSF_OS */
1412
1413 function_prologue (file, size);
1414 }
1415
1416 #endif /* OSF_OS || TARGET_OSF1ELF */
1417
1418 /* Return 0 if the attributes for two types are incompatible, 1 if they
1419 are compatible, and 2 if they are nearly compatible (which causes a
1420 warning to be generated). */
1421
1422 static int
1423 ix86_comp_type_attributes (type1, type2)
1424 tree type1;
1425 tree type2;
1426 {
1427 /* Check for mismatch of non-default calling convention. */
1428 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1429
1430 if (TREE_CODE (type1) != FUNCTION_TYPE)
1431 return 1;
1432
1433 /* Check for mismatched return types (cdecl vs stdcall). */
1434 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1435 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1436 return 0;
1437 return 1;
1438 }
1439 \f
1440 /* Value is the number of bytes of arguments automatically
1441 popped when returning from a subroutine call.
1442 FUNDECL is the declaration node of the function (as a tree),
1443 FUNTYPE is the data type of the function (as a tree),
1444 or for a library call it is an identifier node for the subroutine name.
1445 SIZE is the number of bytes of arguments passed on the stack.
1446
1447 On the 80386, the RTD insn may be used to pop them if the number
1448 of args is fixed, but if the number is variable then the caller
1449 must pop them all. RTD can't be used for library calls now
1450 because the library is compiled with the Unix compiler.
1451 Use of RTD is a selectable option, since it is incompatible with
1452 standard Unix calling sequences. If the option is not selected,
1453 the caller must always pop the args.
1454
1455 The attribute stdcall is equivalent to RTD on a per module basis. */
1456
1457 int
1458 ix86_return_pops_args (fundecl, funtype, size)
1459 tree fundecl;
1460 tree funtype;
1461 int size;
1462 {
1463 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1464
1465 /* Cdecl functions override -mrtd, and never pop the stack. */
1466 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1467
1468 /* Stdcall functions will pop the stack if not variable args. */
1469 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1470 rtd = 1;
1471
1472 if (rtd
1473 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1474 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1475 == void_type_node)))
1476 return size;
1477 }
1478
1479 /* Lose any fake structure return argument if it is passed on the stack. */
1480 if (aggregate_value_p (TREE_TYPE (funtype))
1481 && !TARGET_64BIT)
1482 {
1483 int nregs = ix86_regparm;
1484
1485 if (funtype)
1486 {
1487 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1488
1489 if (attr)
1490 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1491 }
1492
1493 if (!nregs)
1494 return GET_MODE_SIZE (Pmode);
1495 }
1496
1497 return 0;
1498 }
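/* For example, with -mrtd or the stdcall attribute a fixed-argument
   function such as
     void __attribute__ ((stdcall)) f (int a, int b);
   makes ix86_return_pops_args return SIZE (8 bytes here), so the function
   is emitted with a "ret $8", while cdecl and varargs functions return 0
   and leave the arguments for the caller to pop.  */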
1499 \f
1500 /* Argument support functions. */
1501
1502 /* Return true when register may be used to pass function parameters. */
1503 bool
1504 ix86_function_arg_regno_p (regno)
1505 int regno;
1506 {
1507 int i;
1508 if (!TARGET_64BIT)
1509 return (regno < REGPARM_MAX
1510 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1511 if (SSE_REGNO_P (regno) && TARGET_SSE)
1512 return true;
1513 /* RAX is used as hidden argument to va_arg functions. */
1514 if (!regno)
1515 return true;
1516 for (i = 0; i < REGPARM_MAX; i++)
1517 if (regno == x86_64_int_parameter_registers[i])
1518 return true;
1519 return false;
1520 }
1521
1522 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1523 for a call to a function whose data type is FNTYPE.
1524 For a library call, FNTYPE is 0. */
1525
1526 void
1527 init_cumulative_args (cum, fntype, libname)
1528 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1529 tree fntype; /* tree ptr for function decl */
1530 rtx libname; /* SYMBOL_REF of library name or 0 */
1531 {
1532 static CUMULATIVE_ARGS zero_cum;
1533 tree param, next_param;
1534
1535 if (TARGET_DEBUG_ARG)
1536 {
1537 fprintf (stderr, "\ninit_cumulative_args (");
1538 if (fntype)
1539 fprintf (stderr, "fntype code = %s, ret code = %s",
1540 tree_code_name[(int) TREE_CODE (fntype)],
1541 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1542 else
1543 fprintf (stderr, "no fntype");
1544
1545 if (libname)
1546 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1547 }
1548
1549 *cum = zero_cum;
1550
1551 /* Set up the number of registers to use for passing arguments. */
1552 cum->nregs = ix86_regparm;
1553 cum->sse_nregs = SSE_REGPARM_MAX;
1554 if (fntype && !TARGET_64BIT)
1555 {
1556 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1557
1558 if (attr)
1559 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1560 }
1561 cum->maybe_vaarg = false;
1562
1563 /* Determine if this function has variable arguments.  This is
1564 indicated by the last argument being 'void_type_node' if there
1565 are no variable arguments.  If there are variable arguments, then
1566 we won't pass anything in registers.  */
1567
1568 if (cum->nregs)
1569 {
1570 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1571 param != 0; param = next_param)
1572 {
1573 next_param = TREE_CHAIN (param);
1574 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1575 {
1576 if (!TARGET_64BIT)
1577 cum->nregs = 0;
1578 cum->maybe_vaarg = true;
1579 }
1580 }
1581 }
1582 if ((!fntype && !libname)
1583 || (fntype && !TYPE_ARG_TYPES (fntype)))
1584 cum->maybe_vaarg = 1;
1585
1586 if (TARGET_DEBUG_ARG)
1587 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1588
1589 return;
1590 }
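/* For illustration only - a hedged sketch of the regparm attribute handled
   above (the declaration is an example, not part of this file):  */
#if 0
/* With regparm (3), up to three integer arguments are passed in
   EAX, EDX and ECX instead of on the stack, so cum->nregs starts at 3.  */
extern int __attribute__ ((regparm (3))) example_regparm (int a, int b, int c);
#endif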
1591
1592 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
1593 of this code is to classify each eightbyte of an incoming argument by register
1594 class and assign registers accordingly.  */
1595
1596 /* Return the union class of CLASS1 and CLASS2.
1597 See the x86-64 PS ABI for details. */
1598
1599 static enum x86_64_reg_class
1600 merge_classes (class1, class2)
1601 enum x86_64_reg_class class1, class2;
1602 {
1603 /* Rule #1: If both classes are equal, this is the resulting class. */
1604 if (class1 == class2)
1605 return class1;
1606
1607 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1608 the other class. */
1609 if (class1 == X86_64_NO_CLASS)
1610 return class2;
1611 if (class2 == X86_64_NO_CLASS)
1612 return class1;
1613
1614 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1615 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1616 return X86_64_MEMORY_CLASS;
1617
1618 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1619 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1620 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1621 return X86_64_INTEGERSI_CLASS;
1622 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1623 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1624 return X86_64_INTEGER_CLASS;
1625
1626 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1627 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1628 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1629 return X86_64_MEMORY_CLASS;
1630
1631 /* Rule #6: Otherwise class SSE is used. */
1632 return X86_64_SSE_CLASS;
1633 }
1634
1635 /* Classify the argument of type TYPE and mode MODE.
1636 CLASSES will be filled by the register class used to pass each word
1637 of the operand. The number of words is returned. In case the parameter
1638 should be passed in memory, 0 is returned. As a special case for zero
1639 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1640
1641 BIT_OFFSET is used internally for handling records and specifies the
1642 offset, in bits modulo 256, to avoid overflow cases.
1643
1644 See the x86-64 PS ABI for details.
1645 */
1646
1647 static int
1648 classify_argument (mode, type, classes, bit_offset)
1649 enum machine_mode mode;
1650 tree type;
1651 enum x86_64_reg_class classes[MAX_CLASSES];
1652 int bit_offset;
1653 {
1654 int bytes =
1655 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1656 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1657
1658 if (type && AGGREGATE_TYPE_P (type))
1659 {
1660 int i;
1661 tree field;
1662 enum x86_64_reg_class subclasses[MAX_CLASSES];
1663
1664 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1665 if (bytes > 16)
1666 return 0;
1667
1668 for (i = 0; i < words; i++)
1669 classes[i] = X86_64_NO_CLASS;
1670
1671 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1672 signal the memory class, so handle this as a special case. */
1673 if (!words)
1674 {
1675 classes[0] = X86_64_NO_CLASS;
1676 return 1;
1677 }
1678
1679 /* Classify each field of record and merge classes. */
1680 if (TREE_CODE (type) == RECORD_TYPE)
1681 {
1682 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1683 {
1684 if (TREE_CODE (field) == FIELD_DECL)
1685 {
1686 int num;
1687
1688 /* Bitfields are always classified as integer. Handle them
1689 early, since later code would consider them to be
1690 misaligned integers. */
1691 if (DECL_BIT_FIELD (field))
1692 {
1693 for (i = int_bit_position (field) / 8 / 8;
1694 i < (int_bit_position (field)
1695 + tree_low_cst (DECL_SIZE (field), 0)
1696 + 63) / 8 / 8; i++)
1697 classes[i] =
1698 merge_classes (X86_64_INTEGER_CLASS,
1699 classes[i]);
1700 }
1701 else
1702 {
1703 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1704 TREE_TYPE (field), subclasses,
1705 (int_bit_position (field)
1706 + bit_offset) % 256);
1707 if (!num)
1708 return 0;
1709 for (i = 0; i < num; i++)
1710 {
1711 int pos =
1712 (int_bit_position (field) + bit_offset) / 8 / 8;
1713 classes[i + pos] =
1714 merge_classes (subclasses[i], classes[i + pos]);
1715 }
1716 }
1717 }
1718 }
1719 }
1720 /* Arrays are handled as small records. */
1721 else if (TREE_CODE (type) == ARRAY_TYPE)
1722 {
1723 int num;
1724 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1725 TREE_TYPE (type), subclasses, bit_offset);
1726 if (!num)
1727 return 0;
1728
1729 /* The partial classes are now full classes. */
1730 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1731 subclasses[0] = X86_64_SSE_CLASS;
1732 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1733 subclasses[0] = X86_64_INTEGER_CLASS;
1734
1735 for (i = 0; i < words; i++)
1736 classes[i] = subclasses[i % num];
1737 }
1738 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1739 else if (TREE_CODE (type) == UNION_TYPE
1740 || TREE_CODE (type) == QUAL_UNION_TYPE)
1741 {
1742 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1743 {
1744 if (TREE_CODE (field) == FIELD_DECL)
1745 {
1746 int num;
1747 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1748 TREE_TYPE (field), subclasses,
1749 bit_offset);
1750 if (!num)
1751 return 0;
1752 for (i = 0; i < num; i++)
1753 classes[i] = merge_classes (subclasses[i], classes[i]);
1754 }
1755 }
1756 }
1757 else
1758 abort ();
1759
1760 /* Final merger cleanup. */
1761 for (i = 0; i < words; i++)
1762 {
1763 /* If one class is MEMORY, everything should be passed in
1764 memory. */
1765 if (classes[i] == X86_64_MEMORY_CLASS)
1766 return 0;
1767
1768 /* The X86_64_SSEUP_CLASS should always be preceded by
1769 X86_64_SSE_CLASS. */
1770 if (classes[i] == X86_64_SSEUP_CLASS
1771 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1772 classes[i] = X86_64_SSE_CLASS;
1773
1774 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1775 if (classes[i] == X86_64_X87UP_CLASS
1776 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1777 classes[i] = X86_64_SSE_CLASS;
1778 }
1779 return words;
1780 }
1781
1782 /* Compute the alignment needed.  We align all types to their natural
1783 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
1784 if (mode != VOIDmode && mode != BLKmode)
1785 {
1786 int mode_alignment = GET_MODE_BITSIZE (mode);
1787
1788 if (mode == XFmode)
1789 mode_alignment = 128;
1790 else if (mode == XCmode)
1791 mode_alignment = 256;
1792 /* Misaligned fields are always returned in memory. */
1793 if (bit_offset % mode_alignment)
1794 return 0;
1795 }
1796
1797 /* Classification of atomic types. */
1798 switch (mode)
1799 {
1800 case DImode:
1801 case SImode:
1802 case HImode:
1803 case QImode:
1804 case CSImode:
1805 case CHImode:
1806 case CQImode:
1807 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1808 classes[0] = X86_64_INTEGERSI_CLASS;
1809 else
1810 classes[0] = X86_64_INTEGER_CLASS;
1811 return 1;
1812 case CDImode:
1813 case TImode:
1814 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1815 return 2;
1816 case CTImode:
1817 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1818 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1819 return 4;
1820 case SFmode:
1821 if (!(bit_offset % 64))
1822 classes[0] = X86_64_SSESF_CLASS;
1823 else
1824 classes[0] = X86_64_SSE_CLASS;
1825 return 1;
1826 case DFmode:
1827 classes[0] = X86_64_SSEDF_CLASS;
1828 return 1;
1829 case TFmode:
1830 classes[0] = X86_64_X87_CLASS;
1831 classes[1] = X86_64_X87UP_CLASS;
1832 return 2;
1833 case TCmode:
1834 classes[0] = X86_64_X87_CLASS;
1835 classes[1] = X86_64_X87UP_CLASS;
1836 classes[2] = X86_64_X87_CLASS;
1837 classes[3] = X86_64_X87UP_CLASS;
1838 return 4;
1839 case DCmode:
1840 classes[0] = X86_64_SSEDF_CLASS;
1841 classes[1] = X86_64_SSEDF_CLASS;
1842 return 2;
1843 case SCmode:
1844 classes[0] = X86_64_SSE_CLASS;
1845 return 1;
1846 case V4SFmode:
1847 case V4SImode:
1848 classes[0] = X86_64_SSE_CLASS;
1849 classes[1] = X86_64_SSEUP_CLASS;
1850 return 2;
1851 case V2SFmode:
1852 case V2SImode:
1853 case V4HImode:
1854 case V8QImode:
1855 classes[0] = X86_64_SSE_CLASS;
1856 return 1;
1857 case BLKmode:
1858 case VOIDmode:
1859 return 0;
1860 default:
1861 abort ();
1862 }
1863 }
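/* For illustration only - hedged examples of how the classification above
   plays out for a few simple aggregates (the types are examples, not part
   of this file):

     struct { int a; int b; }        -> one eightbyte, INTEGER
     struct { double x; double y; }  -> two eightbytes, SSEDF + SSEDF
     struct { long double v; }       -> X87 + X87UP, i.e. passed in memory
     struct { char c[24]; }          -> larger than 16 bytes, passed in memory  */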
1864
1865 /* Examine the argument and return the number of registers required in each
1866 class.  Return 0 iff the parameter should be passed in memory. */
1867 static int
1868 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1869 enum machine_mode mode;
1870 tree type;
1871 int *int_nregs, *sse_nregs;
1872 int in_return;
1873 {
1874 enum x86_64_reg_class class[MAX_CLASSES];
1875 int n = classify_argument (mode, type, class, 0);
1876
1877 *int_nregs = 0;
1878 *sse_nregs = 0;
1879 if (!n)
1880 return 0;
1881 for (n--; n >= 0; n--)
1882 switch (class[n])
1883 {
1884 case X86_64_INTEGER_CLASS:
1885 case X86_64_INTEGERSI_CLASS:
1886 (*int_nregs)++;
1887 break;
1888 case X86_64_SSE_CLASS:
1889 case X86_64_SSESF_CLASS:
1890 case X86_64_SSEDF_CLASS:
1891 (*sse_nregs)++;
1892 break;
1893 case X86_64_NO_CLASS:
1894 case X86_64_SSEUP_CLASS:
1895 break;
1896 case X86_64_X87_CLASS:
1897 case X86_64_X87UP_CLASS:
1898 if (!in_return)
1899 return 0;
1900 break;
1901 case X86_64_MEMORY_CLASS:
1902 abort ();
1903 }
1904 return 1;
1905 }
1906 /* Construct a container for the argument used by the GCC calling interface.
1907 See FUNCTION_ARG for a detailed description. */
1908 static rtx
1909 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1910 enum machine_mode mode;
1911 tree type;
1912 int in_return;
1913 int nintregs, nsseregs;
1914 const int * intreg;
1915 int sse_regno;
1916 {
1917 enum machine_mode tmpmode;
1918 int bytes =
1919 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1920 enum x86_64_reg_class class[MAX_CLASSES];
1921 int n;
1922 int i;
1923 int nexps = 0;
1924 int needed_sseregs, needed_intregs;
1925 rtx exp[MAX_CLASSES];
1926 rtx ret;
1927
1928 n = classify_argument (mode, type, class, 0);
1929 if (TARGET_DEBUG_ARG)
1930 {
1931 if (!n)
1932 fprintf (stderr, "Memory class\n");
1933 else
1934 {
1935 fprintf (stderr, "Classes:");
1936 for (i = 0; i < n; i++)
1937 {
1938 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1939 }
1940 fprintf (stderr, "\n");
1941 }
1942 }
1943 if (!n)
1944 return NULL;
1945 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1946 return NULL;
1947 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1948 return NULL;
1949
1950 /* First construct simple cases.  Avoid SCmode, since we want to use
1951 a single register to pass this type. */
1952 if (n == 1 && mode != SCmode)
1953 switch (class[0])
1954 {
1955 case X86_64_INTEGER_CLASS:
1956 case X86_64_INTEGERSI_CLASS:
1957 return gen_rtx_REG (mode, intreg[0]);
1958 case X86_64_SSE_CLASS:
1959 case X86_64_SSESF_CLASS:
1960 case X86_64_SSEDF_CLASS:
1961 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1962 case X86_64_X87_CLASS:
1963 return gen_rtx_REG (mode, FIRST_STACK_REG);
1964 case X86_64_NO_CLASS:
1965 /* Zero sized array, struct or class. */
1966 return NULL;
1967 default:
1968 abort ();
1969 }
1970 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1971 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1972 if (n == 2
1973 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1974 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1975 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1976 && class[1] == X86_64_INTEGER_CLASS
1977 && (mode == CDImode || mode == TImode)
1978 && intreg[0] + 1 == intreg[1])
1979 return gen_rtx_REG (mode, intreg[0]);
1980 if (n == 4
1981 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1982 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1983 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1984
1985 /* Otherwise figure out the entries of the PARALLEL. */
1986 for (i = 0; i < n; i++)
1987 {
1988 switch (class[i])
1989 {
1990 case X86_64_NO_CLASS:
1991 break;
1992 case X86_64_INTEGER_CLASS:
1993 case X86_64_INTEGERSI_CLASS:
1994 /* Merge TImodes on aligned occasions here too. */
1995 if (i * 8 + 8 > bytes)
1996 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1997 else if (class[i] == X86_64_INTEGERSI_CLASS)
1998 tmpmode = SImode;
1999 else
2000 tmpmode = DImode;
2001 /* We've requested a size we don't have a mode for; use DImode. */
2002 if (tmpmode == BLKmode)
2003 tmpmode = DImode;
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (tmpmode, *intreg),
2006 GEN_INT (i*8));
2007 intreg++;
2008 break;
2009 case X86_64_SSESF_CLASS:
2010 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2011 gen_rtx_REG (SFmode,
2012 SSE_REGNO (sse_regno)),
2013 GEN_INT (i*8));
2014 sse_regno++;
2015 break;
2016 case X86_64_SSEDF_CLASS:
2017 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2018 gen_rtx_REG (DFmode,
2019 SSE_REGNO (sse_regno)),
2020 GEN_INT (i*8));
2021 sse_regno++;
2022 break;
2023 case X86_64_SSE_CLASS:
2024 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2025 tmpmode = TImode, i++;
2026 else
2027 tmpmode = DImode;
2028 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2029 gen_rtx_REG (tmpmode,
2030 SSE_REGNO (sse_regno)),
2031 GEN_INT (i*8));
2032 sse_regno++;
2033 break;
2034 default:
2035 abort ();
2036 }
2037 }
2038 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2039 for (i = 0; i < nexps; i++)
2040 XVECEXP (ret, 0, i) = exp [i];
2041 return ret;
2042 }
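/* For illustration only - a hedged sketch of the container built above for a
   hypothetical struct { long l; double d; } argument (INTEGER + SSEDF),
   assuming it is the first argument, so intreg[0] is RDI and sse_regno is 0:

     (parallel [(expr_list (reg:DI rdi)  (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])  */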
2043
2044 /* Update the data in CUM to advance over an argument
2045 of mode MODE and data type TYPE.
2046 (TYPE is null for libcalls where that information may not be available.) */
2047
2048 void
2049 function_arg_advance (cum, mode, type, named)
2050 CUMULATIVE_ARGS *cum; /* current arg information */
2051 enum machine_mode mode; /* current arg mode */
2052 tree type; /* type of the argument or 0 if lib support */
2053 int named; /* whether or not the argument was named */
2054 {
2055 int bytes =
2056 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2057 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2058
2059 if (TARGET_DEBUG_ARG)
2060 fprintf (stderr,
2061 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2062 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2063 if (TARGET_64BIT)
2064 {
2065 int int_nregs, sse_nregs;
2066 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2067 cum->words += words;
2068 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2069 {
2070 cum->nregs -= int_nregs;
2071 cum->sse_nregs -= sse_nregs;
2072 cum->regno += int_nregs;
2073 cum->sse_regno += sse_nregs;
2074 }
2075 else
2076 cum->words += words;
2077 }
2078 else
2079 {
2080 if (TARGET_SSE && mode == TImode)
2081 {
2082 cum->sse_words += words;
2083 cum->sse_nregs -= 1;
2084 cum->sse_regno += 1;
2085 if (cum->sse_nregs <= 0)
2086 {
2087 cum->sse_nregs = 0;
2088 cum->sse_regno = 0;
2089 }
2090 }
2091 else
2092 {
2093 cum->words += words;
2094 cum->nregs -= words;
2095 cum->regno += words;
2096
2097 if (cum->nregs <= 0)
2098 {
2099 cum->nregs = 0;
2100 cum->regno = 0;
2101 }
2102 }
2103 }
2104 return;
2105 }
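/* For illustration only - a hedged example of the bookkeeping above on x86-64:
   for f (int i, double d, struct { char c[32]; } s), the int consumes one
   integer register, the double one SSE register, and the 32-byte struct
   (classified as memory) adds 4 words to cum->words.  */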
2106
2107 /* Define where to put the arguments to a function.
2108 Value is zero to push the argument on the stack,
2109 or a hard register in which to store the argument.
2110
2111 MODE is the argument's machine mode.
2112 TYPE is the data type of the argument (as a tree).
2113 This is null for libcalls where that information may
2114 not be available.
2115 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2116 the preceding args and about the function being called.
2117 NAMED is nonzero if this argument is a named parameter
2118 (otherwise it is an extra parameter matching an ellipsis). */
2119
2120 rtx
2121 function_arg (cum, mode, type, named)
2122 CUMULATIVE_ARGS *cum; /* current arg information */
2123 enum machine_mode mode; /* current arg mode */
2124 tree type; /* type of the argument or 0 if lib support */
2125 int named; /* != 0 for normal args, == 0 for ... args */
2126 {
2127 rtx ret = NULL_RTX;
2128 int bytes =
2129 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2130 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2131
2132 /* Handle a hidden AL argument containing the number of SSE registers used by
2133 varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2134 any AL settings. */
2135 if (mode == VOIDmode)
2136 {
2137 if (TARGET_64BIT)
2138 return GEN_INT (cum->maybe_vaarg
2139 ? (cum->sse_nregs < 0
2140 ? SSE_REGPARM_MAX
2141 : cum->sse_regno)
2142 : -1);
2143 else
2144 return constm1_rtx;
2145 }
2146 if (TARGET_64BIT)
2147 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2148 &x86_64_int_parameter_registers [cum->regno],
2149 cum->sse_regno);
2150 else
2151 switch (mode)
2152 {
2153 /* For now, pass fp/complex values on the stack. */
2154 default:
2155 break;
2156
2157 case BLKmode:
2158 case DImode:
2159 case SImode:
2160 case HImode:
2161 case QImode:
2162 if (words <= cum->nregs)
2163 ret = gen_rtx_REG (mode, cum->regno);
2164 break;
2165 case TImode:
2166 if (cum->sse_nregs)
2167 ret = gen_rtx_REG (mode, cum->sse_regno);
2168 break;
2169 }
2170
2171 if (TARGET_DEBUG_ARG)
2172 {
2173 fprintf (stderr,
2174 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2175 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2176
2177 if (ret)
2178 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2179 else
2180 fprintf (stderr, ", stack");
2181
2182 fprintf (stderr, " )\n");
2183 }
2184
2185 return ret;
2186 }
2187
2188 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2189 and type. */
2190
2191 int
2192 ix86_function_arg_boundary (mode, type)
2193 enum machine_mode mode;
2194 tree type;
2195 {
2196 int align;
2197 if (!TARGET_64BIT)
2198 return PARM_BOUNDARY;
2199 if (type)
2200 align = TYPE_ALIGN (type);
2201 else
2202 align = GET_MODE_ALIGNMENT (mode);
2203 if (align < PARM_BOUNDARY)
2204 align = PARM_BOUNDARY;
2205 if (align > 128)
2206 align = 128;
2207 return align;
2208 }
2209
2210 /* Return true if N is a possible register number of function value. */
2211 bool
2212 ix86_function_value_regno_p (regno)
2213 int regno;
2214 {
2215 if (!TARGET_64BIT)
2216 {
2217 return ((regno) == 0
2218 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2219 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2220 }
2221 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2222 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2223 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2224 }
2225
2226 /* Define how to find the value returned by a function.
2227 VALTYPE is the data type of the value (as a tree).
2228 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2229 otherwise, FUNC is 0. */
2230 rtx
2231 ix86_function_value (valtype)
2232 tree valtype;
2233 {
2234 if (TARGET_64BIT)
2235 {
2236 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2237 REGPARM_MAX, SSE_REGPARM_MAX,
2238 x86_64_int_return_registers, 0);
2239 /* For zero sized structures, construct_container returns NULL, but we need
2240 to keep the rest of the compiler happy by returning a meaningful value. */
2241 if (!ret)
2242 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2243 return ret;
2244 }
2245 else
2246 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2247 }
2248
2249 /* Return nonzero iff TYPE is returned in memory. */
2250 int
2251 ix86_return_in_memory (type)
2252 tree type;
2253 {
2254 int needed_intregs, needed_sseregs;
2255 if (TARGET_64BIT)
2256 {
2257 return !examine_argument (TYPE_MODE (type), type, 1,
2258 &needed_intregs, &needed_sseregs);
2259 }
2260 else
2261 {
2262 if (TYPE_MODE (type) == BLKmode
2263 || (VECTOR_MODE_P (TYPE_MODE (type))
2264 && int_size_in_bytes (type) == 8)
2265 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2266 && TYPE_MODE (type) != TFmode
2267 && !VECTOR_MODE_P (TYPE_MODE (type))))
2268 return 1;
2269 return 0;
2270 }
2271 }
2272
2273 /* Define how to find the value returned by a library function
2274 assuming the value has mode MODE. */
2275 rtx
2276 ix86_libcall_value (mode)
2277 enum machine_mode mode;
2278 {
2279 if (TARGET_64BIT)
2280 {
2281 switch (mode)
2282 {
2283 case SFmode:
2284 case SCmode:
2285 case DFmode:
2286 case DCmode:
2287 return gen_rtx_REG (mode, FIRST_SSE_REG);
2288 case TFmode:
2289 case TCmode:
2290 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2291 default:
2292 return gen_rtx_REG (mode, 0);
2293 }
2294 }
2295 else
2296 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2297 }
2298 \f
2299 /* Create the va_list data type. */
2300
2301 tree
2302 ix86_build_va_list ()
2303 {
2304 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2305
2306 /* For i386 we use plain pointer to argument area. */
2307 if (!TARGET_64BIT)
2308 return build_pointer_type (char_type_node);
2309
2310 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2311 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2312
2313 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2314 unsigned_type_node);
2315 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2316 unsigned_type_node);
2317 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2318 ptr_type_node);
2319 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2320 ptr_type_node);
2321
2322 DECL_FIELD_CONTEXT (f_gpr) = record;
2323 DECL_FIELD_CONTEXT (f_fpr) = record;
2324 DECL_FIELD_CONTEXT (f_ovf) = record;
2325 DECL_FIELD_CONTEXT (f_sav) = record;
2326
2327 TREE_CHAIN (record) = type_decl;
2328 TYPE_NAME (record) = type_decl;
2329 TYPE_FIELDS (record) = f_gpr;
2330 TREE_CHAIN (f_gpr) = f_fpr;
2331 TREE_CHAIN (f_fpr) = f_ovf;
2332 TREE_CHAIN (f_ovf) = f_sav;
2333
2334 layout_type (record);
2335
2336 /* The correct type is an array type of one element. */
2337 return build_array_type (record, build_index_type (size_zero_node));
2338 }
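/* For illustration only - the record laid out above corresponds roughly to
   this C declaration (a hedged sketch; only the field and tag names used by
   the code above are assumed):  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;      /* offset into reg_save_area for the next GP reg */
  unsigned int fp_offset;      /* offset into reg_save_area for the next SSE reg */
  void *overflow_arg_area;     /* next stack argument */
  void *reg_save_area;         /* start of the register save area */
} __va_list_tag;
typedef __va_list_tag example_va_list[1];   /* array type of one element */
#endif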
2339
2340 /* Perform any actions needed for a function that is receiving a
2341 variable number of arguments.
2342
2343 CUM is as above.
2344
2345 MODE and TYPE are the mode and type of the current parameter.
2346
2347 PRETEND_SIZE is a variable that should be set to the amount of stack
2348 that must be pushed by the prolog to pretend that our caller pushed
2349 it.
2350
2351 Normally, this macro will push all remaining incoming registers on the
2352 stack and set PRETEND_SIZE to the length of the registers pushed. */
2353
2354 void
2355 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2356 CUMULATIVE_ARGS *cum;
2357 enum machine_mode mode;
2358 tree type;
2359 int *pretend_size ATTRIBUTE_UNUSED;
2360 int no_rtl;
2361
2362 {
2363 CUMULATIVE_ARGS next_cum;
2364 rtx save_area = NULL_RTX, mem;
2365 rtx label;
2366 rtx label_ref;
2367 rtx tmp_reg;
2368 rtx nsse_reg;
2369 int set;
2370 tree fntype;
2371 int stdarg_p;
2372 int i;
2373
2374 if (!TARGET_64BIT)
2375 return;
2376
2377 /* Indicate that we need to allocate stack space for the varargs save area. */
2378 ix86_save_varrargs_registers = 1;
2379
2380 fntype = TREE_TYPE (current_function_decl);
2381 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2382 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2383 != void_type_node));
2384
2385 /* For varargs, we do not want to skip the dummy va_dcl argument.
2386 For stdargs, we do want to skip the last named argument. */
2387 next_cum = *cum;
2388 if (stdarg_p)
2389 function_arg_advance (&next_cum, mode, type, 1);
2390
2391 if (!no_rtl)
2392 save_area = frame_pointer_rtx;
2393
2394 set = get_varargs_alias_set ();
2395
2396 for (i = next_cum.regno; i < ix86_regparm; i++)
2397 {
2398 mem = gen_rtx_MEM (Pmode,
2399 plus_constant (save_area, i * UNITS_PER_WORD));
2400 set_mem_alias_set (mem, set);
2401 emit_move_insn (mem, gen_rtx_REG (Pmode,
2402 x86_64_int_parameter_registers[i]));
2403 }
2404
2405 if (next_cum.sse_nregs)
2406 {
2407 /* Now emit code to save SSE registers.  The AX parameter contains the
2408 number of SSE parameter registers used to call this function.  We use
2409 the sse_prologue_save insn template, which produces a computed jump across
2410 the SSE saves.  We need some preparation work to get this working. */
2411
2412 label = gen_label_rtx ();
2413 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2414
2415 /* Compute the address to jump to:
2416 label - eax*4 + named_sse_arguments*4.  */
2417 tmp_reg = gen_reg_rtx (Pmode);
2418 nsse_reg = gen_reg_rtx (Pmode);
2419 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2420 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2421 gen_rtx_MULT (Pmode, nsse_reg,
2422 GEN_INT (4))));
2423 if (next_cum.sse_regno)
2424 emit_move_insn
2425 (nsse_reg,
2426 gen_rtx_CONST (DImode,
2427 gen_rtx_PLUS (DImode,
2428 label_ref,
2429 GEN_INT (next_cum.sse_regno * 4))));
2430 else
2431 emit_move_insn (nsse_reg, label_ref);
2432 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2433
2434 /* Compute the address of the memory block we save into.  We always use a
2435 pointer pointing 127 bytes after the first byte to store - this keeps the
2436 instruction size limited to 4 bytes (8-bit displacements). */
2437 tmp_reg = gen_reg_rtx (Pmode);
2438 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2439 plus_constant (save_area,
2440 8 * REGPARM_MAX + 127)));
2441 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2442 set_mem_alias_set (mem, set);
2443 set_mem_align (mem, BITS_PER_WORD);
2444
2445 /* And finally do the dirty job! */
2446 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2447 GEN_INT (next_cum.sse_regno), label));
2448 }
2449
2450 }
2451
2452 /* Implement va_start. */
2453
2454 void
2455 ix86_va_start (stdarg_p, valist, nextarg)
2456 int stdarg_p;
2457 tree valist;
2458 rtx nextarg;
2459 {
2460 HOST_WIDE_INT words, n_gpr, n_fpr;
2461 tree f_gpr, f_fpr, f_ovf, f_sav;
2462 tree gpr, fpr, ovf, sav, t;
2463
2464 /* Only the 64-bit target needs anything special. */
2465 if (!TARGET_64BIT)
2466 {
2467 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2468 return;
2469 }
2470
2471 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2472 f_fpr = TREE_CHAIN (f_gpr);
2473 f_ovf = TREE_CHAIN (f_fpr);
2474 f_sav = TREE_CHAIN (f_ovf);
2475
2476 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2477 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2478 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2479 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2480 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2481
2482 /* Count number of gp and fp argument registers used. */
2483 words = current_function_args_info.words;
2484 n_gpr = current_function_args_info.regno;
2485 n_fpr = current_function_args_info.sse_regno;
2486
2487 if (TARGET_DEBUG_ARG)
2488 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2489 (int) words, (int) n_gpr, (int) n_fpr);
2490
2491 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2492 build_int_2 (n_gpr * 8, 0));
2493 TREE_SIDE_EFFECTS (t) = 1;
2494 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2495
2496 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2497 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2498 TREE_SIDE_EFFECTS (t) = 1;
2499 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2500
2501 /* Find the overflow area. */
2502 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2503 if (words != 0)
2504 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2505 build_int_2 (words * UNITS_PER_WORD, 0));
2506 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2507 TREE_SIDE_EFFECTS (t) = 1;
2508 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2509
2510 /* Find the register save area.
2511 The prologue of the function saves it right above the stack frame. */
2512 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2513 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2514 TREE_SIDE_EFFECTS (t) = 1;
2515 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2516 }
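/* For illustration only - a hedged example of the initialization above: for
   int f (int a, double b, ...) on x86-64, one GP and one SSE register are
   named, so va_start sets gp_offset = 8 and
   fp_offset = 1*16 + 8*REGPARM_MAX (64, assuming REGPARM_MAX is 6).  */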
2517
2518 /* Implement va_arg. */
2519 rtx
2520 ix86_va_arg (valist, type)
2521 tree valist, type;
2522 {
2523 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2524 tree f_gpr, f_fpr, f_ovf, f_sav;
2525 tree gpr, fpr, ovf, sav, t;
2526 int size, rsize;
2527 rtx lab_false, lab_over = NULL_RTX;
2528 rtx addr_rtx, r;
2529 rtx container;
2530
2531 /* Only the 64-bit target needs anything special. */
2532 if (!TARGET_64BIT)
2533 {
2534 return std_expand_builtin_va_arg (valist, type);
2535 }
2536
2537 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2538 f_fpr = TREE_CHAIN (f_gpr);
2539 f_ovf = TREE_CHAIN (f_fpr);
2540 f_sav = TREE_CHAIN (f_ovf);
2541
2542 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2543 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2544 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2545 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2546 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2547
2548 size = int_size_in_bytes (type);
2549 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2550
2551 container = construct_container (TYPE_MODE (type), type, 0,
2552 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2553 /*
2554 * Pull the value out of the saved registers ...
2555 */
2556
2557 addr_rtx = gen_reg_rtx (Pmode);
2558
2559 if (container)
2560 {
2561 rtx int_addr_rtx, sse_addr_rtx;
2562 int needed_intregs, needed_sseregs;
2563 int need_temp;
2564
2565 lab_over = gen_label_rtx ();
2566 lab_false = gen_label_rtx ();
2567
2568 examine_argument (TYPE_MODE (type), type, 0,
2569 &needed_intregs, &needed_sseregs);
2570
2571
2572 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2573 || TYPE_ALIGN (type) > 128);
2574
2575 /* In case we are passing a structure, verify that it is a consecutive block
2576 in the register save area.  If not, we need to do moves. */
2577 if (!need_temp && !REG_P (container))
2578 {
2579 /* Verify that all registers are strictly consecutive.  */
2580 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2581 {
2582 int i;
2583
2584 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2585 {
2586 rtx slot = XVECEXP (container, 0, i);
2587 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2588 || INTVAL (XEXP (slot, 1)) != i * 16)
2589 need_temp = 1;
2590 }
2591 }
2592 else
2593 {
2594 int i;
2595
2596 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2597 {
2598 rtx slot = XVECEXP (container, 0, i);
2599 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2600 || INTVAL (XEXP (slot, 1)) != i * 8)
2601 need_temp = 1;
2602 }
2603 }
2604 }
2605 if (!need_temp)
2606 {
2607 int_addr_rtx = addr_rtx;
2608 sse_addr_rtx = addr_rtx;
2609 }
2610 else
2611 {
2612 int_addr_rtx = gen_reg_rtx (Pmode);
2613 sse_addr_rtx = gen_reg_rtx (Pmode);
2614 }
2615 /* First ensure that we fit completely in registers. */
2616 if (needed_intregs)
2617 {
2618 emit_cmp_and_jump_insns (expand_expr
2619 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2620 GEN_INT ((REGPARM_MAX - needed_intregs +
2621 1) * 8), GE, const1_rtx, SImode,
2622 1, lab_false);
2623 }
2624 if (needed_sseregs)
2625 {
2626 emit_cmp_and_jump_insns (expand_expr
2627 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2628 GEN_INT ((SSE_REGPARM_MAX -
2629 needed_sseregs + 1) * 16 +
2630 REGPARM_MAX * 8), GE, const1_rtx,
2631 SImode, 1, lab_false);
2632 }
2633
2634 /* Compute index to start of area used for integer regs. */
2635 if (needed_intregs)
2636 {
2637 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2638 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2639 if (r != int_addr_rtx)
2640 emit_move_insn (int_addr_rtx, r);
2641 }
2642 if (needed_sseregs)
2643 {
2644 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2645 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2646 if (r != sse_addr_rtx)
2647 emit_move_insn (sse_addr_rtx, r);
2648 }
2649 if (need_temp)
2650 {
2651 int i;
2652 rtx mem;
2653
2654 /* Never use the memory itself, as it has the alias set. */
2655 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2656 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2657 set_mem_alias_set (mem, get_varargs_alias_set ());
2658 set_mem_align (mem, BITS_PER_UNIT);
2659
2660 for (i = 0; i < XVECLEN (container, 0); i++)
2661 {
2662 rtx slot = XVECEXP (container, 0, i);
2663 rtx reg = XEXP (slot, 0);
2664 enum machine_mode mode = GET_MODE (reg);
2665 rtx src_addr;
2666 rtx src_mem;
2667 int src_offset;
2668 rtx dest_mem;
2669
2670 if (SSE_REGNO_P (REGNO (reg)))
2671 {
2672 src_addr = sse_addr_rtx;
2673 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2674 }
2675 else
2676 {
2677 src_addr = int_addr_rtx;
2678 src_offset = REGNO (reg) * 8;
2679 }
2680 src_mem = gen_rtx_MEM (mode, src_addr);
2681 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2682 src_mem = adjust_address (src_mem, mode, src_offset);
2683 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2684 emit_move_insn (dest_mem, src_mem);
2685 }
2686 }
2687
2688 if (needed_intregs)
2689 {
2690 t =
2691 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2692 build_int_2 (needed_intregs * 8, 0));
2693 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2694 TREE_SIDE_EFFECTS (t) = 1;
2695 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2696 }
2697 if (needed_sseregs)
2698 {
2699 t =
2700 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2701 build_int_2 (needed_sseregs * 16, 0));
2702 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2703 TREE_SIDE_EFFECTS (t) = 1;
2704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2705 }
2706
2707 emit_jump_insn (gen_jump (lab_over));
2708 emit_barrier ();
2709 emit_label (lab_false);
2710 }
2711
2712 /* ... otherwise out of the overflow area. */
2713
2714 /* Care for on-stack alignment if needed. */
2715 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2716 t = ovf;
2717 else
2718 {
2719 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2720 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2721 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2722 }
2723 t = save_expr (t);
2724
2725 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2726 if (r != addr_rtx)
2727 emit_move_insn (addr_rtx, r);
2728
2729 t =
2730 build (PLUS_EXPR, TREE_TYPE (t), t,
2731 build_int_2 (rsize * UNITS_PER_WORD, 0));
2732 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2733 TREE_SIDE_EFFECTS (t) = 1;
2734 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2735
2736 if (container)
2737 emit_label (lab_over);
2738
2739 return addr_rtx;
2740 }
2741 \f
2742 /* Return nonzero if OP is a general operand representable on x86_64. */
2743
2744 int
2745 x86_64_general_operand (op, mode)
2746 rtx op;
2747 enum machine_mode mode;
2748 {
2749 if (!TARGET_64BIT)
2750 return general_operand (op, mode);
2751 if (nonimmediate_operand (op, mode))
2752 return 1;
2753 return x86_64_sign_extended_value (op);
2754 }
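/* For illustration only - hedged examples of the distinction above:
   (const_int 0x7fffffff) and (const_int -1) are valid x86-64 general
   operands, while (const_int 0x100000000) is not sign-extendable from
   32 bits and has to be loaded into a register (e.g. with movabs) first.  */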
2755
2756 /* Return nonzero if OP is a general operand representable on x86_64
2757 as either a sign extended or a zero extended constant. */
2758
2759 int
2760 x86_64_szext_general_operand (op, mode)
2761 rtx op;
2762 enum machine_mode mode;
2763 {
2764 if (!TARGET_64BIT)
2765 return general_operand (op, mode);
2766 if (nonimmediate_operand (op, mode))
2767 return 1;
2768 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2769 }
2770
2771 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2772
2773 int
2774 x86_64_nonmemory_operand (op, mode)
2775 rtx op;
2776 enum machine_mode mode;
2777 {
2778 if (!TARGET_64BIT)
2779 return nonmemory_operand (op, mode);
2780 if (register_operand (op, mode))
2781 return 1;
2782 return x86_64_sign_extended_value (op);
2783 }
2784
2785 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2786
2787 int
2788 x86_64_movabs_operand (op, mode)
2789 rtx op;
2790 enum machine_mode mode;
2791 {
2792 if (!TARGET_64BIT || !flag_pic)
2793 return nonmemory_operand (op, mode);
2794 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2795 return 1;
2796 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2797 return 1;
2798 return 0;
2799 }
2800
2801 /* Return nonzero if OP is a nonmemory operand representable on x86_64
2802 as either a sign extended or a zero extended constant. */
2802
2803 int
2804 x86_64_szext_nonmemory_operand (op, mode)
2805 rtx op;
2806 enum machine_mode mode;
2807 {
2808 if (!TARGET_64BIT)
2809 return nonmemory_operand (op, mode);
2810 if (register_operand (op, mode))
2811 return 1;
2812 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2813 }
2814
2815 /* Return nonzero if OP is an immediate operand representable on x86_64. */
2816
2817 int
2818 x86_64_immediate_operand (op, mode)
2819 rtx op;
2820 enum machine_mode mode;
2821 {
2822 if (!TARGET_64BIT)
2823 return immediate_operand (op, mode);
2824 return x86_64_sign_extended_value (op);
2825 }
2826
2827 /* Return nonzero if OP is an immediate operand representable on x86_64
2828 as a zero extended constant. */
2828
2829 int
2830 x86_64_zext_immediate_operand (op, mode)
2831 rtx op;
2832 enum machine_mode mode ATTRIBUTE_UNUSED;
2833 {
2834 return x86_64_zero_extended_value (op);
2835 }
2836
2837 /* Return nonzero if OP is (const_int 1), else return zero. */
2838
2839 int
2840 const_int_1_operand (op, mode)
2841 rtx op;
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2843 {
2844 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2845 }
2846
2847 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2848 reference and a constant. */
2849
2850 int
2851 symbolic_operand (op, mode)
2852 register rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2854 {
2855 switch (GET_CODE (op))
2856 {
2857 case SYMBOL_REF:
2858 case LABEL_REF:
2859 return 1;
2860
2861 case CONST:
2862 op = XEXP (op, 0);
2863 if (GET_CODE (op) == SYMBOL_REF
2864 || GET_CODE (op) == LABEL_REF
2865 || (GET_CODE (op) == UNSPEC
2866 && (XINT (op, 1) == UNSPEC_GOT
2867 || XINT (op, 1) == UNSPEC_GOTOFF
2868 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2869 return 1;
2870 if (GET_CODE (op) != PLUS
2871 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2872 return 0;
2873
2874 op = XEXP (op, 0);
2875 if (GET_CODE (op) == SYMBOL_REF
2876 || GET_CODE (op) == LABEL_REF)
2877 return 1;
2878 /* Only @GOTOFF gets offsets. */
2879 if (GET_CODE (op) != UNSPEC
2880 || XINT (op, 1) != UNSPEC_GOTOFF)
2881 return 0;
2882
2883 op = XVECEXP (op, 0, 0);
2884 if (GET_CODE (op) == SYMBOL_REF
2885 || GET_CODE (op) == LABEL_REF)
2886 return 1;
2887 return 0;
2888
2889 default:
2890 return 0;
2891 }
2892 }
2893
2894 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2895
2896 int
2897 pic_symbolic_operand (op, mode)
2898 register rtx op;
2899 enum machine_mode mode ATTRIBUTE_UNUSED;
2900 {
2901 if (GET_CODE (op) != CONST)
2902 return 0;
2903 op = XEXP (op, 0);
2904 if (TARGET_64BIT)
2905 {
2906 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2907 return 1;
2908 }
2909 else
2910 {
2911 if (GET_CODE (op) == UNSPEC)
2912 return 1;
2913 if (GET_CODE (op) != PLUS
2914 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2915 return 0;
2916 op = XEXP (op, 0);
2917 if (GET_CODE (op) == UNSPEC)
2918 return 1;
2919 }
2920 return 0;
2921 }
2922
2923 /* Return true if OP is a symbolic operand that resolves locally. */
2924
2925 static int
2926 local_symbolic_operand (op, mode)
2927 rtx op;
2928 enum machine_mode mode ATTRIBUTE_UNUSED;
2929 {
2930 if (GET_CODE (op) == LABEL_REF)
2931 return 1;
2932
2933 if (GET_CODE (op) == CONST
2934 && GET_CODE (XEXP (op, 0)) == PLUS
2935 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2936 op = XEXP (XEXP (op, 0), 0);
2937
2938 if (GET_CODE (op) != SYMBOL_REF)
2939 return 0;
2940
2941 /* These we've been told are local by varasm and encode_section_info
2942 respectively. */
2943 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2944 return 1;
2945
2946 /* There is, however, a not insubstantial body of code in the rest of
2947 the compiler that assumes it can just stick the results of
2948 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2949 /* ??? This is a hack.  Should update the body of the compiler to
2950 always create a DECL and invoke targetm.encode_section_info. */
2951 if (strncmp (XSTR (op, 0), internal_label_prefix,
2952 internal_label_prefix_len) == 0)
2953 return 1;
2954
2955 return 0;
2956 }
2957
2958 /* Test for a valid operand for a call instruction. Don't allow the
2959 arg pointer register or virtual regs since they may decay into
2960 reg + const, which the patterns can't handle. */
2961
2962 int
2963 call_insn_operand (op, mode)
2964 rtx op;
2965 enum machine_mode mode ATTRIBUTE_UNUSED;
2966 {
2967 /* Disallow indirect through a virtual register. This leads to
2968 compiler aborts when trying to eliminate them. */
2969 if (GET_CODE (op) == REG
2970 && (op == arg_pointer_rtx
2971 || op == frame_pointer_rtx
2972 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2973 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2974 return 0;
2975
2976 /* Disallow `call 1234'. Due to varying assembler lameness this
2977 gets either rejected or translated to `call .+1234'. */
2978 if (GET_CODE (op) == CONST_INT)
2979 return 0;
2980
2981 /* Explicitly allow SYMBOL_REF even if pic. */
2982 if (GET_CODE (op) == SYMBOL_REF)
2983 return 1;
2984
2985 /* Half-pic doesn't allow anything but registers and constants.
2986 We've just taken care of the latter. */
2987 if (HALF_PIC_P ())
2988 return register_operand (op, Pmode);
2989
2990 /* Otherwise we can allow any general_operand in the address. */
2991 return general_operand (op, Pmode);
2992 }
2993
2994 int
2995 constant_call_address_operand (op, mode)
2996 rtx op;
2997 enum machine_mode mode ATTRIBUTE_UNUSED;
2998 {
2999 if (GET_CODE (op) == CONST
3000 && GET_CODE (XEXP (op, 0)) == PLUS
3001 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3002 op = XEXP (XEXP (op, 0), 0);
3003 return GET_CODE (op) == SYMBOL_REF;
3004 }
3005
3006 /* Match exactly zero and one. */
3007
3008 int
3009 const0_operand (op, mode)
3010 register rtx op;
3011 enum machine_mode mode;
3012 {
3013 return op == CONST0_RTX (mode);
3014 }
3015
3016 int
3017 const1_operand (op, mode)
3018 register rtx op;
3019 enum machine_mode mode ATTRIBUTE_UNUSED;
3020 {
3021 return op == const1_rtx;
3022 }
3023
3024 /* Match 2, 4, or 8. Used for leal multiplicands. */
3025
3026 int
3027 const248_operand (op, mode)
3028 register rtx op;
3029 enum machine_mode mode ATTRIBUTE_UNUSED;
3030 {
3031 return (GET_CODE (op) == CONST_INT
3032 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3033 }
3034
3035 /* True if this is a constant appropriate for an increment or decrement. */
3036
3037 int
3038 incdec_operand (op, mode)
3039 register rtx op;
3040 enum machine_mode mode ATTRIBUTE_UNUSED;
3041 {
3042 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3043 flags register, since the carry flag is not set. */
3044 if (TARGET_PENTIUM4 && !optimize_size)
3045 return 0;
3046 return op == const1_rtx || op == constm1_rtx;
3047 }
3048
3049 /* Return nonzero if OP is acceptable as operand of DImode shift
3050 expander. */
3051
3052 int
3053 shiftdi_operand (op, mode)
3054 rtx op;
3055 enum machine_mode mode ATTRIBUTE_UNUSED;
3056 {
3057 if (TARGET_64BIT)
3058 return nonimmediate_operand (op, mode);
3059 else
3060 return register_operand (op, mode);
3061 }
3062
3063 /* Return false if this is the stack pointer, or any other fake
3064 register eliminable to the stack pointer. Otherwise, this is
3065 a register operand.
3066
3067 This is used to prevent esp from being used as an index reg,
3068 which would only happen in pathological cases. */
3069
3070 int
3071 reg_no_sp_operand (op, mode)
3072 register rtx op;
3073 enum machine_mode mode;
3074 {
3075 rtx t = op;
3076 if (GET_CODE (t) == SUBREG)
3077 t = SUBREG_REG (t);
3078 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3079 return 0;
3080
3081 return register_operand (op, mode);
3082 }
3083
3084 int
3085 mmx_reg_operand (op, mode)
3086 register rtx op;
3087 enum machine_mode mode ATTRIBUTE_UNUSED;
3088 {
3089 return MMX_REG_P (op);
3090 }
3091
3092 /* Return false if this is any eliminable register. Otherwise
3093 general_operand. */
3094
3095 int
3096 general_no_elim_operand (op, mode)
3097 register rtx op;
3098 enum machine_mode mode;
3099 {
3100 rtx t = op;
3101 if (GET_CODE (t) == SUBREG)
3102 t = SUBREG_REG (t);
3103 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3104 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3105 || t == virtual_stack_dynamic_rtx)
3106 return 0;
3107 if (REG_P (t)
3108 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3109 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3110 return 0;
3111
3112 return general_operand (op, mode);
3113 }
3114
3115 /* Return false if this is any eliminable register. Otherwise
3116 register_operand or const_int. */
3117
3118 int
3119 nonmemory_no_elim_operand (op, mode)
3120 register rtx op;
3121 enum machine_mode mode;
3122 {
3123 rtx t = op;
3124 if (GET_CODE (t) == SUBREG)
3125 t = SUBREG_REG (t);
3126 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3127 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3128 || t == virtual_stack_dynamic_rtx)
3129 return 0;
3130
3131 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3132 }
3133
3134 /* Return true if op is a Q_REGS class register. */
3135
3136 int
3137 q_regs_operand (op, mode)
3138 register rtx op;
3139 enum machine_mode mode;
3140 {
3141 if (mode != VOIDmode && GET_MODE (op) != mode)
3142 return 0;
3143 if (GET_CODE (op) == SUBREG)
3144 op = SUBREG_REG (op);
3145 return ANY_QI_REG_P (op);
3146 }
3147
3148 /* Return true if op is a NON_Q_REGS class register. */
3149
3150 int
3151 non_q_regs_operand (op, mode)
3152 register rtx op;
3153 enum machine_mode mode;
3154 {
3155 if (mode != VOIDmode && GET_MODE (op) != mode)
3156 return 0;
3157 if (GET_CODE (op) == SUBREG)
3158 op = SUBREG_REG (op);
3159 return NON_QI_REG_P (op);
3160 }
3161
3162 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3163 insns. */
3164 int
3165 sse_comparison_operator (op, mode)
3166 rtx op;
3167 enum machine_mode mode ATTRIBUTE_UNUSED;
3168 {
3169 enum rtx_code code = GET_CODE (op);
3170 switch (code)
3171 {
3172 /* Operations supported directly. */
3173 case EQ:
3174 case LT:
3175 case LE:
3176 case UNORDERED:
3177 case NE:
3178 case UNGE:
3179 case UNGT:
3180 case ORDERED:
3181 return 1;
3182 /* These are equivalent to ones above in non-IEEE comparisons. */
3183 case UNEQ:
3184 case UNLT:
3185 case UNLE:
3186 case LTGT:
3187 case GE:
3188 case GT:
3189 return !TARGET_IEEE_FP;
3190 default:
3191 return 0;
3192 }
3193 }
3194 /* Return 1 if OP is a valid comparison operator in valid mode. */
3195 int
3196 ix86_comparison_operator (op, mode)
3197 register rtx op;
3198 enum machine_mode mode;
3199 {
3200 enum machine_mode inmode;
3201 enum rtx_code code = GET_CODE (op);
3202 if (mode != VOIDmode && GET_MODE (op) != mode)
3203 return 0;
3204 if (GET_RTX_CLASS (code) != '<')
3205 return 0;
3206 inmode = GET_MODE (XEXP (op, 0));
3207
3208 if (inmode == CCFPmode || inmode == CCFPUmode)
3209 {
3210 enum rtx_code second_code, bypass_code;
3211 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3212 return (bypass_code == NIL && second_code == NIL);
3213 }
3214 switch (code)
3215 {
3216 case EQ: case NE:
3217 return 1;
3218 case LT: case GE:
3219 if (inmode == CCmode || inmode == CCGCmode
3220 || inmode == CCGOCmode || inmode == CCNOmode)
3221 return 1;
3222 return 0;
3223 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3224 if (inmode == CCmode)
3225 return 1;
3226 return 0;
3227 case GT: case LE:
3228 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3229 return 1;
3230 return 0;
3231 default:
3232 return 0;
3233 }
3234 }
3235
3236 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3237
3238 int
3239 fcmov_comparison_operator (op, mode)
3240 register rtx op;
3241 enum machine_mode mode;
3242 {
3243 enum machine_mode inmode;
3244 enum rtx_code code = GET_CODE (op);
3245 if (mode != VOIDmode && GET_MODE (op) != mode)
3246 return 0;
3247 if (GET_RTX_CLASS (code) != '<')
3248 return 0;
3249 inmode = GET_MODE (XEXP (op, 0));
3250 if (inmode == CCFPmode || inmode == CCFPUmode)
3251 {
3252 enum rtx_code second_code, bypass_code;
3253 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3254 if (bypass_code != NIL || second_code != NIL)
3255 return 0;
3256 code = ix86_fp_compare_code_to_integer (code);
3257 }
3258 /* The i387 supports only a limited set of condition codes. */
3259 switch (code)
3260 {
3261 case LTU: case GTU: case LEU: case GEU:
3262 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3263 return 1;
3264 return 0;
3265 case ORDERED: case UNORDERED:
3266 case EQ: case NE:
3267 return 1;
3268 default:
3269 return 0;
3270 }
3271 }
3272
3273 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3274
3275 int
3276 promotable_binary_operator (op, mode)
3277 register rtx op;
3278 enum machine_mode mode ATTRIBUTE_UNUSED;
3279 {
3280 switch (GET_CODE (op))
3281 {
3282 case MULT:
3283 /* Modern CPUs have the same latency for HImode and SImode multiply,
3284 but the 386 and 486 do HImode multiplies faster. */
3285 return ix86_cpu > PROCESSOR_I486;
3286 case PLUS:
3287 case AND:
3288 case IOR:
3289 case XOR:
3290 case ASHIFT:
3291 return 1;
3292 default:
3293 return 0;
3294 }
3295 }
3296
3297 /* Nearly general operand, but accept any const_double, since we wish
3298 to be able to drop them into memory rather than have them get pulled
3299 into registers. */
3300
3301 int
3302 cmp_fp_expander_operand (op, mode)
3303 register rtx op;
3304 enum machine_mode mode;
3305 {
3306 if (mode != VOIDmode && mode != GET_MODE (op))
3307 return 0;
3308 if (GET_CODE (op) == CONST_DOUBLE)
3309 return 1;
3310 return general_operand (op, mode);
3311 }
3312
3313 /* Match an SI or HImode register for a zero_extract. */
3314
3315 int
3316 ext_register_operand (op, mode)
3317 register rtx op;
3318 enum machine_mode mode ATTRIBUTE_UNUSED;
3319 {
3320 int regno;
3321 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3322 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3323 return 0;
3324
3325 if (!register_operand (op, VOIDmode))
3326 return 0;
3327
3328 /* Be careful to accept only registers having upper parts. */
3329 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3330 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3331 }
3332
3333 /* Return 1 if this is a valid binary floating-point operation.
3334 OP is the expression matched, and MODE is its mode. */
3335
3336 int
3337 binary_fp_operator (op, mode)
3338 register rtx op;
3339 enum machine_mode mode;
3340 {
3341 if (mode != VOIDmode && mode != GET_MODE (op))
3342 return 0;
3343
3344 switch (GET_CODE (op))
3345 {
3346 case PLUS:
3347 case MINUS:
3348 case MULT:
3349 case DIV:
3350 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3351
3352 default:
3353 return 0;
3354 }
3355 }
3356
3357 int
3358 mult_operator (op, mode)
3359 register rtx op;
3360 enum machine_mode mode ATTRIBUTE_UNUSED;
3361 {
3362 return GET_CODE (op) == MULT;
3363 }
3364
3365 int
3366 div_operator (op, mode)
3367 register rtx op;
3368 enum machine_mode mode ATTRIBUTE_UNUSED;
3369 {
3370 return GET_CODE (op) == DIV;
3371 }
3372
3373 int
3374 arith_or_logical_operator (op, mode)
3375 rtx op;
3376 enum machine_mode mode;
3377 {
3378 return ((mode == VOIDmode || GET_MODE (op) == mode)
3379 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3380 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3381 }
3382
3383 /* Returns 1 if OP is memory operand with a displacement. */
3384
3385 int
3386 memory_displacement_operand (op, mode)
3387 register rtx op;
3388 enum machine_mode mode;
3389 {
3390 struct ix86_address parts;
3391
3392 if (! memory_operand (op, mode))
3393 return 0;
3394
3395 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3396 abort ();
3397
3398 return parts.disp != NULL_RTX;
3399 }
3400
3401 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3402 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3403
3404 ??? It seems likely that this will only work because cmpsi is an
3405 expander, and no actual insns use this. */
3406
3407 int
3408 cmpsi_operand (op, mode)
3409 rtx op;
3410 enum machine_mode mode;
3411 {
3412 if (nonimmediate_operand (op, mode))
3413 return 1;
3414
3415 if (GET_CODE (op) == AND
3416 && GET_MODE (op) == SImode
3417 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3418 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3419 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3420 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3421 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3422 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3423 return 1;
3424
3425 return 0;
3426 }
3427
3428 /* Returns 1 if OP is a memory operand that cannot be represented by the
3429 modRM array. */
3430
3431 int
3432 long_memory_operand (op, mode)
3433 register rtx op;
3434 enum machine_mode mode;
3435 {
3436 if (! memory_operand (op, mode))
3437 return 0;
3438
3439 return memory_address_length (op) != 0;
3440 }
3441
3442 /* Return nonzero if the rtx is known aligned. */
3443
3444 int
3445 aligned_operand (op, mode)
3446 rtx op;
3447 enum machine_mode mode;
3448 {
3449 struct ix86_address parts;
3450
3451 if (!general_operand (op, mode))
3452 return 0;
3453
3454 /* Registers and immediate operands are always "aligned". */
3455 if (GET_CODE (op) != MEM)
3456 return 1;
3457
3458 /* Don't even try to do any aligned optimizations with volatiles. */
3459 if (MEM_VOLATILE_P (op))
3460 return 0;
3461
3462 op = XEXP (op, 0);
3463
3464 /* Pushes and pops are only valid on the stack pointer. */
3465 if (GET_CODE (op) == PRE_DEC
3466 || GET_CODE (op) == POST_INC)
3467 return 1;
3468
3469 /* Decode the address. */
3470 if (! ix86_decompose_address (op, &parts))
3471 abort ();
3472
3473 if (parts.base && GET_CODE (parts.base) == SUBREG)
3474 parts.base = SUBREG_REG (parts.base);
3475 if (parts.index && GET_CODE (parts.index) == SUBREG)
3476 parts.index = SUBREG_REG (parts.index);
3477
3478 /* Look for some component that isn't known to be aligned. */
3479 if (parts.index)
3480 {
3481 if (parts.scale < 4
3482 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3483 return 0;
3484 }
3485 if (parts.base)
3486 {
3487 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3488 return 0;
3489 }
3490 if (parts.disp)
3491 {
3492 if (GET_CODE (parts.disp) != CONST_INT
3493 || (INTVAL (parts.disp) & 3) != 0)
3494 return 0;
3495 }
3496
3497 /* Didn't find one -- this must be an aligned address. */
3498 return 1;
3499 }
3500 \f
3501 /* Return true if the constant is something that can be loaded with
3502 a special instruction. Only handle 0.0 and 1.0; others are less
3503 worthwhile. */
3504
3505 int
3506 standard_80387_constant_p (x)
3507 rtx x;
3508 {
3509 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3510 return -1;
3511 /* Note that the 80387 has other constants, such as pi, that we should
3512 support too.  On some machines these are much slower to load as a
3513 standard constant than to load from doubles in memory. */
3514 if (x == CONST0_RTX (GET_MODE (x)))
3515 return 1;
3516 if (x == CONST1_RTX (GET_MODE (x)))
3517 return 2;
3518 return 0;
3519 }
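/* For illustration only - a hedged note on the return values above: a value
   of 1 corresponds to 0.0, which the i386 move patterns can emit as fldz,
   and 2 corresponds to 1.0, emitted as fld1; other constants fall back to
   loads from memory.  */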
3520
3521 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
3522 */
3523 int
3524 standard_sse_constant_p (x)
3525 rtx x;
3526 {
3527 if (GET_CODE (x) != CONST_DOUBLE)
3528 return -1;
3529 return (x == CONST0_RTX (GET_MODE (x)));
3530 }
3531
3532 /* Returns 1 if OP contains a symbol reference */
3533
3534 int
3535 symbolic_reference_mentioned_p (op)
3536 rtx op;
3537 {
3538 register const char *fmt;
3539 register int i;
3540
3541 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3542 return 1;
3543
3544 fmt = GET_RTX_FORMAT (GET_CODE (op));
3545 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3546 {
3547 if (fmt[i] == 'E')
3548 {
3549 register int j;
3550
3551 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3552 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3553 return 1;
3554 }
3555
3556 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3557 return 1;
3558 }
3559
3560 return 0;
3561 }
3562
3563 /* Return 1 if it is appropriate to emit `ret' instructions in the
3564 body of a function. Do this only if the epilogue is simple, needing a
3565 couple of insns. Prior to reloading, we can't tell how many registers
3566 must be saved, so return 0 then. Return 0 if there is no frame
3567 marker to de-allocate.
3568
3569 If NON_SAVING_SETJMP is defined and true, then it is not possible
3570 for the epilogue to be simple, so return 0. This is a special case
3571 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3572 until final, but jump_optimize may need to know sooner if a
3573 `return' is OK. */
3574
3575 int
3576 ix86_can_use_return_insn_p ()
3577 {
3578 struct ix86_frame frame;
3579
3580 #ifdef NON_SAVING_SETJMP
3581 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3582 return 0;
3583 #endif
3584
3585 if (! reload_completed || frame_pointer_needed)
3586 return 0;
3587
3588 /* Don't allow more than 32K bytes of pop, since that's all we can do
3589 with one instruction. */
3590 if (current_function_pops_args
3591 && current_function_args_size >= 32768)
3592 return 0;
3593
3594 ix86_compute_frame_layout (&frame);
3595 return frame.to_allocate == 0 && frame.nregs == 0;
3596 }
3597 \f
3598 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3599 int
3600 x86_64_sign_extended_value (value)
3601 rtx value;
3602 {
3603 switch (GET_CODE (value))
3604 {
3605 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3606 to be at least 32, and thus all acceptable constants are
3607 represented as CONST_INT. */
3608 case CONST_INT:
3609 if (HOST_BITS_PER_WIDE_INT == 32)
3610 return 1;
3611 else
3612 {
3613 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3614 return trunc_int_for_mode (val, SImode) == val;
3615 }
3616 break;
3617
3618 /* For certain code models, the symbolic references are known to fit. */
3619 case SYMBOL_REF:
3620 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3621
3622 /* For certain code models, the code is near as well. */
3623 case LABEL_REF:
3624 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3625
3626 /* We may also accept offsetted memory references in certain special
3627 cases. */
3628 case CONST:
3629 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3630 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3631 return 1;
3632 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3633 {
3634 rtx op1 = XEXP (XEXP (value, 0), 0);
3635 rtx op2 = XEXP (XEXP (value, 0), 1);
3636 HOST_WIDE_INT offset;
3637
3638 if (ix86_cmodel == CM_LARGE)
3639 return 0;
3640 if (GET_CODE (op2) != CONST_INT)
3641 return 0;
3642 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3643 switch (GET_CODE (op1))
3644 {
3645 case SYMBOL_REF:
3646 /* For CM_SMALL assume that the latest object is 1MB before the
3647 end of the 31-bit boundary. We may also accept pretty
3648 large negative constants, knowing that all objects are
3649 in the positive half of the address space. */
3650 if (ix86_cmodel == CM_SMALL
3651 && offset < 1024*1024*1024
3652 && trunc_int_for_mode (offset, SImode) == offset)
3653 return 1;
3654 /* For CM_KERNEL we know that all objects reside in the
3655 negative half of the 32-bit address space. We must not
3656 accept negative offsets, since they may be just off,
3657 but we may accept pretty large positive ones. */
3658 if (ix86_cmodel == CM_KERNEL
3659 && offset > 0
3660 && trunc_int_for_mode (offset, SImode) == offset)
3661 return 1;
3662 break;
3663 case LABEL_REF:
3664 /* These conditions are similar to SYMBOL_REF ones, just the
3665 constraints for code models differ. */
3666 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3667 && offset < 1024*1024*1024
3668 && trunc_int_for_mode (offset, SImode) == offset)
3669 return 1;
3670 if (ix86_cmodel == CM_KERNEL
3671 && offset > 0
3672 && trunc_int_for_mode (offset, SImode) == offset)
3673 return 1;
3674 break;
3675 default:
3676 return 0;
3677 }
3678 }
3679 return 0;
3680 default:
3681 return 0;
3682 }
3683 }
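/* A minimal stand-alone sketch (not from the original source; the helper
   name is invented) of the CONST_INT test above: on a 64-bit host, and
   assuming the usual two's-complement truncation, a value fits the
   sign-extended 32-bit immediate field exactly when sign-extending its
   low 32 bits reproduces the full value.  */
#include <stdint.h>

static int
fits_sign_extended_imm32 (int64_t value)
{
  return (int64_t) (int32_t) value == value;	/* -1 fits; (int64_t) 1 << 32 does not */
}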
3684
3685 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3686 int
3687 x86_64_zero_extended_value (value)
3688 rtx value;
3689 {
3690 switch (GET_CODE (value))
3691 {
3692 case CONST_DOUBLE:
3693 if (HOST_BITS_PER_WIDE_INT == 32)
3694 return (GET_MODE (value) == VOIDmode
3695 && !CONST_DOUBLE_HIGH (value));
3696 else
3697 return 0;
3698 case CONST_INT:
3699 if (HOST_BITS_PER_WIDE_INT == 32)
3700 return INTVAL (value) >= 0;
3701 else
3702 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3703 break;
3704
3705 /* For certain code models, the symbolic references are known to fit. */
3706 case SYMBOL_REF:
3707 return ix86_cmodel == CM_SMALL;
3708
3709 /* For certain code models, the code is near as well. */
3710 case LABEL_REF:
3711 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3712
3713 /* We may also accept offsetted memory references in certain special
3714 cases. */
3715 case CONST:
3716 if (GET_CODE (XEXP (value, 0)) == PLUS)
3717 {
3718 rtx op1 = XEXP (XEXP (value, 0), 0);
3719 rtx op2 = XEXP (XEXP (value, 0), 1);
3720
3721 if (ix86_cmodel == CM_LARGE)
3722 return 0;
3723 switch (GET_CODE (op1))
3724 {
3725 case SYMBOL_REF:
3726 return 0;
3727 /* For the small code model we may accept pretty large positive
3728 offsets, since one bit is available for free. Negative
3729 offsets are limited by the size of the NULL pointer area
3730 specified by the ABI. */
3731 if (ix86_cmodel == CM_SMALL
3732 && GET_CODE (op2) == CONST_INT
3733 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3734 && (trunc_int_for_mode (INTVAL (op2), SImode)
3735 == INTVAL (op2)))
3736 return 1;
3737 /* ??? For the kernel, we may accept adjustment of
3738 -0x10000000, since we know that it will just convert
3739 negative address space to positive, but perhaps this
3740 is not worthwhile. */
3741 break;
3742 case LABEL_REF:
3743 /* These conditions are similar to SYMBOL_REF ones, just the
3744 constraints for code models differ. */
3745 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3746 && GET_CODE (op2) == CONST_INT
3747 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3748 && (trunc_int_for_mode (INTVAL (op2), SImode)
3749 == INTVAL (op2)))
3750 return 1;
3751 break;
3752 default:
3753 return 0;
3754 }
3755 }
3756 return 0;
3757 default:
3758 return 0;
3759 }
3760 }
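/* The analogous stand-alone sketch for the zero-extended case (again an
   illustration with an invented name): the value must have no bits set
   above bit 31, mirroring the mask test in the CONST_INT case above.  */
#include <stdint.h>

static int
fits_zero_extended_imm32 (uint64_t value)
{
  return (value & ~(uint64_t) 0xffffffff) == 0;	/* 0xffffffff fits; 0x100000000 does not */
}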
3761
3762 /* Value should be nonzero if functions must have frame pointers.
3763 Zero means the frame pointer need not be set up (and parms may
3764 be accessed via the stack pointer) in functions that seem suitable. */
3765
3766 int
3767 ix86_frame_pointer_required ()
3768 {
3769 /* If we accessed previous frames, then the generated code expects
3770 to be able to access the saved ebp value in our frame. */
3771 if (cfun->machine->accesses_prev_frame)
3772 return 1;
3773
3774 /* Several x86 OSes need a frame pointer for other reasons,
3775 usually pertaining to setjmp. */
3776 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3777 return 1;
3778
3779 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3780 the frame pointer by default. Turn it back on now if we've not
3781 got a leaf function. */
3782 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3783 return 1;
3784
3785 return 0;
3786 }
3787
3788 /* Record that the current function accesses previous call frames. */
3789
3790 void
3791 ix86_setup_frame_addresses ()
3792 {
3793 cfun->machine->accesses_prev_frame = 1;
3794 }
3795 \f
3796 static char pic_label_name[32];
3797
3798 /* This function generates code for -fpic that loads %ebx with
3799 the return address of the caller and then returns. */
3800
3801 void
3802 ix86_asm_file_end (file)
3803 FILE *file;
3804 {
3805 rtx xops[2];
3806
3807 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3808 return;
3809
3810 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug, related
3811 to updating relocations against a section being discarded, which makes
3812 this not work. Ought to detect this at configure time. */
3813 #if 0
3814 /* The trick here is to create a linkonce section containing the
3815 pic label thunk, but to refer to it with an internal label.
3816 Because the label is internal, we don't have inter-dso name
3817 binding issues on hosts that don't support ".hidden".
3818
3819 In order to use these macros, however, we must create a fake
3820 function decl. */
3821 if (targetm.have_named_sections)
3822 {
3823 tree decl = build_decl (FUNCTION_DECL,
3824 get_identifier ("i686.get_pc_thunk"),
3825 error_mark_node);
3826 DECL_ONE_ONLY (decl) = 1;
3827 (*targetm.asm_out.unique_section) (decl, 0);
3828 named_section (decl, NULL);
3829 }
3830 else
3831 #else
3832 text_section ();
3833 #endif
3834
3835 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3836 internal (non-global) label that's being emitted, it didn't make
3837 sense to have .type information for local labels. This caused
3838 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3839 me debug info for a label that you're declaring non-global?), so this
3840 was changed to call ASM_OUTPUT_LABEL() instead. */
3841
3842 ASM_OUTPUT_LABEL (file, pic_label_name);
3843
3844 xops[0] = pic_offset_table_rtx;
3845 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3846 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3847 output_asm_insn ("ret", xops);
3848 }
3849
3850 void
3851 load_pic_register ()
3852 {
3853 rtx gotsym, pclab;
3854
3855 if (TARGET_64BIT)
3856 abort ();
3857
3858 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3859
3860 if (TARGET_DEEP_BRANCH_PREDICTION)
3861 {
3862 if (! pic_label_name[0])
3863 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3864 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3865 }
3866 else
3867 {
3868 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3869 }
3870
3871 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3872
3873 if (! TARGET_DEEP_BRANCH_PREDICTION)
3874 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3875
3876 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3877 }
3878
3879 /* Generate an "push" pattern for input ARG. */
3880
3881 static rtx
3882 gen_push (arg)
3883 rtx arg;
3884 {
3885 return gen_rtx_SET (VOIDmode,
3886 gen_rtx_MEM (Pmode,
3887 gen_rtx_PRE_DEC (Pmode,
3888 stack_pointer_rtx)),
3889 arg);
3890 }
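/* For reference (an illustration, not output captured from this file): in
   32-bit mode, where Pmode is SImode, the SET built above has roughly the
   shape

     (set (mem:SI (pre_dec:SI (reg:SI esp)))
	  (reg:SI arg))

   which the push patterns in i386.md are expected to match.  */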
3891
3892 /* Return 1 if we need to save REGNO. */
3893 static int
3894 ix86_save_reg (regno, maybe_eh_return)
3895 unsigned int regno;
3896 int maybe_eh_return;
3897 {
3898 if (regno == PIC_OFFSET_TABLE_REGNUM
3899 && (current_function_uses_pic_offset_table
3900 || current_function_uses_const_pool
3901 || current_function_calls_eh_return))
3902 return 1;
3903
3904 if (current_function_calls_eh_return && maybe_eh_return)
3905 {
3906 unsigned i;
3907 for (i = 0; ; i++)
3908 {
3909 unsigned test = EH_RETURN_DATA_REGNO (i);
3910 if (test == INVALID_REGNUM)
3911 break;
3912 if (test == regno)
3913 return 1;
3914 }
3915 }
3916
3917 return (regs_ever_live[regno]
3918 && !call_used_regs[regno]
3919 && !fixed_regs[regno]
3920 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3921 }
3922
3923 /* Return number of registers to be saved on the stack. */
3924
3925 static int
3926 ix86_nsaved_regs ()
3927 {
3928 int nregs = 0;
3929 int regno;
3930
3931 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3932 if (ix86_save_reg (regno, true))
3933 nregs++;
3934 return nregs;
3935 }
3936
3937 /* Return the offset between two registers, one to be eliminated, and the other
3938 its replacement, at the start of a routine. */
3939
3940 HOST_WIDE_INT
3941 ix86_initial_elimination_offset (from, to)
3942 int from;
3943 int to;
3944 {
3945 struct ix86_frame frame;
3946 ix86_compute_frame_layout (&frame);
3947
3948 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3949 return frame.hard_frame_pointer_offset;
3950 else if (from == FRAME_POINTER_REGNUM
3951 && to == HARD_FRAME_POINTER_REGNUM)
3952 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3953 else
3954 {
3955 if (to != STACK_POINTER_REGNUM)
3956 abort ();
3957 else if (from == ARG_POINTER_REGNUM)
3958 return frame.stack_pointer_offset;
3959 else if (from != FRAME_POINTER_REGNUM)
3960 abort ();
3961 else
3962 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3963 }
3964 }
3965
3966 /* Fill structure ix86_frame about frame of currently computed function. */
3967
3968 static void
3969 ix86_compute_frame_layout (frame)
3970 struct ix86_frame *frame;
3971 {
3972 HOST_WIDE_INT total_size;
3973 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3974 int offset;
3975 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3976 HOST_WIDE_INT size = get_frame_size ();
3977
3978 frame->nregs = ix86_nsaved_regs ();
3979 total_size = size;
3980
3981 /* Skip return address and saved base pointer. */
3982 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3983
3984 frame->hard_frame_pointer_offset = offset;
3985
3986 /* Do some sanity checking of stack_alignment_needed and
3987 preferred_alignment, since the i386 port is the only one using these
3988 features, which may break easily. */
3989
3990 if (size && !stack_alignment_needed)
3991 abort ();
3992 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3993 abort ();
3994 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3995 abort ();
3996 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3997 abort ();
3998
3999 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4000 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4001
4002 /* Register save area */
4003 offset += frame->nregs * UNITS_PER_WORD;
4004
4005 /* Va-arg area */
4006 if (ix86_save_varrargs_registers)
4007 {
4008 offset += X86_64_VARARGS_SIZE;
4009 frame->va_arg_size = X86_64_VARARGS_SIZE;
4010 }
4011 else
4012 frame->va_arg_size = 0;
4013
4014 /* Align start of frame for local function. */
4015 frame->padding1 = ((offset + stack_alignment_needed - 1)
4016 & -stack_alignment_needed) - offset;
4017
4018 offset += frame->padding1;
4019
4020 /* Frame pointer points here. */
4021 frame->frame_pointer_offset = offset;
4022
4023 offset += size;
4024
4025 /* Add outgoing arguments area. */
4026 if (ACCUMULATE_OUTGOING_ARGS)
4027 {
4028 offset += current_function_outgoing_args_size;
4029 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4030 }
4031 else
4032 frame->outgoing_arguments_size = 0;
4033
4034 /* Align stack boundary. */
4035 frame->padding2 = ((offset + preferred_alignment - 1)
4036 & -preferred_alignment) - offset;
4037
4038 offset += frame->padding2;
4039
4040 /* We've reached end of stack frame. */
4041 frame->stack_pointer_offset = offset;
4042
4043 /* Size prologue needs to allocate. */
4044 frame->to_allocate =
4045 (size + frame->padding1 + frame->padding2
4046 + frame->outgoing_arguments_size + frame->va_arg_size);
4047
4048 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4049 && current_function_is_leaf)
4050 {
4051 frame->red_zone_size = frame->to_allocate;
4052 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4053 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4054 }
4055 else
4056 frame->red_zone_size = 0;
4057 frame->to_allocate -= frame->red_zone_size;
4058 frame->stack_pointer_offset -= frame->red_zone_size;
4059 #if 0
4060 fprintf (stderr, "nregs: %i\n", frame->nregs);
4061 fprintf (stderr, "size: %i\n", size);
4062 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4063 fprintf (stderr, "padding1: %i\n", frame->padding1);
4064 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4065 fprintf (stderr, "padding2: %i\n", frame->padding2);
4066 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4067 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4068 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4069 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4070 frame->hard_frame_pointer_offset);
4071 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4072 #endif
4073 }
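/* A worked example of the alignment arithmetic above, as a stand-alone
   sketch. All of the numbers are assumed (three saved registers, 40 bytes
   of locals, 16-byte stack_alignment_needed, frame pointer in use, 32-bit
   words); none of them come from this file.  */
#include <stdio.h>

int
main (void)
{
  int offset, padding1;
  int nregs = 3, units_per_word = 4;
  int size = 40, align = 16;

  offset = 2 * units_per_word;			/* return address + saved %ebp = 8 */
  offset += nregs * units_per_word;		/* register save area -> 20 */
  padding1 = ((offset + align - 1) & -align) - offset;	/* round 20 up to 32 -> 12 */
  offset += padding1;				/* frame_pointer_offset = 32 */
  offset += size;				/* end of locals = 72 */

  printf ("padding1 = %d, end of locals = %d\n", padding1, offset);
  return 0;
}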
4074
4075 /* Emit code to save registers in the prologue. */
4076
4077 static void
4078 ix86_emit_save_regs ()
4079 {
4080 register int regno;
4081 rtx insn;
4082
4083 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4084 if (ix86_save_reg (regno, true))
4085 {
4086 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4087 RTX_FRAME_RELATED_P (insn) = 1;
4088 }
4089 }
4090
4091 /* Emit code to save registers using MOV insns. The first register
4092 is saved at POINTER + OFFSET. */
4093 static void
4094 ix86_emit_save_regs_using_mov (pointer, offset)
4095 rtx pointer;
4096 HOST_WIDE_INT offset;
4097 {
4098 int regno;
4099 rtx insn;
4100
4101 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4102 if (ix86_save_reg (regno, true))
4103 {
4104 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4105 Pmode, offset),
4106 gen_rtx_REG (Pmode, regno));
4107 RTX_FRAME_RELATED_P (insn) = 1;
4108 offset += UNITS_PER_WORD;
4109 }
4110 }
4111
4112 /* Expand the prologue into a bunch of separate insns. */
4113
4114 void
4115 ix86_expand_prologue ()
4116 {
4117 rtx insn;
4118 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4119 || current_function_uses_const_pool)
4120 && !TARGET_64BIT);
4121 struct ix86_frame frame;
4122 int use_mov = 0;
4123 HOST_WIDE_INT allocate;
4124
4125 if (!optimize_size)
4126 {
4127 use_fast_prologue_epilogue
4128 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4129 if (TARGET_PROLOGUE_USING_MOVE)
4130 use_mov = use_fast_prologue_epilogue;
4131 }
4132 ix86_compute_frame_layout (&frame);
4133
4134 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4135 slower on all targets. Also sdb doesn't like it. */
4136
4137 if (frame_pointer_needed)
4138 {
4139 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4140 RTX_FRAME_RELATED_P (insn) = 1;
4141
4142 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4143 RTX_FRAME_RELATED_P (insn) = 1;
4144 }
4145
4146 allocate = frame.to_allocate;
4147 /* When we are dealing with only a single register and an empty frame,
4148 a push is equivalent to the mov+add sequence. */
4149 if (allocate == 0 && frame.nregs <= 1)
4150 use_mov = 0;
4151
4152 if (!use_mov)
4153 ix86_emit_save_regs ();
4154 else
4155 allocate += frame.nregs * UNITS_PER_WORD;
4156
4157 if (allocate == 0)
4158 ;
4159 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4160 {
4161 insn = emit_insn (gen_pro_epilogue_adjust_stack
4162 (stack_pointer_rtx, stack_pointer_rtx,
4163 GEN_INT (-allocate)));
4164 RTX_FRAME_RELATED_P (insn) = 1;
4165 }
4166 else
4167 {
4168 /* ??? Is this only valid for Win32? */
4169
4170 rtx arg0, sym;
4171
4172 if (TARGET_64BIT)
4173 abort ();
4174
4175 arg0 = gen_rtx_REG (SImode, 0);
4176 emit_move_insn (arg0, GEN_INT (allocate));
4177
4178 sym = gen_rtx_MEM (FUNCTION_MODE,
4179 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4180 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4181
4182 CALL_INSN_FUNCTION_USAGE (insn)
4183 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4184 CALL_INSN_FUNCTION_USAGE (insn));
4185 }
4186 if (use_mov)
4187 {
4188 if (!frame_pointer_needed || !frame.to_allocate)
4189 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4190 else
4191 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4192 -frame.nregs * UNITS_PER_WORD);
4193 }
4194
4195 #ifdef SUBTARGET_PROLOGUE
4196 SUBTARGET_PROLOGUE;
4197 #endif
4198
4199 if (pic_reg_used)
4200 load_pic_register ();
4201
4202 /* If we are profiling, make sure no instructions are scheduled before
4203 the call to mcount. However, if -fpic, the above call will have
4204 done that. */
4205 if (current_function_profile && ! pic_reg_used)
4206 emit_insn (gen_blockage ());
4207 }
4208
4209 /* Emit code to restore saved registers using MOV insns. First register
4210 is restored from POINTER + OFFSET. */
4211 static void
4212 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4213 rtx pointer;
4214 int offset;
4215 int maybe_eh_return;
4216 {
4217 int regno;
4218
4219 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4220 if (ix86_save_reg (regno, maybe_eh_return))
4221 {
4222 emit_move_insn (gen_rtx_REG (Pmode, regno),
4223 adjust_address (gen_rtx_MEM (Pmode, pointer),
4224 Pmode, offset));
4225 offset += UNITS_PER_WORD;
4226 }
4227 }
4228
4229 /* Restore function stack, frame, and registers. */
4230
4231 void
4232 ix86_expand_epilogue (style)
4233 int style;
4234 {
4235 int regno;
4236 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4237 struct ix86_frame frame;
4238 HOST_WIDE_INT offset;
4239
4240 ix86_compute_frame_layout (&frame);
4241
4242 /* Calculate start of saved registers relative to ebp. Special care
4243 must be taken for the normal return case of a function using
4244 eh_return: the eax and edx registers are marked as saved, but not
4245 restored along this path. */
4246 offset = frame.nregs;
4247 if (current_function_calls_eh_return && style != 2)
4248 offset -= 2;
4249 offset *= -UNITS_PER_WORD;
4250
4251 /* If we're only restoring one register and sp is not valid, then
4252 use a move instruction to restore the register, since it's
4253 less work than reloading sp and popping the register.
4254
4255 The default code results in a stack adjustment using an add/lea instruction,
4256 while this code results in a LEAVE instruction (or discrete equivalent),
4257 so it is profitable in some other cases as well, especially when there
4258 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4259 is set and there is exactly one register to pop. This heuristic may need
4260 some tuning in the future. */
4261 if ((!sp_valid && frame.nregs <= 1)
4262 || (TARGET_EPILOGUE_USING_MOVE
4263 && use_fast_prologue_epilogue
4264 && (frame.nregs > 1 || frame.to_allocate))
4265 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4266 || (frame_pointer_needed && TARGET_USE_LEAVE
4267 && use_fast_prologue_epilogue && frame.nregs == 1)
4268 || current_function_calls_eh_return)
4269 {
4270 /* Restore registers. We can use ebp or esp to address the memory
4271 locations. If both are available, default to ebp, since offsets
4272 are known to be small. The only exception is esp pointing directly to
4273 the end of the block of saved registers, where we may simplify the
4274 addressing mode. */
4275
4276 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4277 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4278 frame.to_allocate, style == 2);
4279 else
4280 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4281 offset, style == 2);
4282
4283 /* eh_return epilogues need %ecx added to the stack pointer. */
4284 if (style == 2)
4285 {
4286 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4287
4288 if (frame_pointer_needed)
4289 {
4290 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4291 tmp = plus_constant (tmp, UNITS_PER_WORD);
4292 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4293
4294 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4295 emit_move_insn (hard_frame_pointer_rtx, tmp);
4296
4297 emit_insn (gen_pro_epilogue_adjust_stack
4298 (stack_pointer_rtx, sa, const0_rtx));
4299 }
4300 else
4301 {
4302 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4303 tmp = plus_constant (tmp, (frame.to_allocate
4304 + frame.nregs * UNITS_PER_WORD));
4305 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4306 }
4307 }
4308 else if (!frame_pointer_needed)
4309 emit_insn (gen_pro_epilogue_adjust_stack
4310 (stack_pointer_rtx, stack_pointer_rtx,
4311 GEN_INT (frame.to_allocate
4312 + frame.nregs * UNITS_PER_WORD)));
4313 /* If not an i386, mov & pop is faster than "leave". */
4314 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4315 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4316 else
4317 {
4318 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4319 hard_frame_pointer_rtx,
4320 const0_rtx));
4321 if (TARGET_64BIT)
4322 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4323 else
4324 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4325 }
4326 }
4327 else
4328 {
4329 /* First step is to deallocate the stack frame so that we can
4330 pop the registers. */
4331 if (!sp_valid)
4332 {
4333 if (!frame_pointer_needed)
4334 abort ();
4335 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4336 hard_frame_pointer_rtx,
4337 GEN_INT (offset)));
4338 }
4339 else if (frame.to_allocate)
4340 emit_insn (gen_pro_epilogue_adjust_stack
4341 (stack_pointer_rtx, stack_pointer_rtx,
4342 GEN_INT (frame.to_allocate)));
4343
4344 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4345 if (ix86_save_reg (regno, false))
4346 {
4347 if (TARGET_64BIT)
4348 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4349 else
4350 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4351 }
4352 if (frame_pointer_needed)
4353 {
4354 /* Leave results in shorter dependency chains on CPUs that are
4355 able to grok it fast. */
4356 if (TARGET_USE_LEAVE)
4357 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4358 else if (TARGET_64BIT)
4359 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4360 else
4361 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4362 }
4363 }
4364
4365 /* Sibcall epilogues don't want a return instruction. */
4366 if (style == 0)
4367 return;
4368
4369 if (current_function_pops_args && current_function_args_size)
4370 {
4371 rtx popc = GEN_INT (current_function_pops_args);
4372
4373 /* i386 can only pop 64K bytes. If asked to pop more, pop
4374 return address, do explicit add, and jump indirectly to the
4375 caller. */
4376
4377 if (current_function_pops_args >= 65536)
4378 {
4379 rtx ecx = gen_rtx_REG (SImode, 2);
4380
4381 /* There is no "pascal" calling convention in the 64-bit ABI. */
4382 if (TARGET_64BIT)
4383 abort ();
4384
4385 emit_insn (gen_popsi1 (ecx));
4386 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4387 emit_jump_insn (gen_return_indirect_internal (ecx));
4388 }
4389 else
4390 emit_jump_insn (gen_return_pop_internal (popc));
4391 }
4392 else
4393 emit_jump_insn (gen_return_internal ());
4394 }
4395 \f
4396 /* Extract the parts of an RTL expression that is a valid memory address
4397 for an instruction. Return 0 if the structure of the address is
4398 grossly off. Return -1 if the address contains ASHIFT, so it is not
4399 strictly valid, but is still used for computing the length of a lea instruction.
4400 */
4401
4402 static int
4403 ix86_decompose_address (addr, out)
4404 register rtx addr;
4405 struct ix86_address *out;
4406 {
4407 rtx base = NULL_RTX;
4408 rtx index = NULL_RTX;
4409 rtx disp = NULL_RTX;
4410 HOST_WIDE_INT scale = 1;
4411 rtx scale_rtx = NULL_RTX;
4412 int retval = 1;
4413
4414 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4415 base = addr;
4416 else if (GET_CODE (addr) == PLUS)
4417 {
4418 rtx op0 = XEXP (addr, 0);
4419 rtx op1 = XEXP (addr, 1);
4420 enum rtx_code code0 = GET_CODE (op0);
4421 enum rtx_code code1 = GET_CODE (op1);
4422
4423 if (code0 == REG || code0 == SUBREG)
4424 {
4425 if (code1 == REG || code1 == SUBREG)
4426 index = op0, base = op1; /* index + base */
4427 else
4428 base = op0, disp = op1; /* base + displacement */
4429 }
4430 else if (code0 == MULT)
4431 {
4432 index = XEXP (op0, 0);
4433 scale_rtx = XEXP (op0, 1);
4434 if (code1 == REG || code1 == SUBREG)
4435 base = op1; /* index*scale + base */
4436 else
4437 disp = op1; /* index*scale + disp */
4438 }
4439 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4440 {
4441 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4442 scale_rtx = XEXP (XEXP (op0, 0), 1);
4443 base = XEXP (op0, 1);
4444 disp = op1;
4445 }
4446 else if (code0 == PLUS)
4447 {
4448 index = XEXP (op0, 0); /* index + base + disp */
4449 base = XEXP (op0, 1);
4450 disp = op1;
4451 }
4452 else
4453 return 0;
4454 }
4455 else if (GET_CODE (addr) == MULT)
4456 {
4457 index = XEXP (addr, 0); /* index*scale */
4458 scale_rtx = XEXP (addr, 1);
4459 }
4460 else if (GET_CODE (addr) == ASHIFT)
4461 {
4462 rtx tmp;
4463
4464 /* We're called for lea too, which implements ashift on occasion. */
4465 index = XEXP (addr, 0);
4466 tmp = XEXP (addr, 1);
4467 if (GET_CODE (tmp) != CONST_INT)
4468 return 0;
4469 scale = INTVAL (tmp);
4470 if ((unsigned HOST_WIDE_INT) scale > 3)
4471 return 0;
4472 scale = 1 << scale;
4473 retval = -1;
4474 }
4475 else
4476 disp = addr; /* displacement */
4477
4478 /* Extract the integral value of scale. */
4479 if (scale_rtx)
4480 {
4481 if (GET_CODE (scale_rtx) != CONST_INT)
4482 return 0;
4483 scale = INTVAL (scale_rtx);
4484 }
4485
4486 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4487 if (base && index && scale == 1
4488 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4489 || index == stack_pointer_rtx))
4490 {
4491 rtx tmp = base;
4492 base = index;
4493 index = tmp;
4494 }
4495
4496 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4497 if ((base == hard_frame_pointer_rtx
4498 || base == frame_pointer_rtx
4499 || base == arg_pointer_rtx) && !disp)
4500 disp = const0_rtx;
4501
4502 /* Special case: on the K6, [%esi] makes the instruction vector decoded.
4503 Avoid this by transforming it to [%esi+0]. */
4504 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4505 && base && !index && !disp
4506 && REG_P (base)
4507 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4508 disp = const0_rtx;
4509
4510 /* Special case: encode reg+reg instead of reg*2. */
4511 if (!base && index && scale && scale == 2)
4512 base = index, scale = 1;
4513
4514 /* Special case: scaling cannot be encoded without base or displacement. */
4515 if (!base && !disp && index && scale != 1)
4516 disp = const0_rtx;
4517
4518 out->base = base;
4519 out->index = index;
4520 out->disp = disp;
4521 out->scale = scale;
4522
4523 return retval;
4524 }
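/* An illustrative decomposition (example operands, not taken from this
   file): for the canonical index*scale + base + disp form

     (plus:SI (plus:SI (mult:SI (reg:SI B) (const_int 4))
		       (reg:SI A))
	      (const_int 12))

   the code above fills in out->index = B, out->scale = 4, out->base = A
   and out->disp = (const_int 12), and returns 1.  */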
4525 \f
4526 /* Return the cost of the memory address x.
4527 For the i386, it is better to use a complex address than to let gcc copy
4528 the address into a reg and make a new pseudo. But not if the address
4529 requires two regs - that would mean more pseudos with longer
4530 lifetimes. */
4531 int
4532 ix86_address_cost (x)
4533 rtx x;
4534 {
4535 struct ix86_address parts;
4536 int cost = 1;
4537
4538 if (!ix86_decompose_address (x, &parts))
4539 abort ();
4540
4541 if (parts.base && GET_CODE (parts.base) == SUBREG)
4542 parts.base = SUBREG_REG (parts.base);
4543 if (parts.index && GET_CODE (parts.index) == SUBREG)
4544 parts.index = SUBREG_REG (parts.index);
4545
4546 /* More complex memory references are better. */
4547 if (parts.disp && parts.disp != const0_rtx)
4548 cost--;
4549
4550 /* Attempt to minimize number of registers in the address. */
4551 if ((parts.base
4552 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4553 || (parts.index
4554 && (!REG_P (parts.index)
4555 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4556 cost++;
4557
4558 if (parts.base
4559 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4560 && parts.index
4561 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4562 && parts.base != parts.index)
4563 cost++;
4564
4565 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4566 since its predecode logic can't detect the length of such instructions
4567 and it degenerates to vector decoding. Increase the cost of such
4568 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4569 to split such addresses or even to refuse them entirely.
4570
4571 The following addressing modes are affected:
4572 [base+scale*index]
4573 [scale*index+disp]
4574 [base+index]
4575
4576 The first and last cases may be avoidable by explicitly coding the zero in
4577 the memory address, but I don't have an AMD-K6 machine handy to check this
4578 theory. */
4579
4580 if (TARGET_K6
4581 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4582 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4583 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4584 cost += 10;
4585
4586 return cost;
4587 }
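/* A worked example (hypothetical operands): for (plus:SI (reg:SI %ebx)
   (const_int 8)) with a hard register base, the cost starts at 1, the
   nonzero displacement subtracts 1, and no pseudo registers are involved,
   so the final cost is 0.  If the base were still a pseudo, the register
   penalty would instead bring the cost back to 1.  */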
4588 \f
4589 /* If X is a machine specific address (i.e. a symbol or label being
4590 referenced as a displacement from the GOT implemented using an
4591 UNSPEC), then return the base term. Otherwise return X. */
4592
4593 rtx
4594 ix86_find_base_term (x)
4595 rtx x;
4596 {
4597 rtx term;
4598
4599 if (TARGET_64BIT)
4600 {
4601 if (GET_CODE (x) != CONST)
4602 return x;
4603 term = XEXP (x, 0);
4604 if (GET_CODE (term) == PLUS
4605 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4606 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4607 term = XEXP (term, 0);
4608 if (GET_CODE (term) != UNSPEC
4609 || XINT (term, 1) != UNSPEC_GOTPCREL)
4610 return x;
4611
4612 term = XVECEXP (term, 0, 0);
4613
4614 if (GET_CODE (term) != SYMBOL_REF
4615 && GET_CODE (term) != LABEL_REF)
4616 return x;
4617
4618 return term;
4619 }
4620
4621 if (GET_CODE (x) != PLUS
4622 || XEXP (x, 0) != pic_offset_table_rtx
4623 || GET_CODE (XEXP (x, 1)) != CONST)
4624 return x;
4625
4626 term = XEXP (XEXP (x, 1), 0);
4627
4628 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4629 term = XEXP (term, 0);
4630
4631 if (GET_CODE (term) != UNSPEC
4632 || XINT (term, 1) != UNSPEC_GOTOFF)
4633 return x;
4634
4635 term = XVECEXP (term, 0, 0);
4636
4637 if (GET_CODE (term) != SYMBOL_REF
4638 && GET_CODE (term) != LABEL_REF)
4639 return x;
4640
4641 return term;
4642 }
4643 \f
4644 /* Determine if a given CONST RTX is a valid memory displacement
4645 in PIC mode. */
4646
4647 int
4648 legitimate_pic_address_disp_p (disp)
4649 register rtx disp;
4650 {
4651 /* In 64bit mode we can allow direct addresses of symbols and labels
4652 when they are not dynamic symbols. */
4653 if (TARGET_64BIT)
4654 {
4655 rtx x = disp;
4656 if (GET_CODE (disp) == CONST)
4657 x = XEXP (disp, 0);
4658 /* ??? Handle PIC code models */
4659 if (GET_CODE (x) == PLUS
4660 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4661 && ix86_cmodel == CM_SMALL_PIC
4662 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4663 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4664 x = XEXP (x, 0);
4665 if (local_symbolic_operand (x, Pmode))
4666 return 1;
4667 }
4668 if (GET_CODE (disp) != CONST)
4669 return 0;
4670 disp = XEXP (disp, 0);
4671
4672 if (TARGET_64BIT)
4673 {
4674 /* It is unsafe for us to allow PLUS expressions here; this limits the
4675 allowed distance of GOT references. We should not need these anyway. */
4676 if (GET_CODE (disp) != UNSPEC
4677 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4678 return 0;
4679
4680 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4681 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4682 return 0;
4683 return 1;
4684 }
4685
4686 if (GET_CODE (disp) == PLUS)
4687 {
4688 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4689 return 0;
4690 disp = XEXP (disp, 0);
4691 }
4692
4693 if (GET_CODE (disp) != UNSPEC)
4694 return 0;
4695
4696 /* Must be @GOT or @GOTOFF. */
4697 switch (XINT (disp, 1))
4698 {
4699 case UNSPEC_GOT:
4700 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4701 case UNSPEC_GOTOFF:
4702 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4703 }
4704
4705 return 0;
4706 }
4707
4708 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4709 memory address for an instruction. The MODE argument is the machine mode
4710 for the MEM expression that wants to use this address.
4711
4712 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4713 convert common non-canonical forms to canonical form so that they will
4714 be recognized. */
4715
4716 int
4717 legitimate_address_p (mode, addr, strict)
4718 enum machine_mode mode;
4719 register rtx addr;
4720 int strict;
4721 {
4722 struct ix86_address parts;
4723 rtx base, index, disp;
4724 HOST_WIDE_INT scale;
4725 const char *reason = NULL;
4726 rtx reason_rtx = NULL_RTX;
4727
4728 if (TARGET_DEBUG_ADDR)
4729 {
4730 fprintf (stderr,
4731 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4732 GET_MODE_NAME (mode), strict);
4733 debug_rtx (addr);
4734 }
4735
4736 if (ix86_decompose_address (addr, &parts) <= 0)
4737 {
4738 reason = "decomposition failed";
4739 goto report_error;
4740 }
4741
4742 base = parts.base;
4743 index = parts.index;
4744 disp = parts.disp;
4745 scale = parts.scale;
4746
4747 /* Validate base register.
4748
4749 Don't allow SUBREGs here; they can lead to spill failures when the base
4750 is one word out of a two-word structure, which is represented internally
4751 as a DImode int. */
4752
4753 if (base)
4754 {
4755 rtx reg;
4756 reason_rtx = base;
4757
4758 if (GET_CODE (base) == SUBREG)
4759 reg = SUBREG_REG (base);
4760 else
4761 reg = base;
4762
4763 if (GET_CODE (reg) != REG)
4764 {
4765 reason = "base is not a register";
4766 goto report_error;
4767 }
4768
4769 if (GET_MODE (base) != Pmode)
4770 {
4771 reason = "base is not in Pmode";
4772 goto report_error;
4773 }
4774
4775 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
4776 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
4777 {
4778 reason = "base is not valid";
4779 goto report_error;
4780 }
4781 }
4782
4783 /* Validate index register.
4784
4785 Don't allow SUBREGs here; they can lead to spill failures when the index
4786 is one word out of a two-word structure, which is represented internally
4787 as a DImode int. */
4788
4789 if (index)
4790 {
4791 rtx reg;
4792 reason_rtx = index;
4793
4794 if (GET_CODE (index) == SUBREG)
4795 reg = SUBREG_REG (index);
4796 else
4797 reg = index;
4798
4799 if (GET_CODE (reg) != REG)
4800 {
4801 reason = "index is not a register";
4802 goto report_error;
4803 }
4804
4805 if (GET_MODE (index) != Pmode)
4806 {
4807 reason = "index is not in Pmode";
4808 goto report_error;
4809 }
4810
4811 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
4812 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
4813 {
4814 reason = "index is not valid";
4815 goto report_error;
4816 }
4817 }
4818
4819 /* Validate scale factor. */
4820 if (scale != 1)
4821 {
4822 reason_rtx = GEN_INT (scale);
4823 if (!index)
4824 {
4825 reason = "scale without index";
4826 goto report_error;
4827 }
4828
4829 if (scale != 2 && scale != 4 && scale != 8)
4830 {
4831 reason = "scale is not a valid multiplier";
4832 goto report_error;
4833 }
4834 }
4835
4836 /* Validate displacement. */
4837 if (disp)
4838 {
4839 reason_rtx = disp;
4840
4841 if (!CONSTANT_ADDRESS_P (disp))
4842 {
4843 reason = "displacement is not constant";
4844 goto report_error;
4845 }
4846
4847 if (TARGET_64BIT)
4848 {
4849 if (!x86_64_sign_extended_value (disp))
4850 {
4851 reason = "displacement is out of range";
4852 goto report_error;
4853 }
4854 }
4855 else
4856 {
4857 if (GET_CODE (disp) == CONST_DOUBLE)
4858 {
4859 reason = "displacement is a const_double";
4860 goto report_error;
4861 }
4862 }
4863
4864 if (flag_pic && SYMBOLIC_CONST (disp))
4865 {
4866 if (TARGET_64BIT && (index || base))
4867 {
4868 reason = "non-constant pic memory reference";
4869 goto report_error;
4870 }
4871 if (! legitimate_pic_address_disp_p (disp))
4872 {
4873 reason = "displacement is an invalid pic construct";
4874 goto report_error;
4875 }
4876
4877 /* This code used to verify that a symbolic pic displacement
4878 includes the pic_offset_table_rtx register.
4879
4880 While this is a good idea, unfortunately these constructs may
4881 be created by the "adds using lea" optimization for incorrect
4882 code like:
4883
4884 int a;
4885 int foo(int i)
4886 {
4887 return *(&a+i);
4888 }
4889
4890 This code is nonsensical, but results in addressing the
4891 GOT table with a pic_offset_table_rtx base. We can't
4892 just refuse it easily, since it gets matched by the
4893 "addsi3" pattern, which later gets split to lea when the
4894 output register differs from the input. While this
4895 could be handled by a separate addsi pattern for this case
4896 that never results in lea, disabling this test seems to be
4897 the easier and correct fix for the crash. */
4898 }
4899 else if (HALF_PIC_P ())
4900 {
4901 if (! HALF_PIC_ADDRESS_P (disp)
4902 || (base != NULL_RTX || index != NULL_RTX))
4903 {
4904 reason = "displacement is an invalid half-pic reference";
4905 goto report_error;
4906 }
4907 }
4908 }
4909
4910 /* Everything looks valid. */
4911 if (TARGET_DEBUG_ADDR)
4912 fprintf (stderr, "Success.\n");
4913 return TRUE;
4914
4915 report_error:
4916 if (TARGET_DEBUG_ADDR)
4917 {
4918 fprintf (stderr, "Error: %s\n", reason);
4919 debug_rtx (reason_rtx);
4920 }
4921 return FALSE;
4922 }
4923 \f
4924 /* Return a unique alias set for the GOT. */
4925
4926 static HOST_WIDE_INT
4927 ix86_GOT_alias_set ()
4928 {
4929 static HOST_WIDE_INT set = -1;
4930 if (set == -1)
4931 set = new_alias_set ();
4932 return set;
4933 }
4934
4935 /* Return a legitimate reference for ORIG (an address) using the
4936 register REG. If REG is 0, a new pseudo is generated.
4937
4938 There are two types of references that must be handled:
4939
4940 1. Global data references must load the address from the GOT, via
4941 the PIC reg. An insn is emitted to do this load, and the reg is
4942 returned.
4943
4944 2. Static data references, constant pool addresses, and code labels
4945 compute the address as an offset from the GOT, whose base is in
4946 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4947 differentiate them from global data objects. The returned
4948 address is the PIC reg + an unspec constant.
4949
4950 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4951 reg also appears in the address. */
4952
4953 rtx
4954 legitimize_pic_address (orig, reg)
4955 rtx orig;
4956 rtx reg;
4957 {
4958 rtx addr = orig;
4959 rtx new = orig;
4960 rtx base;
4961
4962 if (local_symbolic_operand (addr, Pmode))
4963 {
4964 /* In 64bit mode we can address such objects directly. */
4965 if (TARGET_64BIT)
4966 new = addr;
4967 else
4968 {
4969 /* This symbol may be referenced via a displacement from the PIC
4970 base address (@GOTOFF). */
4971
4972 current_function_uses_pic_offset_table = 1;
4973 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4974 new = gen_rtx_CONST (Pmode, new);
4975 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4976
4977 if (reg != 0)
4978 {
4979 emit_move_insn (reg, new);
4980 new = reg;
4981 }
4982 }
4983 }
4984 else if (GET_CODE (addr) == SYMBOL_REF)
4985 {
4986 if (TARGET_64BIT)
4987 {
4988 current_function_uses_pic_offset_table = 1;
4989 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
4990 new = gen_rtx_CONST (Pmode, new);
4991 new = gen_rtx_MEM (Pmode, new);
4992 RTX_UNCHANGING_P (new) = 1;
4993 set_mem_alias_set (new, ix86_GOT_alias_set ());
4994
4995 if (reg == 0)
4996 reg = gen_reg_rtx (Pmode);
4997 /* Use gen_movsi directly; otherwise the address is loaded
4998 into a register for CSE. We don't want to CSE these addresses;
4999 instead we CSE addresses from the GOT table, so skip this. */
5000 emit_insn (gen_movsi (reg, new));
5001 new = reg;
5002 }
5003 else
5004 {
5005 /* This symbol must be referenced via a load from the
5006 Global Offset Table (@GOT). */
5007
5008 current_function_uses_pic_offset_table = 1;
5009 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010 new = gen_rtx_CONST (Pmode, new);
5011 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5012 new = gen_rtx_MEM (Pmode, new);
5013 RTX_UNCHANGING_P (new) = 1;
5014 set_mem_alias_set (new, ix86_GOT_alias_set ());
5015
5016 if (reg == 0)
5017 reg = gen_reg_rtx (Pmode);
5018 emit_move_insn (reg, new);
5019 new = reg;
5020 }
5021 }
5022 else
5023 {
5024 if (GET_CODE (addr) == CONST)
5025 {
5026 addr = XEXP (addr, 0);
5027
5028 /* We must match stuff we generate before. Assume the only
5029 unspecs that can get here are ours. Not that we could do
5030 anything with them anyway... */
5031 if (GET_CODE (addr) == UNSPEC
5032 || (GET_CODE (addr) == PLUS
5033 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5034 return orig;
5035 if (GET_CODE (addr) != PLUS)
5036 abort ();
5037 }
5038 if (GET_CODE (addr) == PLUS)
5039 {
5040 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5041
5042 /* Check first to see if this is a constant offset from a @GOTOFF
5043 symbol reference. */
5044 if (local_symbolic_operand (op0, Pmode)
5045 && GET_CODE (op1) == CONST_INT)
5046 {
5047 if (!TARGET_64BIT)
5048 {
5049 current_function_uses_pic_offset_table = 1;
5050 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5051 UNSPEC_GOTOFF);
5052 new = gen_rtx_PLUS (Pmode, new, op1);
5053 new = gen_rtx_CONST (Pmode, new);
5054 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5055
5056 if (reg != 0)
5057 {
5058 emit_move_insn (reg, new);
5059 new = reg;
5060 }
5061 }
5062 else
5063 {
5064 /* ??? We need to limit offsets here. */
5065 }
5066 }
5067 else
5068 {
5069 base = legitimize_pic_address (XEXP (addr, 0), reg);
5070 new = legitimize_pic_address (XEXP (addr, 1),
5071 base == reg ? NULL_RTX : reg);
5072
5073 if (GET_CODE (new) == CONST_INT)
5074 new = plus_constant (base, INTVAL (new));
5075 else
5076 {
5077 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5078 {
5079 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5080 new = XEXP (new, 1);
5081 }
5082 new = gen_rtx_PLUS (Pmode, base, new);
5083 }
5084 }
5085 }
5086 }
5087 return new;
5088 }
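/* To illustrate the two 32-bit cases above (the symbol names are invented):
   a local symbol "lvar" becomes an offset from the PIC register,

     (plus:SI (reg:SI %ebx)
	      (const:SI (unspec:SI [(symbol_ref "lvar")] UNSPEC_GOTOFF)))

   while a global symbol "gvar" becomes a load of its address from the GOT,

     (mem:SI (plus:SI (reg:SI %ebx)
		      (const:SI (unspec:SI [(symbol_ref "gvar")] UNSPEC_GOT))))  */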
5089
5090 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
5091 may access it directly in the GOT. */
5092
5093 static void
5094 i386_encode_section_info (decl, first)
5095 tree decl;
5096 int first ATTRIBUTE_UNUSED;
5097 {
5098 if (flag_pic)
5099 {
5100 rtx rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5101
5102 if (GET_CODE (rtl) == MEM && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
5103 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = (*targetm.binds_local_p) (decl);
5104 }
5105 }
5106 \f
5107 /* Try machine-dependent ways of modifying an illegitimate address
5108 to be legitimate. If we find one, return the new, valid address.
5109 This macro is used in only one place: `memory_address' in explow.c.
5110
5111 OLDX is the address as it was before break_out_memory_refs was called.
5112 In some cases it is useful to look at this to decide what needs to be done.
5113
5114 MODE and WIN are passed so that this macro can use
5115 GO_IF_LEGITIMATE_ADDRESS.
5116
5117 It is always safe for this macro to do nothing. It exists to recognize
5118 opportunities to optimize the output.
5119
5120 For the 80386, we handle X+REG by loading X into a register R and
5121 using R+REG. R will go in a general reg and indexing will be used.
5122 However, if REG is a broken-out memory address or multiplication,
5123 nothing needs to be done because REG can certainly go in a general reg.
5124
5125 When -fpic is used, special handling is needed for symbolic references.
5126 See comments by legitimize_pic_address in i386.c for details. */
5127
5128 rtx
5129 legitimize_address (x, oldx, mode)
5130 register rtx x;
5131 register rtx oldx ATTRIBUTE_UNUSED;
5132 enum machine_mode mode;
5133 {
5134 int changed = 0;
5135 unsigned log;
5136
5137 if (TARGET_DEBUG_ADDR)
5138 {
5139 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5140 GET_MODE_NAME (mode));
5141 debug_rtx (x);
5142 }
5143
5144 if (flag_pic && SYMBOLIC_CONST (x))
5145 return legitimize_pic_address (x, 0);
5146
5147 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5148 if (GET_CODE (x) == ASHIFT
5149 && GET_CODE (XEXP (x, 1)) == CONST_INT
5150 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5151 {
5152 changed = 1;
5153 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5154 GEN_INT (1 << log));
5155 }
5156
5157 if (GET_CODE (x) == PLUS)
5158 {
5159 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5160
5161 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5162 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5163 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5164 {
5165 changed = 1;
5166 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5167 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5168 GEN_INT (1 << log));
5169 }
5170
5171 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5172 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5173 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5174 {
5175 changed = 1;
5176 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5177 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5178 GEN_INT (1 << log));
5179 }
5180
5181 /* Put multiply first if it isn't already. */
5182 if (GET_CODE (XEXP (x, 1)) == MULT)
5183 {
5184 rtx tmp = XEXP (x, 0);
5185 XEXP (x, 0) = XEXP (x, 1);
5186 XEXP (x, 1) = tmp;
5187 changed = 1;
5188 }
5189
5190 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5191 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5192 created by virtual register instantiation, register elimination, and
5193 similar optimizations. */
5194 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5195 {
5196 changed = 1;
5197 x = gen_rtx_PLUS (Pmode,
5198 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5199 XEXP (XEXP (x, 1), 0)),
5200 XEXP (XEXP (x, 1), 1));
5201 }
5202
5203 /* Canonicalize
5204 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5205 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5206 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5207 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5208 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5209 && CONSTANT_P (XEXP (x, 1)))
5210 {
5211 rtx constant;
5212 rtx other = NULL_RTX;
5213
5214 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5215 {
5216 constant = XEXP (x, 1);
5217 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5218 }
5219 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5220 {
5221 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5222 other = XEXP (x, 1);
5223 }
5224 else
5225 constant = 0;
5226
5227 if (constant)
5228 {
5229 changed = 1;
5230 x = gen_rtx_PLUS (Pmode,
5231 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5232 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5233 plus_constant (other, INTVAL (constant)));
5234 }
5235 }
5236
5237 if (changed && legitimate_address_p (mode, x, FALSE))
5238 return x;
5239
5240 if (GET_CODE (XEXP (x, 0)) == MULT)
5241 {
5242 changed = 1;
5243 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5244 }
5245
5246 if (GET_CODE (XEXP (x, 1)) == MULT)
5247 {
5248 changed = 1;
5249 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5250 }
5251
5252 if (changed
5253 && GET_CODE (XEXP (x, 1)) == REG
5254 && GET_CODE (XEXP (x, 0)) == REG)
5255 return x;
5256
5257 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5258 {
5259 changed = 1;
5260 x = legitimize_pic_address (x, 0);
5261 }
5262
5263 if (changed && legitimate_address_p (mode, x, FALSE))
5264 return x;
5265
5266 if (GET_CODE (XEXP (x, 0)) == REG)
5267 {
5268 register rtx temp = gen_reg_rtx (Pmode);
5269 register rtx val = force_operand (XEXP (x, 1), temp);
5270 if (val != temp)
5271 emit_move_insn (temp, val);
5272
5273 XEXP (x, 1) = temp;
5274 return x;
5275 }
5276
5277 else if (GET_CODE (XEXP (x, 1)) == REG)
5278 {
5279 register rtx temp = gen_reg_rtx (Pmode);
5280 register rtx val = force_operand (XEXP (x, 0), temp);
5281 if (val != temp)
5282 emit_move_insn (temp, val);
5283
5284 XEXP (x, 0) = temp;
5285 return x;
5286 }
5287 }
5288
5289 return x;
5290 }
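/* A small example of the canonicalizations above (hypothetical input): the
   address (plus (ashift (reg A) (const_int 2)) (reg B)) is rewritten as
   (plus (mult (reg A) (const_int 4)) (reg B)), which already has the
   multiply first, so it now matches the index*scale + base form that
   ix86_decompose_address and GO_IF_LEGITIMATE_ADDRESS expect.  */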
5291 \f
5292 /* Print an integer constant expression in assembler syntax. Addition
5293 and subtraction are the only arithmetic that may appear in these
5294 expressions. FILE is the stdio stream to write to, X is the rtx, and
5295 CODE is the operand print code from the output string. */
5296
5297 static void
5298 output_pic_addr_const (file, x, code)
5299 FILE *file;
5300 rtx x;
5301 int code;
5302 {
5303 char buf[256];
5304
5305 switch (GET_CODE (x))
5306 {
5307 case PC:
5308 if (flag_pic)
5309 putc ('.', file);
5310 else
5311 abort ();
5312 break;
5313
5314 case SYMBOL_REF:
5315 assemble_name (file, XSTR (x, 0));
5316 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5317 fputs ("@PLT", file);
5318 break;
5319
5320 case LABEL_REF:
5321 x = XEXP (x, 0);
5322 /* FALLTHRU */
5323 case CODE_LABEL:
5324 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5325 assemble_name (asm_out_file, buf);
5326 break;
5327
5328 case CONST_INT:
5329 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5330 break;
5331
5332 case CONST:
5333 /* This used to output parentheses around the expression,
5334 but that does not work on the 386 (either ATT or BSD assembler). */
5335 output_pic_addr_const (file, XEXP (x, 0), code);
5336 break;
5337
5338 case CONST_DOUBLE:
5339 if (GET_MODE (x) == VOIDmode)
5340 {
5341 /* We can use %d if the number is <32 bits and positive. */
5342 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5343 fprintf (file, "0x%lx%08lx",
5344 (unsigned long) CONST_DOUBLE_HIGH (x),
5345 (unsigned long) CONST_DOUBLE_LOW (x));
5346 else
5347 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5348 }
5349 else
5350 /* We can't handle floating point constants;
5351 PRINT_OPERAND must handle them. */
5352 output_operand_lossage ("floating constant misused");
5353 break;
5354
5355 case PLUS:
5356 /* Some assemblers need integer constants to appear first. */
5357 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5358 {
5359 output_pic_addr_const (file, XEXP (x, 0), code);
5360 putc ('+', file);
5361 output_pic_addr_const (file, XEXP (x, 1), code);
5362 }
5363 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5364 {
5365 output_pic_addr_const (file, XEXP (x, 1), code);
5366 putc ('+', file);
5367 output_pic_addr_const (file, XEXP (x, 0), code);
5368 }
5369 else
5370 abort ();
5371 break;
5372
5373 case MINUS:
5374 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5375 output_pic_addr_const (file, XEXP (x, 0), code);
5376 putc ('-', file);
5377 output_pic_addr_const (file, XEXP (x, 1), code);
5378 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5379 break;
5380
5381 case UNSPEC:
5382 if (XVECLEN (x, 0) != 1)
5383 abort ();
5384 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5385 switch (XINT (x, 1))
5386 {
5387 case UNSPEC_GOT:
5388 fputs ("@GOT", file);
5389 break;
5390 case UNSPEC_GOTOFF:
5391 fputs ("@GOTOFF", file);
5392 break;
5393 case UNSPEC_PLT:
5394 fputs ("@PLT", file);
5395 break;
5396 case UNSPEC_GOTPCREL:
5397 fputs ("@GOTPCREL(%RIP)", file);
5398 break;
5399 default:
5400 output_operand_lossage ("invalid UNSPEC as operand");
5401 break;
5402 }
5403 break;
5404
5405 default:
5406 output_operand_lossage ("invalid expression as operand");
5407 }
5408 }
5409
5410 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5411 We need to handle our special PIC relocations. */
5412
5413 void
5414 i386_dwarf_output_addr_const (file, x)
5415 FILE *file;
5416 rtx x;
5417 {
5418 #ifdef ASM_QUAD
5419 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5420 #else
5421 if (TARGET_64BIT)
5422 abort ();
5423 fprintf (file, "%s", ASM_LONG);
5424 #endif
5425 if (flag_pic)
5426 output_pic_addr_const (file, x, '\0');
5427 else
5428 output_addr_const (file, x);
5429 fputc ('\n', file);
5430 }
5431
5432 /* In the name of slightly smaller debug output, and to cater to
5433 general assembler lossage, recognize PIC+GOTOFF and turn it back
5434 into a direct symbol reference. */
5435
5436 rtx
5437 i386_simplify_dwarf_addr (orig_x)
5438 rtx orig_x;
5439 {
5440 rtx x = orig_x, y;
5441
5442 if (GET_CODE (x) == MEM)
5443 x = XEXP (x, 0);
5444
5445 if (TARGET_64BIT)
5446 {
5447 if (GET_CODE (x) != CONST
5448 || GET_CODE (XEXP (x, 0)) != UNSPEC
5449 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5450 || GET_CODE (orig_x) != MEM)
5451 return orig_x;
5452 return XVECEXP (XEXP (x, 0), 0, 0);
5453 }
5454
5455 if (GET_CODE (x) != PLUS
5456 || GET_CODE (XEXP (x, 1)) != CONST)
5457 return orig_x;
5458
5459 if (GET_CODE (XEXP (x, 0)) == REG
5460 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5461 /* %ebx + GOT/GOTOFF */
5462 y = NULL;
5463 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5464 {
5465 /* %ebx + %reg * scale + GOT/GOTOFF */
5466 y = XEXP (x, 0);
5467 if (GET_CODE (XEXP (y, 0)) == REG
5468 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5469 y = XEXP (y, 1);
5470 else if (GET_CODE (XEXP (y, 1)) == REG
5471 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5472 y = XEXP (y, 0);
5473 else
5474 return orig_x;
5475 if (GET_CODE (y) != REG
5476 && GET_CODE (y) != MULT
5477 && GET_CODE (y) != ASHIFT)
5478 return orig_x;
5479 }
5480 else
5481 return orig_x;
5482
5483 x = XEXP (XEXP (x, 1), 0);
5484 if (GET_CODE (x) == UNSPEC
5485 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5486 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5487 {
5488 if (y)
5489 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5490 return XVECEXP (x, 0, 0);
5491 }
5492
5493 if (GET_CODE (x) == PLUS
5494 && GET_CODE (XEXP (x, 0)) == UNSPEC
5495 && GET_CODE (XEXP (x, 1)) == CONST_INT
5496 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5497 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5498 && GET_CODE (orig_x) != MEM)))
5499 {
5500 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5501 if (y)
5502 return gen_rtx_PLUS (Pmode, y, x);
5503 return x;
5504 }
5505
5506 return orig_x;
5507 }
5508 \f
5509 static void
5510 put_condition_code (code, mode, reverse, fp, file)
5511 enum rtx_code code;
5512 enum machine_mode mode;
5513 int reverse, fp;
5514 FILE *file;
5515 {
5516 const char *suffix;
5517
5518 if (mode == CCFPmode || mode == CCFPUmode)
5519 {
5520 enum rtx_code second_code, bypass_code;
5521 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5522 if (bypass_code != NIL || second_code != NIL)
5523 abort ();
5524 code = ix86_fp_compare_code_to_integer (code);
5525 mode = CCmode;
5526 }
5527 if (reverse)
5528 code = reverse_condition (code);
5529
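/* The suffix computed below is the condition-code mnemonic that gets
   appended to the set/cmov/j mnemonics, e.g. "e" gives sete/cmove/je
   and "ae" gives setae/cmovae/jae.  */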
5530 switch (code)
5531 {
5532 case EQ:
5533 suffix = "e";
5534 break;
5535 case NE:
5536 suffix = "ne";
5537 break;
5538 case GT:
5539 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5540 abort ();
5541 suffix = "g";
5542 break;
5543 case GTU:
5544 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5545 Those same assemblers have the same but opposite lossage on cmov. */
5546 if (mode != CCmode)
5547 abort ();
5548 suffix = fp ? "nbe" : "a";
5549 break;
5550 case LT:
5551 if (mode == CCNOmode || mode == CCGOCmode)
5552 suffix = "s";
5553 else if (mode == CCmode || mode == CCGCmode)
5554 suffix = "l";
5555 else
5556 abort ();
5557 break;
5558 case LTU:
5559 if (mode != CCmode)
5560 abort ();
5561 suffix = "b";
5562 break;
5563 case GE:
5564 if (mode == CCNOmode || mode == CCGOCmode)
5565 suffix = "ns";
5566 else if (mode == CCmode || mode == CCGCmode)
5567 suffix = "ge";
5568 else
5569 abort ();
5570 break;
5571 case GEU:
5572 /* ??? As above. */
5573 if (mode != CCmode)
5574 abort ();
5575 suffix = fp ? "nb" : "ae";
5576 break;
5577 case LE:
5578 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5579 abort ();
5580 suffix = "le";
5581 break;
5582 case LEU:
5583 if (mode != CCmode)
5584 abort ();
5585 suffix = "be";
5586 break;
5587 case UNORDERED:
5588 suffix = fp ? "u" : "p";
5589 break;
5590 case ORDERED:
5591 suffix = fp ? "nu" : "np";
5592 break;
5593 default:
5594 abort ();
5595 }
5596 fputs (suffix, file);
5597 }
5598
5599 void
5600 print_reg (x, code, file)
5601 rtx x;
5602 int code;
5603 FILE *file;
5604 {
5605 if (REGNO (x) == ARG_POINTER_REGNUM
5606 || REGNO (x) == FRAME_POINTER_REGNUM
5607 || REGNO (x) == FLAGS_REG
5608 || REGNO (x) == FPSR_REG)
5609 abort ();
5610
5611 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5612 putc ('%', file);
5613
5614 if (code == 'w' || MMX_REG_P (x))
5615 code = 2;
5616 else if (code == 'b')
5617 code = 1;
5618 else if (code == 'k')
5619 code = 4;
5620 else if (code == 'q')
5621 code = 8;
5622 else if (code == 'y')
5623 code = 3;
5624 else if (code == 'h')
5625 code = 0;
5626 else
5627 code = GET_MODE_SIZE (GET_MODE (x));
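/* At this point CODE is the operand size in bytes (1, 2, 4, 8, 12 or 16),
   with the special values 0 for the high half of a QImode register and
   3 for x87 "st(N)" naming.  */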
5628
5629 /* Irritatingly, the AMD extended registers use a different naming
5630 convention than the normal registers do. */
5631 if (REX_INT_REG_P (x))
5632 {
5633 if (!TARGET_64BIT)
5634 abort ();
5635 switch (code)
5636 {
5637 case 0:
5638 error ("extended registers have no high halves");
5639 break;
5640 case 1:
5641 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5642 break;
5643 case 2:
5644 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5645 break;
5646 case 4:
5647 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5648 break;
5649 case 8:
5650 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5651 break;
5652 default:
5653 error ("unsupported operand size for extended register");
5654 break;
5655 }
5656 return;
5657 }
5658 switch (code)
5659 {
5660 case 3:
5661 if (STACK_TOP_P (x))
5662 {
5663 fputs ("st(0)", file);
5664 break;
5665 }
5666 /* FALLTHRU */
5667 case 8:
5668 case 4:
5669 case 12:
5670 if (! ANY_FP_REG_P (x))
5671 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5672 /* FALLTHRU */
5673 case 16:
5674 case 2:
5675 fputs (hi_reg_name[REGNO (x)], file);
5676 break;
5677 case 1:
5678 fputs (qi_reg_name[REGNO (x)], file);
5679 break;
5680 case 0:
5681 fputs (qi_high_reg_name[REGNO (x)], file);
5682 break;
5683 default:
5684 abort ();
5685 }
5686 }
5687
5688 /* Meaning of CODE:
5689 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5690 C -- print opcode suffix for set/cmov insn.
5691 c -- like C, but print reversed condition
5692 F,f -- likewise, but for floating-point.
5693 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5694 nothing
5695 R -- print the prefix for register names.
5696 z -- print the opcode suffix for the size of the current operand.
5697 * -- print a star (in certain assembler syntax)
5698 A -- print an absolute memory reference.
5699 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5700 s -- print a shift double count, followed by the assembler's argument
5701 delimiter.
5702 b -- print the QImode name of the register for the indicated operand.
5703 %b0 would print %al if operands[0] is reg 0.
5704 w -- likewise, print the HImode name of the register.
5705 k -- likewise, print the SImode name of the register.
5706 q -- likewise, print the DImode name of the register.
5707 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5708 y -- print "st(0)" instead of "st" as a register.
5709 D -- print condition for SSE cmp instruction.
5710 P -- if PIC, print an @PLT suffix.
5711 X -- don't print any sort of PIC '@' suffix for a symbol.
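 For example, if operands[0] is register %eax, %k0 prints "%eax",
 %w0 prints "%ax" and %b0 prints "%al" (AT&T syntax).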
5712 */
5713
5714 void
5715 print_operand (file, x, code)
5716 FILE *file;
5717 rtx x;
5718 int code;
5719 {
5720 if (code)
5721 {
5722 switch (code)
5723 {
5724 case '*':
5725 if (ASSEMBLER_DIALECT == ASM_ATT)
5726 putc ('*', file);
5727 return;
5728
5729 case 'A':
5730 if (ASSEMBLER_DIALECT == ASM_ATT)
5731 putc ('*', file);
5732 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5733 {
5734 /* Intel syntax. For absolute addresses, registers should not
5735 be surrounded by braces. */
5736 if (GET_CODE (x) != REG)
5737 {
5738 putc ('[', file);
5739 PRINT_OPERAND (file, x, 0);
5740 putc (']', file);
5741 return;
5742 }
5743 }
5744 else
5745 abort ();
5746
5747 PRINT_OPERAND (file, x, 0);
5748 return;
5749
5750
5751 case 'L':
5752 if (ASSEMBLER_DIALECT == ASM_ATT)
5753 putc ('l', file);
5754 return;
5755
5756 case 'W':
5757 if (ASSEMBLER_DIALECT == ASM_ATT)
5758 putc ('w', file);
5759 return;
5760
5761 case 'B':
5762 if (ASSEMBLER_DIALECT == ASM_ATT)
5763 putc ('b', file);
5764 return;
5765
5766 case 'Q':
5767 if (ASSEMBLER_DIALECT == ASM_ATT)
5768 putc ('l', file);
5769 return;
5770
5771 case 'S':
5772 if (ASSEMBLER_DIALECT == ASM_ATT)
5773 putc ('s', file);
5774 return;
5775
5776 case 'T':
5777 if (ASSEMBLER_DIALECT == ASM_ATT)
5778 putc ('t', file);
5779 return;
5780
5781 case 'z':
5782 /* 387 opcodes don't get size suffixes if the operands are
5783 registers. */
5784 if (STACK_REG_P (x))
5785 return;
5786
5787 /* Likewise if using Intel opcodes. */
5788 if (ASSEMBLER_DIALECT == ASM_INTEL)
5789 return;
5790
5791 /* Derive the opcode suffix from the size of the operand. */
5792 switch (GET_MODE_SIZE (GET_MODE (x)))
5793 {
5794 case 2:
5795 #ifdef HAVE_GAS_FILDS_FISTS
5796 putc ('s', file);
5797 #endif
5798 return;
5799
5800 case 4:
5801 if (GET_MODE (x) == SFmode)
5802 {
5803 putc ('s', file);
5804 return;
5805 }
5806 else
5807 putc ('l', file);
5808 return;
5809
5810 case 12:
5811 case 16:
5812 putc ('t', file);
5813 return;
5814
5815 case 8:
5816 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5817 {
5818 #ifdef GAS_MNEMONICS
5819 putc ('q', file);
5820 #else
5821 putc ('l', file);
5822 putc ('l', file);
5823 #endif
5824 }
5825 else
5826 putc ('l', file);
5827 return;
5828
5829 default:
5830 abort ();
5831 }
5832
5833 case 'b':
5834 case 'w':
5835 case 'k':
5836 case 'q':
5837 case 'h':
5838 case 'y':
5839 case 'X':
5840 case 'P':
5841 break;
5842
5843 case 's':
5844 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5845 {
5846 PRINT_OPERAND (file, x, 0);
5847 putc (',', file);
5848 }
5849 return;
5850
5851 case 'D':
5852 /* Little bit of braindamage here. The SSE compare instructions
5853 use completely different names for the comparisons than the
5854 fp conditional moves do. */
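/* These are presumably the predicate names that the md templates paste
   into the SSE compare mnemonics (cmpeqss, cmpltss, ...).  */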
5855 switch (GET_CODE (x))
5856 {
5857 case EQ:
5858 case UNEQ:
5859 fputs ("eq", file);
5860 break;
5861 case LT:
5862 case UNLT:
5863 fputs ("lt", file);
5864 break;
5865 case LE:
5866 case UNLE:
5867 fputs ("le", file);
5868 break;
5869 case UNORDERED:
5870 fputs ("unord", file);
5871 break;
5872 case NE:
5873 case LTGT:
5874 fputs ("neq", file);
5875 break;
5876 case UNGE:
5877 case GE:
5878 fputs ("nlt", file);
5879 break;
5880 case UNGT:
5881 case GT:
5882 fputs ("nle", file);
5883 break;
5884 case ORDERED:
5885 fputs ("ord", file);
5886 break;
5887 default:
5888 abort ();
5889 break;
5890 }
5891 return;
5892 case 'O':
5893 #ifdef CMOV_SUN_AS_SYNTAX
5894 if (ASSEMBLER_DIALECT == ASM_ATT)
5895 {
5896 switch (GET_MODE (x))
5897 {
5898 case HImode: putc ('w', file); break;
5899 case SImode:
5900 case SFmode: putc ('l', file); break;
5901 case DImode:
5902 case DFmode: putc ('q', file); break;
5903 default: abort ();
5904 }
5905 putc ('.', file);
5906 }
5907 #endif
5908 return;
5909 case 'C':
5910 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5911 return;
5912 case 'F':
5913 #ifdef CMOV_SUN_AS_SYNTAX
5914 if (ASSEMBLER_DIALECT == ASM_ATT)
5915 putc ('.', file);
5916 #endif
5917 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5918 return;
5919
5920 /* Like above, but reverse condition */
5921 case 'c':
5922 /* Check to see if argument to %c is really a constant
5923 and not a condition code which needs to be reversed. */
5924 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5925 {
5926 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5927 return;
5928 }
5929 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5930 return;
5931 case 'f':
5932 #ifdef CMOV_SUN_AS_SYNTAX
5933 if (ASSEMBLER_DIALECT == ASM_ATT)
5934 putc ('.', file);
5935 #endif
5936 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5937 return;
5938 case '+':
5939 {
5940 rtx x;
5941
5942 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5943 return;
5944
5945 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5946 if (x)
5947 {
5948 int pred_val = INTVAL (XEXP (x, 0));
5949
5950 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5951 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5952 {
5953 int taken = pred_val > REG_BR_PROB_BASE / 2;
5954 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5955
5956 /* Emit hints only in the case the default branch prediction
5957 heuristics would fail. */
5958 if (taken != cputaken)
5959 {
5960 /* We use 3e (DS) prefix for taken branches and
5961 2e (CS) prefix for not taken branches. */
5962 if (taken)
5963 fputs ("ds ; ", file);
5964 else
5965 fputs ("cs ; ", file);
5966 }
5967 }
5968 }
5969 return;
5970 }
5971 default:
5972 output_operand_lossage ("invalid operand code `%c'", code);
5973 }
5974 }
5975
5976 if (GET_CODE (x) == REG)
5977 {
5978 PRINT_REG (x, code, file);
5979 }
5980
5981 else if (GET_CODE (x) == MEM)
5982 {
5983 /* No `byte ptr' prefix for call instructions. */
5984 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5985 {
5986 const char * size;
5987 switch (GET_MODE_SIZE (GET_MODE (x)))
5988 {
5989 case 1: size = "BYTE"; break;
5990 case 2: size = "WORD"; break;
5991 case 4: size = "DWORD"; break;
5992 case 8: size = "QWORD"; break;
5993 case 12: size = "XWORD"; break;
5994 case 16: size = "XMMWORD"; break;
5995 default:
5996 abort ();
5997 }
5998
5999 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6000 if (code == 'b')
6001 size = "BYTE";
6002 else if (code == 'w')
6003 size = "WORD";
6004 else if (code == 'k')
6005 size = "DWORD";
6006
6007 fputs (size, file);
6008 fputs (" PTR ", file);
6009 }
6010
6011 x = XEXP (x, 0);
6012 if (flag_pic && CONSTANT_ADDRESS_P (x))
6013 output_pic_addr_const (file, x, code);
6014 /* Avoid (%rip) for call operands. */
6015 else if (CONSTANT_ADDRESS_P (x) && code =='P'
6016 && GET_CODE (x) != CONST_INT)
6017 output_addr_const (file, x);
6018 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6019 output_operand_lossage ("invalid constraints for operand");
6020 else
6021 output_address (x);
6022 }
6023
6024 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6025 {
6026 REAL_VALUE_TYPE r;
6027 long l;
6028
6029 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6030 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6031
6032 if (ASSEMBLER_DIALECT == ASM_ATT)
6033 putc ('$', file);
6034 fprintf (file, "0x%lx", l);
6035 }
6036
6037 /* These float cases don't actually occur as immediate operands. */
6038 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6039 {
6040 REAL_VALUE_TYPE r;
6041 char dstr[30];
6042
6043 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6044 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6045 fprintf (file, "%s", dstr);
6046 }
6047
6048 else if (GET_CODE (x) == CONST_DOUBLE
6049 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6050 {
6051 REAL_VALUE_TYPE r;
6052 char dstr[30];
6053
6054 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6055 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6056 fprintf (file, "%s", dstr);
6057 }
6058 else
6059 {
6060 if (code != 'P')
6061 {
6062 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6063 {
6064 if (ASSEMBLER_DIALECT == ASM_ATT)
6065 putc ('$', file);
6066 }
6067 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6068 || GET_CODE (x) == LABEL_REF)
6069 {
6070 if (ASSEMBLER_DIALECT == ASM_ATT)
6071 putc ('$', file);
6072 else
6073 fputs ("OFFSET FLAT:", file);
6074 }
6075 }
6076 if (GET_CODE (x) == CONST_INT)
6077 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6078 else if (flag_pic)
6079 output_pic_addr_const (file, x, code);
6080 else
6081 output_addr_const (file, x);
6082 }
6083 }
6084 \f
6085 /* Print a memory operand whose address is ADDR. */
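/* For example, with base %eax, index %ebx, scale 4 and displacement 8,
   the AT&T branch below prints "8(%eax,%ebx,4)" while the Intel branch
   prints "[eax+8+ebx*4]".  */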
6086
6087 void
6088 print_operand_address (file, addr)
6089 FILE *file;
6090 register rtx addr;
6091 {
6092 struct ix86_address parts;
6093 rtx base, index, disp;
6094 int scale;
6095
6096 if (! ix86_decompose_address (addr, &parts))
6097 abort ();
6098
6099 base = parts.base;
6100 index = parts.index;
6101 disp = parts.disp;
6102 scale = parts.scale;
6103
6104 if (!base && !index)
6105 {
6106 /* A displacement-only address requires special attention. */
6107
6108 if (GET_CODE (disp) == CONST_INT)
6109 {
6110 if (ASSEMBLER_DIALECT == ASM_INTEL)
6111 {
6112 if (USER_LABEL_PREFIX[0] == 0)
6113 putc ('%', file);
6114 fputs ("ds:", file);
6115 }
6116 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6117 }
6118 else if (flag_pic)
6119 output_pic_addr_const (file, addr, 0);
6120 else
6121 output_addr_const (file, addr);
6122
6123 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6124 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6125 fputs ("(%rip)", file);
6126 }
6127 else
6128 {
6129 if (ASSEMBLER_DIALECT == ASM_ATT)
6130 {
6131 if (disp)
6132 {
6133 if (flag_pic)
6134 output_pic_addr_const (file, disp, 0);
6135 else if (GET_CODE (disp) == LABEL_REF)
6136 output_asm_label (disp);
6137 else
6138 output_addr_const (file, disp);
6139 }
6140
6141 putc ('(', file);
6142 if (base)
6143 PRINT_REG (base, 0, file);
6144 if (index)
6145 {
6146 putc (',', file);
6147 PRINT_REG (index, 0, file);
6148 if (scale != 1)
6149 fprintf (file, ",%d", scale);
6150 }
6151 putc (')', file);
6152 }
6153 else
6154 {
6155 rtx offset = NULL_RTX;
6156
6157 if (disp)
6158 {
6159 /* Pull out the offset of a symbol; print any symbol itself. */
6160 if (GET_CODE (disp) == CONST
6161 && GET_CODE (XEXP (disp, 0)) == PLUS
6162 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6163 {
6164 offset = XEXP (XEXP (disp, 0), 1);
6165 disp = gen_rtx_CONST (VOIDmode,
6166 XEXP (XEXP (disp, 0), 0));
6167 }
6168
6169 if (flag_pic)
6170 output_pic_addr_const (file, disp, 0);
6171 else if (GET_CODE (disp) == LABEL_REF)
6172 output_asm_label (disp);
6173 else if (GET_CODE (disp) == CONST_INT)
6174 offset = disp;
6175 else
6176 output_addr_const (file, disp);
6177 }
6178
6179 putc ('[', file);
6180 if (base)
6181 {
6182 PRINT_REG (base, 0, file);
6183 if (offset)
6184 {
6185 if (INTVAL (offset) >= 0)
6186 putc ('+', file);
6187 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6188 }
6189 }
6190 else if (offset)
6191 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6192 else
6193 putc ('0', file);
6194
6195 if (index)
6196 {
6197 putc ('+', file);
6198 PRINT_REG (index, 0, file);
6199 if (scale != 1)
6200 fprintf (file, "*%d", scale);
6201 }
6202 putc (']', file);
6203 }
6204 }
6205 }
6206 \f
6207 /* Split one or more DImode RTL references into pairs of SImode
6208 references. The RTL can be REG, offsettable MEM, integer constant, or
6209 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6210 split and "num" is its length. lo_half and hi_half are output arrays
6211 that parallel "operands". */
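/* On this little-endian target the low SImode word lives at offset 0 and
   the high word at offset 4, which is what the MEM case below relies on.  */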
6212
6213 void
6214 split_di (operands, num, lo_half, hi_half)
6215 rtx operands[];
6216 int num;
6217 rtx lo_half[], hi_half[];
6218 {
6219 while (num--)
6220 {
6221 rtx op = operands[num];
6222
6223 /* simplify_subreg refuses to split volatile memory addresses,
6224 but we still have to handle them. */
6225 if (GET_CODE (op) == MEM)
6226 {
6227 lo_half[num] = adjust_address (op, SImode, 0);
6228 hi_half[num] = adjust_address (op, SImode, 4);
6229 }
6230 else
6231 {
6232 lo_half[num] = simplify_gen_subreg (SImode, op,
6233 GET_MODE (op) == VOIDmode
6234 ? DImode : GET_MODE (op), 0);
6235 hi_half[num] = simplify_gen_subreg (SImode, op,
6236 GET_MODE (op) == VOIDmode
6237 ? DImode : GET_MODE (op), 4);
6238 }
6239 }
6240 }
6241 /* Split one or more TImode RTL references into pairs of SImode
6242 references. The RTL can be REG, offsettable MEM, integer constant, or
6243 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6244 split and "num" is its length. lo_half and hi_half are output arrays
6245 that parallel "operands". */
6246
6247 void
6248 split_ti (operands, num, lo_half, hi_half)
6249 rtx operands[];
6250 int num;
6251 rtx lo_half[], hi_half[];
6252 {
6253 while (num--)
6254 {
6255 rtx op = operands[num];
6256
6257 /* simplify_subreg refuses to split volatile memory addresses, but we
6258 still have to handle them. */
6259 if (GET_CODE (op) == MEM)
6260 {
6261 lo_half[num] = adjust_address (op, DImode, 0);
6262 hi_half[num] = adjust_address (op, DImode, 8);
6263 }
6264 else
6265 {
6266 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6267 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6268 }
6269 }
6270 }
6271 \f
6272 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6273 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6274 is the expression of the binary operation. The output may either be
6275 emitted here, or returned to the caller, like all output_* functions.
6276
6277 There is no guarantee that the operands are the same mode, as they
6278 might be within FLOAT or FLOAT_EXTEND expressions. */
6279
6280 #ifndef SYSV386_COMPAT
6281 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6282 wants to fix the assemblers because that causes incompatibility
6283 with gcc. No-one wants to fix gcc because that causes
6284 incompatibility with assemblers... You can use the option of
6285 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6286 #define SYSV386_COMPAT 1
6287 #endif
6288
6289 const char *
6290 output_387_binary_op (insn, operands)
6291 rtx insn;
6292 rtx *operands;
6293 {
6294 static char buf[30];
6295 const char *p;
6296 const char *ssep;
6297 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6298
6299 #ifdef ENABLE_CHECKING
6300 /* Even if we do not want to check the inputs, this documents the input
6301 constraints, which helps in understanding the following code. */
6302 if (STACK_REG_P (operands[0])
6303 && ((REG_P (operands[1])
6304 && REGNO (operands[0]) == REGNO (operands[1])
6305 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6306 || (REG_P (operands[2])
6307 && REGNO (operands[0]) == REGNO (operands[2])
6308 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6309 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6310 ; /* ok */
6311 else if (!is_sse)
6312 abort ();
6313 #endif
6314
6315 switch (GET_CODE (operands[3]))
6316 {
6317 case PLUS:
6318 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6319 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6320 p = "fiadd";
6321 else
6322 p = "fadd";
6323 ssep = "add";
6324 break;
6325
6326 case MINUS:
6327 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6328 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6329 p = "fisub";
6330 else
6331 p = "fsub";
6332 ssep = "sub";
6333 break;
6334
6335 case MULT:
6336 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6337 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6338 p = "fimul";
6339 else
6340 p = "fmul";
6341 ssep = "mul";
6342 break;
6343
6344 case DIV:
6345 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6346 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6347 p = "fidiv";
6348 else
6349 p = "fdiv";
6350 ssep = "div";
6351 break;
6352
6353 default:
6354 abort ();
6355 }
6356
6357 if (is_sse)
6358 {
6359 strcpy (buf, ssep);
6360 if (GET_MODE (operands[0]) == SFmode)
6361 strcat (buf, "ss\t{%2, %0|%0, %2}");
6362 else
6363 strcat (buf, "sd\t{%2, %0|%0, %2}");
6364 return buf;
6365 }
6366 strcpy (buf, p);
6367
6368 switch (GET_CODE (operands[3]))
6369 {
6370 case MULT:
6371 case PLUS:
6372 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6373 {
6374 rtx temp = operands[2];
6375 operands[2] = operands[1];
6376 operands[1] = temp;
6377 }
6378
6379 /* We now know operands[0] == operands[1]. */
6380
6381 if (GET_CODE (operands[2]) == MEM)
6382 {
6383 p = "%z2\t%2";
6384 break;
6385 }
6386
6387 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6388 {
6389 if (STACK_TOP_P (operands[0]))
6390 /* How is it that we are storing to a dead operand[2]?
6391 Well, presumably operands[1] is dead too. We can't
6392 store the result to st(0) as st(0) gets popped on this
6393 instruction. Instead store to operands[2] (which I
6394 think has to be st(1)). st(1) will be popped later.
6395 gcc <= 2.8.1 didn't have this check and generated
6396 assembly code that the Unixware assembler rejected. */
6397 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6398 else
6399 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6400 break;
6401 }
6402
6403 if (STACK_TOP_P (operands[0]))
6404 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6405 else
6406 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6407 break;
6408
6409 case MINUS:
6410 case DIV:
6411 if (GET_CODE (operands[1]) == MEM)
6412 {
6413 p = "r%z1\t%1";
6414 break;
6415 }
6416
6417 if (GET_CODE (operands[2]) == MEM)
6418 {
6419 p = "%z2\t%2";
6420 break;
6421 }
6422
6423 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6424 {
6425 #if SYSV386_COMPAT
6426 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6427 derived assemblers, confusingly reverse the direction of
6428 the operation for fsub{r} and fdiv{r} when the
6429 destination register is not st(0). The Intel assembler
6430 doesn't have this brain damage. Read !SYSV386_COMPAT to
6431 figure out what the hardware really does. */
6432 if (STACK_TOP_P (operands[0]))
6433 p = "{p\t%0, %2|rp\t%2, %0}";
6434 else
6435 p = "{rp\t%2, %0|p\t%0, %2}";
6436 #else
6437 if (STACK_TOP_P (operands[0]))
6438 /* As above for fmul/fadd, we can't store to st(0). */
6439 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6440 else
6441 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6442 #endif
6443 break;
6444 }
6445
6446 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6447 {
6448 #if SYSV386_COMPAT
6449 if (STACK_TOP_P (operands[0]))
6450 p = "{rp\t%0, %1|p\t%1, %0}";
6451 else
6452 p = "{p\t%1, %0|rp\t%0, %1}";
6453 #else
6454 if (STACK_TOP_P (operands[0]))
6455 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6456 else
6457 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6458 #endif
6459 break;
6460 }
6461
6462 if (STACK_TOP_P (operands[0]))
6463 {
6464 if (STACK_TOP_P (operands[1]))
6465 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6466 else
6467 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6468 break;
6469 }
6470 else if (STACK_TOP_P (operands[1]))
6471 {
6472 #if SYSV386_COMPAT
6473 p = "{\t%1, %0|r\t%0, %1}";
6474 #else
6475 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6476 #endif
6477 }
6478 else
6479 {
6480 #if SYSV386_COMPAT
6481 p = "{r\t%2, %0|\t%0, %2}";
6482 #else
6483 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6484 #endif
6485 }
6486 break;
6487
6488 default:
6489 abort ();
6490 }
6491
6492 strcat (buf, p);
6493 return buf;
6494 }
6495
6496 /* Output code to initialize control word copies used by
6497 trunc?f?i patterns. NORMAL is set to the current control word, while
6498 ROUND_DOWN is set to a control word that rounds toward zero. */
6499 void
6500 emit_i387_cw_initialization (normal, round_down)
6501 rtx normal, round_down;
6502 {
6503 rtx reg = gen_reg_rtx (HImode);
6504
6505 emit_insn (gen_x86_fnstcw_1 (normal));
6506 emit_move_insn (reg, normal);
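/* Bits 10 and 11 of the i387 control word form the rounding-control field;
   forcing both to 1 selects round-toward-zero (truncation), which is what
   the trunc?f?i patterns need - that is what the 0xc00 / 0xc constants
   below are about.  */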
6507 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6508 && !TARGET_64BIT)
6509 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6510 else
6511 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6512 emit_move_insn (round_down, reg);
6513 }
6514
6515 /* Output code for INSN to convert a float to a signed int. OPERANDS
6516 are the insn operands. The output may be [HSD]Imode and the input
6517 operand may be [SDX]Fmode. */
6518
6519 const char *
6520 output_fix_trunc (insn, operands)
6521 rtx insn;
6522 rtx *operands;
6523 {
6524 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6525 int dimode_p = GET_MODE (operands[0]) == DImode;
6526
6527 /* Jump through a hoop or two for DImode, since the hardware has no
6528 non-popping instruction. We used to do this a different way, but
6529 that was somewhat fragile and broke with post-reload splitters. */
6530 if (dimode_p && !stack_top_dies)
6531 output_asm_insn ("fld\t%y1", operands);
6532
6533 if (!STACK_TOP_P (operands[1]))
6534 abort ();
6535
6536 if (GET_CODE (operands[0]) != MEM)
6537 abort ();
6538
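/* Switch to the truncating control word, emit the store (popping when
   the DImode case or a dying stack top requires it), then restore the
   original control word.  */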
6539 output_asm_insn ("fldcw\t%3", operands);
6540 if (stack_top_dies || dimode_p)
6541 output_asm_insn ("fistp%z0\t%0", operands);
6542 else
6543 output_asm_insn ("fist%z0\t%0", operands);
6544 output_asm_insn ("fldcw\t%2", operands);
6545
6546 return "";
6547 }
6548
6549 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6550 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6551 when fucom should be used. */
6552
6553 const char *
6554 output_fp_compare (insn, operands, eflags_p, unordered_p)
6555 rtx insn;
6556 rtx *operands;
6557 int eflags_p, unordered_p;
6558 {
6559 int stack_top_dies;
6560 rtx cmp_op0 = operands[0];
6561 rtx cmp_op1 = operands[1];
6562 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6563
6564 if (eflags_p == 2)
6565 {
6566 cmp_op0 = cmp_op1;
6567 cmp_op1 = operands[2];
6568 }
6569 if (is_sse)
6570 {
6571 if (GET_MODE (operands[0]) == SFmode)
6572 if (unordered_p)
6573 return "ucomiss\t{%1, %0|%0, %1}";
6574 else
6575 return "comiss\t{%1, %0|%0, %y}";
6576 else
6577 if (unordered_p)
6578 return "ucomisd\t{%1, %0|%0, %1}";
6579 else
6580 return "comisd\t{%1, %0|%0, %y}";
6581 }
6582
6583 if (! STACK_TOP_P (cmp_op0))
6584 abort ();
6585
6586 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6587
6588 if (STACK_REG_P (cmp_op1)
6589 && stack_top_dies
6590 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6591 && REGNO (cmp_op1) != FIRST_STACK_REG)
6592 {
6593 /* If the top of the 387 stack dies, and the other operand
6594 is also a stack register that dies, then this must be an
6595 `fcompp' float compare. */
6596
6597 if (eflags_p == 1)
6598 {
6599 /* There is no double popping fcomi variant. Fortunately,
6600 eflags is immune from the fstp's cc clobbering. */
6601 if (unordered_p)
6602 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6603 else
6604 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6605 return "fstp\t%y0";
6606 }
6607 else
6608 {
6609 if (eflags_p == 2)
6610 {
6611 if (unordered_p)
6612 return "fucompp\n\tfnstsw\t%0";
6613 else
6614 return "fcompp\n\tfnstsw\t%0";
6615 }
6616 else
6617 {
6618 if (unordered_p)
6619 return "fucompp";
6620 else
6621 return "fcompp";
6622 }
6623 }
6624 }
6625 else
6626 {
6627 /* Encoded here as (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies. */
6628
6629 static const char * const alt[24] =
6630 {
6631 "fcom%z1\t%y1",
6632 "fcomp%z1\t%y1",
6633 "fucom%z1\t%y1",
6634 "fucomp%z1\t%y1",
6635
6636 "ficom%z1\t%y1",
6637 "ficomp%z1\t%y1",
6638 NULL,
6639 NULL,
6640
6641 "fcomi\t{%y1, %0|%0, %y1}",
6642 "fcomip\t{%y1, %0|%0, %y1}",
6643 "fucomi\t{%y1, %0|%0, %y1}",
6644 "fucomip\t{%y1, %0|%0, %y1}",
6645
6646 NULL,
6647 NULL,
6648 NULL,
6649 NULL,
6650
6651 "fcom%z2\t%y2\n\tfnstsw\t%0",
6652 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6653 "fucom%z2\t%y2\n\tfnstsw\t%0",
6654 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6655
6656 "ficom%z2\t%y2\n\tfnstsw\t%0",
6657 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6658 NULL,
6659 NULL
6660 };
6661
6662 int mask;
6663 const char *ret;
6664
6665 mask = eflags_p << 3;
6666 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6667 mask |= unordered_p << 1;
6668 mask |= stack_top_dies;
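/* For example, eflags_p == 1 with an FP operand, an ordered compare and a
   dying top-of-stack gives mask 9, selecting "fcomip\t{%y1, %0|%0, %y1}"
   above.  */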
6669
6670 if (mask >= 24)
6671 abort ();
6672 ret = alt[mask];
6673 if (ret == NULL)
6674 abort ();
6675
6676 return ret;
6677 }
6678 }
6679
6680 void
6681 ix86_output_addr_vec_elt (file, value)
6682 FILE *file;
6683 int value;
6684 {
6685 const char *directive = ASM_LONG;
6686
6687 if (TARGET_64BIT)
6688 {
6689 #ifdef ASM_QUAD
6690 directive = ASM_QUAD;
6691 #else
6692 abort ();
6693 #endif
6694 }
6695
6696 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6697 }
6698
6699 void
6700 ix86_output_addr_diff_elt (file, value, rel)
6701 FILE *file;
6702 int value, rel;
6703 {
6704 if (TARGET_64BIT)
6705 fprintf (file, "%s%s%d-%s%d\n",
6706 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6707 else if (HAVE_AS_GOTOFF_IN_DATA)
6708 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6709 else
6710 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6711 ASM_LONG, LPREFIX, value);
6712 }
6713 \f
6714 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6715 for the target. */
6716
6717 void
6718 ix86_expand_clear (dest)
6719 rtx dest;
6720 {
6721 rtx tmp;
6722
6723 /* We play register width games, which are only valid after reload. */
6724 if (!reload_completed)
6725 abort ();
6726
6727 /* Avoid HImode and its attendant prefix byte. */
6728 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6729 dest = gen_rtx_REG (SImode, REGNO (dest));
6730
6731 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6732
6733 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6734 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6735 {
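/* The xor form clobbers the condition codes, so pair the SET with an
   explicit clobber of the flags register (hard register 17).  */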
6736 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6737 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6738 }
6739
6740 emit_insn (tmp);
6741 }
6742
6743 void
6744 ix86_expand_move (mode, operands)
6745 enum machine_mode mode;
6746 rtx operands[];
6747 {
6748 int strict = (reload_in_progress || reload_completed);
6749 rtx insn;
6750
6751 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6752 {
6753 /* Emit insns to move operands[1] into operands[0]. */
6754
6755 if (GET_CODE (operands[0]) == MEM)
6756 operands[1] = force_reg (Pmode, operands[1]);
6757 else
6758 {
6759 rtx temp = operands[0];
6760 if (GET_CODE (temp) != REG)
6761 temp = gen_reg_rtx (Pmode);
6762 temp = legitimize_pic_address (operands[1], temp);
6763 if (temp == operands[0])
6764 return;
6765 operands[1] = temp;
6766 }
6767 }
6768 else
6769 {
6770 if (GET_CODE (operands[0]) == MEM
6771 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6772 || !push_operand (operands[0], mode))
6773 && GET_CODE (operands[1]) == MEM)
6774 operands[1] = force_reg (mode, operands[1]);
6775
6776 if (push_operand (operands[0], mode)
6777 && ! general_no_elim_operand (operands[1], mode))
6778 operands[1] = copy_to_mode_reg (mode, operands[1]);
6779
6780 /* Force large constants in 64-bit compilation into a register
6781 to get them CSEd. */
6782 if (TARGET_64BIT && mode == DImode
6783 && immediate_operand (operands[1], mode)
6784 && !x86_64_zero_extended_value (operands[1])
6785 && !register_operand (operands[0], mode)
6786 && optimize && !reload_completed && !reload_in_progress)
6787 operands[1] = copy_to_mode_reg (mode, operands[1]);
6788
6789 if (FLOAT_MODE_P (mode))
6790 {
6791 /* If we are loading a floating point constant to a register,
6792 force the value to memory now, since we'll get better code
6793 out of the back end. */
6794
6795 if (strict)
6796 ;
6797 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6798 && register_operand (operands[0], mode))
6799 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6800 }
6801 }
6802
6803 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6804
6805 emit_insn (insn);
6806 }
6807
6808 void
6809 ix86_expand_vector_move (mode, operands)
6810 enum machine_mode mode;
6811 rtx operands[];
6812 {
6813 /* Force constants other than zero into memory. We do not know how
6814 the instructions used to build constants modify the upper 64 bits
6815 of the register; once we have that information, we may be able
6816 to handle some of them more efficiently. */
6817 if ((reload_in_progress | reload_completed) == 0
6818 && register_operand (operands[0], mode)
6819 && CONSTANT_P (operands[1]))
6820 {
6821 rtx addr = gen_reg_rtx (Pmode);
6822 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6823 operands[1] = gen_rtx_MEM (mode, addr);
6824 }
6825
6826 /* Make operand1 a register if it isn't already. */
6827 if ((reload_in_progress | reload_completed) == 0
6828 && !register_operand (operands[0], mode)
6829 && !register_operand (operands[1], mode)
6830 && operands[1] != CONST0_RTX (mode))
6831 {
6832 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6833 emit_move_insn (operands[0], temp);
6834 return;
6835 }
6836
6837 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6838 }
6839
6840 /* Attempt to expand a binary operator. Make the expansion closer to the
6841 actual machine than just general_operand, which would allow 3 separate
6842 memory references (one output, two input) in a single insn. */
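/* For example, a (plus:SI (mem) (mem)) source is rewritten here so that at
   most one memory operand survives, matching the single-memory-operand
   limit of the hardware; the emitted insn normally also carries a clobber
   of the flags register since the arithmetic instructions overwrite EFLAGS.  */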
6843
6844 void
6845 ix86_expand_binary_operator (code, mode, operands)
6846 enum rtx_code code;
6847 enum machine_mode mode;
6848 rtx operands[];
6849 {
6850 int matching_memory;
6851 rtx src1, src2, dst, op, clob;
6852
6853 dst = operands[0];
6854 src1 = operands[1];
6855 src2 = operands[2];
6856
6857 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6858 if (GET_RTX_CLASS (code) == 'c'
6859 && (rtx_equal_p (dst, src2)
6860 || immediate_operand (src1, mode)))
6861 {
6862 rtx temp = src1;
6863 src1 = src2;
6864 src2 = temp;
6865 }
6866
6867 /* If the destination is memory, and we do not have matching source
6868 operands, do things in registers. */
6869 matching_memory = 0;
6870 if (GET_CODE (dst) == MEM)
6871 {
6872 if (rtx_equal_p (dst, src1))
6873 matching_memory = 1;
6874 else if (GET_RTX_CLASS (code) == 'c'
6875 && rtx_equal_p (dst, src2))
6876 matching_memory = 2;
6877 else
6878 dst = gen_reg_rtx (mode);
6879 }
6880
6881 /* Both source operands cannot be in memory. */
6882 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6883 {
6884 if (matching_memory != 2)
6885 src2 = force_reg (mode, src2);
6886 else
6887 src1 = force_reg (mode, src1);
6888 }
6889
6890 /* If the operation is not commutative, source 1 cannot be a constant
6891 or non-matching memory. */
6892 if ((CONSTANT_P (src1)
6893 || (!matching_memory && GET_CODE (src1) == MEM))
6894 && GET_RTX_CLASS (code) != 'c')
6895 src1 = force_reg (mode, src1);
6896
6897 /* If optimizing, copy to regs to improve CSE */
6898 if (optimize && ! no_new_pseudos)
6899 {
6900 if (GET_CODE (dst) == MEM)
6901 dst = gen_reg_rtx (mode);
6902 if (GET_CODE (src1) == MEM)
6903 src1 = force_reg (mode, src1);
6904 if (GET_CODE (src2) == MEM)
6905 src2 = force_reg (mode, src2);
6906 }
6907
6908 /* Emit the instruction. */
6909
6910 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6911 if (reload_in_progress)
6912 {
6913 /* Reload doesn't know about the flags register, and doesn't know that
6914 it doesn't want to clobber it. We can only do this with PLUS. */
6915 if (code != PLUS)
6916 abort ();
6917 emit_insn (op);
6918 }
6919 else
6920 {
6921 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6922 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6923 }
6924
6925 /* Fix up the destination if needed. */
6926 if (dst != operands[0])
6927 emit_move_insn (operands[0], dst);
6928 }
6929
6930 /* Return TRUE or FALSE depending on whether the binary operator meets the
6931 appropriate constraints. */
6932
6933 int
6934 ix86_binary_operator_ok (code, mode, operands)
6935 enum rtx_code code;
6936 enum machine_mode mode ATTRIBUTE_UNUSED;
6937 rtx operands[3];
6938 {
6939 /* Both source operands cannot be in memory. */
6940 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6941 return 0;
6942 /* If the operation is not commutative, source 1 cannot be a constant. */
6943 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6944 return 0;
6945 /* If the destination is memory, we must have a matching source operand. */
6946 if (GET_CODE (operands[0]) == MEM
6947 && ! (rtx_equal_p (operands[0], operands[1])
6948 || (GET_RTX_CLASS (code) == 'c'
6949 && rtx_equal_p (operands[0], operands[2]))))
6950 return 0;
6951 /* If the operation is not commutative and source 1 is memory, we must
6952 have a matching destination. */
6953 if (GET_CODE (operands[1]) == MEM
6954 && GET_RTX_CLASS (code) != 'c'
6955 && ! rtx_equal_p (operands[0], operands[1]))
6956 return 0;
6957 return 1;
6958 }
6959
6960 /* Attempt to expand a unary operator. Make the expansion closer to the
6961 actual machine than just general_operand, which would allow 2 separate
6962 memory references (one output, one input) in a single insn. */
6963
6964 void
6965 ix86_expand_unary_operator (code, mode, operands)
6966 enum rtx_code code;
6967 enum machine_mode mode;
6968 rtx operands[];
6969 {
6970 int matching_memory;
6971 rtx src, dst, op, clob;
6972
6973 dst = operands[0];
6974 src = operands[1];
6975
6976 /* If the destination is memory, and we do not have matching source
6977 operands, do things in registers. */
6978 matching_memory = 0;
6979 if (GET_CODE (dst) == MEM)
6980 {
6981 if (rtx_equal_p (dst, src))
6982 matching_memory = 1;
6983 else
6984 dst = gen_reg_rtx (mode);
6985 }
6986
6987 /* When source operand is memory, destination must match. */
6988 if (!matching_memory && GET_CODE (src) == MEM)
6989 src = force_reg (mode, src);
6990
6991 /* If optimizing, copy to regs to improve CSE */
6992 if (optimize && ! no_new_pseudos)
6993 {
6994 if (GET_CODE (dst) == MEM)
6995 dst = gen_reg_rtx (mode);
6996 if (GET_CODE (src) == MEM)
6997 src = force_reg (mode, src);
6998 }
6999
7000 /* Emit the instruction. */
7001
7002 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7003 if (reload_in_progress || code == NOT)
7004 {
7005 /* Reload doesn't know about the flags register, and doesn't know that
7006 it doesn't want to clobber it. */
7007 if (code != NOT)
7008 abort ();
7009 emit_insn (op);
7010 }
7011 else
7012 {
7013 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7014 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7015 }
7016
7017 /* Fix up the destination if needed. */
7018 if (dst != operands[0])
7019 emit_move_insn (operands[0], dst);
7020 }
7021
7022 /* Return TRUE or FALSE depending on whether the unary operator meets the
7023 appropriate constraints. */
7024
7025 int
7026 ix86_unary_operator_ok (code, mode, operands)
7027 enum rtx_code code ATTRIBUTE_UNUSED;
7028 enum machine_mode mode ATTRIBUTE_UNUSED;
7029 rtx operands[2] ATTRIBUTE_UNUSED;
7030 {
7031 /* If one of operands is memory, source and destination must match. */
7032 if ((GET_CODE (operands[0]) == MEM
7033 || GET_CODE (operands[1]) == MEM)
7034 && ! rtx_equal_p (operands[0], operands[1]))
7035 return FALSE;
7036 return TRUE;
7037 }
7038
7039 /* Return TRUE or FALSE depending on whether the first SET in INSN
7040 has source and destination with matching CC modes, and whether the
7041 CC mode is at least as constrained as REQ_MODE. */
7042
7043 int
7044 ix86_match_ccmode (insn, req_mode)
7045 rtx insn;
7046 enum machine_mode req_mode;
7047 {
7048 rtx set;
7049 enum machine_mode set_mode;
7050
7051 set = PATTERN (insn);
7052 if (GET_CODE (set) == PARALLEL)
7053 set = XVECEXP (set, 0, 0);
7054 if (GET_CODE (set) != SET)
7055 abort ();
7056 if (GET_CODE (SET_SRC (set)) != COMPARE)
7057 abort ();
7058
7059 set_mode = GET_MODE (SET_DEST (set));
7060 switch (set_mode)
7061 {
7062 case CCNOmode:
7063 if (req_mode != CCNOmode
7064 && (req_mode != CCmode
7065 || XEXP (SET_SRC (set), 1) != const0_rtx))
7066 return 0;
7067 break;
7068 case CCmode:
7069 if (req_mode == CCGCmode)
7070 return 0;
7071 /* FALLTHRU */
7072 case CCGCmode:
7073 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7074 return 0;
7075 /* FALLTHRU */
7076 case CCGOCmode:
7077 if (req_mode == CCZmode)
7078 return 0;
7079 /* FALLTHRU */
7080 case CCZmode:
7081 break;
7082
7083 default:
7084 abort ();
7085 }
7086
7087 return (GET_MODE (SET_SRC (set)) == set_mode);
7088 }
7089
7090 /* Generate insn patterns to do an integer compare of OPERANDS. */
7091
7092 static rtx
7093 ix86_expand_int_compare (code, op0, op1)
7094 enum rtx_code code;
7095 rtx op0, op1;
7096 {
7097 enum machine_mode cmpmode;
7098 rtx tmp, flags;
7099
7100 cmpmode = SELECT_CC_MODE (code, op0, op1);
7101 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7102
7103 /* This is very simple, but making the interface the same as in the
7104 FP case makes the rest of the code easier. */
7105 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7106 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7107
7108 /* Return the test that should be put into the flags user, i.e.
7109 the bcc, scc, or cmov instruction. */
7110 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7111 }
7112
7113 /* Figure out whether to use ordered or unordered fp comparisons.
7114 Return the appropriate mode to use. */
7115
7116 enum machine_mode
7117 ix86_fp_compare_mode (code)
7118 enum rtx_code code ATTRIBUTE_UNUSED;
7119 {
7120 /* ??? In order to make all comparisons reversible, we do all comparisons
7121 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7122 all forms of trapping and nontrapping comparisons, we can make inequality
7123 comparisons trapping again, since it results in better code when using
7124 FCOM based compares. */
7125 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7126 }
7127
7128 enum machine_mode
7129 ix86_cc_mode (code, op0, op1)
7130 enum rtx_code code;
7131 rtx op0, op1;
7132 {
7133 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7134 return ix86_fp_compare_mode (code);
7135 switch (code)
7136 {
7137 /* Only zero flag is needed. */
7138 case EQ: /* ZF=0 */
7139 case NE: /* ZF!=0 */
7140 return CCZmode;
7141 /* Codes needing carry flag. */
7142 case GEU: /* CF=0 */
7143 case GTU: /* CF=0 & ZF=0 */
7144 case LTU: /* CF=1 */
7145 case LEU: /* CF=1 | ZF=1 */
7146 return CCmode;
7147 /* Codes possibly doable only with sign flag when
7148 comparing against zero. */
7149 case GE: /* SF=OF or SF=0 */
7150 case LT: /* SF<>OF or SF=1 */
7151 if (op1 == const0_rtx)
7152 return CCGOCmode;
7153 else
7154 /* For other cases Carry flag is not required. */
7155 return CCGCmode;
7156 /* Codes doable only with the sign flag when comparing
7157 against zero, but we lack a jump instruction for it,
7158 so we need to use relational tests against overflow,
7159 which thus needs to be zero. */
7160 case GT: /* ZF=0 & SF=OF */
7161 case LE: /* ZF=1 | SF<>OF */
7162 if (op1 == const0_rtx)
7163 return CCNOmode;
7164 else
7165 return CCGCmode;
7166 /* The strcmp pattern does (use flags), and combine may ask us for the
7167 proper mode. */
7168 case USE:
7169 return CCmode;
7170 default:
7171 abort ();
7172 }
7173 }
7174
7175 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7176
7177 int
7178 ix86_use_fcomi_compare (code)
7179 enum rtx_code code ATTRIBUTE_UNUSED;
7180 {
7181 enum rtx_code swapped_code = swap_condition (code);
7182 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7183 || (ix86_fp_comparison_cost (swapped_code)
7184 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7185 }
7186
7187 /* Swap, force into registers, or otherwise massage the two operands
7188 to an fp comparison. The operands are updated in place; the new
7189 comparison code is returned. */
7190
7191 static enum rtx_code
7192 ix86_prepare_fp_compare_args (code, pop0, pop1)
7193 enum rtx_code code;
7194 rtx *pop0, *pop1;
7195 {
7196 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7197 rtx op0 = *pop0, op1 = *pop1;
7198 enum machine_mode op_mode = GET_MODE (op0);
7199 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7200
7201 /* All of the unordered compare instructions only work on registers.
7202 The same is true of the XFmode compare instructions. The same is
7203 true of the fcomi compare instructions. */
7204
7205 if (!is_sse
7206 && (fpcmp_mode == CCFPUmode
7207 || op_mode == XFmode
7208 || op_mode == TFmode
7209 || ix86_use_fcomi_compare (code)))
7210 {
7211 op0 = force_reg (op_mode, op0);
7212 op1 = force_reg (op_mode, op1);
7213 }
7214 else
7215 {
7216 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7217 things around if they appear profitable, otherwise force op0
7218 into a register. */
7219
7220 if (standard_80387_constant_p (op0) == 0
7221 || (GET_CODE (op0) == MEM
7222 && ! (standard_80387_constant_p (op1) == 0
7223 || GET_CODE (op1) == MEM)))
7224 {
7225 rtx tmp;
7226 tmp = op0, op0 = op1, op1 = tmp;
7227 code = swap_condition (code);
7228 }
7229
7230 if (GET_CODE (op0) != REG)
7231 op0 = force_reg (op_mode, op0);
7232
7233 if (CONSTANT_P (op1))
7234 {
7235 if (standard_80387_constant_p (op1))
7236 op1 = force_reg (op_mode, op1);
7237 else
7238 op1 = validize_mem (force_const_mem (op_mode, op1));
7239 }
7240 }
7241
7242 /* Try to rearrange the comparison to make it cheaper. */
7243 if (ix86_fp_comparison_cost (code)
7244 > ix86_fp_comparison_cost (swap_condition (code))
7245 && (GET_CODE (op1) == REG || !no_new_pseudos))
7246 {
7247 rtx tmp;
7248 tmp = op0, op0 = op1, op1 = tmp;
7249 code = swap_condition (code);
7250 if (GET_CODE (op0) != REG)
7251 op0 = force_reg (op_mode, op0);
7252 }
7253
7254 *pop0 = op0;
7255 *pop1 = op1;
7256 return code;
7257 }
7258
7259 /* Convert a comparison code we use to represent an FP comparison to the
7260 integer code that will result in a proper branch. Return UNKNOWN if no
7261 such code is available. */
7262 static enum rtx_code
7263 ix86_fp_compare_code_to_integer (code)
7264 enum rtx_code code;
7265 {
7266 switch (code)
7267 {
7268 case GT:
7269 return GTU;
7270 case GE:
7271 return GEU;
7272 case ORDERED:
7273 case UNORDERED:
7274 return code;
7275 break;
7276 case UNEQ:
7277 return EQ;
7278 break;
7279 case UNLT:
7280 return LTU;
7281 break;
7282 case UNLE:
7283 return LEU;
7284 break;
7285 case LTGT:
7286 return NE;
7287 break;
7288 default:
7289 return UNKNOWN;
7290 }
7291 }
7292
7293 /* Split comparison code CODE into comparisons we can do using branch
7294 instructions. BYPASS_CODE is the comparison code for a branch that will
7295 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7296 is not required, the corresponding value is set to NIL.
7297 We never require more than two branches. */
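/* For example, an IEEE-conformant LT test becomes an UNLT branch preceded
   by an UNORDERED bypass branch, because the carry flag alone is also set
   for unordered operands and would otherwise accept NaNs.  */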
7298 static void
7299 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7300 enum rtx_code code, *bypass_code, *first_code, *second_code;
7301 {
7302 *first_code = code;
7303 *bypass_code = NIL;
7304 *second_code = NIL;
7305
7306 /* The fcomi comparison sets flags as follows:
7307
7308 cmp ZF PF CF
7309 > 0 0 0
7310 < 0 0 1
7311 = 1 0 0
7312 un 1 1 1 */
7313
7314 switch (code)
7315 {
7316 case GT: /* GTU - CF=0 & ZF=0 */
7317 case GE: /* GEU - CF=0 */
7318 case ORDERED: /* PF=0 */
7319 case UNORDERED: /* PF=1 */
7320 case UNEQ: /* EQ - ZF=1 */
7321 case UNLT: /* LTU - CF=1 */
7322 case UNLE: /* LEU - CF=1 | ZF=1 */
7323 case LTGT: /* NE - ZF=0 */
7324 break;
7325 case LT: /* LTU - CF=1 - fails on unordered */
7326 *first_code = UNLT;
7327 *bypass_code = UNORDERED;
7328 break;
7329 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7330 *first_code = UNLE;
7331 *bypass_code = UNORDERED;
7332 break;
7333 case EQ: /* EQ - ZF=1 - fails on unordered */
7334 *first_code = UNEQ;
7335 *bypass_code = UNORDERED;
7336 break;
7337 case NE: /* NE - ZF=0 - fails on unordered */
7338 *first_code = LTGT;
7339 *second_code = UNORDERED;
7340 break;
7341 case UNGE: /* GEU - CF=0 - fails on unordered */
7342 *first_code = GE;
7343 *second_code = UNORDERED;
7344 break;
7345 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7346 *first_code = GT;
7347 *second_code = UNORDERED;
7348 break;
7349 default:
7350 abort ();
7351 }
7352 if (!TARGET_IEEE_FP)
7353 {
7354 *second_code = NIL;
7355 *bypass_code = NIL;
7356 }
7357 }
7358
7359 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7360 All of the following functions use the number of instructions as a cost metric.
7361 In the future this should be tweaked to compute bytes for optimize_size and
7362 take into account performance of various instructions on various CPUs. */
7363 static int
7364 ix86_fp_comparison_arithmetics_cost (code)
7365 enum rtx_code code;
7366 {
7367 if (!TARGET_IEEE_FP)
7368 return 4;
7369 /* The cost of code output by ix86_expand_fp_compare. */
7370 switch (code)
7371 {
7372 case UNLE:
7373 case UNLT:
7374 case LTGT:
7375 case GT:
7376 case GE:
7377 case UNORDERED:
7378 case ORDERED:
7379 case UNEQ:
7380 return 4;
7381 break;
7382 case LT:
7383 case NE:
7384 case EQ:
7385 case UNGE:
7386 return 5;
7387 break;
7388 case LE:
7389 case UNGT:
7390 return 6;
7391 break;
7392 default:
7393 abort ();
7394 }
7395 }
7396
7397 /* Return cost of comparison done using fcomi operation.
7398 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7399 static int
7400 ix86_fp_comparison_fcomi_cost (code)
7401 enum rtx_code code;
7402 {
7403 enum rtx_code bypass_code, first_code, second_code;
7404 /* Return an arbitrarily high cost when the instruction is not supported -
7405 this prevents gcc from using it. */
7406 if (!TARGET_CMOVE)
7407 return 1024;
7408 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7409 return (bypass_code != NIL || second_code != NIL) + 2;
7410 }
7411
7412 /* Return cost of comparison done using sahf operation.
7413 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7414 static int
7415 ix86_fp_comparison_sahf_cost (code)
7416 enum rtx_code code;
7417 {
7418 enum rtx_code bypass_code, first_code, second_code;
7419 /* Return an arbitrarily high cost when the instruction is not preferred -
7420 this keeps gcc from using it. */
7421 if (!TARGET_USE_SAHF && !optimize_size)
7422 return 1024;
7423 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7424 return (bypass_code != NIL || second_code != NIL) + 3;
7425 }
7426
7427 /* Compute cost of the comparison done using any method.
7428 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7429 static int
7430 ix86_fp_comparison_cost (code)
7431 enum rtx_code code;
7432 {
7433 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7434 int min;
7435
7436 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7437 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7438
7439 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7440 if (min > sahf_cost)
7441 min = sahf_cost;
7442 if (min > fcomi_cost)
7443 min = fcomi_cost;
7444 return min;
7445 }
7446
7447 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7448
7449 static rtx
7450 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7451 enum rtx_code code;
7452 rtx op0, op1, scratch;
7453 rtx *second_test;
7454 rtx *bypass_test;
7455 {
7456 enum machine_mode fpcmp_mode, intcmp_mode;
7457 rtx tmp, tmp2;
7458 int cost = ix86_fp_comparison_cost (code);
7459 enum rtx_code bypass_code, first_code, second_code;
7460
7461 fpcmp_mode = ix86_fp_compare_mode (code);
7462 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7463
7464 if (second_test)
7465 *second_test = NULL_RTX;
7466 if (bypass_test)
7467 *bypass_test = NULL_RTX;
7468
7469 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7470
7471 /* Do fcomi/sahf based test when profitable. */
7472 if ((bypass_code == NIL || bypass_test)
7473 && (second_code == NIL || second_test)
7474 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7475 {
7476 if (TARGET_CMOVE)
7477 {
7478 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7479 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7480 tmp);
7481 emit_insn (tmp);
7482 }
7483 else
7484 {
7485 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7486 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
7487 if (!scratch)
7488 scratch = gen_reg_rtx (HImode);
7489 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7490 emit_insn (gen_x86_sahf_1 (scratch));
7491 }
7492
7493 /* The FP codes work out to act like unsigned. */
7494 intcmp_mode = fpcmp_mode;
7495 code = first_code;
7496 if (bypass_code != NIL)
7497 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7498 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7499 const0_rtx);
7500 if (second_code != NIL)
7501 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7502 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7503 const0_rtx);
7504 }
7505 else
7506 {
7507 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7508 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7509 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
7510 if (!scratch)
7511 scratch = gen_reg_rtx (HImode);
7512 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7513
7514 /* In the unordered case, we have to check C2 for NaN's, which
7515 doesn't happen to work out to anything nice combination-wise.
7516 So do some bit twiddling on the value we've got in AH to come
7517 up with an appropriate set of condition codes. */
7518
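/* After the fnstsw above, AH holds the 387 condition bits: 0x01 is C0
   (below), 0x04 is C2 (unordered) and 0x40 is C3 (equal), so 0x45 masks
   all three; the constants used below are combinations of these.  */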
7519 intcmp_mode = CCNOmode;
7520 switch (code)
7521 {
7522 case GT:
7523 case UNGT:
7524 if (code == GT || !TARGET_IEEE_FP)
7525 {
7526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7527 code = EQ;
7528 }
7529 else
7530 {
7531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7532 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7533 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7534 intcmp_mode = CCmode;
7535 code = GEU;
7536 }
7537 break;
7538 case LT:
7539 case UNLT:
7540 if (code == LT && TARGET_IEEE_FP)
7541 {
7542 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7544 intcmp_mode = CCmode;
7545 code = EQ;
7546 }
7547 else
7548 {
7549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7550 code = NE;
7551 }
7552 break;
7553 case GE:
7554 case UNGE:
7555 if (code == GE || !TARGET_IEEE_FP)
7556 {
7557 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7558 code = EQ;
7559 }
7560 else
7561 {
7562 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7563 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7564 GEN_INT (0x01)));
7565 code = NE;
7566 }
7567 break;
7568 case LE:
7569 case UNLE:
7570 if (code == LE && TARGET_IEEE_FP)
7571 {
7572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7573 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7574 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7575 intcmp_mode = CCmode;
7576 code = LTU;
7577 }
7578 else
7579 {
7580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7581 code = NE;
7582 }
7583 break;
7584 case EQ:
7585 case UNEQ:
7586 if (code == EQ && TARGET_IEEE_FP)
7587 {
7588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7589 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7590 intcmp_mode = CCmode;
7591 code = EQ;
7592 }
7593 else
7594 {
7595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7596 code = NE;
7597 break;
7598 }
7599 break;
7600 case NE:
7601 case LTGT:
7602 if (code == NE && TARGET_IEEE_FP)
7603 {
7604 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7605 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7606 GEN_INT (0x40)));
7607 code = NE;
7608 }
7609 else
7610 {
7611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7612 code = EQ;
7613 }
7614 break;
7615
7616 case UNORDERED:
7617 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7618 code = NE;
7619 break;
7620 case ORDERED:
7621 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7622 code = EQ;
7623 break;
7624
7625 default:
7626 abort ();
7627 }
7628 }
7629
7630 /* Return the test that should be put into the flags user, i.e.
7631 the bcc, scc, or cmov instruction. */
7632 return gen_rtx_fmt_ee (code, VOIDmode,
7633 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7634 const0_rtx);
7635 }
7636
7637 rtx
7638 ix86_expand_compare (code, second_test, bypass_test)
7639 enum rtx_code code;
7640 rtx *second_test, *bypass_test;
7641 {
7642 rtx op0, op1, ret;
7643 op0 = ix86_compare_op0;
7644 op1 = ix86_compare_op1;
7645
7646 if (second_test)
7647 *second_test = NULL_RTX;
7648 if (bypass_test)
7649 *bypass_test = NULL_RTX;
7650
7651 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7652 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7653 second_test, bypass_test);
7654 else
7655 ret = ix86_expand_int_compare (code, op0, op1);
7656
7657 return ret;
7658 }
7659
7660 /* Return true if the CODE will result in a nontrivial jump sequence. */
7661 bool
7662 ix86_fp_jump_nontrivial_p (code)
7663 enum rtx_code code;
7664 {
7665 enum rtx_code bypass_code, first_code, second_code;
7666 if (!TARGET_CMOVE)
7667 return true;
7668 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7669 return bypass_code != NIL || second_code != NIL;
7670 }
7671
7672 void
7673 ix86_expand_branch (code, label)
7674 enum rtx_code code;
7675 rtx label;
7676 {
7677 rtx tmp;
7678
7679 switch (GET_MODE (ix86_compare_op0))
7680 {
7681 case QImode:
7682 case HImode:
7683 case SImode:
7684 simple:
7685 tmp = ix86_expand_compare (code, NULL, NULL);
7686 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7687 gen_rtx_LABEL_REF (VOIDmode, label),
7688 pc_rtx);
7689 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7690 return;
7691
7692 case SFmode:
7693 case DFmode:
7694 case XFmode:
7695 case TFmode:
7696 {
7697 rtvec vec;
7698 int use_fcomi;
7699 enum rtx_code bypass_code, first_code, second_code;
7700
7701 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7702 &ix86_compare_op1);
7703
7704 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7705
7706 /* Check whether we will use the natural sequence with one jump. If
7707 so, we can expand the jump early. Otherwise delay expansion by
7708 creating a compound insn so as not to confuse the optimizers. */
7709 if (bypass_code == NIL && second_code == NIL
7710 && TARGET_CMOVE)
7711 {
7712 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7713 gen_rtx_LABEL_REF (VOIDmode, label),
7714 pc_rtx, NULL_RTX);
7715 }
7716 else
7717 {
7718 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7719 ix86_compare_op0, ix86_compare_op1);
7720 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7721 gen_rtx_LABEL_REF (VOIDmode, label),
7722 pc_rtx);
7723 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7724
7725 use_fcomi = ix86_use_fcomi_compare (code);
7726 vec = rtvec_alloc (3 + !use_fcomi);
7727 RTVEC_ELT (vec, 0) = tmp;
7728 RTVEC_ELT (vec, 1)
7729 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7730 RTVEC_ELT (vec, 2)
7731 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7732 if (! use_fcomi)
7733 RTVEC_ELT (vec, 3)
7734 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7735
7736 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7737 }
7738 return;
7739 }
7740
7741 case DImode:
7742 if (TARGET_64BIT)
7743 goto simple;
7744 /* Expand DImode branch into multiple compare+branch. */
7745 {
7746 rtx lo[2], hi[2], label2;
7747 enum rtx_code code1, code2, code3;
7748
7749 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7750 {
7751 tmp = ix86_compare_op0;
7752 ix86_compare_op0 = ix86_compare_op1;
7753 ix86_compare_op1 = tmp;
7754 code = swap_condition (code);
7755 }
7756 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7757 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7758
7759 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7760 avoid two branches. This costs one extra insn, so disable when
7761 optimizing for size. */
7762
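      /* For illustration: a DImode equality a == b holds exactly when
         ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, so a single compare of
         the combined value against zero replaces two compare+branch pairs;
         when a half of b is the constant zero, the matching xor is omitted
         below.  */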
7763 if ((code == EQ || code == NE)
7764 && (!optimize_size
7765 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7766 {
7767 rtx xor0, xor1;
7768
7769 xor1 = hi[0];
7770 if (hi[1] != const0_rtx)
7771 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7772 NULL_RTX, 0, OPTAB_WIDEN);
7773
7774 xor0 = lo[0];
7775 if (lo[1] != const0_rtx)
7776 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7777 NULL_RTX, 0, OPTAB_WIDEN);
7778
7779 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7780 NULL_RTX, 0, OPTAB_WIDEN);
7781
7782 ix86_compare_op0 = tmp;
7783 ix86_compare_op1 = const0_rtx;
7784 ix86_expand_branch (code, label);
7785 return;
7786 }
7787
7788 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
7789 op1 is a constant and the low word is zero, then we can just
7790 examine the high word. */
7791
7792 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7793 switch (code)
7794 {
7795 case LT: case LTU: case GE: case GEU:
7796 ix86_compare_op0 = hi[0];
7797 ix86_compare_op1 = hi[1];
7798 ix86_expand_branch (code, label);
7799 return;
7800 default:
7801 break;
7802 }
7803
7804 /* Otherwise, we need two or three jumps. */
7805
7806 label2 = gen_label_rtx ();
7807
7808 code1 = code;
7809 code2 = swap_condition (code);
7810 code3 = unsigned_condition (code);
7811
7812 switch (code)
7813 {
7814 case LT: case GT: case LTU: case GTU:
7815 break;
7816
7817 case LE: code1 = LT; code2 = GT; break;
7818 case GE: code1 = GT; code2 = LT; break;
7819 case LEU: code1 = LTU; code2 = GTU; break;
7820 case GEU: code1 = GTU; code2 = LTU; break;
7821
7822 case EQ: code1 = NIL; code2 = NE; break;
7823 case NE: code2 = NIL; break;
7824
7825 default:
7826 abort ();
7827 }
7828
7829 /*
7830 * a < b =>
7831 * if (hi(a) < hi(b)) goto true;
7832 * if (hi(a) > hi(b)) goto false;
7833 * if (lo(a) < lo(b)) goto true;
7834 * false:
7835 */
7836
7837 ix86_compare_op0 = hi[0];
7838 ix86_compare_op1 = hi[1];
7839
7840 if (code1 != NIL)
7841 ix86_expand_branch (code1, label);
7842 if (code2 != NIL)
7843 ix86_expand_branch (code2, label2);
7844
7845 ix86_compare_op0 = lo[0];
7846 ix86_compare_op1 = lo[1];
7847 ix86_expand_branch (code3, label);
7848
7849 if (code2 != NIL)
7850 emit_label (label2);
7851 return;
7852 }
7853
7854 default:
7855 abort ();
7856 }
7857 }
7858
7859 /* Split branch based on floating point condition. */
7860 void
7861 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7862 enum rtx_code code;
7863 rtx op1, op2, target1, target2, tmp;
7864 {
7865 rtx second, bypass;
7866 rtx label = NULL_RTX;
7867 rtx condition;
7868 int bypass_probability = -1, second_probability = -1, probability = -1;
7869 rtx i;
7870
7871 if (target2 != pc_rtx)
7872 {
7873 rtx tmp = target2;
7874 code = reverse_condition_maybe_unordered (code);
7875 target2 = target1;
7876 target1 = tmp;
7877 }
7878
7879 condition = ix86_expand_fp_compare (code, op1, op2,
7880 tmp, &second, &bypass);
7881
7882 if (split_branch_probability >= 0)
7883 {
7884 /* Distribute the probabilities across the jumps.
7885 Assume that BYPASS and SECOND are always tests
7886 for UNORDERED. */
7887 probability = split_branch_probability;
7888
7889 /* A value of 1 is low enough that there is no need for the probability
7890 to be updated. Later we may run some experiments and see
7891 whether unordered values are more frequent in practice. */
7892 if (bypass)
7893 bypass_probability = 1;
7894 if (second)
7895 second_probability = 1;
7896 }
7897 if (bypass != NULL_RTX)
7898 {
7899 label = gen_label_rtx ();
7900 i = emit_jump_insn (gen_rtx_SET
7901 (VOIDmode, pc_rtx,
7902 gen_rtx_IF_THEN_ELSE (VOIDmode,
7903 bypass,
7904 gen_rtx_LABEL_REF (VOIDmode,
7905 label),
7906 pc_rtx)));
7907 if (bypass_probability >= 0)
7908 REG_NOTES (i)
7909 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7910 GEN_INT (bypass_probability),
7911 REG_NOTES (i));
7912 }
7913 i = emit_jump_insn (gen_rtx_SET
7914 (VOIDmode, pc_rtx,
7915 gen_rtx_IF_THEN_ELSE (VOIDmode,
7916 condition, target1, target2)));
7917 if (probability >= 0)
7918 REG_NOTES (i)
7919 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7920 GEN_INT (probability),
7921 REG_NOTES (i));
7922 if (second != NULL_RTX)
7923 {
7924 i = emit_jump_insn (gen_rtx_SET
7925 (VOIDmode, pc_rtx,
7926 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7927 target2)));
7928 if (second_probability >= 0)
7929 REG_NOTES (i)
7930 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7931 GEN_INT (second_probability),
7932 REG_NOTES (i));
7933 }
7934 if (label != NULL_RTX)
7935 emit_label (label);
7936 }
7937
7938 int
7939 ix86_expand_setcc (code, dest)
7940 enum rtx_code code;
7941 rtx dest;
7942 {
7943 rtx ret, tmp, tmpreg;
7944 rtx second_test, bypass_test;
7945
7946 if (GET_MODE (ix86_compare_op0) == DImode
7947 && !TARGET_64BIT)
7948 return 0; /* FAIL */
7949
7950 if (GET_MODE (dest) != QImode)
7951 abort ();
7952
7953 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7954 PUT_MODE (ret, QImode);
7955
7956 tmp = dest;
7957 tmpreg = dest;
7958
7959 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7960 if (bypass_test || second_test)
7961 {
7962 rtx test = second_test;
7963 int bypass = 0;
7964 rtx tmp2 = gen_reg_rtx (QImode);
7965 if (bypass_test)
7966 {
7967 if (second_test)
7968 abort ();
7969 test = bypass_test;
7970 bypass = 1;
7971 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7972 }
7973 PUT_MODE (test, QImode);
7974 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7975
7976 if (bypass)
7977 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7978 else
7979 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7980 }
7981
7982 return 1; /* DONE */
7983 }
7984
7985 int
7986 ix86_expand_int_movcc (operands)
7987 rtx operands[];
7988 {
7989 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7990 rtx compare_seq, compare_op;
7991 rtx second_test, bypass_test;
7992 enum machine_mode mode = GET_MODE (operands[0]);
7993
7994 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
7995 In case the comparison is done with an immediate, we can convert it to LTU or
7996 GEU by altering the integer. */
7997
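   /* For illustration: with an unsigned SImode operand, "x <= 4" (LEU)
      becomes "x < 5" (LTU) and "x > 4" (GTU) becomes "x >= 5" (GEU) by
      incrementing the immediate; the 0xffffffff test below rules out the
      case where the increment would wrap around to zero.  */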
7998 if ((code == LEU || code == GTU)
7999 && GET_CODE (ix86_compare_op1) == CONST_INT
8000 && mode != HImode
8001 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
8002 /* The operand still must be representable as sign extended value. */
8003 && (!TARGET_64BIT
8004 || GET_MODE (ix86_compare_op0) != DImode
8005 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
8006 && GET_CODE (operands[2]) == CONST_INT
8007 && GET_CODE (operands[3]) == CONST_INT)
8008 {
8009 if (code == LEU)
8010 code = LTU;
8011 else
8012 code = GEU;
8013 ix86_compare_op1
8014 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8015 GET_MODE (ix86_compare_op0));
8016 }
8017
8018 start_sequence ();
8019 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8020 compare_seq = gen_sequence ();
8021 end_sequence ();
8022
8023 compare_code = GET_CODE (compare_op);
8024
8025 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8026 HImode insns, we'd be swallowed in word prefix ops. */
8027
8028 if (mode != HImode
8029 && (mode != DImode || TARGET_64BIT)
8030 && GET_CODE (operands[2]) == CONST_INT
8031 && GET_CODE (operands[3]) == CONST_INT)
8032 {
8033 rtx out = operands[0];
8034 HOST_WIDE_INT ct = INTVAL (operands[2]);
8035 HOST_WIDE_INT cf = INTVAL (operands[3]);
8036 HOST_WIDE_INT diff;
8037
8038 if ((compare_code == LTU || compare_code == GEU)
8039 && !second_test && !bypass_test)
8040 {
8041
8042 /* Detect overlap between destination and compare sources. */
8043 rtx tmp = out;
8044
8045 /* To simplify rest of code, restrict to the GEU case. */
8046 if (compare_code == LTU)
8047 {
8048 int tmp = ct;
8049 ct = cf;
8050 cf = tmp;
8051 compare_code = reverse_condition (compare_code);
8052 code = reverse_condition (code);
8053 }
8054 diff = ct - cf;
8055
8056 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8057 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8058 tmp = gen_reg_rtx (mode);
8059
8060 emit_insn (compare_seq);
8061 if (mode == DImode)
8062 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8063 else
8064 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8065
8066 if (diff == 1)
8067 {
8068 /*
8069 * cmpl op0,op1
8070 * sbbl dest,dest
8071 * [addl dest, ct]
8072 *
8073 * Size 5 - 8.
8074 */
8075 if (ct)
8076 tmp = expand_simple_binop (mode, PLUS,
8077 tmp, GEN_INT (ct),
8078 tmp, 1, OPTAB_DIRECT);
8079 }
8080 else if (cf == -1)
8081 {
8082 /*
8083 * cmpl op0,op1
8084 * sbbl dest,dest
8085 * orl $ct, dest
8086 *
8087 * Size 8.
8088 */
8089 tmp = expand_simple_binop (mode, IOR,
8090 tmp, GEN_INT (ct),
8091 tmp, 1, OPTAB_DIRECT);
8092 }
8093 else if (diff == -1 && ct)
8094 {
8095 /*
8096 * cmpl op0,op1
8097 * sbbl dest,dest
8098 * xorl $-1, dest
8099 * [addl dest, cf]
8100 *
8101 * Size 8 - 11.
8102 */
8103 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8104 if (cf)
8105 tmp = expand_simple_binop (mode, PLUS,
8106 tmp, GEN_INT (cf),
8107 tmp, 1, OPTAB_DIRECT);
8108 }
8109 else
8110 {
8111 /*
8112 * cmpl op0,op1
8113 * sbbl dest,dest
8114 * andl cf - ct, dest
8115 * [addl dest, ct]
8116 *
8117 * Size 8 - 11.
8118 */
8119 tmp = expand_simple_binop (mode, AND,
8120 tmp,
8121 gen_int_mode (cf - ct, mode),
8122 tmp, 1, OPTAB_DIRECT);
8123 if (ct)
8124 tmp = expand_simple_binop (mode, PLUS,
8125 tmp, GEN_INT (ct),
8126 tmp, 1, OPTAB_DIRECT);
8127 }
8128
8129 if (tmp != out)
8130 emit_move_insn (out, tmp);
8131
8132 return 1; /* DONE */
8133 }
8134
8135 diff = ct - cf;
8136 if (diff < 0)
8137 {
8138 HOST_WIDE_INT tmp;
8139 tmp = ct, ct = cf, cf = tmp;
8140 diff = -diff;
8141 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8142 {
8143 /* We may be reversing an unordered compare to a normal compare, which
8144 is not valid in general (we may convert a non-trapping condition
8145 to a trapping one); however, on i386 we currently emit all
8146 comparisons unordered. */
8147 compare_code = reverse_condition_maybe_unordered (compare_code);
8148 code = reverse_condition_maybe_unordered (code);
8149 }
8150 else
8151 {
8152 compare_code = reverse_condition (compare_code);
8153 code = reverse_condition (code);
8154 }
8155 }
8156
8157 compare_code = NIL;
8158 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8159 && GET_CODE (ix86_compare_op1) == CONST_INT)
8160 {
8161 if (ix86_compare_op1 == const0_rtx
8162 && (code == LT || code == GE))
8163 compare_code = code;
8164 else if (ix86_compare_op1 == constm1_rtx)
8165 {
8166 if (code == LE)
8167 compare_code = LT;
8168 else if (code == GT)
8169 compare_code = GE;
8170 }
8171 }
8172
8173 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8174 if (compare_code != NIL
8175 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8176 && (cf == -1 || ct == -1))
8177 {
8178 /* If lea code below could be used, only optimize
8179 if it results in a 2 insn sequence. */
8180
8181 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8182 || diff == 3 || diff == 5 || diff == 9)
8183 || (compare_code == LT && ct == -1)
8184 || (compare_code == GE && cf == -1))
8185 {
8186 /*
8187 * notl op1 (if necessary)
8188 * sarl $31, op1
8189 * orl cf, op1
8190 */
8191 if (ct != -1)
8192 {
8193 cf = ct;
8194 ct = -1;
8195 code = reverse_condition (code);
8196 }
8197
8198 out = emit_store_flag (out, code, ix86_compare_op0,
8199 ix86_compare_op1, VOIDmode, 0, -1);
8200
8201 out = expand_simple_binop (mode, IOR,
8202 out, GEN_INT (cf),
8203 out, 1, OPTAB_DIRECT);
8204 if (out != operands[0])
8205 emit_move_insn (operands[0], out);
8206
8207 return 1; /* DONE */
8208 }
8209 }
8210
8211 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8212 || diff == 3 || diff == 5 || diff == 9)
8213 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8214 {
8215 /*
8216 * xorl dest,dest
8217 * cmpl op1,op2
8218 * setcc dest
8219 * lea cf(dest*(ct-cf)),dest
8220 *
8221 * Size 14.
8222 *
8223 * This also catches the degenerate setcc-only case.
8224 */
8225
8226 rtx tmp;
8227 int nops;
8228
8229 out = emit_store_flag (out, code, ix86_compare_op0,
8230 ix86_compare_op1, VOIDmode, 0, 1);
8231
8232 nops = 0;
8233 /* On x86_64 the lea instruction operates on Pmode, so we need to get the
8234 arithmetic done in the proper mode to match. */
8235 if (diff == 1)
8236 tmp = out;
8237 else
8238 {
8239 rtx out1;
8240 out1 = out;
8241 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8242 nops++;
8243 if (diff & 1)
8244 {
8245 tmp = gen_rtx_PLUS (mode, tmp, out1);
8246 nops++;
8247 }
8248 }
8249 if (cf != 0)
8250 {
8251 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8252 nops++;
8253 }
8254 if (tmp != out
8255 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8256 {
8257 if (nops == 1)
8258 {
8259 rtx clob;
8260
8261 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8262 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8263
8264 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8265 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8266 emit_insn (tmp);
8267 }
8268 else
8269 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8270 }
8271 if (out != operands[0])
8272 emit_move_insn (operands[0], out);
8273
8274 return 1; /* DONE */
8275 }
8276
8277 /*
8278 * General case: Jumpful:
8279 * xorl dest,dest cmpl op1, op2
8280 * cmpl op1, op2 movl ct, dest
8281 * setcc dest jcc 1f
8282 * decl dest movl cf, dest
8283 * andl (cf-ct),dest 1:
8284 * addl ct,dest
8285 *
8286 * Size 20. Size 14.
8287 *
8288 * This is reasonably steep, but branch mispredict costs are
8289 * high on modern cpus, so consider failing only if optimizing
8290 * for space.
8291 *
8292 * %%% Parameterize branch_cost on the tuning architecture, then
8293 * use that. The 80386 couldn't care less about mispredicts.
8294 */
8295
8296 if (!optimize_size && !TARGET_CMOVE)
8297 {
8298 if (ct == 0)
8299 {
8300 ct = cf;
8301 cf = 0;
8302 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8303 /* We may be reversing an unordered compare to a normal compare,
8304 which is not valid in general (we may convert a non-trapping
8305 condition to a trapping one); however, on i386 we currently
8306 emit all comparisons unordered. */
8307 code = reverse_condition_maybe_unordered (code);
8308 else
8309 {
8310 code = reverse_condition (code);
8311 if (compare_code != NIL)
8312 compare_code = reverse_condition (compare_code);
8313 }
8314 }
8315
8316 if (compare_code != NIL)
8317 {
8318 /* notl op1 (if needed)
8319 sarl $31, op1
8320 andl (cf-ct), op1
8321 addl ct, op1
8322
8323 For x < 0 (resp. x <= -1) there will be no notl,
8324 so if possible swap the constants to get rid of the
8325 complement.
8326 True/false will be -1/0 while code below (store flag
8327 followed by decrement) is 0/-1, so the constants need
8328 to be exchanged once more. */
8329
8330 if (compare_code == GE || !cf)
8331 {
8332 code = reverse_condition (code);
8333 compare_code = LT;
8334 }
8335 else
8336 {
8337 HOST_WIDE_INT tmp = cf;
8338 cf = ct;
8339 ct = tmp;
8340 }
8341
8342 out = emit_store_flag (out, code, ix86_compare_op0,
8343 ix86_compare_op1, VOIDmode, 0, -1);
8344 }
8345 else
8346 {
8347 out = emit_store_flag (out, code, ix86_compare_op0,
8348 ix86_compare_op1, VOIDmode, 0, 1);
8349
8350 out = expand_simple_binop (mode, PLUS,
8351 out, constm1_rtx,
8352 out, 1, OPTAB_DIRECT);
8353 }
8354
8355 out = expand_simple_binop (mode, AND,
8356 out,
8357 gen_int_mode (cf - ct, mode),
8358 out, 1, OPTAB_DIRECT);
8359 out = expand_simple_binop (mode, PLUS,
8360 out, GEN_INT (ct),
8361 out, 1, OPTAB_DIRECT);
8362 if (out != operands[0])
8363 emit_move_insn (operands[0], out);
8364
8365 return 1; /* DONE */
8366 }
8367 }
8368
8369 if (!TARGET_CMOVE)
8370 {
8371 /* Try a few things more with specific constants and a variable. */
8372
8373 optab op;
8374 rtx var, orig_out, out, tmp;
8375
8376 if (optimize_size)
8377 return 0; /* FAIL */
8378
8379 /* If one of the two operands is an interesting constant, load a
8380 constant with the above and mask it in with a logical operation. */
8381
8382 if (GET_CODE (operands[2]) == CONST_INT)
8383 {
8384 var = operands[3];
8385 if (INTVAL (operands[2]) == 0)
8386 operands[3] = constm1_rtx, op = and_optab;
8387 else if (INTVAL (operands[2]) == -1)
8388 operands[3] = const0_rtx, op = ior_optab;
8389 else
8390 return 0; /* FAIL */
8391 }
8392 else if (GET_CODE (operands[3]) == CONST_INT)
8393 {
8394 var = operands[2];
8395 if (INTVAL (operands[3]) == 0)
8396 operands[2] = constm1_rtx, op = and_optab;
8397 else if (INTVAL (operands[3]) == -1)
8398 operands[2] = const0_rtx, op = ior_optab;
8399 else
8400 return 0; /* FAIL */
8401 }
8402 else
8403 return 0; /* FAIL */
8404
8405 orig_out = operands[0];
8406 tmp = gen_reg_rtx (mode);
8407 operands[0] = tmp;
8408
8409 /* Recurse to get the constant loaded. */
8410 if (ix86_expand_int_movcc (operands) == 0)
8411 return 0; /* FAIL */
8412
8413 /* Mask in the interesting variable. */
8414 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8415 OPTAB_WIDEN);
8416 if (out != orig_out)
8417 emit_move_insn (orig_out, out);
8418
8419 return 1; /* DONE */
8420 }
8421
8422 /*
8423 * For comparison with above,
8424 *
8425 * movl cf,dest
8426 * movl ct,tmp
8427 * cmpl op1,op2
8428 * cmovcc tmp,dest
8429 *
8430 * Size 15.
8431 */
8432
8433 if (! nonimmediate_operand (operands[2], mode))
8434 operands[2] = force_reg (mode, operands[2]);
8435 if (! nonimmediate_operand (operands[3], mode))
8436 operands[3] = force_reg (mode, operands[3]);
8437
8438 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8439 {
8440 rtx tmp = gen_reg_rtx (mode);
8441 emit_move_insn (tmp, operands[3]);
8442 operands[3] = tmp;
8443 }
8444 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8445 {
8446 rtx tmp = gen_reg_rtx (mode);
8447 emit_move_insn (tmp, operands[2]);
8448 operands[2] = tmp;
8449 }
8450 if (! register_operand (operands[2], VOIDmode)
8451 && ! register_operand (operands[3], VOIDmode))
8452 operands[2] = force_reg (mode, operands[2]);
8453
8454 emit_insn (compare_seq);
8455 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8456 gen_rtx_IF_THEN_ELSE (mode,
8457 compare_op, operands[2],
8458 operands[3])));
8459 if (bypass_test)
8460 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8461 gen_rtx_IF_THEN_ELSE (mode,
8462 bypass_test,
8463 operands[3],
8464 operands[0])));
8465 if (second_test)
8466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8467 gen_rtx_IF_THEN_ELSE (mode,
8468 second_test,
8469 operands[2],
8470 operands[0])));
8471
8472 return 1; /* DONE */
8473 }
8474
8475 int
8476 ix86_expand_fp_movcc (operands)
8477 rtx operands[];
8478 {
8479 enum rtx_code code;
8480 rtx tmp;
8481 rtx compare_op, second_test, bypass_test;
8482
8483 /* For SF/DFmode conditional moves based on comparisons
8484 in the same mode, we may want to use SSE min/max instructions. */
8485 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8486 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8487 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8488 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8489 && (!TARGET_IEEE_FP
8490 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8491 /* We may be called from the post-reload splitter. */
8492 && (!REG_P (operands[0])
8493 || SSE_REG_P (operands[0])
8494 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8495 {
8496 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8497 code = GET_CODE (operands[1]);
8498
8499 /* See if we have a (cross) match between the comparison operands and
8500 the conditional move operands. */
8501 if (rtx_equal_p (operands[2], op1))
8502 {
8503 rtx tmp = op0;
8504 op0 = op1;
8505 op1 = tmp;
8506 code = reverse_condition_maybe_unordered (code);
8507 }
8508 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8509 {
8510 /* Check for min operation. */
8511 if (code == LT)
8512 {
8513 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8514 if (memory_operand (op0, VOIDmode))
8515 op0 = force_reg (GET_MODE (operands[0]), op0);
8516 if (GET_MODE (operands[0]) == SFmode)
8517 emit_insn (gen_minsf3 (operands[0], op0, op1));
8518 else
8519 emit_insn (gen_mindf3 (operands[0], op0, op1));
8520 return 1;
8521 }
8522 /* Check for max operation. */
8523 if (code == GT)
8524 {
8525 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8526 if (memory_operand (op0, VOIDmode))
8527 op0 = force_reg (GET_MODE (operands[0]), op0);
8528 if (GET_MODE (operands[0]) == SFmode)
8529 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8530 else
8531 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8532 return 1;
8533 }
8534 }
8535 /* Arrange for the condition to be an sse_comparison_operator. In case we
8536 are in non-IEEE mode, try to canonicalize the destination operand
8537 to be first in the comparison - this helps reload avoid extra
8538 moves. */
8539 if (!sse_comparison_operator (operands[1], VOIDmode)
8540 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8541 {
8542 rtx tmp = ix86_compare_op0;
8543 ix86_compare_op0 = ix86_compare_op1;
8544 ix86_compare_op1 = tmp;
8545 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8546 VOIDmode, ix86_compare_op0,
8547 ix86_compare_op1);
8548 }
8549 /* Similarly try to arrange for the result to be the first operand of the
8550 conditional move. We also don't support the NE comparison on SSE, so try to
8551 avoid it. */
8552 if ((rtx_equal_p (operands[0], operands[3])
8553 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8554 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8555 {
8556 rtx tmp = operands[2];
8557 operands[2] = operands[3];
8558 operands[3] = tmp;
8559 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8560 (GET_CODE (operands[1])),
8561 VOIDmode, ix86_compare_op0,
8562 ix86_compare_op1);
8563 }
8564 if (GET_MODE (operands[0]) == SFmode)
8565 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8566 operands[2], operands[3],
8567 ix86_compare_op0, ix86_compare_op1));
8568 else
8569 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8570 operands[2], operands[3],
8571 ix86_compare_op0, ix86_compare_op1));
8572 return 1;
8573 }
8574
8575 /* The floating point conditional move instructions don't directly
8576 support conditions resulting from a signed integer comparison. */
8577
8578 code = GET_CODE (operands[1]);
8579 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8580
8581 /* The floating point conditional move instructions don't directly
8582 support signed integer comparisons. */
8583
8584 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8585 {
8586 if (second_test != NULL || bypass_test != NULL)
8587 abort ();
8588 tmp = gen_reg_rtx (QImode);
8589 ix86_expand_setcc (code, tmp);
8590 code = NE;
8591 ix86_compare_op0 = tmp;
8592 ix86_compare_op1 = const0_rtx;
8593 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8594 }
8595 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8596 {
8597 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8598 emit_move_insn (tmp, operands[3]);
8599 operands[3] = tmp;
8600 }
8601 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8602 {
8603 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8604 emit_move_insn (tmp, operands[2]);
8605 operands[2] = tmp;
8606 }
8607
8608 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8609 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8610 compare_op,
8611 operands[2],
8612 operands[3])));
8613 if (bypass_test)
8614 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8615 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8616 bypass_test,
8617 operands[3],
8618 operands[0])));
8619 if (second_test)
8620 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8621 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8622 second_test,
8623 operands[2],
8624 operands[0])));
8625
8626 return 1;
8627 }
8628
8629 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8630 works for floating point parameters and non-offsettable memories.
8631 For pushes, it returns just stack offsets; the values will be saved
8632 in the right order. At most three parts are generated. */
8633
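 /* For illustration: on a 32-bit target a DFmode register is split into
    two consecutive SImode registers, an offsettable DFmode memory into
    two SImode references 4 bytes apart, and an XFmode/TFmode constant
    into three SImode immediates.  */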
8634 static int
8635 ix86_split_to_parts (operand, parts, mode)
8636 rtx operand;
8637 rtx *parts;
8638 enum machine_mode mode;
8639 {
8640 int size;
8641
8642 if (!TARGET_64BIT)
8643 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8644 else
8645 size = (GET_MODE_SIZE (mode) + 4) / 8;
8646
8647 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8648 abort ();
8649 if (size < 2 || size > 3)
8650 abort ();
8651
8652 /* Optimize constant pool references to immediates. This is used by fp moves,
8653 which force all constants to memory to allow combining. */
8654
8655 if (GET_CODE (operand) == MEM
8656 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8657 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8658 operand = get_pool_constant (XEXP (operand, 0));
8659
8660 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8661 {
8662 /* The only non-offsettable memories we handle are pushes. */
8663 if (! push_operand (operand, VOIDmode))
8664 abort ();
8665
8666 operand = copy_rtx (operand);
8667 PUT_MODE (operand, Pmode);
8668 parts[0] = parts[1] = parts[2] = operand;
8669 }
8670 else if (!TARGET_64BIT)
8671 {
8672 if (mode == DImode)
8673 split_di (&operand, 1, &parts[0], &parts[1]);
8674 else
8675 {
8676 if (REG_P (operand))
8677 {
8678 if (!reload_completed)
8679 abort ();
8680 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8681 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8682 if (size == 3)
8683 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8684 }
8685 else if (offsettable_memref_p (operand))
8686 {
8687 operand = adjust_address (operand, SImode, 0);
8688 parts[0] = operand;
8689 parts[1] = adjust_address (operand, SImode, 4);
8690 if (size == 3)
8691 parts[2] = adjust_address (operand, SImode, 8);
8692 }
8693 else if (GET_CODE (operand) == CONST_DOUBLE)
8694 {
8695 REAL_VALUE_TYPE r;
8696 long l[4];
8697
8698 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8699 switch (mode)
8700 {
8701 case XFmode:
8702 case TFmode:
8703 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8704 parts[2] = gen_int_mode (l[2], SImode);
8705 break;
8706 case DFmode:
8707 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8708 break;
8709 default:
8710 abort ();
8711 }
8712 parts[1] = gen_int_mode (l[1], SImode);
8713 parts[0] = gen_int_mode (l[0], SImode);
8714 }
8715 else
8716 abort ();
8717 }
8718 }
8719 else
8720 {
8721 if (mode == TImode)
8722 split_ti (&operand, 1, &parts[0], &parts[1]);
8723 if (mode == XFmode || mode == TFmode)
8724 {
8725 if (REG_P (operand))
8726 {
8727 if (!reload_completed)
8728 abort ();
8729 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8730 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8731 }
8732 else if (offsettable_memref_p (operand))
8733 {
8734 operand = adjust_address (operand, DImode, 0);
8735 parts[0] = operand;
8736 parts[1] = adjust_address (operand, SImode, 8);
8737 }
8738 else if (GET_CODE (operand) == CONST_DOUBLE)
8739 {
8740 REAL_VALUE_TYPE r;
8741 long l[3];
8742
8743 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8744 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8745 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8746 if (HOST_BITS_PER_WIDE_INT >= 64)
8747 parts[0]
8748 = gen_int_mode
8749 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8750 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8751 DImode);
8752 else
8753 parts[0] = immed_double_const (l[0], l[1], DImode);
8754 parts[1] = gen_int_mode (l[2], SImode);
8755 }
8756 else
8757 abort ();
8758 }
8759 }
8760
8761 return size;
8762 }
8763
8764 /* Emit insns to perform a move or push of DI, DF, and XF values.
8765 Return false when normal moves are needed; true when all required
8766 insns have been emitted. Operands 2-4 contain the input values
8767 in the correct order; operands 5-7 contain the output values. */
8768
8769 void
8770 ix86_split_long_move (operands)
8771 rtx operands[];
8772 {
8773 rtx part[2][3];
8774 int nparts;
8775 int push = 0;
8776 int collisions = 0;
8777 enum machine_mode mode = GET_MODE (operands[0]);
8778
8779 /* The DFmode expanders may ask us to move a double.
8780 For a 64-bit target this is a single move. By hiding that fact
8781 here we simplify the i386.md splitters. */
8782 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8783 {
8784 /* Optimize constant pool references to immediates. This is used by
8785 fp moves, which force all constants to memory to allow combining. */
8786
8787 if (GET_CODE (operands[1]) == MEM
8788 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8789 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8790 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8791 if (push_operand (operands[0], VOIDmode))
8792 {
8793 operands[0] = copy_rtx (operands[0]);
8794 PUT_MODE (operands[0], Pmode);
8795 }
8796 else
8797 operands[0] = gen_lowpart (DImode, operands[0]);
8798 operands[1] = gen_lowpart (DImode, operands[1]);
8799 emit_move_insn (operands[0], operands[1]);
8800 return;
8801 }
8802
8803 /* The only non-offsettable memory we handle is push. */
8804 if (push_operand (operands[0], VOIDmode))
8805 push = 1;
8806 else if (GET_CODE (operands[0]) == MEM
8807 && ! offsettable_memref_p (operands[0]))
8808 abort ();
8809
8810 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8811 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8812
8813 /* When emitting a push, take care of source operands on the stack. */
8814 if (push && GET_CODE (operands[1]) == MEM
8815 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8816 {
8817 if (nparts == 3)
8818 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8819 XEXP (part[1][2], 0));
8820 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8821 XEXP (part[1][1], 0));
8822 }
8823
8824 /* We need to do the copy in the right order in case an address register
8825 of the source overlaps the destination. */
8826 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8827 {
8828 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8829 collisions++;
8830 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8831 collisions++;
8832 if (nparts == 3
8833 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8834 collisions++;
8835
8836 /* Collision in the middle part can be handled by reordering. */
8837 if (collisions == 1 && nparts == 3
8838 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8839 {
8840 rtx tmp;
8841 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8842 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8843 }
8844
8845 /* If there are more collisions, we can't handle them by reordering.
8846 Do an lea to the last part and use only one colliding move. */
8847 else if (collisions > 1)
8848 {
8849 collisions = 1;
8850 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8851 XEXP (part[1][0], 0)));
8852 part[1][0] = change_address (part[1][0],
8853 TARGET_64BIT ? DImode : SImode,
8854 part[0][nparts - 1]);
8855 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8856 if (nparts == 3)
8857 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8858 }
8859 }
8860
8861 if (push)
8862 {
8863 if (!TARGET_64BIT)
8864 {
8865 if (nparts == 3)
8866 {
8867 /* We use only the first 12 bytes of the TFmode value, but for pushing we
8868 are required to adjust the stack as if we were pushing a real 16-byte
8869 value. */
8870 if (mode == TFmode && !TARGET_64BIT)
8871 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8872 GEN_INT (-4)));
8873 emit_move_insn (part[0][2], part[1][2]);
8874 }
8875 }
8876 else
8877 {
8878 /* In 64-bit mode we don't have a 32-bit push available. In case this is
8879 a register, that is OK - we will just use the larger counterpart. We also
8880 retype memory - this comes from an attempt to avoid the REX prefix on
8881 moving the second half of a TFmode value. */
8882 if (GET_MODE (part[1][1]) == SImode)
8883 {
8884 if (GET_CODE (part[1][1]) == MEM)
8885 part[1][1] = adjust_address (part[1][1], DImode, 0);
8886 else if (REG_P (part[1][1]))
8887 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8888 else
8889 abort ();
8890 if (GET_MODE (part[1][0]) == SImode)
8891 part[1][0] = part[1][1];
8892 }
8893 }
8894 emit_move_insn (part[0][1], part[1][1]);
8895 emit_move_insn (part[0][0], part[1][0]);
8896 return;
8897 }
8898
8899 /* Choose the correct order so as not to overwrite the source before it is copied. */
8900 if ((REG_P (part[0][0])
8901 && REG_P (part[1][1])
8902 && (REGNO (part[0][0]) == REGNO (part[1][1])
8903 || (nparts == 3
8904 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8905 || (collisions > 0
8906 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8907 {
8908 if (nparts == 3)
8909 {
8910 operands[2] = part[0][2];
8911 operands[3] = part[0][1];
8912 operands[4] = part[0][0];
8913 operands[5] = part[1][2];
8914 operands[6] = part[1][1];
8915 operands[7] = part[1][0];
8916 }
8917 else
8918 {
8919 operands[2] = part[0][1];
8920 operands[3] = part[0][0];
8921 operands[5] = part[1][1];
8922 operands[6] = part[1][0];
8923 }
8924 }
8925 else
8926 {
8927 if (nparts == 3)
8928 {
8929 operands[2] = part[0][0];
8930 operands[3] = part[0][1];
8931 operands[4] = part[0][2];
8932 operands[5] = part[1][0];
8933 operands[6] = part[1][1];
8934 operands[7] = part[1][2];
8935 }
8936 else
8937 {
8938 operands[2] = part[0][0];
8939 operands[3] = part[0][1];
8940 operands[5] = part[1][0];
8941 operands[6] = part[1][1];
8942 }
8943 }
8944 emit_move_insn (operands[2], operands[5]);
8945 emit_move_insn (operands[3], operands[6]);
8946 if (nparts == 3)
8947 emit_move_insn (operands[4], operands[7]);
8948
8949 return;
8950 }
8951
8952 void
8953 ix86_split_ashldi (operands, scratch)
8954 rtx *operands, scratch;
8955 {
8956 rtx low[2], high[2];
8957 int count;
8958
8959 if (GET_CODE (operands[2]) == CONST_INT)
8960 {
8961 split_di (operands, 2, low, high);
8962 count = INTVAL (operands[2]) & 63;
8963
8964 if (count >= 32)
8965 {
8966 emit_move_insn (high[0], low[1]);
8967 emit_move_insn (low[0], const0_rtx);
8968
8969 if (count > 32)
8970 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8971 }
8972 else
8973 {
8974 if (!rtx_equal_p (operands[0], operands[1]))
8975 emit_move_insn (operands[0], operands[1]);
8976 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8977 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8978 }
8979 }
8980 else
8981 {
8982 if (!rtx_equal_p (operands[0], operands[1]))
8983 emit_move_insn (operands[0], operands[1]);
8984
8985 split_di (operands, 1, low, high);
8986
8987 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8988 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8989
8990 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8991 {
8992 if (! no_new_pseudos)
8993 scratch = force_reg (SImode, const0_rtx);
8994 else
8995 emit_move_insn (scratch, const0_rtx);
8996
8997 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8998 scratch));
8999 }
9000 else
9001 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9002 }
9003 }
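 /* A worked illustration of the constant-count path above: a DImode shift
    left by 5 becomes "shldl $5, low, high" followed by "sall $5, low",
    while a shift by 40 moves the low word into the high word, clears the
    low word and then shifts the high word left by 8.  */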
9004
9005 void
9006 ix86_split_ashrdi (operands, scratch)
9007 rtx *operands, scratch;
9008 {
9009 rtx low[2], high[2];
9010 int count;
9011
9012 if (GET_CODE (operands[2]) == CONST_INT)
9013 {
9014 split_di (operands, 2, low, high);
9015 count = INTVAL (operands[2]) & 63;
9016
9017 if (count >= 32)
9018 {
9019 emit_move_insn (low[0], high[1]);
9020
9021 if (! reload_completed)
9022 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9023 else
9024 {
9025 emit_move_insn (high[0], low[0]);
9026 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9027 }
9028
9029 if (count > 32)
9030 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9031 }
9032 else
9033 {
9034 if (!rtx_equal_p (operands[0], operands[1]))
9035 emit_move_insn (operands[0], operands[1]);
9036 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9037 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9038 }
9039 }
9040 else
9041 {
9042 if (!rtx_equal_p (operands[0], operands[1]))
9043 emit_move_insn (operands[0], operands[1]);
9044
9045 split_di (operands, 1, low, high);
9046
9047 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9048 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9049
9050 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9051 {
9052 if (! no_new_pseudos)
9053 scratch = gen_reg_rtx (SImode);
9054 emit_move_insn (scratch, high[0]);
9055 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9056 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9057 scratch));
9058 }
9059 else
9060 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9061 }
9062 }
9063
9064 void
9065 ix86_split_lshrdi (operands, scratch)
9066 rtx *operands, scratch;
9067 {
9068 rtx low[2], high[2];
9069 int count;
9070
9071 if (GET_CODE (operands[2]) == CONST_INT)
9072 {
9073 split_di (operands, 2, low, high);
9074 count = INTVAL (operands[2]) & 63;
9075
9076 if (count >= 32)
9077 {
9078 emit_move_insn (low[0], high[1]);
9079 emit_move_insn (high[0], const0_rtx);
9080
9081 if (count > 32)
9082 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9083 }
9084 else
9085 {
9086 if (!rtx_equal_p (operands[0], operands[1]))
9087 emit_move_insn (operands[0], operands[1]);
9088 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9089 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9090 }
9091 }
9092 else
9093 {
9094 if (!rtx_equal_p (operands[0], operands[1]))
9095 emit_move_insn (operands[0], operands[1]);
9096
9097 split_di (operands, 1, low, high);
9098
9099 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9100 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9101
9102 /* Heh. By reversing the arguments, we can reuse this pattern. */
9103 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9104 {
9105 if (! no_new_pseudos)
9106 scratch = force_reg (SImode, const0_rtx);
9107 else
9108 emit_move_insn (scratch, const0_rtx);
9109
9110 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9111 scratch));
9112 }
9113 else
9114 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9115 }
9116 }
9117
9118 /* Helper function for the string operations below. Test whether VARIABLE
9119 is aligned to VALUE bytes. If so, jump to the label. */
9120 static rtx
9121 ix86_expand_aligntest (variable, value)
9122 rtx variable;
9123 int value;
9124 {
9125 rtx label = gen_label_rtx ();
9126 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9127 if (GET_MODE (variable) == DImode)
9128 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9129 else
9130 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9131 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9132 1, label);
9133 return label;
9134 }
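 /* Usage sketch (illustrative): callers in the string expanders below do
        label = ix86_expand_aligntest (destreg, 1);
        ... copy or store a single byte and adjust the counter ...
        emit_label (label);
    so the one-byte fixup is skipped whenever the low bit of the address
    is already clear.  */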
9135
9136 /* Decrease COUNTREG by VALUE. */
9137 static void
9138 ix86_adjust_counter (countreg, value)
9139 rtx countreg;
9140 HOST_WIDE_INT value;
9141 {
9142 if (GET_MODE (countreg) == DImode)
9143 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9144 else
9145 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9146 }
9147
9148 /* Zero extend possibly SImode EXP to Pmode register. */
9149 rtx
9150 ix86_zero_extend_to_Pmode (exp)
9151 rtx exp;
9152 {
9153 rtx r;
9154 if (GET_MODE (exp) == VOIDmode)
9155 return force_reg (Pmode, exp);
9156 if (GET_MODE (exp) == Pmode)
9157 return copy_to_mode_reg (Pmode, exp);
9158 r = gen_reg_rtx (Pmode);
9159 emit_insn (gen_zero_extendsidi2 (r, exp));
9160 return r;
9161 }
9162
9163 /* Expand string move (memcpy) operation. Use i386 string operations when
9164 profitable. expand_clrstr contains similar code. */
9165 int
9166 ix86_expand_movstr (dst, src, count_exp, align_exp)
9167 rtx dst, src, count_exp, align_exp;
9168 {
9169 rtx srcreg, destreg, countreg;
9170 enum machine_mode counter_mode;
9171 HOST_WIDE_INT align = 0;
9172 unsigned HOST_WIDE_INT count = 0;
9173 rtx insns;
9174
9175 start_sequence ();
9176
9177 if (GET_CODE (align_exp) == CONST_INT)
9178 align = INTVAL (align_exp);
9179
9180 /* This simple hack avoids all inlining code and simplifies code below. */
9181 if (!TARGET_ALIGN_STRINGOPS)
9182 align = 64;
9183
9184 if (GET_CODE (count_exp) == CONST_INT)
9185 count = INTVAL (count_exp);
9186
9187 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9188 for 64 bits use SImode when possible, otherwise DImode.
9189 COUNT is set to the number of bytes copied when known at compile time. */
9190 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9191 || x86_64_zero_extended_value (count_exp))
9192 counter_mode = SImode;
9193 else
9194 counter_mode = DImode;
9195
9196 if (counter_mode != SImode && counter_mode != DImode)
9197 abort ();
9198
9199 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9200 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9201
9202 emit_insn (gen_cld ());
9203
9204 /* When optimizing for size emit simple rep ; movsb instruction for
9205 counts not divisible by 4. */
9206
9207 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9208 {
9209 countreg = ix86_zero_extend_to_Pmode (count_exp);
9210 if (TARGET_64BIT)
9211 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9212 destreg, srcreg, countreg));
9213 else
9214 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9215 destreg, srcreg, countreg));
9216 }
9217
9218 /* For constant aligned (or small unaligned) copies use rep movsl
9219 followed by code copying the rest. For PentiumPro ensure 8 byte
9220 alignment to allow rep movsl acceleration. */
9221
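   /* For illustration: a known count of 23 with 4-byte alignment on a
      32-bit target becomes a "rep movsl" of 5 longwords followed by a
      strmovhi and a strmovqi for the trailing 3 bytes.  */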
9222 else if (count != 0
9223 && (align >= 8
9224 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9225 || optimize_size || count < (unsigned int) 64))
9226 {
9227 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9228 if (count & ~(size - 1))
9229 {
9230 countreg = copy_to_mode_reg (counter_mode,
9231 GEN_INT ((count >> (size == 4 ? 2 : 3))
9232 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9233 countreg = ix86_zero_extend_to_Pmode (countreg);
9234 if (size == 4)
9235 {
9236 if (TARGET_64BIT)
9237 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9238 destreg, srcreg, countreg));
9239 else
9240 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9241 destreg, srcreg, countreg));
9242 }
9243 else
9244 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9245 destreg, srcreg, countreg));
9246 }
9247 if (size == 8 && (count & 0x04))
9248 emit_insn (gen_strmovsi (destreg, srcreg));
9249 if (count & 0x02)
9250 emit_insn (gen_strmovhi (destreg, srcreg));
9251 if (count & 0x01)
9252 emit_insn (gen_strmovqi (destreg, srcreg));
9253 }
9254 /* The generic code based on the glibc implementation:
9255 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9256 allowing accelerated copying there)
9257 - copy the data using rep movsl
9258 - copy the rest. */
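   /* A rough shape of the code emitted by this branch (illustrative):
      byte and word fixups guarded by ix86_expand_aligntest until the
      destination is aligned, a shift of the count by 2 (or 3 on 64-bit),
      a rep movsl (or rep movsq), and conditional 4/2/1-byte copies for
      the remainder.  */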
9259 else
9260 {
9261 rtx countreg2;
9262 rtx label = NULL;
9263 int desired_alignment = (TARGET_PENTIUMPRO
9264 && (count == 0 || count >= (unsigned int) 260)
9265 ? 8 : UNITS_PER_WORD);
9266
9267 /* In case we don't know anything about the alignment, default to the
9268 library version, since it is usually equally fast and results in
9269 shorter code. */
9270 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9271 {
9272 end_sequence ();
9273 return 0;
9274 }
9275
9276 if (TARGET_SINGLE_STRINGOP)
9277 emit_insn (gen_cld ());
9278
9279 countreg2 = gen_reg_rtx (Pmode);
9280 countreg = copy_to_mode_reg (counter_mode, count_exp);
9281
9282 /* We don't use loops to align the destination and to copy parts smaller
9283 than 4 bytes, because gcc is able to optimize such code better (in
9284 case the destination or the count really is aligned, gcc is often
9285 able to predict the branches) and also it is friendlier to the
9286 hardware branch prediction.
9287
9288 Using loops is beneficial for the generic case, because we can
9289 handle small counts using the loops. Many CPUs (such as Athlon)
9290 have large REP prefix setup costs.
9291
9292 This is quite costly. Maybe we can revisit this decision later or
9293 add some customizability to this code. */
9294
9295 if (count == 0 && align < desired_alignment)
9296 {
9297 label = gen_label_rtx ();
9298 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9299 LEU, 0, counter_mode, 1, label);
9300 }
9301 if (align <= 1)
9302 {
9303 rtx label = ix86_expand_aligntest (destreg, 1);
9304 emit_insn (gen_strmovqi (destreg, srcreg));
9305 ix86_adjust_counter (countreg, 1);
9306 emit_label (label);
9307 LABEL_NUSES (label) = 1;
9308 }
9309 if (align <= 2)
9310 {
9311 rtx label = ix86_expand_aligntest (destreg, 2);
9312 emit_insn (gen_strmovhi (destreg, srcreg));
9313 ix86_adjust_counter (countreg, 2);
9314 emit_label (label);
9315 LABEL_NUSES (label) = 1;
9316 }
9317 if (align <= 4 && desired_alignment > 4)
9318 {
9319 rtx label = ix86_expand_aligntest (destreg, 4);
9320 emit_insn (gen_strmovsi (destreg, srcreg));
9321 ix86_adjust_counter (countreg, 4);
9322 emit_label (label);
9323 LABEL_NUSES (label) = 1;
9324 }
9325
9326 if (label && desired_alignment > 4 && !TARGET_64BIT)
9327 {
9328 emit_label (label);
9329 LABEL_NUSES (label) = 1;
9330 label = NULL_RTX;
9331 }
9332 if (!TARGET_SINGLE_STRINGOP)
9333 emit_insn (gen_cld ());
9334 if (TARGET_64BIT)
9335 {
9336 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9337 GEN_INT (3)));
9338 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9339 destreg, srcreg, countreg2));
9340 }
9341 else
9342 {
9343 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9344 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9345 destreg, srcreg, countreg2));
9346 }
9347
9348 if (label)
9349 {
9350 emit_label (label);
9351 LABEL_NUSES (label) = 1;
9352 }
9353 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9354 emit_insn (gen_strmovsi (destreg, srcreg));
9355 if ((align <= 4 || count == 0) && TARGET_64BIT)
9356 {
9357 rtx label = ix86_expand_aligntest (countreg, 4);
9358 emit_insn (gen_strmovsi (destreg, srcreg));
9359 emit_label (label);
9360 LABEL_NUSES (label) = 1;
9361 }
9362 if (align > 2 && count != 0 && (count & 2))
9363 emit_insn (gen_strmovhi (destreg, srcreg));
9364 if (align <= 2 || count == 0)
9365 {
9366 rtx label = ix86_expand_aligntest (countreg, 2);
9367 emit_insn (gen_strmovhi (destreg, srcreg));
9368 emit_label (label);
9369 LABEL_NUSES (label) = 1;
9370 }
9371 if (align > 1 && count != 0 && (count & 1))
9372 emit_insn (gen_strmovqi (destreg, srcreg));
9373 if (align <= 1 || count == 0)
9374 {
9375 rtx label = ix86_expand_aligntest (countreg, 1);
9376 emit_insn (gen_strmovqi (destreg, srcreg));
9377 emit_label (label);
9378 LABEL_NUSES (label) = 1;
9379 }
9380 }
9381
9382 insns = get_insns ();
9383 end_sequence ();
9384
9385 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9386 emit_insns (insns);
9387 return 1;
9388 }
9389
9390 /* Expand string clear operation (bzero). Use i386 string operations when
9391 profitable. expand_movstr contains similar code. */
9392 int
9393 ix86_expand_clrstr (src, count_exp, align_exp)
9394 rtx src, count_exp, align_exp;
9395 {
9396 rtx destreg, zeroreg, countreg;
9397 enum machine_mode counter_mode;
9398 HOST_WIDE_INT align = 0;
9399 unsigned HOST_WIDE_INT count = 0;
9400
9401 if (GET_CODE (align_exp) == CONST_INT)
9402 align = INTVAL (align_exp);
9403
9404 /* This simple hack avoids all inlining code and simplifies code below. */
9405 if (!TARGET_ALIGN_STRINGOPS)
9406 align = 32;
9407
9408 if (GET_CODE (count_exp) == CONST_INT)
9409 count = INTVAL (count_exp);
9410 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9411 for 64 bits use SImode when possible, otherwise DImode.
9412 COUNT is set to the number of bytes copied when known at compile time. */
9413 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9414 || x86_64_zero_extended_value (count_exp))
9415 counter_mode = SImode;
9416 else
9417 counter_mode = DImode;
9418
9419 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9420
9421 emit_insn (gen_cld ());
9422
9423 /* When optimizing for size emit simple rep ; movsb instruction for
9424 counts not divisible by 4. */
9425
9426 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9427 {
9428 countreg = ix86_zero_extend_to_Pmode (count_exp);
9429 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9430 if (TARGET_64BIT)
9431 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9432 destreg, countreg));
9433 else
9434 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9435 destreg, countreg));
9436 }
9437 else if (count != 0
9438 && (align >= 8
9439 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9440 || optimize_size || count < (unsigned int) 64))
9441 {
9442 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9443 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9444 if (count & ~(size - 1))
9445 {
9446 countreg = copy_to_mode_reg (counter_mode,
9447 GEN_INT ((count >> (size == 4 ? 2 : 3))
9448 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9449 countreg = ix86_zero_extend_to_Pmode (countreg);
9450 if (size == 4)
9451 {
9452 if (TARGET_64BIT)
9453 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9454 destreg, countreg));
9455 else
9456 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9457 destreg, countreg));
9458 }
9459 else
9460 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9461 destreg, countreg));
9462 }
9463 if (size == 8 && (count & 0x04))
9464 emit_insn (gen_strsetsi (destreg,
9465 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9466 if (count & 0x02)
9467 emit_insn (gen_strsethi (destreg,
9468 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9469 if (count & 0x01)
9470 emit_insn (gen_strsetqi (destreg,
9471 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9472 }
9473 else
9474 {
9475 rtx countreg2;
9476 rtx label = NULL;
9477 /* Compute desired alignment of the string operation. */
9478 int desired_alignment = (TARGET_PENTIUMPRO
9479 && (count == 0 || count >= (unsigned int) 260)
9480 ? 8 : UNITS_PER_WORD);
9481
9482 /* In case we don't know anything about the alignment, default to the
9483 library version, since it is usually equally fast and results in
9484 shorter code. */
9485 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9486 return 0;
9487
9488 if (TARGET_SINGLE_STRINGOP)
9489 emit_insn (gen_cld ());
9490
9491 countreg2 = gen_reg_rtx (Pmode);
9492 countreg = copy_to_mode_reg (counter_mode, count_exp);
9493 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9494
9495 if (count == 0 && align < desired_alignment)
9496 {
9497 label = gen_label_rtx ();
9498 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9499 LEU, 0, counter_mode, 1, label);
9500 }
9501 if (align <= 1)
9502 {
9503 rtx label = ix86_expand_aligntest (destreg, 1);
9504 emit_insn (gen_strsetqi (destreg,
9505 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9506 ix86_adjust_counter (countreg, 1);
9507 emit_label (label);
9508 LABEL_NUSES (label) = 1;
9509 }
9510 if (align <= 2)
9511 {
9512 rtx label = ix86_expand_aligntest (destreg, 2);
9513 emit_insn (gen_strsethi (destreg,
9514 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9515 ix86_adjust_counter (countreg, 2);
9516 emit_label (label);
9517 LABEL_NUSES (label) = 1;
9518 }
9519 if (align <= 4 && desired_alignment > 4)
9520 {
9521 rtx label = ix86_expand_aligntest (destreg, 4);
9522 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9523 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9524 : zeroreg)));
9525 ix86_adjust_counter (countreg, 4);
9526 emit_label (label);
9527 LABEL_NUSES (label) = 1;
9528 }
9529
9530 if (label && desired_alignment > 4 && !TARGET_64BIT)
9531 {
9532 emit_label (label);
9533 LABEL_NUSES (label) = 1;
9534 label = NULL_RTX;
9535 }
9536
9537 if (!TARGET_SINGLE_STRINGOP)
9538 emit_insn (gen_cld ());
9539 if (TARGET_64BIT)
9540 {
9541 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9542 GEN_INT (3)));
9543 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9544 destreg, countreg2));
9545 }
9546 else
9547 {
9548 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9549 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9550 destreg, countreg2));
9551 }
9552 if (label)
9553 {
9554 emit_label (label);
9555 LABEL_NUSES (label) = 1;
9556 }
9557
9558 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9559 emit_insn (gen_strsetsi (destreg,
9560 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9561 if (TARGET_64BIT && (align <= 4 || count == 0))
9562 {
9563 rtx label = ix86_expand_aligntest (countreg, 4);
9564 emit_insn (gen_strsetsi (destreg,
9565 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9566 emit_label (label);
9567 LABEL_NUSES (label) = 1;
9568 }
9569 if (align > 2 && count != 0 && (count & 2))
9570 emit_insn (gen_strsethi (destreg,
9571 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9572 if (align <= 2 || count == 0)
9573 {
9574 rtx label = ix86_expand_aligntest (countreg, 2);
9575 emit_insn (gen_strsethi (destreg,
9576 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9577 emit_label (label);
9578 LABEL_NUSES (label) = 1;
9579 }
9580 if (align > 1 && count != 0 && (count & 1))
9581 emit_insn (gen_strsetqi (destreg,
9582 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9583 if (align <= 1 || count == 0)
9584 {
9585 rtx label = ix86_expand_aligntest (countreg, 1);
9586 emit_insn (gen_strsetqi (destreg,
9587 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9588 emit_label (label);
9589 LABEL_NUSES (label) = 1;
9590 }
9591 }
9592 return 1;
9593 }
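/* Editor's sketch (not part of the original sources): for a compile-time
   count of 20 bytes with 4-byte alignment on ia32, the widened-store path
   above emits roughly

       cld
       xorl  %eax, %eax        # zeroreg
       movl  $5, %ecx          # count >> 2
       rep stosl

   with the trailing strset insns clearing any 1-3 leftover bytes (none
   here, since 20 is a multiple of 4).  */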
9594 /* Expand strlen. */
9595 int
9596 ix86_expand_strlen (out, src, eoschar, align)
9597 rtx out, src, eoschar, align;
9598 {
9599 rtx addr, scratch1, scratch2, scratch3, scratch4;
9600
9601 /* The generic case of the strlen expander is long. Avoid expanding it
9602 unless TARGET_INLINE_ALL_STRINGOPS. */
9603
9604 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9605 && !TARGET_INLINE_ALL_STRINGOPS
9606 && !optimize_size
9607 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9608 return 0;
9609
9610 addr = force_reg (Pmode, XEXP (src, 0));
9611 scratch1 = gen_reg_rtx (Pmode);
9612
9613 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9614 && !optimize_size)
9615 {
9616 /* Some optimizers fail to combine a call like
9617 foo (strlen (bar), strlen (bar));
9618 when the move and the subtraction are done here; the length is
9619 calculated just once when these instructions are emitted inside
9620 output_strlen_unroll(). But since &bar[strlen (bar)] is often
9621 used and this form needs one register fewer for the lifetime of
9622 output_strlen_unroll(), this is better. */
9623
9624 emit_move_insn (out, addr);
9625
9626 ix86_expand_strlensi_unroll_1 (out, align);
9627
9628 /* strlensi_unroll_1 returns the address of the zero at the end of
9629 the string, like memchr(), so compute the length by subtracting
9630 the start address. */
9631 if (TARGET_64BIT)
9632 emit_insn (gen_subdi3 (out, out, addr));
9633 else
9634 emit_insn (gen_subsi3 (out, out, addr));
9635 }
9636 else
9637 {
9638 scratch2 = gen_reg_rtx (Pmode);
9639 scratch3 = gen_reg_rtx (Pmode);
9640 scratch4 = force_reg (Pmode, constm1_rtx);
9641
9642 emit_move_insn (scratch3, addr);
9643 eoschar = force_reg (QImode, eoschar);
9644
9645 emit_insn (gen_cld ());
9646 if (TARGET_64BIT)
9647 {
9648 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9649 align, scratch4, scratch3));
9650 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9651 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9652 }
9653 else
9654 {
9655 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9656 align, scratch4, scratch3));
9657 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9658 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9659 }
9660 }
9661 return 1;
9662 }
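/* Editor's note on the fallback path above: the count register is preloaded
   with -1 (scratch4), so after "repnz scasb" has examined len + 1 bytes
   (including the terminator) it holds -(len + 2).  The one's complement
   followed by the add of -1 emitted above therefore yields
   ~(-(len + 2)) - 1 = (len + 1) - 1 = len.  */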
9663
9664 /* Expand the appropriate insns for doing strlen if not just doing
9665 repnz; scasb
9666
9667 out = result, initialized with the start address
9668 align_rtx = alignment of the address.
9669 scratch = scratch register, initialized with the start address when
9670 not aligned, otherwise undefined
9671
9672 This is just the body. It needs the initialisations mentioned above and
9673 some address computation at the end. These things are done in i386.md. */
9674
9675 static void
9676 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9677 rtx out, align_rtx;
9678 {
9679 int align;
9680 rtx tmp;
9681 rtx align_2_label = NULL_RTX;
9682 rtx align_3_label = NULL_RTX;
9683 rtx align_4_label = gen_label_rtx ();
9684 rtx end_0_label = gen_label_rtx ();
9685 rtx mem;
9686 rtx tmpreg = gen_reg_rtx (SImode);
9687 rtx scratch = gen_reg_rtx (SImode);
9688
9689 align = 0;
9690 if (GET_CODE (align_rtx) == CONST_INT)
9691 align = INTVAL (align_rtx);
9692
9693 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9694
9695 /* Is there a known alignment and is it less than 4? */
9696 if (align < 4)
9697 {
9698 rtx scratch1 = gen_reg_rtx (Pmode);
9699 emit_move_insn (scratch1, out);
9700 /* Is there a known alignment and is it not 2? */
9701 if (align != 2)
9702 {
9703 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9704 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9705
9706 /* Leave just the 3 lower bits. */
9707 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9708 NULL_RTX, 0, OPTAB_WIDEN);
9709
9710 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9711 Pmode, 1, align_4_label);
9712 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9713 Pmode, 1, align_2_label);
9714 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9715 Pmode, 1, align_3_label);
9716 }
9717 else
9718 {
9719 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9720 check whether it is aligned to a 4-byte boundary. */
9721
9722 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9723 NULL_RTX, 0, OPTAB_WIDEN);
9724
9725 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9726 Pmode, 1, align_4_label);
9727 }
9728
9729 mem = gen_rtx_MEM (QImode, out);
9730
9731 /* Now compare the bytes. */
9732
9733 /* Compare the first n unaligned bytes one byte at a time. */
9734 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9735 QImode, 1, end_0_label);
9736
9737 /* Increment the address. */
9738 if (TARGET_64BIT)
9739 emit_insn (gen_adddi3 (out, out, const1_rtx));
9740 else
9741 emit_insn (gen_addsi3 (out, out, const1_rtx));
9742
9743 /* Not needed with an alignment of 2 */
9744 if (align != 2)
9745 {
9746 emit_label (align_2_label);
9747
9748 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9749 end_0_label);
9750
9751 if (TARGET_64BIT)
9752 emit_insn (gen_adddi3 (out, out, const1_rtx));
9753 else
9754 emit_insn (gen_addsi3 (out, out, const1_rtx));
9755
9756 emit_label (align_3_label);
9757 }
9758
9759 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9760 end_0_label);
9761
9762 if (TARGET_64BIT)
9763 emit_insn (gen_adddi3 (out, out, const1_rtx));
9764 else
9765 emit_insn (gen_addsi3 (out, out, const1_rtx));
9766 }
9767
9768 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
9769 align this loop; doing so only makes the code larger and does not
9770 speed it up. */
9771 emit_label (align_4_label);
9772
9773 mem = gen_rtx_MEM (SImode, out);
9774 emit_move_insn (scratch, mem);
9775 if (TARGET_64BIT)
9776 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9777 else
9778 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9779
9780 /* This formula yields a nonzero result iff one of the bytes is zero.
9781 This saves three branches inside the loop and many cycles. */
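/* Editor's sketch: the insns below compute
       (x - 0x01010101) & ~x & 0x80808080
   which is nonzero exactly when some byte of x is zero.  For example, with
   x = 0x11002233:
       x - 0x01010101 = 0x0FFF2132
       ~x             = 0xEEFFDDCC
       and            = 0x0EFF0100
       & 0x80808080   = 0x00800000   (nonzero: byte 2 of x is zero).  */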
9782
9783 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9784 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9785 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9786 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9787 gen_int_mode (0x80808080, SImode)));
9788 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9789 align_4_label);
9790
9791 if (TARGET_CMOVE)
9792 {
9793 rtx reg = gen_reg_rtx (SImode);
9794 rtx reg2 = gen_reg_rtx (Pmode);
9795 emit_move_insn (reg, tmpreg);
9796 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9797
9798 /* If zero is not in the first two bytes, move two bytes forward. */
9799 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9800 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9801 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9802 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9803 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9804 reg,
9805 tmpreg)));
9806 /* Emit lea manually to avoid clobbering of flags. */
9807 emit_insn (gen_rtx_SET (SImode, reg2,
9808 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9809
9810 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9811 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9812 emit_insn (gen_rtx_SET (VOIDmode, out,
9813 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9814 reg2,
9815 out)));
9816
9817 }
9818 else
9819 {
9820 rtx end_2_label = gen_label_rtx ();
9821 /* Is zero in the first two bytes? */
9822
9823 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9824 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9825 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9826 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9827 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9828 pc_rtx);
9829 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9830 JUMP_LABEL (tmp) = end_2_label;
9831
9832 /* Not in the first two. Move two bytes forward. */
9833 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9834 if (TARGET_64BIT)
9835 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9836 else
9837 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9838
9839 emit_label (end_2_label);
9840
9841 }
9842
9843 /* Avoid a branch when fixing up the final byte. */
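/* Editor's note: at this point OUT points 4 bytes past the start of the
   two-byte group known to contain the zero byte, and the low byte of
   tmpreg is 0x80 iff the zero is the first byte of that group.  Adding
   that byte to itself sets the carry flag exactly in that case, so the
   subtract-with-borrow of 3 below lands OUT on the zero byte without a
   branch.  */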
9844 tmpreg = gen_lowpart (QImode, tmpreg);
9845 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9846 if (TARGET_64BIT)
9847 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9848 else
9849 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9850
9851 emit_label (end_0_label);
9852 }
9853
9854 void
9855 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
9856 rtx retval, fnaddr, callarg1, callarg2, pop;
9857 {
9858 rtx use = NULL, call;
9859
9860 if (pop == const0_rtx)
9861 pop = NULL;
9862 if (TARGET_64BIT && pop)
9863 abort ();
9864
9865 /* Static functions and indirect calls don't need the pic register. */
9866 if (! TARGET_64BIT && flag_pic
9867 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
9868 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
9869 {
9870 current_function_uses_pic_offset_table = 1;
9871 use_reg (&use, pic_offset_table_rtx);
9872 }
9873
9874 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
9875 {
9876 rtx al = gen_rtx_REG (QImode, 0);
9877 emit_move_insn (al, callarg2);
9878 use_reg (&use, al);
9879 }
9880
9881 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
9882 {
9883 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
9884 fnaddr = gen_rtx_MEM (QImode, fnaddr);
9885 }
9886
9887 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
9888 if (retval)
9889 call = gen_rtx_SET (VOIDmode, retval, call);
9890 if (pop)
9891 {
9892 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
9893 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
9894 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
9895 }
9896
9897 call = emit_call_insn (call);
9898 if (use)
9899 CALL_INSN_FUNCTION_USAGE (call) = use;
9900 }
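/* Editor's note: for callee-pop conventions (e.g. stdcall on ia32), the
   expansion above folds the stack adjustment into the call rtx itself,
   roughly

       (parallel [(set (retval) (call (mem:QI fnaddr) callarg1))
                  (set (reg sp) (plus (reg sp) (const_int N)))])

   so later passes see the pop as part of the call insn.  */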
9901
9902 \f
9903 /* Clear stack slot assignments remembered from previous functions.
9904 This is called from INIT_EXPANDERS once before RTL is emitted for each
9905 function. */
9906
9907 static void
9908 ix86_init_machine_status (p)
9909 struct function *p;
9910 {
9911 p->machine = (struct machine_function *)
9912 xcalloc (1, sizeof (struct machine_function));
9913 }
9914
9915 /* Mark machine specific bits of P for GC. */
9916 static void
9917 ix86_mark_machine_status (p)
9918 struct function *p;
9919 {
9920 struct machine_function *machine = p->machine;
9921 enum machine_mode mode;
9922 int n;
9923
9924 if (! machine)
9925 return;
9926
9927 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9928 mode = (enum machine_mode) ((int) mode + 1))
9929 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9930 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9931 }
9932
9933 static void
9934 ix86_free_machine_status (p)
9935 struct function *p;
9936 {
9937 free (p->machine);
9938 p->machine = NULL;
9939 }
9940
9941 /* Return a MEM corresponding to a stack slot with mode MODE.
9942 Allocate a new slot if necessary.
9943
9944 The RTL for a function can have several slots available: N is
9945 which slot to use. */
9946
9947 rtx
9948 assign_386_stack_local (mode, n)
9949 enum machine_mode mode;
9950 int n;
9951 {
9952 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9953 abort ();
9954
9955 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9956 ix86_stack_locals[(int) mode][n]
9957 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9958
9959 return ix86_stack_locals[(int) mode][n];
9960 }
9961 \f
9962 /* Calculate the length of the memory address in the instruction
9963 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9964
9965 static int
9966 memory_address_length (addr)
9967 rtx addr;
9968 {
9969 struct ix86_address parts;
9970 rtx base, index, disp;
9971 int len;
9972
9973 if (GET_CODE (addr) == PRE_DEC
9974 || GET_CODE (addr) == POST_INC
9975 || GET_CODE (addr) == PRE_MODIFY
9976 || GET_CODE (addr) == POST_MODIFY)
9977 return 0;
9978
9979 if (! ix86_decompose_address (addr, &parts))
9980 abort ();
9981
9982 base = parts.base;
9983 index = parts.index;
9984 disp = parts.disp;
9985 len = 0;
9986
9987 /* Register Indirect. */
9988 if (base && !index && !disp)
9989 {
9990 /* Special cases: ebp and esp need the two-byte modrm form. */
9991 if (addr == stack_pointer_rtx
9992 || addr == arg_pointer_rtx
9993 || addr == frame_pointer_rtx
9994 || addr == hard_frame_pointer_rtx)
9995 len = 1;
9996 }
9997
9998 /* Direct Addressing. */
9999 else if (disp && !base && !index)
10000 len = 4;
10001
10002 else
10003 {
10004 /* Find the length of the displacement constant. */
10005 if (disp)
10006 {
10007 if (GET_CODE (disp) == CONST_INT
10008 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10009 len = 1;
10010 else
10011 len = 4;
10012 }
10013
10014 /* An index requires the two-byte modrm form. */
10015 if (index)
10016 len += 1;
10017 }
10018
10019 return len;
10020 }
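/* Editor's sketch of typical values returned above (assuming the usual
   ia32 encodings): (%ebx) needs no extra bytes (0), (%esp) needs an extra
   SIB byte (1), a bare 32-bit absolute address needs a 4-byte displacement
   (4), and 8(%ebx,%esi) needs a disp8 plus a SIB byte (2).  */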
10021
10022 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
10023 is set, expect that the insn has an 8-bit immediate alternative. */
10024 int
10025 ix86_attr_length_immediate_default (insn, shortform)
10026 rtx insn;
10027 int shortform;
10028 {
10029 int len = 0;
10030 int i;
10031 extract_insn_cached (insn);
10032 for (i = recog_data.n_operands - 1; i >= 0; --i)
10033 if (CONSTANT_P (recog_data.operand[i]))
10034 {
10035 if (len)
10036 abort ();
10037 if (shortform
10038 && GET_CODE (recog_data.operand[i]) == CONST_INT
10039 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10040 len = 1;
10041 else
10042 {
10043 switch (get_attr_mode (insn))
10044 {
10045 case MODE_QI:
10046 len+=1;
10047 break;
10048 case MODE_HI:
10049 len+=2;
10050 break;
10051 case MODE_SI:
10052 len+=4;
10053 break;
10054 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10055 case MODE_DI:
10056 len+=4;
10057 break;
10058 default:
10059 fatal_insn ("unknown insn mode", insn);
10060 }
10061 }
10062 }
10063 return len;
10064 }
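/* Editor's sketch: with SHORTFORM nonzero, "addl $5, %eax" counts a 1-byte
   immediate because the constant satisfies the signed 8-bit 'K'
   constraint, whereas "addl $100000, %eax" counts 4 bytes.  */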
10065 /* Compute the default value for the "length_address" attribute. */
10066 int
10067 ix86_attr_length_address_default (insn)
10068 rtx insn;
10069 {
10070 int i;
10071 extract_insn_cached (insn);
10072 for (i = recog_data.n_operands - 1; i >= 0; --i)
10073 if (GET_CODE (recog_data.operand[i]) == MEM)
10074 {
10075 return memory_address_length (XEXP (recog_data.operand[i], 0));
10076 break;
10077 }
10078 return 0;
10079 }
10080 \f
10081 /* Return the maximum number of instructions a cpu can issue. */
10082
10083 static int
10084 ix86_issue_rate ()
10085 {
10086 switch (ix86_cpu)
10087 {
10088 case PROCESSOR_PENTIUM:
10089 case PROCESSOR_K6:
10090 return 2;
10091
10092 case PROCESSOR_PENTIUMPRO:
10093 case PROCESSOR_PENTIUM4:
10094 case PROCESSOR_ATHLON:
10095 return 3;
10096
10097 default:
10098 return 1;
10099 }
10100 }
10101
10102 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
10103 by DEP_INSN and nothing else set by DEP_INSN. */
10104
10105 static int
10106 ix86_flags_dependant (insn, dep_insn, insn_type)
10107 rtx insn, dep_insn;
10108 enum attr_type insn_type;
10109 {
10110 rtx set, set2;
10111
10112 /* Simplify the test for uninteresting insns. */
10113 if (insn_type != TYPE_SETCC
10114 && insn_type != TYPE_ICMOV
10115 && insn_type != TYPE_FCMOV
10116 && insn_type != TYPE_IBR)
10117 return 0;
10118
10119 if ((set = single_set (dep_insn)) != 0)
10120 {
10121 set = SET_DEST (set);
10122 set2 = NULL_RTX;
10123 }
10124 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10125 && XVECLEN (PATTERN (dep_insn), 0) == 2
10126 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10127 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10128 {
10129 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10130 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10131 }
10132 else
10133 return 0;
10134
10135 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10136 return 0;
10137
10138 /* This test is true if the dependent insn reads the flags but
10139 not any other potentially set register. */
10140 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10141 return 0;
10142
10143 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10144 return 0;
10145
10146 return 1;
10147 }
10148
10149 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10150 address with operands set by DEP_INSN. */
10151
10152 static int
10153 ix86_agi_dependant (insn, dep_insn, insn_type)
10154 rtx insn, dep_insn;
10155 enum attr_type insn_type;
10156 {
10157 rtx addr;
10158
10159 if (insn_type == TYPE_LEA
10160 && TARGET_PENTIUM)
10161 {
10162 addr = PATTERN (insn);
10163 if (GET_CODE (addr) == SET)
10164 ;
10165 else if (GET_CODE (addr) == PARALLEL
10166 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10167 addr = XVECEXP (addr, 0, 0);
10168 else
10169 abort ();
10170 addr = SET_SRC (addr);
10171 }
10172 else
10173 {
10174 int i;
10175 extract_insn_cached (insn);
10176 for (i = recog_data.n_operands - 1; i >= 0; --i)
10177 if (GET_CODE (recog_data.operand[i]) == MEM)
10178 {
10179 addr = XEXP (recog_data.operand[i], 0);
10180 goto found;
10181 }
10182 return 0;
10183 found:;
10184 }
10185
10186 return modified_in_p (addr, dep_insn);
10187 }
10188
10189 static int
10190 ix86_adjust_cost (insn, link, dep_insn, cost)
10191 rtx insn, link, dep_insn;
10192 int cost;
10193 {
10194 enum attr_type insn_type, dep_insn_type;
10195 enum attr_memory memory, dep_memory;
10196 rtx set, set2;
10197 int dep_insn_code_number;
10198
10199 /* Anti and output dependencies have zero cost on all CPUs. */
10200 if (REG_NOTE_KIND (link) != 0)
10201 return 0;
10202
10203 dep_insn_code_number = recog_memoized (dep_insn);
10204
10205 /* If we can't recognize the insns, we can't really do anything. */
10206 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10207 return cost;
10208
10209 insn_type = get_attr_type (insn);
10210 dep_insn_type = get_attr_type (dep_insn);
10211
10212 switch (ix86_cpu)
10213 {
10214 case PROCESSOR_PENTIUM:
10215 /* Address Generation Interlock adds a cycle of latency. */
10216 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10217 cost += 1;
10218
10219 /* ??? Compares pair with jump/setcc. */
10220 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10221 cost = 0;
10222
10223 /* Floating point stores require the value to be ready one cycle earlier. */
10224 if (insn_type == TYPE_FMOV
10225 && get_attr_memory (insn) == MEMORY_STORE
10226 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10227 cost += 1;
10228 break;
10229
10230 case PROCESSOR_PENTIUMPRO:
10231 memory = get_attr_memory (insn);
10232 dep_memory = get_attr_memory (dep_insn);
10233
10234 /* Since we can't represent delayed latencies of load+operation,
10235 increase the cost here for non-imov insns. */
10236 if (dep_insn_type != TYPE_IMOV
10237 && dep_insn_type != TYPE_FMOV
10238 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10239 cost += 1;
10240
10241 /* INT->FP conversion is expensive. */
10242 if (get_attr_fp_int_src (dep_insn))
10243 cost += 5;
10244
10245 /* There is one cycle extra latency between an FP op and a store. */
10246 if (insn_type == TYPE_FMOV
10247 && (set = single_set (dep_insn)) != NULL_RTX
10248 && (set2 = single_set (insn)) != NULL_RTX
10249 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10250 && GET_CODE (SET_DEST (set2)) == MEM)
10251 cost += 1;
10252
10253 /* Show the ability of the reorder buffer to hide the latency of a load by
10254 executing it in parallel with the previous instruction when the
10255 previous instruction is not needed to compute the address. */
10256 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10257 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10258 {
10259 /* Claim moves to take one cycle, as the core can issue one load
10260 at a time and the next load can start a cycle later. */
10261 if (dep_insn_type == TYPE_IMOV
10262 || dep_insn_type == TYPE_FMOV)
10263 cost = 1;
10264 else if (cost > 1)
10265 cost--;
10266 }
10267 break;
10268
10269 case PROCESSOR_K6:
10270 memory = get_attr_memory (insn);
10271 dep_memory = get_attr_memory (dep_insn);
10272 /* The esp dependency is resolved before the instruction is really
10273 finished. */
10274 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10275 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10276 return 1;
10277
10278 /* Since we can't represent delayed latencies of load+operation,
10279 increase the cost here for non-imov insns. */
10280 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10281 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10282
10283 /* INT->FP conversion is expensive. */
10284 if (get_attr_fp_int_src (dep_insn))
10285 cost += 5;
10286
10287 /* Show the ability of the reorder buffer to hide the latency of a load by
10288 executing it in parallel with the previous instruction when the
10289 previous instruction is not needed to compute the address. */
10290 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10291 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10292 {
10293 /* Claim moves to take one cycle, as the core can issue one load
10294 at a time and the next load can start a cycle later. */
10295 if (dep_insn_type == TYPE_IMOV
10296 || dep_insn_type == TYPE_FMOV)
10297 cost = 1;
10298 else if (cost > 2)
10299 cost -= 2;
10300 else
10301 cost = 1;
10302 }
10303 break;
10304
10305 case PROCESSOR_ATHLON:
10306 memory = get_attr_memory (insn);
10307 dep_memory = get_attr_memory (dep_insn);
10308
10309 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10310 {
10311 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10312 cost += 2;
10313 else
10314 cost += 3;
10315 }
10316 /* Show the ability of the reorder buffer to hide the latency of a load by
10317 executing it in parallel with the previous instruction when the
10318 previous instruction is not needed to compute the address. */
10319 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10320 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10321 {
10322 /* Claim moves to take one cycle, as the core can issue one load
10323 at a time and the next load can start a cycle later. */
10324 if (dep_insn_type == TYPE_IMOV
10325 || dep_insn_type == TYPE_FMOV)
10326 cost = 0;
10327 else if (cost >= 3)
10328 cost -= 3;
10329 else
10330 cost = 0;
10331 }
10332
10333 default:
10334 break;
10335 }
10336
10337 return cost;
10338 }
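/* Editor's note: on the original Pentium, for example, the pair

       movl  %eax, %ebx
       movl  (%ebx), %ecx

   triggers the address-generation interlock handled above, so the cost of
   that dependence is bumped by one cycle.  */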
10339
10340 static union
10341 {
10342 struct ppro_sched_data
10343 {
10344 rtx decode[3];
10345 int issued_this_cycle;
10346 } ppro;
10347 } ix86_sched_data;
10348
10349 static enum attr_ppro_uops
10350 ix86_safe_ppro_uops (insn)
10351 rtx insn;
10352 {
10353 if (recog_memoized (insn) >= 0)
10354 return get_attr_ppro_uops (insn);
10355 else
10356 return PPRO_UOPS_MANY;
10357 }
10358
10359 static void
10360 ix86_dump_ppro_packet (dump)
10361 FILE *dump;
10362 {
10363 if (ix86_sched_data.ppro.decode[0])
10364 {
10365 fprintf (dump, "PPRO packet: %d",
10366 INSN_UID (ix86_sched_data.ppro.decode[0]));
10367 if (ix86_sched_data.ppro.decode[1])
10368 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10369 if (ix86_sched_data.ppro.decode[2])
10370 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10371 fputc ('\n', dump);
10372 }
10373 }
10374
10375 /* We're beginning a new block. Initialize data structures as necessary. */
10376
10377 static void
10378 ix86_sched_init (dump, sched_verbose, veclen)
10379 FILE *dump ATTRIBUTE_UNUSED;
10380 int sched_verbose ATTRIBUTE_UNUSED;
10381 int veclen ATTRIBUTE_UNUSED;
10382 {
10383 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10384 }
10385
10386 /* Shift INSN to SLOT, and shift everything else down. */
10387
10388 static void
10389 ix86_reorder_insn (insnp, slot)
10390 rtx *insnp, *slot;
10391 {
10392 if (insnp != slot)
10393 {
10394 rtx insn = *insnp;
10395 do
10396 insnp[0] = insnp[1];
10397 while (++insnp != slot);
10398 *insnp = insn;
10399 }
10400 }
10401
10402 static void
10403 ix86_sched_reorder_ppro (ready, e_ready)
10404 rtx *ready;
10405 rtx *e_ready;
10406 {
10407 rtx decode[3];
10408 enum attr_ppro_uops cur_uops;
10409 int issued_this_cycle;
10410 rtx *insnp;
10411 int i;
10412
10413 /* At this point .ppro.decode contains the state of the three
10414 decoders from last "cycle". That is, those insns that were
10415 actually independent. But here we're scheduling for the
10416 decoder, and we may find things that are decodable in the
10417 same cycle. */
10418
10419 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10420 issued_this_cycle = 0;
10421
10422 insnp = e_ready;
10423 cur_uops = ix86_safe_ppro_uops (*insnp);
10424
10425 /* If the decoders are empty, and we've a complex insn at the
10426 head of the priority queue, let it issue without complaint. */
10427 if (decode[0] == NULL)
10428 {
10429 if (cur_uops == PPRO_UOPS_MANY)
10430 {
10431 decode[0] = *insnp;
10432 goto ppro_done;
10433 }
10434
10435 /* Otherwise, search for a 2-4 uop insn to issue. */
10436 while (cur_uops != PPRO_UOPS_FEW)
10437 {
10438 if (insnp == ready)
10439 break;
10440 cur_uops = ix86_safe_ppro_uops (*--insnp);
10441 }
10442
10443 /* If so, move it to the head of the line. */
10444 if (cur_uops == PPRO_UOPS_FEW)
10445 ix86_reorder_insn (insnp, e_ready);
10446
10447 /* Issue the head of the queue. */
10448 issued_this_cycle = 1;
10449 decode[0] = *e_ready--;
10450 }
10451
10452 /* Look for simple insns to fill in the other two slots. */
10453 for (i = 1; i < 3; ++i)
10454 if (decode[i] == NULL)
10455 {
10456 if (ready > e_ready)
10457 goto ppro_done;
10458
10459 insnp = e_ready;
10460 cur_uops = ix86_safe_ppro_uops (*insnp);
10461 while (cur_uops != PPRO_UOPS_ONE)
10462 {
10463 if (insnp == ready)
10464 break;
10465 cur_uops = ix86_safe_ppro_uops (*--insnp);
10466 }
10467
10468 /* Found one. Move it to the head of the queue and issue it. */
10469 if (cur_uops == PPRO_UOPS_ONE)
10470 {
10471 ix86_reorder_insn (insnp, e_ready);
10472 decode[i] = *e_ready--;
10473 issued_this_cycle++;
10474 continue;
10475 }
10476
10477 /* ??? Didn't find one. Ideally, here we would do a lazy split
10478 of 2-uop insns, issue one and queue the other. */
10479 }
10480
10481 ppro_done:
10482 if (issued_this_cycle == 0)
10483 issued_this_cycle = 1;
10484 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10485 }
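/* Editor's note: the slotting above models the PPro/PII decoders, which can
   handle roughly one multi-uop instruction plus two single-uop instructions
   per cycle (the "4-1-1" decode template); hence one PPRO_UOPS_FEW/MANY
   slot and two PPRO_UOPS_ONE slots are filled each cycle.  */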
10486
10487 /* We are about to begin issuing insns for this clock cycle.
10488 Override the default sort algorithm to better slot instructions. */
10489 static int
10490 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10491 FILE *dump ATTRIBUTE_UNUSED;
10492 int sched_verbose ATTRIBUTE_UNUSED;
10493 rtx *ready;
10494 int *n_readyp;
10495 int clock_var ATTRIBUTE_UNUSED;
10496 {
10497 int n_ready = *n_readyp;
10498 rtx *e_ready = ready + n_ready - 1;
10499
10500 /* Make sure to go ahead and initialize key items in
10501 ix86_sched_data if we are not going to bother trying to
10502 reorder the ready queue. */
10503 if (n_ready < 2)
10504 {
10505 ix86_sched_data.ppro.issued_this_cycle = 1;
10506 goto out;
10507 }
10508
10509 switch (ix86_cpu)
10510 {
10511 default:
10512 break;
10513
10514 case PROCESSOR_PENTIUMPRO:
10515 ix86_sched_reorder_ppro (ready, e_ready);
10516 break;
10517 }
10518
10519 out:
10520 return ix86_issue_rate ();
10521 }
10522
10523 /* We are about to issue INSN. Return the number of insns left on the
10524 ready queue that can be issued this cycle. */
10525
10526 static int
10527 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10528 FILE *dump;
10529 int sched_verbose;
10530 rtx insn;
10531 int can_issue_more;
10532 {
10533 int i;
10534 switch (ix86_cpu)
10535 {
10536 default:
10537 return can_issue_more - 1;
10538
10539 case PROCESSOR_PENTIUMPRO:
10540 {
10541 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10542
10543 if (uops == PPRO_UOPS_MANY)
10544 {
10545 if (sched_verbose)
10546 ix86_dump_ppro_packet (dump);
10547 ix86_sched_data.ppro.decode[0] = insn;
10548 ix86_sched_data.ppro.decode[1] = NULL;
10549 ix86_sched_data.ppro.decode[2] = NULL;
10550 if (sched_verbose)
10551 ix86_dump_ppro_packet (dump);
10552 ix86_sched_data.ppro.decode[0] = NULL;
10553 }
10554 else if (uops == PPRO_UOPS_FEW)
10555 {
10556 if (sched_verbose)
10557 ix86_dump_ppro_packet (dump);
10558 ix86_sched_data.ppro.decode[0] = insn;
10559 ix86_sched_data.ppro.decode[1] = NULL;
10560 ix86_sched_data.ppro.decode[2] = NULL;
10561 }
10562 else
10563 {
10564 for (i = 0; i < 3; ++i)
10565 if (ix86_sched_data.ppro.decode[i] == NULL)
10566 {
10567 ix86_sched_data.ppro.decode[i] = insn;
10568 break;
10569 }
10570 if (i == 3)
10571 abort ();
10572 if (i == 2)
10573 {
10574 if (sched_verbose)
10575 ix86_dump_ppro_packet (dump);
10576 ix86_sched_data.ppro.decode[0] = NULL;
10577 ix86_sched_data.ppro.decode[1] = NULL;
10578 ix86_sched_data.ppro.decode[2] = NULL;
10579 }
10580 }
10581 }
10582 return --ix86_sched_data.ppro.issued_this_cycle;
10583 }
10584 }
10585
10586 static int
10587 ia32_use_dfa_pipeline_interface ()
10588 {
10589 if (ix86_cpu == PROCESSOR_PENTIUM)
10590 return 1;
10591 return 0;
10592 }
10593
10594 /* How many alternative schedules to try. This should be as wide as the
10595 scheduling freedom in the DFA, but no wider. Making this value too
10596 large results in extra work for the scheduler. */
10597
10598 static int
10599 ia32_multipass_dfa_lookahead ()
10600 {
10601 if (ix86_cpu == PROCESSOR_PENTIUM)
10602 return 2;
10603 else
10604 return 0;
10605 }
10606
10607 \f
10608 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10609 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10610 appropriate. */
10611
10612 void
10613 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10614 rtx insns;
10615 rtx dstref, srcref, dstreg, srcreg;
10616 {
10617 rtx insn;
10618
10619 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10620 if (INSN_P (insn))
10621 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10622 dstreg, srcreg);
10623 }
10624
10625 /* Subroutine of above to actually do the updating by recursively walking
10626 the rtx. */
10627
10628 static void
10629 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10630 rtx x;
10631 rtx dstref, srcref, dstreg, srcreg;
10632 {
10633 enum rtx_code code = GET_CODE (x);
10634 const char *format_ptr = GET_RTX_FORMAT (code);
10635 int i, j;
10636
10637 if (code == MEM && XEXP (x, 0) == dstreg)
10638 MEM_COPY_ATTRIBUTES (x, dstref);
10639 else if (code == MEM && XEXP (x, 0) == srcreg)
10640 MEM_COPY_ATTRIBUTES (x, srcref);
10641
10642 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10643 {
10644 if (*format_ptr == 'e')
10645 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10646 dstreg, srcreg);
10647 else if (*format_ptr == 'E')
10648 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10649 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10650 dstreg, srcreg);
10651 }
10652 }
10653 \f
10654 /* Compute the alignment given to a constant that is being placed in memory.
10655 EXP is the constant and ALIGN is the alignment that the object would
10656 ordinarily have.
10657 The value of this function is used instead of that alignment to align
10658 the object. */
10659
10660 int
10661 ix86_constant_alignment (exp, align)
10662 tree exp;
10663 int align;
10664 {
10665 if (TREE_CODE (exp) == REAL_CST)
10666 {
10667 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10668 return 64;
10669 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10670 return 128;
10671 }
10672 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10673 && align < 256)
10674 return 256;
10675
10676 return align;
10677 }
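/* Editor's sketch: under the rules above a double constant placed in memory
   is aligned to 64 bits even when its type only asks for 32, and a string
   literal of 31 or more characters is aligned to a 256-bit boundary
   (presumably so the inlined string operations can use wide, aligned
   accesses).  */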
10678
10679 /* Compute the alignment for a static variable.
10680 TYPE is the data type, and ALIGN is the alignment that
10681 the object would ordinarily have. The value of this function is used
10682 instead of that alignment to align the object. */
10683
10684 int
10685 ix86_data_alignment (type, align)
10686 tree type;
10687 int align;
10688 {
10689 if (AGGREGATE_TYPE_P (type)
10690 && TYPE_SIZE (type)
10691 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10692 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10693 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10694 return 256;
10695
10696 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10697 to a 16-byte boundary. */
10698 if (TARGET_64BIT)
10699 {
10700 if (AGGREGATE_TYPE_P (type)
10701 && TYPE_SIZE (type)
10702 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10703 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10704 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10705 return 128;
10706 }
10707
10708 if (TREE_CODE (type) == ARRAY_TYPE)
10709 {
10710 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10711 return 64;
10712 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10713 return 128;
10714 }
10715 else if (TREE_CODE (type) == COMPLEX_TYPE)
10716 {
10717
10718 if (TYPE_MODE (type) == DCmode && align < 64)
10719 return 64;
10720 if (TYPE_MODE (type) == XCmode && align < 128)
10721 return 128;
10722 }
10723 else if ((TREE_CODE (type) == RECORD_TYPE
10724 || TREE_CODE (type) == UNION_TYPE
10725 || TREE_CODE (type) == QUAL_UNION_TYPE)
10726 && TYPE_FIELDS (type))
10727 {
10728 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10729 return 64;
10730 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10731 return 128;
10732 }
10733 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10734 || TREE_CODE (type) == INTEGER_TYPE)
10735 {
10736 if (TYPE_MODE (type) == DFmode && align < 64)
10737 return 64;
10738 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10739 return 128;
10740 }
10741
10742 return align;
10743 }
10744
10745 /* Compute the alignment for a local variable.
10746 TYPE is the data type, and ALIGN is the alignment that
10747 the object would ordinarily have. The value of this macro is used
10748 instead of that alignment to align the object. */
10749
10750 int
10751 ix86_local_alignment (type, align)
10752 tree type;
10753 int align;
10754 {
10755 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10756 to a 16-byte boundary. */
10757 if (TARGET_64BIT)
10758 {
10759 if (AGGREGATE_TYPE_P (type)
10760 && TYPE_SIZE (type)
10761 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10762 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10763 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10764 return 128;
10765 }
10766 if (TREE_CODE (type) == ARRAY_TYPE)
10767 {
10768 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10769 return 64;
10770 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10771 return 128;
10772 }
10773 else if (TREE_CODE (type) == COMPLEX_TYPE)
10774 {
10775 if (TYPE_MODE (type) == DCmode && align < 64)
10776 return 64;
10777 if (TYPE_MODE (type) == XCmode && align < 128)
10778 return 128;
10779 }
10780 else if ((TREE_CODE (type) == RECORD_TYPE
10781 || TREE_CODE (type) == UNION_TYPE
10782 || TREE_CODE (type) == QUAL_UNION_TYPE)
10783 && TYPE_FIELDS (type))
10784 {
10785 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10786 return 64;
10787 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10788 return 128;
10789 }
10790 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10791 || TREE_CODE (type) == INTEGER_TYPE)
10792 {
10793
10794 if (TYPE_MODE (type) == DFmode && align < 64)
10795 return 64;
10796 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10797 return 128;
10798 }
10799 return align;
10800 }
10801 \f
10802 /* Emit RTL insns to initialize the variable parts of a trampoline.
10803 FNADDR is an RTX for the address of the function's pure code.
10804 CXT is an RTX for the static chain value for the function. */
10805 void
10806 x86_initialize_trampoline (tramp, fnaddr, cxt)
10807 rtx tramp, fnaddr, cxt;
10808 {
10809 if (!TARGET_64BIT)
10810 {
10811 /* Compute offset from the end of the jmp to the target function. */
10812 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10813 plus_constant (tramp, 10),
10814 NULL_RTX, 1, OPTAB_DIRECT);
10815 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10816 gen_int_mode (0xb9, QImode));
10817 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10818 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10819 gen_int_mode (0xe9, QImode));
10820 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10821 }
10822 else
10823 {
10824 int offset = 0;
10825 /* Try to load the address using the shorter movl instead of movabs.
10826 We may want to support movq for kernel mode, but the kernel does not use
10827 trampolines at the moment. */
10828 if (x86_64_zero_extended_value (fnaddr))
10829 {
10830 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10831 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10832 gen_int_mode (0xbb41, HImode));
10833 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10834 gen_lowpart (SImode, fnaddr));
10835 offset += 6;
10836 }
10837 else
10838 {
10839 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10840 gen_int_mode (0xbb49, HImode));
10841 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10842 fnaddr);
10843 offset += 10;
10844 }
10845 /* Load static chain using movabs to r10. */
10846 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10847 gen_int_mode (0xba49, HImode));
10848 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10849 cxt);
10850 offset += 10;
10851 /* Jump to r11. */
10852 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10853 gen_int_mode (0xff49, HImode));
10854 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10855 gen_int_mode (0xe3, QImode));
10856 offset += 3;
10857 if (offset > TRAMPOLINE_SIZE)
10858 abort ();
10859 }
10860 }
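/* Editor's sketch of the trampolines laid out above (bytes in memory
   order).  32-bit:

       b9 <cxt:4>      movl  $cxt, %ecx
       e9 <disp:4>     jmp   fnaddr          # disp relative to tramp + 10

   64-bit: 41 bb <imm32> or 49 bb <imm64> loads %r11 with the target,
   49 ba <imm64> loads %r10 with the static chain, and 49 ff e3 is
   jmp *%r11.  */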
10861 \f
10862 #define def_builtin(MASK, NAME, TYPE, CODE) \
10863 do { \
10864 if ((MASK) & target_flags) \
10865 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10866 } while (0)
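/* Editor's note: later in this file the macro is used along the lines of

       def_builtin (MASK_SSE1, "__builtin_ia32_addps",
                    v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   (the type-node name is illustrative); the builtin is registered only when
   the corresponding mask bit is present in target_flags.  */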
10867
10868 struct builtin_description
10869 {
10870 const unsigned int mask;
10871 const enum insn_code icode;
10872 const char *const name;
10873 const enum ix86_builtins code;
10874 const enum rtx_code comparison;
10875 const unsigned int flag;
10876 };
10877
10878 /* Used for builtins that are enabled both by -msse and -msse2. */
10879 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
10880
10881 static const struct builtin_description bdesc_comi[] =
10882 {
10883 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10884 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10885 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10886 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10887 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10888 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10889 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10890 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10891 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10892 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10893 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10894 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
10895 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
10896 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
10897 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
10898 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
10899 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
10900 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
10901 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
10902 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
10903 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
10904 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
10905 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
10906 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
10907 };
10908
10909 static const struct builtin_description bdesc_2arg[] =
10910 {
10911 /* SSE */
10912 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10913 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10914 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10915 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10916 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10917 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10918 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10919 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10920
10921 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10922 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10923 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10924 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10925 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10926 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10927 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10928 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10929 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10930 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10931 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10932 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10933 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10934 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10935 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10936 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10937 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10938 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10939 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10940 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10941 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10942 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10943 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10944 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10945
10946 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10947 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10948 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10949 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10950
10951 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10952 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10953 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10954 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10955 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10956
10957 /* MMX */
10958 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10959 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10960 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10961 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10962 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10963 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10964
10965 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10966 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10967 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10968 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10969 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10970 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10971 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10972 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10973
10974 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10975 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10976 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10977
10978 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10979 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10980 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10981 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10982
10983 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10984 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10985
10986 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10987 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10988 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10989 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10990 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10991 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10992
10993 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10994 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10995 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10996 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10997
10998 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10999 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11000 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11001 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11002 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11003 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11004
11005 /* Special. */
11006 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11007 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11008 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11009
11010 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11011 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11012
11013 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11014 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11015 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11016 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11017 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11018 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11019
11020 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11021 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11022 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11023 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11024 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11025 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11026
11027 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11028 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11029 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11030 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11031
11032 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11033 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11034
11035 /* SSE2 */
11036 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11037 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11038 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11039 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11040 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11041 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11042 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11043 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11044
11045 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11046 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11047 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11048 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11049 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11050 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11051 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11052 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11053 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11054 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11055 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11056 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11057 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11058 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11059 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11060 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11061 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11062 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11063 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11064 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11065 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11066 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11067 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11068 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11069
11070 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11071 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11072 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11073 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11074
11075 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11076 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11077 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11078 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11079
11080 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11081 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11082 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11083
11084 /* SSE2 MMX */
11085 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11086 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11087 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11088 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11089 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11090 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11091 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11092 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11093
11094   { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11095   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11096   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11097   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11098   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11099   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11100   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11101   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11102
11103 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11104 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11105 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11106 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11107
11108 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11109 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11110 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11111 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11112
11113 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11114 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11115
11116 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11117 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11118 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11119 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11120 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11121 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11122
11123 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11124 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11125 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11126 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11127
11128 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11129 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11130 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11131 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11132 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11133 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11134
11135 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11136 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11137 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11138
11139 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11140 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11141
11142 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11143 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11144 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11145 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11146 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11147 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11148
11149 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11150 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11151 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11152 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11153 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11154 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11155
11156 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11157 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11158 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11159 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11160
11161 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11162
11163 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11164 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11165 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11166 };
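/* Illustration added for exposition: how one bdesc_2arg entry is read.
   Judging from the accesses d->mask, d->icode, d->name, d->code,
   d->comparison and d->flag elsewhere in this file, the initializer fields
   are (mask, icode, name, code, comparison, flag).  So

     { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd",
       IX86_BUILTIN_ADDPD, 0, 0 }

   means: register the builtin only when SSE2 is enabled (-msse2), expand it
   through the addv2df3 insn pattern, expose it to users as
   __builtin_ia32_addpd, dispatch on IX86_BUILTIN_ADDPD, and use no
   comparison code or operand swap (those last two fields only matter for
   the cmp* entries).  */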
11167
11168 static const struct builtin_description bdesc_1arg[] =
11169 {
11170 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11171 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11172
11173 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11174 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11175 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11176
11177 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11178 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11179 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11180 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11181
11182 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11183 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11184 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11185
11186 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11187
11188 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11189 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11190
11191 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11192 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11193 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11194 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11195 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11196
11197 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11198
11199 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11200 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11201
11202 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11203 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11204 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11205 };
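/* Note added for exposition: every bdesc_1arg entry above carries a null
   name, so nothing is registered from this table directly; the user-visible
   names for these single-operand operations come from the explicit
   def_builtin calls in ix86_init_mmx_sse_builtins below (for instance
   __builtin_ia32_sqrtps for CODE_FOR_sqrtv4sf2 and __builtin_ia32_movmskps
   for CODE_FOR_sse_movmskps), while the table itself supplies the icode
   that is presumably looked up when the builtin is expanded.  */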
11206
11207 void
11208 ix86_init_builtins ()
11209 {
11210 if (TARGET_MMX)
11211 ix86_init_mmx_sse_builtins ();
11212 }
11213
11214 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
11215    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
11216    builtins are defined.  */
11217 static void
11218 ix86_init_mmx_sse_builtins ()
11219 {
11220 const struct builtin_description * d;
11221 size_t i;
11222 tree endlink = void_list_node;
11223
11224 tree pchar_type_node = build_pointer_type (char_type_node);
11225 tree pfloat_type_node = build_pointer_type (float_type_node);
11226 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11227 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11228 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11229
11230 /* Comparisons. */
11231 tree int_ftype_v4sf_v4sf
11232 = build_function_type (integer_type_node,
11233 tree_cons (NULL_TREE, V4SF_type_node,
11234 tree_cons (NULL_TREE,
11235 V4SF_type_node,
11236 endlink)));
11237 tree v4si_ftype_v4sf_v4sf
11238 = build_function_type (V4SI_type_node,
11239 tree_cons (NULL_TREE, V4SF_type_node,
11240 tree_cons (NULL_TREE,
11241 V4SF_type_node,
11242 endlink)));
11243 /* MMX/SSE/integer conversions. */
11244 tree int_ftype_v4sf
11245 = build_function_type (integer_type_node,
11246 tree_cons (NULL_TREE, V4SF_type_node,
11247 endlink));
11248 tree int_ftype_v8qi
11249 = build_function_type (integer_type_node,
11250 tree_cons (NULL_TREE, V8QI_type_node,
11251 endlink));
11252 tree v4sf_ftype_v4sf_int
11253 = build_function_type (V4SF_type_node,
11254 tree_cons (NULL_TREE, V4SF_type_node,
11255 tree_cons (NULL_TREE, integer_type_node,
11256 endlink)));
11257 tree v4sf_ftype_v4sf_v2si
11258 = build_function_type (V4SF_type_node,
11259 tree_cons (NULL_TREE, V4SF_type_node,
11260 tree_cons (NULL_TREE, V2SI_type_node,
11261 endlink)));
11262 tree int_ftype_v4hi_int
11263 = build_function_type (integer_type_node,
11264 tree_cons (NULL_TREE, V4HI_type_node,
11265 tree_cons (NULL_TREE, integer_type_node,
11266 endlink)));
11267 tree v4hi_ftype_v4hi_int_int
11268 = build_function_type (V4HI_type_node,
11269 tree_cons (NULL_TREE, V4HI_type_node,
11270 tree_cons (NULL_TREE, integer_type_node,
11271 tree_cons (NULL_TREE,
11272 integer_type_node,
11273 endlink))));
11274 /* Miscellaneous. */
11275 tree v8qi_ftype_v4hi_v4hi
11276 = build_function_type (V8QI_type_node,
11277 tree_cons (NULL_TREE, V4HI_type_node,
11278 tree_cons (NULL_TREE, V4HI_type_node,
11279 endlink)));
11280 tree v4hi_ftype_v2si_v2si
11281 = build_function_type (V4HI_type_node,
11282 tree_cons (NULL_TREE, V2SI_type_node,
11283 tree_cons (NULL_TREE, V2SI_type_node,
11284 endlink)));
11285 tree v4sf_ftype_v4sf_v4sf_int
11286 = build_function_type (V4SF_type_node,
11287 tree_cons (NULL_TREE, V4SF_type_node,
11288 tree_cons (NULL_TREE, V4SF_type_node,
11289 tree_cons (NULL_TREE,
11290 integer_type_node,
11291 endlink))));
11292 tree v2si_ftype_v4hi_v4hi
11293 = build_function_type (V2SI_type_node,
11294 tree_cons (NULL_TREE, V4HI_type_node,
11295 tree_cons (NULL_TREE, V4HI_type_node,
11296 endlink)));
11297 tree v4hi_ftype_v4hi_int
11298 = build_function_type (V4HI_type_node,
11299 tree_cons (NULL_TREE, V4HI_type_node,
11300 tree_cons (NULL_TREE, integer_type_node,
11301 endlink)));
11302 tree v4hi_ftype_v4hi_di
11303 = build_function_type (V4HI_type_node,
11304 tree_cons (NULL_TREE, V4HI_type_node,
11305 tree_cons (NULL_TREE,
11306 long_long_integer_type_node,
11307 endlink)));
11308 tree v2si_ftype_v2si_di
11309 = build_function_type (V2SI_type_node,
11310 tree_cons (NULL_TREE, V2SI_type_node,
11311 tree_cons (NULL_TREE,
11312 long_long_integer_type_node,
11313 endlink)));
11314 tree void_ftype_void
11315 = build_function_type (void_type_node, endlink);
11316 tree void_ftype_unsigned
11317 = build_function_type (void_type_node,
11318 tree_cons (NULL_TREE, unsigned_type_node,
11319 endlink));
11320 tree unsigned_ftype_void
11321 = build_function_type (unsigned_type_node, endlink);
11322 tree di_ftype_void
11323 = build_function_type (long_long_unsigned_type_node, endlink);
11324 tree v4sf_ftype_void
11325 = build_function_type (V4SF_type_node, endlink);
11326 tree v2si_ftype_v4sf
11327 = build_function_type (V2SI_type_node,
11328 tree_cons (NULL_TREE, V4SF_type_node,
11329 endlink));
11330 /* Loads/stores. */
11331 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11332 tree_cons (NULL_TREE, V8QI_type_node,
11333 tree_cons (NULL_TREE,
11334 pchar_type_node,
11335 endlink)));
11336 tree void_ftype_v8qi_v8qi_pchar
11337 = build_function_type (void_type_node, maskmovq_args);
11338 tree v4sf_ftype_pfloat
11339 = build_function_type (V4SF_type_node,
11340 tree_cons (NULL_TREE, pfloat_type_node,
11341 endlink));
11342 /* @@@ the type is bogus */
11343 tree v4sf_ftype_v4sf_pv2si
11344 = build_function_type (V4SF_type_node,
11345 tree_cons (NULL_TREE, V4SF_type_node,
11346 tree_cons (NULL_TREE, pv2si_type_node,
11347 endlink)));
11348 tree void_ftype_pv2si_v4sf
11349 = build_function_type (void_type_node,
11350 tree_cons (NULL_TREE, pv2si_type_node,
11351 tree_cons (NULL_TREE, V4SF_type_node,
11352 endlink)));
11353 tree void_ftype_pfloat_v4sf
11354 = build_function_type (void_type_node,
11355 tree_cons (NULL_TREE, pfloat_type_node,
11356 tree_cons (NULL_TREE, V4SF_type_node,
11357 endlink)));
11358 tree void_ftype_pdi_di
11359 = build_function_type (void_type_node,
11360 tree_cons (NULL_TREE, pdi_type_node,
11361 tree_cons (NULL_TREE,
11362 long_long_unsigned_type_node,
11363 endlink)));
11364 tree void_ftype_pv2di_v2di
11365 = build_function_type (void_type_node,
11366 tree_cons (NULL_TREE, pv2di_type_node,
11367 tree_cons (NULL_TREE,
11368 V2DI_type_node,
11369 endlink)));
11370 /* Normal vector unops. */
11371 tree v4sf_ftype_v4sf
11372 = build_function_type (V4SF_type_node,
11373 tree_cons (NULL_TREE, V4SF_type_node,
11374 endlink));
11375
11376 /* Normal vector binops. */
11377 tree v4sf_ftype_v4sf_v4sf
11378 = build_function_type (V4SF_type_node,
11379 tree_cons (NULL_TREE, V4SF_type_node,
11380 tree_cons (NULL_TREE, V4SF_type_node,
11381 endlink)));
11382 tree v8qi_ftype_v8qi_v8qi
11383 = build_function_type (V8QI_type_node,
11384 tree_cons (NULL_TREE, V8QI_type_node,
11385 tree_cons (NULL_TREE, V8QI_type_node,
11386 endlink)));
11387 tree v4hi_ftype_v4hi_v4hi
11388 = build_function_type (V4HI_type_node,
11389 tree_cons (NULL_TREE, V4HI_type_node,
11390 tree_cons (NULL_TREE, V4HI_type_node,
11391 endlink)));
11392 tree v2si_ftype_v2si_v2si
11393 = build_function_type (V2SI_type_node,
11394 tree_cons (NULL_TREE, V2SI_type_node,
11395 tree_cons (NULL_TREE, V2SI_type_node,
11396 endlink)));
11397 tree di_ftype_di_di
11398 = build_function_type (long_long_unsigned_type_node,
11399 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11400 tree_cons (NULL_TREE,
11401 long_long_unsigned_type_node,
11402 endlink)));
11403
11404 tree v2si_ftype_v2sf
11405 = build_function_type (V2SI_type_node,
11406 tree_cons (NULL_TREE, V2SF_type_node,
11407 endlink));
11408 tree v2sf_ftype_v2si
11409 = build_function_type (V2SF_type_node,
11410 tree_cons (NULL_TREE, V2SI_type_node,
11411 endlink));
11412 tree v2si_ftype_v2si
11413 = build_function_type (V2SI_type_node,
11414 tree_cons (NULL_TREE, V2SI_type_node,
11415 endlink));
11416 tree v2sf_ftype_v2sf
11417 = build_function_type (V2SF_type_node,
11418 tree_cons (NULL_TREE, V2SF_type_node,
11419 endlink));
11420 tree v2sf_ftype_v2sf_v2sf
11421 = build_function_type (V2SF_type_node,
11422 tree_cons (NULL_TREE, V2SF_type_node,
11423 tree_cons (NULL_TREE,
11424 V2SF_type_node,
11425 endlink)));
11426 tree v2si_ftype_v2sf_v2sf
11427 = build_function_type (V2SI_type_node,
11428 tree_cons (NULL_TREE, V2SF_type_node,
11429 tree_cons (NULL_TREE,
11430 V2SF_type_node,
11431 endlink)));
11432 tree pint_type_node = build_pointer_type (integer_type_node);
11433 tree pdouble_type_node = build_pointer_type (double_type_node);
11434 tree int_ftype_v2df_v2df
11435 = build_function_type (integer_type_node,
11436 tree_cons (NULL_TREE, V2DF_type_node,
11437 tree_cons (NULL_TREE, V2DF_type_node, endlink)));
11438
11439 tree ti_ftype_void
11440 = build_function_type (intTI_type_node, endlink);
11441 tree ti_ftype_ti_ti
11442 = build_function_type (intTI_type_node,
11443 tree_cons (NULL_TREE, intTI_type_node,
11444 tree_cons (NULL_TREE, intTI_type_node,
11445 endlink)));
11446 tree void_ftype_pvoid
11447 = build_function_type (void_type_node,
11448 tree_cons (NULL_TREE, ptr_type_node, endlink));
11449 tree v2di_ftype_di
11450 = build_function_type (V2DI_type_node,
11451 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11452 endlink));
11453 tree v4sf_ftype_v4si
11454 = build_function_type (V4SF_type_node,
11455 tree_cons (NULL_TREE, V4SI_type_node, endlink));
11456 tree v4si_ftype_v4sf
11457 = build_function_type (V4SI_type_node,
11458 tree_cons (NULL_TREE, V4SF_type_node, endlink));
11459 tree v2df_ftype_v4si
11460 = build_function_type (V2DF_type_node,
11461 tree_cons (NULL_TREE, V4SI_type_node, endlink));
11462 tree v4si_ftype_v2df
11463 = build_function_type (V4SI_type_node,
11464 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11465 tree v2si_ftype_v2df
11466 = build_function_type (V2SI_type_node,
11467 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11468 tree v4sf_ftype_v2df
11469 = build_function_type (V4SF_type_node,
11470 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11471 tree v2df_ftype_v2si
11472 = build_function_type (V2DF_type_node,
11473 tree_cons (NULL_TREE, V2SI_type_node, endlink));
11474 tree v2df_ftype_v4sf
11475 = build_function_type (V2DF_type_node,
11476 tree_cons (NULL_TREE, V4SF_type_node, endlink));
11477 tree int_ftype_v2df
11478 = build_function_type (integer_type_node,
11479 tree_cons (NULL_TREE, V2DF_type_node, endlink));
11480 tree v2df_ftype_v2df_int
11481 = build_function_type (V2DF_type_node,
11482 tree_cons (NULL_TREE, V2DF_type_node,
11483 tree_cons (NULL_TREE, integer_type_node,
11484 endlink)));
11485 tree v4sf_ftype_v4sf_v2df
11486 = build_function_type (V4SF_type_node,
11487 tree_cons (NULL_TREE, V4SF_type_node,
11488 tree_cons (NULL_TREE, V2DF_type_node,
11489 endlink)));
11490 tree v2df_ftype_v2df_v4sf
11491 = build_function_type (V2DF_type_node,
11492 tree_cons (NULL_TREE, V2DF_type_node,
11493 tree_cons (NULL_TREE, V4SF_type_node,
11494 endlink)));
11495 tree v2df_ftype_v2df_v2df_int
11496 = build_function_type (V2DF_type_node,
11497 tree_cons (NULL_TREE, V2DF_type_node,
11498 tree_cons (NULL_TREE, V2DF_type_node,
11499 tree_cons (NULL_TREE,
11500 integer_type_node,
11501 endlink))));
11502 tree v2df_ftype_v2df_pv2si
11503 = build_function_type (V2DF_type_node,
11504 tree_cons (NULL_TREE, V2DF_type_node,
11505 tree_cons (NULL_TREE, pv2si_type_node,
11506 endlink)));
11507 tree void_ftype_pv2si_v2df
11508 = build_function_type (void_type_node,
11509 tree_cons (NULL_TREE, pv2si_type_node,
11510 tree_cons (NULL_TREE, V2DF_type_node,
11511 endlink)));
11512 tree void_ftype_pdouble_v2df
11513 = build_function_type (void_type_node,
11514 tree_cons (NULL_TREE, pdouble_type_node,
11515 tree_cons (NULL_TREE, V2DF_type_node,
11516 endlink)));
11517 tree void_ftype_pint_int
11518 = build_function_type (void_type_node,
11519 tree_cons (NULL_TREE, pint_type_node,
11520 tree_cons (NULL_TREE, integer_type_node,
11521 endlink)));
11522 tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
11523 tree_cons (NULL_TREE, V16QI_type_node,
11524 tree_cons (NULL_TREE,
11525 pchar_type_node,
11526 endlink)));
11527 tree void_ftype_v16qi_v16qi_pchar
11528 = build_function_type (void_type_node, maskmovdqu_args);
11529 tree v2df_ftype_pdouble
11530 = build_function_type (V2DF_type_node,
11531 tree_cons (NULL_TREE, pdouble_type_node,
11532 endlink));
11533 tree v2df_ftype_v2df_v2df
11534 = build_function_type (V2DF_type_node,
11535 tree_cons (NULL_TREE, V2DF_type_node,
11536 tree_cons (NULL_TREE, V2DF_type_node,
11537 endlink)));
11538 tree v16qi_ftype_v16qi_v16qi
11539 = build_function_type (V16QI_type_node,
11540 tree_cons (NULL_TREE, V16QI_type_node,
11541 tree_cons (NULL_TREE, V16QI_type_node,
11542 endlink)));
11543 tree v8hi_ftype_v8hi_v8hi
11544 = build_function_type (V8HI_type_node,
11545 tree_cons (NULL_TREE, V8HI_type_node,
11546 tree_cons (NULL_TREE, V8HI_type_node,
11547 endlink)));
11548 tree v4si_ftype_v4si_v4si
11549 = build_function_type (V4SI_type_node,
11550 tree_cons (NULL_TREE, V4SI_type_node,
11551 tree_cons (NULL_TREE, V4SI_type_node,
11552 endlink)));
11553 tree v2di_ftype_v2di_v2di
11554 = build_function_type (V2DI_type_node,
11555 tree_cons (NULL_TREE, V2DI_type_node,
11556 tree_cons (NULL_TREE, V2DI_type_node,
11557 endlink)));
11558 tree v2di_ftype_v2df_v2df
11559 = build_function_type (V2DI_type_node,
11560 tree_cons (NULL_TREE, V2DF_type_node,
11561 tree_cons (NULL_TREE, V2DF_type_node,
11562 endlink)));
11563 tree v2df_ftype_v2df
11564 = build_function_type (V2DF_type_node,
11565 tree_cons (NULL_TREE, V2DF_type_node,
11566 endlink));
11567 tree v2df_ftype_double
11568 = build_function_type (V2DF_type_node,
11569 tree_cons (NULL_TREE, double_type_node,
11570 endlink));
11571 tree v2df_ftype_double_double
11572 = build_function_type (V2DF_type_node,
11573 tree_cons (NULL_TREE, double_type_node,
11574 tree_cons (NULL_TREE, double_type_node,
11575 endlink)));
11576 tree int_ftype_v8hi_int
11577 = build_function_type (integer_type_node,
11578 tree_cons (NULL_TREE, V8HI_type_node,
11579 tree_cons (NULL_TREE, integer_type_node,
11580 endlink)));
11581 tree v8hi_ftype_v8hi_int_int
11582 = build_function_type (V8HI_type_node,
11583 tree_cons (NULL_TREE, V8HI_type_node,
11584 tree_cons (NULL_TREE, integer_type_node,
11585 tree_cons (NULL_TREE,
11586 integer_type_node,
11587 endlink))));
11588 tree v2di_ftype_v2di_int
11589 = build_function_type (V2DI_type_node,
11590 tree_cons (NULL_TREE, V2DI_type_node,
11591 tree_cons (NULL_TREE, integer_type_node,
11592 endlink)));
11593 tree v4si_ftype_v4si_int
11594 = build_function_type (V4SI_type_node,
11595 tree_cons (NULL_TREE, V4SI_type_node,
11596 tree_cons (NULL_TREE, integer_type_node,
11597 endlink)));
11598 tree v8hi_ftype_v8hi_int
11599 = build_function_type (V8HI_type_node,
11600 tree_cons (NULL_TREE, V8HI_type_node,
11601 tree_cons (NULL_TREE, integer_type_node,
11602 endlink)));
11603 tree v8hi_ftype_v8hi_v2di
11604 = build_function_type (V8HI_type_node,
11605 tree_cons (NULL_TREE, V8HI_type_node,
11606 tree_cons (NULL_TREE, V2DI_type_node,
11607 endlink)));
11608 tree v4si_ftype_v4si_v2di
11609 = build_function_type (V4SI_type_node,
11610 tree_cons (NULL_TREE, V4SI_type_node,
11611 tree_cons (NULL_TREE, V2DI_type_node,
11612 endlink)));
11613 tree v4si_ftype_v8hi_v8hi
11614 = build_function_type (V4SI_type_node,
11615 tree_cons (NULL_TREE, V8HI_type_node,
11616 tree_cons (NULL_TREE, V8HI_type_node,
11617 endlink)));
11618 tree di_ftype_v8qi_v8qi
11619 = build_function_type (long_long_unsigned_type_node,
11620 tree_cons (NULL_TREE, V8QI_type_node,
11621 tree_cons (NULL_TREE, V8QI_type_node,
11622 endlink)));
11623 tree v2di_ftype_v16qi_v16qi
11624 = build_function_type (V2DI_type_node,
11625 tree_cons (NULL_TREE, V16QI_type_node,
11626 tree_cons (NULL_TREE, V16QI_type_node,
11627 endlink)));
11628 tree int_ftype_v16qi
11629 = build_function_type (integer_type_node,
11630 tree_cons (NULL_TREE, V16QI_type_node, endlink));
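/* Illustration added for exposition: the *_ftype_* trees above are ordinary
   function types, built from a return type and a tree_cons chain of argument
   types terminated by endlink (void_list_node).  For example the chain for
   v4sf_ftype_v4sf_v4sf describes a function taking two V4SF vectors and
   returning one, roughly "v4sf f (v4sf, v4sf)"; the void_list_node
   terminator is what makes the builtins fully prototyped rather than
   variadic.  */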
11631
11632 /* Add all builtins that are more or less simple operations on two
11633 operands. */
11634 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11635 {
11636 /* Use one of the operands; the target can have a different mode for
11637 mask-generating compares. */
11638 enum machine_mode mode;
11639 tree type;
11640
11641 if (d->name == 0)
11642 continue;
11643 mode = insn_data[d->icode].operand[1].mode;
11644
11645 switch (mode)
11646 {
11647 case V16QImode:
11648 type = v16qi_ftype_v16qi_v16qi;
11649 break;
11650 case V8HImode:
11651 type = v8hi_ftype_v8hi_v8hi;
11652 break;
11653 case V4SImode:
11654 type = v4si_ftype_v4si_v4si;
11655 break;
11656 case V2DImode:
11657 type = v2di_ftype_v2di_v2di;
11658 break;
11659 case V2DFmode:
11660 type = v2df_ftype_v2df_v2df;
11661 break;
11662 case TImode:
11663 type = ti_ftype_ti_ti;
11664 break;
11665 case V4SFmode:
11666 type = v4sf_ftype_v4sf_v4sf;
11667 break;
11668 case V8QImode:
11669 type = v8qi_ftype_v8qi_v8qi;
11670 break;
11671 case V4HImode:
11672 type = v4hi_ftype_v4hi_v4hi;
11673 break;
11674 case V2SImode:
11675 type = v2si_ftype_v2si_v2si;
11676 break;
11677 case DImode:
11678 type = di_ftype_di_di;
11679 break;
11680
11681 default:
11682 abort ();
11683 }
11684
11685 /* Override for comparisons. */
11686 if (d->icode == CODE_FOR_maskcmpv4sf3
11687 || d->icode == CODE_FOR_maskncmpv4sf3
11688 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11689 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11690 type = v4si_ftype_v4sf_v4sf;
11691
11692 if (d->icode == CODE_FOR_maskcmpv2df3
11693 || d->icode == CODE_FOR_maskncmpv2df3
11694 || d->icode == CODE_FOR_vmmaskcmpv2df3
11695 || d->icode == CODE_FOR_vmmaskncmpv2df3)
11696 type = v2di_ftype_v2df_v2df;
11697
11698 def_builtin (d->mask, d->name, type, d->code);
11699 }
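/* Worked example added for exposition, tracing one pass of the loop above:
   for { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128",
   IX86_BUILTIN_PMULLW128, 0, 0 } the mode of insn operand 1 is V8HImode, so
   the switch picks v8hi_ftype_v8hi_v8hi and def_builtin registers
   __builtin_ia32_pmullw128 under MASK_SSE2.  The mask-generating compare
   icodes are overridden afterwards because their results are integer
   vectors (v4si or v2di) even though their operands are float vectors.  */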
11700
11701 /* Add the remaining MMX insns with somewhat more complicated types. */
11702 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11703 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11704 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11705 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11706 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11707 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11708 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11709
11710 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11711 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11712 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11713
11714 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11715 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11716
11717 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11718 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11719
11720 /* comi/ucomi insns. */
11721 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
11722 if (d->mask == MASK_SSE2)
11723 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
11724 else
11725 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11726
11727 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11728 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11729 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11730
11731 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11732 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11733 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11734 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11735 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11736 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11737
11738 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11739 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11740 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11741 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11742
11743 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11744 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11745
11746 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11747
11748 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11749 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11750 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11751 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11752 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11753 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11754
11755 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11756 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11757 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11758 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11759
11760 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11761 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11762 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11763 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11764
11765 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11766
11767 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11768
11769 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11770 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11771 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11772 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11773 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11774 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11775
11776 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11777
11778 /* Original 3DNow! */
11779 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11780 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11781 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11782 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11783 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11784 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11785 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11786 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11787 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11788 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11789 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11790 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11791 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11792 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11793 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11794 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11795 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11796 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11797 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11798 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11799
11800 /* 3DNow! extension as used in the Athlon CPU. */
11801 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11802 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11803 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11804 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11805 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11806 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11807
11808 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11809
11810 /* SSE2 */
11811 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
11812 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
11813
11814 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
11815 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
11816
11817 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
11818 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
11819 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
11820 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
11821 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
11822 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
11823
11824 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
11825 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
11826 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
11827 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
11828
11829 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
11830 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
11831 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
11832 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
11833 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
11834
11835 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
11836 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
11837 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
11838 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
11839
11840 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
11841 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
11842
11843 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
11844
11845 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
11846 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
11847
11848 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
11849 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
11850 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
11851 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
11852 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
11853
11854 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
11855
11856 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
11857 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
11858
11859 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
11860 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
11861 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
11862
11863 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
11864 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
11865 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
11866
11867 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
11868 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
11869 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
11870 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
11871 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
11872 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
11873 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
11874
11875 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
11876 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
11877 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
11878
11879 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
11880 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
11881 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
11882
11883 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
11884 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
11885 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
11886
11887 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
11888 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
11889
11890 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
11891 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
11892 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
11893
11894 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
11895 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
11896 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
11897
11898 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
11899 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
11900
11901 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
11902 }
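/* Usage sketch added for exposition (not part of GCC itself): once the
   definitions above are in effect, code compiled with the matching -m
   options can call these builtins directly, for example

     __builtin_ia32_sfence ();                -msse (or the Athlon 3DNow! extension)
     x = __builtin_ia32_shufps (a, b, 0x1b);  -msse; a, b, x have the V4SF vector type
     __builtin_ia32_storeupd (p, v);          -msse2; p is a double *, v has the V2DF type

   How the vector types are spelled on the user side (typically via the
   *mmintrin.h headers) is outside this file; the prototypes follow the
   *_ftype_* trees passed to def_builtin above.  */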
11903
11904 /* Errors in the source file can cause expand_expr to return const0_rtx
11905 where we expect a vector. To avoid crashing, use one of the vector
11906 clear instructions. */
11907 static rtx
11908 safe_vector_operand (x, mode)
11909 rtx x;
11910 enum machine_mode mode;
11911 {
11912 if (x != const0_rtx)
11913 return x;
11914 x = gen_reg_rtx (mode);
11915
11916 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11917 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11918 : gen_rtx_SUBREG (DImode, x, 0)));
11919 else
11920 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11921 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11922 return x;
11923 }
11924
11925 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11926
11927 static rtx
11928 ix86_expand_binop_builtin (icode, arglist, target)
11929 enum insn_code icode;
11930 tree arglist;
11931 rtx target;
11932 {
11933 rtx pat;
11934 tree arg0 = TREE_VALUE (arglist);
11935 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11936 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11937 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11938 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11939 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11940 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11941
11942 if (VECTOR_MODE_P (mode0))
11943 op0 = safe_vector_operand (op0, mode0);
11944 if (VECTOR_MODE_P (mode1))
11945 op1 = safe_vector_operand (op1, mode1);
11946
11947 if (! target
11948 || GET_MODE (target) != tmode
11949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11950 target = gen_reg_rtx (tmode);
11951
11952   /* The operands must already have the modes that the insn pattern
11953      expects for its inputs; anything else is a bug in the builtin tables, so abort.  */
11954 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11955 abort ();
11956
11957 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11958 op0 = copy_to_mode_reg (mode0, op0);
11959 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11960 op1 = copy_to_mode_reg (mode1, op1);
11961
11962 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11963 yet one of the two must not be a memory. This is normally enforced
11964 by expanders, but we didn't bother to create one here. */
11965 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11966 op0 = copy_to_mode_reg (mode0, op0);
11967
11968 pat = GEN_FCN (icode) (target, op0, op1);
11969 if (! pat)
11970 return 0;
11971 emit_insn (pat);
11972 return target;
11973 }
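/* Flow sketch added for exposition: for a simple table-driven builtin such
   as __builtin_ia32_paddw128 (icode CODE_FOR_addv8hi3), ix86_expand_builtin
   ends up here with the two vector arguments; tmode, mode0 and mode1 are all
   V8HImode, the operands are copied into registers where the insn predicates
   demand it, and the generated add insn is emitted with a fresh (or reused)
   register as the result, which is what the caller gets back.  */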
11974
11975 /* In type_for_mode we restrict the ability to create TImode types
11976    to hosts with a 64-bit HOST_WIDE_INT.  So we've defined the SSE
11977    logicals to have a V4SFmode signature.  Convert them in-place to TImode.  */
11978
11979 static rtx
11980 ix86_expand_timode_binop_builtin (icode, arglist, target)
11981 enum insn_code icode;
11982 tree arglist;
11983 rtx target;
11984 {
11985 rtx pat;
11986 tree arg0 = TREE_VALUE (arglist);
11987 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11988 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11989 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11990
11991 op0 = gen_lowpart (TImode, op0);
11992 op1 = gen_lowpart (TImode, op1);
11993 target = gen_reg_rtx (TImode);
11994
11995 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11996 op0 = copy_to_mode_reg (TImode, op0);
11997 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11998 op1 = copy_to_mode_reg (TImode, op1);
11999
12000 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12001 yet one of the two must not be a memory. This is normally enforced
12002 by expanders, but we didn't bother to create one here. */
12003 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12004 op0 = copy_to_mode_reg (TImode, op0);
12005
12006 pat = GEN_FCN (icode) (target, op0, op1);
12007 if (! pat)
12008 return 0;
12009 emit_insn (pat);
12010
12011 return gen_lowpart (V4SFmode, target);
12012 }
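/* Example added for exposition: __builtin_ia32_andps is declared with a
   V4SFmode signature (see the ANDPS case in ix86_expand_builtin below) but
   is expanded here with CODE_FOR_sse_andti3: both operands are reinterpreted
   as TImode via gen_lowpart, the logical insn is emitted into a TImode
   temporary, and that temporary is handed back as its V4SFmode low part so
   the caller still sees the advertised type.  */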
12013
12014 /* Subroutine of ix86_expand_builtin to take care of stores. */
12015
12016 static rtx
12017 ix86_expand_store_builtin (icode, arglist)
12018 enum insn_code icode;
12019 tree arglist;
12020 {
12021 rtx pat;
12022 tree arg0 = TREE_VALUE (arglist);
12023 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12024 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12025 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12026 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12027 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12028
12029 if (VECTOR_MODE_P (mode1))
12030 op1 = safe_vector_operand (op1, mode1);
12031
12032 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12033
12034 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12035 op1 = copy_to_mode_reg (mode1, op1);
12036
12037 pat = GEN_FCN (icode) (op0, op1);
12038 if (pat)
12039 emit_insn (pat);
12040 return 0;
12041 }
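/* Example added for exposition: __builtin_ia32_storeaps reaches this routine
   (see the STOREAPS case below) with CODE_FOR_sse_movaps.  Its first
   argument is a pointer, so op0 is forced into a Pmode register and wrapped
   in a MEM for the store destination; op1 is the vector value being stored.
   Store builtins produce no value, hence the unconditional return of 0.  */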
12042
12043 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12044
12045 static rtx
12046 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12047 enum insn_code icode;
12048 tree arglist;
12049 rtx target;
12050 int do_load;
12051 {
12052 rtx pat;
12053 tree arg0 = TREE_VALUE (arglist);
12054 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12055 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12056 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12057
12058 if (! target
12059 || GET_MODE (target) != tmode
12060 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12061 target = gen_reg_rtx (tmode);
12062 if (do_load)
12063 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12064 else
12065 {
12066 if (VECTOR_MODE_P (mode0))
12067 op0 = safe_vector_operand (op0, mode0);
12068
12069 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12070 op0 = copy_to_mode_reg (mode0, op0);
12071 }
12072
12073 pat = GEN_FCN (icode) (target, op0);
12074 if (! pat)
12075 return 0;
12076 emit_insn (pat);
12077 return target;
12078 }
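/* Example added for exposition: the do_load flag distinguishes loads from
   plain unary operations.  __builtin_ia32_loadups (LOADUPS case below)
   passes do_load == 1, so its pointer argument becomes a MEM that the
   movups pattern reads; with do_load == 0 the argument itself is used as
   the insn's single input operand, after the usual predicate check.  */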
12079
12080 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12081 sqrtss, rsqrtss, rcpss. */
12082
12083 static rtx
12084 ix86_expand_unop1_builtin (icode, arglist, target)
12085 enum insn_code icode;
12086 tree arglist;
12087 rtx target;
12088 {
12089 rtx pat;
12090 tree arg0 = TREE_VALUE (arglist);
12091 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12092 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12093 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12094
12095 if (! target
12096 || GET_MODE (target) != tmode
12097 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12098 target = gen_reg_rtx (tmode);
12099
12100 if (VECTOR_MODE_P (mode0))
12101 op0 = safe_vector_operand (op0, mode0);
12102
12103 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12104 op0 = copy_to_mode_reg (mode0, op0);
12105
12106 op1 = op0;
12107 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12108 op1 = copy_to_mode_reg (mode0, op1);
12109
12110 pat = GEN_FCN (icode) (target, op0, op1);
12111 if (! pat)
12112 return 0;
12113 emit_insn (pat);
12114 return target;
12115 }
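/* Example added for exposition: __builtin_ia32_sqrtss (SQRTSS case below)
   comes through here with CODE_FOR_vmsqrtv4sf2.  The single user argument
   is passed as both insn inputs; the vm* scalar patterns appear to take two
   V4SF operands, one supplying the element actually operated on and the
   other the untouched remaining elements of the result, so duplicating the
   argument gives the usual "operate on element 0, keep the rest" behaviour
   of sqrtss, rsqrtss and rcpss.  */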
12116
12117 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12118
12119 static rtx
12120 ix86_expand_sse_compare (d, arglist, target)
12121 const struct builtin_description *d;
12122 tree arglist;
12123 rtx target;
12124 {
12125 rtx pat;
12126 tree arg0 = TREE_VALUE (arglist);
12127 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12128 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12129 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12130 rtx op2;
12131 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12132 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12133 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12134 enum rtx_code comparison = d->comparison;
12135
12136 if (VECTOR_MODE_P (mode0))
12137 op0 = safe_vector_operand (op0, mode0);
12138 if (VECTOR_MODE_P (mode1))
12139 op1 = safe_vector_operand (op1, mode1);
12140
12141 /* Swap operands if we have a comparison that isn't available in
12142 hardware. */
12143 if (d->flag)
12144 {
12145 rtx tmp = gen_reg_rtx (mode1);
12146 emit_move_insn (tmp, op1);
12147 op1 = op0;
12148 op0 = tmp;
12149 }
12150
12151 if (! target
12152 || GET_MODE (target) != tmode
12153 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12154 target = gen_reg_rtx (tmode);
12155
12156 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12157 op0 = copy_to_mode_reg (mode0, op0);
12158 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12159 op1 = copy_to_mode_reg (mode1, op1);
12160
12161 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12162 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12163 if (! pat)
12164 return 0;
12165 emit_insn (pat);
12166 return target;
12167 }
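/* Example added for exposition: the SSE compare instructions only provide
   the eq/lt/le/unord forms (and their negations), so the greater-than style
   builtins are encoded in the tables with the reversed condition and
   d->flag set.  For instance { MASK_SSE2, CODE_FOR_maskcmpv2df3,
   "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 } arrives here, the
   operands are swapped, and an LT comparison rtx is passed to the expander,
   so cmpgtpd is emitted as cmpltpd with its arguments exchanged.  */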
12168
12169 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12170
12171 static rtx
12172 ix86_expand_sse_comi (d, arglist, target)
12173 const struct builtin_description *d;
12174 tree arglist;
12175 rtx target;
12176 {
12177 rtx pat;
12178 tree arg0 = TREE_VALUE (arglist);
12179 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12180 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12181 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12182 rtx op2;
12183 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12184 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12185 enum rtx_code comparison = d->comparison;
12186
12187 if (VECTOR_MODE_P (mode0))
12188 op0 = safe_vector_operand (op0, mode0);
12189 if (VECTOR_MODE_P (mode1))
12190 op1 = safe_vector_operand (op1, mode1);
12191
12192 /* Swap operands if we have a comparison that isn't available in
12193 hardware. */
12194 if (d->flag)
12195 {
12196 rtx tmp = op1;
12197 op1 = op0;
12198 op0 = tmp;
12199 }
12200
12201 target = gen_reg_rtx (SImode);
12202 emit_move_insn (target, const0_rtx);
12203 target = gen_rtx_SUBREG (QImode, target, 0);
12204
12205 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12206 op0 = copy_to_mode_reg (mode0, op0);
12207 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12208 op1 = copy_to_mode_reg (mode1, op1);
12209
12210 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12211 pat = GEN_FCN (d->icode) (op0, op1, op2);
12212 if (! pat)
12213 return 0;
12214 emit_insn (pat);
12215 emit_insn (gen_rtx_SET (VOIDmode,
12216 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12217 gen_rtx_fmt_ee (comparison, QImode,
12218 gen_rtx_REG (CCmode, FLAGS_REG),
12219 const0_rtx)));
12220
12221 return SUBREG_REG (target);
12222 }
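/* Example added for exposition: the comi/ucomi builtins registered from
   bdesc_comi above return a plain int.  The expansion zeroes an SImode
   pseudo, emits the comparison insn (which only sets the flags register),
   and then writes the requested condition (d->comparison tested against
   FLAGS_REG) into the low byte via STRICT_LOW_PART, i.e. a compare followed
   by what amounts to a setcc.  */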
12223
12224 /* Expand an expression EXP that calls a built-in function,
12225 with result going to TARGET if that's convenient
12226 (and in mode MODE if that's convenient).
12227 SUBTARGET may be used as the target for computing one of EXP's operands.
12228 IGNORE is nonzero if the value is to be ignored. */
12229
12230 rtx
12231 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12232 tree exp;
12233 rtx target;
12234 rtx subtarget ATTRIBUTE_UNUSED;
12235 enum machine_mode mode ATTRIBUTE_UNUSED;
12236 int ignore ATTRIBUTE_UNUSED;
12237 {
12238 const struct builtin_description *d;
12239 size_t i;
12240 enum insn_code icode;
12241 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12242 tree arglist = TREE_OPERAND (exp, 1);
12243 tree arg0, arg1, arg2;
12244 rtx op0, op1, op2, pat;
12245 enum machine_mode tmode, mode0, mode1, mode2;
12246 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12247
12248 switch (fcode)
12249 {
12250 case IX86_BUILTIN_EMMS:
12251 emit_insn (gen_emms ());
12252 return 0;
12253
12254 case IX86_BUILTIN_SFENCE:
12255 emit_insn (gen_sfence ());
12256 return 0;
12257
12258 case IX86_BUILTIN_PEXTRW:
12259 case IX86_BUILTIN_PEXTRW128:
12260 icode = (fcode == IX86_BUILTIN_PEXTRW
12261 ? CODE_FOR_mmx_pextrw
12262 : CODE_FOR_sse2_pextrw);
12263 arg0 = TREE_VALUE (arglist);
12264 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12265 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12266 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12267 tmode = insn_data[icode].operand[0].mode;
12268 mode0 = insn_data[icode].operand[1].mode;
12269 mode1 = insn_data[icode].operand[2].mode;
12270
12271 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12272 op0 = copy_to_mode_reg (mode0, op0);
12273 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12274 {
12275 /* @@@ better error message */
12276 error ("selector must be an immediate");
12277 return gen_reg_rtx (tmode);
12278 }
12279 if (target == 0
12280 || GET_MODE (target) != tmode
12281 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12282 target = gen_reg_rtx (tmode);
12283 pat = GEN_FCN (icode) (target, op0, op1);
12284 if (! pat)
12285 return 0;
12286 emit_insn (pat);
12287 return target;
12288
12289 case IX86_BUILTIN_PINSRW:
12290 case IX86_BUILTIN_PINSRW128:
12291 icode = (fcode == IX86_BUILTIN_PINSRW
12292 ? CODE_FOR_mmx_pinsrw
12293 : CODE_FOR_sse2_pinsrw);
12294 arg0 = TREE_VALUE (arglist);
12295 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12296 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12297 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12298 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12299 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12300 tmode = insn_data[icode].operand[0].mode;
12301 mode0 = insn_data[icode].operand[1].mode;
12302 mode1 = insn_data[icode].operand[2].mode;
12303 mode2 = insn_data[icode].operand[3].mode;
12304
12305 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12306 op0 = copy_to_mode_reg (mode0, op0);
12307 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12308 op1 = copy_to_mode_reg (mode1, op1);
12309 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12310 {
12311 /* @@@ better error message */
12312 error ("selector must be an immediate");
12313 return const0_rtx;
12314 }
12315 if (target == 0
12316 || GET_MODE (target) != tmode
12317 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12318 target = gen_reg_rtx (tmode);
12319 pat = GEN_FCN (icode) (target, op0, op1, op2);
12320 if (! pat)
12321 return 0;
12322 emit_insn (pat);
12323 return target;
12324
12325     case IX86_BUILTIN_MASKMOVQ:
          case IX86_BUILTIN_MASKMOVDQU:
12326 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12327 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12328 : CODE_FOR_sse2_maskmovdqu);
12329 /* Note the arg order is different from the operand order. */
12330 arg1 = TREE_VALUE (arglist);
12331 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12332 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12333 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12334 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12335 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12336 mode0 = insn_data[icode].operand[0].mode;
12337 mode1 = insn_data[icode].operand[1].mode;
12338 mode2 = insn_data[icode].operand[2].mode;
12339
12340 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12341 op0 = copy_to_mode_reg (mode0, op0);
12342 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12343 op1 = copy_to_mode_reg (mode1, op1);
12344 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12345 op2 = copy_to_mode_reg (mode2, op2);
12346 pat = GEN_FCN (icode) (op0, op1, op2);
12347 if (! pat)
12348 return 0;
12349 emit_insn (pat);
12350 return 0;
12351
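/* Illustration of the reordering above (a sketch, not a specification):
   the builtin is called as

       __builtin_ia32_maskmovq (data, mask, p);

   i.e. (data, mask, address), while the insn pattern wants the address
   first, so the third call argument becomes operand 0.  The instruction
   stores the bytes of DATA whose corresponding MASK byte has its high
   bit set to the memory at P.  */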
12352 case IX86_BUILTIN_SQRTSS:
12353 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12354 case IX86_BUILTIN_RSQRTSS:
12355 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12356 case IX86_BUILTIN_RCPSS:
12357 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12358
12359 case IX86_BUILTIN_ANDPS:
12360 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12361 arglist, target);
12362 case IX86_BUILTIN_ANDNPS:
12363 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12364 arglist, target);
12365 case IX86_BUILTIN_ORPS:
12366 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12367 arglist, target);
12368 case IX86_BUILTIN_XORPS:
12369 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12370 arglist, target);
12371
12372 case IX86_BUILTIN_LOADAPS:
12373 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12374
12375 case IX86_BUILTIN_LOADUPS:
12376 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12377
12378 case IX86_BUILTIN_STOREAPS:
12379 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12380 case IX86_BUILTIN_STOREUPS:
12381 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12382
12383 case IX86_BUILTIN_LOADSS:
12384 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12385
12386 case IX86_BUILTIN_STORESS:
12387 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12388
12389 case IX86_BUILTIN_LOADHPS:
12390 case IX86_BUILTIN_LOADLPS:
12391 case IX86_BUILTIN_LOADHPD:
12392 case IX86_BUILTIN_LOADLPD:
12393 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12394 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12395 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12396 : CODE_FOR_sse2_movlpd);
12397 arg0 = TREE_VALUE (arglist);
12398 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12399 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12400 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12401 tmode = insn_data[icode].operand[0].mode;
12402 mode0 = insn_data[icode].operand[1].mode;
12403 mode1 = insn_data[icode].operand[2].mode;
12404
12405 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12406 op0 = copy_to_mode_reg (mode0, op0);
12407 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12408 if (target == 0
12409 || GET_MODE (target) != tmode
12410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12411 target = gen_reg_rtx (tmode);
12412 pat = GEN_FCN (icode) (target, op0, op1);
12413 if (! pat)
12414 return 0;
12415 emit_insn (pat);
12416 return target;
12417
12418 case IX86_BUILTIN_STOREHPS:
12419 case IX86_BUILTIN_STORELPS:
12420 case IX86_BUILTIN_STOREHPD:
12421 case IX86_BUILTIN_STORELPD:
12422 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12423 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12424 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12425 : CODE_FOR_sse2_movlpd);
12426 arg0 = TREE_VALUE (arglist);
12427 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12428 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12429 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12430 mode0 = insn_data[icode].operand[1].mode;
12431 mode1 = insn_data[icode].operand[2].mode;
12432
12433 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12434 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12435 op1 = copy_to_mode_reg (mode1, op1);
12436
12437 pat = GEN_FCN (icode) (op0, op0, op1);
12438 if (! pat)
12439 return 0;
12440 emit_insn (pat);
12441 return 0;
12442
12443 case IX86_BUILTIN_MOVNTPS:
12444 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12445 case IX86_BUILTIN_MOVNTQ:
12446 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12447
12448 case IX86_BUILTIN_LDMXCSR:
12449 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12450 target = assign_386_stack_local (SImode, 0);
12451 emit_move_insn (target, op0);
12452 emit_insn (gen_ldmxcsr (target));
12453 return 0;
12454
12455 case IX86_BUILTIN_STMXCSR:
12456 target = assign_386_stack_local (SImode, 0);
12457 emit_insn (gen_stmxcsr (target));
12458 return copy_to_mode_reg (SImode, target);
12459
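/* Both MXCSR builtins bounce the value through a stack slot because the
   ldmxcsr and stmxcsr instructions only take a memory operand.  A minimal
   user-level sketch (hypothetical variable name; 0x6000 is the MXCSR
   rounding-control field):

       unsigned int csr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (csr | 0x6000);    round toward zero  */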
12460 case IX86_BUILTIN_SHUFPS:
12461 case IX86_BUILTIN_SHUFPD:
12462 icode = (fcode == IX86_BUILTIN_SHUFPS
12463 ? CODE_FOR_sse_shufps
12464 : CODE_FOR_sse2_shufpd);
12465 arg0 = TREE_VALUE (arglist);
12466 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12467 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12468 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12469 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12470 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12471 tmode = insn_data[icode].operand[0].mode;
12472 mode0 = insn_data[icode].operand[1].mode;
12473 mode1 = insn_data[icode].operand[2].mode;
12474 mode2 = insn_data[icode].operand[3].mode;
12475
12476 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12477 op0 = copy_to_mode_reg (mode0, op0);
12478 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12479 op1 = copy_to_mode_reg (mode1, op1);
12480 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12481 {
12482 /* @@@ better error message */
12483 error ("mask must be an immediate");
12484 return gen_reg_rtx (tmode);
12485 }
12486 if (target == 0
12487 || GET_MODE (target) != tmode
12488 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12489 target = gen_reg_rtx (tmode);
12490 pat = GEN_FCN (icode) (target, op0, op1, op2);
12491 if (! pat)
12492 return 0;
12493 emit_insn (pat);
12494 return target;
12495
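/* As with pextrw/pinsrw above, the shuffle mask is encoded in the
   instruction itself, so the third argument must fold to a constant.
   A hedged illustration (hypothetical variables):

       __builtin_ia32_shufps (a, b, 0x1b);    accepted, literal mask
       __builtin_ia32_shufps (a, b, m);       rejected: "mask must be an immediate"  */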
12496 case IX86_BUILTIN_PSHUFW:
12497 case IX86_BUILTIN_PSHUFD:
12498 case IX86_BUILTIN_PSHUFHW:
12499 case IX86_BUILTIN_PSHUFLW:
12500 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12501 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12502 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12503 : CODE_FOR_mmx_pshufw);
12504 arg0 = TREE_VALUE (arglist);
12505 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12506 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12507 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12508 tmode = insn_data[icode].operand[0].mode;
12509 mode1 = insn_data[icode].operand[1].mode;
12510 mode2 = insn_data[icode].operand[2].mode;
12511
12512 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12513 op0 = copy_to_mode_reg (mode1, op0);
12514 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12515 {
12516 /* @@@ better error message */
12517 error ("mask must be an immediate");
12518 return const0_rtx;
12519 }
12520 if (target == 0
12521 || GET_MODE (target) != tmode
12522 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12523 target = gen_reg_rtx (tmode);
12524 pat = GEN_FCN (icode) (target, op0, op1);
12525 if (! pat)
12526 return 0;
12527 emit_insn (pat);
12528 return target;
12529
12530 case IX86_BUILTIN_FEMMS:
12531 emit_insn (gen_femms ());
12532 return NULL_RTX;
12533
12534 case IX86_BUILTIN_PAVGUSB:
12535 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12536
12537 case IX86_BUILTIN_PF2ID:
12538 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
12539
12540 case IX86_BUILTIN_PFACC:
12541 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
12542
12543 case IX86_BUILTIN_PFADD:
12544 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
12545
12546 case IX86_BUILTIN_PFCMPEQ:
12547 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
12548
12549 case IX86_BUILTIN_PFCMPGE:
12550 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
12551
12552 case IX86_BUILTIN_PFCMPGT:
12553 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
12554
12555 case IX86_BUILTIN_PFMAX:
12556 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
12557
12558 case IX86_BUILTIN_PFMIN:
12559 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
12560
12561 case IX86_BUILTIN_PFMUL:
12562 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
12563
12564 case IX86_BUILTIN_PFRCP:
12565 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
12566
12567 case IX86_BUILTIN_PFRCPIT1:
12568 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
12569
12570 case IX86_BUILTIN_PFRCPIT2:
12571 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
12572
12573 case IX86_BUILTIN_PFRSQIT1:
12574 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
12575
12576 case IX86_BUILTIN_PFRSQRT:
12577 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12578
12579 case IX86_BUILTIN_PFSUB:
12580 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12581
12582 case IX86_BUILTIN_PFSUBR:
12583 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12584
12585 case IX86_BUILTIN_PI2FD:
12586 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12587
12588 case IX86_BUILTIN_PMULHRW:
12589 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12590
12591 case IX86_BUILTIN_PF2IW:
12592 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12593
12594 case IX86_BUILTIN_PFNACC:
12595 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12596
12597 case IX86_BUILTIN_PFPNACC:
12598 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12599
12600 case IX86_BUILTIN_PI2FW:
12601 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12602
12603 case IX86_BUILTIN_PSWAPDSI:
12604 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12605
12606 case IX86_BUILTIN_PSWAPDSF:
12607 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12608
12609 case IX86_BUILTIN_SSE_ZERO:
12610 target = gen_reg_rtx (V4SFmode);
12611 emit_insn (gen_sse_clrv4sf (target));
12612 return target;
12613
12614 case IX86_BUILTIN_MMX_ZERO:
12615 target = gen_reg_rtx (DImode);
12616 emit_insn (gen_mmx_clrdi (target));
12617 return target;
12618
12619 case IX86_BUILTIN_SQRTSD:
12620 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
12621 case IX86_BUILTIN_LOADAPD:
12622 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
12623 case IX86_BUILTIN_LOADUPD:
12624 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
12625
12626 case IX86_BUILTIN_STOREAPD:
12627 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12628 case IX86_BUILTIN_STOREUPD:
12629 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
12630
12631 case IX86_BUILTIN_LOADSD:
12632 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
12633
12634 case IX86_BUILTIN_STORESD:
12635 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
12636
12637 case IX86_BUILTIN_SETPD1:
12638 target = assign_386_stack_local (DFmode, 0);
12639 arg0 = TREE_VALUE (arglist);
12640 emit_move_insn (adjust_address (target, DFmode, 0),
12641 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12642 op0 = gen_reg_rtx (V2DFmode);
12643 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
12644 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
12645 return op0;
12646
12647 case IX86_BUILTIN_SETPD:
12648 target = assign_386_stack_local (V2DFmode, 0);
12649 arg0 = TREE_VALUE (arglist);
12650 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12651 emit_move_insn (adjust_address (target, DFmode, 0),
12652 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12653 emit_move_insn (adjust_address (target, DFmode, 8),
12654 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
12655 op0 = gen_reg_rtx (V2DFmode);
12656 emit_insn (gen_sse2_movapd (op0, target));
12657 return op0;
12658
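/* A sketch of the two expansions above: the scalar arguments are spilled
   to a stack slot and the slot is reloaded as a vector, roughly

       slot:  [ arg0 | arg1 ]        offsets 0 and 8
       op0 = movapd slot             for SETPD

   while SETPD1 loads a single double and duplicates it into both lanes
   with a shufpd mask of 0.  */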
12659 case IX86_BUILTIN_LOADRPD:
12660 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
12661 gen_reg_rtx (V2DFmode), 1);
12662 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
12663 return target;
12664
12665 case IX86_BUILTIN_LOADPD1:
12666 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
12667 gen_reg_rtx (V2DFmode), 1);
12668 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
12669 return target;
12670
12671 case IX86_BUILTIN_STOREPD1:
12672 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12673 case IX86_BUILTIN_STORERPD:
12674 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12675
12676 case IX86_BUILTIN_MFENCE:
12677 emit_insn (gen_sse2_mfence ());
12678 return 0;
12679 case IX86_BUILTIN_LFENCE:
12680 emit_insn (gen_sse2_lfence ());
12681 return 0;
12682
12683 case IX86_BUILTIN_CLFLUSH:
12684 arg0 = TREE_VALUE (arglist);
12685 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12686 icode = CODE_FOR_sse2_clflush;
12687 mode0 = insn_data[icode].operand[0].mode;
12688 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12689 op0 = copy_to_mode_reg (mode0, op0);
12690
12691 emit_insn (gen_sse2_clflush (op0));
12692 return 0;
12693
12694 case IX86_BUILTIN_MOVNTPD:
12695 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
12696 case IX86_BUILTIN_MOVNTDQ:
12697 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
12698 case IX86_BUILTIN_MOVNTI:
12699 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
12700
12701 default:
12702 break;
12703 }
12704
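/* Builtins without a special case above are table driven: a plain
   two-operand builtin (for example __builtin_ia32_addps, assuming its
   usual bdesc_2arg entry) is simply paired with a named insn pattern and
   expanded by ix86_expand_binop_builtin below, while the SSE compare
   patterns take the ix86_expand_sse_compare path because the descriptor
   also carries the comparison to use.  */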
12705 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12706 if (d->code == fcode)
12707 {
12708 /* Compares are treated specially. */
12709 if (d->icode == CODE_FOR_maskcmpv4sf3
12710 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12711 || d->icode == CODE_FOR_maskncmpv4sf3
12712 || d->icode == CODE_FOR_vmmaskncmpv4sf3
12713 || d->icode == CODE_FOR_maskcmpv2df3
12714 || d->icode == CODE_FOR_vmmaskcmpv2df3
12715 || d->icode == CODE_FOR_maskncmpv2df3
12716 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12717 return ix86_expand_sse_compare (d, arglist, target);
12718
12719 return ix86_expand_binop_builtin (d->icode, arglist, target);
12720 }
12721
12722 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12723 if (d->code == fcode)
12724 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12725
12726 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12727 if (d->code == fcode)
12728 return ix86_expand_sse_comi (d, arglist, target);
12729
12730 /* @@@ Should really do something sensible here. */
12731 return 0;
12732 }
12733
12734 /* Store OPERAND to memory after reload is completed. This means
12735 that we can't easily use assign_stack_local. */
12736 rtx
12737 ix86_force_to_memory (mode, operand)
12738 enum machine_mode mode;
12739 rtx operand;
12740 {
12741 rtx result;
12742 if (!reload_completed)
12743 abort ();
12744 if (TARGET_64BIT && TARGET_RED_ZONE)
12745 {
12746 result = gen_rtx_MEM (mode,
12747 gen_rtx_PLUS (Pmode,
12748 stack_pointer_rtx,
12749 GEN_INT (-RED_ZONE_SIZE)));
12750 emit_move_insn (result, operand);
12751 }
12752 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12753 {
12754 switch (mode)
12755 {
12756 case HImode:
12757 case SImode:
12758 operand = gen_lowpart (DImode, operand);
12759 /* FALLTHRU */
12760 case DImode:
12761 emit_insn (
12762 gen_rtx_SET (VOIDmode,
12763 gen_rtx_MEM (DImode,
12764 gen_rtx_PRE_DEC (DImode,
12765 stack_pointer_rtx)),
12766 operand));
12767 break;
12768 default:
12769 abort ();
12770 }
12771 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12772 }
12773 else
12774 {
12775 switch (mode)
12776 {
12777 case DImode:
12778 {
12779 rtx operands[2];
12780 split_di (&operand, 1, operands, operands + 1);
12781 emit_insn (
12782 gen_rtx_SET (VOIDmode,
12783 gen_rtx_MEM (SImode,
12784 gen_rtx_PRE_DEC (Pmode,
12785 stack_pointer_rtx)),
12786 operands[1]));
12787 emit_insn (
12788 gen_rtx_SET (VOIDmode,
12789 gen_rtx_MEM (SImode,
12790 gen_rtx_PRE_DEC (Pmode,
12791 stack_pointer_rtx)),
12792 operands[0]));
12793 }
12794 break;
12795 case HImode:
12796 /* It is better to store HImode values as SImode. */
12797 if (!TARGET_PARTIAL_REG_STALL)
12798 operand = gen_lowpart (SImode, operand);
12799 /* FALLTHRU */
12800 case SImode:
12801 emit_insn (
12802 gen_rtx_SET (VOIDmode,
12803 gen_rtx_MEM (GET_MODE (operand),
12804 gen_rtx_PRE_DEC (SImode,
12805 stack_pointer_rtx)),
12806 operand));
12807 break;
12808 default:
12809 abort ();
12810 }
12811 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12812 }
12813 return result;
12814 }
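/* A hedged sketch of what ix86_force_to_memory emits for a DImode operand
   in 64-bit mode (assuming the usual 128-byte RED_ZONE_SIZE):

       with the red zone:      movq %rax, -128(%rsp)    no stack adjustment
       without the red zone:   pushq %rax               PRE_DEC of the stack pointer

   In 32-bit mode a DImode value is split and pushed as two SImode words,
   high word first, so the low word ends up at the final stack pointer.  */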
12815
12816 /* Free the operand from memory. */
12817 void
12818 ix86_free_from_memory (mode)
12819 enum machine_mode mode;
12820 {
12821 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12822 {
12823 int size;
12824
12825 if (mode == DImode || TARGET_64BIT)
12826 size = 8;
12827 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12828 size = 2;
12829 else
12830 size = 4;
12831 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12832 to a pop or add instruction if registers are available. */
12833 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12834 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12835 GEN_INT (size))));
12836 }
12837 }
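/* A brief illustration of the deallocation above: the emitted rtl is a
   plain set of the stack pointer, which assembles as an lea, e.g.

       leal 4(%esp), %esp    SImode slot in 32-bit mode

   Because it does not clobber the flags, peephole2 is free to rewrite it
   later as a pop or an add when registers are available.  */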
12838
12839 /* Put a float CONST_DOUBLE in the constant pool instead of fp regs.
12840 QImode must go into class Q_REGS.
12841 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and movdf
12842 to do mem-to-mem moves through integer regs. */
12843 enum reg_class
12844 ix86_preferred_reload_class (x, class)
12845 rtx x;
12846 enum reg_class class;
12847 {
12848 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12849 {
12850 /* SSE can't load any constant directly yet. */
12851 if (SSE_CLASS_P (class))
12852 return NO_REGS;
12853 /* Floats can load 0 and 1. */
12854 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12855 {
12856 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12857 if (MAYBE_SSE_CLASS_P (class))
12858 return (reg_class_subset_p (class, GENERAL_REGS)
12859 ? GENERAL_REGS : FLOAT_REGS);
12860 else
12861 return class;
12862 }
12863 /* General regs can load everything. */
12864 if (reg_class_subset_p (class, GENERAL_REGS))
12865 return GENERAL_REGS;
12866 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12867 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12868 return NO_REGS;
12869 }
12870 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12871 return NO_REGS;
12872 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12873 return Q_REGS;
12874 return class;
12875 }
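/* Two illustrative consequences of the preferences above (a sketch, not
   an exhaustive list): the constant 1.0 satisfies
   standard_80387_constant_p, so a float class is kept and the value can
   be materialized with fld1, whereas for an SSE class any CONST_DOUBLE
   yields NO_REGS and the constant is forced to memory instead.  */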
12876
12877 /* If we are copying between general and FP registers, we need a memory
12878 location. The same is true for SSE and MMX registers.
12879
12880 The macro can't work reliably when one of the CLASSES is a class containing
12881 registers from multiple units (SSE, MMX, integer). We avoid this by never
12882 combining those units in a single alternative in the machine description.
12883 Ensure that this constraint holds to avoid unexpected surprises.
12884
12885 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12886 enforce these sanity checks. */
12887 int
12888 ix86_secondary_memory_needed (class1, class2, mode, strict)
12889 enum reg_class class1, class2;
12890 enum machine_mode mode;
12891 int strict;
12892 {
12893 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12894 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12895 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12896 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12897 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12898 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12899 {
12900 if (strict)
12901 abort ();
12902 else
12903 return 1;
12904 }
12905 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12906 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12907 && (mode) != SImode)
12908 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12909 && (mode) != SImode));
12910 }
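/* For example, with the checks above a DFmode copy between an SSE
   register and a general register must go through a stack slot, while an
   SImode copy may stay in registers (movd moves 32-bit values directly
   between the units).  */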
12911 /* Return the cost of moving data from a register in class CLASS1 to
12912 one in class CLASS2.
12913
12914 It is not required that the cost always equal 2 when FROM is the same as TO;
12915 on some machines it is expensive to move between registers if they are not
12916 general registers. */
12917 int
12918 ix86_register_move_cost (mode, class1, class2)
12919 enum machine_mode mode;
12920 enum reg_class class1, class2;
12921 {
12922 /* In case we require secondary memory, compute the cost of the store
12923 followed by a load. When copying from a general purpose register we may
12924 emit multiple stores followed by a single load, causing a memory size
12925 mismatch stall. Count this as an arbitrarily high cost of 20. */
12926 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12927 {
12928 int add_cost = 0;
12929 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12930 add_cost = 20;
12931 return (MEMORY_MOVE_COST (mode, class1, 0)
12932 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12933 }
12934 /* Moves between the SSE/MMX and integer units are expensive. */
12935 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12936 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12937 return ix86_cost->mmxsse_to_integer;
12938 if (MAYBE_FLOAT_CLASS_P (class1))
12939 return ix86_cost->fp_move;
12940 if (MAYBE_SSE_CLASS_P (class1))
12941 return ix86_cost->sse_move;
12942 if (MAYBE_MMX_CLASS_P (class1))
12943 return ix86_cost->mmx_move;
12944 return 2;
12945 }
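/* A worked example of the formula above (32-bit mode): a DImode copy from
   the general registers to an MMX register needs secondary memory, and
   since CLASS_MAX_NREGS is 2 on the integer side but 1 for MMX, the cost
   is the DImode store cost plus the DImode load cost plus the extra 20
   charged for the size-mismatch stall.  */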
12946
12947 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12948 int
12949 ix86_hard_regno_mode_ok (regno, mode)
12950 int regno;
12951 enum machine_mode mode;
12952 {
12953 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
12954 if (CC_REGNO_P (regno))
12955 return GET_MODE_CLASS (mode) == MODE_CC;
12956 if (GET_MODE_CLASS (mode) == MODE_CC
12957 || GET_MODE_CLASS (mode) == MODE_RANDOM
12958 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12959 return 0;
12960 if (FP_REGNO_P (regno))
12961 return VALID_FP_MODE_P (mode);
12962 if (SSE_REGNO_P (regno))
12963 return VALID_SSE_REG_MODE (mode);
12964 if (MMX_REGNO_P (regno))
12965 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12966 /* We handle both integers and floats in the general purpose registers.
12967 In the future we should be able to handle vector modes as well. */
12968 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12969 return 0;
12970 /* Take care with QImode values - they can live in non-QI regs, but then
12971 they do cause partial register stalls. */
12972 if (regno < 4 || mode != QImode || TARGET_64BIT)
12973 return 1;
12974 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12975 }
12976
12977 /* Return the cost of moving data of mode M between a
12978 register and memory. A value of 2 is the default; this cost is
12979 relative to those in `REGISTER_MOVE_COST'.
12980
12981 If moving between registers and memory is more expensive than
12982 between two registers, you should define this macro to express the
12983 relative cost.
12984
12985 Also model the increased cost of moving QImode registers in
12986 non-Q_REGS classes.
12987 */
12988 int
12989 ix86_memory_move_cost (mode, class, in)
12990 enum machine_mode mode;
12991 enum reg_class class;
12992 int in;
12993 {
12994 if (FLOAT_CLASS_P (class))
12995 {
12996 int index;
12997 switch (mode)
12998 {
12999 case SFmode:
13000 index = 0;
13001 break;
13002 case DFmode:
13003 index = 1;
13004 break;
13005 case XFmode:
13006 case TFmode:
13007 index = 2;
13008 break;
13009 default:
13010 return 100;
13011 }
13012 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13013 }
13014 if (SSE_CLASS_P (class))
13015 {
13016 int index;
13017 switch (GET_MODE_SIZE (mode))
13018 {
13019 case 4:
13020 index = 0;
13021 break;
13022 case 8:
13023 index = 1;
13024 break;
13025 case 16:
13026 index = 2;
13027 break;
13028 default:
13029 return 100;
13030 }
13031 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13032 }
13033 if (MMX_CLASS_P (class))
13034 {
13035 int index;
13036 switch (GET_MODE_SIZE (mode))
13037 {
13038 case 4:
13039 index = 0;
13040 break;
13041 case 8:
13042 index = 1;
13043 break;
13044 default:
13045 return 100;
13046 }
13047 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13048 }
13049 switch (GET_MODE_SIZE (mode))
13050 {
13051 case 1:
13052 if (in)
13053 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13054 : ix86_cost->movzbl_load);
13055 else
13056 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13057 : ix86_cost->int_store[0] + 4);
13058 break;
13059 case 2:
13060 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13061 default:
13062 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13063 if (mode == TFmode)
13064 mode = XFmode;
13065 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13066 * (int) GET_MODE_SIZE (mode) / 4);
13067 }
13068 }
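/* A worked example of the integer fall-through above: a DImode value in
   GENERAL_REGS is moved as 32-bit pieces, so its load cost is
   int_load[2] * 8 / 4, i.e. twice the SImode load cost of the selected
   processor.  */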
13069
13070 #ifdef DO_GLOBAL_CTORS_BODY
13071 static void
13072 ix86_svr3_asm_out_constructor (symbol, priority)
13073 rtx symbol;
13074 int priority ATTRIBUTE_UNUSED;
13075 {
13076 init_section ();
13077 fputs ("\tpushl $", asm_out_file);
13078 assemble_name (asm_out_file, XSTR (symbol, 0));
13079 fputc ('\n', asm_out_file);
13080 }
13081 #endif
13082
13083 /* Order the registers for the register allocator. */
13084
13085 void
13086 x86_order_regs_for_local_alloc ()
13087 {
13088 int pos = 0;
13089 int i;
13090
13091 /* First allocate the local general purpose registers. */
13092 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13093 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13094 reg_alloc_order [pos++] = i;
13095
13096 /* Global general purpose registers. */
13097 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13098 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13099 reg_alloc_order [pos++] = i;
13100
13101 /* x87 registers come first when we are doing FP math
13102 using them. */
13103 if (!TARGET_SSE_MATH)
13104 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13105 reg_alloc_order [pos++] = i;
13106
13107 /* SSE registers. */
13108 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13109 reg_alloc_order [pos++] = i;
13110 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13111 reg_alloc_order [pos++] = i;
13112
13113 /* x87 registers. */
13114 if (TARGET_SSE_MATH)
13115 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13116 reg_alloc_order [pos++] = i;
13117
13118 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13119 reg_alloc_order [pos++] = i;
13120
13121 /* Initialize the rest of the array, as we do not allocate some registers
13122 at all. */
13123 while (pos < FIRST_PSEUDO_REGISTER)
13124 reg_alloc_order [pos++] = 0;
13125 }
13126
13127 void
13128 x86_output_mi_thunk (file, delta, function)
13129 FILE *file;
13130 int delta;
13131 tree function;
13132 {
13133 tree parm;
13134 rtx xops[3];
13135
13136 if (ix86_regparm > 0)
13137 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13138 else
13139 parm = NULL_TREE;
13140 for (; parm; parm = TREE_CHAIN (parm))
13141 if (TREE_VALUE (parm) == void_type_node)
13142 break;
13143
13144 xops[0] = GEN_INT (delta);
13145 if (TARGET_64BIT)
13146 {
13147 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13148 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13149 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13150 if (flag_pic)
13151 {
13152 fprintf (file, "\tjmp *");
13153 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13154 fprintf (file, "@GOTPCREL(%%rip)\n");
13155 }
13156 else
13157 {
13158 fprintf (file, "\tjmp ");
13159 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13160 fprintf (file, "\n");
13161 }
13162 }
13163 else
13164 {
13165 if (parm)
13166 xops[1] = gen_rtx_REG (SImode, 0);
13167 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13168 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13169 else
13170 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13171 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13172
13173 if (flag_pic)
13174 {
13175 xops[0] = pic_offset_table_rtx;
13176 xops[1] = gen_label_rtx ();
13177 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13178
13179 if (ix86_regparm > 2)
13180 abort ();
13181 output_asm_insn ("push{l}\t%0", xops);
13182 output_asm_insn ("call\t%P1", xops);
13183 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13184 output_asm_insn ("pop{l}\t%0", xops);
13185 output_asm_insn
13186 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13187 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13188 output_asm_insn
13189 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13190 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13191 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13192 }
13193 else
13194 {
13195 fprintf (file, "\tjmp ");
13196 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13197 fprintf (file, "\n");
13198 }
13199 }
13200 }
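/* For the common 32-bit, non-PIC case the thunk emitted above boils down
   to (a hedged sketch; DELTA and FUNCTION stand for the actual values):

       addl $DELTA, 4(%esp)
       jmp  FUNCTION

   i.e. adjust the incoming `this' pointer in place on the stack and
   tail-jump to the real method.  The PIC variant instead loads the target
   out of the GOT through %ecx before jumping.  */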