1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
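
/* ix86_cost is repointed by override_options once -mcpu is known (and to
   size_cost under -Os); the cost macros in i386.h read the selected table
   through this pointer.  Worked example of the relative numbers
   (illustrative only): in i386_cost a divide/mod costs 23 adds while a
   constant shift costs 2, so replacing a division by a constant with a
   short shift/add sequence is a clear win there, whereas pentiumpro_cost
   (divide 17, constant shift 1) makes that trade-off with its own table.  */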
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
380 const int x86_single_stringop = m_386 | m_PENT4;
381 const int x86_qimode_math = ~(0);
382 const int x86_promote_qi_regs = 0;
383 const int x86_himode_math = ~(m_PPRO);
384 const int x86_promote_hi_regs = m_PPRO;
385 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
386 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
387 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
388 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
389 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
390 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
391 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
392 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_decompose_lea = m_PENT4;
396 const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;
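
/* Each flag above is a bitmask of processors for which the corresponding
   transformation or feature is profitable; a flag is tested by anding it
   with a one-bit mask for the active processor, exactly as
   override_options does below with "x86_3dnow_a & (1 << ix86_arch)".
   A minimal sketch of the pattern (illustrative only; the real wrapper
   macros live in i386.h):  */
#if 0
static int
example_tuning_enabled_p (feature_mask)
     int feature_mask;
{
  /* Nonzero when FEATURE_MASK includes the processor we are tuning for.  */
  return (feature_mask & (1 << ix86_cpu)) != 0;
}
#endif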
397
398 /* In case the average insn count for a single function invocation is
399 lower than this constant, emit fast (but longer) prologue and
400 epilogue code. */
401 #define FAST_PROLOGUE_INSN_COUNT 30
402
403 /* Set by prologue expander and used by epilogue expander to determine
404 the style used. */
405 static int use_fast_prologue_epilogue;
406
407 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
408
409 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
413
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
416
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 {
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
438 };
439
440 /* The "default" register map used in 32bit mode. */
441
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
443 {
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 };
452
453 static int const x86_64_int_parameter_registers[6] =
454 {
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457 };
458
459 static int const x86_64_int_return_registers[4] =
460 {
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
462 };
463
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466 {
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474 };
475
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529 */
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
531 {
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
539 };
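
/* Worked example of the two maps above (illustrative): %esi is gcc regno 4,
   and both maps assign it DWARF regno 6; %edx (gcc regno 1) is regno 2 in
   both.  The maps differ for %ebp/%esp (4 and 5 in the default map, 5 and 4
   in the SVR4 map) and for the FP stack registers, which start at 12 in the
   default map but at 11 in the SVR4 map, matching the comment above.  */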
540
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
546
547 /* The encoding characters for the four TLS models present in ELF. */
548
549 static char const tls_model_chars[] = " GLil";
550
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
554
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function
557 {
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
562 };
563
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
566
567 /* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586 struct ix86_frame
587 {
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601 };
602
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
615
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
618
619 /* Which cpu we are scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
623
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
628
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
631
632 /* True if the SSE prefetch instruction is not a NOP. */
633 int x86_prefetch_sse;
634
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
637
638 /* Alignment to use for loops and jumps: */
639
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
642
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
645
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
648
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
651
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
655
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
658
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
662 \f
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static rtx gen_push PARAMS ((rtx));
676 static int memory_address_length PARAMS ((rtx addr));
677 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
678 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
680 static void ix86_dump_ppro_packet PARAMS ((FILE *));
681 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
682 static void ix86_init_machine_status PARAMS ((struct function *));
683 static void ix86_mark_machine_status PARAMS ((struct function *));
684 static void ix86_free_machine_status PARAMS ((struct function *));
685 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
686 static int ix86_nsaved_regs PARAMS ((void));
687 static void ix86_emit_save_regs PARAMS ((void));
688 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
689 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
704
705 struct ix86_address
706 {
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709 };
710
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
712
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
716
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static int ix86_save_reg PARAMS ((unsigned int, int));
741 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
742 static int ix86_comp_type_attributes PARAMS ((tree, tree));
743 const struct attribute_spec ix86_attribute_table[];
744 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
745 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
746
747 #ifdef DO_GLOBAL_CTORS_BODY
748 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
749 #endif
750
751 /* Register class used for passing a given 64bit part of the argument.
752 These represent classes as documented by the PS ABI, with the exception
753 of the SSESF and SSEDF classes, which are basically the SSE class, except
754 that gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
755
756 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
757 whenever possible (the upper half then contains only padding).
758 */
759 enum x86_64_reg_class
760 {
761 X86_64_NO_CLASS,
762 X86_64_INTEGER_CLASS,
763 X86_64_INTEGERSI_CLASS,
764 X86_64_SSE_CLASS,
765 X86_64_SSESF_CLASS,
766 X86_64_SSEDF_CLASS,
767 X86_64_SSEUP_CLASS,
768 X86_64_X87_CLASS,
769 X86_64_X87UP_CLASS,
770 X86_64_MEMORY_CLASS
771 };
772 static const char * const x86_64_reg_class_name[] =
773 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
774
775 #define MAX_CLASSES 4
776 static int classify_argument PARAMS ((enum machine_mode, tree,
777 enum x86_64_reg_class [MAX_CLASSES],
778 int));
779 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
780 int *));
781 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
782 const int *, int));
783 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
784 enum x86_64_reg_class));
785 \f
786 /* Initialize the GCC target structure. */
787 #undef TARGET_ATTRIBUTE_TABLE
788 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
789 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
790 # undef TARGET_MERGE_DECL_ATTRIBUTES
791 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
792 #endif
793
794 #undef TARGET_COMP_TYPE_ATTRIBUTES
795 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
796
797 #undef TARGET_INIT_BUILTINS
798 #define TARGET_INIT_BUILTINS ix86_init_builtins
799
800 #undef TARGET_EXPAND_BUILTIN
801 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
802
803 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
804 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
805 HOST_WIDE_INT));
806 # undef TARGET_ASM_FUNCTION_PROLOGUE
807 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
808 #endif
809
810 #undef TARGET_ASM_OPEN_PAREN
811 #define TARGET_ASM_OPEN_PAREN ""
812 #undef TARGET_ASM_CLOSE_PAREN
813 #define TARGET_ASM_CLOSE_PAREN ""
814
815 #undef TARGET_ASM_ALIGNED_HI_OP
816 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
817 #undef TARGET_ASM_ALIGNED_SI_OP
818 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
819 #ifdef ASM_QUAD
820 #undef TARGET_ASM_ALIGNED_DI_OP
821 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #endif
823
824 #undef TARGET_ASM_UNALIGNED_HI_OP
825 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
826 #undef TARGET_ASM_UNALIGNED_SI_OP
827 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
828 #undef TARGET_ASM_UNALIGNED_DI_OP
829 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
830
831 #undef TARGET_SCHED_ADJUST_COST
832 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
833 #undef TARGET_SCHED_ISSUE_RATE
834 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
835 #undef TARGET_SCHED_VARIABLE_ISSUE
836 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
837 #undef TARGET_SCHED_INIT
838 #define TARGET_SCHED_INIT ix86_sched_init
839 #undef TARGET_SCHED_REORDER
840 #define TARGET_SCHED_REORDER ix86_sched_reorder
841 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
842 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
843 ia32_use_dfa_pipeline_interface
844 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
845 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
846 ia32_multipass_dfa_lookahead
847
848 #ifdef HAVE_AS_TLS
849 #undef TARGET_HAVE_TLS
850 #define TARGET_HAVE_TLS true
851 #endif
852
853 struct gcc_target targetm = TARGET_INITIALIZER;
854 \f
855 /* Sometimes certain combinations of command options do not make
856 sense on a particular target machine. You can define a macro
857 `OVERRIDE_OPTIONS' to take account of this. This macro, if
858 defined, is executed once just after all the command options have
859 been parsed.
860
861 Don't use this macro to turn on various extra optimizations for
862 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
863
864 void
865 override_options ()
866 {
867 int i;
868 /* Comes from final.c -- no real reason to change it. */
869 #define MAX_CODE_ALIGN 16
870
871 static struct ptt
872 {
873 const struct processor_costs *cost; /* Processor costs */
874 const int target_enable; /* Target flags to enable. */
875 const int target_disable; /* Target flags to disable. */
876 const int align_loop; /* Default alignments. */
877 const int align_loop_max_skip;
878 const int align_jump;
879 const int align_jump_max_skip;
880 const int align_func;
881 const int branch_cost;
882 }
883 const processor_target_table[PROCESSOR_max] =
884 {
885 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
886 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
887 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
888 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
889 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
890 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
891 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 };
893
894 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
895 static struct pta
896 {
897 const char *const name; /* processor name or nickname. */
898 const enum processor_type processor;
899 const enum pta_flags
900 {
901 PTA_SSE = 1,
902 PTA_SSE2 = 2,
903 PTA_MMX = 4,
904 PTA_PREFETCH_SSE = 8,
905 PTA_3DNOW = 16,
906 PTA_3DNOW_A = 64
907 } flags;
908 }
909 const processor_alias_table[] =
910 {
911 {"i386", PROCESSOR_I386, 0},
912 {"i486", PROCESSOR_I486, 0},
913 {"i586", PROCESSOR_PENTIUM, 0},
914 {"pentium", PROCESSOR_PENTIUM, 0},
915 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
916 {"i686", PROCESSOR_PENTIUMPRO, 0},
917 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
918 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
919 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
920 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
921 PTA_MMX | PTA_PREFETCH_SSE},
922 {"k6", PROCESSOR_K6, PTA_MMX},
923 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
924 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
925 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
926 | PTA_3DNOW_A},
927 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
928 | PTA_3DNOW | PTA_3DNOW_A},
929 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
933 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
934 | PTA_3DNOW_A | PTA_SSE},
935 };
936
937 int const pta_size = ARRAY_SIZE (processor_alias_table);
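
/* Reading the table above (illustrative examples): -march=k6-2 selects
   PROCESSOR_K6 and, via PTA_MMX | PTA_3DNOW, turns on MASK_MMX and
   MASK_3DNOW in the loop below unless the user set those flags explicitly;
   -march=pentium3 likewise enables MMX and SSE and sets x86_prefetch_sse
   through PTA_PREFETCH_SSE.  */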
938
939 #ifdef SUBTARGET_OVERRIDE_OPTIONS
940 SUBTARGET_OVERRIDE_OPTIONS;
941 #endif
942
943 if (!ix86_cpu_string && ix86_arch_string)
944 ix86_cpu_string = ix86_arch_string;
945 if (!ix86_cpu_string)
946 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
947 if (!ix86_arch_string)
948 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
949
950 if (ix86_cmodel_string != 0)
951 {
952 if (!strcmp (ix86_cmodel_string, "small"))
953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
954 else if (flag_pic)
955 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
956 else if (!strcmp (ix86_cmodel_string, "32"))
957 ix86_cmodel = CM_32;
958 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
959 ix86_cmodel = CM_KERNEL;
960 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
961 ix86_cmodel = CM_MEDIUM;
962 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
963 ix86_cmodel = CM_LARGE;
964 else
965 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
966 }
967 else
968 {
969 ix86_cmodel = CM_32;
970 if (TARGET_64BIT)
971 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
972 }
973 if (ix86_asm_string != 0)
974 {
975 if (!strcmp (ix86_asm_string, "intel"))
976 ix86_asm_dialect = ASM_INTEL;
977 else if (!strcmp (ix86_asm_string, "att"))
978 ix86_asm_dialect = ASM_ATT;
979 else
980 error ("bad value (%s) for -masm= switch", ix86_asm_string);
981 }
982 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
983 error ("code model `%s' not supported in the %s bit mode",
984 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
985 if (ix86_cmodel == CM_LARGE)
986 sorry ("code model `large' not supported yet");
987 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
988 sorry ("%i-bit mode not compiled in",
989 (target_flags & MASK_64BIT) ? 64 : 32);
990
991 for (i = 0; i < pta_size; i++)
992 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
993 {
994 ix86_arch = processor_alias_table[i].processor;
995 /* Default cpu tuning to the architecture. */
996 ix86_cpu = ix86_arch;
997 if (processor_alias_table[i].flags & PTA_MMX
998 && !(target_flags & MASK_MMX_SET))
999 target_flags |= MASK_MMX;
1000 if (processor_alias_table[i].flags & PTA_3DNOW
1001 && !(target_flags & MASK_3DNOW_SET))
1002 target_flags |= MASK_3DNOW;
1003 if (processor_alias_table[i].flags & PTA_3DNOW_A
1004 && !(target_flags & MASK_3DNOW_A_SET))
1005 target_flags |= MASK_3DNOW_A;
1006 if (processor_alias_table[i].flags & PTA_SSE
1007 && !(target_flags & MASK_SSE_SET))
1008 target_flags |= MASK_SSE;
1009 if (processor_alias_table[i].flags & PTA_SSE2
1010 && !(target_flags & MASK_SSE2_SET))
1011 target_flags |= MASK_SSE2;
1012 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1013 x86_prefetch_sse = true;
1014 break;
1015 }
1016
1017 if (i == pta_size)
1018 error ("bad value (%s) for -march= switch", ix86_arch_string);
1019
1020 for (i = 0; i < pta_size; i++)
1021 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1022 {
1023 ix86_cpu = processor_alias_table[i].processor;
1024 break;
1025 }
1026 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1027 x86_prefetch_sse = true;
1028 if (i == pta_size)
1029 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1030
1031 if (optimize_size)
1032 ix86_cost = &size_cost;
1033 else
1034 ix86_cost = processor_target_table[ix86_cpu].cost;
1035 target_flags |= processor_target_table[ix86_cpu].target_enable;
1036 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1037
1038 /* Arrange to set up ix86_stack_locals for all functions. */
1039 init_machine_status = ix86_init_machine_status;
1040 mark_machine_status = ix86_mark_machine_status;
1041 free_machine_status = ix86_free_machine_status;
1042
1043 /* Validate -mregparm= value. */
1044 if (ix86_regparm_string)
1045 {
1046 i = atoi (ix86_regparm_string);
1047 if (i < 0 || i > REGPARM_MAX)
1048 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1049 else
1050 ix86_regparm = i;
1051 }
1052 else
1053 if (TARGET_64BIT)
1054 ix86_regparm = REGPARM_MAX;
1055
1056 /* If the user has provided any of the -malign-* options,
1057 warn and use that value only if -falign-* is not set.
1058 Remove this code in GCC 3.2 or later. */
1059 if (ix86_align_loops_string)
1060 {
1061 warning ("-malign-loops is obsolete, use -falign-loops");
1062 if (align_loops == 0)
1063 {
1064 i = atoi (ix86_align_loops_string);
1065 if (i < 0 || i > MAX_CODE_ALIGN)
1066 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1067 else
1068 align_loops = 1 << i;
1069 }
1070 }
1071
1072 if (ix86_align_jumps_string)
1073 {
1074 warning ("-malign-jumps is obsolete, use -falign-jumps");
1075 if (align_jumps == 0)
1076 {
1077 i = atoi (ix86_align_jumps_string);
1078 if (i < 0 || i > MAX_CODE_ALIGN)
1079 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1080 else
1081 align_jumps = 1 << i;
1082 }
1083 }
1084
1085 if (ix86_align_funcs_string)
1086 {
1087 warning ("-malign-functions is obsolete, use -falign-functions");
1088 if (align_functions == 0)
1089 {
1090 i = atoi (ix86_align_funcs_string);
1091 if (i < 0 || i > MAX_CODE_ALIGN)
1092 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1093 else
1094 align_functions = 1 << i;
1095 }
1096 }
1097
1098 /* Default align_* from the processor table. */
1099 if (align_loops == 0)
1100 {
1101 align_loops = processor_target_table[ix86_cpu].align_loop;
1102 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1103 }
1104 if (align_jumps == 0)
1105 {
1106 align_jumps = processor_target_table[ix86_cpu].align_jump;
1107 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1108 }
1109 if (align_functions == 0)
1110 {
1111 align_functions = processor_target_table[ix86_cpu].align_func;
1112 }
1113
1114 /* Validate -mpreferred-stack-boundary= value, or provide default.
1115 The default of 128 bits is for Pentium III's SSE __m128, but we
1116 don't want additional code to keep the stack aligned when
1117 optimizing for code size. */
1118 ix86_preferred_stack_boundary = (optimize_size
1119 ? TARGET_64BIT ? 64 : 32
1120 : 128);
1121 if (ix86_preferred_stack_boundary_string)
1122 {
1123 i = atoi (ix86_preferred_stack_boundary_string);
1124 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1125 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1126 TARGET_64BIT ? 3 : 2);
1127 else
1128 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1129 }
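
/* Worked example: -mpreferred-stack-boundary=4 passes the range check above
   and yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte
   aligned stack; the smallest accepted values, 2 for 32-bit and 3 for
   64-bit, correspond to the word alignment the ABI already guarantees.  */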
1130
1131 /* Validate -mbranch-cost= value, or provide default. */
1132 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1133 if (ix86_branch_cost_string)
1134 {
1135 i = atoi (ix86_branch_cost_string);
1136 if (i < 0 || i > 5)
1137 error ("-mbranch-cost=%d is not between 0 and 5", i);
1138 else
1139 ix86_branch_cost = i;
1140 }
1141
1142 if (ix86_tls_dialect_string)
1143 {
1144 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1145 ix86_tls_dialect = TLS_DIALECT_GNU;
1146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1147 ix86_tls_dialect = TLS_DIALECT_SUN;
1148 else
1149 error ("bad value (%s) for -mtls-dialect= switch",
1150 ix86_tls_dialect_string);
1151 }
1152
1153 /* Keep nonleaf frame pointers. */
1154 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1155 flag_omit_frame_pointer = 1;
1156
1157 /* If we're doing fast math, we don't care about comparison order
1158 wrt NaNs. This lets us use a shorter comparison sequence. */
1159 if (flag_unsafe_math_optimizations)
1160 target_flags &= ~MASK_IEEE_FP;
1161
1162 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1163 since the insns won't need emulation. */
1164 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1165 target_flags &= ~MASK_NO_FANCY_MATH_387;
1166
1167 if (TARGET_64BIT)
1168 {
1169 if (TARGET_ALIGN_DOUBLE)
1170 error ("-malign-double makes no sense in the 64bit mode");
1171 if (TARGET_RTD)
1172 error ("-mrtd calling convention not supported in the 64bit mode");
1173 /* Enable by default the SSE and MMX builtins. */
1174 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1175 ix86_fpmath = FPMATH_SSE;
1176 }
1177 else
1178 ix86_fpmath = FPMATH_387;
1179
1180 if (ix86_fpmath_string != 0)
1181 {
1182 if (! strcmp (ix86_fpmath_string, "387"))
1183 ix86_fpmath = FPMATH_387;
1184 else if (! strcmp (ix86_fpmath_string, "sse"))
1185 {
1186 if (!TARGET_SSE)
1187 {
1188 warning ("SSE instruction set disabled, using 387 arithmetics");
1189 ix86_fpmath = FPMATH_387;
1190 }
1191 else
1192 ix86_fpmath = FPMATH_SSE;
1193 }
1194 else if (! strcmp (ix86_fpmath_string, "387,sse")
1195 || ! strcmp (ix86_fpmath_string, "sse,387"))
1196 {
1197 if (!TARGET_SSE)
1198 {
1199 warning ("SSE instruction set disabled, using 387 arithmetics");
1200 ix86_fpmath = FPMATH_387;
1201 }
1202 else if (!TARGET_80387)
1203 {
1204 warning ("387 instruction set disabled, using SSE arithmetics");
1205 ix86_fpmath = FPMATH_SSE;
1206 }
1207 else
1208 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1209 }
1210 else
1211 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1212 }
1213
1214 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1215 on by -msse. */
1216 if (TARGET_SSE)
1217 {
1218 target_flags |= MASK_MMX;
1219 x86_prefetch_sse = true;
1220 }
1221
1222 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1223 if (TARGET_3DNOW)
1224 {
1225 target_flags |= MASK_MMX;
1226 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1227 extensions it adds. */
1228 if (x86_3dnow_a & (1 << ix86_arch))
1229 target_flags |= MASK_3DNOW_A;
1230 }
1231 if ((x86_accumulate_outgoing_args & CPUMASK)
1232 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1233 && !optimize_size)
1234 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1235
1236 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1237 {
1238 char *p;
1239 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1240 p = strchr (internal_label_prefix, 'X');
1241 internal_label_prefix_len = p - internal_label_prefix;
1242 *p = '\0';
1243 }
1244 }
1245 \f
1246 void
1247 optimization_options (level, size)
1248 int level;
1249 int size ATTRIBUTE_UNUSED;
1250 {
1251 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1252 make the problem with not enough registers even worse. */
1253 #ifdef INSN_SCHEDULING
1254 if (level > 1)
1255 flag_schedule_insns = 0;
1256 #endif
1257 if (TARGET_64BIT && optimize >= 1)
1258 flag_omit_frame_pointer = 1;
1259 if (TARGET_64BIT)
1260 {
1261 flag_pcc_struct_return = 0;
1262 flag_asynchronous_unwind_tables = 1;
1263 }
1264 }
1265 \f
1266 /* Table of valid machine attributes. */
1267 const struct attribute_spec ix86_attribute_table[] =
1268 {
1269 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1270 /* Stdcall attribute says callee is responsible for popping arguments
1271 if they are not variable. */
1272 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1273 /* Cdecl attribute says the callee is a normal C declaration */
1274 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1275 /* Regparm attribute specifies how many integer arguments are to be
1276 passed in registers. */
1277 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1278 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1279 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1280 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1281 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1282 #endif
1283 { NULL, 0, 0, false, false, false, NULL }
1284 };
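
/* Source-level usage of the entries above (illustrative example only,
   not part of this file's logic):  */
#if 0
/* Callee pops its own arguments, as with -mrtd.  */
int __attribute__ ((stdcall)) win_style (int a, int b);
/* Caller pops the arguments, even under -mrtd.  */
int __attribute__ ((cdecl)) unix_style (int a, int b);
/* The first three integer arguments arrive in registers.  */
int __attribute__ ((regparm (3))) fast_call (int a, int b, int c);
#endif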
1285
1286 /* Handle a "cdecl" or "stdcall" attribute;
1287 arguments as in struct attribute_spec.handler. */
1288 static tree
1289 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1290 tree *node;
1291 tree name;
1292 tree args ATTRIBUTE_UNUSED;
1293 int flags ATTRIBUTE_UNUSED;
1294 bool *no_add_attrs;
1295 {
1296 if (TREE_CODE (*node) != FUNCTION_TYPE
1297 && TREE_CODE (*node) != METHOD_TYPE
1298 && TREE_CODE (*node) != FIELD_DECL
1299 && TREE_CODE (*node) != TYPE_DECL)
1300 {
1301 warning ("`%s' attribute only applies to functions",
1302 IDENTIFIER_POINTER (name));
1303 *no_add_attrs = true;
1304 }
1305
1306 if (TARGET_64BIT)
1307 {
1308 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1309 *no_add_attrs = true;
1310 }
1311
1312 return NULL_TREE;
1313 }
1314
1315 /* Handle a "regparm" attribute;
1316 arguments as in struct attribute_spec.handler. */
1317 static tree
1318 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1319 tree *node;
1320 tree name;
1321 tree args;
1322 int flags ATTRIBUTE_UNUSED;
1323 bool *no_add_attrs;
1324 {
1325 if (TREE_CODE (*node) != FUNCTION_TYPE
1326 && TREE_CODE (*node) != METHOD_TYPE
1327 && TREE_CODE (*node) != FIELD_DECL
1328 && TREE_CODE (*node) != TYPE_DECL)
1329 {
1330 warning ("`%s' attribute only applies to functions",
1331 IDENTIFIER_POINTER (name));
1332 *no_add_attrs = true;
1333 }
1334 else
1335 {
1336 tree cst;
1337
1338 cst = TREE_VALUE (args);
1339 if (TREE_CODE (cst) != INTEGER_CST)
1340 {
1341 warning ("`%s' attribute requires an integer constant argument",
1342 IDENTIFIER_POINTER (name));
1343 *no_add_attrs = true;
1344 }
1345 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1346 {
1347 warning ("argument to `%s' attribute larger than %d",
1348 IDENTIFIER_POINTER (name), REGPARM_MAX);
1349 *no_add_attrs = true;
1350 }
1351 }
1352
1353 return NULL_TREE;
1354 }
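
/* With the ia32 REGPARM_MAX of 3 (6 under TARGET_64BIT; values per i386.h),
   the checks above accept regparm (0) through regparm (3), warn about a
   non-integer argument such as regparm ("x"), and warn about regparm (4) as
   being larger than REGPARM_MAX; in each warning case the attribute is
   dropped via *no_add_attrs.  (Illustrative note only.)  */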
1355
1356 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1357
1358 /* Generate the assembly code for function entry. FILE is a stdio
1359 stream to output the code to. SIZE is an int: how many units of
1360 temporary storage to allocate.
1361
1362 Refer to the array `regs_ever_live' to determine which registers to
1363 save; `regs_ever_live[I]' is nonzero if register number I is ever
1364 used in the function. This function is responsible for knowing
1365 which registers should not be saved even if used.
1366
1367 We override it here to allow for the new profiling code to go before
1368 the prologue and the old mcount code to go after the prologue (and
1369 after %ebx has been set up for ELF shared library support). */
1370
1371 static void
1372 ix86_osf_output_function_prologue (file, size)
1373 FILE *file;
1374 HOST_WIDE_INT size;
1375 {
1376 const char *prefix = "";
1377 const char *const lprefix = LPREFIX;
1378 int labelno = current_function_profile_label_no;
1379
1380 #ifdef OSF_OS
1381
1382 if (TARGET_UNDERSCORES)
1383 prefix = "_";
1384
1385 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1386 {
1387 if (!flag_pic && !HALF_PIC_P ())
1388 {
1389 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1390 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1391 }
1392
1393 else if (HALF_PIC_P ())
1394 {
1395 rtx symref;
1396
1397 HALF_PIC_EXTERNAL ("_mcount_ptr");
1398 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1399 "_mcount_ptr"));
1400
1401 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1402 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1403 XSTR (symref, 0));
1404 fprintf (file, "\tcall *(%%eax)\n");
1405 }
1406
1407 else
1408 {
1409 static int call_no = 0;
1410
1411 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1412 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1413 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1414 lprefix, call_no++);
1415 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1416 lprefix, labelno);
1417 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1418 prefix);
1419 fprintf (file, "\tcall *(%%eax)\n");
1420 }
1421 }
1422
1423 #else /* !OSF_OS */
1424
1425 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1426 {
1427 if (!flag_pic)
1428 {
1429 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1430 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1431 }
1432
1433 else
1434 {
1435 static int call_no = 0;
1436
1437 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1438 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1439 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1440 lprefix, call_no++);
1441 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1442 lprefix, labelno);
1443 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1444 prefix);
1445 fprintf (file, "\tcall *(%%eax)\n");
1446 }
1447 }
1448 #endif /* !OSF_OS */
1449
1450 function_prologue (file, size);
1451 }
1452
1453 #endif /* OSF_OS || TARGET_OSF1ELF */
1454
1455 /* Return 0 if the attributes for two types are incompatible, 1 if they
1456 are compatible, and 2 if they are nearly compatible (which causes a
1457 warning to be generated). */
1458
1459 static int
1460 ix86_comp_type_attributes (type1, type2)
1461 tree type1;
1462 tree type2;
1463 {
1464 /* Check for mismatch of non-default calling convention. */
1465 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1466
1467 if (TREE_CODE (type1) != FUNCTION_TYPE)
1468 return 1;
1469
1470 /* Check for mismatched return types (cdecl vs stdcall). */
1471 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1472 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1473 return 0;
1474 return 1;
1475 }
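
/* Example of the check above (illustrative): with the default convention,
   assigning a plain function's address to a pointer of type
   "void (__attribute__ ((stdcall)) *) (int)" compares a type without the
   stdcall attribute against one carrying it, the two lookups disagree, 0 is
   returned and the caller warns; prototypes differing only in, say, regparm
   still compare as compatible here.  */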
1476 \f
1477 /* Value is the number of bytes of arguments automatically
1478 popped when returning from a subroutine call.
1479 FUNDECL is the declaration node of the function (as a tree),
1480 FUNTYPE is the data type of the function (as a tree),
1481 or for a library call it is an identifier node for the subroutine name.
1482 SIZE is the number of bytes of arguments passed on the stack.
1483
1484 On the 80386, the RTD insn may be used to pop them if the number
1485 of args is fixed, but if the number is variable then the caller
1486 must pop them all. RTD can't be used for library calls now
1487 because the library is compiled with the Unix compiler.
1488 Use of RTD is a selectable option, since it is incompatible with
1489 standard Unix calling sequences. If the option is not selected,
1490 the caller must always pop the args.
1491
1492 The attribute stdcall is equivalent to RTD on a per module basis. */
1493
1494 int
1495 ix86_return_pops_args (fundecl, funtype, size)
1496 tree fundecl;
1497 tree funtype;
1498 int size;
1499 {
1500 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1501
1502 /* Cdecl functions override -mrtd, and never pop the stack. */
1503 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1504
1505 /* Stdcall functions will pop the stack if not variable args. */
1506 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1507 rtd = 1;
1508
1509 if (rtd
1510 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1511 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1512 == void_type_node)))
1513 return size;
1514 }
1515
1516 /* Lose any fake structure return argument if it is passed on the stack. */
1517 if (aggregate_value_p (TREE_TYPE (funtype))
1518 && !TARGET_64BIT)
1519 {
1520 int nregs = ix86_regparm;
1521
1522 if (funtype)
1523 {
1524 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1525
1526 if (attr)
1527 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1528 }
1529
1530 if (!nregs)
1531 return GET_MODE_SIZE (Pmode);
1532 }
1533
1534 return 0;
1535 }
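
/* Worked example (illustrative): for
     void __attribute__ ((stdcall)) f (int a, int b);
   the argument list is fixed, so SIZE (8 bytes) is returned and the callee
   pops its arguments with "ret $8"; a stdcall function declared with an
   ellipsis, or a plain cdecl function, returns 0 and the caller pops.
   The final clause makes the callee pop the 4-byte hidden aggregate-return
   pointer when no regparm register is available to pass it.  */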
1536 \f
1537 /* Argument support functions. */
1538
1539 /* Return true when register may be used to pass function parameters. */
1540 bool
1541 ix86_function_arg_regno_p (regno)
1542 int regno;
1543 {
1544 int i;
1545 if (!TARGET_64BIT)
1546 return (regno < REGPARM_MAX
1547 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1548 if (SSE_REGNO_P (regno) && TARGET_SSE)
1549 return true;
1550 /* RAX is used as a hidden argument to va_arg functions. */
1551 if (!regno)
1552 return true;
1553 for (i = 0; i < REGPARM_MAX; i++)
1554 if (regno == x86_64_int_parameter_registers[i])
1555 return true;
1556 return false;
1557 }
1558
1559 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1560 for a call to a function whose data type is FNTYPE.
1561 For a library call, FNTYPE is 0. */
1562
1563 void
1564 init_cumulative_args (cum, fntype, libname)
1565 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1566 tree fntype; /* tree ptr for function decl */
1567 rtx libname; /* SYMBOL_REF of library name or 0 */
1568 {
1569 static CUMULATIVE_ARGS zero_cum;
1570 tree param, next_param;
1571
1572 if (TARGET_DEBUG_ARG)
1573 {
1574 fprintf (stderr, "\ninit_cumulative_args (");
1575 if (fntype)
1576 fprintf (stderr, "fntype code = %s, ret code = %s",
1577 tree_code_name[(int) TREE_CODE (fntype)],
1578 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1579 else
1580 fprintf (stderr, "no fntype");
1581
1582 if (libname)
1583 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1584 }
1585
1586 *cum = zero_cum;
1587
1588 /* Set up the number of registers to use for passing arguments. */
1589 cum->nregs = ix86_regparm;
1590 cum->sse_nregs = SSE_REGPARM_MAX;
1591 if (fntype && !TARGET_64BIT)
1592 {
1593 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1594
1595 if (attr)
1596 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1597 }
1598 cum->maybe_vaarg = false;
1599
1600   /* Determine if this function has variable arguments.  This is
1601      indicated by the last argument being 'void_type_node' if there
1602      are no variable arguments.  If there are variable arguments, then
1603      we won't pass anything in registers.  */
1604
1605 if (cum->nregs)
1606 {
1607 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1608 param != 0; param = next_param)
1609 {
1610 next_param = TREE_CHAIN (param);
1611 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1612 {
1613 if (!TARGET_64BIT)
1614 cum->nregs = 0;
1615 cum->maybe_vaarg = true;
1616 }
1617 }
1618 }
1619 if ((!fntype && !libname)
1620 || (fntype && !TYPE_ARG_TYPES (fntype)))
1621 cum->maybe_vaarg = 1;
1622
1623 if (TARGET_DEBUG_ARG)
1624 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1625
1626 return;
1627 }
1628
1629 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1630    The goal of this code is to classify each eightbyte of an incoming argument
1631    by register class and assign registers accordingly.  */
1632
1633 /* Return the union class of CLASS1 and CLASS2.
1634 See the x86-64 PS ABI for details. */
1635
1636 static enum x86_64_reg_class
1637 merge_classes (class1, class2)
1638 enum x86_64_reg_class class1, class2;
1639 {
1640 /* Rule #1: If both classes are equal, this is the resulting class. */
1641 if (class1 == class2)
1642 return class1;
1643
1644 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1645 the other class. */
1646 if (class1 == X86_64_NO_CLASS)
1647 return class2;
1648 if (class2 == X86_64_NO_CLASS)
1649 return class1;
1650
1651 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1652 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1653 return X86_64_MEMORY_CLASS;
1654
1655 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1656 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1657 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1658 return X86_64_INTEGERSI_CLASS;
1659 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1660 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1661 return X86_64_INTEGER_CLASS;
1662
1663 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1664 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1665 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1666 return X86_64_MEMORY_CLASS;
1667
1668 /* Rule #6: Otherwise class SSE is used. */
1669 return X86_64_SSE_CLASS;
1670 }
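/* Editor's illustration of the rules above (the values follow directly from
   the code; this is a sketch, not part of the original source):

     merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS)
       == X86_64_SSESF_CLASS                         (rule #2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS                     (rule #4, first clause)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       == X86_64_MEMORY_CLASS                        (rule #5)  */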
1671
1672 /* Classify the argument of type TYPE and mode MODE.
1673 CLASSES will be filled by the register class used to pass each word
1674 of the operand. The number of words is returned. In case the parameter
1675 should be passed in memory, 0 is returned. As a special case for zero
1676 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1677
1678    BIT_OFFSET is used internally for handling records; it specifies the
1679    offset in bits, taken modulo 256 to avoid overflow cases.
1680
1681 See the x86-64 PS ABI for details.
1682 */
1683
1684 static int
1685 classify_argument (mode, type, classes, bit_offset)
1686 enum machine_mode mode;
1687 tree type;
1688 enum x86_64_reg_class classes[MAX_CLASSES];
1689 int bit_offset;
1690 {
1691 int bytes =
1692 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1693 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1694
1695 if (type && AGGREGATE_TYPE_P (type))
1696 {
1697 int i;
1698 tree field;
1699 enum x86_64_reg_class subclasses[MAX_CLASSES];
1700
1701 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1702 if (bytes > 16)
1703 return 0;
1704
1705 for (i = 0; i < words; i++)
1706 classes[i] = X86_64_NO_CLASS;
1707
1708       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1709          signal the memory class, so handle this as a special case.  */
1710 if (!words)
1711 {
1712 classes[0] = X86_64_NO_CLASS;
1713 return 1;
1714 }
1715
1716 /* Classify each field of record and merge classes. */
1717 if (TREE_CODE (type) == RECORD_TYPE)
1718 {
1719 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1720 {
1721 if (TREE_CODE (field) == FIELD_DECL)
1722 {
1723 int num;
1724
1725 /* Bitfields are always classified as integer. Handle them
1726 early, since later code would consider them to be
1727 misaligned integers. */
1728 if (DECL_BIT_FIELD (field))
1729 {
1730 for (i = int_bit_position (field) / 8 / 8;
1731 i < (int_bit_position (field)
1732 + tree_low_cst (DECL_SIZE (field), 0)
1733 + 63) / 8 / 8; i++)
1734 classes[i] =
1735 merge_classes (X86_64_INTEGER_CLASS,
1736 classes[i]);
1737 }
1738 else
1739 {
1740 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1741 TREE_TYPE (field), subclasses,
1742 (int_bit_position (field)
1743 + bit_offset) % 256);
1744 if (!num)
1745 return 0;
1746 for (i = 0; i < num; i++)
1747 {
1748 int pos =
1749 (int_bit_position (field) + bit_offset) / 8 / 8;
1750 classes[i + pos] =
1751 merge_classes (subclasses[i], classes[i + pos]);
1752 }
1753 }
1754 }
1755 }
1756 }
1757 /* Arrays are handled as small records. */
1758 else if (TREE_CODE (type) == ARRAY_TYPE)
1759 {
1760 int num;
1761 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1762 TREE_TYPE (type), subclasses, bit_offset);
1763 if (!num)
1764 return 0;
1765
1766 /* The partial classes are now full classes. */
1767 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1768 subclasses[0] = X86_64_SSE_CLASS;
1769 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1770 subclasses[0] = X86_64_INTEGER_CLASS;
1771
1772 for (i = 0; i < words; i++)
1773 classes[i] = subclasses[i % num];
1774 }
1775 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1776 else if (TREE_CODE (type) == UNION_TYPE
1777 || TREE_CODE (type) == QUAL_UNION_TYPE)
1778 {
1779 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1780 {
1781 if (TREE_CODE (field) == FIELD_DECL)
1782 {
1783 int num;
1784 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1785 TREE_TYPE (field), subclasses,
1786 bit_offset);
1787 if (!num)
1788 return 0;
1789 for (i = 0; i < num; i++)
1790 classes[i] = merge_classes (subclasses[i], classes[i]);
1791 }
1792 }
1793 }
1794 else
1795 abort ();
1796
1797 /* Final merger cleanup. */
1798 for (i = 0; i < words; i++)
1799 {
1800 /* If one class is MEMORY, everything should be passed in
1801 memory. */
1802 if (classes[i] == X86_64_MEMORY_CLASS)
1803 return 0;
1804
1805 /* The X86_64_SSEUP_CLASS should be always preceded by
1806 X86_64_SSE_CLASS. */
1807 if (classes[i] == X86_64_SSEUP_CLASS
1808 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1809 classes[i] = X86_64_SSE_CLASS;
1810
1811 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1812 if (classes[i] == X86_64_X87UP_CLASS
1813 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1814 classes[i] = X86_64_SSE_CLASS;
1815 }
1816 return words;
1817 }
1818
1819   /* Compute the alignment needed.  We align all types to their natural
1820      boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
1821 if (mode != VOIDmode && mode != BLKmode)
1822 {
1823 int mode_alignment = GET_MODE_BITSIZE (mode);
1824
1825 if (mode == XFmode)
1826 mode_alignment = 128;
1827 else if (mode == XCmode)
1828 mode_alignment = 256;
1829 /* Misaligned fields are always returned in memory. */
1830 if (bit_offset % mode_alignment)
1831 return 0;
1832 }
1833
1834 /* Classification of atomic types. */
1835 switch (mode)
1836 {
1837 case DImode:
1838 case SImode:
1839 case HImode:
1840 case QImode:
1841 case CSImode:
1842 case CHImode:
1843 case CQImode:
1844 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1845 classes[0] = X86_64_INTEGERSI_CLASS;
1846 else
1847 classes[0] = X86_64_INTEGER_CLASS;
1848 return 1;
1849 case CDImode:
1850 case TImode:
1851 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1852 return 2;
1853 case CTImode:
1854 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1855 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1856 return 4;
1857 case SFmode:
1858 if (!(bit_offset % 64))
1859 classes[0] = X86_64_SSESF_CLASS;
1860 else
1861 classes[0] = X86_64_SSE_CLASS;
1862 return 1;
1863 case DFmode:
1864 classes[0] = X86_64_SSEDF_CLASS;
1865 return 1;
1866 case TFmode:
1867 classes[0] = X86_64_X87_CLASS;
1868 classes[1] = X86_64_X87UP_CLASS;
1869 return 2;
1870 case TCmode:
1871 classes[0] = X86_64_X87_CLASS;
1872 classes[1] = X86_64_X87UP_CLASS;
1873 classes[2] = X86_64_X87_CLASS;
1874 classes[3] = X86_64_X87UP_CLASS;
1875 return 4;
1876 case DCmode:
1877 classes[0] = X86_64_SSEDF_CLASS;
1878 classes[1] = X86_64_SSEDF_CLASS;
1879 return 2;
1880 case SCmode:
1881 classes[0] = X86_64_SSE_CLASS;
1882 return 1;
1883 case V4SFmode:
1884 case V4SImode:
1885 classes[0] = X86_64_SSE_CLASS;
1886 classes[1] = X86_64_SSEUP_CLASS;
1887 return 2;
1888 case V2SFmode:
1889 case V2SImode:
1890 case V4HImode:
1891 case V8QImode:
1892 classes[0] = X86_64_SSE_CLASS;
1893 return 1;
1894 case BLKmode:
1895 case VOIDmode:
1896 return 0;
1897 default:
1898 abort ();
1899 }
1900 }
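/* Editor's worked example (a sketch; the struct is hypothetical).  For

     struct s { float x; float y; };   // 8 bytes, a single eightbyte

   the field x at bit offset 0 classifies as SSESF and y at bit offset 32 as
   SSE; merging them per the rules above gives X86_64_SSE_CLASS, so
   classify_argument returns 1 with classes[0] == X86_64_SSE_CLASS and the
   whole struct travels in one SSE register.  */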
1901
1902 /* Examine the argument and set the number of registers required in each
1903    class.  Return 0 iff the parameter should be passed in memory.  */
1904 static int
1905 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1906 enum machine_mode mode;
1907 tree type;
1908 int *int_nregs, *sse_nregs;
1909 int in_return;
1910 {
1911 enum x86_64_reg_class class[MAX_CLASSES];
1912 int n = classify_argument (mode, type, class, 0);
1913
1914 *int_nregs = 0;
1915 *sse_nregs = 0;
1916 if (!n)
1917 return 0;
1918 for (n--; n >= 0; n--)
1919 switch (class[n])
1920 {
1921 case X86_64_INTEGER_CLASS:
1922 case X86_64_INTEGERSI_CLASS:
1923 (*int_nregs)++;
1924 break;
1925 case X86_64_SSE_CLASS:
1926 case X86_64_SSESF_CLASS:
1927 case X86_64_SSEDF_CLASS:
1928 (*sse_nregs)++;
1929 break;
1930 case X86_64_NO_CLASS:
1931 case X86_64_SSEUP_CLASS:
1932 break;
1933 case X86_64_X87_CLASS:
1934 case X86_64_X87UP_CLASS:
1935 if (!in_return)
1936 return 0;
1937 break;
1938 case X86_64_MEMORY_CLASS:
1939 abort ();
1940 }
1941 return 1;
1942 }
1943 /* Construct a container for the argument as used by the GCC interface.
1944    See FUNCTION_ARG for the detailed description.  */
1945 static rtx
1946 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1947 enum machine_mode mode;
1948 tree type;
1949 int in_return;
1950 int nintregs, nsseregs;
1951 const int * intreg;
1952 int sse_regno;
1953 {
1954 enum machine_mode tmpmode;
1955 int bytes =
1956 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1957 enum x86_64_reg_class class[MAX_CLASSES];
1958 int n;
1959 int i;
1960 int nexps = 0;
1961 int needed_sseregs, needed_intregs;
1962 rtx exp[MAX_CLASSES];
1963 rtx ret;
1964
1965 n = classify_argument (mode, type, class, 0);
1966 if (TARGET_DEBUG_ARG)
1967 {
1968 if (!n)
1969 fprintf (stderr, "Memory class\n");
1970 else
1971 {
1972 fprintf (stderr, "Classes:");
1973 for (i = 0; i < n; i++)
1974 {
1975 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1976 }
1977 fprintf (stderr, "\n");
1978 }
1979 }
1980 if (!n)
1981 return NULL;
1982 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1983 return NULL;
1984 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1985 return NULL;
1986
1987 /* First construct simple cases. Avoid SCmode, since we want to use
1988 single register to pass this type. */
1989 if (n == 1 && mode != SCmode)
1990 switch (class[0])
1991 {
1992 case X86_64_INTEGER_CLASS:
1993 case X86_64_INTEGERSI_CLASS:
1994 return gen_rtx_REG (mode, intreg[0]);
1995 case X86_64_SSE_CLASS:
1996 case X86_64_SSESF_CLASS:
1997 case X86_64_SSEDF_CLASS:
1998 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1999 case X86_64_X87_CLASS:
2000 return gen_rtx_REG (mode, FIRST_STACK_REG);
2001 case X86_64_NO_CLASS:
2002 /* Zero sized array, struct or class. */
2003 return NULL;
2004 default:
2005 abort ();
2006 }
2007 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2008 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2009 if (n == 2
2010 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2011 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2012 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2013 && class[1] == X86_64_INTEGER_CLASS
2014 && (mode == CDImode || mode == TImode)
2015 && intreg[0] + 1 == intreg[1])
2016 return gen_rtx_REG (mode, intreg[0]);
2017 if (n == 4
2018 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2019 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2020 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2021
2022 /* Otherwise figure out the entries of the PARALLEL. */
2023 for (i = 0; i < n; i++)
2024 {
2025 switch (class[i])
2026 {
2027 case X86_64_NO_CLASS:
2028 break;
2029 case X86_64_INTEGER_CLASS:
2030 case X86_64_INTEGERSI_CLASS:
2031           /* Merge TImodes on aligned occasions here too.  */
2032 if (i * 8 + 8 > bytes)
2033 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2034 else if (class[i] == X86_64_INTEGERSI_CLASS)
2035 tmpmode = SImode;
2036 else
2037 tmpmode = DImode;
2038           /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
2039 if (tmpmode == BLKmode)
2040 tmpmode = DImode;
2041 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2042 gen_rtx_REG (tmpmode, *intreg),
2043 GEN_INT (i*8));
2044 intreg++;
2045 break;
2046 case X86_64_SSESF_CLASS:
2047 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2048 gen_rtx_REG (SFmode,
2049 SSE_REGNO (sse_regno)),
2050 GEN_INT (i*8));
2051 sse_regno++;
2052 break;
2053 case X86_64_SSEDF_CLASS:
2054 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2055 gen_rtx_REG (DFmode,
2056 SSE_REGNO (sse_regno)),
2057 GEN_INT (i*8));
2058 sse_regno++;
2059 break;
2060 case X86_64_SSE_CLASS:
2061 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2062 tmpmode = TImode, i++;
2063 else
2064 tmpmode = DImode;
2065 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2066 gen_rtx_REG (tmpmode,
2067 SSE_REGNO (sse_regno)),
2068 GEN_INT (i*8));
2069 sse_regno++;
2070 break;
2071 default:
2072 abort ();
2073 }
2074 }
2075 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2076 for (i = 0; i < nexps; i++)
2077 XVECEXP (ret, 0, i) = exp [i];
2078 return ret;
2079 }
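/* Editor's sketch of the result (hypothetical argument; register names per
   the x86-64 ABI).  For struct { double d; long l; } passed as the first
   argument, the classes come out as { SSEDF, INTEGER } and the PARALLEL
   built above looks roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. the double is passed in an SSE register and the long in an integer
   register, each expr_list carrying the byte offset within the object.  */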
2080
2081 /* Update the data in CUM to advance over an argument
2082 of mode MODE and data type TYPE.
2083 (TYPE is null for libcalls where that information may not be available.) */
2084
2085 void
2086 function_arg_advance (cum, mode, type, named)
2087 CUMULATIVE_ARGS *cum; /* current arg information */
2088 enum machine_mode mode; /* current arg mode */
2089 tree type; /* type of the argument or 0 if lib support */
2090 int named; /* whether or not the argument was named */
2091 {
2092 int bytes =
2093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2094 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2095
2096 if (TARGET_DEBUG_ARG)
2097 fprintf (stderr,
2098 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2099 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2100 if (TARGET_64BIT)
2101 {
2102 int int_nregs, sse_nregs;
2103 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2104 cum->words += words;
2105 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2106 {
2107 cum->nregs -= int_nregs;
2108 cum->sse_nregs -= sse_nregs;
2109 cum->regno += int_nregs;
2110 cum->sse_regno += sse_nregs;
2111 }
2112 else
2113 cum->words += words;
2114 }
2115 else
2116 {
2117 if (TARGET_SSE && mode == TImode)
2118 {
2119 cum->sse_words += words;
2120 cum->sse_nregs -= 1;
2121 cum->sse_regno += 1;
2122 if (cum->sse_nregs <= 0)
2123 {
2124 cum->sse_nregs = 0;
2125 cum->sse_regno = 0;
2126 }
2127 }
2128 else
2129 {
2130 cum->words += words;
2131 cum->nregs -= words;
2132 cum->regno += words;
2133
2134 if (cum->nregs <= 0)
2135 {
2136 cum->nregs = 0;
2137 cum->regno = 0;
2138 }
2139 }
2140 }
2141 return;
2142 }
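/* Editor's sketch of the bookkeeping above (hypothetical call; the x86-64
   defaults of 6 integer and 8 SSE argument registers assumed).  Advancing
   over a prototyped (int, double) pair leaves CUM with nregs 6->5,
   sse_nregs 8->7, regno 0->1 and sse_regno 0->1; an argument that
   examine_argument rejects (e.g. a 32-byte struct) instead bumps
   cum->words and leaves the register counters untouched.  */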
2143
2144 /* Define where to put the arguments to a function.
2145 Value is zero to push the argument on the stack,
2146 or a hard register in which to store the argument.
2147
2148 MODE is the argument's machine mode.
2149 TYPE is the data type of the argument (as a tree).
2150 This is null for libcalls where that information may
2151 not be available.
2152 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2153 the preceding args and about the function being called.
2154 NAMED is nonzero if this argument is a named parameter
2155 (otherwise it is an extra parameter matching an ellipsis). */
2156
2157 rtx
2158 function_arg (cum, mode, type, named)
2159 CUMULATIVE_ARGS *cum; /* current arg information */
2160 enum machine_mode mode; /* current arg mode */
2161 tree type; /* type of the argument or 0 if lib support */
2162 int named; /* != 0 for normal args, == 0 for ... args */
2163 {
2164 rtx ret = NULL_RTX;
2165 int bytes =
2166 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2167 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2168
2169   /* Handle a hidden AL argument containing the number of registers for
2170      varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
2171      avoid any AL settings.  */
2172 if (mode == VOIDmode)
2173 {
2174 if (TARGET_64BIT)
2175 return GEN_INT (cum->maybe_vaarg
2176 ? (cum->sse_nregs < 0
2177 ? SSE_REGPARM_MAX
2178 : cum->sse_regno)
2179 : -1);
2180 else
2181 return constm1_rtx;
2182 }
2183 if (TARGET_64BIT)
2184 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2185 &x86_64_int_parameter_registers [cum->regno],
2186 cum->sse_regno);
2187 else
2188 switch (mode)
2189 {
2190 /* For now, pass fp/complex values on the stack. */
2191 default:
2192 break;
2193
2194 case BLKmode:
2195 case DImode:
2196 case SImode:
2197 case HImode:
2198 case QImode:
2199 if (words <= cum->nregs)
2200 ret = gen_rtx_REG (mode, cum->regno);
2201 break;
2202 case TImode:
2203 if (cum->sse_nregs)
2204 ret = gen_rtx_REG (mode, cum->sse_regno);
2205 break;
2206 }
2207
2208 if (TARGET_DEBUG_ARG)
2209 {
2210 fprintf (stderr,
2211 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2212 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2213
2214 if (ret)
2215 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2216 else
2217 fprintf (stderr, ", stack");
2218
2219 fprintf (stderr, " )\n");
2220 }
2221
2222 return ret;
2223 }
2224
2225 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2226 and type. */
2227
2228 int
2229 ix86_function_arg_boundary (mode, type)
2230 enum machine_mode mode;
2231 tree type;
2232 {
2233 int align;
2234 if (!TARGET_64BIT)
2235 return PARM_BOUNDARY;
2236 if (type)
2237 align = TYPE_ALIGN (type);
2238 else
2239 align = GET_MODE_ALIGNMENT (mode);
2240 if (align < PARM_BOUNDARY)
2241 align = PARM_BOUNDARY;
2242 if (align > 128)
2243 align = 128;
2244 return align;
2245 }
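/* Editor's note - examples of the clamping above (hypothetical types): on
   x86-64 a plain int aligns to PARM_BOUNDARY (64 bits), a 16-byte vector
   such as __m128 gets 128, and anything requesting more than 128 bits is
   clamped back down to 128.  On ia32 the function always returns
   PARM_BOUNDARY.  */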
2246
2247 /* Return true if N is a possible register number of function value. */
2248 bool
2249 ix86_function_value_regno_p (regno)
2250 int regno;
2251 {
2252 if (!TARGET_64BIT)
2253 {
2254 return ((regno) == 0
2255 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2256 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2257 }
2258 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2259 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2260 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2261 }
2262
2263 /* Define how to find the value returned by a function.
2264 VALTYPE is the data type of the value (as a tree).
2265 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2266 otherwise, FUNC is 0. */
2267 rtx
2268 ix86_function_value (valtype)
2269 tree valtype;
2270 {
2271 if (TARGET_64BIT)
2272 {
2273 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2274 REGPARM_MAX, SSE_REGPARM_MAX,
2275 x86_64_int_return_registers, 0);
2276      /* For zero sized structures, construct_container returns NULL, but we
2277         need to keep the rest of the compiler happy by returning a meaningful value.  */
2278 if (!ret)
2279 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2280 return ret;
2281 }
2282 else
2283 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2284 }
2285
2286 /* Return nonzero iff TYPE is returned in memory.  */
2287 int
2288 ix86_return_in_memory (type)
2289 tree type;
2290 {
2291 int needed_intregs, needed_sseregs;
2292 if (TARGET_64BIT)
2293 {
2294 return !examine_argument (TYPE_MODE (type), type, 1,
2295 &needed_intregs, &needed_sseregs);
2296 }
2297 else
2298 {
2299 if (TYPE_MODE (type) == BLKmode
2300 || (VECTOR_MODE_P (TYPE_MODE (type))
2301 && int_size_in_bytes (type) == 8)
2302 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2303 && TYPE_MODE (type) != TFmode
2304 && !VECTOR_MODE_P (TYPE_MODE (type))))
2305 return 1;
2306 return 0;
2307 }
2308 }
2309
2310 /* Define how to find the value returned by a library function
2311 assuming the value has mode MODE. */
2312 rtx
2313 ix86_libcall_value (mode)
2314 enum machine_mode mode;
2315 {
2316 if (TARGET_64BIT)
2317 {
2318 switch (mode)
2319 {
2320 case SFmode:
2321 case SCmode:
2322 case DFmode:
2323 case DCmode:
2324 return gen_rtx_REG (mode, FIRST_SSE_REG);
2325 case TFmode:
2326 case TCmode:
2327 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2328 default:
2329 return gen_rtx_REG (mode, 0);
2330 }
2331 }
2332 else
2333 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2334 }
2335 \f
2336 /* Create the va_list data type. */
2337
2338 tree
2339 ix86_build_va_list ()
2340 {
2341 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2342
2343   /* For i386 we use a plain pointer to the argument area.  */
2344 if (!TARGET_64BIT)
2345 return build_pointer_type (char_type_node);
2346
2347 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2348 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2349
2350 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2351 unsigned_type_node);
2352 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2353 unsigned_type_node);
2354 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2355 ptr_type_node);
2356 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2357 ptr_type_node);
2358
2359 DECL_FIELD_CONTEXT (f_gpr) = record;
2360 DECL_FIELD_CONTEXT (f_fpr) = record;
2361 DECL_FIELD_CONTEXT (f_ovf) = record;
2362 DECL_FIELD_CONTEXT (f_sav) = record;
2363
2364 TREE_CHAIN (record) = type_decl;
2365 TYPE_NAME (record) = type_decl;
2366 TYPE_FIELDS (record) = f_gpr;
2367 TREE_CHAIN (f_gpr) = f_fpr;
2368 TREE_CHAIN (f_fpr) = f_ovf;
2369 TREE_CHAIN (f_ovf) = f_sav;
2370
2371 layout_type (record);
2372
2373 /* The correct type is an array type of one element. */
2374 return build_array_type (record, build_index_type (size_zero_node));
2375 }
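/* Editor's note: the record built above matches the va_list layout the
   x86-64 ABI documents, roughly equivalent to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;      // bytes into reg_save_area for GP regs
       unsigned int fp_offset;      // bytes into reg_save_area for SSE regs
       void *overflow_arg_area;     // next stack-passed argument
       void *reg_save_area;         // block saved by the prologue
     } va_list[1];

   (a sketch only; the actual type is built through the tree nodes above).  */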
2376
2377 /* Perform any actions needed for a function that is receiving a
2378    variable number of arguments.
2379
2380 CUM is as above.
2381
2382 MODE and TYPE are the mode and type of the current parameter.
2383
2384 PRETEND_SIZE is a variable that should be set to the amount of stack
2385 that must be pushed by the prolog to pretend that our caller pushed
2386 it.
2387
2388 Normally, this macro will push all remaining incoming registers on the
2389 stack and set PRETEND_SIZE to the length of the registers pushed. */
2390
2391 void
2392 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2393 CUMULATIVE_ARGS *cum;
2394 enum machine_mode mode;
2395 tree type;
2396 int *pretend_size ATTRIBUTE_UNUSED;
2397 int no_rtl;
2398
2399 {
2400 CUMULATIVE_ARGS next_cum;
2401 rtx save_area = NULL_RTX, mem;
2402 rtx label;
2403 rtx label_ref;
2404 rtx tmp_reg;
2405 rtx nsse_reg;
2406 int set;
2407 tree fntype;
2408 int stdarg_p;
2409 int i;
2410
2411 if (!TARGET_64BIT)
2412 return;
2413
2414 /* Indicate to allocate space on the stack for varargs save area. */
2415 ix86_save_varrargs_registers = 1;
2416
2417 fntype = TREE_TYPE (current_function_decl);
2418 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2419 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2420 != void_type_node));
2421
2422 /* For varargs, we do not want to skip the dummy va_dcl argument.
2423 For stdargs, we do want to skip the last named argument. */
2424 next_cum = *cum;
2425 if (stdarg_p)
2426 function_arg_advance (&next_cum, mode, type, 1);
2427
2428 if (!no_rtl)
2429 save_area = frame_pointer_rtx;
2430
2431 set = get_varargs_alias_set ();
2432
2433 for (i = next_cum.regno; i < ix86_regparm; i++)
2434 {
2435 mem = gen_rtx_MEM (Pmode,
2436 plus_constant (save_area, i * UNITS_PER_WORD));
2437 set_mem_alias_set (mem, set);
2438 emit_move_insn (mem, gen_rtx_REG (Pmode,
2439 x86_64_int_parameter_registers[i]));
2440 }
2441
2442 if (next_cum.sse_nregs)
2443 {
2444       /* Now emit code to save SSE registers.  The AX parameter contains the
2445          number of SSE parameter registers used to call this function.  We use
2446          the sse_prologue_save insn template, which produces a computed jump
2447          across the SSE saves.  We need some preparation work to get this working.  */
2448
2449 label = gen_label_rtx ();
2450 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2451
2452       /* Compute the address to jump to:
2453          label - eax*4 + nnamed_sse_arguments*4  */
2454 tmp_reg = gen_reg_rtx (Pmode);
2455 nsse_reg = gen_reg_rtx (Pmode);
2456 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2457 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2458 gen_rtx_MULT (Pmode, nsse_reg,
2459 GEN_INT (4))));
2460 if (next_cum.sse_regno)
2461 emit_move_insn
2462 (nsse_reg,
2463 gen_rtx_CONST (DImode,
2464 gen_rtx_PLUS (DImode,
2465 label_ref,
2466 GEN_INT (next_cum.sse_regno * 4))));
2467 else
2468 emit_move_insn (nsse_reg, label_ref);
2469 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2470
2471       /* Compute the address of the memory block we save into.  We always use a
2472          pointer pointing 127 bytes past the first byte to store to - this is
2473          needed to keep the instruction size limited to 4 bytes.  */
2474 tmp_reg = gen_reg_rtx (Pmode);
2475 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2476 plus_constant (save_area,
2477 8 * REGPARM_MAX + 127)));
2478 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2479 set_mem_alias_set (mem, set);
2480 set_mem_align (mem, BITS_PER_WORD);
2481
2482 /* And finally do the dirty job! */
2483 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2484 GEN_INT (next_cum.sse_regno), label));
2485 }
2486
2487 }
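/* Editor's sketch of the varargs register save area laid out by the code
   above (x86-64 ABI register order assumed):

     offset   0 ..  47 : rdi, rsi, rdx, rcx, r8, r9    (8 bytes each)
     offset  48 .. 175 : xmm0 .. xmm7                  (16 bytes each)

   The gp_offset/fp_offset fields of the va_list index into this block.  */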
2488
2489 /* Implement va_start. */
2490
2491 void
2492 ix86_va_start (stdarg_p, valist, nextarg)
2493 int stdarg_p;
2494 tree valist;
2495 rtx nextarg;
2496 {
2497 HOST_WIDE_INT words, n_gpr, n_fpr;
2498 tree f_gpr, f_fpr, f_ovf, f_sav;
2499 tree gpr, fpr, ovf, sav, t;
2500
2501 /* Only 64bit target needs something special. */
2502 if (!TARGET_64BIT)
2503 {
2504 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2505 return;
2506 }
2507
2508 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2509 f_fpr = TREE_CHAIN (f_gpr);
2510 f_ovf = TREE_CHAIN (f_fpr);
2511 f_sav = TREE_CHAIN (f_ovf);
2512
2513 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2514 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2515 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2516 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2517 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2518
2519 /* Count number of gp and fp argument registers used. */
2520 words = current_function_args_info.words;
2521 n_gpr = current_function_args_info.regno;
2522 n_fpr = current_function_args_info.sse_regno;
2523
2524 if (TARGET_DEBUG_ARG)
2525 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2526 (int) words, (int) n_gpr, (int) n_fpr);
2527
2528 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2529 build_int_2 (n_gpr * 8, 0));
2530 TREE_SIDE_EFFECTS (t) = 1;
2531 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2532
2533 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2534 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2535 TREE_SIDE_EFFECTS (t) = 1;
2536 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2537
2538 /* Find the overflow area. */
2539 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2540 if (words != 0)
2541 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2542 build_int_2 (words * UNITS_PER_WORD, 0));
2543 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2544 TREE_SIDE_EFFECTS (t) = 1;
2545 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2546
2547 /* Find the register save area.
2548      The prologue of the function saves it right above the stack frame.  */
2549 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2550 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2551 TREE_SIDE_EFFECTS (t) = 1;
2552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2553 }
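/* Editor's example of the initialization above (hypothetical function).  For

     void log_it (const char *fmt, ...);   // one named GP argument

   va_start sets gp_offset = 1 * 8 = 8, fp_offset = 6 * 8 + 0 * 16 = 48,
   overflow_arg_area to the first stack-passed argument, and reg_save_area
   to the block saved by the prologue (see ix86_setup_incoming_varargs).  */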
2554
2555 /* Implement va_arg. */
2556 rtx
2557 ix86_va_arg (valist, type)
2558 tree valist, type;
2559 {
2560 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2561 tree f_gpr, f_fpr, f_ovf, f_sav;
2562 tree gpr, fpr, ovf, sav, t;
2563 int size, rsize;
2564 rtx lab_false, lab_over = NULL_RTX;
2565 rtx addr_rtx, r;
2566 rtx container;
2567
2568 /* Only 64bit target needs something special. */
2569 if (!TARGET_64BIT)
2570 {
2571 return std_expand_builtin_va_arg (valist, type);
2572 }
2573
2574 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2575 f_fpr = TREE_CHAIN (f_gpr);
2576 f_ovf = TREE_CHAIN (f_fpr);
2577 f_sav = TREE_CHAIN (f_ovf);
2578
2579 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2580 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2581 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2582 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2583 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2584
2585 size = int_size_in_bytes (type);
2586 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2587
2588 container = construct_container (TYPE_MODE (type), type, 0,
2589 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2590 /*
2591 * Pull the value out of the saved registers ...
2592 */
2593
2594 addr_rtx = gen_reg_rtx (Pmode);
2595
2596 if (container)
2597 {
2598 rtx int_addr_rtx, sse_addr_rtx;
2599 int needed_intregs, needed_sseregs;
2600 int need_temp;
2601
2602 lab_over = gen_label_rtx ();
2603 lab_false = gen_label_rtx ();
2604
2605 examine_argument (TYPE_MODE (type), type, 0,
2606 &needed_intregs, &needed_sseregs);
2607
2608
2609 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2610 || TYPE_ALIGN (type) > 128);
2611
2612       /* In case we are passing a structure, verify that it is a consecutive
2613          block in the register save area.  If not, we need to do moves.  */
2614 if (!need_temp && !REG_P (container))
2615 {
2616           /* Verify that all registers are strictly consecutive.  */
2617 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2618 {
2619 int i;
2620
2621 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2622 {
2623 rtx slot = XVECEXP (container, 0, i);
2624 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2625 || INTVAL (XEXP (slot, 1)) != i * 16)
2626 need_temp = 1;
2627 }
2628 }
2629 else
2630 {
2631 int i;
2632
2633 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2634 {
2635 rtx slot = XVECEXP (container, 0, i);
2636 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2637 || INTVAL (XEXP (slot, 1)) != i * 8)
2638 need_temp = 1;
2639 }
2640 }
2641 }
2642 if (!need_temp)
2643 {
2644 int_addr_rtx = addr_rtx;
2645 sse_addr_rtx = addr_rtx;
2646 }
2647 else
2648 {
2649 int_addr_rtx = gen_reg_rtx (Pmode);
2650 sse_addr_rtx = gen_reg_rtx (Pmode);
2651 }
2652 /* First ensure that we fit completely in registers. */
2653 if (needed_intregs)
2654 {
2655 emit_cmp_and_jump_insns (expand_expr
2656 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2657 GEN_INT ((REGPARM_MAX - needed_intregs +
2658 1) * 8), GE, const1_rtx, SImode,
2659 1, lab_false);
2660 }
2661 if (needed_sseregs)
2662 {
2663 emit_cmp_and_jump_insns (expand_expr
2664 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2665 GEN_INT ((SSE_REGPARM_MAX -
2666 needed_sseregs + 1) * 16 +
2667 REGPARM_MAX * 8), GE, const1_rtx,
2668 SImode, 1, lab_false);
2669 }
2670
2671 /* Compute index to start of area used for integer regs. */
2672 if (needed_intregs)
2673 {
2674 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2675 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2676 if (r != int_addr_rtx)
2677 emit_move_insn (int_addr_rtx, r);
2678 }
2679 if (needed_sseregs)
2680 {
2681 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2682 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2683 if (r != sse_addr_rtx)
2684 emit_move_insn (sse_addr_rtx, r);
2685 }
2686 if (need_temp)
2687 {
2688 int i;
2689 rtx mem;
2690
2691 /* Never use the memory itself, as it has the alias set. */
2692 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2693 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2694 set_mem_alias_set (mem, get_varargs_alias_set ());
2695 set_mem_align (mem, BITS_PER_UNIT);
2696
2697 for (i = 0; i < XVECLEN (container, 0); i++)
2698 {
2699 rtx slot = XVECEXP (container, 0, i);
2700 rtx reg = XEXP (slot, 0);
2701 enum machine_mode mode = GET_MODE (reg);
2702 rtx src_addr;
2703 rtx src_mem;
2704 int src_offset;
2705 rtx dest_mem;
2706
2707 if (SSE_REGNO_P (REGNO (reg)))
2708 {
2709 src_addr = sse_addr_rtx;
2710 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2711 }
2712 else
2713 {
2714 src_addr = int_addr_rtx;
2715 src_offset = REGNO (reg) * 8;
2716 }
2717 src_mem = gen_rtx_MEM (mode, src_addr);
2718 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2719 src_mem = adjust_address (src_mem, mode, src_offset);
2720 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2721 emit_move_insn (dest_mem, src_mem);
2722 }
2723 }
2724
2725 if (needed_intregs)
2726 {
2727 t =
2728 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2729 build_int_2 (needed_intregs * 8, 0));
2730 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2731 TREE_SIDE_EFFECTS (t) = 1;
2732 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2733 }
2734 if (needed_sseregs)
2735 {
2736 t =
2737 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2738 build_int_2 (needed_sseregs * 16, 0));
2739 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2740 TREE_SIDE_EFFECTS (t) = 1;
2741 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2742 }
2743
2744 emit_jump_insn (gen_jump (lab_over));
2745 emit_barrier ();
2746 emit_label (lab_false);
2747 }
2748
2749 /* ... otherwise out of the overflow area. */
2750
2751 /* Care for on-stack alignment if needed. */
2752 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2753 t = ovf;
2754 else
2755 {
2756 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2757 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2758 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2759 }
2760 t = save_expr (t);
2761
2762 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2763 if (r != addr_rtx)
2764 emit_move_insn (addr_rtx, r);
2765
2766 t =
2767 build (PLUS_EXPR, TREE_TYPE (t), t,
2768 build_int_2 (rsize * UNITS_PER_WORD, 0));
2769 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2770 TREE_SIDE_EFFECTS (t) = 1;
2771 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2772
2773 if (container)
2774 emit_label (lab_over);
2775
2776 return addr_rtx;
2777 }
2778 \f
2779 /* Return nonzero if OP is general operand representable on x86_64. */
2780
2781 int
2782 x86_64_general_operand (op, mode)
2783 rtx op;
2784 enum machine_mode mode;
2785 {
2786 if (!TARGET_64BIT)
2787 return general_operand (op, mode);
2788 if (nonimmediate_operand (op, mode))
2789 return 1;
2790 return x86_64_sign_extended_value (op);
2791 }
2792
2793 /* Return nonzero if OP is general operand representable on x86_64
2794 as either sign extended or zero extended constant. */
2795
2796 int
2797 x86_64_szext_general_operand (op, mode)
2798 rtx op;
2799 enum machine_mode mode;
2800 {
2801 if (!TARGET_64BIT)
2802 return general_operand (op, mode);
2803 if (nonimmediate_operand (op, mode))
2804 return 1;
2805 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2806 }
2807
2808 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2809
2810 int
2811 x86_64_nonmemory_operand (op, mode)
2812 rtx op;
2813 enum machine_mode mode;
2814 {
2815 if (!TARGET_64BIT)
2816 return nonmemory_operand (op, mode);
2817 if (register_operand (op, mode))
2818 return 1;
2819 return x86_64_sign_extended_value (op);
2820 }
2821
2822 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2823
2824 int
2825 x86_64_movabs_operand (op, mode)
2826 rtx op;
2827 enum machine_mode mode;
2828 {
2829 if (!TARGET_64BIT || !flag_pic)
2830 return nonmemory_operand (op, mode);
2831 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2832 return 1;
2833 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2834 return 1;
2835 return 0;
2836 }
2837
2838 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2839
2840 int
2841 x86_64_szext_nonmemory_operand (op, mode)
2842 rtx op;
2843 enum machine_mode mode;
2844 {
2845 if (!TARGET_64BIT)
2846 return nonmemory_operand (op, mode);
2847 if (register_operand (op, mode))
2848 return 1;
2849 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2850 }
2851
2852 /* Return nonzero if OP is immediate operand representable on x86_64. */
2853
2854 int
2855 x86_64_immediate_operand (op, mode)
2856 rtx op;
2857 enum machine_mode mode;
2858 {
2859 if (!TARGET_64BIT)
2860 return immediate_operand (op, mode);
2861 return x86_64_sign_extended_value (op);
2862 }
2863
2864 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value.  */
2865
2866 int
2867 x86_64_zext_immediate_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode ATTRIBUTE_UNUSED;
2870 {
2871 return x86_64_zero_extended_value (op);
2872 }
2873
2874 /* Return nonzero if OP is (const_int 1), else return zero. */
2875
2876 int
2877 const_int_1_operand (op, mode)
2878 rtx op;
2879 enum machine_mode mode ATTRIBUTE_UNUSED;
2880 {
2881 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2882 }
2883
2884 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2885 reference and a constant. */
2886
2887 int
2888 symbolic_operand (op, mode)
2889 register rtx op;
2890 enum machine_mode mode ATTRIBUTE_UNUSED;
2891 {
2892 switch (GET_CODE (op))
2893 {
2894 case SYMBOL_REF:
2895 case LABEL_REF:
2896 return 1;
2897
2898 case CONST:
2899 op = XEXP (op, 0);
2900 if (GET_CODE (op) == SYMBOL_REF
2901 || GET_CODE (op) == LABEL_REF
2902 || (GET_CODE (op) == UNSPEC
2903 && (XINT (op, 1) == UNSPEC_GOT
2904 || XINT (op, 1) == UNSPEC_GOTOFF
2905 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2906 return 1;
2907 if (GET_CODE (op) != PLUS
2908 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2909 return 0;
2910
2911 op = XEXP (op, 0);
2912 if (GET_CODE (op) == SYMBOL_REF
2913 || GET_CODE (op) == LABEL_REF)
2914 return 1;
2915 /* Only @GOTOFF gets offsets. */
2916 if (GET_CODE (op) != UNSPEC
2917 || XINT (op, 1) != UNSPEC_GOTOFF)
2918 return 0;
2919
2920 op = XVECEXP (op, 0, 0);
2921 if (GET_CODE (op) == SYMBOL_REF
2922 || GET_CODE (op) == LABEL_REF)
2923 return 1;
2924 return 0;
2925
2926 default:
2927 return 0;
2928 }
2929 }
2930
2931 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2932
2933 int
2934 pic_symbolic_operand (op, mode)
2935 register rtx op;
2936 enum machine_mode mode ATTRIBUTE_UNUSED;
2937 {
2938 if (GET_CODE (op) != CONST)
2939 return 0;
2940 op = XEXP (op, 0);
2941 if (TARGET_64BIT)
2942 {
2943 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2944 return 1;
2945 }
2946 else
2947 {
2948 if (GET_CODE (op) == UNSPEC)
2949 return 1;
2950 if (GET_CODE (op) != PLUS
2951 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2952 return 0;
2953 op = XEXP (op, 0);
2954 if (GET_CODE (op) == UNSPEC)
2955 return 1;
2956 }
2957 return 0;
2958 }
2959
2960 /* Return true if OP is a symbolic operand that resolves locally. */
2961
2962 static int
2963 local_symbolic_operand (op, mode)
2964 rtx op;
2965 enum machine_mode mode ATTRIBUTE_UNUSED;
2966 {
2967 if (GET_CODE (op) == LABEL_REF)
2968 return 1;
2969
2970 if (GET_CODE (op) == CONST
2971 && GET_CODE (XEXP (op, 0)) == PLUS
2972 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2973 op = XEXP (XEXP (op, 0), 0);
2974
2975 if (GET_CODE (op) != SYMBOL_REF)
2976 return 0;
2977
2978 /* These we've been told are local by varasm and encode_section_info
2979 respectively. */
2980 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2981 return 1;
2982
2983 /* There is, however, a not insubstantial body of code in the rest of
2984 the compiler that assumes it can just stick the results of
2985 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2986 /* ??? This is a hack. Should update the body of the compiler to
2987      always create a DECL and invoke targetm.encode_section_info.  */
2988 if (strncmp (XSTR (op, 0), internal_label_prefix,
2989 internal_label_prefix_len) == 0)
2990 return 1;
2991
2992 return 0;
2993 }
2994
2995 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2996
2997 int
2998 tls_symbolic_operand (op, mode)
2999 register rtx op;
3000 enum machine_mode mode ATTRIBUTE_UNUSED;
3001 {
3002 const char *symbol_str;
3003
3004 if (GET_CODE (op) != SYMBOL_REF)
3005 return 0;
3006 symbol_str = XSTR (op, 0);
3007
3008 if (symbol_str[0] != '%')
3009 return 0;
3010 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3011 }
3012
3013 static int
3014 tls_symbolic_operand_1 (op, kind)
3015 rtx op;
3016 enum tls_model kind;
3017 {
3018 const char *symbol_str;
3019
3020 if (GET_CODE (op) != SYMBOL_REF)
3021 return 0;
3022 symbol_str = XSTR (op, 0);
3023
3024 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3025 }
3026
3027 int
3028 global_dynamic_symbolic_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3031 {
3032 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3033 }
3034
3035 int
3036 local_dynamic_symbolic_operand (op, mode)
3037 register rtx op;
3038 enum machine_mode mode ATTRIBUTE_UNUSED;
3039 {
3040 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3041 }
3042
3043 int
3044 initial_exec_symbolic_operand (op, mode)
3045 register rtx op;
3046 enum machine_mode mode ATTRIBUTE_UNUSED;
3047 {
3048 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3049 }
3050
3051 int
3052 local_exec_symbolic_operand (op, mode)
3053 register rtx op;
3054 enum machine_mode mode ATTRIBUTE_UNUSED;
3055 {
3056 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3057 }
3058
3059 /* Test for a valid operand for a call instruction. Don't allow the
3060 arg pointer register or virtual regs since they may decay into
3061 reg + const, which the patterns can't handle. */
3062
3063 int
3064 call_insn_operand (op, mode)
3065 rtx op;
3066 enum machine_mode mode ATTRIBUTE_UNUSED;
3067 {
3068 /* Disallow indirect through a virtual register. This leads to
3069 compiler aborts when trying to eliminate them. */
3070 if (GET_CODE (op) == REG
3071 && (op == arg_pointer_rtx
3072 || op == frame_pointer_rtx
3073 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3074 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3075 return 0;
3076
3077 /* Disallow `call 1234'. Due to varying assembler lameness this
3078 gets either rejected or translated to `call .+1234'. */
3079 if (GET_CODE (op) == CONST_INT)
3080 return 0;
3081
3082 /* Explicitly allow SYMBOL_REF even if pic. */
3083 if (GET_CODE (op) == SYMBOL_REF)
3084 return 1;
3085
3086 /* Half-pic doesn't allow anything but registers and constants.
3087      We've just taken care of the latter.  */
3088 if (HALF_PIC_P ())
3089 return register_operand (op, Pmode);
3090
3091 /* Otherwise we can allow any general_operand in the address. */
3092 return general_operand (op, Pmode);
3093 }
3094
3095 int
3096 constant_call_address_operand (op, mode)
3097 rtx op;
3098 enum machine_mode mode ATTRIBUTE_UNUSED;
3099 {
3100 if (GET_CODE (op) == CONST
3101 && GET_CODE (XEXP (op, 0)) == PLUS
3102 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3103 op = XEXP (XEXP (op, 0), 0);
3104 return GET_CODE (op) == SYMBOL_REF;
3105 }
3106
3107 /* Match exactly zero and one. */
3108
3109 int
3110 const0_operand (op, mode)
3111 register rtx op;
3112 enum machine_mode mode;
3113 {
3114 return op == CONST0_RTX (mode);
3115 }
3116
3117 int
3118 const1_operand (op, mode)
3119 register rtx op;
3120 enum machine_mode mode ATTRIBUTE_UNUSED;
3121 {
3122 return op == const1_rtx;
3123 }
3124
3125 /* Match 2, 4, or 8. Used for leal multiplicands. */
3126
3127 int
3128 const248_operand (op, mode)
3129 register rtx op;
3130 enum machine_mode mode ATTRIBUTE_UNUSED;
3131 {
3132 return (GET_CODE (op) == CONST_INT
3133 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3134 }
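/* Editor's note: 2, 4 and 8 are exactly the scale factors the ia32 addressing
   modes allow, e.g. (hypothetical operands)

     leal (%ebx,%esi,4), %eax     // eax = ebx + esi*4

   hence this predicate for leal multiplicands.  */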
3135
3136 /* True if this is a constant appropriate for an increment or decrement.  */
3137
3138 int
3139 incdec_operand (op, mode)
3140 register rtx op;
3141 enum machine_mode mode ATTRIBUTE_UNUSED;
3142 {
3143   /* On Pentium 4, the inc and dec operations cause an extra dependency on
3144      the flags register, since the carry flag is not modified.  */
3145 if (TARGET_PENTIUM4 && !optimize_size)
3146 return 0;
3147 return op == const1_rtx || op == constm1_rtx;
3148 }
3149
3150 /* Return nonzero if OP is acceptable as operand of DImode shift
3151 expander. */
3152
3153 int
3154 shiftdi_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode ATTRIBUTE_UNUSED;
3157 {
3158 if (TARGET_64BIT)
3159 return nonimmediate_operand (op, mode);
3160 else
3161 return register_operand (op, mode);
3162 }
3163
3164 /* Return false if this is the stack pointer, or any other fake
3165 register eliminable to the stack pointer. Otherwise, this is
3166 a register operand.
3167
3168    This is used to prevent esp from being used as an index reg,
3169    which would only happen in pathological cases.  */
3170
3171 int
3172 reg_no_sp_operand (op, mode)
3173 register rtx op;
3174 enum machine_mode mode;
3175 {
3176 rtx t = op;
3177 if (GET_CODE (t) == SUBREG)
3178 t = SUBREG_REG (t);
3179 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3180 return 0;
3181
3182 return register_operand (op, mode);
3183 }
3184
3185 int
3186 mmx_reg_operand (op, mode)
3187 register rtx op;
3188 enum machine_mode mode ATTRIBUTE_UNUSED;
3189 {
3190 return MMX_REG_P (op);
3191 }
3192
3193 /* Return false if this is any eliminable register. Otherwise
3194 general_operand. */
3195
3196 int
3197 general_no_elim_operand (op, mode)
3198 register rtx op;
3199 enum machine_mode mode;
3200 {
3201 rtx t = op;
3202 if (GET_CODE (t) == SUBREG)
3203 t = SUBREG_REG (t);
3204 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3205 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3206 || t == virtual_stack_dynamic_rtx)
3207 return 0;
3208 if (REG_P (t)
3209 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3210 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3211 return 0;
3212
3213 return general_operand (op, mode);
3214 }
3215
3216 /* Return false if this is any eliminable register. Otherwise
3217 register_operand or const_int. */
3218
3219 int
3220 nonmemory_no_elim_operand (op, mode)
3221 register rtx op;
3222 enum machine_mode mode;
3223 {
3224 rtx t = op;
3225 if (GET_CODE (t) == SUBREG)
3226 t = SUBREG_REG (t);
3227 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3228 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3229 || t == virtual_stack_dynamic_rtx)
3230 return 0;
3231
3232 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3233 }
3234
3235 /* Return true if op is a Q_REGS class register. */
3236
3237 int
3238 q_regs_operand (op, mode)
3239 register rtx op;
3240 enum machine_mode mode;
3241 {
3242 if (mode != VOIDmode && GET_MODE (op) != mode)
3243 return 0;
3244 if (GET_CODE (op) == SUBREG)
3245 op = SUBREG_REG (op);
3246 return ANY_QI_REG_P (op);
3247 }
3248
3249 /* Return true if op is a NON_Q_REGS class register. */
3250
3251 int
3252 non_q_regs_operand (op, mode)
3253 register rtx op;
3254 enum machine_mode mode;
3255 {
3256 if (mode != VOIDmode && GET_MODE (op) != mode)
3257 return 0;
3258 if (GET_CODE (op) == SUBREG)
3259 op = SUBREG_REG (op);
3260 return NON_QI_REG_P (op);
3261 }
3262
3263 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3264 insns. */
3265 int
3266 sse_comparison_operator (op, mode)
3267 rtx op;
3268 enum machine_mode mode ATTRIBUTE_UNUSED;
3269 {
3270 enum rtx_code code = GET_CODE (op);
3271 switch (code)
3272 {
3273 /* Operations supported directly. */
3274 case EQ:
3275 case LT:
3276 case LE:
3277 case UNORDERED:
3278 case NE:
3279 case UNGE:
3280 case UNGT:
3281 case ORDERED:
3282 return 1;
3283 /* These are equivalent to ones above in non-IEEE comparisons. */
3284 case UNEQ:
3285 case UNLT:
3286 case UNLE:
3287 case LTGT:
3288 case GE:
3289 case GT:
3290 return !TARGET_IEEE_FP;
3291 default:
3292 return 0;
3293 }
3294 }
3295 /* Return 1 if OP is a valid comparison operator in valid mode. */
3296 int
3297 ix86_comparison_operator (op, mode)
3298 register rtx op;
3299 enum machine_mode mode;
3300 {
3301 enum machine_mode inmode;
3302 enum rtx_code code = GET_CODE (op);
3303 if (mode != VOIDmode && GET_MODE (op) != mode)
3304 return 0;
3305 if (GET_RTX_CLASS (code) != '<')
3306 return 0;
3307 inmode = GET_MODE (XEXP (op, 0));
3308
3309 if (inmode == CCFPmode || inmode == CCFPUmode)
3310 {
3311 enum rtx_code second_code, bypass_code;
3312 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3313 return (bypass_code == NIL && second_code == NIL);
3314 }
3315 switch (code)
3316 {
3317 case EQ: case NE:
3318 return 1;
3319 case LT: case GE:
3320 if (inmode == CCmode || inmode == CCGCmode
3321 || inmode == CCGOCmode || inmode == CCNOmode)
3322 return 1;
3323 return 0;
3324 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3325 if (inmode == CCmode)
3326 return 1;
3327 return 0;
3328 case GT: case LE:
3329 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3330 return 1;
3331 return 0;
3332 default:
3333 return 0;
3334 }
3335 }
3336
3337 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3338
3339 int
3340 fcmov_comparison_operator (op, mode)
3341 register rtx op;
3342 enum machine_mode mode;
3343 {
3344 enum machine_mode inmode;
3345 enum rtx_code code = GET_CODE (op);
3346 if (mode != VOIDmode && GET_MODE (op) != mode)
3347 return 0;
3348 if (GET_RTX_CLASS (code) != '<')
3349 return 0;
3350 inmode = GET_MODE (XEXP (op, 0));
3351 if (inmode == CCFPmode || inmode == CCFPUmode)
3352 {
3353 enum rtx_code second_code, bypass_code;
3354 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3355 if (bypass_code != NIL || second_code != NIL)
3356 return 0;
3357 code = ix86_fp_compare_code_to_integer (code);
3358 }
3359   /* The i387 supports just a limited set of condition codes.  */
3360 switch (code)
3361 {
3362 case LTU: case GTU: case LEU: case GEU:
3363 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3364 return 1;
3365 return 0;
3366 case ORDERED: case UNORDERED:
3367 case EQ: case NE:
3368 return 1;
3369 default:
3370 return 0;
3371 }
3372 }
3373
3374 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3375
3376 int
3377 promotable_binary_operator (op, mode)
3378 register rtx op;
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3380 {
3381 switch (GET_CODE (op))
3382 {
3383 case MULT:
3384       /* Modern CPUs have the same latency for HImode and SImode multiply,
3385          but the 386 and 486 do HImode multiply faster.  */
3386 return ix86_cpu > PROCESSOR_I486;
3387 case PLUS:
3388 case AND:
3389 case IOR:
3390 case XOR:
3391 case ASHIFT:
3392 return 1;
3393 default:
3394 return 0;
3395 }
3396 }
3397
3398 /* Nearly general operand, but accept any const_double, since we wish
3399 to be able to drop them into memory rather than have them get pulled
3400 into registers. */
3401
3402 int
3403 cmp_fp_expander_operand (op, mode)
3404 register rtx op;
3405 enum machine_mode mode;
3406 {
3407 if (mode != VOIDmode && mode != GET_MODE (op))
3408 return 0;
3409 if (GET_CODE (op) == CONST_DOUBLE)
3410 return 1;
3411 return general_operand (op, mode);
3412 }
3413
3414 /* Match an SI or HImode register for a zero_extract. */
3415
3416 int
3417 ext_register_operand (op, mode)
3418 register rtx op;
3419 enum machine_mode mode ATTRIBUTE_UNUSED;
3420 {
3421 int regno;
3422 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3423 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3424 return 0;
3425
3426 if (!register_operand (op, VOIDmode))
3427 return 0;
3428
3429   /* Be careful to accept only registers having upper parts.  */
3430 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3431 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3432 }
3433
3434 /* Return 1 if this is a valid binary floating-point operation.
3435 OP is the expression matched, and MODE is its mode. */
3436
3437 int
3438 binary_fp_operator (op, mode)
3439 register rtx op;
3440 enum machine_mode mode;
3441 {
3442 if (mode != VOIDmode && mode != GET_MODE (op))
3443 return 0;
3444
3445 switch (GET_CODE (op))
3446 {
3447 case PLUS:
3448 case MINUS:
3449 case MULT:
3450 case DIV:
3451 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3452
3453 default:
3454 return 0;
3455 }
3456 }
3457
3458 int
3459 mult_operator (op, mode)
3460 register rtx op;
3461 enum machine_mode mode ATTRIBUTE_UNUSED;
3462 {
3463 return GET_CODE (op) == MULT;
3464 }
3465
3466 int
3467 div_operator (op, mode)
3468 register rtx op;
3469 enum machine_mode mode ATTRIBUTE_UNUSED;
3470 {
3471 return GET_CODE (op) == DIV;
3472 }
3473
3474 int
3475 arith_or_logical_operator (op, mode)
3476 rtx op;
3477 enum machine_mode mode;
3478 {
3479 return ((mode == VOIDmode || GET_MODE (op) == mode)
3480 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3481 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3482 }
3483
3484 /* Returns 1 if OP is memory operand with a displacement. */
3485
3486 int
3487 memory_displacement_operand (op, mode)
3488 register rtx op;
3489 enum machine_mode mode;
3490 {
3491 struct ix86_address parts;
3492
3493 if (! memory_operand (op, mode))
3494 return 0;
3495
3496 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3497 abort ();
3498
3499 return parts.disp != NULL_RTX;
3500 }
3501
3502 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3503 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3504
3505 ??? It seems likely that this will only work because cmpsi is an
3506 expander, and no actual insns use this. */
3507
3508 int
3509 cmpsi_operand (op, mode)
3510 rtx op;
3511 enum machine_mode mode;
3512 {
3513 if (nonimmediate_operand (op, mode))
3514 return 1;
3515
3516 if (GET_CODE (op) == AND
3517 && GET_MODE (op) == SImode
3518 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3519 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3520 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3521 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3522 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3523 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3524 return 1;
3525
3526 return 0;
3527 }
3528
3529 /* Returns 1 if OP is a memory operand that cannot be represented by the
3530 modRM array. */
3531
3532 int
3533 long_memory_operand (op, mode)
3534 register rtx op;
3535 enum machine_mode mode;
3536 {
3537 if (! memory_operand (op, mode))
3538 return 0;
3539
3540 return memory_address_length (op) != 0;
3541 }
3542
3543 /* Return nonzero if the rtx is known aligned. */
3544
3545 int
3546 aligned_operand (op, mode)
3547 rtx op;
3548 enum machine_mode mode;
3549 {
3550 struct ix86_address parts;
3551
3552 if (!general_operand (op, mode))
3553 return 0;
3554
3555 /* Registers and immediate operands are always "aligned". */
3556 if (GET_CODE (op) != MEM)
3557 return 1;
3558
3559 /* Don't even try to do any aligned optimizations with volatiles. */
3560 if (MEM_VOLATILE_P (op))
3561 return 0;
3562
3563 op = XEXP (op, 0);
3564
3565 /* Pushes and pops are only valid on the stack pointer. */
3566 if (GET_CODE (op) == PRE_DEC
3567 || GET_CODE (op) == POST_INC)
3568 return 1;
3569
3570 /* Decode the address. */
3571 if (! ix86_decompose_address (op, &parts))
3572 abort ();
3573
3574 if (parts.base && GET_CODE (parts.base) == SUBREG)
3575 parts.base = SUBREG_REG (parts.base);
3576 if (parts.index && GET_CODE (parts.index) == SUBREG)
3577 parts.index = SUBREG_REG (parts.index);
3578
3579 /* Look for some component that isn't known to be aligned. */
3580 if (parts.index)
3581 {
3582 if (parts.scale < 4
3583 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3584 return 0;
3585 }
3586 if (parts.base)
3587 {
3588 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3589 return 0;
3590 }
3591 if (parts.disp)
3592 {
3593 if (GET_CODE (parts.disp) != CONST_INT
3594 || (INTVAL (parts.disp) & 3) != 0)
3595 return 0;
3596 }
3597
3598 /* Didn't find one -- this must be an aligned address. */
3599 return 1;
3600 }
3601 \f
3602 /* Return true if the constant is something that can be loaded with
3603 a special instruction. Only handle 0.0 and 1.0; others are less
3604 worthwhile. */
3605
3606 int
3607 standard_80387_constant_p (x)
3608 rtx x;
3609 {
3610 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3611 return -1;
3612 /* Note that the 80387 has other constants, such as pi, that we should
3613 perhaps support too. On some machines, these are much slower to load as a
3614 standard constant than to load from doubles in memory. */
3615 if (x == CONST0_RTX (GET_MODE (x)))
3616 return 1;
3617 if (x == CONST1_RTX (GET_MODE (x)))
3618 return 2;
3619 return 0;
3620 }
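/* An illustrative note, a sketch rather than a statement about this file:
   the return values above are presumably consumed by the floating-point
   move patterns, with 1 corresponding to a load via fldz (+0.0) and 2 to a
   load via fld1 (+1.0), e.g.

     fldz            ; push +0.0 onto the x87 register stack
     fld1            ; push +1.0 onto the x87 register stack

   Other constants fall back to a load from memory.  */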
3621
3622 /* Return 1 if X is an FP constant we can load into an SSE register
3623 without using memory. */
3624 int
3625 standard_sse_constant_p (x)
3626 rtx x;
3627 {
3628 if (GET_CODE (x) != CONST_DOUBLE)
3629 return -1;
3630 return (x == CONST0_RTX (GET_MODE (x)));
3631 }
3632
3633 /* Returns 1 if OP contains a symbol reference */
3634
3635 int
3636 symbolic_reference_mentioned_p (op)
3637 rtx op;
3638 {
3639 register const char *fmt;
3640 register int i;
3641
3642 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3643 return 1;
3644
3645 fmt = GET_RTX_FORMAT (GET_CODE (op));
3646 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3647 {
3648 if (fmt[i] == 'E')
3649 {
3650 register int j;
3651
3652 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3653 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3654 return 1;
3655 }
3656
3657 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3658 return 1;
3659 }
3660
3661 return 0;
3662 }
3663
3664 /* Return 1 if it is appropriate to emit `ret' instructions in the
3665 body of a function. Do this only if the epilogue is simple, needing a
3666 couple of insns. Prior to reloading, we can't tell how many registers
3667 must be saved, so return 0 then. Return 0 if there is no frame
3668 marker to de-allocate.
3669
3670 If NON_SAVING_SETJMP is defined and true, then it is not possible
3671 for the epilogue to be simple, so return 0. This is a special case
3672 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3673 until final, but jump_optimize may need to know sooner if a
3674 `return' is OK. */
3675
3676 int
3677 ix86_can_use_return_insn_p ()
3678 {
3679 struct ix86_frame frame;
3680
3681 #ifdef NON_SAVING_SETJMP
3682 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3683 return 0;
3684 #endif
3685
3686 if (! reload_completed || frame_pointer_needed)
3687 return 0;
3688
3689 /* Don't allow more than 32k of pop, since that's all we can do
3690 with one instruction. */
3691 if (current_function_pops_args
3692 && current_function_args_size >= 32768)
3693 return 0;
3694
3695 ix86_compute_frame_layout (&frame);
3696 return frame.to_allocate == 0 && frame.nregs == 0;
3697 }
3698 \f
3699 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3700 int
3701 x86_64_sign_extended_value (value)
3702 rtx value;
3703 {
3704 switch (GET_CODE (value))
3705 {
3706 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3707 to be at least 32 and thus all acceptable constants are
3708 represented as CONST_INTs. */
3709 case CONST_INT:
3710 if (HOST_BITS_PER_WIDE_INT == 32)
3711 return 1;
3712 else
3713 {
3714 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3715 return trunc_int_for_mode (val, SImode) == val;
3716 }
3717 break;
3718
3719 /* For certain code models, the symbolic references are known to fit. */
3720 case SYMBOL_REF:
3721 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3722
3723 /* For certain code models, the code is near as well. */
3724 case LABEL_REF:
3725 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3726
3727 /* We may also accept offsetted memory references in certain special
3728 cases. */
3729 case CONST:
3730 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3731 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3732 return 1;
3733 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3734 {
3735 rtx op1 = XEXP (XEXP (value, 0), 0);
3736 rtx op2 = XEXP (XEXP (value, 0), 1);
3737 HOST_WIDE_INT offset;
3738
3739 if (ix86_cmodel == CM_LARGE)
3740 return 0;
3741 if (GET_CODE (op2) != CONST_INT)
3742 return 0;
3743 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3744 switch (GET_CODE (op1))
3745 {
3746 case SYMBOL_REF:
3747 /* For CM_SMALL assume that the latest object is 1MB before the
3748 end of the 31-bit boundary. We may also accept pretty
3749 large negative constants, knowing that all objects are
3750 in the positive half of the address space. */
3751 if (ix86_cmodel == CM_SMALL
3752 && offset < 1024*1024*1024
3753 && trunc_int_for_mode (offset, SImode) == offset)
3754 return 1;
3755 /* For CM_KERNEL we know that all objects reside in the
3756 negative half of the 32-bit address space. We may not
3757 accept negative offsets, since they may fall just outside
3758 that range, but we may accept pretty large positive ones. */
3759 if (ix86_cmodel == CM_KERNEL
3760 && offset > 0
3761 && trunc_int_for_mode (offset, SImode) == offset)
3762 return 1;
3763 break;
3764 case LABEL_REF:
3765 /* These conditions are similar to SYMBOL_REF ones, just the
3766 constraints for code models differ. */
3767 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3768 && offset < 1024*1024*1024
3769 && trunc_int_for_mode (offset, SImode) == offset)
3770 return 1;
3771 if (ix86_cmodel == CM_KERNEL
3772 && offset > 0
3773 && trunc_int_for_mode (offset, SImode) == offset)
3774 return 1;
3775 break;
3776 default:
3777 return 0;
3778 }
3779 }
3780 return 0;
3781 default:
3782 return 0;
3783 }
3784 }
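/* A minimal sketch of the CONST_INT test above, for illustration only (the
   helper name is made up): a 64-bit value can serve as a sign-extended
   32-bit immediate exactly when sign-extending its low 32 bits reproduces
   the value, assuming the usual 32-bit int / 64-bit long long sizes.  */
#if 0
static int
fits_signed_32bit_immediate (long long val)
{
  /* (int) truncates to 32 bits; widening back sign-extends.  */
  return (long long) (int) val == val;
}
#endif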
3785
3786 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3787 int
3788 x86_64_zero_extended_value (value)
3789 rtx value;
3790 {
3791 switch (GET_CODE (value))
3792 {
3793 case CONST_DOUBLE:
3794 if (HOST_BITS_PER_WIDE_INT == 32)
3795 return (GET_MODE (value) == VOIDmode
3796 && !CONST_DOUBLE_HIGH (value));
3797 else
3798 return 0;
3799 case CONST_INT:
3800 if (HOST_BITS_PER_WIDE_INT == 32)
3801 return INTVAL (value) >= 0;
3802 else
3803 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3804 break;
3805
3806 /* For certain code models, the symbolic references are known to fit. */
3807 case SYMBOL_REF:
3808 return ix86_cmodel == CM_SMALL;
3809
3810 /* For certain code models, the code is near as well. */
3811 case LABEL_REF:
3812 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3813
3814 /* We may also accept offsetted memory references in certain special
3815 cases. */
3816 case CONST:
3817 if (GET_CODE (XEXP (value, 0)) == PLUS)
3818 {
3819 rtx op1 = XEXP (XEXP (value, 0), 0);
3820 rtx op2 = XEXP (XEXP (value, 0), 1);
3821
3822 if (ix86_cmodel == CM_LARGE)
3823 return 0;
3824 switch (GET_CODE (op1))
3825 {
3826 case SYMBOL_REF:
3827 return 0;
3828 /* For the small code model we may accept pretty large positive
3829 offsets, since one bit is available for free. Negative
3830 offsets are limited by the size of the NULL pointer area
3831 specified by the ABI. */
3832 if (ix86_cmodel == CM_SMALL
3833 && GET_CODE (op2) == CONST_INT
3834 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3835 && (trunc_int_for_mode (INTVAL (op2), SImode)
3836 == INTVAL (op2)))
3837 return 1;
3838 /* ??? For the kernel, we may accept adjustment of
3839 -0x10000000, since we know that it will just convert
3840 negative address space to positive, but perhaps this
3841 is not worthwhile. */
3842 break;
3843 case LABEL_REF:
3844 /* These conditions are similar to SYMBOL_REF ones, just the
3845 constraints for code models differ. */
3846 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3847 && GET_CODE (op2) == CONST_INT
3848 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3849 && (trunc_int_for_mode (INTVAL (op2), SImode)
3850 == INTVAL (op2)))
3851 return 1;
3852 break;
3853 default:
3854 return 0;
3855 }
3856 }
3857 return 0;
3858 default:
3859 return 0;
3860 }
3861 }
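/* A minimal sketch of the corresponding CONST_INT test for zero-extended
   immediates, for illustration only (the helper name is made up): the
   value must have no bits set above the low 32.  */
#if 0
static int
fits_unsigned_32bit_immediate (unsigned long long val)
{
  /* True when the upper 32 bits are all zero.  */
  return (val & ~0xffffffffULL) == 0;
}
#endif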
3862
3863 /* Value should be nonzero if functions must have frame pointers.
3864 Zero means the frame pointer need not be set up (and parms may
3865 be accessed via the stack pointer) in functions that seem suitable. */
3866
3867 int
3868 ix86_frame_pointer_required ()
3869 {
3870 /* If we accessed previous frames, then the generated code expects
3871 to be able to access the saved ebp value in our frame. */
3872 if (cfun->machine->accesses_prev_frame)
3873 return 1;
3874
3875 /* Several x86 OSes need a frame pointer for other reasons,
3876 usually pertaining to setjmp. */
3877 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3878 return 1;
3879
3880 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3881 the frame pointer by default. Turn it back on now if we've not
3882 got a leaf function. */
3883 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3884 return 1;
3885
3886 return 0;
3887 }
3888
3889 /* Record that the current function accesses previous call frames. */
3890
3891 void
3892 ix86_setup_frame_addresses ()
3893 {
3894 cfun->machine->accesses_prev_frame = 1;
3895 }
3896 \f
3897 static char pic_label_name[32];
3898
3899 /* This function generates code for -fpic that loads %ebx with
3900 the return address of the caller and then returns. */
3901
3902 void
3903 ix86_asm_file_end (file)
3904 FILE *file;
3905 {
3906 rtx xops[2];
3907
3908 if (pic_label_name[0] == 0)
3909 return;
3910
3911 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3912 to updating relocations to a section being discarded such that this
3913 doesn't work. Ought to detect this at configure time. */
3914 #if 0
3915 /* The trick here is to create a linkonce section containing the
3916 pic label thunk, but to refer to it with an internal label.
3917 Because the label is internal, we don't have inter-dso name
3918 binding issues on hosts that don't support ".hidden".
3919
3920 In order to use these macros, however, we must create a fake
3921 function decl. */
3922 if (targetm.have_named_sections)
3923 {
3924 tree decl = build_decl (FUNCTION_DECL,
3925 get_identifier ("i686.get_pc_thunk"),
3926 error_mark_node);
3927 DECL_ONE_ONLY (decl) = 1;
3928 (*targetm.asm_out.unique_section) (decl, 0);
3929 named_section (decl, NULL);
3930 }
3931 else
3932 #else
3933 text_section ();
3934 #endif
3935
3936 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3937 internal (non-global) label that's being emitted, it didn't make
3938 sense to have .type information for local labels. This caused
3939 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3940 me debug info for a label that you're declaring non-global?), so this
3941 was changed to call ASM_OUTPUT_LABEL() instead. */
3942
3943 ASM_OUTPUT_LABEL (file, pic_label_name);
3944
3945 xops[0] = pic_offset_table_rtx;
3946 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3947 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3948 output_asm_insn ("ret", xops);
3949 }
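/* A rough sketch of the thunk emitted above, for illustration; the label
   name and PIC register are assumed (.LPR0 and %ebx respectively):

	.LPR0:
		movl	(%esp), %ebx
		ret

   i.e. it copies the caller's return address (the word on top of the stack
   at the call site) into the PIC register and returns.  */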
3950
3951 /* Emit code for the SET_GOT patterns. */
3952
3953 const char *
3954 output_set_got (dest)
3955 rtx dest;
3956 {
3957 rtx xops[3];
3958
3959 xops[0] = dest;
3960 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3961
3962 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3963 {
3964 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3965
3966 if (!flag_pic)
3967 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3968 else
3969 output_asm_insn ("call\t%a2", xops);
3970
3971 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3972 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3973
3974 if (flag_pic)
3975 output_asm_insn ("pop{l}\t%0", xops);
3976 }
3977 else
3978 {
3979 if (! pic_label_name[0])
3980 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3981
3982 xops[2] = gen_rtx_SYMBOL_REF (Pmode, pic_label_name);
3983 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3984 output_asm_insn ("call\t%X2", xops);
3985 }
3986
3987 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3988 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3989 else
3990 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3991
3992 return "";
3993 }
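/* A rough sketch of the -fpic sequence produced above when
   TARGET_DEEP_BRANCH_PREDICTION is not set, for illustration; the label
   name and destination register are assumed (.L2 and %ebx):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction, the call goes to the shared pic_label_name
   thunk instead, followed by a plain addl of _GLOBAL_OFFSET_TABLE_.  */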
3994
3995 /* Generate a "push" pattern for input ARG. */
3996
3997 static rtx
3998 gen_push (arg)
3999 rtx arg;
4000 {
4001 return gen_rtx_SET (VOIDmode,
4002 gen_rtx_MEM (Pmode,
4003 gen_rtx_PRE_DEC (Pmode,
4004 stack_pointer_rtx)),
4005 arg);
4006 }
4007
4008 /* Return 1 if we need to save REGNO. */
4009 static int
4010 ix86_save_reg (regno, maybe_eh_return)
4011 unsigned int regno;
4012 int maybe_eh_return;
4013 {
4014 if (regno == PIC_OFFSET_TABLE_REGNUM
4015 && (regs_ever_live[regno]
4016 || current_function_profile
4017 || current_function_calls_eh_return))
4018 return 1;
4019
4020 if (current_function_calls_eh_return && maybe_eh_return)
4021 {
4022 unsigned i;
4023 for (i = 0; ; i++)
4024 {
4025 unsigned test = EH_RETURN_DATA_REGNO (i);
4026 if (test == INVALID_REGNUM)
4027 break;
4028 if (test == regno)
4029 return 1;
4030 }
4031 }
4032
4033 return (regs_ever_live[regno]
4034 && !call_used_regs[regno]
4035 && !fixed_regs[regno]
4036 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4037 }
4038
4039 /* Return number of registers to be saved on the stack. */
4040
4041 static int
4042 ix86_nsaved_regs ()
4043 {
4044 int nregs = 0;
4045 int regno;
4046
4047 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4048 if (ix86_save_reg (regno, true))
4049 nregs++;
4050 return nregs;
4051 }
4052
4053 /* Return the offset between two registers, one to be eliminated, and the other
4054 its replacement, at the start of a routine. */
4055
4056 HOST_WIDE_INT
4057 ix86_initial_elimination_offset (from, to)
4058 int from;
4059 int to;
4060 {
4061 struct ix86_frame frame;
4062 ix86_compute_frame_layout (&frame);
4063
4064 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4065 return frame.hard_frame_pointer_offset;
4066 else if (from == FRAME_POINTER_REGNUM
4067 && to == HARD_FRAME_POINTER_REGNUM)
4068 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4069 else
4070 {
4071 if (to != STACK_POINTER_REGNUM)
4072 abort ();
4073 else if (from == ARG_POINTER_REGNUM)
4074 return frame.stack_pointer_offset;
4075 else if (from != FRAME_POINTER_REGNUM)
4076 abort ();
4077 else
4078 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4079 }
4080 }
4081
4082 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4083
4084 static void
4085 ix86_compute_frame_layout (frame)
4086 struct ix86_frame *frame;
4087 {
4088 HOST_WIDE_INT total_size;
4089 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4090 int offset;
4091 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4092 HOST_WIDE_INT size = get_frame_size ();
4093
4094 frame->nregs = ix86_nsaved_regs ();
4095 total_size = size;
4096
4097 /* Skip return address and saved base pointer. */
4098 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4099
4100 frame->hard_frame_pointer_offset = offset;
4101
4102 /* Do some sanity checking of stack_alignment_needed and
4103 preferred_alignment, since the i386 port is the only one using those
4104 features, and they may break easily. */
4105
4106 if (size && !stack_alignment_needed)
4107 abort ();
4108 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4109 abort ();
4110 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4111 abort ();
4112 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4113 abort ();
4114
4115 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4116 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4117
4118 /* Register save area */
4119 offset += frame->nregs * UNITS_PER_WORD;
4120
4121 /* Va-arg area */
4122 if (ix86_save_varrargs_registers)
4123 {
4124 offset += X86_64_VARARGS_SIZE;
4125 frame->va_arg_size = X86_64_VARARGS_SIZE;
4126 }
4127 else
4128 frame->va_arg_size = 0;
4129
4130 /* Align start of frame for local function. */
4131 frame->padding1 = ((offset + stack_alignment_needed - 1)
4132 & -stack_alignment_needed) - offset;
4133
4134 offset += frame->padding1;
4135
4136 /* Frame pointer points here. */
4137 frame->frame_pointer_offset = offset;
4138
4139 offset += size;
4140
4141 /* Add outgoing arguments area. Can be skipped if we eliminated
4142 all the function calls as dead code. */
4143 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4144 {
4145 offset += current_function_outgoing_args_size;
4146 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4147 }
4148 else
4149 frame->outgoing_arguments_size = 0;
4150
4151 /* Align stack boundary. Only needed if we're calling another function. */
4152 if (!current_function_is_leaf)
4153 frame->padding2 = ((offset + preferred_alignment - 1)
4154 & -preferred_alignment) - offset;
4155 else
4156 frame->padding2 = 0;
4157
4158 offset += frame->padding2;
4159
4160 /* We've reached the end of the stack frame. */
4161 frame->stack_pointer_offset = offset;
4162
4163 /* Size the prologue needs to allocate. */
4164 frame->to_allocate =
4165 (size + frame->padding1 + frame->padding2
4166 + frame->outgoing_arguments_size + frame->va_arg_size);
4167
4168 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4169 && current_function_is_leaf)
4170 {
4171 frame->red_zone_size = frame->to_allocate;
4172 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4173 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4174 }
4175 else
4176 frame->red_zone_size = 0;
4177 frame->to_allocate -= frame->red_zone_size;
4178 frame->stack_pointer_offset -= frame->red_zone_size;
4179 #if 0
4180 fprintf (stderr, "nregs: %i\n", frame->nregs);
4181 fprintf (stderr, "size: %i\n", size);
4182 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4183 fprintf (stderr, "padding1: %i\n", frame->padding1);
4184 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4185 fprintf (stderr, "padding2: %i\n", frame->padding2);
4186 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4187 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4188 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4189 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4190 frame->hard_frame_pointer_offset);
4191 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4192 #endif
4193 }
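/* A worked example of the computation above, assuming a 32-bit target with
   frame_pointer_needed, two saved registers, no va-arg area, 20 bytes of
   locals, a leaf function and 4-byte stack alignment:

     offset = 8                      return address + saved %ebp
     hard_frame_pointer_offset = 8
     offset += 2 * 4 = 16            register save area
     padding1 = 0                    already aligned
     frame_pointer_offset = 16
     offset += 20 = 36               local variables
     padding2 = 0                    leaf, no outgoing arguments
     stack_pointer_offset = 36
     to_allocate = 20                locals + paddings + va-arg + out-args

   so the prologue allocates 20 bytes beyond the two register pushes.  */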
4194
4195 /* Emit code to save registers in the prologue. */
4196
4197 static void
4198 ix86_emit_save_regs ()
4199 {
4200 register int regno;
4201 rtx insn;
4202
4203 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4204 if (ix86_save_reg (regno, true))
4205 {
4206 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4207 RTX_FRAME_RELATED_P (insn) = 1;
4208 }
4209 }
4210
4211 /* Emit code to save registers using MOV insns. The first register
4212 is saved at POINTER + OFFSET. */
4213 static void
4214 ix86_emit_save_regs_using_mov (pointer, offset)
4215 rtx pointer;
4216 HOST_WIDE_INT offset;
4217 {
4218 int regno;
4219 rtx insn;
4220
4221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4222 if (ix86_save_reg (regno, true))
4223 {
4224 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4225 Pmode, offset),
4226 gen_rtx_REG (Pmode, regno));
4227 RTX_FRAME_RELATED_P (insn) = 1;
4228 offset += UNITS_PER_WORD;
4229 }
4230 }
4231
4232 /* Expand the prologue into a bunch of separate insns. */
4233
4234 void
4235 ix86_expand_prologue ()
4236 {
4237 rtx insn;
4238 int pic_reg_used = (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
4239 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
4240 || current_function_profile));
4241 struct ix86_frame frame;
4242 int use_mov = 0;
4243 HOST_WIDE_INT allocate;
4244
4245 if (!optimize_size)
4246 {
4247 use_fast_prologue_epilogue
4248 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4249 if (TARGET_PROLOGUE_USING_MOVE)
4250 use_mov = use_fast_prologue_epilogue;
4251 }
4252 ix86_compute_frame_layout (&frame);
4253
4254 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4255 slower on all targets. Also sdb doesn't like it. */
4256
4257 if (frame_pointer_needed)
4258 {
4259 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4260 RTX_FRAME_RELATED_P (insn) = 1;
4261
4262 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4263 RTX_FRAME_RELATED_P (insn) = 1;
4264 }
4265
4266 allocate = frame.to_allocate;
4267 /* When we are dealing with only a single register and an empty frame,
4268 a push is equivalent to the mov+add sequence. */
4269 if (allocate == 0 && frame.nregs <= 1)
4270 use_mov = 0;
4271
4272 if (!use_mov)
4273 ix86_emit_save_regs ();
4274 else
4275 allocate += frame.nregs * UNITS_PER_WORD;
4276
4277 if (allocate == 0)
4278 ;
4279 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4280 {
4281 insn = emit_insn (gen_pro_epilogue_adjust_stack
4282 (stack_pointer_rtx, stack_pointer_rtx,
4283 GEN_INT (-allocate)));
4284 RTX_FRAME_RELATED_P (insn) = 1;
4285 }
4286 else
4287 {
4288 /* ??? Is this only valid for Win32? */
4289
4290 rtx arg0, sym;
4291
4292 if (TARGET_64BIT)
4293 abort ();
4294
4295 arg0 = gen_rtx_REG (SImode, 0);
4296 emit_move_insn (arg0, GEN_INT (allocate));
4297
4298 sym = gen_rtx_MEM (FUNCTION_MODE,
4299 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4300 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4301
4302 CALL_INSN_FUNCTION_USAGE (insn)
4303 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4304 CALL_INSN_FUNCTION_USAGE (insn));
4305 }
4306 if (use_mov)
4307 {
4308 if (!frame_pointer_needed || !frame.to_allocate)
4309 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4310 else
4311 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4312 -frame.nregs * UNITS_PER_WORD);
4313 }
4314
4315 #ifdef SUBTARGET_PROLOGUE
4316 SUBTARGET_PROLOGUE;
4317 #endif
4318
4319 if (pic_reg_used)
4320 {
4321 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4322
4323 /* Even with accurate pre-reload life analysis, we can wind up
4324 deleting all references to the pic register after reload.
4325 Consider the case where cross-jumping unifies two sides of a branch
4326 controlled by a comparison vs. the only read from a global.
4327 In that case, allow the set_got to be deleted, though we're
4328 too late to do anything about the ebx save in the prologue. */
4329 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4330 }
4331
4332 /* Prevent function calls from being scheduled before the call to mcount.
4333 In the pic_reg_used case, make sure that the got load isn't deleted. */
4334 if (current_function_profile)
4335 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4336 }
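/* A rough sketch of the common push-based expansion above (use_mov == 0)
   with a frame pointer, for illustration; %ebx is assumed to be the only
   saved register and N stands for frame.to_allocate:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp

   With TARGET_PROLOGUE_USING_MOVE, the subl covers the register save area
   as well and the registers are stored with movl instead of pushl.  */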
4337
4338 /* Emit code to restore saved registers using MOV insns. First register
4339 is restored from POINTER + OFFSET. */
4340 static void
4341 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4342 rtx pointer;
4343 int offset;
4344 int maybe_eh_return;
4345 {
4346 int regno;
4347
4348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4349 if (ix86_save_reg (regno, maybe_eh_return))
4350 {
4351 emit_move_insn (gen_rtx_REG (Pmode, regno),
4352 adjust_address (gen_rtx_MEM (Pmode, pointer),
4353 Pmode, offset));
4354 offset += UNITS_PER_WORD;
4355 }
4356 }
4357
4358 /* Restore function stack, frame, and registers. */
4359
4360 void
4361 ix86_expand_epilogue (style)
4362 int style;
4363 {
4364 int regno;
4365 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4366 struct ix86_frame frame;
4367 HOST_WIDE_INT offset;
4368
4369 ix86_compute_frame_layout (&frame);
4370
4371 /* Calculate start of saved registers relative to ebp. Special care
4372 must be taken for the normal return case of a function using
4373 eh_return: the eax and edx registers are marked as saved, but not
4374 restored along this path. */
4375 offset = frame.nregs;
4376 if (current_function_calls_eh_return && style != 2)
4377 offset -= 2;
4378 offset *= -UNITS_PER_WORD;
4379
4380 /* If we're only restoring one register and sp is not valid, then
4381 use a move instruction to restore the register, since it's
4382 less work than reloading sp and popping the register.
4383
4384 The default code results in a stack adjustment using an add/lea
4385 instruction, while this code results in a LEAVE instruction (or its
4386 discrete equivalent), so it is profitable in some other cases as well,
4387 especially when there are no registers to restore. We also use this code
4388 when TARGET_USE_LEAVE is set and there is exactly one register to pop.
4389 This heuristic may need some tuning in the future. */
4390 if ((!sp_valid && frame.nregs <= 1)
4391 || (TARGET_EPILOGUE_USING_MOVE
4392 && use_fast_prologue_epilogue
4393 && (frame.nregs > 1 || frame.to_allocate))
4394 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4395 || (frame_pointer_needed && TARGET_USE_LEAVE
4396 && use_fast_prologue_epilogue && frame.nregs == 1)
4397 || current_function_calls_eh_return)
4398 {
4399 /* Restore registers. We can use ebp or esp to address the memory
4400 locations. If both are available, default to ebp, since offsets
4401 are known to be small. The only exception is when esp points directly
4402 to the end of the block of saved registers, where we may simplify the
4403 addressing mode. */
4404
4405 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4406 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4407 frame.to_allocate, style == 2);
4408 else
4409 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4410 offset, style == 2);
4411
4412 /* eh_return epilogues need %ecx added to the stack pointer. */
4413 if (style == 2)
4414 {
4415 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4416
4417 if (frame_pointer_needed)
4418 {
4419 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4420 tmp = plus_constant (tmp, UNITS_PER_WORD);
4421 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4422
4423 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4424 emit_move_insn (hard_frame_pointer_rtx, tmp);
4425
4426 emit_insn (gen_pro_epilogue_adjust_stack
4427 (stack_pointer_rtx, sa, const0_rtx));
4428 }
4429 else
4430 {
4431 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4432 tmp = plus_constant (tmp, (frame.to_allocate
4433 + frame.nregs * UNITS_PER_WORD));
4434 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4435 }
4436 }
4437 else if (!frame_pointer_needed)
4438 emit_insn (gen_pro_epilogue_adjust_stack
4439 (stack_pointer_rtx, stack_pointer_rtx,
4440 GEN_INT (frame.to_allocate
4441 + frame.nregs * UNITS_PER_WORD)));
4442 /* If not an i386, mov & pop is faster than "leave". */
4443 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4444 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4445 else
4446 {
4447 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4448 hard_frame_pointer_rtx,
4449 const0_rtx));
4450 if (TARGET_64BIT)
4451 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4452 else
4453 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4454 }
4455 }
4456 else
4457 {
4458 /* First step is to deallocate the stack frame so that we can
4459 pop the registers. */
4460 if (!sp_valid)
4461 {
4462 if (!frame_pointer_needed)
4463 abort ();
4464 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4465 hard_frame_pointer_rtx,
4466 GEN_INT (offset)));
4467 }
4468 else if (frame.to_allocate)
4469 emit_insn (gen_pro_epilogue_adjust_stack
4470 (stack_pointer_rtx, stack_pointer_rtx,
4471 GEN_INT (frame.to_allocate)));
4472
4473 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4474 if (ix86_save_reg (regno, false))
4475 {
4476 if (TARGET_64BIT)
4477 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4478 else
4479 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4480 }
4481 if (frame_pointer_needed)
4482 {
4483 /* Leave results in shorter dependency chains on CPUs that are
4484 able to grok it fast. */
4485 if (TARGET_USE_LEAVE)
4486 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4487 else if (TARGET_64BIT)
4488 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4489 else
4490 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4491 }
4492 }
4493
4494 /* Sibcall epilogues don't want a return instruction. */
4495 if (style == 0)
4496 return;
4497
4498 if (current_function_pops_args && current_function_args_size)
4499 {
4500 rtx popc = GEN_INT (current_function_pops_args);
4501
4502 /* i386 can only pop 64K bytes. If asked to pop more, pop
4503 return address, do explicit add, and jump indirectly to the
4504 caller. */
4505
4506 if (current_function_pops_args >= 65536)
4507 {
4508 rtx ecx = gen_rtx_REG (SImode, 2);
4509
4510 /* There is no "pascal" calling convention in the 64-bit ABI. */
4511 if (TARGET_64BIT)
4512 abort ();
4513
4514 emit_insn (gen_popsi1 (ecx));
4515 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4516 emit_jump_insn (gen_return_indirect_internal (ecx));
4517 }
4518 else
4519 emit_jump_insn (gen_return_pop_internal (popc));
4520 }
4521 else
4522 emit_jump_insn (gen_return_internal ());
4523 }
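/* A rough sketch of the two main paths above, for illustration; one saved
   register (%ebx), a frame pointer, and N bytes of locals are assumed.
   The move-based path expands roughly to

	movl	-4(%ebp), %ebx
	leave
	ret

   while the pop-based path expands roughly to

	addl	$N, %esp
	popl	%ebx
	popl	%ebp		(or leave)
	ret

   with "ret $popc" substituted when the function pops its own arguments.  */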
4524 \f
4525 /* Extract the parts of an RTL expression that is a valid memory address
4526 for an instruction. Return 0 if the structure of the address is
4527 grossly off. Return -1 if the address contains ASHIFT, so it is not
4528 strictly valid, but is still used for computing the length of a lea
4529 instruction. */
4530
4531 static int
4532 ix86_decompose_address (addr, out)
4533 register rtx addr;
4534 struct ix86_address *out;
4535 {
4536 rtx base = NULL_RTX;
4537 rtx index = NULL_RTX;
4538 rtx disp = NULL_RTX;
4539 HOST_WIDE_INT scale = 1;
4540 rtx scale_rtx = NULL_RTX;
4541 int retval = 1;
4542
4543 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4544 base = addr;
4545 else if (GET_CODE (addr) == PLUS)
4546 {
4547 rtx op0 = XEXP (addr, 0);
4548 rtx op1 = XEXP (addr, 1);
4549 enum rtx_code code0 = GET_CODE (op0);
4550 enum rtx_code code1 = GET_CODE (op1);
4551
4552 if (code0 == REG || code0 == SUBREG)
4553 {
4554 if (code1 == REG || code1 == SUBREG)
4555 index = op0, base = op1; /* index + base */
4556 else
4557 base = op0, disp = op1; /* base + displacement */
4558 }
4559 else if (code0 == MULT)
4560 {
4561 index = XEXP (op0, 0);
4562 scale_rtx = XEXP (op0, 1);
4563 if (code1 == REG || code1 == SUBREG)
4564 base = op1; /* index*scale + base */
4565 else
4566 disp = op1; /* index*scale + disp */
4567 }
4568 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4569 {
4570 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4571 scale_rtx = XEXP (XEXP (op0, 0), 1);
4572 base = XEXP (op0, 1);
4573 disp = op1;
4574 }
4575 else if (code0 == PLUS)
4576 {
4577 index = XEXP (op0, 0); /* index + base + disp */
4578 base = XEXP (op0, 1);
4579 disp = op1;
4580 }
4581 else
4582 return 0;
4583 }
4584 else if (GET_CODE (addr) == MULT)
4585 {
4586 index = XEXP (addr, 0); /* index*scale */
4587 scale_rtx = XEXP (addr, 1);
4588 }
4589 else if (GET_CODE (addr) == ASHIFT)
4590 {
4591 rtx tmp;
4592
4593 /* We're called for lea too, which implements ashift on occasion. */
4594 index = XEXP (addr, 0);
4595 tmp = XEXP (addr, 1);
4596 if (GET_CODE (tmp) != CONST_INT)
4597 return 0;
4598 scale = INTVAL (tmp);
4599 if ((unsigned HOST_WIDE_INT) scale > 3)
4600 return 0;
4601 scale = 1 << scale;
4602 retval = -1;
4603 }
4604 else
4605 disp = addr; /* displacement */
4606
4607 /* Extract the integral value of scale. */
4608 if (scale_rtx)
4609 {
4610 if (GET_CODE (scale_rtx) != CONST_INT)
4611 return 0;
4612 scale = INTVAL (scale_rtx);
4613 }
4614
4615 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4616 if (base && index && scale == 1
4617 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4618 || index == stack_pointer_rtx))
4619 {
4620 rtx tmp = base;
4621 base = index;
4622 index = tmp;
4623 }
4624
4625 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4626 if ((base == hard_frame_pointer_rtx
4627 || base == frame_pointer_rtx
4628 || base == arg_pointer_rtx) && !disp)
4629 disp = const0_rtx;
4630
4631 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4632 Avoid this by transforming to [%esi+0]. */
4633 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4634 && base && !index && !disp
4635 && REG_P (base)
4636 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4637 disp = const0_rtx;
4638
4639 /* Special case: encode reg+reg instead of reg*2. */
4640 if (!base && index && scale && scale == 2)
4641 base = index, scale = 1;
4642
4643 /* Special case: scaling cannot be encoded without base or displacement. */
4644 if (!base && !disp && index && scale != 1)
4645 disp = const0_rtx;
4646
4647 out->base = base;
4648 out->index = index;
4649 out->disp = disp;
4650 out->scale = scale;
4651
4652 return retval;
4653 }
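/* An illustrative example of the decomposition performed above.  The
   canonical form

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                       (reg:SI %ebx))
              (const_int 12))

   yields base = %ebx, index = %eax, scale = 4, disp = 12, i.e. the operand
   of the addressing mode 12(%ebx,%eax,4).  A bare (reg:SI %ebp) picks up
   disp = const0_rtx via the special case above, since %ebp cannot be
   encoded as a base without a displacement.  */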
4654 \f
4655 /* Return the cost of the memory address X.
4656 For i386, it is better to use a complex address than to let gcc copy
4657 the address into a reg and make a new pseudo. But not if the address
4658 requires two regs - that would mean more pseudos with longer
4659 lifetimes. */
4660 int
4661 ix86_address_cost (x)
4662 rtx x;
4663 {
4664 struct ix86_address parts;
4665 int cost = 1;
4666
4667 if (!ix86_decompose_address (x, &parts))
4668 abort ();
4669
4670 if (parts.base && GET_CODE (parts.base) == SUBREG)
4671 parts.base = SUBREG_REG (parts.base);
4672 if (parts.index && GET_CODE (parts.index) == SUBREG)
4673 parts.index = SUBREG_REG (parts.index);
4674
4675 /* More complex memory references are better. */
4676 if (parts.disp && parts.disp != const0_rtx)
4677 cost--;
4678
4679 /* Attempt to minimize number of registers in the address. */
4680 if ((parts.base
4681 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4682 || (parts.index
4683 && (!REG_P (parts.index)
4684 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4685 cost++;
4686
4687 if (parts.base
4688 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4689 && parts.index
4690 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4691 && parts.base != parts.index)
4692 cost++;
4693
4694 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4695 since its predecode logic can't detect the length of such instructions
4696 and decoding degenerates to vector decoding. Increase the cost of such
4697 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4698 to split such addresses or even to refuse such addresses at all.
4699
4700 The following addressing modes are affected:
4701 [base+scale*index]
4702 [scale*index+disp]
4703 [base+index]
4704
4705 The first and last cases may be avoidable by explicitly coding the zero
4706 into the memory address, but I don't have an AMD K6 machine handy to
4707 check this theory. */
4708
4709 if (TARGET_K6
4710 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4711 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4712 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4713 cost += 10;
4714
4715 return cost;
4716 }
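/* Worked examples of the cost computed above on a 32-bit target, for
   illustration only and ignoring the K6 penalty:

     8(%ebp)               hard reg base + nonzero disp           cost 0
     (%ebp)                hard reg base only                     cost 1
     (pseudo)              one pseudo register                    cost 2
     8(pseudo1,pseudo2)    two distinct pseudos + nonzero disp    cost 2  */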
4717 \f
4718 /* If X is a machine specific address (i.e. a symbol or label being
4719 referenced as a displacement from the GOT implemented using an
4720 UNSPEC), then return the base term. Otherwise return X. */
4721
4722 rtx
4723 ix86_find_base_term (x)
4724 rtx x;
4725 {
4726 rtx term;
4727
4728 if (TARGET_64BIT)
4729 {
4730 if (GET_CODE (x) != CONST)
4731 return x;
4732 term = XEXP (x, 0);
4733 if (GET_CODE (term) == PLUS
4734 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4735 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4736 term = XEXP (term, 0);
4737 if (GET_CODE (term) != UNSPEC
4738 || XINT (term, 1) != UNSPEC_GOTPCREL)
4739 return x;
4740
4741 term = XVECEXP (term, 0, 0);
4742
4743 if (GET_CODE (term) != SYMBOL_REF
4744 && GET_CODE (term) != LABEL_REF)
4745 return x;
4746
4747 return term;
4748 }
4749
4750 if (GET_CODE (x) != PLUS
4751 || XEXP (x, 0) != pic_offset_table_rtx
4752 || GET_CODE (XEXP (x, 1)) != CONST)
4753 return x;
4754
4755 term = XEXP (XEXP (x, 1), 0);
4756
4757 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4758 term = XEXP (term, 0);
4759
4760 if (GET_CODE (term) != UNSPEC
4761 || XINT (term, 1) != UNSPEC_GOTOFF)
4762 return x;
4763
4764 term = XVECEXP (term, 0, 0);
4765
4766 if (GET_CODE (term) != SYMBOL_REF
4767 && GET_CODE (term) != LABEL_REF)
4768 return x;
4769
4770 return term;
4771 }
4772 \f
4773 /* Determine if a given RTX is a valid constant. We already know this
4774 satisfies CONSTANT_P. */
4775
4776 bool
4777 legitimate_constant_p (x)
4778 rtx x;
4779 {
4780 rtx inner;
4781
4782 switch (GET_CODE (x))
4783 {
4784 case SYMBOL_REF:
4785 /* TLS symbols are not constant. */
4786 if (tls_symbolic_operand (x, Pmode))
4787 return false;
4788 break;
4789
4790 case CONST:
4791 inner = XEXP (x, 0);
4792
4793 /* Offsets of TLS symbols are never valid.
4794 Discourage CSE from creating them. */
4795 if (GET_CODE (inner) == PLUS
4796 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4797 return false;
4798
4799 /* Only some unspecs are valid as "constants". */
4800 if (GET_CODE (inner) == UNSPEC)
4801 switch (XINT (inner, 1))
4802 {
4803 case UNSPEC_TPOFF:
4804 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4805 case UNSPEC_TP:
4806 return true;
4807 default:
4808 return false;
4809 }
4810 break;
4811
4812 default:
4813 break;
4814 }
4815
4816 /* Otherwise we handle everything else in the move patterns. */
4817 return true;
4818 }
4819
4820 /* Determine if a given RTX is a valid constant address. */
4821
4822 bool
4823 constant_address_p (x)
4824 rtx x;
4825 {
4826 switch (GET_CODE (x))
4827 {
4828 case LABEL_REF:
4829 case CONST_INT:
4830 return true;
4831
4832 case CONST_DOUBLE:
4833 return TARGET_64BIT;
4834
4835 case CONST:
4836 case SYMBOL_REF:
4837 return !flag_pic && legitimate_constant_p (x);
4838
4839 default:
4840 return false;
4841 }
4842 }
4843
4844 /* Nonzero if the constant value X is a legitimate general operand
4845 when generating PIC code. It is given that flag_pic is on and
4846 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4847
4848 bool
4849 legitimate_pic_operand_p (x)
4850 rtx x;
4851 {
4852 rtx inner;
4853
4854 switch (GET_CODE (x))
4855 {
4856 case CONST:
4857 inner = XEXP (x, 0);
4858
4859 /* Only some unspecs are valid as "constants". */
4860 if (GET_CODE (inner) == UNSPEC)
4861 switch (XINT (inner, 1))
4862 {
4863 case UNSPEC_TPOFF:
4864 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4865 case UNSPEC_TP:
4866 return true;
4867 default:
4868 return false;
4869 }
4870 /* FALLTHRU */
4871
4872 case SYMBOL_REF:
4873 case LABEL_REF:
4874 return legitimate_pic_address_disp_p (x);
4875
4876 default:
4877 return true;
4878 }
4879 }
4880
4881 /* Determine if a given CONST RTX is a valid memory displacement
4882 in PIC mode. */
4883
4884 int
4885 legitimate_pic_address_disp_p (disp)
4886 register rtx disp;
4887 {
4888 bool saw_plus;
4889
4890 /* In 64bit mode we can allow direct addresses of symbols and labels
4891 when they are not dynamic symbols. */
4892 if (TARGET_64BIT)
4893 {
4894 rtx x = disp;
4895 if (GET_CODE (disp) == CONST)
4896 x = XEXP (disp, 0);
4897 /* ??? Handle PIC code models */
4898 if (GET_CODE (x) == PLUS
4899 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4900 && ix86_cmodel == CM_SMALL_PIC
4901 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4902 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4903 x = XEXP (x, 0);
4904 if (local_symbolic_operand (x, Pmode))
4905 return 1;
4906 }
4907 if (GET_CODE (disp) != CONST)
4908 return 0;
4909 disp = XEXP (disp, 0);
4910
4911 if (TARGET_64BIT)
4912 {
4913 /* It is unsafe to allow PLUS expressions; this limits the allowed
4914 distance of GOT table references. We should not need these anyway. */
4915 if (GET_CODE (disp) != UNSPEC
4916 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4917 return 0;
4918
4919 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4920 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4921 return 0;
4922 return 1;
4923 }
4924
4925 saw_plus = false;
4926 if (GET_CODE (disp) == PLUS)
4927 {
4928 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4929 return 0;
4930 disp = XEXP (disp, 0);
4931 saw_plus = true;
4932 }
4933
4934 if (GET_CODE (disp) != UNSPEC)
4935 return 0;
4936
4937 switch (XINT (disp, 1))
4938 {
4939 case UNSPEC_GOT:
4940 if (saw_plus)
4941 return false;
4942 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4943 case UNSPEC_GOTOFF:
4944 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4945 case UNSPEC_GOTTPOFF:
4946 if (saw_plus)
4947 return false;
4948 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4949 case UNSPEC_NTPOFF:
4950 /* ??? Could support offset here. */
4951 if (saw_plus)
4952 return false;
4953 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4954 case UNSPEC_DTPOFF:
4955 /* ??? Could support offset here. */
4956 if (saw_plus)
4957 return false;
4958 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4959 }
4960
4961 return 0;
4962 }
4963
4964 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4965 memory address for an instruction. The MODE argument is the machine mode
4966 for the MEM expression that wants to use this address.
4967
4968 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4969 convert common non-canonical forms to canonical form so that they will
4970 be recognized. */
4971
4972 int
4973 legitimate_address_p (mode, addr, strict)
4974 enum machine_mode mode;
4975 register rtx addr;
4976 int strict;
4977 {
4978 struct ix86_address parts;
4979 rtx base, index, disp;
4980 HOST_WIDE_INT scale;
4981 const char *reason = NULL;
4982 rtx reason_rtx = NULL_RTX;
4983
4984 if (TARGET_DEBUG_ADDR)
4985 {
4986 fprintf (stderr,
4987 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4988 GET_MODE_NAME (mode), strict);
4989 debug_rtx (addr);
4990 }
4991
4992 if (ix86_decompose_address (addr, &parts) <= 0)
4993 {
4994 reason = "decomposition failed";
4995 goto report_error;
4996 }
4997
4998 base = parts.base;
4999 index = parts.index;
5000 disp = parts.disp;
5001 scale = parts.scale;
5002
5003 /* Validate base register.
5004
5005 Don't allow SUBREGs here; it can lead to spill failures when the base
5006 is one word out of a two-word structure, which is represented internally
5007 as a DImode int. */
5008
5009 if (base)
5010 {
5011 rtx reg;
5012 reason_rtx = base;
5013
5014 if (GET_CODE (base) == SUBREG)
5015 reg = SUBREG_REG (base);
5016 else
5017 reg = base;
5018
5019 if (GET_CODE (reg) != REG)
5020 {
5021 reason = "base is not a register";
5022 goto report_error;
5023 }
5024
5025 if (GET_MODE (base) != Pmode)
5026 {
5027 reason = "base is not in Pmode";
5028 goto report_error;
5029 }
5030
5031 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5032 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5033 {
5034 reason = "base is not valid";
5035 goto report_error;
5036 }
5037 }
5038
5039 /* Validate index register.
5040
5041 Don't allow SUBREGs here; it can lead to spill failures when the index
5042 is one word out of a two-word structure, which is represented internally
5043 as a DImode int. */
5044
5045 if (index)
5046 {
5047 rtx reg;
5048 reason_rtx = index;
5049
5050 if (GET_CODE (index) == SUBREG)
5051 reg = SUBREG_REG (index);
5052 else
5053 reg = index;
5054
5055 if (GET_CODE (reg) != REG)
5056 {
5057 reason = "index is not a register";
5058 goto report_error;
5059 }
5060
5061 if (GET_MODE (index) != Pmode)
5062 {
5063 reason = "index is not in Pmode";
5064 goto report_error;
5065 }
5066
5067 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5068 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5069 {
5070 reason = "index is not valid";
5071 goto report_error;
5072 }
5073 }
5074
5075 /* Validate scale factor. */
5076 if (scale != 1)
5077 {
5078 reason_rtx = GEN_INT (scale);
5079 if (!index)
5080 {
5081 reason = "scale without index";
5082 goto report_error;
5083 }
5084
5085 if (scale != 2 && scale != 4 && scale != 8)
5086 {
5087 reason = "scale is not a valid multiplier";
5088 goto report_error;
5089 }
5090 }
5091
5092 /* Validate displacement. */
5093 if (disp)
5094 {
5095 reason_rtx = disp;
5096
5097 if (TARGET_64BIT)
5098 {
5099 if (!x86_64_sign_extended_value (disp))
5100 {
5101 reason = "displacement is out of range";
5102 goto report_error;
5103 }
5104 }
5105 else
5106 {
5107 if (GET_CODE (disp) == CONST_DOUBLE)
5108 {
5109 reason = "displacement is a const_double";
5110 goto report_error;
5111 }
5112 }
5113
5114 if (GET_CODE (disp) == CONST
5115 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5116 switch (XINT (XEXP (disp, 0), 1))
5117 {
5118 case UNSPEC_GOT:
5119 case UNSPEC_GOTOFF:
5120 case UNSPEC_GOTPCREL:
5121 if (!flag_pic)
5122 abort ();
5123 goto is_legitimate_pic;
5124
5125 case UNSPEC_GOTTPOFF:
5126 case UNSPEC_NTPOFF:
5127 case UNSPEC_DTPOFF:
5128 break;
5129
5130 default:
5131 reason = "invalid address unspec";
5132 goto report_error;
5133 }
5134
5135 else if (flag_pic && SYMBOLIC_CONST (disp))
5136 {
5137 is_legitimate_pic:
5138 if (TARGET_64BIT && (index || base))
5139 {
5140 reason = "non-constant pic memory reference";
5141 goto report_error;
5142 }
5143 if (! legitimate_pic_address_disp_p (disp))
5144 {
5145 reason = "displacement is an invalid pic construct";
5146 goto report_error;
5147 }
5148
5149 /* This code used to verify that a symbolic pic displacement
5150 includes the pic_offset_table_rtx register.
5151
5152 While this is a good idea, unfortunately these constructs may
5153 be created by the "adds using lea" optimization for incorrect
5154 code like:
5155
5156 int a;
5157 int foo(int i)
5158 {
5159 return *(&a+i);
5160 }
5161
5162 This code is nonsensical, but results in addressing the
5163 GOT table with a pic_offset_table_rtx base. We can't
5164 just refuse it easily, since it gets matched by the
5165 "addsi3" pattern, which later gets split to lea when the
5166 output register differs from the input. While this
5167 could be handled by a separate addsi pattern for this case
5168 that never results in lea, disabling this test seems to be
5169 the easier and correct fix for the crash. */
5170 }
5171 else if (HALF_PIC_P ())
5172 {
5173 if (! HALF_PIC_ADDRESS_P (disp)
5174 || (base != NULL_RTX || index != NULL_RTX))
5175 {
5176 reason = "displacement is an invalid half-pic reference";
5177 goto report_error;
5178 }
5179 }
5180 else if (!CONSTANT_ADDRESS_P (disp))
5181 {
5182 reason = "displacement is not constant";
5183 goto report_error;
5184 }
5185 }
5186
5187 /* Everything looks valid. */
5188 if (TARGET_DEBUG_ADDR)
5189 fprintf (stderr, "Success.\n");
5190 return TRUE;
5191
5192 report_error:
5193 if (TARGET_DEBUG_ADDR)
5194 {
5195 fprintf (stderr, "Error: %s\n", reason);
5196 debug_rtx (reason_rtx);
5197 }
5198 return FALSE;
5199 }
5200 \f
5201 /* Return a unique alias set for the GOT. */
5202
5203 static HOST_WIDE_INT
5204 ix86_GOT_alias_set ()
5205 {
5206 static HOST_WIDE_INT set = -1;
5207 if (set == -1)
5208 set = new_alias_set ();
5209 return set;
5210 }
5211
5212 /* Return a legitimate reference for ORIG (an address) using the
5213 register REG. If REG is 0, a new pseudo is generated.
5214
5215 There are two types of references that must be handled:
5216
5217 1. Global data references must load the address from the GOT, via
5218 the PIC reg. An insn is emitted to do this load, and the reg is
5219 returned.
5220
5221 2. Static data references, constant pool addresses, and code labels
5222 compute the address as an offset from the GOT, whose base is in
5223 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5224 differentiate them from global data objects. The returned
5225 address is the PIC reg + an unspec constant.
5226
5227 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5228 reg also appears in the address. */
5229
5230 rtx
5231 legitimize_pic_address (orig, reg)
5232 rtx orig;
5233 rtx reg;
5234 {
5235 rtx addr = orig;
5236 rtx new = orig;
5237 rtx base;
5238
5239 if (local_symbolic_operand (addr, Pmode))
5240 {
5241 /* In 64bit mode we can address such objects directly. */
5242 if (TARGET_64BIT)
5243 new = addr;
5244 else
5245 {
5246 /* This symbol may be referenced via a displacement from the PIC
5247 base address (@GOTOFF). */
5248
5249 if (reload_in_progress)
5250 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5251 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5252 new = gen_rtx_CONST (Pmode, new);
5253 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5254
5255 if (reg != 0)
5256 {
5257 emit_move_insn (reg, new);
5258 new = reg;
5259 }
5260 }
5261 }
5262 else if (GET_CODE (addr) == SYMBOL_REF)
5263 {
5264 if (TARGET_64BIT)
5265 {
5266 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5267 new = gen_rtx_CONST (Pmode, new);
5268 new = gen_rtx_MEM (Pmode, new);
5269 RTX_UNCHANGING_P (new) = 1;
5270 set_mem_alias_set (new, ix86_GOT_alias_set ());
5271
5272 if (reg == 0)
5273 reg = gen_reg_rtx (Pmode);
5274 /* Use gen_movsi directly; otherwise the address is loaded
5275 into a register for CSE. We don't want to CSE these addresses;
5276 instead we CSE addresses from the GOT table, so skip this. */
5277 emit_insn (gen_movsi (reg, new));
5278 new = reg;
5279 }
5280 else
5281 {
5282 /* This symbol must be referenced via a load from the
5283 Global Offset Table (@GOT). */
5284
5285 if (reload_in_progress)
5286 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5287 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5288 new = gen_rtx_CONST (Pmode, new);
5289 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5290 new = gen_rtx_MEM (Pmode, new);
5291 RTX_UNCHANGING_P (new) = 1;
5292 set_mem_alias_set (new, ix86_GOT_alias_set ());
5293
5294 if (reg == 0)
5295 reg = gen_reg_rtx (Pmode);
5296 emit_move_insn (reg, new);
5297 new = reg;
5298 }
5299 }
5300 else
5301 {
5302 if (GET_CODE (addr) == CONST)
5303 {
5304 addr = XEXP (addr, 0);
5305
5306 /* We must match stuff we have generated before. Assume the only
5307 unspecs that can get here are ours. Not that we could do
5308 anything with them anyway... */
5309 if (GET_CODE (addr) == UNSPEC
5310 || (GET_CODE (addr) == PLUS
5311 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5312 return orig;
5313 if (GET_CODE (addr) != PLUS)
5314 abort ();
5315 }
5316 if (GET_CODE (addr) == PLUS)
5317 {
5318 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5319
5320 /* Check first to see if this is a constant offset from a @GOTOFF
5321 symbol reference. */
5322 if (local_symbolic_operand (op0, Pmode)
5323 && GET_CODE (op1) == CONST_INT)
5324 {
5325 if (!TARGET_64BIT)
5326 {
5327 if (reload_in_progress)
5328 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5329 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5330 UNSPEC_GOTOFF);
5331 new = gen_rtx_PLUS (Pmode, new, op1);
5332 new = gen_rtx_CONST (Pmode, new);
5333 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5334
5335 if (reg != 0)
5336 {
5337 emit_move_insn (reg, new);
5338 new = reg;
5339 }
5340 }
5341 else
5342 {
5343 /* ??? We need to limit offsets here. */
5344 }
5345 }
5346 else
5347 {
5348 base = legitimize_pic_address (XEXP (addr, 0), reg);
5349 new = legitimize_pic_address (XEXP (addr, 1),
5350 base == reg ? NULL_RTX : reg);
5351
5352 if (GET_CODE (new) == CONST_INT)
5353 new = plus_constant (base, INTVAL (new));
5354 else
5355 {
5356 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5357 {
5358 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5359 new = XEXP (new, 1);
5360 }
5361 new = gen_rtx_PLUS (Pmode, base, new);
5362 }
5363 }
5364 }
5365 }
5366 return new;
5367 }
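/* An illustrative sketch of the two 32-bit cases described above, assuming
   a local symbol "a", a global symbol "b", and %ebx as the PIC register:

     a:  (plus (reg pic) (const (unspec [a] UNSPEC_GOTOFF)))
         i.e. the address a@GOTOFF(%ebx)
     b:  (mem (plus (reg pic) (const (unspec [b] UNSPEC_GOT))))
         i.e. a load from the GOT slot b@GOT(%ebx)  */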
5368
5369 static void
5370 ix86_encode_section_info (decl, first)
5371 tree decl;
5372 int first ATTRIBUTE_UNUSED;
5373 {
5374 bool local_p = (*targetm.binds_local_p) (decl);
5375 rtx rtl, symbol;
5376
5377 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5378 if (GET_CODE (rtl) != MEM)
5379 return;
5380 symbol = XEXP (rtl, 0);
5381 if (GET_CODE (symbol) != SYMBOL_REF)
5382 return;
5383
5384 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5385 symbol so that we may access it directly in the GOT. */
5386
5387 if (flag_pic)
5388 SYMBOL_REF_FLAG (symbol) = local_p;
5389
5390 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5391 "local dynamic", "initial exec" or "local exec" TLS models
5392 respectively. */
5393
5394 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5395 {
5396 const char *symbol_str;
5397 char *newstr;
5398 size_t len;
5399 enum tls_model kind;
5400
5401 if (!flag_pic)
5402 {
5403 if (local_p)
5404 kind = TLS_MODEL_LOCAL_EXEC;
5405 else
5406 kind = TLS_MODEL_INITIAL_EXEC;
5407 }
5408 /* Local dynamic is inefficient when we're not combining the
5409 parts of the address. */
5410 else if (optimize && local_p)
5411 kind = TLS_MODEL_LOCAL_DYNAMIC;
5412 else
5413 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5414 if (kind < flag_tls_default)
5415 kind = flag_tls_default;
5416
5417 symbol_str = XSTR (symbol, 0);
5418
5419 if (symbol_str[0] == '%')
5420 {
5421 if (symbol_str[1] == tls_model_chars[kind])
5422 return;
5423 symbol_str += 2;
5424 }
5425 len = strlen (symbol_str) + 1;
5426 newstr = alloca (len + 2);
5427
5428 newstr[0] = '%';
5429 newstr[1] = tls_model_chars[kind];
5430 memcpy (newstr + 2, symbol_str, len);
5431
5432 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5433 }
5434 }
5435
5436 /* Undo the above when printing symbol names. */
5437
5438 static const char *
5439 ix86_strip_name_encoding (str)
5440 const char *str;
5441 {
5442 if (str[0] == '%')
5443 str += 2;
5444 if (str [0] == '*')
5445 str += 1;
5446 return str;
5447 }
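/* An illustrative example, with the variable name assumed: following the
   %[GLil] encoding applied in ix86_encode_section_info, a thread-local
   variable "counter" compiled with the local-exec model would have its
   SYMBOL_REF name rewritten to something like "%lcounter";
   ix86_strip_name_encoding then skips the two-character prefix and
   returns "counter" for output.  */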
5448 \f
5449 /* Load the thread pointer into a register. */
5450
5451 static rtx
5452 get_thread_pointer ()
5453 {
5454 rtx tp;
5455
5456 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5457 tp = gen_rtx_CONST (Pmode, tp);
5458 tp = force_reg (Pmode, tp);
5459
5460 return tp;
5461 }
5462
5463 /* Try machine-dependent ways of modifying an illegitimate address
5464 to be legitimate. If we find one, return the new, valid address.
5465 This macro is used in only one place: `memory_address' in explow.c.
5466
5467 OLDX is the address as it was before break_out_memory_refs was called.
5468 In some cases it is useful to look at this to decide what needs to be done.
5469
5470 MODE and WIN are passed so that this macro can use
5471 GO_IF_LEGITIMATE_ADDRESS.
5472
5473 It is always safe for this macro to do nothing. It exists to recognize
5474 opportunities to optimize the output.
5475
5476 For the 80386, we handle X+REG by loading X into a register R and
5477 using R+REG. R will go in a general reg and indexing will be used.
5478 However, if REG is a broken-out memory address or multiplication,
5479 nothing needs to be done because REG can certainly go in a general reg.
5480
5481 When -fpic is used, special handling is needed for symbolic references.
5482 See comments by legitimize_pic_address in i386.c for details. */
5483
5484 rtx
5485 legitimize_address (x, oldx, mode)
5486 register rtx x;
5487 register rtx oldx ATTRIBUTE_UNUSED;
5488 enum machine_mode mode;
5489 {
5490 int changed = 0;
5491 unsigned log;
5492
5493 if (TARGET_DEBUG_ADDR)
5494 {
5495 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5496 GET_MODE_NAME (mode));
5497 debug_rtx (x);
5498 }
5499
5500 log = tls_symbolic_operand (x, mode);
5501 if (log)
5502 {
5503 rtx dest, base, off, pic;
5504
5505 switch (log)
5506 {
5507 case TLS_MODEL_GLOBAL_DYNAMIC:
5508 dest = gen_reg_rtx (Pmode);
5509 emit_insn (gen_tls_global_dynamic (dest, x));
5510 break;
5511
5512 case TLS_MODEL_LOCAL_DYNAMIC:
5513 base = gen_reg_rtx (Pmode);
5514 emit_insn (gen_tls_local_dynamic_base (base));
5515
5516 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5517 off = gen_rtx_CONST (Pmode, off);
5518
5519 return gen_rtx_PLUS (Pmode, base, off);
5520
5521 case TLS_MODEL_INITIAL_EXEC:
5522 if (flag_pic)
5523 {
5524 if (reload_in_progress)
5525 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5526 pic = pic_offset_table_rtx;
5527 }
5528 else
5529 {
5530 pic = gen_reg_rtx (Pmode);
5531 emit_insn (gen_set_got (pic));
5532 }
5533
5534 base = get_thread_pointer ();
5535
5536 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5537 off = gen_rtx_CONST (Pmode, off);
5538 off = gen_rtx_PLUS (Pmode, pic, off);
5539 off = gen_rtx_MEM (Pmode, off);
5540 RTX_UNCHANGING_P (off) = 1;
5541 set_mem_alias_set (off, ix86_GOT_alias_set ());
5542
5543 /* Damn Sun for specifying a set of dynamic relocations without
5544 considering the two-operand nature of the architecture!
5545 We'd be much better off with a "GOTNTPOFF" relocation that
5546 already contained the negated constant. */
5547 /* ??? Using negl and reg+reg addressing appears to be a loss
5548 size-wise. The negl is two bytes, just like the extra movl
5549 incurred by the two-operand subl, but reg+reg addressing
5550 uses the two-byte modrm form, unlike plain reg. */
5551
5552 dest = gen_reg_rtx (Pmode);
5553 emit_insn (gen_subsi3 (dest, base, off));
5554 break;
5555
5556 case TLS_MODEL_LOCAL_EXEC:
5557 base = get_thread_pointer ();
5558
5559 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5560 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5561 off = gen_rtx_CONST (Pmode, off);
5562
5563 if (TARGET_GNU_TLS)
5564 return gen_rtx_PLUS (Pmode, base, off);
5565 else
5566 {
5567 dest = gen_reg_rtx (Pmode);
5568 emit_insn (gen_subsi3 (dest, base, off));
5569 }
5570 break;
5571
5572 default:
5573 abort ();
5574 }
5575
5576 return dest;
5577 }
5578
5579 if (flag_pic && SYMBOLIC_CONST (x))
5580 return legitimize_pic_address (x, 0);
5581
5582 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5583 if (GET_CODE (x) == ASHIFT
5584 && GET_CODE (XEXP (x, 1)) == CONST_INT
5585 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
5586 {
5587 changed = 1;
5588 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5589 GEN_INT (1 << log));
5590 }
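/* For example, (ashift (reg) (const_int 3)) has now been rewritten as
   (mult (reg) (const_int 8)), which maps directly onto the scaled-index
   form of the i386 addressing modes.  */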
5591
5592 if (GET_CODE (x) == PLUS)
5593 {
5594 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5595
5596 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5597 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5598 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
5599 {
5600 changed = 1;
5601 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5602 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5603 GEN_INT (1 << log));
5604 }
5605
5606 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5607 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5608 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
5609 {
5610 changed = 1;
5611 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5612 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5613 GEN_INT (1 << log));
5614 }
5615
5616 /* Put multiply first if it isn't already. */
5617 if (GET_CODE (XEXP (x, 1)) == MULT)
5618 {
5619 rtx tmp = XEXP (x, 0);
5620 XEXP (x, 0) = XEXP (x, 1);
5621 XEXP (x, 1) = tmp;
5622 changed = 1;
5623 }
5624
5625 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5626 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5627 created by virtual register instantiation, register elimination, and
5628 similar optimizations. */
5629 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5630 {
5631 changed = 1;
5632 x = gen_rtx_PLUS (Pmode,
5633 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5634 XEXP (XEXP (x, 1), 0)),
5635 XEXP (XEXP (x, 1), 1));
5636 }
5637
5638 /* Canonicalize
5639 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5640 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5641 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5642 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5643 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5644 && CONSTANT_P (XEXP (x, 1)))
5645 {
5646 rtx constant;
5647 rtx other = NULL_RTX;
5648
5649 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5650 {
5651 constant = XEXP (x, 1);
5652 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5653 }
5654 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5655 {
5656 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5657 other = XEXP (x, 1);
5658 }
5659 else
5660 constant = 0;
5661
5662 if (constant)
5663 {
5664 changed = 1;
5665 x = gen_rtx_PLUS (Pmode,
5666 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5667 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5668 plus_constant (other, INTVAL (constant)));
5669 }
5670 }
5671
5672 if (changed && legitimate_address_p (mode, x, FALSE))
5673 return x;
5674
5675 if (GET_CODE (XEXP (x, 0)) == MULT)
5676 {
5677 changed = 1;
5678 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5679 }
5680
5681 if (GET_CODE (XEXP (x, 1)) == MULT)
5682 {
5683 changed = 1;
5684 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5685 }
5686
5687 if (changed
5688 && GET_CODE (XEXP (x, 1)) == REG
5689 && GET_CODE (XEXP (x, 0)) == REG)
5690 return x;
5691
5692 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5693 {
5694 changed = 1;
5695 x = legitimize_pic_address (x, 0);
5696 }
5697
5698 if (changed && legitimate_address_p (mode, x, FALSE))
5699 return x;
5700
5701 if (GET_CODE (XEXP (x, 0)) == REG)
5702 {
5703 register rtx temp = gen_reg_rtx (Pmode);
5704 register rtx val = force_operand (XEXP (x, 1), temp);
5705 if (val != temp)
5706 emit_move_insn (temp, val);
5707
5708 XEXP (x, 1) = temp;
5709 return x;
5710 }
5711
5712 else if (GET_CODE (XEXP (x, 1)) == REG)
5713 {
5714 register rtx temp = gen_reg_rtx (Pmode);
5715 register rtx val = force_operand (XEXP (x, 0), temp);
5716 if (val != temp)
5717 emit_move_insn (temp, val);
5718
5719 XEXP (x, 0) = temp;
5720 return x;
5721 }
5722 }
5723
5724 return x;
5725 }
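/* Taken together, the canonicalizations above turn an address such as
   (plus (ashift (reg) (const_int 2)) (plus (reg) (const_int 8))) into
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8)), i.e.
   the base + index*scale + displacement shape that legitimate_address_p
   accepts directly.  */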
5726 \f
5727 /* Print an integer constant expression in assembler syntax. Addition
5728 and subtraction are the only arithmetic that may appear in these
5729 expressions. FILE is the stdio stream to write to, X is the rtx, and
5730 CODE is the operand print code from the output string. */
5731
5732 static void
5733 output_pic_addr_const (file, x, code)
5734 FILE *file;
5735 rtx x;
5736 int code;
5737 {
5738 char buf[256];
5739
5740 switch (GET_CODE (x))
5741 {
5742 case PC:
5743 if (flag_pic)
5744 putc ('.', file);
5745 else
5746 abort ();
5747 break;
5748
5749 case SYMBOL_REF:
5750 assemble_name (file, XSTR (x, 0));
5751 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5752 fputs ("@PLT", file);
5753 break;
5754
5755 case LABEL_REF:
5756 x = XEXP (x, 0);
5757 /* FALLTHRU */
5758 case CODE_LABEL:
5759 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5760 assemble_name (asm_out_file, buf);
5761 break;
5762
5763 case CONST_INT:
5764 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5765 break;
5766
5767 case CONST:
5768 /* This used to output parentheses around the expression,
5769 but that does not work on the 386 (either ATT or BSD assembler). */
5770 output_pic_addr_const (file, XEXP (x, 0), code);
5771 break;
5772
5773 case CONST_DOUBLE:
5774 if (GET_MODE (x) == VOIDmode)
5775 {
5776 /* We can use %d if the number is <32 bits and positive. */
5777 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5778 fprintf (file, "0x%lx%08lx",
5779 (unsigned long) CONST_DOUBLE_HIGH (x),
5780 (unsigned long) CONST_DOUBLE_LOW (x));
5781 else
5782 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5783 }
5784 else
5785 /* We can't handle floating point constants;
5786 PRINT_OPERAND must handle them. */
5787 output_operand_lossage ("floating constant misused");
5788 break;
5789
5790 case PLUS:
5791 /* Some assemblers need integer constants to appear first. */
5792 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5793 {
5794 output_pic_addr_const (file, XEXP (x, 0), code);
5795 putc ('+', file);
5796 output_pic_addr_const (file, XEXP (x, 1), code);
5797 }
5798 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5799 {
5800 output_pic_addr_const (file, XEXP (x, 1), code);
5801 putc ('+', file);
5802 output_pic_addr_const (file, XEXP (x, 0), code);
5803 }
5804 else
5805 abort ();
5806 break;
5807
5808 case MINUS:
5809 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5810 output_pic_addr_const (file, XEXP (x, 0), code);
5811 putc ('-', file);
5812 output_pic_addr_const (file, XEXP (x, 1), code);
5813 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5814 break;
5815
5816 case UNSPEC:
5817 if (XVECLEN (x, 0) != 1)
5818 abort ();
5819 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5820 switch (XINT (x, 1))
5821 {
5822 case UNSPEC_GOT:
5823 fputs ("@GOT", file);
5824 break;
5825 case UNSPEC_GOTOFF:
5826 fputs ("@GOTOFF", file);
5827 break;
5828 case UNSPEC_GOTPCREL:
5829 fputs ("@GOTPCREL(%RIP)", file);
5830 break;
5831 case UNSPEC_GOTTPOFF:
5832 fputs ("@GOTTPOFF", file);
5833 break;
5834 case UNSPEC_TPOFF:
5835 fputs ("@TPOFF", file);
5836 break;
5837 case UNSPEC_NTPOFF:
5838 fputs ("@NTPOFF", file);
5839 break;
5840 case UNSPEC_DTPOFF:
5841 fputs ("@DTPOFF", file);
5842 break;
5843 default:
5844 output_operand_lossage ("invalid UNSPEC as operand");
5845 break;
5846 }
5847 break;
5848
5849 default:
5850 output_operand_lossage ("invalid expression as operand");
5851 }
5852 }
5853
5854 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5855 We need to handle our special PIC relocations. */
5856
5857 void
5858 i386_dwarf_output_addr_const (file, x)
5859 FILE *file;
5860 rtx x;
5861 {
5862 #ifdef ASM_QUAD
5863 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5864 #else
5865 if (TARGET_64BIT)
5866 abort ();
5867 fprintf (file, "%s", ASM_LONG);
5868 #endif
5869 if (flag_pic)
5870 output_pic_addr_const (file, x, '\0');
5871 else
5872 output_addr_const (file, x);
5873 fputc ('\n', file);
5874 }
5875
5876 /* In the name of slightly smaller debug output, and to cater to
5877 general assembler losage, recognize PIC+GOTOFF and turn it back
5878 into a direct symbol reference. */
5879
5880 rtx
5881 i386_simplify_dwarf_addr (orig_x)
5882 rtx orig_x;
5883 {
5884 rtx x = orig_x, y;
5885
5886 if (GET_CODE (x) == MEM)
5887 x = XEXP (x, 0);
5888
5889 if (TARGET_64BIT)
5890 {
5891 if (GET_CODE (x) != CONST
5892 || GET_CODE (XEXP (x, 0)) != UNSPEC
5893 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5894 || GET_CODE (orig_x) != MEM)
5895 return orig_x;
5896 return XVECEXP (XEXP (x, 0), 0, 0);
5897 }
5898
5899 if (GET_CODE (x) != PLUS
5900 || GET_CODE (XEXP (x, 1)) != CONST)
5901 return orig_x;
5902
5903 if (GET_CODE (XEXP (x, 0)) == REG
5904 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5905 /* %ebx + GOT/GOTOFF */
5906 y = NULL;
5907 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5908 {
5909 /* %ebx + %reg * scale + GOT/GOTOFF */
5910 y = XEXP (x, 0);
5911 if (GET_CODE (XEXP (y, 0)) == REG
5912 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5913 y = XEXP (y, 1);
5914 else if (GET_CODE (XEXP (y, 1)) == REG
5915 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5916 y = XEXP (y, 0);
5917 else
5918 return orig_x;
5919 if (GET_CODE (y) != REG
5920 && GET_CODE (y) != MULT
5921 && GET_CODE (y) != ASHIFT)
5922 return orig_x;
5923 }
5924 else
5925 return orig_x;
5926
5927 x = XEXP (XEXP (x, 1), 0);
5928 if (GET_CODE (x) == UNSPEC
5929 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5930 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5931 {
5932 if (y)
5933 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5934 return XVECEXP (x, 0, 0);
5935 }
5936
5937 if (GET_CODE (x) == PLUS
5938 && GET_CODE (XEXP (x, 0)) == UNSPEC
5939 && GET_CODE (XEXP (x, 1)) == CONST_INT
5940 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5941 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5942 && GET_CODE (orig_x) != MEM)))
5943 {
5944 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5945 if (y)
5946 return gen_rtx_PLUS (Pmode, y, x);
5947 return x;
5948 }
5949
5950 return orig_x;
5951 }
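/* For example, (plus (reg ebx) (const (unspec [sym] UNSPEC_GOTOFF)))
   is simplified back to plain "sym" here, so the debug output refers to
   the symbol directly instead of spelling out the GOT arithmetic.  */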
5952 \f
5953 static void
5954 put_condition_code (code, mode, reverse, fp, file)
5955 enum rtx_code code;
5956 enum machine_mode mode;
5957 int reverse, fp;
5958 FILE *file;
5959 {
5960 const char *suffix;
5961
5962 if (mode == CCFPmode || mode == CCFPUmode)
5963 {
5964 enum rtx_code second_code, bypass_code;
5965 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5966 if (bypass_code != NIL || second_code != NIL)
5967 abort ();
5968 code = ix86_fp_compare_code_to_integer (code);
5969 mode = CCmode;
5970 }
5971 if (reverse)
5972 code = reverse_condition (code);
5973
5974 switch (code)
5975 {
5976 case EQ:
5977 suffix = "e";
5978 break;
5979 case NE:
5980 suffix = "ne";
5981 break;
5982 case GT:
5983 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5984 abort ();
5985 suffix = "g";
5986 break;
5987 case GTU:
5988 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5989 Those same assemblers have the same but opposite losage on cmov. */
5990 if (mode != CCmode)
5991 abort ();
5992 suffix = fp ? "nbe" : "a";
5993 break;
5994 case LT:
5995 if (mode == CCNOmode || mode == CCGOCmode)
5996 suffix = "s";
5997 else if (mode == CCmode || mode == CCGCmode)
5998 suffix = "l";
5999 else
6000 abort ();
6001 break;
6002 case LTU:
6003 if (mode != CCmode)
6004 abort ();
6005 suffix = "b";
6006 break;
6007 case GE:
6008 if (mode == CCNOmode || mode == CCGOCmode)
6009 suffix = "ns";
6010 else if (mode == CCmode || mode == CCGCmode)
6011 suffix = "ge";
6012 else
6013 abort ();
6014 break;
6015 case GEU:
6016 /* ??? As above. */
6017 if (mode != CCmode)
6018 abort ();
6019 suffix = fp ? "nb" : "ae";
6020 break;
6021 case LE:
6022 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6023 abort ();
6024 suffix = "le";
6025 break;
6026 case LEU:
6027 if (mode != CCmode)
6028 abort ();
6029 suffix = "be";
6030 break;
6031 case UNORDERED:
6032 suffix = fp ? "u" : "p";
6033 break;
6034 case ORDERED:
6035 suffix = fp ? "nu" : "np";
6036 break;
6037 default:
6038 abort ();
6039 }
6040 fputs (suffix, file);
6041 }
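/* For instance, GT compared in CCmode yields the suffix "g", so the
   consumer ends up emitting "setg", "cmovg" or "jg"; with REVERSE set
   the same comparison comes out as "le".  */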
6042
6043 void
6044 print_reg (x, code, file)
6045 rtx x;
6046 int code;
6047 FILE *file;
6048 {
6049 if (REGNO (x) == ARG_POINTER_REGNUM
6050 || REGNO (x) == FRAME_POINTER_REGNUM
6051 || REGNO (x) == FLAGS_REG
6052 || REGNO (x) == FPSR_REG)
6053 abort ();
6054
6055 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6056 putc ('%', file);
6057
6058 if (code == 'w' || MMX_REG_P (x))
6059 code = 2;
6060 else if (code == 'b')
6061 code = 1;
6062 else if (code == 'k')
6063 code = 4;
6064 else if (code == 'q')
6065 code = 8;
6066 else if (code == 'y')
6067 code = 3;
6068 else if (code == 'h')
6069 code = 0;
6070 else
6071 code = GET_MODE_SIZE (GET_MODE (x));
6072
6073 /* Irritatingly, AMD extended registers use a different naming convention
6074 from the normal registers. */
6075 if (REX_INT_REG_P (x))
6076 {
6077 if (!TARGET_64BIT)
6078 abort ();
6079 switch (code)
6080 {
6081 case 0:
6082 error ("extended registers have no high halves");
6083 break;
6084 case 1:
6085 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6086 break;
6087 case 2:
6088 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6089 break;
6090 case 4:
6091 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6092 break;
6093 case 8:
6094 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6095 break;
6096 default:
6097 error ("unsupported operand size for extended register");
6098 break;
6099 }
6100 return;
6101 }
6102 switch (code)
6103 {
6104 case 3:
6105 if (STACK_TOP_P (x))
6106 {
6107 fputs ("st(0)", file);
6108 break;
6109 }
6110 /* FALLTHRU */
6111 case 8:
6112 case 4:
6113 case 12:
6114 if (! ANY_FP_REG_P (x))
6115 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6116 /* FALLTHRU */
6117 case 16:
6118 case 2:
6119 fputs (hi_reg_name[REGNO (x)], file);
6120 break;
6121 case 1:
6122 fputs (qi_reg_name[REGNO (x)], file);
6123 break;
6124 case 0:
6125 fputs (qi_high_reg_name[REGNO (x)], file);
6126 break;
6127 default:
6128 abort ();
6129 }
6130 }
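/* As an example of the above: %eax printed with 4-byte size comes out
   as "eax" (with a leading '%' in AT&T syntax), while the first AMD
   extended register printed with 4-byte size comes out as "r8d".  */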
6131
6132 /* Locate some local-dynamic symbol still in use by this function
6133 so that we can print its name in some tls_local_dynamic_base
6134 pattern. */
6135
6136 static const char *
6137 get_some_local_dynamic_name ()
6138 {
6139 rtx insn;
6140
6141 if (cfun->machine->some_ld_name)
6142 return cfun->machine->some_ld_name;
6143
6144 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6145 if (INSN_P (insn)
6146 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6147 return cfun->machine->some_ld_name;
6148
6149 abort ();
6150 }
6151
6152 static int
6153 get_some_local_dynamic_name_1 (px, data)
6154 rtx *px;
6155 void *data ATTRIBUTE_UNUSED;
6156 {
6157 rtx x = *px;
6158
6159 if (GET_CODE (x) == SYMBOL_REF
6160 && local_dynamic_symbolic_operand (x, Pmode))
6161 {
6162 cfun->machine->some_ld_name = XSTR (x, 0);
6163 return 1;
6164 }
6165
6166 return 0;
6167 }
6168
6169 /* Meaning of CODE:
6170 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6171 C -- print opcode suffix for set/cmov insn.
6172 c -- like C, but print reversed condition
6173 F,f -- likewise, but for floating-point.
6174 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6175 nothing
6176 R -- print the prefix for register names.
6177 z -- print the opcode suffix for the size of the current operand.
6178 * -- print a star (in certain assembler syntax)
6179 A -- print an absolute memory reference.
6180 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6181 s -- print a shift double count, followed by the assembler's argument
6182 delimiter.
6183 b -- print the QImode name of the register for the indicated operand.
6184 %b0 would print %al if operands[0] is reg 0.
6185 w -- likewise, print the HImode name of the register.
6186 k -- likewise, print the SImode name of the register.
6187 q -- likewise, print the DImode name of the register.
6188 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6189 y -- print "st(0)" instead of "st" as a register.
6190 D -- print condition for SSE cmp instruction.
6191 P -- if PIC, print an @PLT suffix.
6192 X -- don't print any sort of PIC '@' suffix for a symbol.
6193 & -- print some in-use local-dynamic symbol name.
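+ -- print a branch prediction hint prefix (ds for predicted taken,
cs for predicted not taken), based on the insn's REG_BR_PROB note;
see the '+' case below.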
6194 */
6195
6196 void
6197 print_operand (file, x, code)
6198 FILE *file;
6199 rtx x;
6200 int code;
6201 {
6202 if (code)
6203 {
6204 switch (code)
6205 {
6206 case '*':
6207 if (ASSEMBLER_DIALECT == ASM_ATT)
6208 putc ('*', file);
6209 return;
6210
6211 case '&':
6212 assemble_name (file, get_some_local_dynamic_name ());
6213 return;
6214
6215 case 'A':
6216 if (ASSEMBLER_DIALECT == ASM_ATT)
6217 putc ('*', file);
6218 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6219 {
6220 /* Intel syntax. For absolute addresses, registers should not
6221 be surrounded by brackets. */
6222 if (GET_CODE (x) != REG)
6223 {
6224 putc ('[', file);
6225 PRINT_OPERAND (file, x, 0);
6226 putc (']', file);
6227 return;
6228 }
6229 }
6230 else
6231 abort ();
6232
6233 PRINT_OPERAND (file, x, 0);
6234 return;
6235
6236
6237 case 'L':
6238 if (ASSEMBLER_DIALECT == ASM_ATT)
6239 putc ('l', file);
6240 return;
6241
6242 case 'W':
6243 if (ASSEMBLER_DIALECT == ASM_ATT)
6244 putc ('w', file);
6245 return;
6246
6247 case 'B':
6248 if (ASSEMBLER_DIALECT == ASM_ATT)
6249 putc ('b', file);
6250 return;
6251
6252 case 'Q':
6253 if (ASSEMBLER_DIALECT == ASM_ATT)
6254 putc ('l', file);
6255 return;
6256
6257 case 'S':
6258 if (ASSEMBLER_DIALECT == ASM_ATT)
6259 putc ('s', file);
6260 return;
6261
6262 case 'T':
6263 if (ASSEMBLER_DIALECT == ASM_ATT)
6264 putc ('t', file);
6265 return;
6266
6267 case 'z':
6268 /* 387 opcodes don't get size suffixes if the operands are
6269 registers. */
6270 if (STACK_REG_P (x))
6271 return;
6272
6273 /* Likewise if using Intel opcodes. */
6274 if (ASSEMBLER_DIALECT == ASM_INTEL)
6275 return;
6276
6277 /* This is the size of op from size of operand. */
6278 switch (GET_MODE_SIZE (GET_MODE (x)))
6279 {
6280 case 2:
6281 #ifdef HAVE_GAS_FILDS_FISTS
6282 putc ('s', file);
6283 #endif
6284 return;
6285
6286 case 4:
6287 if (GET_MODE (x) == SFmode)
6288 {
6289 putc ('s', file);
6290 return;
6291 }
6292 else
6293 putc ('l', file);
6294 return;
6295
6296 case 12:
6297 case 16:
6298 putc ('t', file);
6299 return;
6300
6301 case 8:
6302 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6303 {
6304 #ifdef GAS_MNEMONICS
6305 putc ('q', file);
6306 #else
6307 putc ('l', file);
6308 putc ('l', file);
6309 #endif
6310 }
6311 else
6312 putc ('l', file);
6313 return;
6314
6315 default:
6316 abort ();
6317 }
6318
6319 case 'b':
6320 case 'w':
6321 case 'k':
6322 case 'q':
6323 case 'h':
6324 case 'y':
6325 case 'X':
6326 case 'P':
6327 break;
6328
6329 case 's':
6330 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6331 {
6332 PRINT_OPERAND (file, x, 0);
6333 putc (',', file);
6334 }
6335 return;
6336
6337 case 'D':
6338 /* Little bit of braindamage here. The SSE compare instructions
6339 use completely different names for the comparisons than the
6340 fp conditional moves do. */
6341 switch (GET_CODE (x))
6342 {
6343 case EQ:
6344 case UNEQ:
6345 fputs ("eq", file);
6346 break;
6347 case LT:
6348 case UNLT:
6349 fputs ("lt", file);
6350 break;
6351 case LE:
6352 case UNLE:
6353 fputs ("le", file);
6354 break;
6355 case UNORDERED:
6356 fputs ("unord", file);
6357 break;
6358 case NE:
6359 case LTGT:
6360 fputs ("neq", file);
6361 break;
6362 case UNGE:
6363 case GE:
6364 fputs ("nlt", file);
6365 break;
6366 case UNGT:
6367 case GT:
6368 fputs ("nle", file);
6369 break;
6370 case ORDERED:
6371 fputs ("ord", file);
6372 break;
6373 default:
6374 abort ();
6375 break;
6376 }
6377 return;
6378 case 'O':
6379 #ifdef CMOV_SUN_AS_SYNTAX
6380 if (ASSEMBLER_DIALECT == ASM_ATT)
6381 {
6382 switch (GET_MODE (x))
6383 {
6384 case HImode: putc ('w', file); break;
6385 case SImode:
6386 case SFmode: putc ('l', file); break;
6387 case DImode:
6388 case DFmode: putc ('q', file); break;
6389 default: abort ();
6390 }
6391 putc ('.', file);
6392 }
6393 #endif
6394 return;
6395 case 'C':
6396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6397 return;
6398 case 'F':
6399 #ifdef CMOV_SUN_AS_SYNTAX
6400 if (ASSEMBLER_DIALECT == ASM_ATT)
6401 putc ('.', file);
6402 #endif
6403 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6404 return;
6405
6406 /* Like above, but reverse condition */
6407 case 'c':
6408 /* Check to see if argument to %c is really a constant
6409 and not a condition code which needs to be reversed. */
6410 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6411 {
6412 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6413 return;
6414 }
6415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6416 return;
6417 case 'f':
6418 #ifdef CMOV_SUN_AS_SYNTAX
6419 if (ASSEMBLER_DIALECT == ASM_ATT)
6420 putc ('.', file);
6421 #endif
6422 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6423 return;
6424 case '+':
6425 {
6426 rtx x;
6427
6428 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6429 return;
6430
6431 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6432 if (x)
6433 {
6434 int pred_val = INTVAL (XEXP (x, 0));
6435
6436 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6437 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6438 {
6439 int taken = pred_val > REG_BR_PROB_BASE / 2;
6440 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6441
6442 /* Emit hints only in the case the default branch prediction
6443 heuristics would fail. */
6444 if (taken != cputaken)
6445 {
6446 /* We use the 0x3e (DS) prefix for taken branches and
6447 the 0x2e (CS) prefix for not taken branches. */
6448 if (taken)
6449 fputs ("ds ; ", file);
6450 else
6451 fputs ("cs ; ", file);
6452 }
6453 }
6454 }
6455 return;
6456 }
6457 default:
6458 output_operand_lossage ("invalid operand code `%c'", code);
6459 }
6460 }
6461
6462 if (GET_CODE (x) == REG)
6463 {
6464 PRINT_REG (x, code, file);
6465 }
6466
6467 else if (GET_CODE (x) == MEM)
6468 {
6469 /* No `byte ptr' prefix for call instructions. */
6470 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6471 {
6472 const char * size;
6473 switch (GET_MODE_SIZE (GET_MODE (x)))
6474 {
6475 case 1: size = "BYTE"; break;
6476 case 2: size = "WORD"; break;
6477 case 4: size = "DWORD"; break;
6478 case 8: size = "QWORD"; break;
6479 case 12: size = "XWORD"; break;
6480 case 16: size = "XMMWORD"; break;
6481 default:
6482 abort ();
6483 }
6484
6485 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6486 if (code == 'b')
6487 size = "BYTE";
6488 else if (code == 'w')
6489 size = "WORD";
6490 else if (code == 'k')
6491 size = "DWORD";
6492
6493 fputs (size, file);
6494 fputs (" PTR ", file);
6495 }
6496
6497 x = XEXP (x, 0);
6498 if (flag_pic && CONSTANT_ADDRESS_P (x))
6499 output_pic_addr_const (file, x, code);
6500 /* Avoid (%rip) for call operands. */
6501 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6502 && GET_CODE (x) != CONST_INT)
6503 output_addr_const (file, x);
6504 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6505 output_operand_lossage ("invalid constraints for operand");
6506 else
6507 output_address (x);
6508 }
6509
6510 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6511 {
6512 REAL_VALUE_TYPE r;
6513 long l;
6514
6515 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6516 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6517
6518 if (ASSEMBLER_DIALECT == ASM_ATT)
6519 putc ('$', file);
6520 fprintf (file, "0x%lx", l);
6521 }
6522
6523 /* These float cases don't actually occur as immediate operands. */
6524 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6525 {
6526 REAL_VALUE_TYPE r;
6527 char dstr[30];
6528
6529 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6530 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6531 fprintf (file, "%s", dstr);
6532 }
6533
6534 else if (GET_CODE (x) == CONST_DOUBLE
6535 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6536 {
6537 REAL_VALUE_TYPE r;
6538 char dstr[30];
6539
6540 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6541 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6542 fprintf (file, "%s", dstr);
6543 }
6544
6545 else if (GET_CODE (x) == CONST
6546 && GET_CODE (XEXP (x, 0)) == UNSPEC
6547 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6548 {
6549 if (ASSEMBLER_DIALECT == ASM_INTEL)
6550 fputs ("DWORD PTR ", file);
6551 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6552 putc ('%', file);
6553 fputs ("gs:0", file);
6554 }
6555
6556 else
6557 {
6558 if (code != 'P')
6559 {
6560 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6561 {
6562 if (ASSEMBLER_DIALECT == ASM_ATT)
6563 putc ('$', file);
6564 }
6565 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6566 || GET_CODE (x) == LABEL_REF)
6567 {
6568 if (ASSEMBLER_DIALECT == ASM_ATT)
6569 putc ('$', file);
6570 else
6571 fputs ("OFFSET FLAT:", file);
6572 }
6573 }
6574 if (GET_CODE (x) == CONST_INT)
6575 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6576 else if (flag_pic)
6577 output_pic_addr_const (file, x, code);
6578 else
6579 output_addr_const (file, x);
6580 }
6581 }
6582 \f
6583 /* Print a memory operand whose address is ADDR. */
6584
6585 void
6586 print_operand_address (file, addr)
6587 FILE *file;
6588 register rtx addr;
6589 {
6590 struct ix86_address parts;
6591 rtx base, index, disp;
6592 int scale;
6593
6594 if (! ix86_decompose_address (addr, &parts))
6595 abort ();
6596
6597 base = parts.base;
6598 index = parts.index;
6599 disp = parts.disp;
6600 scale = parts.scale;
6601
6602 if (!base && !index)
6603 {
6604 /* A displacement-only address requires special attention. */
6605
6606 if (GET_CODE (disp) == CONST_INT)
6607 {
6608 if (ASSEMBLER_DIALECT == ASM_INTEL)
6609 {
6610 if (USER_LABEL_PREFIX[0] == 0)
6611 putc ('%', file);
6612 fputs ("ds:", file);
6613 }
6614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6615 }
6616 else if (flag_pic)
6617 output_pic_addr_const (file, addr, 0);
6618 else
6619 output_addr_const (file, addr);
6620
6621 /* Use the one byte shorter RIP-relative addressing in 64-bit mode. */
6622 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6623 fputs ("(%rip)", file);
6624 }
6625 else
6626 {
6627 if (ASSEMBLER_DIALECT == ASM_ATT)
6628 {
6629 if (disp)
6630 {
6631 if (flag_pic)
6632 output_pic_addr_const (file, disp, 0);
6633 else if (GET_CODE (disp) == LABEL_REF)
6634 output_asm_label (disp);
6635 else
6636 output_addr_const (file, disp);
6637 }
6638
6639 putc ('(', file);
6640 if (base)
6641 PRINT_REG (base, 0, file);
6642 if (index)
6643 {
6644 putc (',', file);
6645 PRINT_REG (index, 0, file);
6646 if (scale != 1)
6647 fprintf (file, ",%d", scale);
6648 }
6649 putc (')', file);
6650 }
6651 else
6652 {
6653 rtx offset = NULL_RTX;
6654
6655 if (disp)
6656 {
6657 /* Pull out the offset of a symbol; print any symbol itself. */
6658 if (GET_CODE (disp) == CONST
6659 && GET_CODE (XEXP (disp, 0)) == PLUS
6660 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6661 {
6662 offset = XEXP (XEXP (disp, 0), 1);
6663 disp = gen_rtx_CONST (VOIDmode,
6664 XEXP (XEXP (disp, 0), 0));
6665 }
6666
6667 if (flag_pic)
6668 output_pic_addr_const (file, disp, 0);
6669 else if (GET_CODE (disp) == LABEL_REF)
6670 output_asm_label (disp);
6671 else if (GET_CODE (disp) == CONST_INT)
6672 offset = disp;
6673 else
6674 output_addr_const (file, disp);
6675 }
6676
6677 putc ('[', file);
6678 if (base)
6679 {
6680 PRINT_REG (base, 0, file);
6681 if (offset)
6682 {
6683 if (INTVAL (offset) >= 0)
6684 putc ('+', file);
6685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6686 }
6687 }
6688 else if (offset)
6689 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6690 else
6691 putc ('0', file);
6692
6693 if (index)
6694 {
6695 putc ('+', file);
6696 PRINT_REG (index, 0, file);
6697 if (scale != 1)
6698 fprintf (file, "*%d", scale);
6699 }
6700 putc (']', file);
6701 }
6702 }
6703 }
6704
6705 bool
6706 output_addr_const_extra (file, x)
6707 FILE *file;
6708 rtx x;
6709 {
6710 rtx op;
6711
6712 if (GET_CODE (x) != UNSPEC)
6713 return false;
6714
6715 op = XVECEXP (x, 0, 0);
6716 switch (XINT (x, 1))
6717 {
6718 case UNSPEC_GOTTPOFF:
6719 output_addr_const (file, op);
6720 fputs ("@GOTTPOFF", file);
6721 break;
6722 case UNSPEC_TPOFF:
6723 output_addr_const (file, op);
6724 fputs ("@TPOFF", file);
6725 break;
6726 case UNSPEC_NTPOFF:
6727 output_addr_const (file, op);
6728 fputs ("@NTPOFF", file);
6729 break;
6730 case UNSPEC_DTPOFF:
6731 output_addr_const (file, op);
6732 fputs ("@DTPOFF", file);
6733 break;
6734
6735 default:
6736 return false;
6737 }
6738
6739 return true;
6740 }
6741 \f
6742 /* Split one or more DImode RTL references into pairs of SImode
6743 references. The RTL can be REG, offsettable MEM, integer constant, or
6744 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6745 split and "num" is its length. lo_half and hi_half are output arrays
6746 that parallel "operands". */
6747
6748 void
6749 split_di (operands, num, lo_half, hi_half)
6750 rtx operands[];
6751 int num;
6752 rtx lo_half[], hi_half[];
6753 {
6754 while (num--)
6755 {
6756 rtx op = operands[num];
6757
6758 /* simplify_subreg refuses to split volatile memory addresses,
6759 but we still have to handle them. */
6760 if (GET_CODE (op) == MEM)
6761 {
6762 lo_half[num] = adjust_address (op, SImode, 0);
6763 hi_half[num] = adjust_address (op, SImode, 4);
6764 }
6765 else
6766 {
6767 lo_half[num] = simplify_gen_subreg (SImode, op,
6768 GET_MODE (op) == VOIDmode
6769 ? DImode : GET_MODE (op), 0);
6770 hi_half[num] = simplify_gen_subreg (SImode, op,
6771 GET_MODE (op) == VOIDmode
6772 ? DImode : GET_MODE (op), 4);
6773 }
6774 }
6775 }
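/* Note that on this little-endian target the SImode piece at byte
   offset 0 is the low half and the piece at byte offset 4 is the high
   half, matching the lo_half/hi_half naming above.  */
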
6776 /* Split one or more TImode RTL references into pairs of DImode
6777 references. The RTL can be REG, offsettable MEM, integer constant, or
6778 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6779 split and "num" is its length. lo_half and hi_half are output arrays
6780 that parallel "operands". */
6781
6782 void
6783 split_ti (operands, num, lo_half, hi_half)
6784 rtx operands[];
6785 int num;
6786 rtx lo_half[], hi_half[];
6787 {
6788 while (num--)
6789 {
6790 rtx op = operands[num];
6791
6792 /* simplify_subreg refuses to split volatile memory addresses, but we
6793 still have to handle them. */
6794 if (GET_CODE (op) == MEM)
6795 {
6796 lo_half[num] = adjust_address (op, DImode, 0);
6797 hi_half[num] = adjust_address (op, DImode, 8);
6798 }
6799 else
6800 {
6801 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6802 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6803 }
6804 }
6805 }
6806 \f
6807 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6808 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6809 is the expression of the binary operation. The output may either be
6810 emitted here, or returned to the caller, like all output_* functions.
6811
6812 There is no guarantee that the operands are the same mode, as they
6813 might be within FLOAT or FLOAT_EXTEND expressions. */
6814
6815 #ifndef SYSV386_COMPAT
6816 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6817 wants to fix the assemblers because that causes incompatibility
6818 with gcc. No-one wants to fix gcc because that causes
6819 incompatibility with assemblers... You can use the option of
6820 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6821 #define SYSV386_COMPAT 1
6822 #endif
6823
6824 const char *
6825 output_387_binary_op (insn, operands)
6826 rtx insn;
6827 rtx *operands;
6828 {
6829 static char buf[30];
6830 const char *p;
6831 const char *ssep;
6832 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6833
6834 #ifdef ENABLE_CHECKING
6835 /* Even if we do not want to check the inputs, this documents the input
6836 constraints, which helps in understanding the following code. */
6837 if (STACK_REG_P (operands[0])
6838 && ((REG_P (operands[1])
6839 && REGNO (operands[0]) == REGNO (operands[1])
6840 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6841 || (REG_P (operands[2])
6842 && REGNO (operands[0]) == REGNO (operands[2])
6843 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6844 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6845 ; /* ok */
6846 else if (!is_sse)
6847 abort ();
6848 #endif
6849
6850 switch (GET_CODE (operands[3]))
6851 {
6852 case PLUS:
6853 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6854 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6855 p = "fiadd";
6856 else
6857 p = "fadd";
6858 ssep = "add";
6859 break;
6860
6861 case MINUS:
6862 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6863 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6864 p = "fisub";
6865 else
6866 p = "fsub";
6867 ssep = "sub";
6868 break;
6869
6870 case MULT:
6871 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6872 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6873 p = "fimul";
6874 else
6875 p = "fmul";
6876 ssep = "mul";
6877 break;
6878
6879 case DIV:
6880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6882 p = "fidiv";
6883 else
6884 p = "fdiv";
6885 ssep = "div";
6886 break;
6887
6888 default:
6889 abort ();
6890 }
6891
6892 if (is_sse)
6893 {
6894 strcpy (buf, ssep);
6895 if (GET_MODE (operands[0]) == SFmode)
6896 strcat (buf, "ss\t{%2, %0|%0, %2}");
6897 else
6898 strcat (buf, "sd\t{%2, %0|%0, %2}");
6899 return buf;
6900 }
6901 strcpy (buf, p);
6902
6903 switch (GET_CODE (operands[3]))
6904 {
6905 case MULT:
6906 case PLUS:
6907 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6908 {
6909 rtx temp = operands[2];
6910 operands[2] = operands[1];
6911 operands[1] = temp;
6912 }
6913
6914 /* Now we know operands[0] == operands[1]. */
6915
6916 if (GET_CODE (operands[2]) == MEM)
6917 {
6918 p = "%z2\t%2";
6919 break;
6920 }
6921
6922 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6923 {
6924 if (STACK_TOP_P (operands[0]))
6925 /* How is it that we are storing to a dead operand[2]?
6926 Well, presumably operands[1] is dead too. We can't
6927 store the result to st(0) as st(0) gets popped on this
6928 instruction. Instead store to operands[2] (which I
6929 think has to be st(1)). st(1) will be popped later.
6930 gcc <= 2.8.1 didn't have this check and generated
6931 assembly code that the Unixware assembler rejected. */
6932 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6933 else
6934 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6935 break;
6936 }
6937
6938 if (STACK_TOP_P (operands[0]))
6939 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6940 else
6941 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6942 break;
6943
6944 case MINUS:
6945 case DIV:
6946 if (GET_CODE (operands[1]) == MEM)
6947 {
6948 p = "r%z1\t%1";
6949 break;
6950 }
6951
6952 if (GET_CODE (operands[2]) == MEM)
6953 {
6954 p = "%z2\t%2";
6955 break;
6956 }
6957
6958 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6959 {
6960 #if SYSV386_COMPAT
6961 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6962 derived assemblers, confusingly reverse the direction of
6963 the operation for fsub{r} and fdiv{r} when the
6964 destination register is not st(0). The Intel assembler
6965 doesn't have this brain damage. Read !SYSV386_COMPAT to
6966 figure out what the hardware really does. */
6967 if (STACK_TOP_P (operands[0]))
6968 p = "{p\t%0, %2|rp\t%2, %0}";
6969 else
6970 p = "{rp\t%2, %0|p\t%0, %2}";
6971 #else
6972 if (STACK_TOP_P (operands[0]))
6973 /* As above for fmul/fadd, we can't store to st(0). */
6974 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6975 else
6976 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6977 #endif
6978 break;
6979 }
6980
6981 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6982 {
6983 #if SYSV386_COMPAT
6984 if (STACK_TOP_P (operands[0]))
6985 p = "{rp\t%0, %1|p\t%1, %0}";
6986 else
6987 p = "{p\t%1, %0|rp\t%0, %1}";
6988 #else
6989 if (STACK_TOP_P (operands[0]))
6990 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6991 else
6992 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6993 #endif
6994 break;
6995 }
6996
6997 if (STACK_TOP_P (operands[0]))
6998 {
6999 if (STACK_TOP_P (operands[1]))
7000 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7001 else
7002 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7003 break;
7004 }
7005 else if (STACK_TOP_P (operands[1]))
7006 {
7007 #if SYSV386_COMPAT
7008 p = "{\t%1, %0|r\t%0, %1}";
7009 #else
7010 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7011 #endif
7012 }
7013 else
7014 {
7015 #if SYSV386_COMPAT
7016 p = "{r\t%2, %0|\t%0, %2}";
7017 #else
7018 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7019 #endif
7020 }
7021 break;
7022
7023 default:
7024 abort ();
7025 }
7026
7027 strcat (buf, p);
7028 return buf;
7029 }
7030
7031 /* Output code to initialize control word copies used by
7032 trunc?f?i patterns. NORMAL is set to the current control word, while
7033 ROUND_DOWN is set to a control word that rounds towards zero. */
7034 void
7035 emit_i387_cw_initialization (normal, round_down)
7036 rtx normal, round_down;
7037 {
7038 rtx reg = gen_reg_rtx (HImode);
7039
7040 emit_insn (gen_x86_fnstcw_1 (normal));
7041 emit_move_insn (reg, normal);
7042 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7043 && !TARGET_64BIT)
7044 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7045 else
7046 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7047 emit_move_insn (round_down, reg);
7048 }
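/* Both paths above set the two i387 rounding-control bits (the 0xc00
   mask, or 0xc inserted into the high byte), i.e. round toward zero,
   which is what the truncating fist/fistp patterns need despite the
   ROUND_DOWN name.  */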
7049
7050 /* Output code for INSN to convert a float to a signed int. OPERANDS
7051 are the insn operands. The output may be [HSD]Imode and the input
7052 operand may be [SDX]Fmode. */
7053
7054 const char *
7055 output_fix_trunc (insn, operands)
7056 rtx insn;
7057 rtx *operands;
7058 {
7059 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7060 int dimode_p = GET_MODE (operands[0]) == DImode;
7061
7062 /* Jump through a hoop or two for DImode, since the hardware has no
7063 non-popping instruction. We used to do this a different way, but
7064 that was somewhat fragile and broke with post-reload splitters. */
7065 if (dimode_p && !stack_top_dies)
7066 output_asm_insn ("fld\t%y1", operands);
7067
7068 if (!STACK_TOP_P (operands[1]))
7069 abort ();
7070
7071 if (GET_CODE (operands[0]) != MEM)
7072 abort ();
7073
7074 output_asm_insn ("fldcw\t%3", operands);
7075 if (stack_top_dies || dimode_p)
7076 output_asm_insn ("fistp%z0\t%0", operands);
7077 else
7078 output_asm_insn ("fist%z0\t%0", operands);
7079 output_asm_insn ("fldcw\t%2", operands);
7080
7081 return "";
7082 }
7083
7084 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7085 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7086 when fucom should be used. */
7087
7088 const char *
7089 output_fp_compare (insn, operands, eflags_p, unordered_p)
7090 rtx insn;
7091 rtx *operands;
7092 int eflags_p, unordered_p;
7093 {
7094 int stack_top_dies;
7095 rtx cmp_op0 = operands[0];
7096 rtx cmp_op1 = operands[1];
7097 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7098
7099 if (eflags_p == 2)
7100 {
7101 cmp_op0 = cmp_op1;
7102 cmp_op1 = operands[2];
7103 }
7104 if (is_sse)
7105 {
7106 if (GET_MODE (operands[0]) == SFmode)
7107 if (unordered_p)
7108 return "ucomiss\t{%1, %0|%0, %1}";
7109 else
7110 return "comiss\t{%1, %0|%0, %y}";
7111 else
7112 if (unordered_p)
7113 return "ucomisd\t{%1, %0|%0, %1}";
7114 else
7115 return "comisd\t{%1, %0|%0, %y}";
7116 }
7117
7118 if (! STACK_TOP_P (cmp_op0))
7119 abort ();
7120
7121 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7122
7123 if (STACK_REG_P (cmp_op1)
7124 && stack_top_dies
7125 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7126 && REGNO (cmp_op1) != FIRST_STACK_REG)
7127 {
7128 /* If the top of the 387 stack dies, and the other operand
7129 is also a stack register that dies, then this must be a
7130 `fcompp' float compare. */
7131
7132 if (eflags_p == 1)
7133 {
7134 /* There is no double popping fcomi variant. Fortunately,
7135 eflags is immune from the fstp's cc clobbering. */
7136 if (unordered_p)
7137 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7138 else
7139 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7140 return "fstp\t%y0";
7141 }
7142 else
7143 {
7144 if (eflags_p == 2)
7145 {
7146 if (unordered_p)
7147 return "fucompp\n\tfnstsw\t%0";
7148 else
7149 return "fcompp\n\tfnstsw\t%0";
7150 }
7151 else
7152 {
7153 if (unordered_p)
7154 return "fucompp";
7155 else
7156 return "fcompp";
7157 }
7158 }
7159 }
7160 else
7161 {
7162 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7163
7164 static const char * const alt[24] =
7165 {
7166 "fcom%z1\t%y1",
7167 "fcomp%z1\t%y1",
7168 "fucom%z1\t%y1",
7169 "fucomp%z1\t%y1",
7170
7171 "ficom%z1\t%y1",
7172 "ficomp%z1\t%y1",
7173 NULL,
7174 NULL,
7175
7176 "fcomi\t{%y1, %0|%0, %y1}",
7177 "fcomip\t{%y1, %0|%0, %y1}",
7178 "fucomi\t{%y1, %0|%0, %y1}",
7179 "fucomip\t{%y1, %0|%0, %y1}",
7180
7181 NULL,
7182 NULL,
7183 NULL,
7184 NULL,
7185
7186 "fcom%z2\t%y2\n\tfnstsw\t%0",
7187 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7188 "fucom%z2\t%y2\n\tfnstsw\t%0",
7189 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7190
7191 "ficom%z2\t%y2\n\tfnstsw\t%0",
7192 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7193 NULL,
7194 NULL
7195 };
7196
7197 int mask;
7198 const char *ret;
7199
7200 mask = eflags_p << 3;
7201 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7202 mask |= unordered_p << 1;
7203 mask |= stack_top_dies;
7204
7205 if (mask >= 24)
7206 abort ();
7207 ret = alt[mask];
7208 if (ret == NULL)
7209 abort ();
7210
7211 return ret;
7212 }
7213 }
7214
7215 void
7216 ix86_output_addr_vec_elt (file, value)
7217 FILE *file;
7218 int value;
7219 {
7220 const char *directive = ASM_LONG;
7221
7222 if (TARGET_64BIT)
7223 {
7224 #ifdef ASM_QUAD
7225 directive = ASM_QUAD;
7226 #else
7227 abort ();
7228 #endif
7229 }
7230
7231 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7232 }
7233
7234 void
7235 ix86_output_addr_diff_elt (file, value, rel)
7236 FILE *file;
7237 int value, rel;
7238 {
7239 if (TARGET_64BIT)
7240 fprintf (file, "%s%s%d-%s%d\n",
7241 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7242 else if (HAVE_AS_GOTOFF_IN_DATA)
7243 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7244 else
7245 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7246 ASM_LONG, LPREFIX, value);
7247 }
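/* For illustration, assuming LPREFIX is ".L" and ASM_LONG is "\t.long\t",
   an entry for label 42 comes out as ".long .L42@GOTOFF" when the
   assembler accepts @GOTOFF in data, and as
   ".long _GLOBAL_OFFSET_TABLE_+[.-.L42]" otherwise.  */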
7248 \f
7249 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7250 for the target. */
7251
7252 void
7253 ix86_expand_clear (dest)
7254 rtx dest;
7255 {
7256 rtx tmp;
7257
7258 /* We play register width games, which are only valid after reload. */
7259 if (!reload_completed)
7260 abort ();
7261
7262 /* Avoid HImode and its attendant prefix byte. */
7263 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7264 dest = gen_rtx_REG (SImode, REGNO (dest));
7265
7266 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7267
7268 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7269 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7270 {
7271 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7272 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7273 }
7274
7275 emit_insn (tmp);
7276 }
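/* For example, clearing %eax normally becomes "xorl %eax, %eax"
   (2 bytes, with a flags clobber recorded) rather than "movl $0, %eax"
   (5 bytes); the mov form is kept only when TARGET_USE_MOV0 asks for it
   and we are not optimizing for size.  */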
7277
7278 /* X is an unchanging MEM. If it is a constant pool reference, return
7279 the constant pool rtx, else NULL. */
7280
7281 static rtx
7282 maybe_get_pool_constant (x)
7283 rtx x;
7284 {
7285 x = XEXP (x, 0);
7286
7287 if (flag_pic)
7288 {
7289 if (GET_CODE (x) != PLUS)
7290 return NULL_RTX;
7291 if (XEXP (x, 0) != pic_offset_table_rtx)
7292 return NULL_RTX;
7293 x = XEXP (x, 1);
7294 if (GET_CODE (x) != CONST)
7295 return NULL_RTX;
7296 x = XEXP (x, 0);
7297 if (GET_CODE (x) != UNSPEC)
7298 return NULL_RTX;
7299 if (XINT (x, 1) != UNSPEC_GOTOFF)
7300 return NULL_RTX;
7301 x = XVECEXP (x, 0, 0);
7302 }
7303
7304 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7305 return get_pool_constant (x);
7306
7307 return NULL_RTX;
7308 }
7309
7310 void
7311 ix86_expand_move (mode, operands)
7312 enum machine_mode mode;
7313 rtx operands[];
7314 {
7315 int strict = (reload_in_progress || reload_completed);
7316 rtx insn, op0, op1, tmp;
7317
7318 op0 = operands[0];
7319 op1 = operands[1];
7320
7321 /* ??? We have a slight problem. We need to say that tls symbols are
7322 not legitimate constants so that reload does not helpfully reload
7323 these constants from a REG_EQUIV, which we cannot handle. (Recall
7324 that general- and local-dynamic address resolution requires a
7325 function call.)
7326
7327 However, if we say that tls symbols are not legitimate constants,
7328 then emit_move_insn helpfully drops them into the constant pool.
7329
7330 It is far easier to work around emit_move_insn than reload. Recognize
7331 the MEM that we would have created and extract the symbol_ref. */
7332
7333 if (mode == Pmode
7334 && GET_CODE (op1) == MEM
7335 && RTX_UNCHANGING_P (op1))
7336 {
7337 tmp = maybe_get_pool_constant (op1);
7338 /* Note that we only care about symbolic constants here, which
7339 unlike CONST_INT will always have a proper mode. */
7340 if (tmp && GET_MODE (tmp) == Pmode)
7341 op1 = tmp;
7342 }
7343
7344 if (tls_symbolic_operand (op1, Pmode))
7345 {
7346 op1 = legitimize_address (op1, op1, VOIDmode);
7347 if (GET_CODE (op0) == MEM)
7348 {
7349 tmp = gen_reg_rtx (mode);
7350 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7351 op1 = tmp;
7352 }
7353 }
7354 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7355 {
7356 if (GET_CODE (op0) == MEM)
7357 op1 = force_reg (Pmode, op1);
7358 else
7359 {
7360 rtx temp = op0;
7361 if (GET_CODE (temp) != REG)
7362 temp = gen_reg_rtx (Pmode);
7363 temp = legitimize_pic_address (op1, temp);
7364 if (temp == op0)
7365 return;
7366 op1 = temp;
7367 }
7368 }
7369 else
7370 {
7371 if (GET_CODE (op0) == MEM
7372 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7373 || !push_operand (op0, mode))
7374 && GET_CODE (op1) == MEM)
7375 op1 = force_reg (mode, op1);
7376
7377 if (push_operand (op0, mode)
7378 && ! general_no_elim_operand (op1, mode))
7379 op1 = copy_to_mode_reg (mode, op1);
7380
7381 /* Force large constants in 64-bit compilation into registers
7382 to get them CSEed. */
7383 if (TARGET_64BIT && mode == DImode
7384 && immediate_operand (op1, mode)
7385 && !x86_64_zero_extended_value (op1)
7386 && !register_operand (op0, mode)
7387 && optimize && !reload_completed && !reload_in_progress)
7388 op1 = copy_to_mode_reg (mode, op1);
7389
7390 if (FLOAT_MODE_P (mode))
7391 {
7392 /* If we are loading a floating point constant to a register,
7393 force the value to memory now, since we'll get better code
7394 out the back end. */
7395
7396 if (strict)
7397 ;
7398 else if (GET_CODE (op1) == CONST_DOUBLE
7399 && register_operand (op0, mode))
7400 op1 = validize_mem (force_const_mem (mode, op1));
7401 }
7402 }
7403
7404 insn = gen_rtx_SET (VOIDmode, op0, op1);
7405
7406 emit_insn (insn);
7407 }
7408
7409 void
7410 ix86_expand_vector_move (mode, operands)
7411 enum machine_mode mode;
7412 rtx operands[];
7413 {
7414 /* Force constants other than zero into memory. We do not know how
7415 the instructions used to build constants modify the upper 64 bits
7416 of the register; once we have that information we may be able
7417 to handle some of them more efficiently. */
7418 if ((reload_in_progress | reload_completed) == 0
7419 && register_operand (operands[0], mode)
7420 && CONSTANT_P (operands[1]))
7421 {
7422 rtx addr = gen_reg_rtx (Pmode);
7423 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7424 operands[1] = gen_rtx_MEM (mode, addr);
7425 }
7426
7427 /* Make operand1 a register if it isn't already. */
7428 if ((reload_in_progress | reload_completed) == 0
7429 && !register_operand (operands[0], mode)
7430 && !register_operand (operands[1], mode)
7431 && operands[1] != CONST0_RTX (mode))
7432 {
7433 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7434 emit_move_insn (operands[0], temp);
7435 return;
7436 }
7437
7438 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7439 }
7440
7441 /* Attempt to expand a binary operator. Make the expansion closer to the
7442 actual machine than just general_operand, which would allow 3 separate
7443 memory references (one output, two input) in a single insn. */
7444
7445 void
7446 ix86_expand_binary_operator (code, mode, operands)
7447 enum rtx_code code;
7448 enum machine_mode mode;
7449 rtx operands[];
7450 {
7451 int matching_memory;
7452 rtx src1, src2, dst, op, clob;
7453
7454 dst = operands[0];
7455 src1 = operands[1];
7456 src2 = operands[2];
7457
7458 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7459 if (GET_RTX_CLASS (code) == 'c'
7460 && (rtx_equal_p (dst, src2)
7461 || immediate_operand (src1, mode)))
7462 {
7463 rtx temp = src1;
7464 src1 = src2;
7465 src2 = temp;
7466 }
7467
7468 /* If the destination is memory, and we do not have matching source
7469 operands, do things in registers. */
7470 matching_memory = 0;
7471 if (GET_CODE (dst) == MEM)
7472 {
7473 if (rtx_equal_p (dst, src1))
7474 matching_memory = 1;
7475 else if (GET_RTX_CLASS (code) == 'c'
7476 && rtx_equal_p (dst, src2))
7477 matching_memory = 2;
7478 else
7479 dst = gen_reg_rtx (mode);
7480 }
7481
7482 /* Both source operands cannot be in memory. */
7483 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7484 {
7485 if (matching_memory != 2)
7486 src2 = force_reg (mode, src2);
7487 else
7488 src1 = force_reg (mode, src1);
7489 }
7490
7491 /* If the operation is not commutative, source 1 cannot be a constant
7492 or non-matching memory. */
7493 if ((CONSTANT_P (src1)
7494 || (!matching_memory && GET_CODE (src1) == MEM))
7495 && GET_RTX_CLASS (code) != 'c')
7496 src1 = force_reg (mode, src1);
7497
7498 /* If optimizing, copy to regs to improve CSE */
7499 if (optimize && ! no_new_pseudos)
7500 {
7501 if (GET_CODE (dst) == MEM)
7502 dst = gen_reg_rtx (mode);
7503 if (GET_CODE (src1) == MEM)
7504 src1 = force_reg (mode, src1);
7505 if (GET_CODE (src2) == MEM)
7506 src2 = force_reg (mode, src2);
7507 }
7508
7509 /* Emit the instruction. */
7510
7511 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7512 if (reload_in_progress)
7513 {
7514 /* Reload doesn't know about the flags register, and doesn't know that
7515 it doesn't want to clobber it. We can only do this with PLUS. */
7516 if (code != PLUS)
7517 abort ();
7518 emit_insn (op);
7519 }
7520 else
7521 {
7522 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7523 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7524 }
7525
7526 /* Fix up the destination if needed. */
7527 if (dst != operands[0])
7528 emit_move_insn (operands[0], dst);
7529 }
7530
7531 /* Return TRUE or FALSE depending on whether the binary operator meets the
7532 appropriate constraints. */
7533
7534 int
7535 ix86_binary_operator_ok (code, mode, operands)
7536 enum rtx_code code;
7537 enum machine_mode mode ATTRIBUTE_UNUSED;
7538 rtx operands[3];
7539 {
7540 /* Both source operands cannot be in memory. */
7541 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7542 return 0;
7543 /* If the operation is not commutative, source 1 cannot be a constant. */
7544 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7545 return 0;
7546 /* If the destination is memory, we must have a matching source operand. */
7547 if (GET_CODE (operands[0]) == MEM
7548 && ! (rtx_equal_p (operands[0], operands[1])
7549 || (GET_RTX_CLASS (code) == 'c'
7550 && rtx_equal_p (operands[0], operands[2]))))
7551 return 0;
7552 /* If the operation is not commutative and source 1 is memory, we must
7553 have a matching destination. */
7554 if (GET_CODE (operands[1]) == MEM
7555 && GET_RTX_CLASS (code) != 'c'
7556 && ! rtx_equal_p (operands[0], operands[1]))
7557 return 0;
7558 return 1;
7559 }
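/* These rules mirror the two-operand, read-modify-write shape of the
   i386 ALU instructions: e.g. "addl %eax, mem" computes mem = mem + eax,
   so a memory destination must also appear as one of the sources.  */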
7560
7561 /* Attempt to expand a unary operator. Make the expansion closer to the
7562 actual machine than just general_operand, which would allow 2 separate
7563 memory references (one output, one input) in a single insn. */
7564
7565 void
7566 ix86_expand_unary_operator (code, mode, operands)
7567 enum rtx_code code;
7568 enum machine_mode mode;
7569 rtx operands[];
7570 {
7571 int matching_memory;
7572 rtx src, dst, op, clob;
7573
7574 dst = operands[0];
7575 src = operands[1];
7576
7577 /* If the destination is memory, and we do not have matching source
7578 operands, do things in registers. */
7579 matching_memory = 0;
7580 if (GET_CODE (dst) == MEM)
7581 {
7582 if (rtx_equal_p (dst, src))
7583 matching_memory = 1;
7584 else
7585 dst = gen_reg_rtx (mode);
7586 }
7587
7588 /* When source operand is memory, destination must match. */
7589 if (!matching_memory && GET_CODE (src) == MEM)
7590 src = force_reg (mode, src);
7591
7592 /* If optimizing, copy to regs to improve CSE */
7593 if (optimize && ! no_new_pseudos)
7594 {
7595 if (GET_CODE (dst) == MEM)
7596 dst = gen_reg_rtx (mode);
7597 if (GET_CODE (src) == MEM)
7598 src = force_reg (mode, src);
7599 }
7600
7601 /* Emit the instruction. */
7602
7603 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7604 if (reload_in_progress || code == NOT)
7605 {
7606 /* Reload doesn't know about the flags register, and doesn't know that
7607 it doesn't want to clobber it. */
7608 if (code != NOT)
7609 abort ();
7610 emit_insn (op);
7611 }
7612 else
7613 {
7614 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7615 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7616 }
7617
7618 /* Fix up the destination if needed. */
7619 if (dst != operands[0])
7620 emit_move_insn (operands[0], dst);
7621 }
7622
7623 /* Return TRUE or FALSE depending on whether the unary operator meets the
7624 appropriate constraints. */
7625
7626 int
7627 ix86_unary_operator_ok (code, mode, operands)
7628 enum rtx_code code ATTRIBUTE_UNUSED;
7629 enum machine_mode mode ATTRIBUTE_UNUSED;
7630 rtx operands[2] ATTRIBUTE_UNUSED;
7631 {
7632 /* If one of the operands is memory, the source and destination must match. */
7633 if ((GET_CODE (operands[0]) == MEM
7634 || GET_CODE (operands[1]) == MEM)
7635 && ! rtx_equal_p (operands[0], operands[1]))
7636 return FALSE;
7637 return TRUE;
7638 }
7639
7640 /* Return TRUE or FALSE depending on whether the first SET in INSN
7641 has source and destination with matching CC modes, and whether the
7642 CC mode is at least as constrained as REQ_MODE. */
7643
7644 int
7645 ix86_match_ccmode (insn, req_mode)
7646 rtx insn;
7647 enum machine_mode req_mode;
7648 {
7649 rtx set;
7650 enum machine_mode set_mode;
7651
7652 set = PATTERN (insn);
7653 if (GET_CODE (set) == PARALLEL)
7654 set = XVECEXP (set, 0, 0);
7655 if (GET_CODE (set) != SET)
7656 abort ();
7657 if (GET_CODE (SET_SRC (set)) != COMPARE)
7658 abort ();
7659
7660 set_mode = GET_MODE (SET_DEST (set));
7661 switch (set_mode)
7662 {
7663 case CCNOmode:
7664 if (req_mode != CCNOmode
7665 && (req_mode != CCmode
7666 || XEXP (SET_SRC (set), 1) != const0_rtx))
7667 return 0;
7668 break;
7669 case CCmode:
7670 if (req_mode == CCGCmode)
7671 return 0;
7672 /* FALLTHRU */
7673 case CCGCmode:
7674 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7675 return 0;
7676 /* FALLTHRU */
7677 case CCGOCmode:
7678 if (req_mode == CCZmode)
7679 return 0;
7680 /* FALLTHRU */
7681 case CCZmode:
7682 break;
7683
7684 default:
7685 abort ();
7686 }
7687
7688 return (GET_MODE (SET_SRC (set)) == set_mode);
7689 }
7690
7691 /* Generate insn patterns to do an integer compare of OPERANDS. */
7692
7693 static rtx
7694 ix86_expand_int_compare (code, op0, op1)
7695 enum rtx_code code;
7696 rtx op0, op1;
7697 {
7698 enum machine_mode cmpmode;
7699 rtx tmp, flags;
7700
7701 cmpmode = SELECT_CC_MODE (code, op0, op1);
7702 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7703
7704 /* This is very simple, but making the interface the same as in the
7705 FP case makes the rest of the code easier. */
7706 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7707 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7708
7709 /* Return the test that should be put into the flags user, i.e.
7710 the bcc, scc, or cmov instruction. */
7711 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7712 }
7713
7714 /* Figure out whether to use ordered or unordered fp comparisons.
7715 Return the appropriate mode to use. */
7716
7717 enum machine_mode
7718 ix86_fp_compare_mode (code)
7719 enum rtx_code code ATTRIBUTE_UNUSED;
7720 {
7721 /* ??? In order to make all comparisons reversible, we do all comparisons
7722 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7723 all forms of trapping and nontrapping comparisons, we can make inequality
7724 comparisons trapping again, since it results in better code when using
7725 FCOM based compares. */
7726 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7727 }
7728
7729 enum machine_mode
7730 ix86_cc_mode (code, op0, op1)
7731 enum rtx_code code;
7732 rtx op0, op1;
7733 {
7734 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7735 return ix86_fp_compare_mode (code);
7736 switch (code)
7737 {
7738 /* Only zero flag is needed. */
7739 case EQ: /* ZF=0 */
7740 case NE: /* ZF!=0 */
7741 return CCZmode;
7742 /* Codes needing carry flag. */
7743 case GEU: /* CF=0 */
7744 case GTU: /* CF=0 & ZF=0 */
7745 case LTU: /* CF=1 */
7746 case LEU: /* CF=1 | ZF=1 */
7747 return CCmode;
7748 /* Codes possibly doable only with sign flag when
7749 comparing against zero. */
7750 case GE: /* SF=OF or SF=0 */
7751 case LT: /* SF<>OF or SF=1 */
7752 if (op1 == const0_rtx)
7753 return CCGOCmode;
7754 else
7755 /* For other cases the carry flag is not required. */
7756 return CCGCmode;
7757 /* Codes doable only with the sign flag when comparing
7758 against zero, but we miss the jump instruction for it,
7759 so we need to use relational tests against overflow,
7760 which thus needs to be zero. */
7761 case GT: /* ZF=0 & SF=OF */
7762 case LE: /* ZF=1 | SF<>OF */
7763 if (op1 == const0_rtx)
7764 return CCNOmode;
7765 else
7766 return CCGCmode;
7767 /* The strcmp pattern does a (use flags), and combine may ask us for the
7768 proper mode. */
7769 case USE:
7770 return CCmode;
7771 default:
7772 abort ();
7773 }
7774 }
7775
7776 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7777
7778 int
7779 ix86_use_fcomi_compare (code)
7780 enum rtx_code code ATTRIBUTE_UNUSED;
7781 {
7782 enum rtx_code swapped_code = swap_condition (code);
7783 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7784 || (ix86_fp_comparison_cost (swapped_code)
7785 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7786 }
7787
7788 /* Swap, force into registers, or otherwise massage the two operands
7789 to a fp comparison. The operands are updated in place; the new
7790 comparison code is returned. */
7791
7792 static enum rtx_code
7793 ix86_prepare_fp_compare_args (code, pop0, pop1)
7794 enum rtx_code code;
7795 rtx *pop0, *pop1;
7796 {
7797 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7798 rtx op0 = *pop0, op1 = *pop1;
7799 enum machine_mode op_mode = GET_MODE (op0);
7800 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7801
7802 /* All of the unordered compare instructions only work on registers.
7803 The same is true of the XFmode compare instructions. The same is
7804 true of the fcomi compare instructions. */
7805
7806 if (!is_sse
7807 && (fpcmp_mode == CCFPUmode
7808 || op_mode == XFmode
7809 || op_mode == TFmode
7810 || ix86_use_fcomi_compare (code)))
7811 {
7812 op0 = force_reg (op_mode, op0);
7813 op1 = force_reg (op_mode, op1);
7814 }
7815 else
7816 {
7817 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7818 things around if they appear profitable, otherwise force op0
7819 into a register. */
7820
7821 if (standard_80387_constant_p (op0) == 0
7822 || (GET_CODE (op0) == MEM
7823 && ! (standard_80387_constant_p (op1) == 0
7824 || GET_CODE (op1) == MEM)))
7825 {
7826 rtx tmp;
7827 tmp = op0, op0 = op1, op1 = tmp;
7828 code = swap_condition (code);
7829 }
7830
7831 if (GET_CODE (op0) != REG)
7832 op0 = force_reg (op_mode, op0);
7833
7834 if (CONSTANT_P (op1))
7835 {
7836 if (standard_80387_constant_p (op1))
7837 op1 = force_reg (op_mode, op1);
7838 else
7839 op1 = validize_mem (force_const_mem (op_mode, op1));
7840 }
7841 }
7842
7843 /* Try to rearrange the comparison to make it cheaper. */
7844 if (ix86_fp_comparison_cost (code)
7845 > ix86_fp_comparison_cost (swap_condition (code))
7846 && (GET_CODE (op1) == REG || !no_new_pseudos))
7847 {
7848 rtx tmp;
7849 tmp = op0, op0 = op1, op1 = tmp;
7850 code = swap_condition (code);
7851 if (GET_CODE (op0) != REG)
7852 op0 = force_reg (op_mode, op0);
7853 }
7854
7855 *pop0 = op0;
7856 *pop1 = op1;
7857 return code;
7858 }
7859
7860 /* Convert the comparison codes we use to represent an FP comparison into the
7861 integer code that will result in a proper branch. Return UNKNOWN if no such
7862 code is available. */
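/* This mapping relies on fcomi (and fcom followed by fnstsw/sahf) setting
ZF and CF from C3 and C0 the way an unsigned integer compare would, as the
flag table in ix86_fp_comparison_codes shows, so e.g. GT becomes GTU and
UNLT becomes LTU. */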
7863 static enum rtx_code
7864 ix86_fp_compare_code_to_integer (code)
7865 enum rtx_code code;
7866 {
7867 switch (code)
7868 {
7869 case GT:
7870 return GTU;
7871 case GE:
7872 return GEU;
7873 case ORDERED:
7874 case UNORDERED:
7875 return code;
7876 break;
7877 case UNEQ:
7878 return EQ;
7879 break;
7880 case UNLT:
7881 return LTU;
7882 break;
7883 case UNLE:
7884 return LEU;
7885 break;
7886 case LTGT:
7887 return NE;
7888 break;
7889 default:
7890 return UNKNOWN;
7891 }
7892 }
7893
7894 /* Split comparison code CODE into comparisons we can do using branch
7895 instructions. BYPASS_CODE is the comparison code for a branch that will
7896 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7897 is not required, its value is set to NIL.
7898 We never require more than two branches. */
7899 static void
7900 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7901 enum rtx_code code, *bypass_code, *first_code, *second_code;
7902 {
7903 *first_code = code;
7904 *bypass_code = NIL;
7905 *second_code = NIL;
7906
7907 /* The fcomi comparison sets flags as follows:
7908
7909 cmp ZF PF CF
7910 > 0 0 0
7911 < 0 0 1
7912 = 1 0 0
7913 un 1 1 1 */
7914
7915 switch (code)
7916 {
7917 case GT: /* GTU - CF=0 & ZF=0 */
7918 case GE: /* GEU - CF=0 */
7919 case ORDERED: /* PF=0 */
7920 case UNORDERED: /* PF=1 */
7921 case UNEQ: /* EQ - ZF=1 */
7922 case UNLT: /* LTU - CF=1 */
7923 case UNLE: /* LEU - CF=1 | ZF=1 */
7924 case LTGT: /* NE - ZF=0 */
7925 break;
7926 case LT: /* LTU - CF=1 - fails on unordered */
7927 *first_code = UNLT;
7928 *bypass_code = UNORDERED;
7929 break;
7930 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7931 *first_code = UNLE;
7932 *bypass_code = UNORDERED;
7933 break;
7934 case EQ: /* EQ - ZF=1 - fails on unordered */
7935 *first_code = UNEQ;
7936 *bypass_code = UNORDERED;
7937 break;
7938 case NE: /* NE - ZF=0 - fails on unordered */
7939 *first_code = LTGT;
7940 *second_code = UNORDERED;
7941 break;
7942 case UNGE: /* GEU - CF=0 - fails on unordered */
7943 *first_code = GE;
7944 *second_code = UNORDERED;
7945 break;
7946 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7947 *first_code = GT;
7948 *second_code = UNORDERED;
7949 break;
7950 default:
7951 abort ();
7952 }
7953 if (!TARGET_IEEE_FP)
7954 {
7955 *second_code = NIL;
7956 *bypass_code = NIL;
7957 }
7958 }
7959
7960 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7961 All following functions use the number of instructions as the cost metric.
7962 In the future this should be tweaked to compute bytes for optimize_size and
7963 to take into account the performance of various instructions on various CPUs. */
7964 static int
7965 ix86_fp_comparison_arithmetics_cost (code)
7966 enum rtx_code code;
7967 {
7968 if (!TARGET_IEEE_FP)
7969 return 4;
7970 /* The cost of code output by ix86_expand_fp_compare. */
7971 switch (code)
7972 {
7973 case UNLE:
7974 case UNLT:
7975 case LTGT:
7976 case GT:
7977 case GE:
7978 case UNORDERED:
7979 case ORDERED:
7980 case UNEQ:
7981 return 4;
7982 break;
7983 case LT:
7984 case NE:
7985 case EQ:
7986 case UNGE:
7987 return 5;
7988 break;
7989 case LE:
7990 case UNGT:
7991 return 6;
7992 break;
7993 default:
7994 abort ();
7995 }
7996 }
7997
7998 /* Return cost of comparison done using fcomi operation.
7999 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8000 static int
8001 ix86_fp_comparison_fcomi_cost (code)
8002 enum rtx_code code;
8003 {
8004 enum rtx_code bypass_code, first_code, second_code;
8005 /* Return an arbitrarily high cost when the instruction is not supported - this
8006 prevents gcc from using it. */
8007 if (!TARGET_CMOVE)
8008 return 1024;
8009 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
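/* Base cost of 2 for the plain fcomi sequence, against 3 for the
fnstsw/sahf sequence below; one is added when a bypass or second branch
is also required. */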
8010 return (bypass_code != NIL || second_code != NIL) + 2;
8011 }
8012
8013 /* Return cost of comparison done using sahf operation.
8014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8015 static int
8016 ix86_fp_comparison_sahf_cost (code)
8017 enum rtx_code code;
8018 {
8019 enum rtx_code bypass_code, first_code, second_code;
8020 /* Return an arbitrarily high cost when the instruction is not preferred - this
8021 prevents gcc from using it. */
8022 if (!TARGET_USE_SAHF && !optimize_size)
8023 return 1024;
8024 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8025 return (bypass_code != NIL || second_code != NIL) + 3;
8026 }
8027
8028 /* Compute the cost of the comparison using the cheapest available method.
8029 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8030 static int
8031 ix86_fp_comparison_cost (code)
8032 enum rtx_code code;
8033 {
8034 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8035 int min;
8036
8037 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8038 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8039
8040 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8041 if (min > sahf_cost)
8042 min = sahf_cost;
8043 if (min > fcomi_cost)
8044 min = fcomi_cost;
8045 return min;
8046 }
8047
8048 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8049
8050 static rtx
8051 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8052 enum rtx_code code;
8053 rtx op0, op1, scratch;
8054 rtx *second_test;
8055 rtx *bypass_test;
8056 {
8057 enum machine_mode fpcmp_mode, intcmp_mode;
8058 rtx tmp, tmp2;
8059 int cost = ix86_fp_comparison_cost (code);
8060 enum rtx_code bypass_code, first_code, second_code;
8061
8062 fpcmp_mode = ix86_fp_compare_mode (code);
8063 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8064
8065 if (second_test)
8066 *second_test = NULL_RTX;
8067 if (bypass_test)
8068 *bypass_test = NULL_RTX;
8069
8070 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8071
8072 /* Do fcomi/sahf based test when profitable. */
8073 if ((bypass_code == NIL || bypass_test)
8074 && (second_code == NIL || second_test)
8075 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8076 {
8077 if (TARGET_CMOVE)
8078 {
8079 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8080 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8081 tmp);
8082 emit_insn (tmp);
8083 }
8084 else
8085 {
8086 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8087 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8088 if (!scratch)
8089 scratch = gen_reg_rtx (HImode);
8090 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8091 emit_insn (gen_x86_sahf_1 (scratch));
8092 }
8093
8094 /* The FP codes work out to act like unsigned. */
8095 intcmp_mode = fpcmp_mode;
8096 code = first_code;
8097 if (bypass_code != NIL)
8098 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8099 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8100 const0_rtx);
8101 if (second_code != NIL)
8102 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8103 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8104 const0_rtx);
8105 }
8106 else
8107 {
8108 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8109 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8110 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8111 if (!scratch)
8112 scratch = gen_reg_rtx (HImode);
8113 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8114
8115 /* In the unordered case, we have to check C2 for NaN's, which
8116 doesn't happen to work out to anything nice combination-wise.
8117 So do some bit twiddling on the value we've got in AH to come
8118 up with an appropriate set of condition codes. */
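/* After fnstsw, AH holds bits 8-15 of the FPU status word, so C0, C2 and C3
land in bits 0, 2 and 6 of AH. The masks 0x01, 0x04 and 0x40 used below
therefore select C0 (below), C2 (unordered) and C3 (equal), and 0x45
selects all three at once. */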
8119
8120 intcmp_mode = CCNOmode;
8121 switch (code)
8122 {
8123 case GT:
8124 case UNGT:
8125 if (code == GT || !TARGET_IEEE_FP)
8126 {
8127 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8128 code = EQ;
8129 }
8130 else
8131 {
8132 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8133 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8134 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8135 intcmp_mode = CCmode;
8136 code = GEU;
8137 }
8138 break;
8139 case LT:
8140 case UNLT:
8141 if (code == LT && TARGET_IEEE_FP)
8142 {
8143 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8144 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8145 intcmp_mode = CCmode;
8146 code = EQ;
8147 }
8148 else
8149 {
8150 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8151 code = NE;
8152 }
8153 break;
8154 case GE:
8155 case UNGE:
8156 if (code == GE || !TARGET_IEEE_FP)
8157 {
8158 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8159 code = EQ;
8160 }
8161 else
8162 {
8163 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8164 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8165 GEN_INT (0x01)));
8166 code = NE;
8167 }
8168 break;
8169 case LE:
8170 case UNLE:
8171 if (code == LE && TARGET_IEEE_FP)
8172 {
8173 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8174 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8175 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8176 intcmp_mode = CCmode;
8177 code = LTU;
8178 }
8179 else
8180 {
8181 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8182 code = NE;
8183 }
8184 break;
8185 case EQ:
8186 case UNEQ:
8187 if (code == EQ && TARGET_IEEE_FP)
8188 {
8189 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8190 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8191 intcmp_mode = CCmode;
8192 code = EQ;
8193 }
8194 else
8195 {
8196 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8197 code = NE;
8198 break;
8199 }
8200 break;
8201 case NE:
8202 case LTGT:
8203 if (code == NE && TARGET_IEEE_FP)
8204 {
8205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8206 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8207 GEN_INT (0x40)));
8208 code = NE;
8209 }
8210 else
8211 {
8212 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8213 code = EQ;
8214 }
8215 break;
8216
8217 case UNORDERED:
8218 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8219 code = NE;
8220 break;
8221 case ORDERED:
8222 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8223 code = EQ;
8224 break;
8225
8226 default:
8227 abort ();
8228 }
8229 }
8230
8231 /* Return the test that should be put into the flags user, i.e.
8232 the bcc, scc, or cmov instruction. */
8233 return gen_rtx_fmt_ee (code, VOIDmode,
8234 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8235 const0_rtx);
8236 }
8237
8238 rtx
8239 ix86_expand_compare (code, second_test, bypass_test)
8240 enum rtx_code code;
8241 rtx *second_test, *bypass_test;
8242 {
8243 rtx op0, op1, ret;
8244 op0 = ix86_compare_op0;
8245 op1 = ix86_compare_op1;
8246
8247 if (second_test)
8248 *second_test = NULL_RTX;
8249 if (bypass_test)
8250 *bypass_test = NULL_RTX;
8251
8252 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8253 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8254 second_test, bypass_test);
8255 else
8256 ret = ix86_expand_int_compare (code, op0, op1);
8257
8258 return ret;
8259 }
8260
8261 /* Return true if the CODE will result in a nontrivial jump sequence. */
8262 bool
8263 ix86_fp_jump_nontrivial_p (code)
8264 enum rtx_code code;
8265 {
8266 enum rtx_code bypass_code, first_code, second_code;
8267 if (!TARGET_CMOVE)
8268 return true;
8269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8270 return bypass_code != NIL || second_code != NIL;
8271 }
8272
8273 void
8274 ix86_expand_branch (code, label)
8275 enum rtx_code code;
8276 rtx label;
8277 {
8278 rtx tmp;
8279
8280 switch (GET_MODE (ix86_compare_op0))
8281 {
8282 case QImode:
8283 case HImode:
8284 case SImode:
8285 simple:
8286 tmp = ix86_expand_compare (code, NULL, NULL);
8287 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8288 gen_rtx_LABEL_REF (VOIDmode, label),
8289 pc_rtx);
8290 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8291 return;
8292
8293 case SFmode:
8294 case DFmode:
8295 case XFmode:
8296 case TFmode:
8297 {
8298 rtvec vec;
8299 int use_fcomi;
8300 enum rtx_code bypass_code, first_code, second_code;
8301
8302 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8303 &ix86_compare_op1);
8304
8305 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8306
8307 /* Check whether we will use the natural sequence with one jump. If
8308 so, we can expand the jump early. Otherwise delay expansion by
8309 creating a compound insn so as not to confuse the optimizers. */
8310 if (bypass_code == NIL && second_code == NIL
8311 && TARGET_CMOVE)
8312 {
8313 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8314 gen_rtx_LABEL_REF (VOIDmode, label),
8315 pc_rtx, NULL_RTX);
8316 }
8317 else
8318 {
8319 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8320 ix86_compare_op0, ix86_compare_op1);
8321 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8322 gen_rtx_LABEL_REF (VOIDmode, label),
8323 pc_rtx);
8324 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8325
8326 use_fcomi = ix86_use_fcomi_compare (code);
8327 vec = rtvec_alloc (3 + !use_fcomi);
8328 RTVEC_ELT (vec, 0) = tmp;
8329 RTVEC_ELT (vec, 1)
8330 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8331 RTVEC_ELT (vec, 2)
8332 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8333 if (! use_fcomi)
8334 RTVEC_ELT (vec, 3)
8335 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8336
8337 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8338 }
8339 return;
8340 }
8341
8342 case DImode:
8343 if (TARGET_64BIT)
8344 goto simple;
8345 /* Expand DImode branch into multiple compare+branch. */
8346 {
8347 rtx lo[2], hi[2], label2;
8348 enum rtx_code code1, code2, code3;
8349
8350 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8351 {
8352 tmp = ix86_compare_op0;
8353 ix86_compare_op0 = ix86_compare_op1;
8354 ix86_compare_op1 = tmp;
8355 code = swap_condition (code);
8356 }
8357 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8358 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8359
8360 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8361 avoid two branches. This costs one extra insn, so disable when
8362 optimizing for size. */
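/* That is, the two values are equal exactly when both (hi0 ^ hi1) and
(lo0 ^ lo1) are zero, so comparing their OR against zero decides EQ/NE
for the whole double-word value with a single branch. */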
8363
8364 if ((code == EQ || code == NE)
8365 && (!optimize_size
8366 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8367 {
8368 rtx xor0, xor1;
8369
8370 xor1 = hi[0];
8371 if (hi[1] != const0_rtx)
8372 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8373 NULL_RTX, 0, OPTAB_WIDEN);
8374
8375 xor0 = lo[0];
8376 if (lo[1] != const0_rtx)
8377 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8378 NULL_RTX, 0, OPTAB_WIDEN);
8379
8380 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8381 NULL_RTX, 0, OPTAB_WIDEN);
8382
8383 ix86_compare_op0 = tmp;
8384 ix86_compare_op1 = const0_rtx;
8385 ix86_expand_branch (code, label);
8386 return;
8387 }
8388
8389 /* Otherwise, if we are doing a less-than or greater-or-equal-than
8390 comparison, op1 is a constant, and the low word is zero, then we can
8391 just examine the high word. */
8392
8393 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8394 switch (code)
8395 {
8396 case LT: case LTU: case GE: case GEU:
8397 ix86_compare_op0 = hi[0];
8398 ix86_compare_op1 = hi[1];
8399 ix86_expand_branch (code, label);
8400 return;
8401 default:
8402 break;
8403 }
8404
8405 /* Otherwise, we need two or three jumps. */
8406
8407 label2 = gen_label_rtx ();
8408
8409 code1 = code;
8410 code2 = swap_condition (code);
8411 code3 = unsigned_condition (code);
8412
8413 switch (code)
8414 {
8415 case LT: case GT: case LTU: case GTU:
8416 break;
8417
8418 case LE: code1 = LT; code2 = GT; break;
8419 case GE: code1 = GT; code2 = LT; break;
8420 case LEU: code1 = LTU; code2 = GTU; break;
8421 case GEU: code1 = GTU; code2 = LTU; break;
8422
8423 case EQ: code1 = NIL; code2 = NE; break;
8424 case NE: code2 = NIL; break;
8425
8426 default:
8427 abort ();
8428 }
8429
8430 /*
8431 * a < b =>
8432 * if (hi(a) < hi(b)) goto true;
8433 * if (hi(a) > hi(b)) goto false;
8434 * if (lo(a) < lo(b)) goto true;
8435 * false:
8436 */
8437
8438 ix86_compare_op0 = hi[0];
8439 ix86_compare_op1 = hi[1];
8440
8441 if (code1 != NIL)
8442 ix86_expand_branch (code1, label);
8443 if (code2 != NIL)
8444 ix86_expand_branch (code2, label2);
8445
8446 ix86_compare_op0 = lo[0];
8447 ix86_compare_op1 = lo[1];
8448 ix86_expand_branch (code3, label);
8449
8450 if (code2 != NIL)
8451 emit_label (label2);
8452 return;
8453 }
8454
8455 default:
8456 abort ();
8457 }
8458 }
8459
8460 /* Split branch based on floating point condition. */
8461 void
8462 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8463 enum rtx_code code;
8464 rtx op1, op2, target1, target2, tmp;
8465 {
8466 rtx second, bypass;
8467 rtx label = NULL_RTX;
8468 rtx condition;
8469 int bypass_probability = -1, second_probability = -1, probability = -1;
8470 rtx i;
8471
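/* Canonicalize so that the fall-through arm is target2 (pc_rtx); if it is
not, swap the two targets and reverse the condition to compensate. */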
8472 if (target2 != pc_rtx)
8473 {
8474 rtx tmp = target2;
8475 code = reverse_condition_maybe_unordered (code);
8476 target2 = target1;
8477 target1 = tmp;
8478 }
8479
8480 condition = ix86_expand_fp_compare (code, op1, op2,
8481 tmp, &second, &bypass);
8482
8483 if (split_branch_probability >= 0)
8484 {
8485 /* Distribute the probabilities across the jumps.
8486 Assume that BYPASS and SECOND always test
8487 for UNORDERED. */
8488 probability = split_branch_probability;
8489
8490 /* A value of 1 is low enough that there is no need for the probability
8491 to be updated. Later we may run some experiments and see
8492 whether unordered values are more frequent in practice. */
8493 if (bypass)
8494 bypass_probability = 1;
8495 if (second)
8496 second_probability = 1;
8497 }
8498 if (bypass != NULL_RTX)
8499 {
8500 label = gen_label_rtx ();
8501 i = emit_jump_insn (gen_rtx_SET
8502 (VOIDmode, pc_rtx,
8503 gen_rtx_IF_THEN_ELSE (VOIDmode,
8504 bypass,
8505 gen_rtx_LABEL_REF (VOIDmode,
8506 label),
8507 pc_rtx)));
8508 if (bypass_probability >= 0)
8509 REG_NOTES (i)
8510 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8511 GEN_INT (bypass_probability),
8512 REG_NOTES (i));
8513 }
8514 i = emit_jump_insn (gen_rtx_SET
8515 (VOIDmode, pc_rtx,
8516 gen_rtx_IF_THEN_ELSE (VOIDmode,
8517 condition, target1, target2)));
8518 if (probability >= 0)
8519 REG_NOTES (i)
8520 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8521 GEN_INT (probability),
8522 REG_NOTES (i));
8523 if (second != NULL_RTX)
8524 {
8525 i = emit_jump_insn (gen_rtx_SET
8526 (VOIDmode, pc_rtx,
8527 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8528 target2)));
8529 if (second_probability >= 0)
8530 REG_NOTES (i)
8531 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8532 GEN_INT (second_probability),
8533 REG_NOTES (i));
8534 }
8535 if (label != NULL_RTX)
8536 emit_label (label);
8537 }
8538
8539 int
8540 ix86_expand_setcc (code, dest)
8541 enum rtx_code code;
8542 rtx dest;
8543 {
8544 rtx ret, tmp, tmpreg;
8545 rtx second_test, bypass_test;
8546
8547 if (GET_MODE (ix86_compare_op0) == DImode
8548 && !TARGET_64BIT)
8549 return 0; /* FAIL */
8550
8551 if (GET_MODE (dest) != QImode)
8552 abort ();
8553
8554 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8555 PUT_MODE (ret, QImode);
8556
8557 tmp = dest;
8558 tmpreg = dest;
8559
8560 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
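/* When the comparison needs a second or bypass test, compute that test with
another setcc into a scratch byte and combine the two results: OR them for
a second test, AND them (with the bypass condition reversed) for a bypass
test. */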
8561 if (bypass_test || second_test)
8562 {
8563 rtx test = second_test;
8564 int bypass = 0;
8565 rtx tmp2 = gen_reg_rtx (QImode);
8566 if (bypass_test)
8567 {
8568 if (second_test)
8569 abort ();
8570 test = bypass_test;
8571 bypass = 1;
8572 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8573 }
8574 PUT_MODE (test, QImode);
8575 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8576
8577 if (bypass)
8578 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8579 else
8580 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8581 }
8582
8583 return 1; /* DONE */
8584 }
8585
8586 int
8587 ix86_expand_int_movcc (operands)
8588 rtx operands[];
8589 {
8590 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8591 rtx compare_seq, compare_op;
8592 rtx second_test, bypass_test;
8593 enum machine_mode mode = GET_MODE (operands[0]);
8594
8595 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8596 If the comparison is done with an immediate, we can convert it to LTU or
8597 GEU by altering the integer. */
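/* For instance, x <= 5 (LEU) becomes x < 6 (LTU) and x > 5 (GTU) becomes
x >= 6 (GEU); the checks in the condition below exclude immediates for
which adding 1 would wrap or stop being a sign-extendable value. */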
8598
8599 if ((code == LEU || code == GTU)
8600 && GET_CODE (ix86_compare_op1) == CONST_INT
8601 && mode != HImode
8602 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
8603 /* The operand still must be representable as a sign-extended value. */
8604 && (!TARGET_64BIT
8605 || GET_MODE (ix86_compare_op0) != DImode
8606 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
8607 && GET_CODE (operands[2]) == CONST_INT
8608 && GET_CODE (operands[3]) == CONST_INT)
8609 {
8610 if (code == LEU)
8611 code = LTU;
8612 else
8613 code = GEU;
8614 ix86_compare_op1
8615 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8616 GET_MODE (ix86_compare_op0));
8617 }
8618
8619 start_sequence ();
8620 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8621 compare_seq = gen_sequence ();
8622 end_sequence ();
8623
8624 compare_code = GET_CODE (compare_op);
8625
8626 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8627 HImode insns, we'd be swallowed in word prefix ops. */
8628
8629 if (mode != HImode
8630 && (mode != DImode || TARGET_64BIT)
8631 && GET_CODE (operands[2]) == CONST_INT
8632 && GET_CODE (operands[3]) == CONST_INT)
8633 {
8634 rtx out = operands[0];
8635 HOST_WIDE_INT ct = INTVAL (operands[2]);
8636 HOST_WIDE_INT cf = INTVAL (operands[3]);
8637 HOST_WIDE_INT diff;
8638
8639 if ((compare_code == LTU || compare_code == GEU)
8640 && !second_test && !bypass_test)
8641 {
8642
8643 /* Detect overlap between destination and compare sources. */
8644 rtx tmp = out;
8645
8646 /* To simplify the rest of the code, restrict to the GEU case. */
8647 if (compare_code == LTU)
8648 {
8649 int tmp = ct;
8650 ct = cf;
8651 cf = tmp;
8652 compare_code = reverse_condition (compare_code);
8653 code = reverse_condition (code);
8654 }
8655 diff = ct - cf;
8656
8657 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8658 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8659 tmp = gen_reg_rtx (mode);
8660
8661 emit_insn (compare_seq);
8662 if (mode == DImode)
8663 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8664 else
8665 emit_insn (gen_x86_movsicc_0_m1 (tmp));
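/* At this point tmp is -1 when the carry flag is set (i.e. the GEU
condition is false) and 0 otherwise; the arithmetic below turns -1 into
cf and 0 into ct. */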
8666
8667 if (diff == 1)
8668 {
8669 /*
8670 * cmpl op0,op1
8671 * sbbl dest,dest
8672 * [addl dest, ct]
8673 *
8674 * Size 5 - 8.
8675 */
8676 if (ct)
8677 tmp = expand_simple_binop (mode, PLUS,
8678 tmp, GEN_INT (ct),
8679 tmp, 1, OPTAB_DIRECT);
8680 }
8681 else if (cf == -1)
8682 {
8683 /*
8684 * cmpl op0,op1
8685 * sbbl dest,dest
8686 * orl $ct, dest
8687 *
8688 * Size 8.
8689 */
8690 tmp = expand_simple_binop (mode, IOR,
8691 tmp, GEN_INT (ct),
8692 tmp, 1, OPTAB_DIRECT);
8693 }
8694 else if (diff == -1 && ct)
8695 {
8696 /*
8697 * cmpl op0,op1
8698 * sbbl dest,dest
8699 * xorl $-1, dest
8700 * [addl dest, cf]
8701 *
8702 * Size 8 - 11.
8703 */
8704 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8705 if (cf)
8706 tmp = expand_simple_binop (mode, PLUS,
8707 tmp, GEN_INT (cf),
8708 tmp, 1, OPTAB_DIRECT);
8709 }
8710 else
8711 {
8712 /*
8713 * cmpl op0,op1
8714 * sbbl dest,dest
8715 * andl cf - ct, dest
8716 * [addl dest, ct]
8717 *
8718 * Size 8 - 11.
8719 */
8720 tmp = expand_simple_binop (mode, AND,
8721 tmp,
8722 gen_int_mode (cf - ct, mode),
8723 tmp, 1, OPTAB_DIRECT);
8724 if (ct)
8725 tmp = expand_simple_binop (mode, PLUS,
8726 tmp, GEN_INT (ct),
8727 tmp, 1, OPTAB_DIRECT);
8728 }
8729
8730 if (tmp != out)
8731 emit_move_insn (out, tmp);
8732
8733 return 1; /* DONE */
8734 }
8735
8736 diff = ct - cf;
8737 if (diff < 0)
8738 {
8739 HOST_WIDE_INT tmp;
8740 tmp = ct, ct = cf, cf = tmp;
8741 diff = -diff;
8742 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8743 {
8744 /* We may be reversing an unordered compare to a normal compare, which
8745 is not valid in general (we may convert a non-trapping condition
8746 to a trapping one); however, on i386 we currently emit all
8747 comparisons unordered. */
8748 compare_code = reverse_condition_maybe_unordered (compare_code);
8749 code = reverse_condition_maybe_unordered (code);
8750 }
8751 else
8752 {
8753 compare_code = reverse_condition (compare_code);
8754 code = reverse_condition (code);
8755 }
8756 }
8757
8758 compare_code = NIL;
8759 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8760 && GET_CODE (ix86_compare_op1) == CONST_INT)
8761 {
8762 if (ix86_compare_op1 == const0_rtx
8763 && (code == LT || code == GE))
8764 compare_code = code;
8765 else if (ix86_compare_op1 == constm1_rtx)
8766 {
8767 if (code == LE)
8768 compare_code = LT;
8769 else if (code == GT)
8770 compare_code = GE;
8771 }
8772 }
8773
8774 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8775 if (compare_code != NIL
8776 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8777 && (cf == -1 || ct == -1))
8778 {
8779 /* If the lea code below could be used, only optimize
8780 if it results in a 2 insn sequence. */
8781
8782 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8783 || diff == 3 || diff == 5 || diff == 9)
8784 || (compare_code == LT && ct == -1)
8785 || (compare_code == GE && cf == -1))
8786 {
8787 /*
8788 * notl op1 (if necessary)
8789 * sarl $31, op1
8790 * orl cf, op1
8791 */
8792 if (ct != -1)
8793 {
8794 cf = ct;
8795 ct = -1;
8796 code = reverse_condition (code);
8797 }
8798
8799 out = emit_store_flag (out, code, ix86_compare_op0,
8800 ix86_compare_op1, VOIDmode, 0, -1);
8801
8802 out = expand_simple_binop (mode, IOR,
8803 out, GEN_INT (cf),
8804 out, 1, OPTAB_DIRECT);
8805 if (out != operands[0])
8806 emit_move_insn (operands[0], out);
8807
8808 return 1; /* DONE */
8809 }
8810 }
8811
8812 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8813 || diff == 3 || diff == 5 || diff == 9)
8814 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8815 {
8816 /*
8817 * xorl dest,dest
8818 * cmpl op1,op2
8819 * setcc dest
8820 * lea cf(dest*(ct-cf)),dest
8821 *
8822 * Size 14.
8823 *
8824 * This also catches the degenerate setcc-only case.
8825 */
8826
8827 rtx tmp;
8828 int nops;
8829
8830 out = emit_store_flag (out, code, ix86_compare_op0,
8831 ix86_compare_op1, VOIDmode, 0, 1);
8832
8833 nops = 0;
8834 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8835 arithmetic in the proper mode to match. */
8836 if (diff == 1)
8837 tmp = out;
8838 else
8839 {
8840 rtx out1;
8841 out1 = out;
8842 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8843 nops++;
8844 if (diff & 1)
8845 {
8846 tmp = gen_rtx_PLUS (mode, tmp, out1);
8847 nops++;
8848 }
8849 }
8850 if (cf != 0)
8851 {
8852 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8853 nops++;
8854 }
8855 if (tmp != out
8856 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8857 {
8858 if (nops == 1)
8859 {
8860 rtx clob;
8861
8862 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8863 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8864
8865 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8866 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8867 emit_insn (tmp);
8868 }
8869 else
8870 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8871 }
8872 if (out != operands[0])
8873 emit_move_insn (operands[0], out);
8874
8875 return 1; /* DONE */
8876 }
8877
8878 /*
8879 * General case: Jumpful:
8880 * xorl dest,dest cmpl op1, op2
8881 * cmpl op1, op2 movl ct, dest
8882 * setcc dest jcc 1f
8883 * decl dest movl cf, dest
8884 * andl (cf-ct),dest 1:
8885 * addl ct,dest
8886 *
8887 * Size 20. Size 14.
8888 *
8889 * This is reasonably steep, but branch mispredict costs are
8890 * high on modern cpus, so consider failing only if optimizing
8891 * for space.
8892 *
8893 * %%% Parameterize branch_cost on the tuning architecture, then
8894 * use that. The 80386 couldn't care less about mispredicts.
8895 */
8896
8897 if (!optimize_size && !TARGET_CMOVE)
8898 {
8899 if (ct == 0)
8900 {
8901 ct = cf;
8902 cf = 0;
8903 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8904 /* We may be reversing an unordered compare to a normal compare,
8905 which is not valid in general (we may convert a non-trapping
8906 condition to a trapping one); however, on i386 we currently
8907 emit all comparisons unordered. */
8908 code = reverse_condition_maybe_unordered (code);
8909 else
8910 {
8911 code = reverse_condition (code);
8912 if (compare_code != NIL)
8913 compare_code = reverse_condition (compare_code);
8914 }
8915 }
8916
8917 if (compare_code != NIL)
8918 {
8919 /* notl op1 (if needed)
8920 sarl $31, op1
8921 andl (cf-ct), op1
8922 addl ct, op1
8923
8924 For x < 0 (resp. x <= -1) there will be no notl,
8925 so if possible swap the constants to get rid of the
8926 complement.
8927 True/false will be -1/0 while code below (store flag
8928 followed by decrement) is 0/-1, so the constants need
8929 to be exchanged once more. */
8930
8931 if (compare_code == GE || !cf)
8932 {
8933 code = reverse_condition (code);
8934 compare_code = LT;
8935 }
8936 else
8937 {
8938 HOST_WIDE_INT tmp = cf;
8939 cf = ct;
8940 ct = tmp;
8941 }
8942
8943 out = emit_store_flag (out, code, ix86_compare_op0,
8944 ix86_compare_op1, VOIDmode, 0, -1);
8945 }
8946 else
8947 {
8948 out = emit_store_flag (out, code, ix86_compare_op0,
8949 ix86_compare_op1, VOIDmode, 0, 1);
8950
8951 out = expand_simple_binop (mode, PLUS,
8952 out, constm1_rtx,
8953 out, 1, OPTAB_DIRECT);
8954 }
8955
8956 out = expand_simple_binop (mode, AND,
8957 out,
8958 gen_int_mode (cf - ct, mode),
8959 out, 1, OPTAB_DIRECT);
8960 out = expand_simple_binop (mode, PLUS,
8961 out, GEN_INT (ct),
8962 out, 1, OPTAB_DIRECT);
8963 if (out != operands[0])
8964 emit_move_insn (operands[0], out);
8965
8966 return 1; /* DONE */
8967 }
8968 }
8969
8970 if (!TARGET_CMOVE)
8971 {
8972 /* Try a few things more with specific constants and a variable. */
8973
8974 optab op;
8975 rtx var, orig_out, out, tmp;
8976
8977 if (optimize_size)
8978 return 0; /* FAIL */
8979
8980 /* If one of the two operands is an interesting constant, load a 0/-1
8981 mask with the code above and combine the variable with it using a logical operation. */
8982
8983 if (GET_CODE (operands[2]) == CONST_INT)
8984 {
8985 var = operands[3];
8986 if (INTVAL (operands[2]) == 0)
8987 operands[3] = constm1_rtx, op = and_optab;
8988 else if (INTVAL (operands[2]) == -1)
8989 operands[3] = const0_rtx, op = ior_optab;
8990 else
8991 return 0; /* FAIL */
8992 }
8993 else if (GET_CODE (operands[3]) == CONST_INT)
8994 {
8995 var = operands[2];
8996 if (INTVAL (operands[3]) == 0)
8997 operands[2] = constm1_rtx, op = and_optab;
8998 else if (INTVAL (operands[3]) == -1)
8999 operands[2] = const0_rtx, op = ior_optab;
9000 else
9001 return 0; /* FAIL */
9002 }
9003 else
9004 return 0; /* FAIL */
9005
9006 orig_out = operands[0];
9007 tmp = gen_reg_rtx (mode);
9008 operands[0] = tmp;
9009
9010 /* Recurse to get the constant loaded. */
9011 if (ix86_expand_int_movcc (operands) == 0)
9012 return 0; /* FAIL */
9013
9014 /* Mask in the interesting variable. */
9015 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9016 OPTAB_WIDEN);
9017 if (out != orig_out)
9018 emit_move_insn (orig_out, out);
9019
9020 return 1; /* DONE */
9021 }
9022
9023 /*
9024 * For comparison with above,
9025 *
9026 * movl cf,dest
9027 * movl ct,tmp
9028 * cmpl op1,op2
9029 * cmovcc tmp,dest
9030 *
9031 * Size 15.
9032 */
9033
9034 if (! nonimmediate_operand (operands[2], mode))
9035 operands[2] = force_reg (mode, operands[2]);
9036 if (! nonimmediate_operand (operands[3], mode))
9037 operands[3] = force_reg (mode, operands[3]);
9038
9039 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9040 {
9041 rtx tmp = gen_reg_rtx (mode);
9042 emit_move_insn (tmp, operands[3]);
9043 operands[3] = tmp;
9044 }
9045 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9046 {
9047 rtx tmp = gen_reg_rtx (mode);
9048 emit_move_insn (tmp, operands[2]);
9049 operands[2] = tmp;
9050 }
9051 if (! register_operand (operands[2], VOIDmode)
9052 && ! register_operand (operands[3], VOIDmode))
9053 operands[2] = force_reg (mode, operands[2]);
9054
9055 emit_insn (compare_seq);
9056 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9057 gen_rtx_IF_THEN_ELSE (mode,
9058 compare_op, operands[2],
9059 operands[3])));
9060 if (bypass_test)
9061 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9062 gen_rtx_IF_THEN_ELSE (mode,
9063 bypass_test,
9064 operands[3],
9065 operands[0])));
9066 if (second_test)
9067 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9068 gen_rtx_IF_THEN_ELSE (mode,
9069 second_test,
9070 operands[2],
9071 operands[0])));
9072
9073 return 1; /* DONE */
9074 }
9075
9076 int
9077 ix86_expand_fp_movcc (operands)
9078 rtx operands[];
9079 {
9080 enum rtx_code code;
9081 rtx tmp;
9082 rtx compare_op, second_test, bypass_test;
9083
9084 /* For SF/DFmode conditional moves based on comparisons
9085 in the same mode, we may want to use SSE min/max instructions. */
9086 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9087 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9088 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9089 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9090 && (!TARGET_IEEE_FP
9091 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9092 /* We may be called from the post-reload splitter. */
9093 && (!REG_P (operands[0])
9094 || SSE_REG_P (operands[0])
9095 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9096 {
9097 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9098 code = GET_CODE (operands[1]);
9099
9100 /* See if we have a (cross) match between the comparison operands and the
9101 conditional move operands. */
9102 if (rtx_equal_p (operands[2], op1))
9103 {
9104 rtx tmp = op0;
9105 op0 = op1;
9106 op1 = tmp;
9107 code = reverse_condition_maybe_unordered (code);
9108 }
9109 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9110 {
9111 /* Check for min operation. */
9112 if (code == LT)
9113 {
9114 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9115 if (memory_operand (op0, VOIDmode))
9116 op0 = force_reg (GET_MODE (operands[0]), op0);
9117 if (GET_MODE (operands[0]) == SFmode)
9118 emit_insn (gen_minsf3 (operands[0], op0, op1));
9119 else
9120 emit_insn (gen_mindf3 (operands[0], op0, op1));
9121 return 1;
9122 }
9123 /* Check for max operation. */
9124 if (code == GT)
9125 {
9126 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9127 if (memory_operand (op0, VOIDmode))
9128 op0 = force_reg (GET_MODE (operands[0]), op0);
9129 if (GET_MODE (operands[0]) == SFmode)
9130 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9131 else
9132 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9133 return 1;
9134 }
9135 }
9136 /* Arrange for the condition to be a sse_comparison_operator. When we are
9137 in non-IEEE mode, try to canonicalize the destination operand
9138 to be first in the comparison - this helps reload avoid extra
9139 moves. */
9140 if (!sse_comparison_operator (operands[1], VOIDmode)
9141 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9142 {
9143 rtx tmp = ix86_compare_op0;
9144 ix86_compare_op0 = ix86_compare_op1;
9145 ix86_compare_op1 = tmp;
9146 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9147 VOIDmode, ix86_compare_op0,
9148 ix86_compare_op1);
9149 }
9150 /* Similarly, try to arrange for the result to be the first operand of the
9151 conditional move. We also don't support the NE comparison on SSE, so try
9152 to avoid it. */
9153 if ((rtx_equal_p (operands[0], operands[3])
9154 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9155 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9156 {
9157 rtx tmp = operands[2];
9158 operands[2] = operands[3];
9159 operands[3] = tmp;
9160 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9161 (GET_CODE (operands[1])),
9162 VOIDmode, ix86_compare_op0,
9163 ix86_compare_op1);
9164 }
9165 if (GET_MODE (operands[0]) == SFmode)
9166 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9167 operands[2], operands[3],
9168 ix86_compare_op0, ix86_compare_op1));
9169 else
9170 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9171 operands[2], operands[3],
9172 ix86_compare_op0, ix86_compare_op1));
9173 return 1;
9174 }
9175
9176 /* The floating point conditional move instructions don't directly
9177 support conditions resulting from a signed integer comparison. */
9178
9179 code = GET_CODE (operands[1]);
9180 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9181
9182 /* The floating point conditional move instructions don't directly
9183 support signed integer comparisons. */
9184
9185 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9186 {
9187 if (second_test != NULL || bypass_test != NULL)
9188 abort ();
9189 tmp = gen_reg_rtx (QImode);
9190 ix86_expand_setcc (code, tmp);
9191 code = NE;
9192 ix86_compare_op0 = tmp;
9193 ix86_compare_op1 = const0_rtx;
9194 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9195 }
9196 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9197 {
9198 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9199 emit_move_insn (tmp, operands[3]);
9200 operands[3] = tmp;
9201 }
9202 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9203 {
9204 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9205 emit_move_insn (tmp, operands[2]);
9206 operands[2] = tmp;
9207 }
9208
9209 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9210 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9211 compare_op,
9212 operands[2],
9213 operands[3])));
9214 if (bypass_test)
9215 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9216 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9217 bypass_test,
9218 operands[3],
9219 operands[0])));
9220 if (second_test)
9221 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9222 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9223 second_test,
9224 operands[2],
9225 operands[0])));
9226
9227 return 1;
9228 }
9229
9230 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9231 works for floating point parameters and non-offsettable memories.
9232 For pushes, it returns just stack offsets; the values will be saved
9233 in the right order. At most three parts are generated. */
9234
9235 static int
9236 ix86_split_to_parts (operand, parts, mode)
9237 rtx operand;
9238 rtx *parts;
9239 enum machine_mode mode;
9240 {
9241 int size;
9242
9243 if (!TARGET_64BIT)
9244 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9245 else
9246 size = (GET_MODE_SIZE (mode) + 4) / 8;
9247
9248 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9249 abort ();
9250 if (size < 2 || size > 3)
9251 abort ();
9252
9253 /* Optimize constant pool references into immediates. This is used by fp
9254 moves, which force all constants to memory to allow combining. */
9255 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9256 {
9257 rtx tmp = maybe_get_pool_constant (operand);
9258 if (tmp)
9259 operand = tmp;
9260 }
9261
9262 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9263 {
9264 /* The only non-offsettable memories we handle are pushes. */
9265 if (! push_operand (operand, VOIDmode))
9266 abort ();
9267
9268 operand = copy_rtx (operand);
9269 PUT_MODE (operand, Pmode);
9270 parts[0] = parts[1] = parts[2] = operand;
9271 }
9272 else if (!TARGET_64BIT)
9273 {
9274 if (mode == DImode)
9275 split_di (&operand, 1, &parts[0], &parts[1]);
9276 else
9277 {
9278 if (REG_P (operand))
9279 {
9280 if (!reload_completed)
9281 abort ();
9282 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9283 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9284 if (size == 3)
9285 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9286 }
9287 else if (offsettable_memref_p (operand))
9288 {
9289 operand = adjust_address (operand, SImode, 0);
9290 parts[0] = operand;
9291 parts[1] = adjust_address (operand, SImode, 4);
9292 if (size == 3)
9293 parts[2] = adjust_address (operand, SImode, 8);
9294 }
9295 else if (GET_CODE (operand) == CONST_DOUBLE)
9296 {
9297 REAL_VALUE_TYPE r;
9298 long l[4];
9299
9300 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9301 switch (mode)
9302 {
9303 case XFmode:
9304 case TFmode:
9305 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9306 parts[2] = gen_int_mode (l[2], SImode);
9307 break;
9308 case DFmode:
9309 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9310 break;
9311 default:
9312 abort ();
9313 }
9314 parts[1] = gen_int_mode (l[1], SImode);
9315 parts[0] = gen_int_mode (l[0], SImode);
9316 }
9317 else
9318 abort ();
9319 }
9320 }
9321 else
9322 {
9323 if (mode == TImode)
9324 split_ti (&operand, 1, &parts[0], &parts[1]);
9325 if (mode == XFmode || mode == TFmode)
9326 {
9327 if (REG_P (operand))
9328 {
9329 if (!reload_completed)
9330 abort ();
9331 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9332 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9333 }
9334 else if (offsettable_memref_p (operand))
9335 {
9336 operand = adjust_address (operand, DImode, 0);
9337 parts[0] = operand;
9338 parts[1] = adjust_address (operand, SImode, 8);
9339 }
9340 else if (GET_CODE (operand) == CONST_DOUBLE)
9341 {
9342 REAL_VALUE_TYPE r;
9343 long l[3];
9344
9345 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9346 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9347 /* Do not use shift by 32 to avoid warning on 32bit systems. */
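/* Both (2 << 31) - 1 and the (<< 31) << 1 double shift are ways of writing
0xffffffff and l[1] << 32 without a literal shift count of 32, which would
provoke the warning when HOST_WIDE_INT is only 32 bits wide even though
this branch is never taken in that case. */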
9348 if (HOST_BITS_PER_WIDE_INT >= 64)
9349 parts[0]
9350 = gen_int_mode
9351 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9352 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9353 DImode);
9354 else
9355 parts[0] = immed_double_const (l[0], l[1], DImode);
9356 parts[1] = gen_int_mode (l[2], SImode);
9357 }
9358 else
9359 abort ();
9360 }
9361 }
9362
9363 return size;
9364 }
9365
9366 /* Emit insns to perform a move or push of DI, DF, and XF values.
9367 Return false when normal moves are needed; true when all required
9368 insns have been emitted. Operands 2-4 contain the input values
9369 in the correct order; operands 5-7 contain the output values. */
9370
9371 void
9372 ix86_split_long_move (operands)
9373 rtx operands[];
9374 {
9375 rtx part[2][3];
9376 int nparts;
9377 int push = 0;
9378 int collisions = 0;
9379 enum machine_mode mode = GET_MODE (operands[0]);
9380
9381 /* The DFmode expanders may ask us to move a double.
9382 For a 64-bit target this is a single move. By hiding that fact
9383 here we simplify the i386.md splitters. */
9384 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9385 {
9386 /* Optimize constant pool references into immediates. This is used by
9387 fp moves, which force all constants to memory to allow combining. */
9388
9389 if (GET_CODE (operands[1]) == MEM
9390 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9391 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9392 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9393 if (push_operand (operands[0], VOIDmode))
9394 {
9395 operands[0] = copy_rtx (operands[0]);
9396 PUT_MODE (operands[0], Pmode);
9397 }
9398 else
9399 operands[0] = gen_lowpart (DImode, operands[0]);
9400 operands[1] = gen_lowpart (DImode, operands[1]);
9401 emit_move_insn (operands[0], operands[1]);
9402 return;
9403 }
9404
9405 /* The only non-offsettable memory we handle is a push. */
9406 if (push_operand (operands[0], VOIDmode))
9407 push = 1;
9408 else if (GET_CODE (operands[0]) == MEM
9409 && ! offsettable_memref_p (operands[0]))
9410 abort ();
9411
9412 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9413 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9414
9415 /* When emitting a push, take care of source operands on the stack. */
9416 if (push && GET_CODE (operands[1]) == MEM
9417 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9418 {
9419 if (nparts == 3)
9420 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9421 XEXP (part[1][2], 0));
9422 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9423 XEXP (part[1][1], 0));
9424 }
9425
9426 /* We need to do the copy in the right order in case an address register
9427 of the source overlaps the destination. */
9428 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9429 {
9430 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9431 collisions++;
9432 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9433 collisions++;
9434 if (nparts == 3
9435 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9436 collisions++;
9437
9438 /* Collision in the middle part can be handled by reordering. */
9439 if (collisions == 1 && nparts == 3
9440 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9441 {
9442 rtx tmp;
9443 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9444 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9445 }
9446
9447 /* If there are more collisions, we can't handle them by reordering.
9448 Do an lea to the last part and use only one colliding move. */
9449 else if (collisions > 1)
9450 {
9451 collisions = 1;
9452 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9453 XEXP (part[1][0], 0)));
9454 part[1][0] = change_address (part[1][0],
9455 TARGET_64BIT ? DImode : SImode,
9456 part[0][nparts - 1]);
9457 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9458 if (nparts == 3)
9459 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9460 }
9461 }
9462
9463 if (push)
9464 {
9465 if (!TARGET_64BIT)
9466 {
9467 if (nparts == 3)
9468 {
9469 /* We use only the first 12 bytes of a TFmode value, but for pushing we
9470 are required to adjust the stack as if we were pushing a real 16-byte
9471 value. */
9472 if (mode == TFmode && !TARGET_64BIT)
9473 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9474 GEN_INT (-4)));
9475 emit_move_insn (part[0][2], part[1][2]);
9476 }
9477 }
9478 else
9479 {
9480 /* In 64-bit mode we don't have a 32-bit push available. In case this is
9481 a register, it is OK - we will just use the larger counterpart. We also
9482 retype memory - these come from an attempt to avoid a REX prefix when
9483 moving the second half of a TFmode value. */
9484 if (GET_MODE (part[1][1]) == SImode)
9485 {
9486 if (GET_CODE (part[1][1]) == MEM)
9487 part[1][1] = adjust_address (part[1][1], DImode, 0);
9488 else if (REG_P (part[1][1]))
9489 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9490 else
9491 abort ();
9492 if (GET_MODE (part[1][0]) == SImode)
9493 part[1][0] = part[1][1];
9494 }
9495 }
9496 emit_move_insn (part[0][1], part[1][1]);
9497 emit_move_insn (part[0][0], part[1][0]);
9498 return;
9499 }
9500
9501 /* Choose the correct order so as not to overwrite the source before it is copied. */
9502 if ((REG_P (part[0][0])
9503 && REG_P (part[1][1])
9504 && (REGNO (part[0][0]) == REGNO (part[1][1])
9505 || (nparts == 3
9506 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9507 || (collisions > 0
9508 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9509 {
9510 if (nparts == 3)
9511 {
9512 operands[2] = part[0][2];
9513 operands[3] = part[0][1];
9514 operands[4] = part[0][0];
9515 operands[5] = part[1][2];
9516 operands[6] = part[1][1];
9517 operands[7] = part[1][0];
9518 }
9519 else
9520 {
9521 operands[2] = part[0][1];
9522 operands[3] = part[0][0];
9523 operands[5] = part[1][1];
9524 operands[6] = part[1][0];
9525 }
9526 }
9527 else
9528 {
9529 if (nparts == 3)
9530 {
9531 operands[2] = part[0][0];
9532 operands[3] = part[0][1];
9533 operands[4] = part[0][2];
9534 operands[5] = part[1][0];
9535 operands[6] = part[1][1];
9536 operands[7] = part[1][2];
9537 }
9538 else
9539 {
9540 operands[2] = part[0][0];
9541 operands[3] = part[0][1];
9542 operands[5] = part[1][0];
9543 operands[6] = part[1][1];
9544 }
9545 }
9546 emit_move_insn (operands[2], operands[5]);
9547 emit_move_insn (operands[3], operands[6]);
9548 if (nparts == 3)
9549 emit_move_insn (operands[4], operands[7]);
9550
9551 return;
9552 }
9553
9554 void
9555 ix86_split_ashldi (operands, scratch)
9556 rtx *operands, scratch;
9557 {
9558 rtx low[2], high[2];
9559 int count;
9560
9561 if (GET_CODE (operands[2]) == CONST_INT)
9562 {
9563 split_di (operands, 2, low, high);
9564 count = INTVAL (operands[2]) & 63;
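/* For shift counts of 32 or more, the low word of the result is zero and
the high word is the original low word shifted left by count - 32; for
smaller counts, shld moves the top bits of the low word into the high
word and the low word is shifted normally. */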
9565
9566 if (count >= 32)
9567 {
9568 emit_move_insn (high[0], low[1]);
9569 emit_move_insn (low[0], const0_rtx);
9570
9571 if (count > 32)
9572 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9573 }
9574 else
9575 {
9576 if (!rtx_equal_p (operands[0], operands[1]))
9577 emit_move_insn (operands[0], operands[1]);
9578 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9579 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9580 }
9581 }
9582 else
9583 {
9584 if (!rtx_equal_p (operands[0], operands[1]))
9585 emit_move_insn (operands[0], operands[1]);
9586
9587 split_di (operands, 1, low, high);
9588
9589 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9590 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9591
9592 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9593 {
9594 if (! no_new_pseudos)
9595 scratch = force_reg (SImode, const0_rtx);
9596 else
9597 emit_move_insn (scratch, const0_rtx);
9598
9599 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9600 scratch));
9601 }
9602 else
9603 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9604 }
9605 }
9606
9607 void
9608 ix86_split_ashrdi (operands, scratch)
9609 rtx *operands, scratch;
9610 {
9611 rtx low[2], high[2];
9612 int count;
9613
9614 if (GET_CODE (operands[2]) == CONST_INT)
9615 {
9616 split_di (operands, 2, low, high);
9617 count = INTVAL (operands[2]) & 63;
9618
9619 if (count >= 32)
9620 {
9621 emit_move_insn (low[0], high[1]);
9622
9623 if (! reload_completed)
9624 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9625 else
9626 {
9627 emit_move_insn (high[0], low[0]);
9628 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9629 }
9630
9631 if (count > 32)
9632 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9633 }
9634 else
9635 {
9636 if (!rtx_equal_p (operands[0], operands[1]))
9637 emit_move_insn (operands[0], operands[1]);
9638 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9639 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9640 }
9641 }
9642 else
9643 {
9644 if (!rtx_equal_p (operands[0], operands[1]))
9645 emit_move_insn (operands[0], operands[1]);
9646
9647 split_di (operands, 1, low, high);
9648
9649 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9650 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9651
9652 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9653 {
9654 if (! no_new_pseudos)
9655 scratch = gen_reg_rtx (SImode);
9656 emit_move_insn (scratch, high[0]);
9657 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9658 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9659 scratch));
9660 }
9661 else
9662 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9663 }
9664 }
9665
9666 void
9667 ix86_split_lshrdi (operands, scratch)
9668 rtx *operands, scratch;
9669 {
9670 rtx low[2], high[2];
9671 int count;
9672
9673 if (GET_CODE (operands[2]) == CONST_INT)
9674 {
9675 split_di (operands, 2, low, high);
9676 count = INTVAL (operands[2]) & 63;
9677
9678 if (count >= 32)
9679 {
9680 emit_move_insn (low[0], high[1]);
9681 emit_move_insn (high[0], const0_rtx);
9682
9683 if (count > 32)
9684 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9685 }
9686 else
9687 {
9688 if (!rtx_equal_p (operands[0], operands[1]))
9689 emit_move_insn (operands[0], operands[1]);
9690 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9691 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9692 }
9693 }
9694 else
9695 {
9696 if (!rtx_equal_p (operands[0], operands[1]))
9697 emit_move_insn (operands[0], operands[1]);
9698
9699 split_di (operands, 1, low, high);
9700
9701 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9702 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9703
9704 /* Heh. By reversing the arguments, we can reuse this pattern. */
9705 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9706 {
9707 if (! no_new_pseudos)
9708 scratch = force_reg (SImode, const0_rtx);
9709 else
9710 emit_move_insn (scratch, const0_rtx);
9711
9712 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9713 scratch));
9714 }
9715 else
9716 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9717 }
9718 }
9719
9720 /* Helper function for the string operations below.  Test whether VARIABLE
9721    is aligned to VALUE bytes.  If so, jump to the returned label.  */
9722 static rtx
9723 ix86_expand_aligntest (variable, value)
9724 rtx variable;
9725 int value;
9726 {
9727 rtx label = gen_label_rtx ();
9728 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9729 if (GET_MODE (variable) == DImode)
9730 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9731 else
9732 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9733 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9734 1, label);
9735 return label;
9736 }
9737
9738 /* Adjust COUNTER by the VALUE. */
9739 static void
9740 ix86_adjust_counter (countreg, value)
9741 rtx countreg;
9742 HOST_WIDE_INT value;
9743 {
9744 if (GET_MODE (countreg) == DImode)
9745 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9746 else
9747 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9748 }
9749
9750 /* Zero extend possibly SImode EXP to Pmode register. */
9751 rtx
9752 ix86_zero_extend_to_Pmode (exp)
9753 rtx exp;
9754 {
9755 rtx r;
9756 if (GET_MODE (exp) == VOIDmode)
9757 return force_reg (Pmode, exp);
9758 if (GET_MODE (exp) == Pmode)
9759 return copy_to_mode_reg (Pmode, exp);
9760 r = gen_reg_rtx (Pmode);
9761 emit_insn (gen_zero_extendsidi2 (r, exp));
9762 return r;
9763 }
9764
9765 /* Expand string move (memcpy) operation. Use i386 string operations when
9766 profitable. expand_clrstr contains similar code. */
9767 int
9768 ix86_expand_movstr (dst, src, count_exp, align_exp)
9769 rtx dst, src, count_exp, align_exp;
9770 {
9771 rtx srcreg, destreg, countreg;
9772 enum machine_mode counter_mode;
9773 HOST_WIDE_INT align = 0;
9774 unsigned HOST_WIDE_INT count = 0;
9775 rtx insns;
9776
9777 start_sequence ();
9778
9779 if (GET_CODE (align_exp) == CONST_INT)
9780 align = INTVAL (align_exp);
9781
9782 /* This simple hack avoids all inlining code and simplifies code below. */
9783 if (!TARGET_ALIGN_STRINGOPS)
9784 align = 64;
9785
9786 if (GET_CODE (count_exp) == CONST_INT)
9787 count = INTVAL (count_exp);
9788
9789   /* Figure out the proper mode for the counter.  For 32 bits it is always
9790      SImode; for 64 bits use SImode when possible, otherwise DImode.
9791      Set count to the number of bytes copied when known at compile time.  */
9792 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9793 || x86_64_zero_extended_value (count_exp))
9794 counter_mode = SImode;
9795 else
9796 counter_mode = DImode;
9797
9798 if (counter_mode != SImode && counter_mode != DImode)
9799 abort ();
9800
9801 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9802 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9803
9804 emit_insn (gen_cld ());
9805
9806   /* When optimizing for size, emit a simple rep ; movsb instruction for
9807      counts not divisible by 4.  */
9808
9809 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9810 {
9811 countreg = ix86_zero_extend_to_Pmode (count_exp);
9812 if (TARGET_64BIT)
9813 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9814 destreg, srcreg, countreg));
9815 else
9816 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9817 destreg, srcreg, countreg));
9818 }
9819
9820 /* For constant aligned (or small unaligned) copies use rep movsl
9821 followed by code copying the rest. For PentiumPro ensure 8 byte
9822 alignment to allow rep movsl acceleration. */
9823
9824 else if (count != 0
9825 && (align >= 8
9826 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9827 || optimize_size || count < (unsigned int) 64))
9828 {
9829 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9830 if (count & ~(size - 1))
9831 {
9832 countreg = copy_to_mode_reg (counter_mode,
9833 GEN_INT ((count >> (size == 4 ? 2 : 3))
9834 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9835 countreg = ix86_zero_extend_to_Pmode (countreg);
9836 if (size == 4)
9837 {
9838 if (TARGET_64BIT)
9839 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9840 destreg, srcreg, countreg));
9841 else
9842 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9843 destreg, srcreg, countreg));
9844 }
9845 else
9846 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9847 destreg, srcreg, countreg));
9848 }
9849 if (size == 8 && (count & 0x04))
9850 emit_insn (gen_strmovsi (destreg, srcreg));
9851 if (count & 0x02)
9852 emit_insn (gen_strmovhi (destreg, srcreg));
9853 if (count & 0x01)
9854 emit_insn (gen_strmovqi (destreg, srcreg));
9855 }
9856 /* The generic code based on the glibc implementation:
9857 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9858 allowing accelerated copying there)
9859 - copy the data using rep movsl
9860 - copy the rest. */
9861 else
9862 {
9863 rtx countreg2;
9864 rtx label = NULL;
9865 int desired_alignment = (TARGET_PENTIUMPRO
9866 && (count == 0 || count >= (unsigned int) 260)
9867 ? 8 : UNITS_PER_WORD);
9868
9869       /* In case we don't know anything about the alignment, default to the
9870 	 library version, since it is usually equally fast and results in
9871 	 shorter code.  */
9872 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9873 {
9874 end_sequence ();
9875 return 0;
9876 }
9877
9878 if (TARGET_SINGLE_STRINGOP)
9879 emit_insn (gen_cld ());
9880
9881 countreg2 = gen_reg_rtx (Pmode);
9882 countreg = copy_to_mode_reg (counter_mode, count_exp);
9883
9884       /* We don't use loops to align destination and to copy parts smaller
9885 	 than 4 bytes, because gcc is able to optimize such code better (in
9886 	 the case the destination or the count really is aligned, gcc is often
9887 	 able to predict the branches) and also it is friendlier to the
9888 	 hardware branch prediction.
9889
9890 	 Using loops is beneficial for the generic case, because we can
9891 	 handle small counts using the loops.  Many CPUs (such as Athlon)
9892 	 have large REP prefix setup costs.
9893
9894 	 This is quite costly.  Maybe we can revisit this decision later or
9895 	 add some customizability to this code.  */
9896
9897 if (count == 0 && align < desired_alignment)
9898 {
9899 label = gen_label_rtx ();
9900 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9901 LEU, 0, counter_mode, 1, label);
9902 }
9903 if (align <= 1)
9904 {
9905 rtx label = ix86_expand_aligntest (destreg, 1);
9906 emit_insn (gen_strmovqi (destreg, srcreg));
9907 ix86_adjust_counter (countreg, 1);
9908 emit_label (label);
9909 LABEL_NUSES (label) = 1;
9910 }
9911 if (align <= 2)
9912 {
9913 rtx label = ix86_expand_aligntest (destreg, 2);
9914 emit_insn (gen_strmovhi (destreg, srcreg));
9915 ix86_adjust_counter (countreg, 2);
9916 emit_label (label);
9917 LABEL_NUSES (label) = 1;
9918 }
9919 if (align <= 4 && desired_alignment > 4)
9920 {
9921 rtx label = ix86_expand_aligntest (destreg, 4);
9922 emit_insn (gen_strmovsi (destreg, srcreg));
9923 ix86_adjust_counter (countreg, 4);
9924 emit_label (label);
9925 LABEL_NUSES (label) = 1;
9926 }
9927
9928 if (label && desired_alignment > 4 && !TARGET_64BIT)
9929 {
9930 emit_label (label);
9931 LABEL_NUSES (label) = 1;
9932 label = NULL_RTX;
9933 }
9934 if (!TARGET_SINGLE_STRINGOP)
9935 emit_insn (gen_cld ());
9936 if (TARGET_64BIT)
9937 {
9938 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9939 GEN_INT (3)));
9940 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9941 destreg, srcreg, countreg2));
9942 }
9943 else
9944 {
9945 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9946 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9947 destreg, srcreg, countreg2));
9948 }
9949
9950 if (label)
9951 {
9952 emit_label (label);
9953 LABEL_NUSES (label) = 1;
9954 }
9955 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9956 emit_insn (gen_strmovsi (destreg, srcreg));
9957 if ((align <= 4 || count == 0) && TARGET_64BIT)
9958 {
9959 rtx label = ix86_expand_aligntest (countreg, 4);
9960 emit_insn (gen_strmovsi (destreg, srcreg));
9961 emit_label (label);
9962 LABEL_NUSES (label) = 1;
9963 }
9964 if (align > 2 && count != 0 && (count & 2))
9965 emit_insn (gen_strmovhi (destreg, srcreg));
9966 if (align <= 2 || count == 0)
9967 {
9968 rtx label = ix86_expand_aligntest (countreg, 2);
9969 emit_insn (gen_strmovhi (destreg, srcreg));
9970 emit_label (label);
9971 LABEL_NUSES (label) = 1;
9972 }
9973 if (align > 1 && count != 0 && (count & 1))
9974 emit_insn (gen_strmovqi (destreg, srcreg));
9975 if (align <= 1 || count == 0)
9976 {
9977 rtx label = ix86_expand_aligntest (countreg, 1);
9978 emit_insn (gen_strmovqi (destreg, srcreg));
9979 emit_label (label);
9980 LABEL_NUSES (label) = 1;
9981 }
9982 }
9983
9984 insns = get_insns ();
9985 end_sequence ();
9986
9987 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9988 emit_insns (insns);
9989 return 1;
9990 }
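
/* A rough illustrative sketch of the copy strategy the expander above emits
   for the generic case: align the destination with at most one byte and one
   halfword move, do the bulk with a word-sized "rep movs" (modelled by a
   plain loop here), then mop up the tail.  (Plain C; the real output uses
   hard registers, cld and the rep prefixes instead.)  */

static void
movstr_strategy_sketch (unsigned char *dst, const unsigned char *src,
			unsigned long count)
{
  unsigned long words, i;

  /* Alignment prologue, guarded by ix86_expand_aligntest-style checks.  */
  if (((unsigned long) dst & 1) && count >= 1)
    { *dst++ = *src++; count--; }
  if (((unsigned long) dst & 2) && count >= 2)
    { dst[0] = src[0]; dst[1] = src[1]; dst += 2; src += 2; count -= 2; }

  /* Bulk copy, four bytes at a time (rep movsl).  */
  words = count >> 2;
  for (i = 0; i < words; i++)
    {
      dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3];
      dst += 4; src += 4;
    }

  /* Tail: at most one halfword and one byte move.  */
  if (count & 2)
    { dst[0] = src[0]; dst[1] = src[1]; dst += 2; src += 2; }
  if (count & 1)
    *dst = *src;
}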
9991
9992 /* Expand string clear operation (bzero). Use i386 string operations when
9993 profitable. expand_movstr contains similar code. */
9994 int
9995 ix86_expand_clrstr (src, count_exp, align_exp)
9996 rtx src, count_exp, align_exp;
9997 {
9998 rtx destreg, zeroreg, countreg;
9999 enum machine_mode counter_mode;
10000 HOST_WIDE_INT align = 0;
10001 unsigned HOST_WIDE_INT count = 0;
10002
10003 if (GET_CODE (align_exp) == CONST_INT)
10004 align = INTVAL (align_exp);
10005
10006 /* This simple hack avoids all inlining code and simplifies code below. */
10007 if (!TARGET_ALIGN_STRINGOPS)
10008 align = 32;
10009
10010 if (GET_CODE (count_exp) == CONST_INT)
10011 count = INTVAL (count_exp);
10012   /* Figure out the proper mode for the counter.  For 32 bits it is always
10013      SImode; for 64 bits use SImode when possible, otherwise DImode.
10014      Set count to the number of bytes cleared when known at compile time.  */
10015 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10016 || x86_64_zero_extended_value (count_exp))
10017 counter_mode = SImode;
10018 else
10019 counter_mode = DImode;
10020
10021 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10022
10023 emit_insn (gen_cld ());
10024
10025   /* When optimizing for size, emit a simple rep ; stosb instruction for
10026      counts not divisible by 4.  */
10027
10028 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10029 {
10030 countreg = ix86_zero_extend_to_Pmode (count_exp);
10031 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10032 if (TARGET_64BIT)
10033 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10034 destreg, countreg));
10035 else
10036 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10037 destreg, countreg));
10038 }
10039 else if (count != 0
10040 && (align >= 8
10041 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10042 || optimize_size || count < (unsigned int) 64))
10043 {
10044 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10045 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10046 if (count & ~(size - 1))
10047 {
10048 countreg = copy_to_mode_reg (counter_mode,
10049 GEN_INT ((count >> (size == 4 ? 2 : 3))
10050 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10051 countreg = ix86_zero_extend_to_Pmode (countreg);
10052 if (size == 4)
10053 {
10054 if (TARGET_64BIT)
10055 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10056 destreg, countreg));
10057 else
10058 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10059 destreg, countreg));
10060 }
10061 else
10062 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10063 destreg, countreg));
10064 }
10065 if (size == 8 && (count & 0x04))
10066 emit_insn (gen_strsetsi (destreg,
10067 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10068 if (count & 0x02)
10069 emit_insn (gen_strsethi (destreg,
10070 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10071 if (count & 0x01)
10072 emit_insn (gen_strsetqi (destreg,
10073 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10074 }
10075 else
10076 {
10077 rtx countreg2;
10078 rtx label = NULL;
10079 /* Compute desired alignment of the string operation. */
10080 int desired_alignment = (TARGET_PENTIUMPRO
10081 && (count == 0 || count >= (unsigned int) 260)
10082 ? 8 : UNITS_PER_WORD);
10083
10084       /* In case we don't know anything about the alignment, default to the
10085 	 library version, since it is usually equally fast and results in
10086 	 shorter code.  */
10087 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10088 return 0;
10089
10090 if (TARGET_SINGLE_STRINGOP)
10091 emit_insn (gen_cld ());
10092
10093 countreg2 = gen_reg_rtx (Pmode);
10094 countreg = copy_to_mode_reg (counter_mode, count_exp);
10095 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10096
10097 if (count == 0 && align < desired_alignment)
10098 {
10099 label = gen_label_rtx ();
10100 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10101 LEU, 0, counter_mode, 1, label);
10102 }
10103 if (align <= 1)
10104 {
10105 rtx label = ix86_expand_aligntest (destreg, 1);
10106 emit_insn (gen_strsetqi (destreg,
10107 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10108 ix86_adjust_counter (countreg, 1);
10109 emit_label (label);
10110 LABEL_NUSES (label) = 1;
10111 }
10112 if (align <= 2)
10113 {
10114 rtx label = ix86_expand_aligntest (destreg, 2);
10115 emit_insn (gen_strsethi (destreg,
10116 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10117 ix86_adjust_counter (countreg, 2);
10118 emit_label (label);
10119 LABEL_NUSES (label) = 1;
10120 }
10121 if (align <= 4 && desired_alignment > 4)
10122 {
10123 rtx label = ix86_expand_aligntest (destreg, 4);
10124 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10125 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10126 : zeroreg)));
10127 ix86_adjust_counter (countreg, 4);
10128 emit_label (label);
10129 LABEL_NUSES (label) = 1;
10130 }
10131
10132 if (label && desired_alignment > 4 && !TARGET_64BIT)
10133 {
10134 emit_label (label);
10135 LABEL_NUSES (label) = 1;
10136 label = NULL_RTX;
10137 }
10138
10139 if (!TARGET_SINGLE_STRINGOP)
10140 emit_insn (gen_cld ());
10141 if (TARGET_64BIT)
10142 {
10143 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10144 GEN_INT (3)));
10145 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10146 destreg, countreg2));
10147 }
10148 else
10149 {
10150 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10151 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10152 destreg, countreg2));
10153 }
10154 if (label)
10155 {
10156 emit_label (label);
10157 LABEL_NUSES (label) = 1;
10158 }
10159
10160 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10161 emit_insn (gen_strsetsi (destreg,
10162 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10163 if (TARGET_64BIT && (align <= 4 || count == 0))
10164 {
10165 	  rtx label = ix86_expand_aligntest (countreg, 4);
10166 emit_insn (gen_strsetsi (destreg,
10167 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10168 emit_label (label);
10169 LABEL_NUSES (label) = 1;
10170 }
10171 if (align > 2 && count != 0 && (count & 2))
10172 emit_insn (gen_strsethi (destreg,
10173 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10174 if (align <= 2 || count == 0)
10175 {
10176 rtx label = ix86_expand_aligntest (countreg, 2);
10177 emit_insn (gen_strsethi (destreg,
10178 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10179 emit_label (label);
10180 LABEL_NUSES (label) = 1;
10181 }
10182 if (align > 1 && count != 0 && (count & 1))
10183 emit_insn (gen_strsetqi (destreg,
10184 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10185 if (align <= 1 || count == 0)
10186 {
10187 rtx label = ix86_expand_aligntest (countreg, 1);
10188 emit_insn (gen_strsetqi (destreg,
10189 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10190 emit_label (label);
10191 LABEL_NUSES (label) = 1;
10192 }
10193 }
10194 return 1;
10195 }
10196 /* Expand strlen. */
10197 int
10198 ix86_expand_strlen (out, src, eoschar, align)
10199 rtx out, src, eoschar, align;
10200 {
10201 rtx addr, scratch1, scratch2, scratch3, scratch4;
10202
10203   /* The generic case of the strlen expander is long.  Avoid expanding it
10204      unless TARGET_INLINE_ALL_STRINGOPS.  */
10205
10206 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10207 && !TARGET_INLINE_ALL_STRINGOPS
10208 && !optimize_size
10209 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10210 return 0;
10211
10212 addr = force_reg (Pmode, XEXP (src, 0));
10213 scratch1 = gen_reg_rtx (Pmode);
10214
10215 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10216 && !optimize_size)
10217 {
10218 /* Well it seems that some optimizer does not combine a call like
10219 foo(strlen(bar), strlen(bar));
10220 	 when the move and the subtraction are done here.  It does calculate
10221 the length just once when these instructions are done inside of
10222 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10223 often used and I use one fewer register for the lifetime of
10224 output_strlen_unroll() this is better. */
10225
10226 emit_move_insn (out, addr);
10227
10228 ix86_expand_strlensi_unroll_1 (out, align);
10229
10230 /* strlensi_unroll_1 returns the address of the zero at the end of
10231 the string, like memchr(), so compute the length by subtracting
10232 the start address. */
10233 if (TARGET_64BIT)
10234 emit_insn (gen_subdi3 (out, out, addr));
10235 else
10236 emit_insn (gen_subsi3 (out, out, addr));
10237 }
10238 else
10239 {
10240 scratch2 = gen_reg_rtx (Pmode);
10241 scratch3 = gen_reg_rtx (Pmode);
10242 scratch4 = force_reg (Pmode, constm1_rtx);
10243
10244 emit_move_insn (scratch3, addr);
10245 eoschar = force_reg (QImode, eoschar);
10246
10247 emit_insn (gen_cld ());
10248 if (TARGET_64BIT)
10249 {
10250 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10251 align, scratch4, scratch3));
10252 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10253 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10254 }
10255 else
10256 {
10257 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10258 align, scratch4, scratch3));
10259 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10260 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10261 }
10262 }
10263 return 1;
10264 }
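
/* A small worked sketch of the count arithmetic used in the non-unrolled
   branch above: "repnz scasb" starts with the count register at -1 and
   decrements it once per byte scanned, including the terminating zero, so
   the string length is  ~count - 1  -- which is what the one_cmpl followed
   by add of -1 computes.  (Plain C model, not the RTL actually emitted.)  */

static unsigned long
scasb_length_sketch (const char *s)
{
  unsigned long ecx = (unsigned long) -1;	/* scratch4 = constm1_rtx */

  do
    ecx--;					/* one decrement per byte scanned */
  while (*s++ != 0);

  return ~ecx - 1;				/* one_cmpl, then add -1 */
}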
10265
10266 /* Expand the appropriate insns for doing strlen if not just doing
10267 repnz; scasb
10268
10269 out = result, initialized with the start address
10270 align_rtx = alignment of the address.
10271    scratch = scratch register, initialized with the start address when
10272              not aligned, otherwise undefined
10273
10274    This is just the body.  It needs the initializations mentioned above and
10275    some address computation at the end.  These things are done in i386.md.  */
10276
10277 static void
10278 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10279 rtx out, align_rtx;
10280 {
10281 int align;
10282 rtx tmp;
10283 rtx align_2_label = NULL_RTX;
10284 rtx align_3_label = NULL_RTX;
10285 rtx align_4_label = gen_label_rtx ();
10286 rtx end_0_label = gen_label_rtx ();
10287 rtx mem;
10288 rtx tmpreg = gen_reg_rtx (SImode);
10289 rtx scratch = gen_reg_rtx (SImode);
10290
10291 align = 0;
10292 if (GET_CODE (align_rtx) == CONST_INT)
10293 align = INTVAL (align_rtx);
10294
10295 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10296
10297 /* Is there a known alignment and is it less than 4? */
10298 if (align < 4)
10299 {
10300 rtx scratch1 = gen_reg_rtx (Pmode);
10301 emit_move_insn (scratch1, out);
10302 /* Is there a known alignment and is it not 2? */
10303 if (align != 2)
10304 {
10305 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10306 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10307
10308 /* Leave just the 3 lower bits. */
10309 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10310 NULL_RTX, 0, OPTAB_WIDEN);
10311
10312 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10313 Pmode, 1, align_4_label);
10314 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10315 Pmode, 1, align_2_label);
10316 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10317 Pmode, 1, align_3_label);
10318 }
10319 else
10320 {
10321 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
10322 	     check whether it is aligned to a 4-byte boundary.  */
10323
10324 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10325 NULL_RTX, 0, OPTAB_WIDEN);
10326
10327 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10328 Pmode, 1, align_4_label);
10329 }
10330
10331 mem = gen_rtx_MEM (QImode, out);
10332
10333 /* Now compare the bytes. */
10334
10335       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
10336 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10337 QImode, 1, end_0_label);
10338
10339 /* Increment the address. */
10340 if (TARGET_64BIT)
10341 emit_insn (gen_adddi3 (out, out, const1_rtx));
10342 else
10343 emit_insn (gen_addsi3 (out, out, const1_rtx));
10344
10345 /* Not needed with an alignment of 2 */
10346 if (align != 2)
10347 {
10348 emit_label (align_2_label);
10349
10350 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10351 end_0_label);
10352
10353 if (TARGET_64BIT)
10354 emit_insn (gen_adddi3 (out, out, const1_rtx));
10355 else
10356 emit_insn (gen_addsi3 (out, out, const1_rtx));
10357
10358 emit_label (align_3_label);
10359 }
10360
10361 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10362 end_0_label);
10363
10364 if (TARGET_64BIT)
10365 emit_insn (gen_adddi3 (out, out, const1_rtx));
10366 else
10367 emit_insn (gen_addsi3 (out, out, const1_rtx));
10368 }
10369
10370   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
10371      align this loop; it only makes the program larger and does not help
10372      to speed it up.  */
10373 emit_label (align_4_label);
10374
10375 mem = gen_rtx_MEM (SImode, out);
10376 emit_move_insn (scratch, mem);
10377 if (TARGET_64BIT)
10378 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10379 else
10380 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10381
10382   /* This formula yields a nonzero result iff one of the bytes is zero.
10383      This saves three branches inside the loop and many cycles.  */
10384
10385 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10386 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10387 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10388 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10389 gen_int_mode (0x80808080, SImode)));
10390 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10391 align_4_label);
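  /* Worked example of the formula above (illustrative): for x = 0x40302010
     no byte is zero and ((x - 0x01010101) & ~x & 0x80808080) == 0, so the
     loop branches back to align_4_label; for x = 0x40300010 the zero byte
     makes the borrow propagate into bit 7 of that byte, giving a nonzero
     result and falling through to the fix-up code below.  */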
10392
10393 if (TARGET_CMOVE)
10394 {
10395 rtx reg = gen_reg_rtx (SImode);
10396 rtx reg2 = gen_reg_rtx (Pmode);
10397 emit_move_insn (reg, tmpreg);
10398 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10399
10400 /* If zero is not in the first two bytes, move two bytes forward. */
10401 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10402 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10403 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10404 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10405 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10406 reg,
10407 tmpreg)));
10408 /* Emit lea manually to avoid clobbering of flags. */
10409 emit_insn (gen_rtx_SET (SImode, reg2,
10410 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10411
10412 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10413 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10414 emit_insn (gen_rtx_SET (VOIDmode, out,
10415 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10416 reg2,
10417 out)));
10418
10419 }
10420 else
10421 {
10422 rtx end_2_label = gen_label_rtx ();
10423 /* Is zero in the first two bytes? */
10424
10425 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10426 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10427 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10429 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10430 pc_rtx);
10431 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10432 JUMP_LABEL (tmp) = end_2_label;
10433
10434 /* Not in the first two. Move two bytes forward. */
10435 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10436 if (TARGET_64BIT)
10437 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10438 else
10439 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10440
10441 emit_label (end_2_label);
10442
10443 }
10444
10445 /* Avoid branch in fixing the byte. */
10446 tmpreg = gen_lowpart (QImode, tmpreg);
10447 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10448 if (TARGET_64BIT)
10449 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10450 else
10451 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10452
10453 emit_label (end_0_label);
10454 }
10455
10456 void
10457 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10458 rtx retval, fnaddr, callarg1, callarg2, pop;
10459 {
10460 rtx use = NULL, call;
10461
10462 if (pop == const0_rtx)
10463 pop = NULL;
10464 if (TARGET_64BIT && pop)
10465 abort ();
10466
10467 /* Static functions and indirect calls don't need the pic register. */
10468 if (! TARGET_64BIT && flag_pic
10469 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10470 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10471 use_reg (&use, pic_offset_table_rtx);
10472
10473 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10474 {
10475 rtx al = gen_rtx_REG (QImode, 0);
10476 emit_move_insn (al, callarg2);
10477 use_reg (&use, al);
10478 }
10479
10480 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10481 {
10482 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10483 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10484 }
10485
10486 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10487 if (retval)
10488 call = gen_rtx_SET (VOIDmode, retval, call);
10489 if (pop)
10490 {
10491 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10492 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10493 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10494 }
10495
10496 call = emit_call_insn (call);
10497 if (use)
10498 CALL_INSN_FUNCTION_USAGE (call) = use;
10499 }
10500
10501 \f
10502 /* Clear stack slot assignments remembered from previous functions.
10503 This is called from INIT_EXPANDERS once before RTL is emitted for each
10504 function. */
10505
10506 static void
10507 ix86_init_machine_status (p)
10508 struct function *p;
10509 {
10510 p->machine = (struct machine_function *)
10511 xcalloc (1, sizeof (struct machine_function));
10512 }
10513
10514 /* Mark machine specific bits of P for GC. */
10515 static void
10516 ix86_mark_machine_status (p)
10517 struct function *p;
10518 {
10519 struct machine_function *machine = p->machine;
10520 enum machine_mode mode;
10521 int n;
10522
10523 if (! machine)
10524 return;
10525
10526 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
10527 mode = (enum machine_mode) ((int) mode + 1))
10528 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
10529 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
10530 }
10531
10532 static void
10533 ix86_free_machine_status (p)
10534 struct function *p;
10535 {
10536 free (p->machine);
10537 p->machine = NULL;
10538 }
10539
10540 /* Return a MEM corresponding to a stack slot with mode MODE.
10541 Allocate a new slot if necessary.
10542
10543 The RTL for a function can have several slots available: N is
10544 which slot to use. */
10545
10546 rtx
10547 assign_386_stack_local (mode, n)
10548 enum machine_mode mode;
10549 int n;
10550 {
10551 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10552 abort ();
10553
10554 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10555 ix86_stack_locals[(int) mode][n]
10556 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10557
10558 return ix86_stack_locals[(int) mode][n];
10559 }
10560
10561 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10562
10563 rtx
10564 ix86_tls_get_addr ()
10565 {
10566 static rtx symbol;
10567
10568 if (!symbol)
10569 {
10570 symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10571 ? "___tls_get_addr"
10572 : "__tls_get_addr"));
10573 ggc_add_rtx_root (&symbol, 1);
10574 }
10575
10576 return symbol;
10577 }
10578 \f
10579 /* Calculate the length of the memory address in the instruction
10580 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10581
10582 static int
10583 memory_address_length (addr)
10584 rtx addr;
10585 {
10586 struct ix86_address parts;
10587 rtx base, index, disp;
10588 int len;
10589
10590 if (GET_CODE (addr) == PRE_DEC
10591 || GET_CODE (addr) == POST_INC
10592 || GET_CODE (addr) == PRE_MODIFY
10593 || GET_CODE (addr) == POST_MODIFY)
10594 return 0;
10595
10596 if (! ix86_decompose_address (addr, &parts))
10597 abort ();
10598
10599 base = parts.base;
10600 index = parts.index;
10601 disp = parts.disp;
10602 len = 0;
10603
10604 /* Register Indirect. */
10605 if (base && !index && !disp)
10606 {
10607 /* Special cases: ebp and esp need the two-byte modrm form. */
10608 if (addr == stack_pointer_rtx
10609 || addr == arg_pointer_rtx
10610 || addr == frame_pointer_rtx
10611 || addr == hard_frame_pointer_rtx)
10612 len = 1;
10613 }
10614
10615 /* Direct Addressing. */
10616 else if (disp && !base && !index)
10617 len = 4;
10618
10619 else
10620 {
10621 /* Find the length of the displacement constant. */
10622 if (disp)
10623 {
10624 if (GET_CODE (disp) == CONST_INT
10625 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10626 len = 1;
10627 else
10628 len = 4;
10629 }
10630
10631 /* An index requires the two-byte modrm form. */
10632 if (index)
10633 len += 1;
10634 }
10635
10636 return len;
10637 }
10638
10639 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
10640    is set, expect that the insn has an 8-bit immediate alternative.  */
10641 int
10642 ix86_attr_length_immediate_default (insn, shortform)
10643 rtx insn;
10644 int shortform;
10645 {
10646 int len = 0;
10647 int i;
10648 extract_insn_cached (insn);
10649 for (i = recog_data.n_operands - 1; i >= 0; --i)
10650 if (CONSTANT_P (recog_data.operand[i]))
10651 {
10652 if (len)
10653 abort ();
10654 if (shortform
10655 && GET_CODE (recog_data.operand[i]) == CONST_INT
10656 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10657 len = 1;
10658 else
10659 {
10660 switch (get_attr_mode (insn))
10661 {
10662 case MODE_QI:
10663 len+=1;
10664 break;
10665 case MODE_HI:
10666 len+=2;
10667 break;
10668 case MODE_SI:
10669 len+=4;
10670 break;
10671 	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
10672 case MODE_DI:
10673 len+=4;
10674 break;
10675 default:
10676 fatal_insn ("unknown insn mode", insn);
10677 }
10678 }
10679 }
10680 return len;
10681 }
10682 /* Compute default value for "length_address" attribute. */
10683 int
10684 ix86_attr_length_address_default (insn)
10685 rtx insn;
10686 {
10687 int i;
10688 extract_insn_cached (insn);
10689 for (i = recog_data.n_operands - 1; i >= 0; --i)
10690 if (GET_CODE (recog_data.operand[i]) == MEM)
10691 {
10692 return memory_address_length (XEXP (recog_data.operand[i], 0));
10693 break;
10694 }
10695 return 0;
10696 }
10697 \f
10698 /* Return the maximum number of instructions a cpu can issue. */
10699
10700 static int
10701 ix86_issue_rate ()
10702 {
10703 switch (ix86_cpu)
10704 {
10705 case PROCESSOR_PENTIUM:
10706 case PROCESSOR_K6:
10707 return 2;
10708
10709 case PROCESSOR_PENTIUMPRO:
10710 case PROCESSOR_PENTIUM4:
10711 case PROCESSOR_ATHLON:
10712 return 3;
10713
10714 default:
10715 return 1;
10716 }
10717 }
10718
10719 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10720    by DEP_INSN and nothing else set by DEP_INSN.  */
10721
10722 static int
10723 ix86_flags_dependant (insn, dep_insn, insn_type)
10724 rtx insn, dep_insn;
10725 enum attr_type insn_type;
10726 {
10727 rtx set, set2;
10728
10729 /* Simplify the test for uninteresting insns. */
10730 if (insn_type != TYPE_SETCC
10731 && insn_type != TYPE_ICMOV
10732 && insn_type != TYPE_FCMOV
10733 && insn_type != TYPE_IBR)
10734 return 0;
10735
10736 if ((set = single_set (dep_insn)) != 0)
10737 {
10738 set = SET_DEST (set);
10739 set2 = NULL_RTX;
10740 }
10741 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10742 && XVECLEN (PATTERN (dep_insn), 0) == 2
10743 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10744 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10745 {
10746 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10747       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10748 }
10749 else
10750 return 0;
10751
10752 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10753 return 0;
10754
10755 /* This test is true if the dependent insn reads the flags but
10756 not any other potentially set register. */
10757 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10758 return 0;
10759
10760 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10761 return 0;
10762
10763 return 1;
10764 }
10765
10766 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10767 address with operands set by DEP_INSN. */
10768
10769 static int
10770 ix86_agi_dependant (insn, dep_insn, insn_type)
10771 rtx insn, dep_insn;
10772 enum attr_type insn_type;
10773 {
10774 rtx addr;
10775
10776 if (insn_type == TYPE_LEA
10777 && TARGET_PENTIUM)
10778 {
10779 addr = PATTERN (insn);
10780 if (GET_CODE (addr) == SET)
10781 ;
10782 else if (GET_CODE (addr) == PARALLEL
10783 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10784 addr = XVECEXP (addr, 0, 0);
10785 else
10786 abort ();
10787 addr = SET_SRC (addr);
10788 }
10789 else
10790 {
10791 int i;
10792 extract_insn_cached (insn);
10793 for (i = recog_data.n_operands - 1; i >= 0; --i)
10794 if (GET_CODE (recog_data.operand[i]) == MEM)
10795 {
10796 addr = XEXP (recog_data.operand[i], 0);
10797 goto found;
10798 }
10799 return 0;
10800 found:;
10801 }
10802
10803 return modified_in_p (addr, dep_insn);
10804 }
10805
10806 static int
10807 ix86_adjust_cost (insn, link, dep_insn, cost)
10808 rtx insn, link, dep_insn;
10809 int cost;
10810 {
10811 enum attr_type insn_type, dep_insn_type;
10812 enum attr_memory memory, dep_memory;
10813 rtx set, set2;
10814 int dep_insn_code_number;
10815
10816   /* Anti and output dependencies have zero cost on all CPUs.  */
10817 if (REG_NOTE_KIND (link) != 0)
10818 return 0;
10819
10820 dep_insn_code_number = recog_memoized (dep_insn);
10821
10822 /* If we can't recognize the insns, we can't really do anything. */
10823 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10824 return cost;
10825
10826 insn_type = get_attr_type (insn);
10827 dep_insn_type = get_attr_type (dep_insn);
10828
10829 switch (ix86_cpu)
10830 {
10831 case PROCESSOR_PENTIUM:
10832 /* Address Generation Interlock adds a cycle of latency. */
10833 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10834 cost += 1;
10835
10836 /* ??? Compares pair with jump/setcc. */
10837 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10838 cost = 0;
10839
10840       /* Floating point stores require the value to be ready one cycle earlier.  */
10841 if (insn_type == TYPE_FMOV
10842 && get_attr_memory (insn) == MEMORY_STORE
10843 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10844 cost += 1;
10845 break;
10846
10847 case PROCESSOR_PENTIUMPRO:
10848 memory = get_attr_memory (insn);
10849 dep_memory = get_attr_memory (dep_insn);
10850
10851 /* Since we can't represent delayed latencies of load+operation,
10852 increase the cost here for non-imov insns. */
10853 if (dep_insn_type != TYPE_IMOV
10854 && dep_insn_type != TYPE_FMOV
10855 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10856 cost += 1;
10857
10858 /* INT->FP conversion is expensive. */
10859 if (get_attr_fp_int_src (dep_insn))
10860 cost += 5;
10861
10862 /* There is one cycle extra latency between an FP op and a store. */
10863 if (insn_type == TYPE_FMOV
10864 && (set = single_set (dep_insn)) != NULL_RTX
10865 && (set2 = single_set (insn)) != NULL_RTX
10866 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10867 && GET_CODE (SET_DEST (set2)) == MEM)
10868 cost += 1;
10869
10870       /* Account for the ability of the reorder buffer to hide the latency of
10871 	 a load by executing it in parallel with the previous instruction, when
10872 	 the previous instruction is not needed to compute the address.  */
10873 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10874 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10875 {
10876 	  /* Claim moves to take one cycle, as the core can issue one load
10877 	     at a time and the next load can start a cycle later.  */
10878 if (dep_insn_type == TYPE_IMOV
10879 || dep_insn_type == TYPE_FMOV)
10880 cost = 1;
10881 else if (cost > 1)
10882 cost--;
10883 }
10884 break;
10885
10886 case PROCESSOR_K6:
10887 memory = get_attr_memory (insn);
10888 dep_memory = get_attr_memory (dep_insn);
10889 /* The esp dependency is resolved before the instruction is really
10890 finished. */
10891 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10892 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10893 return 1;
10894
10895 /* Since we can't represent delayed latencies of load+operation,
10896 increase the cost here for non-imov insns. */
10897 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10898 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10899
10900 /* INT->FP conversion is expensive. */
10901 if (get_attr_fp_int_src (dep_insn))
10902 cost += 5;
10903
10904       /* Account for the ability of the reorder buffer to hide the latency of
10905 	 a load by executing it in parallel with the previous instruction, when
10906 	 the previous instruction is not needed to compute the address.  */
10907 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10908 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10909 {
10910 	  /* Claim moves to take one cycle, as the core can issue one load
10911 	     at a time and the next load can start a cycle later.  */
10912 if (dep_insn_type == TYPE_IMOV
10913 || dep_insn_type == TYPE_FMOV)
10914 cost = 1;
10915 else if (cost > 2)
10916 cost -= 2;
10917 else
10918 cost = 1;
10919 }
10920 break;
10921
10922 case PROCESSOR_ATHLON:
10923 memory = get_attr_memory (insn);
10924 dep_memory = get_attr_memory (dep_insn);
10925
10926 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10927 {
10928 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10929 cost += 2;
10930 else
10931 cost += 3;
10932 }
10933       /* Account for the ability of the reorder buffer to hide the latency of
10934 	 a load by executing it in parallel with the previous instruction, when
10935 	 the previous instruction is not needed to compute the address.  */
10936 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10937 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10938 {
10939 	  /* Claim moves to take one cycle, as the core can issue one load
10940 	     at a time and the next load can start a cycle later.  */
10941 if (dep_insn_type == TYPE_IMOV
10942 || dep_insn_type == TYPE_FMOV)
10943 cost = 0;
10944 else if (cost >= 3)
10945 cost -= 3;
10946 else
10947 cost = 0;
10948 }
10949
10950 default:
10951 break;
10952 }
10953
10954 return cost;
10955 }
10956
10957 static union
10958 {
10959 struct ppro_sched_data
10960 {
10961 rtx decode[3];
10962 int issued_this_cycle;
10963 } ppro;
10964 } ix86_sched_data;
10965
10966 static enum attr_ppro_uops
10967 ix86_safe_ppro_uops (insn)
10968 rtx insn;
10969 {
10970 if (recog_memoized (insn) >= 0)
10971 return get_attr_ppro_uops (insn);
10972 else
10973 return PPRO_UOPS_MANY;
10974 }
10975
10976 static void
10977 ix86_dump_ppro_packet (dump)
10978 FILE *dump;
10979 {
10980 if (ix86_sched_data.ppro.decode[0])
10981 {
10982 fprintf (dump, "PPRO packet: %d",
10983 INSN_UID (ix86_sched_data.ppro.decode[0]));
10984 if (ix86_sched_data.ppro.decode[1])
10985 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10986 if (ix86_sched_data.ppro.decode[2])
10987 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10988 fputc ('\n', dump);
10989 }
10990 }
10991
10992 /* We're beginning a new block. Initialize data structures as necessary. */
10993
10994 static void
10995 ix86_sched_init (dump, sched_verbose, veclen)
10996 FILE *dump ATTRIBUTE_UNUSED;
10997 int sched_verbose ATTRIBUTE_UNUSED;
10998 int veclen ATTRIBUTE_UNUSED;
10999 {
11000 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11001 }
11002
11003 /* Shift INSN to SLOT, and shift everything else down. */
11004
11005 static void
11006 ix86_reorder_insn (insnp, slot)
11007 rtx *insnp, *slot;
11008 {
11009 if (insnp != slot)
11010 {
11011 rtx insn = *insnp;
11012 do
11013 insnp[0] = insnp[1];
11014 while (++insnp != slot);
11015 *insnp = insn;
11016 }
11017 }
11018
11019 static void
11020 ix86_sched_reorder_ppro (ready, e_ready)
11021 rtx *ready;
11022 rtx *e_ready;
11023 {
11024 rtx decode[3];
11025 enum attr_ppro_uops cur_uops;
11026 int issued_this_cycle;
11027 rtx *insnp;
11028 int i;
11029
11030 /* At this point .ppro.decode contains the state of the three
11031 decoders from last "cycle". That is, those insns that were
11032 actually independent. But here we're scheduling for the
11033 decoder, and we may find things that are decodable in the
11034 same cycle. */
11035
11036 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11037 issued_this_cycle = 0;
11038
11039 insnp = e_ready;
11040 cur_uops = ix86_safe_ppro_uops (*insnp);
11041
11042   /* If the decoders are empty, and we have a complex insn at the
11043      head of the priority queue, let it issue without complaint.  */
11044 if (decode[0] == NULL)
11045 {
11046 if (cur_uops == PPRO_UOPS_MANY)
11047 {
11048 decode[0] = *insnp;
11049 goto ppro_done;
11050 }
11051
11052       /* Otherwise, search for a 2-4 uop insn to issue.  */
11053 while (cur_uops != PPRO_UOPS_FEW)
11054 {
11055 if (insnp == ready)
11056 break;
11057 cur_uops = ix86_safe_ppro_uops (*--insnp);
11058 }
11059
11060 /* If so, move it to the head of the line. */
11061 if (cur_uops == PPRO_UOPS_FEW)
11062 ix86_reorder_insn (insnp, e_ready);
11063
11064 /* Issue the head of the queue. */
11065 issued_this_cycle = 1;
11066 decode[0] = *e_ready--;
11067 }
11068
11069 /* Look for simple insns to fill in the other two slots. */
11070 for (i = 1; i < 3; ++i)
11071 if (decode[i] == NULL)
11072 {
11073 if (ready > e_ready)
11074 goto ppro_done;
11075
11076 insnp = e_ready;
11077 cur_uops = ix86_safe_ppro_uops (*insnp);
11078 while (cur_uops != PPRO_UOPS_ONE)
11079 {
11080 if (insnp == ready)
11081 break;
11082 cur_uops = ix86_safe_ppro_uops (*--insnp);
11083 }
11084
11085 /* Found one. Move it to the head of the queue and issue it. */
11086 if (cur_uops == PPRO_UOPS_ONE)
11087 {
11088 ix86_reorder_insn (insnp, e_ready);
11089 decode[i] = *e_ready--;
11090 issued_this_cycle++;
11091 continue;
11092 }
11093
11094 /* ??? Didn't find one. Ideally, here we would do a lazy split
11095 of 2-uop insns, issue one and queue the other. */
11096 }
11097
11098 ppro_done:
11099 if (issued_this_cycle == 0)
11100 issued_this_cycle = 1;
11101 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11102 }
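
/* An illustrative sketch (assumed uop classes, not the real data structures)
   of the slotting policy above: the first PPro-class decoder accepts any
   insn, while the other two only accept single-uop insns, so at most one
   "wide" insn plus up to two simple ones can decode in one cycle.  */

static int
ppro_slot_sketch (const int *uop_class, int n_ready)
{
  /* uop_class[i] is 1 for a one-uop insn, 2 for a few-uop insn and 3 for a
     microcoded (many-uop) insn; insns are taken from the front of the
     ready list.  Returns how many would be slotted this cycle.  */
  int slots = 0, i;

  for (i = 0; i < n_ready && slots < 3; i++)
    {
      if (slots == 0)
	slots++;			/* decoder 0 takes anything */
      else if (uop_class[i] == 1)
	slots++;			/* decoders 1 and 2: one uop only */
      else
	break;
    }
  return slots ? slots : 1;
}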
11103
11104 /* We are about to begin issuing insns for this clock cycle.
11105    Override the default sort algorithm to better slot instructions.  */
11106 static int
11107 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11108 FILE *dump ATTRIBUTE_UNUSED;
11109 int sched_verbose ATTRIBUTE_UNUSED;
11110 rtx *ready;
11111 int *n_readyp;
11112 int clock_var ATTRIBUTE_UNUSED;
11113 {
11114 int n_ready = *n_readyp;
11115 rtx *e_ready = ready + n_ready - 1;
11116
11117 /* Make sure to go ahead and initialize key items in
11118 ix86_sched_data if we are not going to bother trying to
11119 reorder the ready queue. */
11120 if (n_ready < 2)
11121 {
11122 ix86_sched_data.ppro.issued_this_cycle = 1;
11123 goto out;
11124 }
11125
11126 switch (ix86_cpu)
11127 {
11128 default:
11129 break;
11130
11131 case PROCESSOR_PENTIUMPRO:
11132 ix86_sched_reorder_ppro (ready, e_ready);
11133 break;
11134 }
11135
11136 out:
11137 return ix86_issue_rate ();
11138 }
11139
11140 /* We are about to issue INSN. Return the number of insns left on the
11141 ready queue that can be issued this cycle. */
11142
11143 static int
11144 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11145 FILE *dump;
11146 int sched_verbose;
11147 rtx insn;
11148 int can_issue_more;
11149 {
11150 int i;
11151 switch (ix86_cpu)
11152 {
11153 default:
11154 return can_issue_more - 1;
11155
11156 case PROCESSOR_PENTIUMPRO:
11157 {
11158 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11159
11160 if (uops == PPRO_UOPS_MANY)
11161 {
11162 if (sched_verbose)
11163 ix86_dump_ppro_packet (dump);
11164 ix86_sched_data.ppro.decode[0] = insn;
11165 ix86_sched_data.ppro.decode[1] = NULL;
11166 ix86_sched_data.ppro.decode[2] = NULL;
11167 if (sched_verbose)
11168 ix86_dump_ppro_packet (dump);
11169 ix86_sched_data.ppro.decode[0] = NULL;
11170 }
11171 else if (uops == PPRO_UOPS_FEW)
11172 {
11173 if (sched_verbose)
11174 ix86_dump_ppro_packet (dump);
11175 ix86_sched_data.ppro.decode[0] = insn;
11176 ix86_sched_data.ppro.decode[1] = NULL;
11177 ix86_sched_data.ppro.decode[2] = NULL;
11178 }
11179 else
11180 {
11181 for (i = 0; i < 3; ++i)
11182 if (ix86_sched_data.ppro.decode[i] == NULL)
11183 {
11184 ix86_sched_data.ppro.decode[i] = insn;
11185 break;
11186 }
11187 if (i == 3)
11188 abort ();
11189 if (i == 2)
11190 {
11191 if (sched_verbose)
11192 ix86_dump_ppro_packet (dump);
11193 ix86_sched_data.ppro.decode[0] = NULL;
11194 ix86_sched_data.ppro.decode[1] = NULL;
11195 ix86_sched_data.ppro.decode[2] = NULL;
11196 }
11197 }
11198 }
11199 return --ix86_sched_data.ppro.issued_this_cycle;
11200 }
11201 }
11202
11203 static int
11204 ia32_use_dfa_pipeline_interface ()
11205 {
11206 if (ix86_cpu == PROCESSOR_PENTIUM)
11207 return 1;
11208 return 0;
11209 }
11210
11211 /* How many alternative schedules to try. This should be as wide as the
11212 scheduling freedom in the DFA, but no wider. Making this value too
11213    large results in extra work for the scheduler.  */
11214
11215 static int
11216 ia32_multipass_dfa_lookahead ()
11217 {
11218 if (ix86_cpu == PROCESSOR_PENTIUM)
11219 return 2;
11220 else
11221 return 0;
11222 }
11223
11224 \f
11225 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11226 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11227 appropriate. */
11228
11229 void
11230 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11231 rtx insns;
11232 rtx dstref, srcref, dstreg, srcreg;
11233 {
11234 rtx insn;
11235
11236 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11237 if (INSN_P (insn))
11238 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11239 dstreg, srcreg);
11240 }
11241
11242 /* Subroutine of above to actually do the updating by recursively walking
11243 the rtx. */
11244
11245 static void
11246 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11247 rtx x;
11248 rtx dstref, srcref, dstreg, srcreg;
11249 {
11250 enum rtx_code code = GET_CODE (x);
11251 const char *format_ptr = GET_RTX_FORMAT (code);
11252 int i, j;
11253
11254 if (code == MEM && XEXP (x, 0) == dstreg)
11255 MEM_COPY_ATTRIBUTES (x, dstref);
11256 else if (code == MEM && XEXP (x, 0) == srcreg)
11257 MEM_COPY_ATTRIBUTES (x, srcref);
11258
11259 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11260 {
11261 if (*format_ptr == 'e')
11262 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11263 dstreg, srcreg);
11264 else if (*format_ptr == 'E')
11265 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11266 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11267 dstreg, srcreg);
11268 }
11269 }
11270 \f
11271 /* Compute the alignment given to a constant that is being placed in memory.
11272 EXP is the constant and ALIGN is the alignment that the object would
11273 ordinarily have.
11274 The value of this function is used instead of that alignment to align
11275 the object. */
11276
11277 int
11278 ix86_constant_alignment (exp, align)
11279 tree exp;
11280 int align;
11281 {
11282 if (TREE_CODE (exp) == REAL_CST)
11283 {
11284 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11285 return 64;
11286 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11287 return 128;
11288 }
11289 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11290 && align < 256)
11291 return 256;
11292
11293 return align;
11294 }
11295
11296 /* Compute the alignment for a static variable.
11297 TYPE is the data type, and ALIGN is the alignment that
11298 the object would ordinarily have. The value of this function is used
11299 instead of that alignment to align the object. */
11300
11301 int
11302 ix86_data_alignment (type, align)
11303 tree type;
11304 int align;
11305 {
11306 if (AGGREGATE_TYPE_P (type)
11307 && TYPE_SIZE (type)
11308 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11309 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11310 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11311 return 256;
11312
11313   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11314      to a 16-byte boundary.  */
11315 if (TARGET_64BIT)
11316 {
11317 if (AGGREGATE_TYPE_P (type)
11318 && TYPE_SIZE (type)
11319 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11320 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11321 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11322 return 128;
11323 }
11324
11325 if (TREE_CODE (type) == ARRAY_TYPE)
11326 {
11327 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11328 return 64;
11329 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11330 return 128;
11331 }
11332 else if (TREE_CODE (type) == COMPLEX_TYPE)
11333 {
11334
11335 if (TYPE_MODE (type) == DCmode && align < 64)
11336 return 64;
11337 if (TYPE_MODE (type) == XCmode && align < 128)
11338 return 128;
11339 }
11340 else if ((TREE_CODE (type) == RECORD_TYPE
11341 || TREE_CODE (type) == UNION_TYPE
11342 || TREE_CODE (type) == QUAL_UNION_TYPE)
11343 && TYPE_FIELDS (type))
11344 {
11345 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11346 return 64;
11347 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11348 return 128;
11349 }
11350 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11351 || TREE_CODE (type) == INTEGER_TYPE)
11352 {
11353 if (TYPE_MODE (type) == DFmode && align < 64)
11354 return 64;
11355 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11356 return 128;
11357 }
11358
11359 return align;
11360 }
11361
11362 /* Compute the alignment for a local variable.
11363 TYPE is the data type, and ALIGN is the alignment that
11364 the object would ordinarily have. The value of this macro is used
11365 instead of that alignment to align the object. */
11366
11367 int
11368 ix86_local_alignment (type, align)
11369 tree type;
11370 int align;
11371 {
11372   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11373      to a 16-byte boundary.  */
11374 if (TARGET_64BIT)
11375 {
11376 if (AGGREGATE_TYPE_P (type)
11377 && TYPE_SIZE (type)
11378 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11379 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11380 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11381 return 128;
11382 }
11383 if (TREE_CODE (type) == ARRAY_TYPE)
11384 {
11385 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11386 return 64;
11387 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11388 return 128;
11389 }
11390 else if (TREE_CODE (type) == COMPLEX_TYPE)
11391 {
11392 if (TYPE_MODE (type) == DCmode && align < 64)
11393 return 64;
11394 if (TYPE_MODE (type) == XCmode && align < 128)
11395 return 128;
11396 }
11397 else if ((TREE_CODE (type) == RECORD_TYPE
11398 || TREE_CODE (type) == UNION_TYPE
11399 || TREE_CODE (type) == QUAL_UNION_TYPE)
11400 && TYPE_FIELDS (type))
11401 {
11402 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11403 return 64;
11404 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11405 return 128;
11406 }
11407 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11408 || TREE_CODE (type) == INTEGER_TYPE)
11409 {
11410
11411 if (TYPE_MODE (type) == DFmode && align < 64)
11412 return 64;
11413 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11414 return 128;
11415 }
11416 return align;
11417 }
11418 \f
11419 /* Emit RTL insns to initialize the variable parts of a trampoline.
11420 FNADDR is an RTX for the address of the function's pure code.
11421 CXT is an RTX for the static chain value for the function. */
11422 void
11423 x86_initialize_trampoline (tramp, fnaddr, cxt)
11424 rtx tramp, fnaddr, cxt;
11425 {
11426 if (!TARGET_64BIT)
11427 {
11428 /* Compute offset from the end of the jmp to the target function. */
11429 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11430 plus_constant (tramp, 10),
11431 NULL_RTX, 1, OPTAB_DIRECT);
11432 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11433 gen_int_mode (0xb9, QImode));
11434 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11435 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11436 gen_int_mode (0xe9, QImode));
11437 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11438 }
11439 else
11440 {
11441 int offset = 0;
11442       /* Try to load the address using the shorter movl instead of movabs.
11443 	 We may want to support movq for kernel mode, but the kernel does not
11444 	 use trampolines at the moment.  */
11445 if (x86_64_zero_extended_value (fnaddr))
11446 {
11447 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11448 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11449 gen_int_mode (0xbb41, HImode));
11450 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11451 gen_lowpart (SImode, fnaddr));
11452 offset += 6;
11453 }
11454 else
11455 {
11456 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11457 gen_int_mode (0xbb49, HImode));
11458 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11459 fnaddr);
11460 offset += 10;
11461 }
11462 /* Load static chain using movabs to r10. */
11463 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11464 gen_int_mode (0xba49, HImode));
11465 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11466 cxt);
11467 offset += 10;
11468       /* Jump to r11.  */
11469 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11470 gen_int_mode (0xff49, HImode));
11471 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11472 gen_int_mode (0xe3, QImode));
11473 offset += 3;
11474 if (offset > TRAMPOLINE_SIZE)
11475 abort ();
11476 }
11477 }
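
/* For reference, an informal sketch of the byte sequences the function above
   emits (immediates are filled in at run time):

     32-bit:   b9 <cxt:4>          mov    $cxt, %ecx
               e9 <disp:4>         jmp    fnaddr     (disp = fnaddr - (tramp + 10))

     64-bit:   41 bb <fnaddr:4>    mov    $fnaddr, %r11d   (zero-extended form)
        or     49 bb <fnaddr:8>    movabs $fnaddr, %r11
               49 ba <cxt:8>       movabs $cxt, %r10
               49 ff e3            jmp    *%r11

   The 16-bit constants are stored as HImode values, which is why they appear
   byte-swapped (e.g. 0xbb49) in the code above.  */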
11478 \f
11479 #define def_builtin(MASK, NAME, TYPE, CODE) \
11480 do { \
11481 if ((MASK) & target_flags) \
11482 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
11483 } while (0)
11484
11485 struct builtin_description
11486 {
11487 const unsigned int mask;
11488 const enum insn_code icode;
11489 const char *const name;
11490 const enum ix86_builtins code;
11491 const enum rtx_code comparison;
11492 const unsigned int flag;
11493 };
11494
11495 /* Used for builtins that are enabled both by -msse and -msse2. */
11496 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11497
11498 static const struct builtin_description bdesc_comi[] =
11499 {
11500 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11501 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11502 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11503 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11504 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11505 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11506 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11507 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11508 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11509 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11510 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11511 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11512 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11513 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11514 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11515 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11516 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11517 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11518 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11519 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11520 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11521 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11522 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11523 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11524 };
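
/* Informal note on the table above: the comparison/flag pair selects how the
   comi/ucomi result is tested.  GT and GE have no direct encoding, so they
   appear here as LT/LE with the flag set, which requests swapped operands
   when the builtin is expanded (see ix86_expand_sse_comi later in this
   file).  */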
11525
11526 static const struct builtin_description bdesc_2arg[] =
11527 {
11528 /* SSE */
11529 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11530 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11531 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11532 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11533 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11534 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11535 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11536 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11537
11538 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11539 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11540 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11541 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11542 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11543 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11544 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11545 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11546 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11547 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11548 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11549 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11550 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11551 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11552 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11553 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11554 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11555 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11556 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11557 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11558 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11559 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11560 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11561 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11562
11563 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11564 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11565 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11566 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11567
11568 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11569 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11570 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11571 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11572 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11573
11574 /* MMX */
11575 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11576 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11577 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11578 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11579 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11580 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11581
11582 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11583 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11584 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11585 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11586 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11587 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11588 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11589 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11590
11591 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11592 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11593 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11594
11595 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11596 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11597 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11598 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11599
11600 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11601 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11602
11603 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11604 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11605 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11606 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11607 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11608 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11609
11610 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11611 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11612 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11613 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11614
11615 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11616 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11617 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11618 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11619 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11620 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11621
11622 /* Special. */
11623 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11624 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11625 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11626
11627 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11628 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11629
11630 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11631 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11632 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11633 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11634 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11635 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11636
11637 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11638 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11639 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11640 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11641 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11642 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11643
11644 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11645 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11646 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11647 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11648
11649 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11650 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11651
11652 /* SSE2 */
11653 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11654 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11655 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11656 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11657 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11658 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11659 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11660 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11661
11662 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11663 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11664 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11665 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11666 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11667 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11668 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11669 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11670 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11671 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11672 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11673 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11674 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11675 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11676 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11677 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11678 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11679 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11680 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11681 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11682 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11683 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11684 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11685 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11686
11687 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11688 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11689 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11690 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11691
11692 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11693 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11694 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11695 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11696
11697 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11698 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11699 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11700
11701 /* SSE2 MMX */
11702 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11703 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11704 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11705 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11706 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11707 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11708 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11709 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11710
11711   { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11712   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11713   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11714   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11715   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11716   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11717   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11718   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11719
11720 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11721 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11722 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11723 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11724
11725 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11726 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11728 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11729
11730 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11731 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11732
11733 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11735 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11736 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11737 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11738 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11739
11740 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11743 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11744
11745 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11751
11752 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11754 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11755
11756 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11757 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11758
11759 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11760 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11761 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11762 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11764 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11765
11766 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11767 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11768 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11769 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11770 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11771 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11772
11773 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11774 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11775 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11776 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11777
11778 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11779
11780 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11781 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11782 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11783 };
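
/* Entries above whose name field is zero are skipped by the generic
   registration loop in ix86_init_mmx_sse_builtins; they get hand-written
   prototypes via explicit def_builtin calls further down, while their icode
   is still looked up in this table at expansion time
   (ix86_expand_builtin).  */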
11784
11785 static const struct builtin_description bdesc_1arg[] =
11786 {
11787 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11788 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11789
11790 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11791 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11792 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11793
11794 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11795 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11796 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11797 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11798
11799 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11800 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11801 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11802
11803 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11804
11805 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11807
11808 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11813
11814 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11815
11816 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11818
11819 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11820 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11821 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11822 };
11823
11824 void
11825 ix86_init_builtins ()
11826 {
11827 if (TARGET_MMX)
11828 ix86_init_mmx_sse_builtins ();
11829 }
11830
11831 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11832    is zero.  Otherwise, if TARGET_SSE is not set, only define the MMX
11833    builtins.  */
11834 static void
11835 ix86_init_mmx_sse_builtins ()
11836 {
11837 const struct builtin_description * d;
11838 size_t i;
11839 tree endlink = void_list_node;
11840
11841 tree pchar_type_node = build_pointer_type (char_type_node);
11842 tree pfloat_type_node = build_pointer_type (float_type_node);
11843 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11844 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11845 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11846
11847 /* Comparisons. */
11848 tree int_ftype_v4sf_v4sf
11849 = build_function_type (integer_type_node,
11850 tree_cons (NULL_TREE, V4SF_type_node,
11851 tree_cons (NULL_TREE,
11852 V4SF_type_node,
11853 endlink)));
11854 tree v4si_ftype_v4sf_v4sf
11855 = build_function_type (V4SI_type_node,
11856 tree_cons (NULL_TREE, V4SF_type_node,
11857 tree_cons (NULL_TREE,
11858 V4SF_type_node,
11859 endlink)));
11860 /* MMX/SSE/integer conversions. */
11861 tree int_ftype_v4sf
11862 = build_function_type (integer_type_node,
11863 tree_cons (NULL_TREE, V4SF_type_node,
11864 endlink));
11865 tree int_ftype_v8qi
11866 = build_function_type (integer_type_node,
11867 tree_cons (NULL_TREE, V8QI_type_node,
11868 endlink));
11869 tree v4sf_ftype_v4sf_int
11870 = build_function_type (V4SF_type_node,
11871 tree_cons (NULL_TREE, V4SF_type_node,
11872 tree_cons (NULL_TREE, integer_type_node,
11873 endlink)));
11874 tree v4sf_ftype_v4sf_v2si
11875 = build_function_type (V4SF_type_node,
11876 tree_cons (NULL_TREE, V4SF_type_node,
11877 tree_cons (NULL_TREE, V2SI_type_node,
11878 endlink)));
11879 tree int_ftype_v4hi_int
11880 = build_function_type (integer_type_node,
11881 tree_cons (NULL_TREE, V4HI_type_node,
11882 tree_cons (NULL_TREE, integer_type_node,
11883 endlink)));
11884 tree v4hi_ftype_v4hi_int_int
11885 = build_function_type (V4HI_type_node,
11886 tree_cons (NULL_TREE, V4HI_type_node,
11887 tree_cons (NULL_TREE, integer_type_node,
11888 tree_cons (NULL_TREE,
11889 integer_type_node,
11890 endlink))));
11891 /* Miscellaneous. */
11892 tree v8qi_ftype_v4hi_v4hi
11893 = build_function_type (V8QI_type_node,
11894 tree_cons (NULL_TREE, V4HI_type_node,
11895 tree_cons (NULL_TREE, V4HI_type_node,
11896 endlink)));
11897 tree v4hi_ftype_v2si_v2si
11898 = build_function_type (V4HI_type_node,
11899 tree_cons (NULL_TREE, V2SI_type_node,
11900 tree_cons (NULL_TREE, V2SI_type_node,
11901 endlink)));
11902 tree v4sf_ftype_v4sf_v4sf_int
11903 = build_function_type (V4SF_type_node,
11904 tree_cons (NULL_TREE, V4SF_type_node,
11905 tree_cons (NULL_TREE, V4SF_type_node,
11906 tree_cons (NULL_TREE,
11907 integer_type_node,
11908 endlink))));
11909 tree v2si_ftype_v4hi_v4hi
11910 = build_function_type (V2SI_type_node,
11911 tree_cons (NULL_TREE, V4HI_type_node,
11912 tree_cons (NULL_TREE, V4HI_type_node,
11913 endlink)));
11914 tree v4hi_ftype_v4hi_int
11915 = build_function_type (V4HI_type_node,
11916 tree_cons (NULL_TREE, V4HI_type_node,
11917 tree_cons (NULL_TREE, integer_type_node,
11918 endlink)));
11919 tree v4hi_ftype_v4hi_di
11920 = build_function_type (V4HI_type_node,
11921 tree_cons (NULL_TREE, V4HI_type_node,
11922 tree_cons (NULL_TREE,
11923 long_long_integer_type_node,
11924 endlink)));
11925 tree v2si_ftype_v2si_di
11926 = build_function_type (V2SI_type_node,
11927 tree_cons (NULL_TREE, V2SI_type_node,
11928 tree_cons (NULL_TREE,
11929 long_long_integer_type_node,
11930 endlink)));
11931 tree void_ftype_void
11932 = build_function_type (void_type_node, endlink);
11933 tree void_ftype_unsigned
11934 = build_function_type (void_type_node,
11935 tree_cons (NULL_TREE, unsigned_type_node,
11936 endlink));
11937 tree unsigned_ftype_void
11938 = build_function_type (unsigned_type_node, endlink);
11939 tree di_ftype_void
11940 = build_function_type (long_long_unsigned_type_node, endlink);
11941 tree v4sf_ftype_void
11942 = build_function_type (V4SF_type_node, endlink);
11943 tree v2si_ftype_v4sf
11944 = build_function_type (V2SI_type_node,
11945 tree_cons (NULL_TREE, V4SF_type_node,
11946 endlink));
11947 /* Loads/stores. */
11948 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11949 tree_cons (NULL_TREE, V8QI_type_node,
11950 tree_cons (NULL_TREE,
11951 pchar_type_node,
11952 endlink)));
11953 tree void_ftype_v8qi_v8qi_pchar
11954 = build_function_type (void_type_node, maskmovq_args);
11955 tree v4sf_ftype_pfloat
11956 = build_function_type (V4SF_type_node,
11957 tree_cons (NULL_TREE, pfloat_type_node,
11958 endlink));
11959 /* @@@ the type is bogus */
11960 tree v4sf_ftype_v4sf_pv2si
11961 = build_function_type (V4SF_type_node,
11962 tree_cons (NULL_TREE, V4SF_type_node,
11963 tree_cons (NULL_TREE, pv2si_type_node,
11964 endlink)));
11965 tree void_ftype_pv2si_v4sf
11966 = build_function_type (void_type_node,
11967 tree_cons (NULL_TREE, pv2si_type_node,
11968 tree_cons (NULL_TREE, V4SF_type_node,
11969 endlink)));
11970 tree void_ftype_pfloat_v4sf
11971 = build_function_type (void_type_node,
11972 tree_cons (NULL_TREE, pfloat_type_node,
11973 tree_cons (NULL_TREE, V4SF_type_node,
11974 endlink)));
11975 tree void_ftype_pdi_di
11976 = build_function_type (void_type_node,
11977 tree_cons (NULL_TREE, pdi_type_node,
11978 tree_cons (NULL_TREE,
11979 long_long_unsigned_type_node,
11980 endlink)));
11981 tree void_ftype_pv2di_v2di
11982 = build_function_type (void_type_node,
11983 tree_cons (NULL_TREE, pv2di_type_node,
11984 tree_cons (NULL_TREE,
11985 V2DI_type_node,
11986 endlink)));
11987 /* Normal vector unops. */
11988 tree v4sf_ftype_v4sf
11989 = build_function_type (V4SF_type_node,
11990 tree_cons (NULL_TREE, V4SF_type_node,
11991 endlink));
11992
11993 /* Normal vector binops. */
11994 tree v4sf_ftype_v4sf_v4sf
11995 = build_function_type (V4SF_type_node,
11996 tree_cons (NULL_TREE, V4SF_type_node,
11997 tree_cons (NULL_TREE, V4SF_type_node,
11998 endlink)));
11999 tree v8qi_ftype_v8qi_v8qi
12000 = build_function_type (V8QI_type_node,
12001 tree_cons (NULL_TREE, V8QI_type_node,
12002 tree_cons (NULL_TREE, V8QI_type_node,
12003 endlink)));
12004 tree v4hi_ftype_v4hi_v4hi
12005 = build_function_type (V4HI_type_node,
12006 tree_cons (NULL_TREE, V4HI_type_node,
12007 tree_cons (NULL_TREE, V4HI_type_node,
12008 endlink)));
12009 tree v2si_ftype_v2si_v2si
12010 = build_function_type (V2SI_type_node,
12011 tree_cons (NULL_TREE, V2SI_type_node,
12012 tree_cons (NULL_TREE, V2SI_type_node,
12013 endlink)));
12014 tree di_ftype_di_di
12015 = build_function_type (long_long_unsigned_type_node,
12016 tree_cons (NULL_TREE, long_long_unsigned_type_node,
12017 tree_cons (NULL_TREE,
12018 long_long_unsigned_type_node,
12019 endlink)));
12020
12021 tree v2si_ftype_v2sf
12022 = build_function_type (V2SI_type_node,
12023 tree_cons (NULL_TREE, V2SF_type_node,
12024 endlink));
12025 tree v2sf_ftype_v2si
12026 = build_function_type (V2SF_type_node,
12027 tree_cons (NULL_TREE, V2SI_type_node,
12028 endlink));
12029 tree v2si_ftype_v2si
12030 = build_function_type (V2SI_type_node,
12031 tree_cons (NULL_TREE, V2SI_type_node,
12032 endlink));
12033 tree v2sf_ftype_v2sf
12034 = build_function_type (V2SF_type_node,
12035 tree_cons (NULL_TREE, V2SF_type_node,
12036 endlink));
12037 tree v2sf_ftype_v2sf_v2sf
12038 = build_function_type (V2SF_type_node,
12039 tree_cons (NULL_TREE, V2SF_type_node,
12040 tree_cons (NULL_TREE,
12041 V2SF_type_node,
12042 endlink)));
12043 tree v2si_ftype_v2sf_v2sf
12044 = build_function_type (V2SI_type_node,
12045 tree_cons (NULL_TREE, V2SF_type_node,
12046 tree_cons (NULL_TREE,
12047 V2SF_type_node,
12048 endlink)));
12049 tree pint_type_node = build_pointer_type (integer_type_node);
12050 tree pdouble_type_node = build_pointer_type (double_type_node);
12051 tree int_ftype_v2df_v2df
12052 = build_function_type (integer_type_node,
12053 tree_cons (NULL_TREE, V2DF_type_node,
12054 tree_cons (NULL_TREE, V2DF_type_node, endlink)));
12055
12056 tree ti_ftype_void
12057 = build_function_type (intTI_type_node, endlink);
12058 tree ti_ftype_ti_ti
12059 = build_function_type (intTI_type_node,
12060 tree_cons (NULL_TREE, intTI_type_node,
12061 tree_cons (NULL_TREE, intTI_type_node,
12062 endlink)));
12063 tree void_ftype_pvoid
12064 = build_function_type (void_type_node,
12065 tree_cons (NULL_TREE, ptr_type_node, endlink));
12066 tree v2di_ftype_di
12067 = build_function_type (V2DI_type_node,
12068 tree_cons (NULL_TREE, long_long_unsigned_type_node,
12069 endlink));
12070 tree v4sf_ftype_v4si
12071 = build_function_type (V4SF_type_node,
12072 tree_cons (NULL_TREE, V4SI_type_node, endlink));
12073 tree v4si_ftype_v4sf
12074 = build_function_type (V4SI_type_node,
12075 tree_cons (NULL_TREE, V4SF_type_node, endlink));
12076 tree v2df_ftype_v4si
12077 = build_function_type (V2DF_type_node,
12078 tree_cons (NULL_TREE, V4SI_type_node, endlink));
12079 tree v4si_ftype_v2df
12080 = build_function_type (V4SI_type_node,
12081 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12082 tree v2si_ftype_v2df
12083 = build_function_type (V2SI_type_node,
12084 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12085 tree v4sf_ftype_v2df
12086 = build_function_type (V4SF_type_node,
12087 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12088 tree v2df_ftype_v2si
12089 = build_function_type (V2DF_type_node,
12090 tree_cons (NULL_TREE, V2SI_type_node, endlink));
12091 tree v2df_ftype_v4sf
12092 = build_function_type (V2DF_type_node,
12093 tree_cons (NULL_TREE, V4SF_type_node, endlink));
12094 tree int_ftype_v2df
12095 = build_function_type (integer_type_node,
12096 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12097 tree v2df_ftype_v2df_int
12098 = build_function_type (V2DF_type_node,
12099 tree_cons (NULL_TREE, V2DF_type_node,
12100 tree_cons (NULL_TREE, integer_type_node,
12101 endlink)));
12102 tree v4sf_ftype_v4sf_v2df
12103 = build_function_type (V4SF_type_node,
12104 tree_cons (NULL_TREE, V4SF_type_node,
12105 tree_cons (NULL_TREE, V2DF_type_node,
12106 endlink)));
12107 tree v2df_ftype_v2df_v4sf
12108 = build_function_type (V2DF_type_node,
12109 tree_cons (NULL_TREE, V2DF_type_node,
12110 tree_cons (NULL_TREE, V4SF_type_node,
12111 endlink)));
12112 tree v2df_ftype_v2df_v2df_int
12113 = build_function_type (V2DF_type_node,
12114 tree_cons (NULL_TREE, V2DF_type_node,
12115 tree_cons (NULL_TREE, V2DF_type_node,
12116 tree_cons (NULL_TREE,
12117 integer_type_node,
12118 endlink))));
12119 tree v2df_ftype_v2df_pv2si
12120 = build_function_type (V2DF_type_node,
12121 tree_cons (NULL_TREE, V2DF_type_node,
12122 tree_cons (NULL_TREE, pv2si_type_node,
12123 endlink)));
12124 tree void_ftype_pv2si_v2df
12125 = build_function_type (void_type_node,
12126 tree_cons (NULL_TREE, pv2si_type_node,
12127 tree_cons (NULL_TREE, V2DF_type_node,
12128 endlink)));
12129 tree void_ftype_pdouble_v2df
12130 = build_function_type (void_type_node,
12131 tree_cons (NULL_TREE, pdouble_type_node,
12132 tree_cons (NULL_TREE, V2DF_type_node,
12133 endlink)));
12134 tree void_ftype_pint_int
12135 = build_function_type (void_type_node,
12136 tree_cons (NULL_TREE, pint_type_node,
12137 tree_cons (NULL_TREE, integer_type_node,
12138 endlink)));
12139 tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
12140 tree_cons (NULL_TREE, V16QI_type_node,
12141 tree_cons (NULL_TREE,
12142 pchar_type_node,
12143 endlink)));
12144 tree void_ftype_v16qi_v16qi_pchar
12145 = build_function_type (void_type_node, maskmovdqu_args);
12146 tree v2df_ftype_pdouble
12147 = build_function_type (V2DF_type_node,
12148 tree_cons (NULL_TREE, pdouble_type_node,
12149 endlink));
12150 tree v2df_ftype_v2df_v2df
12151 = build_function_type (V2DF_type_node,
12152 tree_cons (NULL_TREE, V2DF_type_node,
12153 tree_cons (NULL_TREE, V2DF_type_node,
12154 endlink)));
12155 tree v16qi_ftype_v16qi_v16qi
12156 = build_function_type (V16QI_type_node,
12157 tree_cons (NULL_TREE, V16QI_type_node,
12158 tree_cons (NULL_TREE, V16QI_type_node,
12159 endlink)));
12160 tree v8hi_ftype_v8hi_v8hi
12161 = build_function_type (V8HI_type_node,
12162 tree_cons (NULL_TREE, V8HI_type_node,
12163 tree_cons (NULL_TREE, V8HI_type_node,
12164 endlink)));
12165 tree v4si_ftype_v4si_v4si
12166 = build_function_type (V4SI_type_node,
12167 tree_cons (NULL_TREE, V4SI_type_node,
12168 tree_cons (NULL_TREE, V4SI_type_node,
12169 endlink)));
12170 tree v2di_ftype_v2di_v2di
12171 = build_function_type (V2DI_type_node,
12172 tree_cons (NULL_TREE, V2DI_type_node,
12173 tree_cons (NULL_TREE, V2DI_type_node,
12174 endlink)));
12175 tree v2di_ftype_v2df_v2df
12176 = build_function_type (V2DI_type_node,
12177 tree_cons (NULL_TREE, V2DF_type_node,
12178 tree_cons (NULL_TREE, V2DF_type_node,
12179 endlink)));
12180 tree v2df_ftype_v2df
12181 = build_function_type (V2DF_type_node,
12182 tree_cons (NULL_TREE, V2DF_type_node,
12183 endlink));
12184 tree v2df_ftype_double
12185 = build_function_type (V2DF_type_node,
12186 tree_cons (NULL_TREE, double_type_node,
12187 endlink));
12188 tree v2df_ftype_double_double
12189 = build_function_type (V2DF_type_node,
12190 tree_cons (NULL_TREE, double_type_node,
12191 tree_cons (NULL_TREE, double_type_node,
12192 endlink)));
12193 tree int_ftype_v8hi_int
12194 = build_function_type (integer_type_node,
12195 tree_cons (NULL_TREE, V8HI_type_node,
12196 tree_cons (NULL_TREE, integer_type_node,
12197 endlink)));
12198 tree v8hi_ftype_v8hi_int_int
12199 = build_function_type (V8HI_type_node,
12200 tree_cons (NULL_TREE, V8HI_type_node,
12201 tree_cons (NULL_TREE, integer_type_node,
12202 tree_cons (NULL_TREE,
12203 integer_type_node,
12204 endlink))));
12205 tree v2di_ftype_v2di_int
12206 = build_function_type (V2DI_type_node,
12207 tree_cons (NULL_TREE, V2DI_type_node,
12208 tree_cons (NULL_TREE, integer_type_node,
12209 endlink)));
12210 tree v4si_ftype_v4si_int
12211 = build_function_type (V4SI_type_node,
12212 tree_cons (NULL_TREE, V4SI_type_node,
12213 tree_cons (NULL_TREE, integer_type_node,
12214 endlink)));
12215 tree v8hi_ftype_v8hi_int
12216 = build_function_type (V8HI_type_node,
12217 tree_cons (NULL_TREE, V8HI_type_node,
12218 tree_cons (NULL_TREE, integer_type_node,
12219 endlink)));
12220 tree v8hi_ftype_v8hi_v2di
12221 = build_function_type (V8HI_type_node,
12222 tree_cons (NULL_TREE, V8HI_type_node,
12223 tree_cons (NULL_TREE, V2DI_type_node,
12224 endlink)));
12225 tree v4si_ftype_v4si_v2di
12226 = build_function_type (V4SI_type_node,
12227 tree_cons (NULL_TREE, V4SI_type_node,
12228 tree_cons (NULL_TREE, V2DI_type_node,
12229 endlink)));
12230 tree v4si_ftype_v8hi_v8hi
12231 = build_function_type (V4SI_type_node,
12232 tree_cons (NULL_TREE, V8HI_type_node,
12233 tree_cons (NULL_TREE, V8HI_type_node,
12234 endlink)));
12235 tree di_ftype_v8qi_v8qi
12236 = build_function_type (long_long_unsigned_type_node,
12237 tree_cons (NULL_TREE, V8QI_type_node,
12238 tree_cons (NULL_TREE, V8QI_type_node,
12239 endlink)));
12240 tree v2di_ftype_v16qi_v16qi
12241 = build_function_type (V2DI_type_node,
12242 tree_cons (NULL_TREE, V16QI_type_node,
12243 tree_cons (NULL_TREE, V16QI_type_node,
12244 endlink)));
12245 tree int_ftype_v16qi
12246 = build_function_type (integer_type_node,
12247 tree_cons (NULL_TREE, V16QI_type_node, endlink));
12248
12249 /* Add all builtins that are more or less simple operations on two
12250 operands. */
12251 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12252 {
12253 /* Use one of the operands; the target can have a different mode for
12254 mask-generating compares. */
12255 enum machine_mode mode;
12256 tree type;
12257
12258 if (d->name == 0)
12259 continue;
12260 mode = insn_data[d->icode].operand[1].mode;
12261
12262 switch (mode)
12263 {
12264 case V16QImode:
12265 type = v16qi_ftype_v16qi_v16qi;
12266 break;
12267 case V8HImode:
12268 type = v8hi_ftype_v8hi_v8hi;
12269 break;
12270 case V4SImode:
12271 type = v4si_ftype_v4si_v4si;
12272 break;
12273 case V2DImode:
12274 type = v2di_ftype_v2di_v2di;
12275 break;
12276 case V2DFmode:
12277 type = v2df_ftype_v2df_v2df;
12278 break;
12279 case TImode:
12280 type = ti_ftype_ti_ti;
12281 break;
12282 case V4SFmode:
12283 type = v4sf_ftype_v4sf_v4sf;
12284 break;
12285 case V8QImode:
12286 type = v8qi_ftype_v8qi_v8qi;
12287 break;
12288 case V4HImode:
12289 type = v4hi_ftype_v4hi_v4hi;
12290 break;
12291 case V2SImode:
12292 type = v2si_ftype_v2si_v2si;
12293 break;
12294 case DImode:
12295 type = di_ftype_di_di;
12296 break;
12297
12298 default:
12299 abort ();
12300 }
12301
12302 /* Override for comparisons. */
12303 if (d->icode == CODE_FOR_maskcmpv4sf3
12304 || d->icode == CODE_FOR_maskncmpv4sf3
12305 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12306 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12307 type = v4si_ftype_v4sf_v4sf;
12308
12309 if (d->icode == CODE_FOR_maskcmpv2df3
12310 || d->icode == CODE_FOR_maskncmpv2df3
12311 || d->icode == CODE_FOR_vmmaskcmpv2df3
12312 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12313 type = v2di_ftype_v2df_v2df;
12314
12315 def_builtin (d->mask, d->name, type, d->code);
12316 }
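
  /* In effect, each table entry accepted by the loop above expands to a
     def_builtin call; e.g. the addps entry behaves as if it were written

       def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		    v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

     with the function type chosen from the insn's operand mode.  */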
12317
12318 /* Add the remaining MMX insns with somewhat more complicated types. */
12319 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12320 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12321 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12322 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12323 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12324 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12325 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12326
12327 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12328 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12329 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12330
12331 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12332 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12333
12334 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12335 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12336
12337 /* comi/ucomi insns. */
12338 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12339 if (d->mask == MASK_SSE2)
12340 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12341 else
12342 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12343
12344 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12345 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12346 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12347
12348 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12349 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12350 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12351 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12352 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12353 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12354
12355 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12356 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12357 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12358 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12359
12360 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12361 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12362
12363 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12364
12365 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12366 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12367 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12368 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12369 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12370 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12371
12372 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12373 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12374 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12375 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12376
12377 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12378 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12379 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12380 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12381
12382 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12383
12384 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12385
12386 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12387 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12388 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12389 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12390 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12391 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12392
12393 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12394
12395 /* Original 3DNow! */
12396 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12397 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12398 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12399 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12400 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12401 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12402 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12403 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12404 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12405 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12406 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12407 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12408 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12409 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12410 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12411 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12412 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12413 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12414 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12415 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12416
12417 /* 3DNow! extension as used in the Athlon CPU. */
12418 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12419 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12420 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12421 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12422 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12423 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12424
12425 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12426
12427 /* SSE2 */
12428 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12430
12431 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12433
12434 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12435 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12436 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12438 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12439 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12440
12441 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12443 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12445
12446 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12447 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12450 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12451
12452 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12454 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12455 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12456
12457 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12458 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12459
12460 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12461
12462 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12463 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12464
12465 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12466 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12467 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12468 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12469 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12470
12471 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12472
12473 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12474 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12475
12476 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12477 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12478 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12479
12480 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12481 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12483
12484 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12485 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12486 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12488 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12490 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12491
12492 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12494 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12495
12496 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12498 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12499
12500 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12501 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12502 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12503
12504 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12505 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12506
12507 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12508 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12509 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12510
12511 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12512 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12513 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12514
12515 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12517
12518 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12519 }
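/* Illustrative sketch only: user code normally reaches the builtins
   defined above through xmmintrin.h-style intrinsic wrappers (where such
   a header is available for SSE2) rather than calling them directly.
   Assuming the usual V2DF vector typedef, which is not defined in this
   file, such a wrapper looks roughly like:

     typedef double __v2df __attribute__ ((mode (V2DF)));

     static __inline __v2df
     sqrtpd_example (__v2df x)
     {
       return (__v2df) __builtin_ia32_sqrtpd (x);
     }

   Selector arguments, e.g. the second operand of __builtin_ia32_pshufd,
   must be integer constants; non-constant values are rejected by
   ix86_expand_builtin below.  */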
12520
12521 /* Errors in the source file can cause expand_expr to return const0_rtx
12522 where we expect a vector. To avoid crashing, use one of the vector
12523 clear instructions. */
12524 static rtx
12525 safe_vector_operand (x, mode)
12526 rtx x;
12527 enum machine_mode mode;
12528 {
12529 if (x != const0_rtx)
12530 return x;
12531 x = gen_reg_rtx (mode);
12532
12533 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12534 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12535 : gen_rtx_SUBREG (DImode, x, 0)));
12536 else
12537 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12538 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12539 return x;
12540 }
12541
12542 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12543
12544 static rtx
12545 ix86_expand_binop_builtin (icode, arglist, target)
12546 enum insn_code icode;
12547 tree arglist;
12548 rtx target;
12549 {
12550 rtx pat;
12551 tree arg0 = TREE_VALUE (arglist);
12552 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12553 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12554 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12555 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12556 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12557 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12558
12559 if (VECTOR_MODE_P (mode0))
12560 op0 = safe_vector_operand (op0, mode0);
12561 if (VECTOR_MODE_P (mode1))
12562 op1 = safe_vector_operand (op1, mode1);
12563
12564 if (! target
12565 || GET_MODE (target) != tmode
12566 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12567 target = gen_reg_rtx (tmode);
12568
12569 /* In case the insn wants input operands in modes different from
12570 the result, abort. */
12571 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12572 abort ();
12573
12574 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12575 op0 = copy_to_mode_reg (mode0, op0);
12576 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12577 op1 = copy_to_mode_reg (mode1, op1);
12578
12579 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12580 yet one of the two must not be a memory operand. This is normally
12581 enforced by expanders, but we didn't bother to create one here. */
12582 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12583 op0 = copy_to_mode_reg (mode0, op0);
12584
12585 pat = GEN_FCN (icode) (target, op0, op1);
12586 if (! pat)
12587 return 0;
12588 emit_insn (pat);
12589 return target;
12590 }
12591
12592 /* In type_for_mode we restrict the ability to create TImode types
12593 to hosts with a 64-bit HOST_WIDE_INT. So we've defined the SSE logicals
12594 to have a V4SFmode signature. Convert them in-place to TImode. */
12595
12596 static rtx
12597 ix86_expand_timode_binop_builtin (icode, arglist, target)
12598 enum insn_code icode;
12599 tree arglist;
12600 rtx target;
12601 {
12602 rtx pat;
12603 tree arg0 = TREE_VALUE (arglist);
12604 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12605 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12606 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12607
12608 op0 = gen_lowpart (TImode, op0);
12609 op1 = gen_lowpart (TImode, op1);
12610 target = gen_reg_rtx (TImode);
12611
12612 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12613 op0 = copy_to_mode_reg (TImode, op0);
12614 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12615 op1 = copy_to_mode_reg (TImode, op1);
12616
12617 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12618 yet one of the two must not be a memory operand. This is normally
12619 enforced by expanders, but we didn't bother to create one here. */
12620 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12621 op0 = copy_to_mode_reg (TImode, op0);
12622
12623 pat = GEN_FCN (icode) (target, op0, op1);
12624 if (! pat)
12625 return 0;
12626 emit_insn (pat);
12627
12628 return gen_lowpart (V4SFmode, target);
12629 }
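/* A hedged sketch of the kind of wrapper that relies on the conversion
   described above; __v4sf is assumed to be the V4SFmode vector typedef
   from xmmintrin.h:

     static __inline __v4sf
     andps_example (__v4sf a, __v4sf b)
     {
       return (__v4sf) __builtin_ia32_andps (a, b);
     }

   The operands keep their V4SFmode signature at the tree level;
   gen_lowpart above reinterprets them as TImode so that patterns such
   as sse_andti3 can be used, and the result is converted back at the
   end.  */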
12630
12631 /* Subroutine of ix86_expand_builtin to take care of stores. */
12632
12633 static rtx
12634 ix86_expand_store_builtin (icode, arglist)
12635 enum insn_code icode;
12636 tree arglist;
12637 {
12638 rtx pat;
12639 tree arg0 = TREE_VALUE (arglist);
12640 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12641 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12642 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12643 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12644 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12645
12646 if (VECTOR_MODE_P (mode1))
12647 op1 = safe_vector_operand (op1, mode1);
12648
12649 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12650
12651 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12652 op1 = copy_to_mode_reg (mode1, op1);
12653
12654 pat = GEN_FCN (icode) (op0, op1);
12655 if (pat)
12656 emit_insn (pat);
12657 return 0;
12658 }
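/* Rough user-level sketch of a store that is expanded here (the __v4sf
   typedef is assumed, as in xmmintrin.h):

     static __inline void
     storeups_example (float *p, __v4sf a)
     {
       __builtin_ia32_storeups (p, a);
     }

   The address is wrapped in a fresh MEM of the pattern's mode above, so
   no extra attributes from the original pointer are attached to it.  */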
12659
12660 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12661
12662 static rtx
12663 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12664 enum insn_code icode;
12665 tree arglist;
12666 rtx target;
12667 int do_load;
12668 {
12669 rtx pat;
12670 tree arg0 = TREE_VALUE (arglist);
12671 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12672 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12673 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12674
12675 if (! target
12676 || GET_MODE (target) != tmode
12677 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12678 target = gen_reg_rtx (tmode);
12679 if (do_load)
12680 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12681 else
12682 {
12683 if (VECTOR_MODE_P (mode0))
12684 op0 = safe_vector_operand (op0, mode0);
12685
12686 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12687 op0 = copy_to_mode_reg (mode0, op0);
12688 }
12689
12690 pat = GEN_FCN (icode) (target, op0);
12691 if (! pat)
12692 return 0;
12693 emit_insn (pat);
12694 return target;
12695 }
12696
12697 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12698 sqrtss, rsqrtss, rcpss. */
12699
12700 static rtx
12701 ix86_expand_unop1_builtin (icode, arglist, target)
12702 enum insn_code icode;
12703 tree arglist;
12704 rtx target;
12705 {
12706 rtx pat;
12707 tree arg0 = TREE_VALUE (arglist);
12708 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12709 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12710 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12711
12712 if (! target
12713 || GET_MODE (target) != tmode
12714 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12715 target = gen_reg_rtx (tmode);
12716
12717 if (VECTOR_MODE_P (mode0))
12718 op0 = safe_vector_operand (op0, mode0);
12719
12720 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12721 op0 = copy_to_mode_reg (mode0, op0);
12722
12723 op1 = op0;
12724 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12725 op1 = copy_to_mode_reg (mode0, op1);
12726
12727 pat = GEN_FCN (icode) (target, op0, op1);
12728 if (! pat)
12729 return 0;
12730 emit_insn (pat);
12731 return target;
12732 }
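/* Illustrative sketch (the __v4sf typedef is assumed):

     static __inline __v4sf
     sqrtss_example (__v4sf a)
     {
       return (__v4sf) __builtin_ia32_sqrtss (a);
     }

   The expander above passes the single source operand twice because the
   vm* patterns take a second operand that supplies the elements the
   scalar operation leaves untouched.  */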
12733
12734 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12735
12736 static rtx
12737 ix86_expand_sse_compare (d, arglist, target)
12738 const struct builtin_description *d;
12739 tree arglist;
12740 rtx target;
12741 {
12742 rtx pat;
12743 tree arg0 = TREE_VALUE (arglist);
12744 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12745 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12746 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12747 rtx op2;
12748 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12749 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12750 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12751 enum rtx_code comparison = d->comparison;
12752
12753 if (VECTOR_MODE_P (mode0))
12754 op0 = safe_vector_operand (op0, mode0);
12755 if (VECTOR_MODE_P (mode1))
12756 op1 = safe_vector_operand (op1, mode1);
12757
12758 /* Swap operands if we have a comparison that isn't available in
12759 hardware. */
12760 if (d->flag)
12761 {
12762 rtx tmp = gen_reg_rtx (mode1);
12763 emit_move_insn (tmp, op1);
12764 op1 = op0;
12765 op0 = tmp;
12766 }
12767
12768 if (! target
12769 || GET_MODE (target) != tmode
12770 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12771 target = gen_reg_rtx (tmode);
12772
12773 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12774 op0 = copy_to_mode_reg (mode0, op0);
12775 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12776 op1 = copy_to_mode_reg (mode1, op1);
12777
12778 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12779 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12780 if (! pat)
12781 return 0;
12782 emit_insn (pat);
12783 return target;
12784 }
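/* Worked example: there is no direct greater-than form of cmpps, so the
   table entry for __builtin_ia32_cmpgtps has its swap flag set and the
   code above emits the less-than comparison with the arguments reversed.
   A hedged user-level sketch (__v4sf assumed):

     static __inline __v4sf
     cmpgtps_example (__v4sf a, __v4sf b)
     {
       return (__v4sf) __builtin_ia32_cmpgtps (a, b);
     }
   */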
12785
12786 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12787
12788 static rtx
12789 ix86_expand_sse_comi (d, arglist, target)
12790 const struct builtin_description *d;
12791 tree arglist;
12792 rtx target;
12793 {
12794 rtx pat;
12795 tree arg0 = TREE_VALUE (arglist);
12796 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12797 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12798 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12799 rtx op2;
12800 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12801 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12802 enum rtx_code comparison = d->comparison;
12803
12804 if (VECTOR_MODE_P (mode0))
12805 op0 = safe_vector_operand (op0, mode0);
12806 if (VECTOR_MODE_P (mode1))
12807 op1 = safe_vector_operand (op1, mode1);
12808
12809 /* Swap operands if we have a comparison that isn't available in
12810 hardware. */
12811 if (d->flag)
12812 {
12813 rtx tmp = op1;
12814 op1 = op0;
12815 op0 = tmp;
12816 }
12817
12818 target = gen_reg_rtx (SImode);
12819 emit_move_insn (target, const0_rtx);
12820 target = gen_rtx_SUBREG (QImode, target, 0);
12821
12822 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12823 op0 = copy_to_mode_reg (mode0, op0);
12824 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12825 op1 = copy_to_mode_reg (mode1, op1);
12826
12827 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12828 pat = GEN_FCN (d->icode) (op0, op1, op2);
12829 if (! pat)
12830 return 0;
12831 emit_insn (pat);
12832 emit_insn (gen_rtx_SET (VOIDmode,
12833 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12834 gen_rtx_fmt_ee (comparison, QImode,
12835 gen_rtx_REG (CCmode, FLAGS_REG),
12836 const0_rtx)));
12837
12838 return SUBREG_REG (target);
12839 }
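/* The comi builtins produce an int; a hedged sketch (__v4sf assumed):

     static __inline int
     comieq_example (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comieq (a, b);
     }

   The code above clears an SImode register and then sets only its low
   byte from the comparison result in the flags register, so the value
   returned to the caller is 0 or 1.  */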
12840
12841 /* Expand an expression EXP that calls a built-in function,
12842 with result going to TARGET if that's convenient
12843 (and in mode MODE if that's convenient).
12844 SUBTARGET may be used as the target for computing one of EXP's operands.
12845 IGNORE is nonzero if the value is to be ignored. */
12846
12847 rtx
12848 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12849 tree exp;
12850 rtx target;
12851 rtx subtarget ATTRIBUTE_UNUSED;
12852 enum machine_mode mode ATTRIBUTE_UNUSED;
12853 int ignore ATTRIBUTE_UNUSED;
12854 {
12855 const struct builtin_description *d;
12856 size_t i;
12857 enum insn_code icode;
12858 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12859 tree arglist = TREE_OPERAND (exp, 1);
12860 tree arg0, arg1, arg2;
12861 rtx op0, op1, op2, pat;
12862 enum machine_mode tmode, mode0, mode1, mode2;
12863 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12864
12865 switch (fcode)
12866 {
12867 case IX86_BUILTIN_EMMS:
12868 emit_insn (gen_emms ());
12869 return 0;
12870
12871 case IX86_BUILTIN_SFENCE:
12872 emit_insn (gen_sfence ());
12873 return 0;
12874
12875 case IX86_BUILTIN_PEXTRW:
12876 case IX86_BUILTIN_PEXTRW128:
12877 icode = (fcode == IX86_BUILTIN_PEXTRW
12878 ? CODE_FOR_mmx_pextrw
12879 : CODE_FOR_sse2_pextrw);
12880 arg0 = TREE_VALUE (arglist);
12881 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12882 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12883 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12884 tmode = insn_data[icode].operand[0].mode;
12885 mode0 = insn_data[icode].operand[1].mode;
12886 mode1 = insn_data[icode].operand[2].mode;
12887
12888 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12889 op0 = copy_to_mode_reg (mode0, op0);
12890 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12891 {
12892 /* @@@ better error message */
12893 error ("selector must be an immediate");
12894 return gen_reg_rtx (tmode);
12895 }
12896 if (target == 0
12897 || GET_MODE (target) != tmode
12898 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12899 target = gen_reg_rtx (tmode);
12900 pat = GEN_FCN (icode) (target, op0, op1);
12901 if (! pat)
12902 return 0;
12903 emit_insn (pat);
12904 return target;
12905
12906 case IX86_BUILTIN_PINSRW:
12907 case IX86_BUILTIN_PINSRW128:
12908 icode = (fcode == IX86_BUILTIN_PINSRW
12909 ? CODE_FOR_mmx_pinsrw
12910 : CODE_FOR_sse2_pinsrw);
12911 arg0 = TREE_VALUE (arglist);
12912 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12913 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12914 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12915 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12916 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12917 tmode = insn_data[icode].operand[0].mode;
12918 mode0 = insn_data[icode].operand[1].mode;
12919 mode1 = insn_data[icode].operand[2].mode;
12920 mode2 = insn_data[icode].operand[3].mode;
12921
12922 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12923 op0 = copy_to_mode_reg (mode0, op0);
12924 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12925 op1 = copy_to_mode_reg (mode1, op1);
12926 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12927 {
12928 /* @@@ better error message */
12929 error ("selector must be an immediate");
12930 return const0_rtx;
12931 }
12932 if (target == 0
12933 || GET_MODE (target) != tmode
12934 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12935 target = gen_reg_rtx (tmode);
12936 pat = GEN_FCN (icode) (target, op0, op1, op2);
12937 if (! pat)
12938 return 0;
12939 emit_insn (pat);
12940 return target;
12941
12942 case IX86_BUILTIN_MASKMOVQ:
12943 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12944 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12945 : CODE_FOR_sse2_maskmovdqu);
12946 /* Note the arg order is different from the operand order. */
12947 arg1 = TREE_VALUE (arglist);
12948 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12949 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12950 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12951 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12952 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12953 mode0 = insn_data[icode].operand[0].mode;
12954 mode1 = insn_data[icode].operand[1].mode;
12955 mode2 = insn_data[icode].operand[2].mode;
12956
12957 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12958 op0 = copy_to_mode_reg (mode0, op0);
12959 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12960 op1 = copy_to_mode_reg (mode1, op1);
12961 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12962 op2 = copy_to_mode_reg (mode2, op2);
12963 pat = GEN_FCN (icode) (op0, op1, op2);
12964 if (! pat)
12965 return 0;
12966 emit_insn (pat);
12967 return 0;
12968
12969 case IX86_BUILTIN_SQRTSS:
12970 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12971 case IX86_BUILTIN_RSQRTSS:
12972 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12973 case IX86_BUILTIN_RCPSS:
12974 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12975
12976 case IX86_BUILTIN_ANDPS:
12977 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12978 arglist, target);
12979 case IX86_BUILTIN_ANDNPS:
12980 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12981 arglist, target);
12982 case IX86_BUILTIN_ORPS:
12983 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12984 arglist, target);
12985 case IX86_BUILTIN_XORPS:
12986 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12987 arglist, target);
12988
12989 case IX86_BUILTIN_LOADAPS:
12990 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12991
12992 case IX86_BUILTIN_LOADUPS:
12993 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12994
12995 case IX86_BUILTIN_STOREAPS:
12996 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12997 case IX86_BUILTIN_STOREUPS:
12998 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12999
13000 case IX86_BUILTIN_LOADSS:
13001 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13002
13003 case IX86_BUILTIN_STORESS:
13004 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13005
13006 case IX86_BUILTIN_LOADHPS:
13007 case IX86_BUILTIN_LOADLPS:
13008 case IX86_BUILTIN_LOADHPD:
13009 case IX86_BUILTIN_LOADLPD:
13010 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13011 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13012 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13013 : CODE_FOR_sse2_movlpd);
13014 arg0 = TREE_VALUE (arglist);
13015 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13016 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13017 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13018 tmode = insn_data[icode].operand[0].mode;
13019 mode0 = insn_data[icode].operand[1].mode;
13020 mode1 = insn_data[icode].operand[2].mode;
13021
13022 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13023 op0 = copy_to_mode_reg (mode0, op0);
13024 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13025 if (target == 0
13026 || GET_MODE (target) != tmode
13027 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13028 target = gen_reg_rtx (tmode);
13029 pat = GEN_FCN (icode) (target, op0, op1);
13030 if (! pat)
13031 return 0;
13032 emit_insn (pat);
13033 return target;
13034
13035 case IX86_BUILTIN_STOREHPS:
13036 case IX86_BUILTIN_STORELPS:
13037 case IX86_BUILTIN_STOREHPD:
13038 case IX86_BUILTIN_STORELPD:
13039 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13040 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13041 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13042 : CODE_FOR_sse2_movlpd);
13043 arg0 = TREE_VALUE (arglist);
13044 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13045 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13046 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13047 mode0 = insn_data[icode].operand[1].mode;
13048 mode1 = insn_data[icode].operand[2].mode;
13049
13050 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13051 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13052 op1 = copy_to_mode_reg (mode1, op1);
13053
13054 pat = GEN_FCN (icode) (op0, op0, op1);
13055 if (! pat)
13056 return 0;
13057 emit_insn (pat);
13058 return 0;
13059
13060 case IX86_BUILTIN_MOVNTPS:
13061 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13062 case IX86_BUILTIN_MOVNTQ:
13063 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13064
13065 case IX86_BUILTIN_LDMXCSR:
13066 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13067 target = assign_386_stack_local (SImode, 0);
13068 emit_move_insn (target, op0);
13069 emit_insn (gen_ldmxcsr (target));
13070 return 0;
13071
13072 case IX86_BUILTIN_STMXCSR:
13073 target = assign_386_stack_local (SImode, 0);
13074 emit_insn (gen_stmxcsr (target));
13075 return copy_to_mode_reg (SImode, target);
13076
13077 case IX86_BUILTIN_SHUFPS:
13078 case IX86_BUILTIN_SHUFPD:
13079 icode = (fcode == IX86_BUILTIN_SHUFPS
13080 ? CODE_FOR_sse_shufps
13081 : CODE_FOR_sse2_shufpd);
13082 arg0 = TREE_VALUE (arglist);
13083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13084 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13085 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13086 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13087 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13088 tmode = insn_data[icode].operand[0].mode;
13089 mode0 = insn_data[icode].operand[1].mode;
13090 mode1 = insn_data[icode].operand[2].mode;
13091 mode2 = insn_data[icode].operand[3].mode;
13092
13093 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13094 op0 = copy_to_mode_reg (mode0, op0);
13095 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13096 op1 = copy_to_mode_reg (mode1, op1);
13097 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13098 {
13099 /* @@@ better error message */
13100 error ("mask must be an immediate");
13101 return gen_reg_rtx (tmode);
13102 }
13103 if (target == 0
13104 || GET_MODE (target) != tmode
13105 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13106 target = gen_reg_rtx (tmode);
13107 pat = GEN_FCN (icode) (target, op0, op1, op2);
13108 if (! pat)
13109 return 0;
13110 emit_insn (pat);
13111 return target;
13112
13113 case IX86_BUILTIN_PSHUFW:
13114 case IX86_BUILTIN_PSHUFD:
13115 case IX86_BUILTIN_PSHUFHW:
13116 case IX86_BUILTIN_PSHUFLW:
13117 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13118 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13119 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13120 : CODE_FOR_mmx_pshufw);
13121 arg0 = TREE_VALUE (arglist);
13122 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13123 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13124 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13125 tmode = insn_data[icode].operand[0].mode;
13126 mode1 = insn_data[icode].operand[1].mode;
13127 mode2 = insn_data[icode].operand[2].mode;
13128
13129 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13130 op0 = copy_to_mode_reg (mode1, op0);
13131 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13132 {
13133 /* @@@ better error message */
13134 error ("mask must be an immediate");
13135 return const0_rtx;
13136 }
13137 if (target == 0
13138 || GET_MODE (target) != tmode
13139 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13140 target = gen_reg_rtx (tmode);
13141 pat = GEN_FCN (icode) (target, op0, op1);
13142 if (! pat)
13143 return 0;
13144 emit_insn (pat);
13145 return target;
13146
13147 case IX86_BUILTIN_FEMMS:
13148 emit_insn (gen_femms ());
13149 return NULL_RTX;
13150
13151 case IX86_BUILTIN_PAVGUSB:
13152 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13153
13154 case IX86_BUILTIN_PF2ID:
13155 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13156
13157 case IX86_BUILTIN_PFACC:
13158 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13159
13160 case IX86_BUILTIN_PFADD:
13161 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13162
13163 case IX86_BUILTIN_PFCMPEQ:
13164 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13165
13166 case IX86_BUILTIN_PFCMPGE:
13167 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13168
13169 case IX86_BUILTIN_PFCMPGT:
13170 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13171
13172 case IX86_BUILTIN_PFMAX:
13173 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13174
13175 case IX86_BUILTIN_PFMIN:
13176 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13177
13178 case IX86_BUILTIN_PFMUL:
13179 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13180
13181 case IX86_BUILTIN_PFRCP:
13182 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13183
13184 case IX86_BUILTIN_PFRCPIT1:
13185 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13186
13187 case IX86_BUILTIN_PFRCPIT2:
13188 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13189
13190 case IX86_BUILTIN_PFRSQIT1:
13191 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13192
13193 case IX86_BUILTIN_PFRSQRT:
13194 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13195
13196 case IX86_BUILTIN_PFSUB:
13197 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13198
13199 case IX86_BUILTIN_PFSUBR:
13200 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13201
13202 case IX86_BUILTIN_PI2FD:
13203 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13204
13205 case IX86_BUILTIN_PMULHRW:
13206 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13207
13208 case IX86_BUILTIN_PF2IW:
13209 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13210
13211 case IX86_BUILTIN_PFNACC:
13212 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13213
13214 case IX86_BUILTIN_PFPNACC:
13215 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13216
13217 case IX86_BUILTIN_PI2FW:
13218 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13219
13220 case IX86_BUILTIN_PSWAPDSI:
13221 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13222
13223 case IX86_BUILTIN_PSWAPDSF:
13224 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13225
13226 case IX86_BUILTIN_SSE_ZERO:
13227 target = gen_reg_rtx (V4SFmode);
13228 emit_insn (gen_sse_clrv4sf (target));
13229 return target;
13230
13231 case IX86_BUILTIN_MMX_ZERO:
13232 target = gen_reg_rtx (DImode);
13233 emit_insn (gen_mmx_clrdi (target));
13234 return target;
13235
13236 case IX86_BUILTIN_SQRTSD:
13237 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13238 case IX86_BUILTIN_LOADAPD:
13239 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13240 case IX86_BUILTIN_LOADUPD:
13241 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13242
13243 case IX86_BUILTIN_STOREAPD:
13244 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13245 case IX86_BUILTIN_STOREUPD:
13246 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13247
13248 case IX86_BUILTIN_LOADSD:
13249 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13250
13251 case IX86_BUILTIN_STORESD:
13252 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13253
13254 case IX86_BUILTIN_SETPD1:
13255 target = assign_386_stack_local (DFmode, 0);
13256 arg0 = TREE_VALUE (arglist);
13257 emit_move_insn (adjust_address (target, DFmode, 0),
13258 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13259 op0 = gen_reg_rtx (V2DFmode);
13260 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13261 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13262 return op0;
13263
13264 case IX86_BUILTIN_SETPD:
13265 target = assign_386_stack_local (V2DFmode, 0);
13266 arg0 = TREE_VALUE (arglist);
13267 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13268 emit_move_insn (adjust_address (target, DFmode, 0),
13269 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13270 emit_move_insn (adjust_address (target, DFmode, 8),
13271 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13272 op0 = gen_reg_rtx (V2DFmode);
13273 emit_insn (gen_sse2_movapd (op0, target));
13274 return op0;
13275
13276 case IX86_BUILTIN_LOADRPD:
13277 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13278 gen_reg_rtx (V2DFmode), 1);
13279 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13280 return target;
13281
13282 case IX86_BUILTIN_LOADPD1:
13283 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13284 gen_reg_rtx (V2DFmode), 1);
13285 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13286 return target;
13287
13288 case IX86_BUILTIN_STOREPD1:
13289 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13290 case IX86_BUILTIN_STORERPD:
13291 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13292
13293 case IX86_BUILTIN_MFENCE:
13294 emit_insn (gen_sse2_mfence ());
13295 return 0;
13296 case IX86_BUILTIN_LFENCE:
13297 emit_insn (gen_sse2_lfence ());
13298 return 0;
13299
13300 case IX86_BUILTIN_CLFLUSH:
13301 arg0 = TREE_VALUE (arglist);
13302 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13303 icode = CODE_FOR_sse2_clflush;
13304 mode0 = insn_data[icode].operand[0].mode;
13305 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13306 op0 = copy_to_mode_reg (mode0, op0);
13307
13308 emit_insn (gen_sse2_clflush (op0));
13309 return 0;
13310
13311 case IX86_BUILTIN_MOVNTPD:
13312 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13313 case IX86_BUILTIN_MOVNTDQ:
13314 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13315 case IX86_BUILTIN_MOVNTI:
13316 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13317
13318 default:
13319 break;
13320 }
13321
13322 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13323 if (d->code == fcode)
13324 {
13325 /* Compares are treated specially. */
13326 if (d->icode == CODE_FOR_maskcmpv4sf3
13327 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13328 || d->icode == CODE_FOR_maskncmpv4sf3
13329 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13330 || d->icode == CODE_FOR_maskcmpv2df3
13331 || d->icode == CODE_FOR_vmmaskcmpv2df3
13332 || d->icode == CODE_FOR_maskncmpv2df3
13333 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13334 return ix86_expand_sse_compare (d, arglist, target);
13335
13336 return ix86_expand_binop_builtin (d->icode, arglist, target);
13337 }
13338
13339 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13340 if (d->code == fcode)
13341 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13342
13343 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13344 if (d->code == fcode)
13345 return ix86_expand_sse_comi (d, arglist, target);
13346
13347 /* @@@ Should really do something sensible here. */
13348 return 0;
13349 }
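/* Hedged usage sketch for the LDMXCSR/STMXCSR cases above (the builtins
   themselves are declared elsewhere in this file):

     static __inline unsigned int
     getcsr_example (void)
     {
       return __builtin_ia32_stmxcsr ();
     }

     static __inline void
     setcsr_example (unsigned int x)
     {
       __builtin_ia32_ldmxcsr (x);
     }

   Both go through a 386 stack slot because the stmxcsr and ldmxcsr
   instructions only operate on memory.  */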
13350
13351 /* Store OPERAND to memory after reload is completed. This means
13352 that we can't easily use assign_stack_local. */
13353 rtx
13354 ix86_force_to_memory (mode, operand)
13355 enum machine_mode mode;
13356 rtx operand;
13357 {
13358 rtx result;
13359 if (!reload_completed)
13360 abort ();
13361 if (TARGET_64BIT && TARGET_RED_ZONE)
13362 {
13363 result = gen_rtx_MEM (mode,
13364 gen_rtx_PLUS (Pmode,
13365 stack_pointer_rtx,
13366 GEN_INT (-RED_ZONE_SIZE)));
13367 emit_move_insn (result, operand);
13368 }
13369 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13370 {
13371 switch (mode)
13372 {
13373 case HImode:
13374 case SImode:
13375 operand = gen_lowpart (DImode, operand);
13376 /* FALLTHRU */
13377 case DImode:
13378 emit_insn (
13379 gen_rtx_SET (VOIDmode,
13380 gen_rtx_MEM (DImode,
13381 gen_rtx_PRE_DEC (DImode,
13382 stack_pointer_rtx)),
13383 operand));
13384 break;
13385 default:
13386 abort ();
13387 }
13388 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13389 }
13390 else
13391 {
13392 switch (mode)
13393 {
13394 case DImode:
13395 {
13396 rtx operands[2];
13397 split_di (&operand, 1, operands, operands + 1);
13398 emit_insn (
13399 gen_rtx_SET (VOIDmode,
13400 gen_rtx_MEM (SImode,
13401 gen_rtx_PRE_DEC (Pmode,
13402 stack_pointer_rtx)),
13403 operands[1]));
13404 emit_insn (
13405 gen_rtx_SET (VOIDmode,
13406 gen_rtx_MEM (SImode,
13407 gen_rtx_PRE_DEC (Pmode,
13408 stack_pointer_rtx)),
13409 operands[0]));
13410 }
13411 break;
13412 case HImode:
13413 /* It is better to store HImodes as SImodes. */
13414 if (!TARGET_PARTIAL_REG_STALL)
13415 operand = gen_lowpart (SImode, operand);
13416 /* FALLTHRU */
13417 case SImode:
13418 emit_insn (
13419 gen_rtx_SET (VOIDmode,
13420 gen_rtx_MEM (GET_MODE (operand),
13421 gen_rtx_PRE_DEC (SImode,
13422 stack_pointer_rtx)),
13423 operand));
13424 break;
13425 default:
13426 abort ();
13427 }
13428 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13429 }
13430 return result;
13431 }
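/* Typical usage pattern, as a hedged sketch:

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... use mem as a memory operand of an emitted insn ...
     ix86_free_from_memory (DImode);

   On 32-bit targets the DImode case above pushes the two SImode halves;
   ix86_free_from_memory (below) then releases the slot again.  */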
13432
13433 /* Free the operand from memory. */
13434 void
13435 ix86_free_from_memory (mode)
13436 enum machine_mode mode;
13437 {
13438 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13439 {
13440 int size;
13441
13442 if (mode == DImode || TARGET_64BIT)
13443 size = 8;
13444 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13445 size = 2;
13446 else
13447 size = 4;
13448 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13449 to a pop or add instruction if registers are available. */
13450 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13451 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13452 GEN_INT (size))));
13453 }
13454 }
13455
13456 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13457 QImode must go into class Q_REGS.
13458 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
13459 movdf to do mem-to-mem moves through integer regs. */
13460 enum reg_class
13461 ix86_preferred_reload_class (x, class)
13462 rtx x;
13463 enum reg_class class;
13464 {
13465 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13466 {
13467 /* SSE can't load any constant directly yet. */
13468 if (SSE_CLASS_P (class))
13469 return NO_REGS;
13470 /* Floats can load 0 and 1. */
13471 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13472 {
13473 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13474 if (MAYBE_SSE_CLASS_P (class))
13475 return (reg_class_subset_p (class, GENERAL_REGS)
13476 ? GENERAL_REGS : FLOAT_REGS);
13477 else
13478 return class;
13479 }
13480 /* General regs can load everything. */
13481 if (reg_class_subset_p (class, GENERAL_REGS))
13482 return GENERAL_REGS;
13483 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13484 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13485 return NO_REGS;
13486 }
13487 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13488 return NO_REGS;
13489 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13490 return Q_REGS;
13491 return class;
13492 }
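/* Worked example: the constant 1.0 satisfies standard_80387_constant_p,
   so it may be reloaded into FLOAT_REGS, where fld1 can materialize it,
   while reloading it into an SSE class returns NO_REGS and the constant
   is forced into the constant pool instead.  A hedged sketch:

     double one_example (void) { return 1.0; }

   is expected to compile to fld1 with x87 math but to a load from the
   constant pool when SSE math is used for doubles.  */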
13493
13494 /* If we are copying between general and FP registers, we need a memory
13495 location. The same is true for SSE and MMX registers.
13496
13497 The macro can't work reliably when one of the CLASSES is a class containing
13498 registers from multiple units (SSE, MMX, integer). We avoid this by never
13499 combining those units in a single alternative in the machine description.
13500 Ensure that this constraint holds to avoid unexpected surprises.
13501
13502 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13503 enforce these sanity checks. */
13504 int
13505 ix86_secondary_memory_needed (class1, class2, mode, strict)
13506 enum reg_class class1, class2;
13507 enum machine_mode mode;
13508 int strict;
13509 {
13510 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13511 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13512 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13513 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13514 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13515 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13516 {
13517 if (strict)
13518 abort ();
13519 else
13520 return 1;
13521 }
13522 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13523 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13524 && (mode) != SImode)
13525 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13526 && (mode) != SImode));
13527 }
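/* Worked example: an SFmode copy between FLOAT_REGS and SSE_REGS returns
   nonzero here, so reload goes through a stack temporary, while an
   SImode copy between SSE_REGS and GENERAL_REGS does not, since the
   (mode) != SImode tests above let it go through a direct move.  */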
13528 /* Return the cost of moving data from a register in class CLASS1 to
13529 one in class CLASS2.
13530
13531 It is not required that the cost always equal 2 when FROM is the same as TO;
13532 on some machines it is expensive to move between registers if they are not
13533 general registers. */
13534 int
13535 ix86_register_move_cost (mode, class1, class2)
13536 enum machine_mode mode;
13537 enum reg_class class1, class2;
13538 {
13539 /* In case we require secondary memory, compute the cost of the store
13540 followed by the load. When copying from a general purpose register we
13541 may emit multiple stores followed by a single load, causing a memory
13542 size mismatch stall. Count this as an arbitrarily high cost of 20. */
13543 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13544 {
13545 int add_cost = 0;
13546 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13547 add_cost = 20;
13548 return (MEMORY_MOVE_COST (mode, class1, 0)
13549 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13550 }
13551 /* Moves between SSE/MMX and integer unit are expensive. */
13552 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13553 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13554 return ix86_cost->mmxsse_to_integer;
13555 if (MAYBE_FLOAT_CLASS_P (class1))
13556 return ix86_cost->fp_move;
13557 if (MAYBE_SSE_CLASS_P (class1))
13558 return ix86_cost->sse_move;
13559 if (MAYBE_MMX_CLASS_P (class1))
13560 return ix86_cost->mmx_move;
13561 return 2;
13562 }
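/* Worked example: an SImode copy between an MMX register and a general
   register is charged ix86_cost->mmxsse_to_integer rather than the
   default of 2, while a copy that needs secondary memory is charged the
   store cost plus the load cost, plus 20 in the multi-register case.  */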
13563
13564 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13565 int
13566 ix86_hard_regno_mode_ok (regno, mode)
13567 int regno;
13568 enum machine_mode mode;
13569 {
13570 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
13571 if (CC_REGNO_P (regno))
13572 return GET_MODE_CLASS (mode) == MODE_CC;
13573 if (GET_MODE_CLASS (mode) == MODE_CC
13574 || GET_MODE_CLASS (mode) == MODE_RANDOM
13575 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13576 return 0;
13577 if (FP_REGNO_P (regno))
13578 return VALID_FP_MODE_P (mode);
13579 if (SSE_REGNO_P (regno))
13580 return VALID_SSE_REG_MODE (mode);
13581 if (MMX_REGNO_P (regno))
13582 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13583 /* We handle both integers and floats in the general purpose registers.
13584 In the future we should be able to handle vector modes as well. */
13585 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13586 return 0;
13587 /* Take care with QImode values - they can live in non-QI regs, but then
13588 they do cause partial register stalls. */
13589 if (regno < 4 || mode != QImode || TARGET_64BIT)
13590 return 1;
13591 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13592 }
13593
13594 /* Return the cost of moving data of mode M between a
13595 register and memory. A value of 2 is the default; this cost is
13596 relative to those in `REGISTER_MOVE_COST'.
13597
13598 If moving between registers and memory is more expensive than
13599 between two registers, you should define this macro to express the
13600 relative cost.
13601
13602 Also model the increased cost of moving QImode registers in
13603 non-Q_REGS classes.
13604 */
13605 int
13606 ix86_memory_move_cost (mode, class, in)
13607 enum machine_mode mode;
13608 enum reg_class class;
13609 int in;
13610 {
13611 if (FLOAT_CLASS_P (class))
13612 {
13613 int index;
13614 switch (mode)
13615 {
13616 case SFmode:
13617 index = 0;
13618 break;
13619 case DFmode:
13620 index = 1;
13621 break;
13622 case XFmode:
13623 case TFmode:
13624 index = 2;
13625 break;
13626 default:
13627 return 100;
13628 }
13629 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13630 }
13631 if (SSE_CLASS_P (class))
13632 {
13633 int index;
13634 switch (GET_MODE_SIZE (mode))
13635 {
13636 case 4:
13637 index = 0;
13638 break;
13639 case 8:
13640 index = 1;
13641 break;
13642 case 16:
13643 index = 2;
13644 break;
13645 default:
13646 return 100;
13647 }
13648 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13649 }
13650 if (MMX_CLASS_P (class))
13651 {
13652 int index;
13653 switch (GET_MODE_SIZE (mode))
13654 {
13655 case 4:
13656 index = 0;
13657 break;
13658 case 8:
13659 index = 1;
13660 break;
13661 default:
13662 return 100;
13663 }
13664 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13665 }
13666 switch (GET_MODE_SIZE (mode))
13667 {
13668 case 1:
13669 if (in)
13670 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13671 : ix86_cost->movzbl_load);
13672 else
13673 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13674 : ix86_cost->int_store[0] + 4);
13675 break;
13676 case 2:
13677 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13678 default:
13679 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13680 if (mode == TFmode)
13681 mode = XFmode;
13682 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13683 * (int) GET_MODE_SIZE (mode) / 4);
13684 }
13685 }
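/* Worked example: a QImode load into a class outside Q_REGS is charged
   ix86_cost->movzbl_load, and a DImode integer load is charged twice
   ix86_cost->int_load[2], since it takes two 32-bit moves.  */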
13686
13687 #ifdef DO_GLOBAL_CTORS_BODY
13688 static void
13689 ix86_svr3_asm_out_constructor (symbol, priority)
13690 rtx symbol;
13691 int priority ATTRIBUTE_UNUSED;
13692 {
13693 init_section ();
13694 fputs ("\tpushl $", asm_out_file);
13695 assemble_name (asm_out_file, XSTR (symbol, 0));
13696 fputc ('\n', asm_out_file);
13697 }
13698 #endif
13699
13700 /* Order the registers for the register allocator. */
13701
13702 void
13703 x86_order_regs_for_local_alloc ()
13704 {
13705 int pos = 0;
13706 int i;
13707
13708 /* First allocate the local general purpose registers. */
13709 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13710 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13711 reg_alloc_order [pos++] = i;
13712
13713 /* Global general purpose registers. */
13714 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13715 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13716 reg_alloc_order [pos++] = i;
13717
13718 /* x87 registers come first in case we are doing FP math
13719 using them. */
13720 if (!TARGET_SSE_MATH)
13721 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13722 reg_alloc_order [pos++] = i;
13723
13724 /* SSE registers. */
13725 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13726 reg_alloc_order [pos++] = i;
13727 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13728 reg_alloc_order [pos++] = i;
13729
13730 /* x87 registers. */
13731 if (TARGET_SSE_MATH)
13732 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13733 reg_alloc_order [pos++] = i;
13734
13735 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13736 reg_alloc_order [pos++] = i;
13737
13738 /* Initialize the rest of the array, as some registers are never
13739 allocated at all. */
13740 while (pos < FIRST_PSEUDO_REGISTER)
13741 reg_alloc_order [pos++] = 0;
13742 }
13743
13744 void
13745 x86_output_mi_thunk (file, delta, function)
13746 FILE *file;
13747 int delta;
13748 tree function;
13749 {
13750 tree parm;
13751 rtx xops[3];
13752
13753 if (ix86_regparm > 0)
13754 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13755 else
13756 parm = NULL_TREE;
13757 for (; parm; parm = TREE_CHAIN (parm))
13758 if (TREE_VALUE (parm) == void_type_node)
13759 break;
13760
13761 xops[0] = GEN_INT (delta);
13762 if (TARGET_64BIT)
13763 {
13764 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13765 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13766 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13767 if (flag_pic)
13768 {
13769 fprintf (file, "\tjmp *");
13770 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13771 fprintf (file, "@GOTPCREL(%%rip)\n");
13772 }
13773 else
13774 {
13775 fprintf (file, "\tjmp ");
13776 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13777 fprintf (file, "\n");
13778 }
13779 }
13780 else
13781 {
13782 if (parm)
13783 xops[1] = gen_rtx_REG (SImode, 0);
13784 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13785 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13786 else
13787 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13788 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13789
13790 if (flag_pic)
13791 {
13792 xops[0] = pic_offset_table_rtx;
13793 xops[1] = gen_label_rtx ();
13794 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13795
13796 if (ix86_regparm > 2)
13797 abort ();
13798 output_asm_insn ("push{l}\t%0", xops);
13799 output_asm_insn ("call\t%P1", xops);
13800 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13801 output_asm_insn ("pop{l}\t%0", xops);
13802 output_asm_insn
13803 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13804 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13805 output_asm_insn
13806 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13807 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13808 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13809 }
13810 else
13811 {
13812 fprintf (file, "\tjmp ");
13813 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13814 fprintf (file, "\n");
13815 }
13816 }
13817 }
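/* For a 32-bit, non-PIC thunk with no register parameters and a
   non-aggregate return value, the code above emits roughly:

       addl $DELTA, 4(%esp)
       jmp  FUNCTION

   i.e. the incoming `this' pointer on the stack is adjusted by DELTA
   before jumping to the real method.  */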