gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
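/* A minimal sketch of how these masks are consumed (the real macros live in
   i386.h, so the name below is only illustrative): each TARGET_* feature
   test checks the bit of the CPU we are tuning for, roughly

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))

   so a feature is active whenever the ix86_cpu bit is set in the mask.  */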
399
400 /* In case the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
410
411 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
412 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
413 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
414 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
415
416 /* Array of the smallest class containing reg number REGNO, indexed by
417 REGNO. Used by REGNO_REG_CLASS in i386.h. */
418
419 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 {
421 /* ax, dx, cx, bx */
422 AREG, DREG, CREG, BREG,
423 /* si, di, bp, sp */
424 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
425 /* FP registers */
426 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
427 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* arg pointer */
429 NON_Q_REGS,
430 /* flags, fpsr, dirflag, frame */
431 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
435 MMX_REGS, MMX_REGS,
436 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
437 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
438 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
439 SSE_REGS, SSE_REGS,
440 };
441
442 /* The "default" register map used in 32bit mode. */
443
444 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
445 {
446 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
447 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
448 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
449 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
450 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
452 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 };
454
455 static int const x86_64_int_parameter_registers[6] =
456 {
457 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
458 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459 };
460
461 static int const x86_64_int_return_registers[4] =
462 {
463 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 };
465
466 /* The "default" register map used in 64bit mode. */
467 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
468 {
469 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
470 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
471 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
472 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
473 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
474 8,9,10,11,12,13,14,15, /* extended integer registers */
475 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 };
477
478 /* Define the register numbers to be used in Dwarf debugging information.
479 The SVR4 reference port C compiler uses the following register numbers
480 in its Dwarf output code:
481 0 for %eax (gcc regno = 0)
482 1 for %ecx (gcc regno = 2)
483 2 for %edx (gcc regno = 1)
484 3 for %ebx (gcc regno = 3)
485 4 for %esp (gcc regno = 7)
486 5 for %ebp (gcc regno = 6)
487 6 for %esi (gcc regno = 4)
488 7 for %edi (gcc regno = 5)
489 The following three DWARF register numbers are never generated by
490 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
491 believes these numbers have these meanings.
492 8 for %eip (no gcc equivalent)
493 9 for %eflags (gcc regno = 17)
494 10 for %trapno (no gcc equivalent)
495 It is not at all clear how we should number the FP stack registers
496 for the x86 architecture. If the version of SDB on x86/svr4 were
497 a bit less brain dead with respect to floating-point then we would
498 have a precedent to follow with respect to DWARF register numbers
499 for x86 FP registers, but the SDB on x86/svr4 is so completely
500 broken with respect to FP registers that it is hardly worth thinking
501 of it as something to strive for compatibility with.
502 The version of x86/svr4 SDB I have at the moment does (partially)
503 seem to believe that DWARF register number 11 is associated with
504 the x86 register %st(0), but that's about all. Higher DWARF
505 register numbers don't seem to be associated with anything in
506 particular, and even for DWARF regno 11, SDB only seems to under-
507 stand that it should say that a variable lives in %st(0) (when
508 asked via an `=' command) if we said it was in DWARF regno 11,
509 but SDB still prints garbage when asked for the value of the
510 variable in question (via a `/' command).
511 (Also note that the labels SDB prints for various FP stack regs
512 when doing an `x' command are all wrong.)
513 Note that these problems generally don't affect the native SVR4
514 C compiler because it doesn't allow the use of -O with -g and
515 because when it is *not* optimizing, it allocates a memory
516 location for each floating-point variable, and the memory
517 location is what gets described in the DWARF AT_location
518 attribute for the variable in question.
519 Regardless of the severe mental illness of the x86/svr4 SDB, we
520 do something sensible here and we use the following DWARF
521 register numbers. Note that these are all stack-top-relative
522 numbers.
523 11 for %st(0) (gcc regno = 8)
524 12 for %st(1) (gcc regno = 9)
525 13 for %st(2) (gcc regno = 10)
526 14 for %st(3) (gcc regno = 11)
527 15 for %st(4) (gcc regno = 12)
528 16 for %st(5) (gcc regno = 13)
529 17 for %st(6) (gcc regno = 14)
530 18 for %st(7) (gcc regno = 15)
531 */
532 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
533 {
534 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
535 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
536 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
537 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
538 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
539 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
540 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 };
542
543 /* Test and compare insns in i386.md store the information needed to
544 generate branch and scc insns here. */
545
546 rtx ix86_compare_op0 = NULL_RTX;
547 rtx ix86_compare_op1 = NULL_RTX;
548
549 /* The encoding characters for the four TLS models present in ELF. */
550
551 static char const tls_model_chars[] = " GLil";
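/* Presumably indexed by enum tls_model: ' ' for no TLS, 'G' for
   global-dynamic, 'L' for local-dynamic, 'i' for initial-exec and
   'l' for local-exec.  */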
552
553 #define MAX_386_STACK_LOCALS 3
554 /* Size of the register save area. */
555 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
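/* As a rough worked example, with the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 this comes to
   6*8 + 8*16 = 176 bytes, the size of the psABI register save area.  */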
556
557 /* Define the structure for the machine field in struct function. */
558 struct machine_function GTY(())
559 {
560 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
561 const char *some_ld_name;
562 int save_varrargs_registers;
563 int accesses_prev_frame;
564 };
565
566 #define ix86_stack_locals (cfun->machine->stack_locals)
567 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
568
569 /* Structure describing stack frame layout.
570 Stack grows downward:
571
572 [arguments]
573 <- ARG_POINTER
574 saved pc
575
576 saved frame pointer if frame_pointer_needed
577 <- HARD_FRAME_POINTER
578 [saved regs]
579
580 [padding1] \
581 )
582 [va_arg registers] (
583 > to_allocate <- FRAME_POINTER
584 [frame] (
585 )
586 [padding2] /
587 */
588 struct ix86_frame
589 {
590 int nregs;
591 int padding1;
592 int va_arg_size;
593 HOST_WIDE_INT frame;
594 int padding2;
595 int outgoing_arguments_size;
596 int red_zone_size;
597
598 HOST_WIDE_INT to_allocate;
599 /* The offsets relative to ARG_POINTER. */
600 HOST_WIDE_INT frame_pointer_offset;
601 HOST_WIDE_INT hard_frame_pointer_offset;
602 HOST_WIDE_INT stack_pointer_offset;
603 };
604
605 /* Used to enable/disable debugging features. */
606 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
607 /* Code model option as passed by user. */
608 const char *ix86_cmodel_string;
609 /* Parsed value. */
610 enum cmodel ix86_cmodel;
611 /* Asm dialect. */
612 const char *ix86_asm_string;
613 enum asm_dialect ix86_asm_dialect = ASM_ATT;
614 /* TLS dialect. */
615 const char *ix86_tls_dialect_string;
616 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
617
618 /* Which unit we are generating floating point math for. */
619 enum fpmath_unit ix86_fpmath;
620
621 /* Which CPU we are scheduling for. */
622 enum processor_type ix86_cpu;
623 /* Which instruction set architecture to use. */
624 enum processor_type ix86_arch;
625
626 /* Strings to hold which cpu and instruction set architecture to use. */
627 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
628 const char *ix86_arch_string; /* for -march=<xxx> */
629 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
630
631 /* # of registers to use to pass arguments. */
632 const char *ix86_regparm_string;
633
634 /* Nonzero if the SSE prefetch instruction is not a NOP. */
635 int x86_prefetch_sse;
636
637 /* ix86_regparm_string as a number */
638 int ix86_regparm;
639
640 /* Alignment to use for loops and jumps: */
641
642 /* Power of two alignment for loops. */
643 const char *ix86_align_loops_string;
644
645 /* Power of two alignment for non-loop jumps. */
646 const char *ix86_align_jumps_string;
647
648 /* Power of two alignment for stack boundary in bytes. */
649 const char *ix86_preferred_stack_boundary_string;
650
651 /* Preferred alignment for stack boundary in bits. */
652 int ix86_preferred_stack_boundary;
653
654 /* Values 1-5: see jump.c */
655 int ix86_branch_cost;
656 const char *ix86_branch_cost_string;
657
658 /* Power of two alignment for functions. */
659 const char *ix86_align_funcs_string;
660
661 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
662 static char internal_label_prefix[16];
663 static int internal_label_prefix_len;
664 \f
665 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
666 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
667 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
668 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
669 int, int, FILE *));
670 static const char *get_some_local_dynamic_name PARAMS ((void));
671 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
672 static rtx maybe_get_pool_constant PARAMS ((rtx));
673 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
674 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
675 rtx *, rtx *));
676 static rtx get_thread_pointer PARAMS ((void));
677 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
678 static rtx gen_push PARAMS ((rtx));
679 static int memory_address_length PARAMS ((rtx addr));
680 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
681 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
682 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
683 static void ix86_dump_ppro_packet PARAMS ((FILE *));
684 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
685 static struct machine_function * ix86_init_machine_status PARAMS ((void));
686 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
687 static int ix86_nsaved_regs PARAMS ((void));
688 static void ix86_emit_save_regs PARAMS ((void));
689 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
690 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
691 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
692 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
693 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
694 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
695 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
696 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
697 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
698 static int ix86_issue_rate PARAMS ((void));
699 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
700 static void ix86_sched_init PARAMS ((FILE *, int, int));
701 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
702 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
703 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
704 static int ia32_multipass_dfa_lookahead PARAMS ((void));
705 static void ix86_init_mmx_sse_builtins PARAMS ((void));
706
707 struct ix86_address
708 {
709 rtx base, index, disp;
710 HOST_WIDE_INT scale;
711 };
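/* This mirrors the general x86 effective-address form
   base + index*scale + disp; e.g. the operand 16(%eax,%ebx,4) decomposes
   into base = %eax, index = %ebx, scale = 4, disp = 16.  */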
712
713 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
714
715 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
716 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 ATTRIBUTE_UNUSED;
718
719 struct builtin_description;
720 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
723 tree, rtx));
724 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
726 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
727 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
728 tree, rtx));
729 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
730 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
731 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
732 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
733 enum rtx_code *,
734 enum rtx_code *,
735 enum rtx_code *));
736 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
737 rtx *, rtx *));
738 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
740 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
741 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
742 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
743 static int ix86_save_reg PARAMS ((unsigned int, int));
744 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
745 static int ix86_comp_type_attributes PARAMS ((tree, tree));
746 const struct attribute_spec ix86_attribute_table[];
747 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
748 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
749
750 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
751 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
752 #endif
753
754 /* Register class used for passing a given 64-bit part of the argument.
755 These represent classes as documented by the PS ABI, with the exception
756 of the SSESF and SSEDF classes, which are basically the SSE class, except
757 that gcc will use an SFmode or DFmode move instead of DImode to avoid
758 reformatting penalties.
759 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
760 whenever possible (the upper half then contains only padding).
761 */
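/* Roughly, per the psABI: chunks classified INTEGER/INTEGERSI go into the
   next free integer argument register (rdi, rsi, rdx, rcx, r8, r9), chunks
   classified SSE/SSESF/SSEDF go into the next xmm register, and the X87,
   X87UP or MEMORY classes force the argument onto the stack.  */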
762 enum x86_64_reg_class
763 {
764 X86_64_NO_CLASS,
765 X86_64_INTEGER_CLASS,
766 X86_64_INTEGERSI_CLASS,
767 X86_64_SSE_CLASS,
768 X86_64_SSESF_CLASS,
769 X86_64_SSEDF_CLASS,
770 X86_64_SSEUP_CLASS,
771 X86_64_X87_CLASS,
772 X86_64_X87UP_CLASS,
773 X86_64_MEMORY_CLASS
774 };
775 static const char * const x86_64_reg_class_name[] =
776 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777
778 #define MAX_CLASSES 4
779 static int classify_argument PARAMS ((enum machine_mode, tree,
780 enum x86_64_reg_class [MAX_CLASSES],
781 int));
782 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 int *));
784 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 const int *, int));
786 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
787 enum x86_64_reg_class));
788 \f
789 /* Initialize the GCC target structure. */
790 #undef TARGET_ATTRIBUTE_TABLE
791 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
792 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
793 # undef TARGET_MERGE_DECL_ATTRIBUTES
794 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
795 #endif
796
797 #undef TARGET_COMP_TYPE_ATTRIBUTES
798 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799
800 #undef TARGET_INIT_BUILTINS
801 #define TARGET_INIT_BUILTINS ix86_init_builtins
802
803 #undef TARGET_EXPAND_BUILTIN
804 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805
806 #undef TARGET_ASM_FUNCTION_EPILOGUE
807 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808
809 #undef TARGET_ASM_OPEN_PAREN
810 #define TARGET_ASM_OPEN_PAREN ""
811 #undef TARGET_ASM_CLOSE_PAREN
812 #define TARGET_ASM_CLOSE_PAREN ""
813
814 #undef TARGET_ASM_ALIGNED_HI_OP
815 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
816 #undef TARGET_ASM_ALIGNED_SI_OP
817 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #ifdef ASM_QUAD
819 #undef TARGET_ASM_ALIGNED_DI_OP
820 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
821 #endif
822
823 #undef TARGET_ASM_UNALIGNED_HI_OP
824 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
825 #undef TARGET_ASM_UNALIGNED_SI_OP
826 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
827 #undef TARGET_ASM_UNALIGNED_DI_OP
828 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829
830 #undef TARGET_SCHED_ADJUST_COST
831 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
832 #undef TARGET_SCHED_ISSUE_RATE
833 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
834 #undef TARGET_SCHED_VARIABLE_ISSUE
835 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
836 #undef TARGET_SCHED_INIT
837 #define TARGET_SCHED_INIT ix86_sched_init
838 #undef TARGET_SCHED_REORDER
839 #define TARGET_SCHED_REORDER ix86_sched_reorder
840 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
841 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
842 ia32_use_dfa_pipeline_interface
843 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
844 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
845 ia32_multipass_dfa_lookahead
846
847 #ifdef HAVE_AS_TLS
848 #undef TARGET_HAVE_TLS
849 #define TARGET_HAVE_TLS true
850 #endif
851
852 struct gcc_target targetm = TARGET_INITIALIZER;
853 \f
854 /* Sometimes certain combinations of command options do not make
855 sense on a particular target machine. You can define a macro
856 `OVERRIDE_OPTIONS' to take account of this. This macro, if
857 defined, is executed once just after all the command options have
858 been parsed.
859
860 Don't use this macro to turn on various extra optimizations for
861 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
862
863 void
864 override_options ()
865 {
866 int i;
867 /* Comes from final.c -- no real reason to change it. */
868 #define MAX_CODE_ALIGN 16
869
870 static struct ptt
871 {
872 const struct processor_costs *cost; /* Processor costs */
873 const int target_enable; /* Target flags to enable. */
874 const int target_disable; /* Target flags to disable. */
875 const int align_loop; /* Default alignments. */
876 const int align_loop_max_skip;
877 const int align_jump;
878 const int align_jump_max_skip;
879 const int align_func;
880 const int branch_cost;
881 }
882 const processor_target_table[PROCESSOR_max] =
883 {
884 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
885 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
886 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
887 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
888 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
889 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
890 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
891 };
892
893 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
894 static struct pta
895 {
896 const char *const name; /* processor name or nickname. */
897 const enum processor_type processor;
898 const enum pta_flags
899 {
900 PTA_SSE = 1,
901 PTA_SSE2 = 2,
902 PTA_MMX = 4,
903 PTA_PREFETCH_SSE = 8,
904 PTA_3DNOW = 16,
905 PTA_3DNOW_A = 64
906 } flags;
907 }
908 const processor_alias_table[] =
909 {
910 {"i386", PROCESSOR_I386, 0},
911 {"i486", PROCESSOR_I486, 0},
912 {"i586", PROCESSOR_PENTIUM, 0},
913 {"pentium", PROCESSOR_PENTIUM, 0},
914 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
915 {"i686", PROCESSOR_PENTIUMPRO, 0},
916 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
917 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
918 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
920 PTA_MMX | PTA_PREFETCH_SSE},
921 {"k6", PROCESSOR_K6, PTA_MMX},
922 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
924 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
925 | PTA_3DNOW_A},
926 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
927 | PTA_3DNOW | PTA_3DNOW_A},
928 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
929 | PTA_3DNOW_A | PTA_SSE},
930 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 };
935
936 int const pta_size = ARRAY_SIZE (processor_alias_table);
937
938 #ifdef SUBTARGET_OVERRIDE_OPTIONS
939 SUBTARGET_OVERRIDE_OPTIONS;
940 #endif
941
942 if (!ix86_cpu_string && ix86_arch_string)
943 ix86_cpu_string = ix86_arch_string;
944 if (!ix86_cpu_string)
945 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
946 if (!ix86_arch_string)
947 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
948
949 if (ix86_cmodel_string != 0)
950 {
951 if (!strcmp (ix86_cmodel_string, "small"))
952 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953 else if (flag_pic)
954 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
955 else if (!strcmp (ix86_cmodel_string, "32"))
956 ix86_cmodel = CM_32;
957 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
958 ix86_cmodel = CM_KERNEL;
959 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
960 ix86_cmodel = CM_MEDIUM;
961 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
962 ix86_cmodel = CM_LARGE;
963 else
964 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
965 }
966 else
967 {
968 ix86_cmodel = CM_32;
969 if (TARGET_64BIT)
970 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 }
972 if (ix86_asm_string != 0)
973 {
974 if (!strcmp (ix86_asm_string, "intel"))
975 ix86_asm_dialect = ASM_INTEL;
976 else if (!strcmp (ix86_asm_string, "att"))
977 ix86_asm_dialect = ASM_ATT;
978 else
979 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 }
981 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
982 error ("code model `%s' not supported in the %s bit mode",
983 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
984 if (ix86_cmodel == CM_LARGE)
985 sorry ("code model `large' not supported yet");
986 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
987 sorry ("%i-bit mode not compiled in",
988 (target_flags & MASK_64BIT) ? 64 : 32);
989
990 for (i = 0; i < pta_size; i++)
991 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 {
993 ix86_arch = processor_alias_table[i].processor;
994 /* Default cpu tuning to the architecture. */
995 ix86_cpu = ix86_arch;
996 if (processor_alias_table[i].flags & PTA_MMX
997 && !(target_flags & MASK_MMX_SET))
998 target_flags |= MASK_MMX;
999 if (processor_alias_table[i].flags & PTA_3DNOW
1000 && !(target_flags & MASK_3DNOW_SET))
1001 target_flags |= MASK_3DNOW;
1002 if (processor_alias_table[i].flags & PTA_3DNOW_A
1003 && !(target_flags & MASK_3DNOW_A_SET))
1004 target_flags |= MASK_3DNOW_A;
1005 if (processor_alias_table[i].flags & PTA_SSE
1006 && !(target_flags & MASK_SSE_SET))
1007 target_flags |= MASK_SSE;
1008 if (processor_alias_table[i].flags & PTA_SSE2
1009 && !(target_flags & MASK_SSE2_SET))
1010 target_flags |= MASK_SSE2;
1011 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1012 x86_prefetch_sse = true;
1013 break;
1014 }
1015
1016 if (i == pta_size)
1017 error ("bad value (%s) for -march= switch", ix86_arch_string);
1018
1019 for (i = 0; i < pta_size; i++)
1020 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 {
1022 ix86_cpu = processor_alias_table[i].processor;
1023 break;
1024 }
1025 if (i == pta_size)
1026 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1027 else if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1028 x86_prefetch_sse = true;
1029
1030 if (optimize_size)
1031 ix86_cost = &size_cost;
1032 else
1033 ix86_cost = processor_target_table[ix86_cpu].cost;
1034 target_flags |= processor_target_table[ix86_cpu].target_enable;
1035 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036
1037 /* Arrange to set up i386_stack_locals for all functions. */
1038 init_machine_status = ix86_init_machine_status;
1039
1040 /* Validate -mregparm= value. */
1041 if (ix86_regparm_string)
1042 {
1043 i = atoi (ix86_regparm_string);
1044 if (i < 0 || i > REGPARM_MAX)
1045 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1046 else
1047 ix86_regparm = i;
1048 }
1049 else
1050 if (TARGET_64BIT)
1051 ix86_regparm = REGPARM_MAX;
1052
1053 /* If the user has provided any of the -malign-* options,
1054 warn and use that value only if -falign-* is not set.
1055 Remove this code in GCC 3.2 or later. */
1056 if (ix86_align_loops_string)
1057 {
1058 warning ("-malign-loops is obsolete, use -falign-loops");
1059 if (align_loops == 0)
1060 {
1061 i = atoi (ix86_align_loops_string);
1062 if (i < 0 || i > MAX_CODE_ALIGN)
1063 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 else
1065 align_loops = 1 << i;
1066 }
1067 }
1068
1069 if (ix86_align_jumps_string)
1070 {
1071 warning ("-malign-jumps is obsolete, use -falign-jumps");
1072 if (align_jumps == 0)
1073 {
1074 i = atoi (ix86_align_jumps_string);
1075 if (i < 0 || i > MAX_CODE_ALIGN)
1076 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1077 else
1078 align_jumps = 1 << i;
1079 }
1080 }
1081
1082 if (ix86_align_funcs_string)
1083 {
1084 warning ("-malign-functions is obsolete, use -falign-functions");
1085 if (align_functions == 0)
1086 {
1087 i = atoi (ix86_align_funcs_string);
1088 if (i < 0 || i > MAX_CODE_ALIGN)
1089 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1090 else
1091 align_functions = 1 << i;
1092 }
1093 }
1094
1095 /* Default align_* from the processor table. */
1096 if (align_loops == 0)
1097 {
1098 align_loops = processor_target_table[ix86_cpu].align_loop;
1099 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 }
1101 if (align_jumps == 0)
1102 {
1103 align_jumps = processor_target_table[ix86_cpu].align_jump;
1104 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 }
1106 if (align_functions == 0)
1107 {
1108 align_functions = processor_target_table[ix86_cpu].align_func;
1109 }
1110
1111 /* Validate -mpreferred-stack-boundary= value, or provide default.
1112 The default of 128 bits is for Pentium III's SSE __m128, but we
1113 don't want additional code to keep the stack aligned when
1114 optimizing for code size. */
1115 ix86_preferred_stack_boundary = (optimize_size
1116 ? TARGET_64BIT ? 64 : 32
1117 : 128);
1118 if (ix86_preferred_stack_boundary_string)
1119 {
1120 i = atoi (ix86_preferred_stack_boundary_string);
1121 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1122 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1123 TARGET_64BIT ? 3 : 2);
1124 else
1125 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1126 }
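  /* For instance, -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment wanted for __m128.  */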
1127
1128 /* Validate -mbranch-cost= value, or provide default. */
1129 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1130 if (ix86_branch_cost_string)
1131 {
1132 i = atoi (ix86_branch_cost_string);
1133 if (i < 0 || i > 5)
1134 error ("-mbranch-cost=%d is not between 0 and 5", i);
1135 else
1136 ix86_branch_cost = i;
1137 }
1138
1139 if (ix86_tls_dialect_string)
1140 {
1141 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1142 ix86_tls_dialect = TLS_DIALECT_GNU;
1143 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_SUN;
1145 else
1146 error ("bad value (%s) for -mtls-dialect= switch",
1147 ix86_tls_dialect_string);
1148 }
1149
1150 /* Keep nonleaf frame pointers. */
1151 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1152 flag_omit_frame_pointer = 1;
1153
1154 /* If we're doing fast math, we don't care about comparison order
1155 wrt NaNs. This lets us use a shorter comparison sequence. */
1156 if (flag_unsafe_math_optimizations)
1157 target_flags &= ~MASK_IEEE_FP;
1158
1159 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1160 since the insns won't need emulation. */
1161 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1162 target_flags &= ~MASK_NO_FANCY_MATH_387;
1163
1164 if (TARGET_64BIT)
1165 {
1166 if (TARGET_ALIGN_DOUBLE)
1167 error ("-malign-double makes no sense in the 64bit mode");
1168 if (TARGET_RTD)
1169 error ("-mrtd calling convention not supported in the 64bit mode");
1170 /* Enable by default the SSE and MMX builtins. */
1171 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1172 ix86_fpmath = FPMATH_SSE;
1173 }
1174 else
1175 ix86_fpmath = FPMATH_387;
1176
1177 if (ix86_fpmath_string != 0)
1178 {
1179 if (! strcmp (ix86_fpmath_string, "387"))
1180 ix86_fpmath = FPMATH_387;
1181 else if (! strcmp (ix86_fpmath_string, "sse"))
1182 {
1183 if (!TARGET_SSE)
1184 {
1185 warning ("SSE instruction set disabled, using 387 arithmetics");
1186 ix86_fpmath = FPMATH_387;
1187 }
1188 else
1189 ix86_fpmath = FPMATH_SSE;
1190 }
1191 else if (! strcmp (ix86_fpmath_string, "387,sse")
1192 || ! strcmp (ix86_fpmath_string, "sse,387"))
1193 {
1194 if (!TARGET_SSE)
1195 {
1196 warning ("SSE instruction set disabled, using 387 arithmetics");
1197 ix86_fpmath = FPMATH_387;
1198 }
1199 else if (!TARGET_80387)
1200 {
1201 warning ("387 instruction set disabled, using SSE arithmetics");
1202 ix86_fpmath = FPMATH_SSE;
1203 }
1204 else
1205 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1206 }
1207 else
1208 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1209 }
1210
1211 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1212 on by -msse. */
1213 if (TARGET_SSE)
1214 {
1215 target_flags |= MASK_MMX;
1216 x86_prefetch_sse = true;
1217 }
1218
1219 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1220 if (TARGET_3DNOW)
1221 {
1222 target_flags |= MASK_MMX;
1223 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1224 extensions it adds. */
1225 if (x86_3dnow_a & (1 << ix86_arch))
1226 target_flags |= MASK_3DNOW_A;
1227 }
1228 if ((x86_accumulate_outgoing_args & CPUMASK)
1229 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1230 && !optimize_size)
1231 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1232
1233 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1234 {
1235 char *p;
1236 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1237 p = strchr (internal_label_prefix, 'X');
1238 internal_label_prefix_len = p - internal_label_prefix;
1239 *p = '\0';
1240 }
1241 }
1242 \f
1243 void
1244 optimization_options (level, size)
1245 int level;
1246 int size ATTRIBUTE_UNUSED;
1247 {
1248 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1249 make the problem with not enough registers even worse. */
1250 #ifdef INSN_SCHEDULING
1251 if (level > 1)
1252 flag_schedule_insns = 0;
1253 #endif
1254 if (TARGET_64BIT && optimize >= 1)
1255 flag_omit_frame_pointer = 1;
1256 if (TARGET_64BIT)
1257 {
1258 flag_pcc_struct_return = 0;
1259 flag_asynchronous_unwind_tables = 1;
1260 }
1261 }
1262 \f
1263 /* Table of valid machine attributes. */
1264 const struct attribute_spec ix86_attribute_table[] =
1265 {
1266 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1267 /* Stdcall attribute says callee is responsible for popping arguments
1268 if they are not variable. */
1269 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1270 /* Cdecl attribute says the callee is a normal C declaration */
1271 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1272 /* Regparm attribute specifies how many integer arguments are to be
1273 passed in registers. */
1274 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1275 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1276 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1278 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1279 #endif
1280 { NULL, 0, 0, false, false, false, NULL }
1281 };
1282
1283 /* Handle a "cdecl" or "stdcall" attribute;
1284 arguments as in struct attribute_spec.handler. */
1285 static tree
1286 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1287 tree *node;
1288 tree name;
1289 tree args ATTRIBUTE_UNUSED;
1290 int flags ATTRIBUTE_UNUSED;
1291 bool *no_add_attrs;
1292 {
1293 if (TREE_CODE (*node) != FUNCTION_TYPE
1294 && TREE_CODE (*node) != METHOD_TYPE
1295 && TREE_CODE (*node) != FIELD_DECL
1296 && TREE_CODE (*node) != TYPE_DECL)
1297 {
1298 warning ("`%s' attribute only applies to functions",
1299 IDENTIFIER_POINTER (name));
1300 *no_add_attrs = true;
1301 }
1302
1303 if (TARGET_64BIT)
1304 {
1305 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1306 *no_add_attrs = true;
1307 }
1308
1309 return NULL_TREE;
1310 }
1311
1312 /* Handle a "regparm" attribute;
1313 arguments as in struct attribute_spec.handler. */
1314 static tree
1315 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1316 tree *node;
1317 tree name;
1318 tree args;
1319 int flags ATTRIBUTE_UNUSED;
1320 bool *no_add_attrs;
1321 {
1322 if (TREE_CODE (*node) != FUNCTION_TYPE
1323 && TREE_CODE (*node) != METHOD_TYPE
1324 && TREE_CODE (*node) != FIELD_DECL
1325 && TREE_CODE (*node) != TYPE_DECL)
1326 {
1327 warning ("`%s' attribute only applies to functions",
1328 IDENTIFIER_POINTER (name));
1329 *no_add_attrs = true;
1330 }
1331 else
1332 {
1333 tree cst;
1334
1335 cst = TREE_VALUE (args);
1336 if (TREE_CODE (cst) != INTEGER_CST)
1337 {
1338 warning ("`%s' attribute requires an integer constant argument",
1339 IDENTIFIER_POINTER (name));
1340 *no_add_attrs = true;
1341 }
1342 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1343 {
1344 warning ("argument to `%s' attribute larger than %d",
1345 IDENTIFIER_POINTER (name), REGPARM_MAX);
1346 *no_add_attrs = true;
1347 }
1348 }
1349
1350 return NULL_TREE;
1351 }
1352
1353 /* Return 0 if the attributes for two types are incompatible, 1 if they
1354 are compatible, and 2 if they are nearly compatible (which causes a
1355 warning to be generated). */
1356
1357 static int
1358 ix86_comp_type_attributes (type1, type2)
1359 tree type1;
1360 tree type2;
1361 {
1362 /* Check for mismatch of non-default calling convention. */
1363 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1364
1365 if (TREE_CODE (type1) != FUNCTION_TYPE)
1366 return 1;
1367
1368 /* Check for a mismatched callee-pops convention (cdecl vs stdcall). */
1369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1371 return 0;
1372 return 1;
1373 }
1374 \f
1375 /* Value is the number of bytes of arguments automatically
1376 popped when returning from a subroutine call.
1377 FUNDECL is the declaration node of the function (as a tree),
1378 FUNTYPE is the data type of the function (as a tree),
1379 or for a library call it is an identifier node for the subroutine name.
1380 SIZE is the number of bytes of arguments passed on the stack.
1381
1382 On the 80386, the RTD insn may be used to pop them if the number
1383 of args is fixed, but if the number is variable then the caller
1384 must pop them all. RTD can't be used for library calls now
1385 because the library is compiled with the Unix compiler.
1386 Use of RTD is a selectable option, since it is incompatible with
1387 standard Unix calling sequences. If the option is not selected,
1388 the caller must always pop the args.
1389
1390 The attribute stdcall is equivalent to RTD on a per module basis. */
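/* A small usage sketch, assuming 4-byte ints: given

     int __attribute__ ((stdcall)) f (int a, int b);

   popping is left to the callee, so ix86_return_pops_args returns SIZE
   (8 bytes here), whereas for a cdecl or varargs function it returns 0
   and the caller pops the arguments.  */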
1391
1392 int
1393 ix86_return_pops_args (fundecl, funtype, size)
1394 tree fundecl;
1395 tree funtype;
1396 int size;
1397 {
1398 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1399
1400 /* Cdecl functions override -mrtd, and never pop the stack. */
1401 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1402
1403 /* Stdcall functions will pop the stack if not variable args. */
1404 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1405 rtd = 1;
1406
1407 if (rtd
1408 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1409 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1410 == void_type_node)))
1411 return size;
1412 }
1413
1414 /* Lose any fake structure return argument if it is passed on the stack. */
1415 if (aggregate_value_p (TREE_TYPE (funtype))
1416 && !TARGET_64BIT)
1417 {
1418 int nregs = ix86_regparm;
1419
1420 if (funtype)
1421 {
1422 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1423
1424 if (attr)
1425 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1426 }
1427
1428 if (!nregs)
1429 return GET_MODE_SIZE (Pmode);
1430 }
1431
1432 return 0;
1433 }
1434 \f
1435 /* Argument support functions. */
1436
1437 /* Return true when register may be used to pass function parameters. */
1438 bool
1439 ix86_function_arg_regno_p (regno)
1440 int regno;
1441 {
1442 int i;
1443 if (!TARGET_64BIT)
1444 return (regno < REGPARM_MAX
1445 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1446 if (SSE_REGNO_P (regno) && TARGET_SSE)
1447 return true;
1448 /* RAX is used as hidden argument to va_arg functions. */
1449 if (!regno)
1450 return true;
1451 for (i = 0; i < REGPARM_MAX; i++)
1452 if (regno == x86_64_int_parameter_registers[i])
1453 return true;
1454 return false;
1455 }
1456
1457 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1458 for a call to a function whose data type is FNTYPE.
1459 For a library call, FNTYPE is 0. */
1460
1461 void
1462 init_cumulative_args (cum, fntype, libname)
1463 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1464 tree fntype; /* tree ptr for function decl */
1465 rtx libname; /* SYMBOL_REF of library name or 0 */
1466 {
1467 static CUMULATIVE_ARGS zero_cum;
1468 tree param, next_param;
1469
1470 if (TARGET_DEBUG_ARG)
1471 {
1472 fprintf (stderr, "\ninit_cumulative_args (");
1473 if (fntype)
1474 fprintf (stderr, "fntype code = %s, ret code = %s",
1475 tree_code_name[(int) TREE_CODE (fntype)],
1476 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1477 else
1478 fprintf (stderr, "no fntype");
1479
1480 if (libname)
1481 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1482 }
1483
1484 *cum = zero_cum;
1485
1486 /* Set up the number of registers to use for passing arguments. */
1487 cum->nregs = ix86_regparm;
1488 cum->sse_nregs = SSE_REGPARM_MAX;
1489 if (fntype && !TARGET_64BIT)
1490 {
1491 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1492
1493 if (attr)
1494 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1495 }
1496 cum->maybe_vaarg = false;
1497
1498 /* Determine whether this function has variable arguments. This is
1499 indicated by the last argument being 'void_type_node' if there
1500 are no variable arguments. If there are variable arguments, then
1501 we won't pass anything in registers. */
1502
1503 if (cum->nregs)
1504 {
1505 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1506 param != 0; param = next_param)
1507 {
1508 next_param = TREE_CHAIN (param);
1509 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1510 {
1511 if (!TARGET_64BIT)
1512 cum->nregs = 0;
1513 cum->maybe_vaarg = true;
1514 }
1515 }
1516 }
1517 if ((!fntype && !libname)
1518 || (fntype && !TYPE_ARG_TYPES (fntype)))
1519 cum->maybe_vaarg = 1;
1520
1521 if (TARGET_DEBUG_ARG)
1522 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1523
1524 return;
1525 }
1526
1527 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1528 of this code is to classify each 8-byte chunk of an incoming argument by
1529 register class and assign registers accordingly. */
1530
1531 /* Return the union class of CLASS1 and CLASS2.
1532 See the x86-64 PS ABI for details. */
1533
1534 static enum x86_64_reg_class
1535 merge_classes (class1, class2)
1536 enum x86_64_reg_class class1, class2;
1537 {
1538 /* Rule #1: If both classes are equal, this is the resulting class. */
1539 if (class1 == class2)
1540 return class1;
1541
1542 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1543 the other class. */
1544 if (class1 == X86_64_NO_CLASS)
1545 return class2;
1546 if (class2 == X86_64_NO_CLASS)
1547 return class1;
1548
1549 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1550 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1551 return X86_64_MEMORY_CLASS;
1552
1553 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1554 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1555 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1556 return X86_64_INTEGERSI_CLASS;
1557 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1558 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1559 return X86_64_INTEGER_CLASS;
1560
1561 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1562 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1563 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1564 return X86_64_MEMORY_CLASS;
1565
1566 /* Rule #6: Otherwise class SSE is used. */
1567 return X86_64_SSE_CLASS;
1568 }
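
/* For example (illustrative only): in

       union u { int i; float f; };

   the int member classifies as X86_64_INTEGERSI_CLASS and the float member as
   X86_64_SSESF_CLASS; rule #4 merges these into X86_64_INTEGERSI_CLASS, so
   the union is passed in the low half of an integer register.  */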
1569
1570 /* Classify the argument of type TYPE and mode MODE.
1571 CLASSES will be filled by the register class used to pass each word
1572 of the operand. The number of words is returned. In case the parameter
1573 should be passed in memory, 0 is returned. As a special case for zero
1574 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1575
1576 BIT_OFFSET is used internally for handling records and specifies the
1577 offset in bits modulo 256, to avoid overflow cases.
1578
1579 See the x86-64 PS ABI for details.
1580 */
1581
1582 static int
1583 classify_argument (mode, type, classes, bit_offset)
1584 enum machine_mode mode;
1585 tree type;
1586 enum x86_64_reg_class classes[MAX_CLASSES];
1587 int bit_offset;
1588 {
1589 int bytes =
1590 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1591 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1592
1593 if (type && AGGREGATE_TYPE_P (type))
1594 {
1595 int i;
1596 tree field;
1597 enum x86_64_reg_class subclasses[MAX_CLASSES];
1598
1599 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1600 if (bytes > 16)
1601 return 0;
1602
1603 for (i = 0; i < words; i++)
1604 classes[i] = X86_64_NO_CLASS;
1605
1606 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1607 signal the memory class, so handle this as a special case. */
1608 if (!words)
1609 {
1610 classes[0] = X86_64_NO_CLASS;
1611 return 1;
1612 }
1613
1614 /* Classify each field of the record and merge the classes. */
1615 if (TREE_CODE (type) == RECORD_TYPE)
1616 {
1617 /* For C++ classes, first merge in the fields of the bases. */
1618 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1619 {
1620 tree bases = TYPE_BINFO_BASETYPES (type);
1621 int n_bases = TREE_VEC_LENGTH (bases);
1622 int i;
1623
1624 for (i = 0; i < n_bases; ++i)
1625 {
1626 tree binfo = TREE_VEC_ELT (bases, i);
1627 int num;
1628 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1629 tree type = BINFO_TYPE (binfo);
1630
1631 num = classify_argument (TYPE_MODE (type),
1632 type, subclasses,
1633 (offset + bit_offset) % 256);
1634 if (!num)
1635 return 0;
1636 for (i = 0; i < num; i++)
1637 {
1638 int pos = (offset + bit_offset) / 8 / 8;
1639 classes[i + pos] =
1640 merge_classes (subclasses[i], classes[i + pos]);
1641 }
1642 }
1643 }
1644 /* And now merge in the fields of the structure itself. */
1645 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1646 {
1647 if (TREE_CODE (field) == FIELD_DECL)
1648 {
1649 int num;
1650
1651 /* Bitfields are always classified as integer. Handle them
1652 early, since later code would consider them to be
1653 misaligned integers. */
1654 if (DECL_BIT_FIELD (field))
1655 {
1656 for (i = int_bit_position (field) / 8 / 8;
1657 i < (int_bit_position (field)
1658 + tree_low_cst (DECL_SIZE (field), 0)
1659 + 63) / 8 / 8; i++)
1660 classes[i] =
1661 merge_classes (X86_64_INTEGER_CLASS,
1662 classes[i]);
1663 }
1664 else
1665 {
1666 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1667 TREE_TYPE (field), subclasses,
1668 (int_bit_position (field)
1669 + bit_offset) % 256);
1670 if (!num)
1671 return 0;
1672 for (i = 0; i < num; i++)
1673 {
1674 int pos =
1675 (int_bit_position (field) + bit_offset) / 8 / 8;
1676 classes[i + pos] =
1677 merge_classes (subclasses[i], classes[i + pos]);
1678 }
1679 }
1680 }
1681 }
1682 }
1683 /* Arrays are handled as small records. */
1684 else if (TREE_CODE (type) == ARRAY_TYPE)
1685 {
1686 int num;
1687 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1688 TREE_TYPE (type), subclasses, bit_offset);
1689 if (!num)
1690 return 0;
1691
1692 /* The partial classes are now full classes. */
1693 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1694 subclasses[0] = X86_64_SSE_CLASS;
1695 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1696 subclasses[0] = X86_64_INTEGER_CLASS;
1697
1698 for (i = 0; i < words; i++)
1699 classes[i] = subclasses[i % num];
1700 }
1701 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
1702 else if (TREE_CODE (type) == UNION_TYPE
1703 || TREE_CODE (type) == QUAL_UNION_TYPE)
1704 {
1705 /* For C++ classes, first merge in the fields of the bases. */
1706 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1707 {
1708 tree bases = TYPE_BINFO_BASETYPES (type);
1709 int n_bases = TREE_VEC_LENGTH (bases);
1710 int i;
1711
1712 for (i = 0; i < n_bases; ++i)
1713 {
1714 tree binfo = TREE_VEC_ELT (bases, i);
1715 int num;
1716 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1717 tree type = BINFO_TYPE (binfo);
1718
1719 num = classify_argument (TYPE_MODE (type),
1720 type, subclasses,
1721 (offset + bit_offset) % 256);
1722 if (!num)
1723 return 0;
1724 for (i = 0; i < num; i++)
1725 {
1726 int pos = (offset + bit_offset) / 8 / 8;
1727 classes[i + pos] =
1728 merge_classes (subclasses[i], classes[i + pos]);
1729 }
1730 }
1731 }
1732 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1733 {
1734 if (TREE_CODE (field) == FIELD_DECL)
1735 {
1736 int num;
1737 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1738 TREE_TYPE (field), subclasses,
1739 bit_offset);
1740 if (!num)
1741 return 0;
1742 for (i = 0; i < num; i++)
1743 classes[i] = merge_classes (subclasses[i], classes[i]);
1744 }
1745 }
1746 }
1747 else
1748 abort ();
1749
1750 /* Final merger cleanup. */
1751 for (i = 0; i < words; i++)
1752 {
1753 /* If one class is MEMORY, everything should be passed in
1754 memory. */
1755 if (classes[i] == X86_64_MEMORY_CLASS)
1756 return 0;
1757
1758 /* The X86_64_SSEUP_CLASS should always be preceded by
1759 X86_64_SSE_CLASS. */
1760 if (classes[i] == X86_64_SSEUP_CLASS
1761 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1762 classes[i] = X86_64_SSE_CLASS;
1763
1764 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1765 if (classes[i] == X86_64_X87UP_CLASS
1766 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1767 classes[i] = X86_64_SSE_CLASS;
1768 }
1769 return words;
1770 }
1771
1772 /* Compute the alignment needed. We align all types to their natural boundaries,
1773 with the exception of XFmode, which is aligned to 128 bits. */
1774 if (mode != VOIDmode && mode != BLKmode)
1775 {
1776 int mode_alignment = GET_MODE_BITSIZE (mode);
1777
1778 if (mode == XFmode)
1779 mode_alignment = 128;
1780 else if (mode == XCmode)
1781 mode_alignment = 256;
1782 /* Misaligned fields are always returned in memory. */
1783 if (bit_offset % mode_alignment)
1784 return 0;
1785 }
1786
1787 /* Classification of atomic types. */
1788 switch (mode)
1789 {
1790 case DImode:
1791 case SImode:
1792 case HImode:
1793 case QImode:
1794 case CSImode:
1795 case CHImode:
1796 case CQImode:
1797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1798 classes[0] = X86_64_INTEGERSI_CLASS;
1799 else
1800 classes[0] = X86_64_INTEGER_CLASS;
1801 return 1;
1802 case CDImode:
1803 case TImode:
1804 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1805 return 2;
1806 case CTImode:
1807 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1808 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1809 return 4;
1810 case SFmode:
1811 if (!(bit_offset % 64))
1812 classes[0] = X86_64_SSESF_CLASS;
1813 else
1814 classes[0] = X86_64_SSE_CLASS;
1815 return 1;
1816 case DFmode:
1817 classes[0] = X86_64_SSEDF_CLASS;
1818 return 1;
1819 case TFmode:
1820 classes[0] = X86_64_X87_CLASS;
1821 classes[1] = X86_64_X87UP_CLASS;
1822 return 2;
1823 case TCmode:
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1826 classes[2] = X86_64_X87_CLASS;
1827 classes[3] = X86_64_X87UP_CLASS;
1828 return 4;
1829 case DCmode:
1830 classes[0] = X86_64_SSEDF_CLASS;
1831 classes[1] = X86_64_SSEDF_CLASS;
1832 return 2;
1833 case SCmode:
1834 classes[0] = X86_64_SSE_CLASS;
1835 return 1;
1836 case V4SFmode:
1837 case V4SImode:
1838 case V16QImode:
1839 case V8HImode:
1840 case V2DFmode:
1841 case V2DImode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 classes[1] = X86_64_SSEUP_CLASS;
1844 return 2;
1845 case V2SFmode:
1846 case V2SImode:
1847 case V4HImode:
1848 case V8QImode:
1849 classes[0] = X86_64_SSE_CLASS;
1850 return 1;
1851 case BLKmode:
1852 case VOIDmode:
1853 return 0;
1854 default:
1855 abort ();
1856 }
1857 }
1858
1859 /* Examine the argument and set the number of registers required in each
1860 class. Return 0 iff the parameter should be passed in memory. */
1861 static int
1862 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1863 enum machine_mode mode;
1864 tree type;
1865 int *int_nregs, *sse_nregs;
1866 int in_return;
1867 {
1868 enum x86_64_reg_class class[MAX_CLASSES];
1869 int n = classify_argument (mode, type, class, 0);
1870
1871 *int_nregs = 0;
1872 *sse_nregs = 0;
1873 if (!n)
1874 return 0;
1875 for (n--; n >= 0; n--)
1876 switch (class[n])
1877 {
1878 case X86_64_INTEGER_CLASS:
1879 case X86_64_INTEGERSI_CLASS:
1880 (*int_nregs)++;
1881 break;
1882 case X86_64_SSE_CLASS:
1883 case X86_64_SSESF_CLASS:
1884 case X86_64_SSEDF_CLASS:
1885 (*sse_nregs)++;
1886 break;
1887 case X86_64_NO_CLASS:
1888 case X86_64_SSEUP_CLASS:
1889 break;
1890 case X86_64_X87_CLASS:
1891 case X86_64_X87UP_CLASS:
1892 if (!in_return)
1893 return 0;
1894 break;
1895 case X86_64_MEMORY_CLASS:
1896 abort ();
1897 }
1898 return 1;
1899 }
1900 /* Construct a container for the argument as used by the GCC interface. See
1901 FUNCTION_ARG for the detailed description. */
1902 static rtx
1903 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1904 enum machine_mode mode;
1905 tree type;
1906 int in_return;
1907 int nintregs, nsseregs;
1908 const int * intreg;
1909 int sse_regno;
1910 {
1911 enum machine_mode tmpmode;
1912 int bytes =
1913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1914 enum x86_64_reg_class class[MAX_CLASSES];
1915 int n;
1916 int i;
1917 int nexps = 0;
1918 int needed_sseregs, needed_intregs;
1919 rtx exp[MAX_CLASSES];
1920 rtx ret;
1921
1922 n = classify_argument (mode, type, class, 0);
1923 if (TARGET_DEBUG_ARG)
1924 {
1925 if (!n)
1926 fprintf (stderr, "Memory class\n");
1927 else
1928 {
1929 fprintf (stderr, "Classes:");
1930 for (i = 0; i < n; i++)
1931 {
1932 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1933 }
1934 fprintf (stderr, "\n");
1935 }
1936 }
1937 if (!n)
1938 return NULL;
1939 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1940 return NULL;
1941 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1942 return NULL;
1943
1944 /* First construct simple cases. Avoid SCmode, since we want to use
1945 a single register to pass this type. */
1946 if (n == 1 && mode != SCmode)
1947 switch (class[0])
1948 {
1949 case X86_64_INTEGER_CLASS:
1950 case X86_64_INTEGERSI_CLASS:
1951 return gen_rtx_REG (mode, intreg[0]);
1952 case X86_64_SSE_CLASS:
1953 case X86_64_SSESF_CLASS:
1954 case X86_64_SSEDF_CLASS:
1955 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1956 case X86_64_X87_CLASS:
1957 return gen_rtx_REG (mode, FIRST_STACK_REG);
1958 case X86_64_NO_CLASS:
1959 /* Zero sized array, struct or class. */
1960 return NULL;
1961 default:
1962 abort ();
1963 }
1964 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1965 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1966 if (n == 2
1967 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1968 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1969 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1970 && class[1] == X86_64_INTEGER_CLASS
1971 && (mode == CDImode || mode == TImode)
1972 && intreg[0] + 1 == intreg[1])
1973 return gen_rtx_REG (mode, intreg[0]);
1974 if (n == 4
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1976 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1977 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1978
1979 /* Otherwise figure out the entries of the PARALLEL. */
1980 for (i = 0; i < n; i++)
1981 {
1982 switch (class[i])
1983 {
1984 case X86_64_NO_CLASS:
1985 break;
1986 case X86_64_INTEGER_CLASS:
1987 case X86_64_INTEGERSI_CLASS:
1988 /* Merge TImodes on aligned occasions here too. */
1989 if (i * 8 + 8 > bytes)
1990 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1991 else if (class[i] == X86_64_INTEGERSI_CLASS)
1992 tmpmode = SImode;
1993 else
1994 tmpmode = DImode;
1995 /* We've requested a size (e.g. 24 bits) for which there is no integer mode. Use DImode. */
1996 if (tmpmode == BLKmode)
1997 tmpmode = DImode;
1998 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1999 gen_rtx_REG (tmpmode, *intreg),
2000 GEN_INT (i*8));
2001 intreg++;
2002 break;
2003 case X86_64_SSESF_CLASS:
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (SFmode,
2006 SSE_REGNO (sse_regno)),
2007 GEN_INT (i*8));
2008 sse_regno++;
2009 break;
2010 case X86_64_SSEDF_CLASS:
2011 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2012 gen_rtx_REG (DFmode,
2013 SSE_REGNO (sse_regno)),
2014 GEN_INT (i*8));
2015 sse_regno++;
2016 break;
2017 case X86_64_SSE_CLASS:
2018 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2019 tmpmode = TImode, i++;
2020 else
2021 tmpmode = DImode;
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (tmpmode,
2024 SSE_REGNO (sse_regno)),
2025 GEN_INT (i*8));
2026 sse_regno++;
2027 break;
2028 default:
2029 abort ();
2030 }
2031 }
2032 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2033 for (i = 0; i < nexps; i++)
2034 XVECEXP (ret, 0, i) = exp [i];
2035 return ret;
2036 }
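
/* Sketch of the result: for the two-eightbyte structure
   struct s { double d; long l; } mentioned above, construct_container builds
   a PARALLEL of two EXPR_LIST entries, roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. the DFmode part is passed in the first SSE register at offset 0 and
   the DImode part in the first available integer register at offset 8.  */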
2037
2038 /* Update the data in CUM to advance over an argument
2039 of mode MODE and data type TYPE.
2040 (TYPE is null for libcalls where that information may not be available.) */
2041
2042 void
2043 function_arg_advance (cum, mode, type, named)
2044 CUMULATIVE_ARGS *cum; /* current arg information */
2045 enum machine_mode mode; /* current arg mode */
2046 tree type; /* type of the argument or 0 if lib support */
2047 int named; /* whether or not the argument was named */
2048 {
2049 int bytes =
2050 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2051 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2052
2053 if (TARGET_DEBUG_ARG)
2054 fprintf (stderr,
2055 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2056 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2057 if (TARGET_64BIT)
2058 {
2059 int int_nregs, sse_nregs;
2060 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2061 cum->words += words;
2062 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2063 {
2064 cum->nregs -= int_nregs;
2065 cum->sse_nregs -= sse_nregs;
2066 cum->regno += int_nregs;
2067 cum->sse_regno += sse_nregs;
2068 }
2069 else
2070 cum->words += words;
2071 }
2072 else
2073 {
2074 if (TARGET_SSE && mode == TImode)
2075 {
2076 cum->sse_words += words;
2077 cum->sse_nregs -= 1;
2078 cum->sse_regno += 1;
2079 if (cum->sse_nregs <= 0)
2080 {
2081 cum->sse_nregs = 0;
2082 cum->sse_regno = 0;
2083 }
2084 }
2085 else
2086 {
2087 cum->words += words;
2088 cum->nregs -= words;
2089 cum->regno += words;
2090
2091 if (cum->nregs <= 0)
2092 {
2093 cum->nregs = 0;
2094 cum->regno = 0;
2095 }
2096 }
2097 }
2098 return;
2099 }
2100
2101 /* Define where to put the arguments to a function.
2102 Value is zero to push the argument on the stack,
2103 or a hard register in which to store the argument.
2104
2105 MODE is the argument's machine mode.
2106 TYPE is the data type of the argument (as a tree).
2107 This is null for libcalls where that information may
2108 not be available.
2109 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2110 the preceding args and about the function being called.
2111 NAMED is nonzero if this argument is a named parameter
2112 (otherwise it is an extra parameter matching an ellipsis). */
2113
2114 rtx
2115 function_arg (cum, mode, type, named)
2116 CUMULATIVE_ARGS *cum; /* current arg information */
2117 enum machine_mode mode; /* current arg mode */
2118 tree type; /* type of the argument or 0 if lib support */
2119 int named; /* != 0 for normal args, == 0 for ... args */
2120 {
2121 rtx ret = NULL_RTX;
2122 int bytes =
2123 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2124 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2125
2126 /* Handle the hidden AL argument containing the number of SSE registers used
2127 for varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2128 avoid any AL settings. */
2129 if (mode == VOIDmode)
2130 {
2131 if (TARGET_64BIT)
2132 return GEN_INT (cum->maybe_vaarg
2133 ? (cum->sse_nregs < 0
2134 ? SSE_REGPARM_MAX
2135 : cum->sse_regno)
2136 : -1);
2137 else
2138 return constm1_rtx;
2139 }
2140 if (TARGET_64BIT)
2141 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2142 &x86_64_int_parameter_registers [cum->regno],
2143 cum->sse_regno);
2144 else
2145 switch (mode)
2146 {
2147 /* For now, pass fp/complex values on the stack. */
2148 default:
2149 break;
2150
2151 case BLKmode:
2152 case DImode:
2153 case SImode:
2154 case HImode:
2155 case QImode:
2156 if (words <= cum->nregs)
2157 ret = gen_rtx_REG (mode, cum->regno);
2158 break;
2159 case TImode:
2160 if (cum->sse_nregs)
2161 ret = gen_rtx_REG (mode, cum->sse_regno);
2162 break;
2163 }
2164
2165 if (TARGET_DEBUG_ARG)
2166 {
2167 fprintf (stderr,
2168 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2169 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2170
2171 if (ret)
2172 print_simple_rtl (stderr, ret);
2173 else
2174 fprintf (stderr, ", stack");
2175
2176 fprintf (stderr, " )\n");
2177 }
2178
2179 return ret;
2180 }
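
/* A concrete consequence of the VOIDmode case above (sketch): for a 64-bit
   varargs call such as

       printf ("%f\n", 3.14);

   one SSE register is used for the double argument, so the caller loads 1
   into %al before the call; the callee's prologue (see
   ix86_setup_incoming_varargs) uses that count to decide how many SSE
   registers to dump into the register save area.  */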
2181
2182 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2183 and type. */
2184
2185 int
2186 ix86_function_arg_boundary (mode, type)
2187 enum machine_mode mode;
2188 tree type;
2189 {
2190 int align;
2191 if (!TARGET_64BIT)
2192 return PARM_BOUNDARY;
2193 if (type)
2194 align = TYPE_ALIGN (type);
2195 else
2196 align = GET_MODE_ALIGNMENT (mode);
2197 if (align < PARM_BOUNDARY)
2198 align = PARM_BOUNDARY;
2199 if (align > 128)
2200 align = 128;
2201 return align;
2202 }
2203
2204 /* Return true if N is a possible register number for a function value. */
2205 bool
2206 ix86_function_value_regno_p (regno)
2207 int regno;
2208 {
2209 if (!TARGET_64BIT)
2210 {
2211 return ((regno) == 0
2212 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2213 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2214 }
2215 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2216 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2217 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2218 }
2219
2220 /* Define how to find the value returned by a function.
2221 VALTYPE is the data type of the value (as a tree).
2222 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2223 otherwise, FUNC is 0. */
2224 rtx
2225 ix86_function_value (valtype)
2226 tree valtype;
2227 {
2228 if (TARGET_64BIT)
2229 {
2230 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2231 REGPARM_MAX, SSE_REGPARM_MAX,
2232 x86_64_int_return_registers, 0);
2233 /* For zero sized structures, construct_container returns NULL, but we need
2234 to keep the rest of the compiler happy by returning a meaningful value. */
2235 if (!ret)
2236 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2237 return ret;
2238 }
2239 else
2240 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2241 }
2242
2243 /* Return nonzero iff TYPE is returned in memory. */
2244 int
2245 ix86_return_in_memory (type)
2246 tree type;
2247 {
2248 int needed_intregs, needed_sseregs;
2249 if (TARGET_64BIT)
2250 {
2251 return !examine_argument (TYPE_MODE (type), type, 1,
2252 &needed_intregs, &needed_sseregs);
2253 }
2254 else
2255 {
2256 if (TYPE_MODE (type) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type))
2258 && int_size_in_bytes (type) == 8)
2259 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2260 && TYPE_MODE (type) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type))))
2262 return 1;
2263 return 0;
2264 }
2265 }
2266
2267 /* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2269 rtx
2270 ix86_libcall_value (mode)
2271 enum machine_mode mode;
2272 {
2273 if (TARGET_64BIT)
2274 {
2275 switch (mode)
2276 {
2277 case SFmode:
2278 case SCmode:
2279 case DFmode:
2280 case DCmode:
2281 return gen_rtx_REG (mode, FIRST_SSE_REG);
2282 case TFmode:
2283 case TCmode:
2284 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2285 default:
2286 return gen_rtx_REG (mode, 0);
2287 }
2288 }
2289 else
2290 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2291 }
2292 \f
2293 /* Create the va_list data type. */
2294
2295 tree
2296 ix86_build_va_list ()
2297 {
2298 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2299
2300 /* For i386 we use a plain pointer to the argument area. */
2301 if (!TARGET_64BIT)
2302 return build_pointer_type (char_type_node);
2303
2304 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2305 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2306
2307 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2308 unsigned_type_node);
2309 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2310 unsigned_type_node);
2311 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2312 ptr_type_node);
2313 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2314 ptr_type_node);
2315
2316 DECL_FIELD_CONTEXT (f_gpr) = record;
2317 DECL_FIELD_CONTEXT (f_fpr) = record;
2318 DECL_FIELD_CONTEXT (f_ovf) = record;
2319 DECL_FIELD_CONTEXT (f_sav) = record;
2320
2321 TREE_CHAIN (record) = type_decl;
2322 TYPE_NAME (record) = type_decl;
2323 TYPE_FIELDS (record) = f_gpr;
2324 TREE_CHAIN (f_gpr) = f_fpr;
2325 TREE_CHAIN (f_fpr) = f_ovf;
2326 TREE_CHAIN (f_ovf) = f_sav;
2327
2328 layout_type (record);
2329
2330 /* The correct type is an array type of one element. */
2331 return build_array_type (record, build_index_type (size_zero_node));
2332 }
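
/* The record built above corresponds to the following C-level type (an
   illustrative equivalent using the field names created here):

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];
 */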
2333
2334 /* Perform any actions needed for a function that is receiving a
2335 variable number of arguments.
2336
2337 CUM is as above.
2338
2339 MODE and TYPE are the mode and type of the current parameter.
2340
2341 PRETEND_SIZE is a variable that should be set to the amount of stack
2342 that must be pushed by the prolog to pretend that our caller pushed
2343 it.
2344
2345 Normally, this macro will push all remaining incoming registers on the
2346 stack and set PRETEND_SIZE to the length of the registers pushed. */
2347
2348 void
2349 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2350 CUMULATIVE_ARGS *cum;
2351 enum machine_mode mode;
2352 tree type;
2353 int *pretend_size ATTRIBUTE_UNUSED;
2354 int no_rtl;
2355
2356 {
2357 CUMULATIVE_ARGS next_cum;
2358 rtx save_area = NULL_RTX, mem;
2359 rtx label;
2360 rtx label_ref;
2361 rtx tmp_reg;
2362 rtx nsse_reg;
2363 int set;
2364 tree fntype;
2365 int stdarg_p;
2366 int i;
2367
2368 if (!TARGET_64BIT)
2369 return;
2370
2371 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2372 ix86_save_varrargs_registers = 1;
2373
2374 fntype = TREE_TYPE (current_function_decl);
2375 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2376 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2377 != void_type_node));
2378
2379 /* For varargs, we do not want to skip the dummy va_dcl argument.
2380 For stdargs, we do want to skip the last named argument. */
2381 next_cum = *cum;
2382 if (stdarg_p)
2383 function_arg_advance (&next_cum, mode, type, 1);
2384
2385 if (!no_rtl)
2386 save_area = frame_pointer_rtx;
2387
2388 set = get_varargs_alias_set ();
2389
2390 for (i = next_cum.regno; i < ix86_regparm; i++)
2391 {
2392 mem = gen_rtx_MEM (Pmode,
2393 plus_constant (save_area, i * UNITS_PER_WORD));
2394 set_mem_alias_set (mem, set);
2395 emit_move_insn (mem, gen_rtx_REG (Pmode,
2396 x86_64_int_parameter_registers[i]));
2397 }
2398
2399 if (next_cum.sse_nregs)
2400 {
2401 /* Now emit code to save the SSE registers. The AX parameter contains the
2402 number of SSE parameter registers used to call this function. We use the
2403 sse_prologue_save insn template, which produces a computed jump across
2404 the SSE saves. We need some preparation work to get this working. */
2405
2406 label = gen_label_rtx ();
2407 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2408
2409 /* Compute the address to jump to:
2410 label - eax*4 + nnamed_sse_arguments*4 */
2411 tmp_reg = gen_reg_rtx (Pmode);
2412 nsse_reg = gen_reg_rtx (Pmode);
2413 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2414 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2415 gen_rtx_MULT (Pmode, nsse_reg,
2416 GEN_INT (4))));
2417 if (next_cum.sse_regno)
2418 emit_move_insn
2419 (nsse_reg,
2420 gen_rtx_CONST (DImode,
2421 gen_rtx_PLUS (DImode,
2422 label_ref,
2423 GEN_INT (next_cum.sse_regno * 4))));
2424 else
2425 emit_move_insn (nsse_reg, label_ref);
2426 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2427
2428 /* Compute the address of the memory block we save into. We always use a
2429 pointer pointing 127 bytes after the first byte to store - this is needed
2430 to keep the instruction size limited to 4 bytes. */
2431 tmp_reg = gen_reg_rtx (Pmode);
2432 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2433 plus_constant (save_area,
2434 8 * REGPARM_MAX + 127)));
2435 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2436 set_mem_alias_set (mem, set);
2437 set_mem_align (mem, BITS_PER_WORD);
2438
2439 /* And finally do the dirty job! */
2440 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2441 GEN_INT (next_cum.sse_regno), label));
2442 }
2443
2444 }
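
/* Resulting register save area layout (a sketch; offsets follow the stores
   emitted above, with REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

       bytes   0 ..  47   the six integer argument registers, 8 bytes each
       bytes  48 .. 175   %xmm0 .. %xmm7, 16 bytes each

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */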
2445
2446 /* Implement va_start. */
2447
2448 void
2449 ix86_va_start (valist, nextarg)
2450 tree valist;
2451 rtx nextarg;
2452 {
2453 HOST_WIDE_INT words, n_gpr, n_fpr;
2454 tree f_gpr, f_fpr, f_ovf, f_sav;
2455 tree gpr, fpr, ovf, sav, t;
2456
2457 /* Only the 64-bit target needs something special. */
2458 if (!TARGET_64BIT)
2459 {
2460 std_expand_builtin_va_start (valist, nextarg);
2461 return;
2462 }
2463
2464 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2465 f_fpr = TREE_CHAIN (f_gpr);
2466 f_ovf = TREE_CHAIN (f_fpr);
2467 f_sav = TREE_CHAIN (f_ovf);
2468
2469 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2470 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2471 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2472 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2473 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2474
2475 /* Count the number of gp and fp argument registers used. */
2476 words = current_function_args_info.words;
2477 n_gpr = current_function_args_info.regno;
2478 n_fpr = current_function_args_info.sse_regno;
2479
2480 if (TARGET_DEBUG_ARG)
2481 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2482 (int) words, (int) n_gpr, (int) n_fpr);
2483
2484 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2485 build_int_2 (n_gpr * 8, 0));
2486 TREE_SIDE_EFFECTS (t) = 1;
2487 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2488
2489 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2490 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2491 TREE_SIDE_EFFECTS (t) = 1;
2492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2493
2494 /* Find the overflow area. */
2495 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2496 if (words != 0)
2497 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2498 build_int_2 (words * UNITS_PER_WORD, 0));
2499 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2500 TREE_SIDE_EFFECTS (t) = 1;
2501 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2502
2503 /* Find the register save area.
2504 The prologue of the function saves it right above the stack frame. */
2505 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2506 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2507 TREE_SIDE_EFFECTS (t) = 1;
2508 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2509 }
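
/* Worked example (a sketch): for

       void f (int a, double b, ...);

   the named arguments consume one integer and one SSE register, so va_start
   stores gp_offset = 1 * 8 = 8 and
   fp_offset = 1 * 16 + 8 * REGPARM_MAX = 16 + 48 = 64, pointing va_arg at
   the second integer slot and the second SSE slot of the register save
   area.  */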
2510
2511 /* Implement va_arg. */
2512 rtx
2513 ix86_va_arg (valist, type)
2514 tree valist, type;
2515 {
2516 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2517 tree f_gpr, f_fpr, f_ovf, f_sav;
2518 tree gpr, fpr, ovf, sav, t;
2519 int size, rsize;
2520 rtx lab_false, lab_over = NULL_RTX;
2521 rtx addr_rtx, r;
2522 rtx container;
2523
2524 /* Only the 64-bit target needs something special. */
2525 if (!TARGET_64BIT)
2526 {
2527 return std_expand_builtin_va_arg (valist, type);
2528 }
2529
2530 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2531 f_fpr = TREE_CHAIN (f_gpr);
2532 f_ovf = TREE_CHAIN (f_fpr);
2533 f_sav = TREE_CHAIN (f_ovf);
2534
2535 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2536 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2537 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2538 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2539 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2540
2541 size = int_size_in_bytes (type);
2542 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2543
2544 container = construct_container (TYPE_MODE (type), type, 0,
2545 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2546 /*
2547 * Pull the value out of the saved registers ...
2548 */
2549
2550 addr_rtx = gen_reg_rtx (Pmode);
2551
2552 if (container)
2553 {
2554 rtx int_addr_rtx, sse_addr_rtx;
2555 int needed_intregs, needed_sseregs;
2556 int need_temp;
2557
2558 lab_over = gen_label_rtx ();
2559 lab_false = gen_label_rtx ();
2560
2561 examine_argument (TYPE_MODE (type), type, 0,
2562 &needed_intregs, &needed_sseregs);
2563
2564
2565 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2566 || TYPE_ALIGN (type) > 128);
2567
2568 /* In case we are passing a structure, verify that it forms a consecutive
2569 block in the register save area. If not, we need to do moves. */
2570 if (!need_temp && !REG_P (container))
2571 {
2572 /* Verify that all registers are strictly consecutive. */
2573 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2574 {
2575 int i;
2576
2577 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2578 {
2579 rtx slot = XVECEXP (container, 0, i);
2580 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2581 || INTVAL (XEXP (slot, 1)) != i * 16)
2582 need_temp = 1;
2583 }
2584 }
2585 else
2586 {
2587 int i;
2588
2589 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2590 {
2591 rtx slot = XVECEXP (container, 0, i);
2592 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2593 || INTVAL (XEXP (slot, 1)) != i * 8)
2594 need_temp = 1;
2595 }
2596 }
2597 }
2598 if (!need_temp)
2599 {
2600 int_addr_rtx = addr_rtx;
2601 sse_addr_rtx = addr_rtx;
2602 }
2603 else
2604 {
2605 int_addr_rtx = gen_reg_rtx (Pmode);
2606 sse_addr_rtx = gen_reg_rtx (Pmode);
2607 }
2608 /* First ensure that we fit completely in registers. */
2609 if (needed_intregs)
2610 {
2611 emit_cmp_and_jump_insns (expand_expr
2612 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2613 GEN_INT ((REGPARM_MAX - needed_intregs +
2614 1) * 8), GE, const1_rtx, SImode,
2615 1, lab_false);
2616 }
2617 if (needed_sseregs)
2618 {
2619 emit_cmp_and_jump_insns (expand_expr
2620 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2621 GEN_INT ((SSE_REGPARM_MAX -
2622 needed_sseregs + 1) * 16 +
2623 REGPARM_MAX * 8), GE, const1_rtx,
2624 SImode, 1, lab_false);
2625 }
2626
2627 /* Compute index to start of area used for integer regs. */
2628 if (needed_intregs)
2629 {
2630 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2631 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2632 if (r != int_addr_rtx)
2633 emit_move_insn (int_addr_rtx, r);
2634 }
2635 if (needed_sseregs)
2636 {
2637 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2638 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2639 if (r != sse_addr_rtx)
2640 emit_move_insn (sse_addr_rtx, r);
2641 }
2642 if (need_temp)
2643 {
2644 int i;
2645 rtx mem;
2646
2647 /* Never use the memory itself, as it has the alias set. */
2648 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2649 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2650 set_mem_alias_set (mem, get_varargs_alias_set ());
2651 set_mem_align (mem, BITS_PER_UNIT);
2652
2653 for (i = 0; i < XVECLEN (container, 0); i++)
2654 {
2655 rtx slot = XVECEXP (container, 0, i);
2656 rtx reg = XEXP (slot, 0);
2657 enum machine_mode mode = GET_MODE (reg);
2658 rtx src_addr;
2659 rtx src_mem;
2660 int src_offset;
2661 rtx dest_mem;
2662
2663 if (SSE_REGNO_P (REGNO (reg)))
2664 {
2665 src_addr = sse_addr_rtx;
2666 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2667 }
2668 else
2669 {
2670 src_addr = int_addr_rtx;
2671 src_offset = REGNO (reg) * 8;
2672 }
2673 src_mem = gen_rtx_MEM (mode, src_addr);
2674 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2675 src_mem = adjust_address (src_mem, mode, src_offset);
2676 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2677 emit_move_insn (dest_mem, src_mem);
2678 }
2679 }
2680
2681 if (needed_intregs)
2682 {
2683 t =
2684 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2685 build_int_2 (needed_intregs * 8, 0));
2686 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2687 TREE_SIDE_EFFECTS (t) = 1;
2688 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2689 }
2690 if (needed_sseregs)
2691 {
2692 t =
2693 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2694 build_int_2 (needed_sseregs * 16, 0));
2695 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2696 TREE_SIDE_EFFECTS (t) = 1;
2697 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2698 }
2699
2700 emit_jump_insn (gen_jump (lab_over));
2701 emit_barrier ();
2702 emit_label (lab_false);
2703 }
2704
2705 /* ... otherwise out of the overflow area. */
2706
2707 /* Care for on-stack alignment if needed. */
2708 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2709 t = ovf;
2710 else
2711 {
2712 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2713 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2714 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2715 }
2716 t = save_expr (t);
2717
2718 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2719 if (r != addr_rtx)
2720 emit_move_insn (addr_rtx, r);
2721
2722 t =
2723 build (PLUS_EXPR, TREE_TYPE (t), t,
2724 build_int_2 (rsize * UNITS_PER_WORD, 0));
2725 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2726 TREE_SIDE_EFFECTS (t) = 1;
2727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2728
2729 if (container)
2730 emit_label (lab_over);
2731
2732 return addr_rtx;
2733 }
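
/* Sketch of the control flow generated above for va_arg (ap, double), with
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 (so needed_sseregs == 1):

       if (ap->fp_offset >= (8 - 1 + 1) * 16 + 6 * 8)
         fetch the value from ap->overflow_arg_area and advance it;
       else
         fetch the value from ap->reg_save_area + ap->fp_offset
           and add 1 * 16 to ap->fp_offset;
 */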
2734 \f
2735 /* Return nonzero if OP is general operand representable on x86_64. */
2736
2737 int
2738 x86_64_general_operand (op, mode)
2739 rtx op;
2740 enum machine_mode mode;
2741 {
2742 if (!TARGET_64BIT)
2743 return general_operand (op, mode);
2744 if (nonimmediate_operand (op, mode))
2745 return 1;
2746 return x86_64_sign_extended_value (op);
2747 }
2748
2749 /* Return nonzero if OP is general operand representable on x86_64
2750 as either sign extended or zero extended constant. */
2751
2752 int
2753 x86_64_szext_general_operand (op, mode)
2754 rtx op;
2755 enum machine_mode mode;
2756 {
2757 if (!TARGET_64BIT)
2758 return general_operand (op, mode);
2759 if (nonimmediate_operand (op, mode))
2760 return 1;
2761 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2762 }
2763
2764 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2765
2766 int
2767 x86_64_nonmemory_operand (op, mode)
2768 rtx op;
2769 enum machine_mode mode;
2770 {
2771 if (!TARGET_64BIT)
2772 return nonmemory_operand (op, mode);
2773 if (register_operand (op, mode))
2774 return 1;
2775 return x86_64_sign_extended_value (op);
2776 }
2777
2778 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2779
2780 int
2781 x86_64_movabs_operand (op, mode)
2782 rtx op;
2783 enum machine_mode mode;
2784 {
2785 if (!TARGET_64BIT || !flag_pic)
2786 return nonmemory_operand (op, mode);
2787 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2788 return 1;
2789 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2790 return 1;
2791 return 0;
2792 }
2793
2794 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2795
2796 int
2797 x86_64_szext_nonmemory_operand (op, mode)
2798 rtx op;
2799 enum machine_mode mode;
2800 {
2801 if (!TARGET_64BIT)
2802 return nonmemory_operand (op, mode);
2803 if (register_operand (op, mode))
2804 return 1;
2805 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2806 }
2807
2808 /* Return nonzero if OP is immediate operand representable on x86_64. */
2809
2810 int
2811 x86_64_immediate_operand (op, mode)
2812 rtx op;
2813 enum machine_mode mode;
2814 {
2815 if (!TARGET_64BIT)
2816 return immediate_operand (op, mode);
2817 return x86_64_sign_extended_value (op);
2818 }
2819
2820 /* Return nonzero if OP is immediate operand representable on x86_64. */
2821
2822 int
2823 x86_64_zext_immediate_operand (op, mode)
2824 rtx op;
2825 enum machine_mode mode ATTRIBUTE_UNUSED;
2826 {
2827 return x86_64_zero_extended_value (op);
2828 }
2829
2830 /* Return nonzero if OP is (const_int 1), else return zero. */
2831
2832 int
2833 const_int_1_operand (op, mode)
2834 rtx op;
2835 enum machine_mode mode ATTRIBUTE_UNUSED;
2836 {
2837 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2838 }
2839
2840 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2841 for shift & compare patterns, as shifting by 0 does not change flags),
2842 else return zero. */
2843
2844 int
2845 const_int_1_31_operand (op, mode)
2846 rtx op;
2847 enum machine_mode mode ATTRIBUTE_UNUSED;
2848 {
2849 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2850 }
2851
2852 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2853 reference and a constant. */
2854
2855 int
2856 symbolic_operand (op, mode)
2857 register rtx op;
2858 enum machine_mode mode ATTRIBUTE_UNUSED;
2859 {
2860 switch (GET_CODE (op))
2861 {
2862 case SYMBOL_REF:
2863 case LABEL_REF:
2864 return 1;
2865
2866 case CONST:
2867 op = XEXP (op, 0);
2868 if (GET_CODE (op) == SYMBOL_REF
2869 || GET_CODE (op) == LABEL_REF
2870 || (GET_CODE (op) == UNSPEC
2871 && (XINT (op, 1) == UNSPEC_GOT
2872 || XINT (op, 1) == UNSPEC_GOTOFF
2873 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2874 return 1;
2875 if (GET_CODE (op) != PLUS
2876 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2877 return 0;
2878
2879 op = XEXP (op, 0);
2880 if (GET_CODE (op) == SYMBOL_REF
2881 || GET_CODE (op) == LABEL_REF)
2882 return 1;
2883 /* Only @GOTOFF gets offsets. */
2884 if (GET_CODE (op) != UNSPEC
2885 || XINT (op, 1) != UNSPEC_GOTOFF)
2886 return 0;
2887
2888 op = XVECEXP (op, 0, 0);
2889 if (GET_CODE (op) == SYMBOL_REF
2890 || GET_CODE (op) == LABEL_REF)
2891 return 1;
2892 return 0;
2893
2894 default:
2895 return 0;
2896 }
2897 }
2898
2899 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2900
2901 int
2902 pic_symbolic_operand (op, mode)
2903 register rtx op;
2904 enum machine_mode mode ATTRIBUTE_UNUSED;
2905 {
2906 if (GET_CODE (op) != CONST)
2907 return 0;
2908 op = XEXP (op, 0);
2909 if (TARGET_64BIT)
2910 {
2911 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2912 return 1;
2913 }
2914 else
2915 {
2916 if (GET_CODE (op) == UNSPEC)
2917 return 1;
2918 if (GET_CODE (op) != PLUS
2919 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2920 return 0;
2921 op = XEXP (op, 0);
2922 if (GET_CODE (op) == UNSPEC)
2923 return 1;
2924 }
2925 return 0;
2926 }
2927
2928 /* Return true if OP is a symbolic operand that resolves locally. */
2929
2930 static int
2931 local_symbolic_operand (op, mode)
2932 rtx op;
2933 enum machine_mode mode ATTRIBUTE_UNUSED;
2934 {
2935 if (GET_CODE (op) == LABEL_REF)
2936 return 1;
2937
2938 if (GET_CODE (op) == CONST
2939 && GET_CODE (XEXP (op, 0)) == PLUS
2940 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2941 op = XEXP (XEXP (op, 0), 0);
2942
2943 if (GET_CODE (op) != SYMBOL_REF)
2944 return 0;
2945
2946 /* These we've been told are local by varasm and encode_section_info
2947 respectively. */
2948 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2949 return 1;
2950
2951 /* There is, however, a not insubstantial body of code in the rest of
2952 the compiler that assumes it can just stick the results of
2953 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2954 /* ??? This is a hack. Should update the body of the compiler to
2955 always create a DECL and invoke targetm.encode_section_info. */
2956 if (strncmp (XSTR (op, 0), internal_label_prefix,
2957 internal_label_prefix_len) == 0)
2958 return 1;
2959
2960 return 0;
2961 }
2962
2963 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2964
2965 int
2966 tls_symbolic_operand (op, mode)
2967 register rtx op;
2968 enum machine_mode mode ATTRIBUTE_UNUSED;
2969 {
2970 const char *symbol_str;
2971
2972 if (GET_CODE (op) != SYMBOL_REF)
2973 return 0;
2974 symbol_str = XSTR (op, 0);
2975
2976 if (symbol_str[0] != '%')
2977 return 0;
2978 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2979 }
2980
2981 static int
2982 tls_symbolic_operand_1 (op, kind)
2983 rtx op;
2984 enum tls_model kind;
2985 {
2986 const char *symbol_str;
2987
2988 if (GET_CODE (op) != SYMBOL_REF)
2989 return 0;
2990 symbol_str = XSTR (op, 0);
2991
2992 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
2993 }
2994
2995 int
2996 global_dynamic_symbolic_operand (op, mode)
2997 register rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
2999 {
3000 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3001 }
3002
3003 int
3004 local_dynamic_symbolic_operand (op, mode)
3005 register rtx op;
3006 enum machine_mode mode ATTRIBUTE_UNUSED;
3007 {
3008 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3009 }
3010
3011 int
3012 initial_exec_symbolic_operand (op, mode)
3013 register rtx op;
3014 enum machine_mode mode ATTRIBUTE_UNUSED;
3015 {
3016 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3017 }
3018
3019 int
3020 local_exec_symbolic_operand (op, mode)
3021 register rtx op;
3022 enum machine_mode mode ATTRIBUTE_UNUSED;
3023 {
3024 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3025 }
3026
3027 /* Test for a valid operand for a call instruction. Don't allow the
3028 arg pointer register or virtual regs since they may decay into
3029 reg + const, which the patterns can't handle. */
3030
3031 int
3032 call_insn_operand (op, mode)
3033 rtx op;
3034 enum machine_mode mode ATTRIBUTE_UNUSED;
3035 {
3036 /* Disallow indirect through a virtual register. This leads to
3037 compiler aborts when trying to eliminate them. */
3038 if (GET_CODE (op) == REG
3039 && (op == arg_pointer_rtx
3040 || op == frame_pointer_rtx
3041 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3042 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3043 return 0;
3044
3045 /* Disallow `call 1234'. Due to varying assembler lameness this
3046 gets either rejected or translated to `call .+1234'. */
3047 if (GET_CODE (op) == CONST_INT)
3048 return 0;
3049
3050 /* Explicitly allow SYMBOL_REF even if pic. */
3051 if (GET_CODE (op) == SYMBOL_REF)
3052 return 1;
3053
3054 /* Otherwise we can allow any general_operand in the address. */
3055 return general_operand (op, Pmode);
3056 }
3057
3058 int
3059 constant_call_address_operand (op, mode)
3060 rtx op;
3061 enum machine_mode mode ATTRIBUTE_UNUSED;
3062 {
3063 if (GET_CODE (op) == CONST
3064 && GET_CODE (XEXP (op, 0)) == PLUS
3065 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3066 op = XEXP (XEXP (op, 0), 0);
3067 return GET_CODE (op) == SYMBOL_REF;
3068 }
3069
3070 /* Match exactly zero and one. */
3071
3072 int
3073 const0_operand (op, mode)
3074 register rtx op;
3075 enum machine_mode mode;
3076 {
3077 return op == CONST0_RTX (mode);
3078 }
3079
3080 int
3081 const1_operand (op, mode)
3082 register rtx op;
3083 enum machine_mode mode ATTRIBUTE_UNUSED;
3084 {
3085 return op == const1_rtx;
3086 }
3087
3088 /* Match 2, 4, or 8. Used for leal multiplicands. */
3089
3090 int
3091 const248_operand (op, mode)
3092 register rtx op;
3093 enum machine_mode mode ATTRIBUTE_UNUSED;
3094 {
3095 return (GET_CODE (op) == CONST_INT
3096 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3097 }
3098
3099 /* True if this is a constant appropriate for an increment or decrement. */
3100
3101 int
3102 incdec_operand (op, mode)
3103 register rtx op;
3104 enum machine_mode mode ATTRIBUTE_UNUSED;
3105 {
3106 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3107 flags register, since the carry flag is not set. */
3108 if (TARGET_PENTIUM4 && !optimize_size)
3109 return 0;
3110 return op == const1_rtx || op == constm1_rtx;
3111 }
3112
3113 /* Return nonzero if OP is acceptable as operand of DImode shift
3114 expander. */
3115
3116 int
3117 shiftdi_operand (op, mode)
3118 rtx op;
3119 enum machine_mode mode ATTRIBUTE_UNUSED;
3120 {
3121 if (TARGET_64BIT)
3122 return nonimmediate_operand (op, mode);
3123 else
3124 return register_operand (op, mode);
3125 }
3126
3127 /* Return false if this is the stack pointer, or any other fake
3128 register eliminable to the stack pointer. Otherwise, this is
3129 a register operand.
3130
3131 This is used to prevent esp from being used as an index reg,
3132 which would only happen in pathological cases. */
3133
3134 int
3135 reg_no_sp_operand (op, mode)
3136 register rtx op;
3137 enum machine_mode mode;
3138 {
3139 rtx t = op;
3140 if (GET_CODE (t) == SUBREG)
3141 t = SUBREG_REG (t);
3142 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3143 return 0;
3144
3145 return register_operand (op, mode);
3146 }
3147
3148 int
3149 mmx_reg_operand (op, mode)
3150 register rtx op;
3151 enum machine_mode mode ATTRIBUTE_UNUSED;
3152 {
3153 return MMX_REG_P (op);
3154 }
3155
3156 /* Return false if this is any eliminable register. Otherwise
3157 general_operand. */
3158
3159 int
3160 general_no_elim_operand (op, mode)
3161 register rtx op;
3162 enum machine_mode mode;
3163 {
3164 rtx t = op;
3165 if (GET_CODE (t) == SUBREG)
3166 t = SUBREG_REG (t);
3167 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3168 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3169 || t == virtual_stack_dynamic_rtx)
3170 return 0;
3171 if (REG_P (t)
3172 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3173 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3174 return 0;
3175
3176 return general_operand (op, mode);
3177 }
3178
3179 /* Return false if this is any eliminable register. Otherwise
3180 register_operand or const_int. */
3181
3182 int
3183 nonmemory_no_elim_operand (op, mode)
3184 register rtx op;
3185 enum machine_mode mode;
3186 {
3187 rtx t = op;
3188 if (GET_CODE (t) == SUBREG)
3189 t = SUBREG_REG (t);
3190 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3191 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3192 || t == virtual_stack_dynamic_rtx)
3193 return 0;
3194
3195 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3196 }
3197
3198 /* Return true if op is a Q_REGS class register. */
3199
3200 int
3201 q_regs_operand (op, mode)
3202 register rtx op;
3203 enum machine_mode mode;
3204 {
3205 if (mode != VOIDmode && GET_MODE (op) != mode)
3206 return 0;
3207 if (GET_CODE (op) == SUBREG)
3208 op = SUBREG_REG (op);
3209 return ANY_QI_REG_P (op);
3210 }
3211
3212 /* Return true if op is a NON_Q_REGS class register. */
3213
3214 int
3215 non_q_regs_operand (op, mode)
3216 register rtx op;
3217 enum machine_mode mode;
3218 {
3219 if (mode != VOIDmode && GET_MODE (op) != mode)
3220 return 0;
3221 if (GET_CODE (op) == SUBREG)
3222 op = SUBREG_REG (op);
3223 return NON_QI_REG_P (op);
3224 }
3225
3226 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3227 insns. */
3228 int
3229 sse_comparison_operator (op, mode)
3230 rtx op;
3231 enum machine_mode mode ATTRIBUTE_UNUSED;
3232 {
3233 enum rtx_code code = GET_CODE (op);
3234 switch (code)
3235 {
3236 /* Operations supported directly. */
3237 case EQ:
3238 case LT:
3239 case LE:
3240 case UNORDERED:
3241 case NE:
3242 case UNGE:
3243 case UNGT:
3244 case ORDERED:
3245 return 1;
3246 /* These are equivalent to ones above in non-IEEE comparisons. */
3247 case UNEQ:
3248 case UNLT:
3249 case UNLE:
3250 case LTGT:
3251 case GE:
3252 case GT:
3253 return !TARGET_IEEE_FP;
3254 default:
3255 return 0;
3256 }
3257 }
3258 /* Return 1 if OP is a valid comparison operator in valid mode. */
3259 int
3260 ix86_comparison_operator (op, mode)
3261 register rtx op;
3262 enum machine_mode mode;
3263 {
3264 enum machine_mode inmode;
3265 enum rtx_code code = GET_CODE (op);
3266 if (mode != VOIDmode && GET_MODE (op) != mode)
3267 return 0;
3268 if (GET_RTX_CLASS (code) != '<')
3269 return 0;
3270 inmode = GET_MODE (XEXP (op, 0));
3271
3272 if (inmode == CCFPmode || inmode == CCFPUmode)
3273 {
3274 enum rtx_code second_code, bypass_code;
3275 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3276 return (bypass_code == NIL && second_code == NIL);
3277 }
3278 switch (code)
3279 {
3280 case EQ: case NE:
3281 return 1;
3282 case LT: case GE:
3283 if (inmode == CCmode || inmode == CCGCmode
3284 || inmode == CCGOCmode || inmode == CCNOmode)
3285 return 1;
3286 return 0;
3287 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3288 if (inmode == CCmode)
3289 return 1;
3290 return 0;
3291 case GT: case LE:
3292 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3293 return 1;
3294 return 0;
3295 default:
3296 return 0;
3297 }
3298 }
3299
3300 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3301
3302 int
3303 fcmov_comparison_operator (op, mode)
3304 register rtx op;
3305 enum machine_mode mode;
3306 {
3307 enum machine_mode inmode;
3308 enum rtx_code code = GET_CODE (op);
3309 if (mode != VOIDmode && GET_MODE (op) != mode)
3310 return 0;
3311 if (GET_RTX_CLASS (code) != '<')
3312 return 0;
3313 inmode = GET_MODE (XEXP (op, 0));
3314 if (inmode == CCFPmode || inmode == CCFPUmode)
3315 {
3316 enum rtx_code second_code, bypass_code;
3317 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3318 if (bypass_code != NIL || second_code != NIL)
3319 return 0;
3320 code = ix86_fp_compare_code_to_integer (code);
3321 }
3322 /* The i387 supports just a limited set of condition codes. */
3323 switch (code)
3324 {
3325 case LTU: case GTU: case LEU: case GEU:
3326 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3327 return 1;
3328 return 0;
3329 case ORDERED: case UNORDERED:
3330 case EQ: case NE:
3331 return 1;
3332 default:
3333 return 0;
3334 }
3335 }
3336
3337 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3338
3339 int
3340 promotable_binary_operator (op, mode)
3341 register rtx op;
3342 enum machine_mode mode ATTRIBUTE_UNUSED;
3343 {
3344 switch (GET_CODE (op))
3345 {
3346 case MULT:
3347 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3348 but the 386 and 486 do HImode multiplies faster. */
3349 return ix86_cpu > PROCESSOR_I486;
3350 case PLUS:
3351 case AND:
3352 case IOR:
3353 case XOR:
3354 case ASHIFT:
3355 return 1;
3356 default:
3357 return 0;
3358 }
3359 }
3360
3361 /* Nearly general operand, but accept any const_double, since we wish
3362 to be able to drop them into memory rather than have them get pulled
3363 into registers. */
3364
3365 int
3366 cmp_fp_expander_operand (op, mode)
3367 register rtx op;
3368 enum machine_mode mode;
3369 {
3370 if (mode != VOIDmode && mode != GET_MODE (op))
3371 return 0;
3372 if (GET_CODE (op) == CONST_DOUBLE)
3373 return 1;
3374 return general_operand (op, mode);
3375 }
3376
3377 /* Match an SI or HImode register for a zero_extract. */
3378
3379 int
3380 ext_register_operand (op, mode)
3381 register rtx op;
3382 enum machine_mode mode ATTRIBUTE_UNUSED;
3383 {
3384 int regno;
3385 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3386 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3387 return 0;
3388
3389 if (!register_operand (op, VOIDmode))
3390 return 0;
3391
3392 /* Be careful to accept only registers having upper parts. */
3393 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3394 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3395 }
3396
3397 /* Return 1 if this is a valid binary floating-point operation.
3398 OP is the expression matched, and MODE is its mode. */
3399
3400 int
3401 binary_fp_operator (op, mode)
3402 register rtx op;
3403 enum machine_mode mode;
3404 {
3405 if (mode != VOIDmode && mode != GET_MODE (op))
3406 return 0;
3407
3408 switch (GET_CODE (op))
3409 {
3410 case PLUS:
3411 case MINUS:
3412 case MULT:
3413 case DIV:
3414 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3415
3416 default:
3417 return 0;
3418 }
3419 }
3420
3421 int
3422 mult_operator (op, mode)
3423 register rtx op;
3424 enum machine_mode mode ATTRIBUTE_UNUSED;
3425 {
3426 return GET_CODE (op) == MULT;
3427 }
3428
3429 int
3430 div_operator (op, mode)
3431 register rtx op;
3432 enum machine_mode mode ATTRIBUTE_UNUSED;
3433 {
3434 return GET_CODE (op) == DIV;
3435 }
3436
3437 int
3438 arith_or_logical_operator (op, mode)
3439 rtx op;
3440 enum machine_mode mode;
3441 {
3442 return ((mode == VOIDmode || GET_MODE (op) == mode)
3443 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3444 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3445 }
3446
3447 /* Returns 1 if OP is memory operand with a displacement. */
3448
3449 int
3450 memory_displacement_operand (op, mode)
3451 register rtx op;
3452 enum machine_mode mode;
3453 {
3454 struct ix86_address parts;
3455
3456 if (! memory_operand (op, mode))
3457 return 0;
3458
3459 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3460 abort ();
3461
3462 return parts.disp != NULL_RTX;
3463 }
3464
3465 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3466 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3467
3468 ??? It seems likely that this will only work because cmpsi is an
3469 expander, and no actual insns use this. */
3470
3471 int
3472 cmpsi_operand (op, mode)
3473 rtx op;
3474 enum machine_mode mode;
3475 {
3476 if (nonimmediate_operand (op, mode))
3477 return 1;
3478
3479 if (GET_CODE (op) == AND
3480 && GET_MODE (op) == SImode
3481 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3482 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3483 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3484 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3485 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3486 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3487 return 1;
3488
3489 return 0;
3490 }
3491
3492 /* Returns 1 if OP is memory operand that can not be represented by the
3493 modRM array. */
3494
3495 int
3496 long_memory_operand (op, mode)
3497 register rtx op;
3498 enum machine_mode mode;
3499 {
3500 if (! memory_operand (op, mode))
3501 return 0;
3502
3503 return memory_address_length (op) != 0;
3504 }
3505
3506 /* Return nonzero if the rtx is known aligned. */
3507
3508 int
3509 aligned_operand (op, mode)
3510 rtx op;
3511 enum machine_mode mode;
3512 {
3513 struct ix86_address parts;
3514
3515 if (!general_operand (op, mode))
3516 return 0;
3517
3518 /* Registers and immediate operands are always "aligned". */
3519 if (GET_CODE (op) != MEM)
3520 return 1;
3521
3522 /* Don't even try to do any aligned optimizations with volatiles. */
3523 if (MEM_VOLATILE_P (op))
3524 return 0;
3525
3526 op = XEXP (op, 0);
3527
3528 /* Pushes and pops are only valid on the stack pointer. */
3529 if (GET_CODE (op) == PRE_DEC
3530 || GET_CODE (op) == POST_INC)
3531 return 1;
3532
3533 /* Decode the address. */
3534 if (! ix86_decompose_address (op, &parts))
3535 abort ();
3536
3537 if (parts.base && GET_CODE (parts.base) == SUBREG)
3538 parts.base = SUBREG_REG (parts.base);
3539 if (parts.index && GET_CODE (parts.index) == SUBREG)
3540 parts.index = SUBREG_REG (parts.index);
3541
3542 /* Look for some component that isn't known to be aligned. */
3543 if (parts.index)
3544 {
3545 if (parts.scale < 4
3546 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3547 return 0;
3548 }
3549 if (parts.base)
3550 {
3551 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3552 return 0;
3553 }
3554 if (parts.disp)
3555 {
3556 if (GET_CODE (parts.disp) != CONST_INT
3557 || (INTVAL (parts.disp) & 3) != 0)
3558 return 0;
3559 }
3560
3561 /* Didn't find one -- this must be an aligned address. */
3562 return 1;
3563 }
3564 \f
3565 /* Return true if the constant is something that can be loaded with
3566 a special instruction. Only handle 0.0 and 1.0; others are less
3567 worthwhile. */
3568
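/* A worked example (an added sketch): (const_double:DF 0.0) returns 1 and
   (const_double:DF 1.0) returns 2, which callers presumably map to the
   fldz and fld1 instructions; any other float constant returns 0, and a
   non-float rtx returns -1.  */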
3569 int
3570 standard_80387_constant_p (x)
3571 rtx x;
3572 {
3573 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3574 return -1;
3575 /* Note that the 80387 can load other constants, such as pi, that we should
3576 support too.  On some machines, these are much slower to load as a standard
3577 constant than to load from doubles in memory.  */
3578 if (x == CONST0_RTX (GET_MODE (x)))
3579 return 1;
3580 if (x == CONST1_RTX (GET_MODE (x)))
3581 return 2;
3582 return 0;
3583 }
3584
3585 /* Return 1 if X is an FP constant we can load into an SSE register without
3586 using memory.  */
3587 int
3588 standard_sse_constant_p (x)
3589 rtx x;
3590 {
3591 if (GET_CODE (x) != CONST_DOUBLE)
3592 return -1;
3593 return (x == CONST0_RTX (GET_MODE (x)));
3594 }
3595
3596 /* Returns 1 if OP contains a symbol reference */
3597
3598 int
3599 symbolic_reference_mentioned_p (op)
3600 rtx op;
3601 {
3602 register const char *fmt;
3603 register int i;
3604
3605 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3606 return 1;
3607
3608 fmt = GET_RTX_FORMAT (GET_CODE (op));
3609 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3610 {
3611 if (fmt[i] == 'E')
3612 {
3613 register int j;
3614
3615 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3616 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3617 return 1;
3618 }
3619
3620 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3621 return 1;
3622 }
3623
3624 return 0;
3625 }
3626
3627 /* Return 1 if it is appropriate to emit `ret' instructions in the
3628 body of a function. Do this only if the epilogue is simple, needing a
3629 couple of insns. Prior to reloading, we can't tell how many registers
3630 must be saved, so return 0 then. Return 0 if there is no frame
3631 marker to de-allocate.
3632
3633 If NON_SAVING_SETJMP is defined and true, then it is not possible
3634 for the epilogue to be simple, so return 0. This is a special case
3635 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3636 until final, but jump_optimize may need to know sooner if a
3637 `return' is OK. */
3638
3639 int
3640 ix86_can_use_return_insn_p ()
3641 {
3642 struct ix86_frame frame;
3643
3644 #ifdef NON_SAVING_SETJMP
3645 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3646 return 0;
3647 #endif
3648
3649 if (! reload_completed || frame_pointer_needed)
3650 return 0;
3651
3652 /* Don't allow more than 32K bytes of popped arguments, since that's all
3653 we can do with one instruction.  */
3654 if (current_function_pops_args
3655 && current_function_args_size >= 32768)
3656 return 0;
3657
3658 ix86_compute_frame_layout (&frame);
3659 return frame.to_allocate == 0 && frame.nregs == 0;
3660 }
3661 \f
3662 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
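/* Examples (an added sketch, assuming a 64-bit HOST_WIDE_INT):
   (const_int 0x7fffffff) is accepted, while (const_int 0x80000000) is
   rejected because sign-extending its low 32 bits yields a different value.
   A reference such as (const (plus (symbol_ref "foo") (const_int 40))) is
   accepted for ix86_cmodel == CM_SMALL, since the offset still fits.  */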
3663 int
3664 x86_64_sign_extended_value (value)
3665 rtx value;
3666 {
3667 switch (GET_CODE (value))
3668 {
3669 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3670 to be at least 32, and thus all acceptable constants are
3671 represented as CONST_INT. */
3672 case CONST_INT:
3673 if (HOST_BITS_PER_WIDE_INT == 32)
3674 return 1;
3675 else
3676 {
3677 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3678 return trunc_int_for_mode (val, SImode) == val;
3679 }
3680 break;
3681
3682 /* For certain code models, the symbolic references are known to fit. */
3683 case SYMBOL_REF:
3684 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3685
3686 /* For certain code models, the code is near as well. */
3687 case LABEL_REF:
3688 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3689
3690 /* We also may accept the offsetted memory references in certain special
3691 cases. */
3692 case CONST:
3693 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3694 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3695 return 1;
3696 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3697 {
3698 rtx op1 = XEXP (XEXP (value, 0), 0);
3699 rtx op2 = XEXP (XEXP (value, 0), 1);
3700 HOST_WIDE_INT offset;
3701
3702 if (ix86_cmodel == CM_LARGE)
3703 return 0;
3704 if (GET_CODE (op2) != CONST_INT)
3705 return 0;
3706 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3707 switch (GET_CODE (op1))
3708 {
3709 case SYMBOL_REF:
3710 /* For CM_SMALL assume that the latest object is 1MB before the
3711 end of the 31-bit boundary.  We may also accept fairly
3712 large negative constants, knowing that all objects are
3713 in the positive half of the address space.  */
3714 if (ix86_cmodel == CM_SMALL
3715 && offset < 1024*1024*1024
3716 && trunc_int_for_mode (offset, SImode) == offset)
3717 return 1;
3718 /* For CM_KERNEL we know that all objects reside in the
3719 negative half of the 32-bit address space.  We may not
3720 accept negative offsets, since they may fall just outside it,
3721 but we may accept fairly large positive ones.  */
3722 if (ix86_cmodel == CM_KERNEL
3723 && offset > 0
3724 && trunc_int_for_mode (offset, SImode) == offset)
3725 return 1;
3726 break;
3727 case LABEL_REF:
3728 /* These conditions are similar to SYMBOL_REF ones, just the
3729 constraints for code models differ. */
3730 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3731 && offset < 1024*1024*1024
3732 && trunc_int_for_mode (offset, SImode) == offset)
3733 return 1;
3734 if (ix86_cmodel == CM_KERNEL
3735 && offset > 0
3736 && trunc_int_for_mode (offset, SImode) == offset)
3737 return 1;
3738 break;
3739 default:
3740 return 0;
3741 }
3742 }
3743 return 0;
3744 default:
3745 return 0;
3746 }
3747 }
3748
3749 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
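/* Examples (an added sketch, assuming a 64-bit HOST_WIDE_INT):
   (const_int 0xffff0000) is accepted here because it fits in an unsigned
   32-bit immediate, even though it fails the sign-extended test above,
   while (const_int -1) is rejected.  */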
3750 int
3751 x86_64_zero_extended_value (value)
3752 rtx value;
3753 {
3754 switch (GET_CODE (value))
3755 {
3756 case CONST_DOUBLE:
3757 if (HOST_BITS_PER_WIDE_INT == 32)
3758 return (GET_MODE (value) == VOIDmode
3759 && !CONST_DOUBLE_HIGH (value));
3760 else
3761 return 0;
3762 case CONST_INT:
3763 if (HOST_BITS_PER_WIDE_INT == 32)
3764 return INTVAL (value) >= 0;
3765 else
3766 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3767 break;
3768
3769 /* For certain code models, the symbolic references are known to fit. */
3770 case SYMBOL_REF:
3771 return ix86_cmodel == CM_SMALL;
3772
3773 /* For certain code models, the code is near as well. */
3774 case LABEL_REF:
3775 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3776
3777 /* We also may accept the offsetted memory references in certain special
3778 cases. */
3779 case CONST:
3780 if (GET_CODE (XEXP (value, 0)) == PLUS)
3781 {
3782 rtx op1 = XEXP (XEXP (value, 0), 0);
3783 rtx op2 = XEXP (XEXP (value, 0), 1);
3784
3785 if (ix86_cmodel == CM_LARGE)
3786 return 0;
3787 switch (GET_CODE (op1))
3788 {
3789 case SYMBOL_REF:
3790 return 0;
3791 /* For small code model we may accept pretty large positive
3792 offsets, since one bit is available for free. Negative
3793 offsets are limited by the size of NULL pointer area
3794 specified by the ABI. */
3795 if (ix86_cmodel == CM_SMALL
3796 && GET_CODE (op2) == CONST_INT
3797 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3798 && (trunc_int_for_mode (INTVAL (op2), SImode)
3799 == INTVAL (op2)))
3800 return 1;
3801 /* ??? For the kernel, we may accept adjustment of
3802 -0x10000000, since we know that it will just convert
3803 negative address space to positive, but perhaps this
3804 is not worthwhile. */
3805 break;
3806 case LABEL_REF:
3807 /* These conditions are similar to SYMBOL_REF ones, just the
3808 constraints for code models differ. */
3809 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3810 && GET_CODE (op2) == CONST_INT
3811 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3812 && (trunc_int_for_mode (INTVAL (op2), SImode)
3813 == INTVAL (op2)))
3814 return 1;
3815 break;
3816 default:
3817 return 0;
3818 }
3819 }
3820 return 0;
3821 default:
3822 return 0;
3823 }
3824 }
3825
3826 /* Value should be nonzero if functions must have frame pointers.
3827 Zero means the frame pointer need not be set up (and parms may
3828 be accessed via the stack pointer) in functions that seem suitable. */
3829
3830 int
3831 ix86_frame_pointer_required ()
3832 {
3833 /* If we accessed previous frames, then the generated code expects
3834 to be able to access the saved ebp value in our frame. */
3835 if (cfun->machine->accesses_prev_frame)
3836 return 1;
3837
3838 /* Several x86 OSes need a frame pointer for other reasons,
3839 usually pertaining to setjmp. */
3840 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3841 return 1;
3842
3843 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3844 the frame pointer by default. Turn it back on now if we've not
3845 got a leaf function. */
3846 if (TARGET_OMIT_LEAF_FRAME_POINTER
3847 && (!current_function_is_leaf || current_function_profile))
3848 return 1;
3849
3850 return 0;
3851 }
3852
3853 /* Record that the current function accesses previous call frames. */
3854
3855 void
3856 ix86_setup_frame_addresses ()
3857 {
3858 cfun->machine->accesses_prev_frame = 1;
3859 }
3860 \f
3861 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3862 # define USE_HIDDEN_LINKONCE 1
3863 #else
3864 # define USE_HIDDEN_LINKONCE 0
3865 #endif
3866
3867 static int pic_labels_used;
3868
3869 /* Fills in the label name that should be used for a pc thunk for
3870 the given register. */
3871
3872 static void
3873 get_pc_thunk_name (name, regno)
3874 char name[32];
3875 unsigned int regno;
3876 {
3877 if (USE_HIDDEN_LINKONCE)
3878 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3879 else
3880 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3881 }
3882
3883
3884 /* This function outputs the pc thunks used for -fpic; each thunk loads its
3885 register with the return address of the caller and then returns.  */
3886
3887 void
3888 ix86_asm_file_end (file)
3889 FILE *file;
3890 {
3891 rtx xops[2];
3892 int regno;
3893
3894 for (regno = 0; regno < 8; ++regno)
3895 {
3896 char name[32];
3897
3898 if (! ((pic_labels_used >> regno) & 1))
3899 continue;
3900
3901 get_pc_thunk_name (name, regno);
3902
3903 if (USE_HIDDEN_LINKONCE)
3904 {
3905 tree decl;
3906
3907 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3908 error_mark_node);
3909 TREE_PUBLIC (decl) = 1;
3910 TREE_STATIC (decl) = 1;
3911 DECL_ONE_ONLY (decl) = 1;
3912
3913 (*targetm.asm_out.unique_section) (decl, 0);
3914 named_section (decl, NULL, 0);
3915
3916 ASM_GLOBALIZE_LABEL (file, name);
3917 fputs ("\t.hidden\t", file);
3918 assemble_name (file, name);
3919 fputc ('\n', file);
3920 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3921 }
3922 else
3923 {
3924 text_section ();
3925 ASM_OUTPUT_LABEL (file, name);
3926 }
3927
3928 xops[0] = gen_rtx_REG (SImode, regno);
3929 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3930 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3931 output_asm_insn ("ret", xops);
3932 }
3933 }
3934
3935 /* Emit code for the SET_GOT patterns. */
3936
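/* A rough sketch of the output (added for illustration, assuming %ebx as
   the PIC register; the label name is made up): with deep branch prediction
   and -fpic this emits

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_asm_file_end is just
   "movl (%esp), %ebx; ret".  Without deep branch prediction the fallback is
   roughly

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */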
3937 const char *
3938 output_set_got (dest)
3939 rtx dest;
3940 {
3941 rtx xops[3];
3942
3943 xops[0] = dest;
3944 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3945
3946 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3947 {
3948 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3949
3950 if (!flag_pic)
3951 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3952 else
3953 output_asm_insn ("call\t%a2", xops);
3954
3955 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3956 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3957
3958 if (flag_pic)
3959 output_asm_insn ("pop{l}\t%0", xops);
3960 }
3961 else
3962 {
3963 char name[32];
3964 get_pc_thunk_name (name, REGNO (dest));
3965 pic_labels_used |= 1 << REGNO (dest);
3966
3967 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3968 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3969 output_asm_insn ("call\t%X2", xops);
3970 }
3971
3972 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3973 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3974 else
3975 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3976
3977 return "";
3978 }
3979
3980 /* Generate a "push" pattern for input ARG.  */
3981
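/* For example (an added sketch, 32-bit target where Pmode is SImode),
   gen_push (eax) yields the RTL

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   i.e. a plain "pushl %eax".  */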
3982 static rtx
3983 gen_push (arg)
3984 rtx arg;
3985 {
3986 return gen_rtx_SET (VOIDmode,
3987 gen_rtx_MEM (Pmode,
3988 gen_rtx_PRE_DEC (Pmode,
3989 stack_pointer_rtx)),
3990 arg);
3991 }
3992
3993 /* Return the number of an unused call-clobbered register if one is
3994 available for the entire function, or INVALID_REGNUM otherwise.  */
3995
3996 static unsigned int
3997 ix86_select_alt_pic_regnum ()
3998 {
3999 if (current_function_is_leaf && !current_function_profile)
4000 {
4001 int i;
4002 for (i = 2; i >= 0; --i)
4003 if (!regs_ever_live[i])
4004 return i;
4005 }
4006
4007 return INVALID_REGNUM;
4008 }
4009
4010 /* Return 1 if we need to save REGNO. */
4011 static int
4012 ix86_save_reg (regno, maybe_eh_return)
4013 unsigned int regno;
4014 int maybe_eh_return;
4015 {
4016 if (pic_offset_table_rtx
4017 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4018 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4019 || current_function_profile
4020 || current_function_calls_eh_return))
4021 {
4022 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4023 return 0;
4024 return 1;
4025 }
4026
4027 if (current_function_calls_eh_return && maybe_eh_return)
4028 {
4029 unsigned i;
4030 for (i = 0; ; i++)
4031 {
4032 unsigned test = EH_RETURN_DATA_REGNO (i);
4033 if (test == INVALID_REGNUM)
4034 break;
4035 if (test == regno)
4036 return 1;
4037 }
4038 }
4039
4040 return (regs_ever_live[regno]
4041 && !call_used_regs[regno]
4042 && !fixed_regs[regno]
4043 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4044 }
4045
4046 /* Return number of registers to be saved on the stack. */
4047
4048 static int
4049 ix86_nsaved_regs ()
4050 {
4051 int nregs = 0;
4052 int regno;
4053
4054 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4055 if (ix86_save_reg (regno, true))
4056 nregs++;
4057 return nregs;
4058 }
4059
4060 /* Return the offset between two registers, one to be eliminated, and the other
4061 its replacement, at the start of a routine. */
4062
4063 HOST_WIDE_INT
4064 ix86_initial_elimination_offset (from, to)
4065 int from;
4066 int to;
4067 {
4068 struct ix86_frame frame;
4069 ix86_compute_frame_layout (&frame);
4070
4071 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4072 return frame.hard_frame_pointer_offset;
4073 else if (from == FRAME_POINTER_REGNUM
4074 && to == HARD_FRAME_POINTER_REGNUM)
4075 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4076 else
4077 {
4078 if (to != STACK_POINTER_REGNUM)
4079 abort ();
4080 else if (from == ARG_POINTER_REGNUM)
4081 return frame.stack_pointer_offset;
4082 else if (from != FRAME_POINTER_REGNUM)
4083 abort ();
4084 else
4085 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4086 }
4087 }
4088
4089 /* Fill structure ix86_frame about frame of currently computed function. */
4090
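/* A worked example (an added sketch, 32-bit target, UNITS_PER_WORD == 4):
   suppose frame_pointer_needed, nregs == 2, 20 bytes of locals,
   stack_alignment_needed == 4, no varargs and a leaf function.  Then the
   offset starts at 8 (return address + saved %ebp), so
   hard_frame_pointer_offset = 8; the register save area adds 8 and padding1
   is 0, so frame_pointer_offset = 16; the locals bring the offset to 36 and
   padding2 is 0, so stack_pointer_offset = 36 and to_allocate = 20.  */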
4091 static void
4092 ix86_compute_frame_layout (frame)
4093 struct ix86_frame *frame;
4094 {
4095 HOST_WIDE_INT total_size;
4096 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4097 int offset;
4098 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4099 HOST_WIDE_INT size = get_frame_size ();
4100
4101 frame->nregs = ix86_nsaved_regs ();
4102 total_size = size;
4103
4104 /* Skip return address and saved base pointer. */
4105 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4106
4107 frame->hard_frame_pointer_offset = offset;
4108
4109 /* Do some sanity checking of stack_alignment_needed and
4110 preferred_alignment, since the i386 port is the only one using these
4111 features, and they may break easily.  */
4112
4113 if (size && !stack_alignment_needed)
4114 abort ();
4115 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4116 abort ();
4117 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4118 abort ();
4119 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4120 abort ();
4121
4122 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4123 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4124
4125 /* Register save area */
4126 offset += frame->nregs * UNITS_PER_WORD;
4127
4128 /* Va-arg area */
4129 if (ix86_save_varrargs_registers)
4130 {
4131 offset += X86_64_VARARGS_SIZE;
4132 frame->va_arg_size = X86_64_VARARGS_SIZE;
4133 }
4134 else
4135 frame->va_arg_size = 0;
4136
4137 /* Align start of frame for local function. */
4138 frame->padding1 = ((offset + stack_alignment_needed - 1)
4139 & -stack_alignment_needed) - offset;
4140
4141 offset += frame->padding1;
4142
4143 /* Frame pointer points here. */
4144 frame->frame_pointer_offset = offset;
4145
4146 offset += size;
4147
4148 /* Add outgoing arguments area. Can be skipped if we eliminated
4149 all the function calls as dead code. */
4150 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4151 {
4152 offset += current_function_outgoing_args_size;
4153 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4154 }
4155 else
4156 frame->outgoing_arguments_size = 0;
4157
4158 /* Align stack boundary. Only needed if we're calling another function
4159 or using alloca. */
4160 if (!current_function_is_leaf || current_function_calls_alloca)
4161 frame->padding2 = ((offset + preferred_alignment - 1)
4162 & -preferred_alignment) - offset;
4163 else
4164 frame->padding2 = 0;
4165
4166 offset += frame->padding2;
4167
4168 /* We've reached end of stack frame. */
4169 frame->stack_pointer_offset = offset;
4170
4171 /* Size prologue needs to allocate. */
4172 frame->to_allocate =
4173 (size + frame->padding1 + frame->padding2
4174 + frame->outgoing_arguments_size + frame->va_arg_size);
4175
4176 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4177 && current_function_is_leaf)
4178 {
4179 frame->red_zone_size = frame->to_allocate;
4180 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4181 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4182 }
4183 else
4184 frame->red_zone_size = 0;
4185 frame->to_allocate -= frame->red_zone_size;
4186 frame->stack_pointer_offset -= frame->red_zone_size;
4187 #if 0
4188 fprintf (stderr, "nregs: %i\n", frame->nregs);
4189 fprintf (stderr, "size: %i\n", size);
4190 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4191 fprintf (stderr, "padding1: %i\n", frame->padding1);
4192 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4193 fprintf (stderr, "padding2: %i\n", frame->padding2);
4194 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4195 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4196 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4197 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4198 frame->hard_frame_pointer_offset);
4199 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4200 #endif
4201 }
4202
4203 /* Emit code to save registers in the prologue. */
4204
4205 static void
4206 ix86_emit_save_regs ()
4207 {
4208 register int regno;
4209 rtx insn;
4210
4211 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4212 if (ix86_save_reg (regno, true))
4213 {
4214 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4215 RTX_FRAME_RELATED_P (insn) = 1;
4216 }
4217 }
4218
4219 /* Emit code to save registers using MOV insns.  The first register
4220 is saved at POINTER + OFFSET.  */
4221 static void
4222 ix86_emit_save_regs_using_mov (pointer, offset)
4223 rtx pointer;
4224 HOST_WIDE_INT offset;
4225 {
4226 int regno;
4227 rtx insn;
4228
4229 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4230 if (ix86_save_reg (regno, true))
4231 {
4232 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4233 Pmode, offset),
4234 gen_rtx_REG (Pmode, regno));
4235 RTX_FRAME_RELATED_P (insn) = 1;
4236 offset += UNITS_PER_WORD;
4237 }
4238 }
4239
4240 /* Expand the prologue into a bunch of separate insns. */
4241
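/* Typical emitted sequences (an added sketch): with a frame pointer and the
   plain push path, the prologue expands to roughly

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx		(one push per saved register)
	subl	$to_allocate, %esp

   while the TARGET_PROLOGUE_USING_MOVE path folds the register saves into
   the single stack adjustment and then stores each register with a mov.  */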
4242 void
4243 ix86_expand_prologue ()
4244 {
4245 rtx insn;
4246 bool pic_reg_used;
4247 struct ix86_frame frame;
4248 int use_mov = 0;
4249 HOST_WIDE_INT allocate;
4250
4251 if (!optimize_size)
4252 {
4253 use_fast_prologue_epilogue
4254 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4255 if (TARGET_PROLOGUE_USING_MOVE)
4256 use_mov = use_fast_prologue_epilogue;
4257 }
4258 ix86_compute_frame_layout (&frame);
4259
4260 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4261 slower on all targets. Also sdb doesn't like it. */
4262
4263 if (frame_pointer_needed)
4264 {
4265 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4266 RTX_FRAME_RELATED_P (insn) = 1;
4267
4268 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4269 RTX_FRAME_RELATED_P (insn) = 1;
4270 }
4271
4272 allocate = frame.to_allocate;
4273 /* When we are dealing with only a single register and an empty frame,
4274 a push is equivalent to the mov+add sequence.  */
4275 if (allocate == 0 && frame.nregs <= 1)
4276 use_mov = 0;
4277
4278 if (!use_mov)
4279 ix86_emit_save_regs ();
4280 else
4281 allocate += frame.nregs * UNITS_PER_WORD;
4282
4283 if (allocate == 0)
4284 ;
4285 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4286 {
4287 insn = emit_insn (gen_pro_epilogue_adjust_stack
4288 (stack_pointer_rtx, stack_pointer_rtx,
4289 GEN_INT (-allocate)));
4290 RTX_FRAME_RELATED_P (insn) = 1;
4291 }
4292 else
4293 {
4294 /* ??? Is this only valid for Win32? */
4295
4296 rtx arg0, sym;
4297
4298 if (TARGET_64BIT)
4299 abort ();
4300
4301 arg0 = gen_rtx_REG (SImode, 0);
4302 emit_move_insn (arg0, GEN_INT (allocate));
4303
4304 sym = gen_rtx_MEM (FUNCTION_MODE,
4305 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4306 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4307
4308 CALL_INSN_FUNCTION_USAGE (insn)
4309 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4310 CALL_INSN_FUNCTION_USAGE (insn));
4311 }
4312 if (use_mov)
4313 {
4314 if (!frame_pointer_needed || !frame.to_allocate)
4315 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4316 else
4317 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4318 -frame.nregs * UNITS_PER_WORD);
4319 }
4320
4321 #ifdef SUBTARGET_PROLOGUE
4322 SUBTARGET_PROLOGUE;
4323 #endif
4324
4325 pic_reg_used = false;
4326 if (pic_offset_table_rtx
4327 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4328 || current_function_profile))
4329 {
4330 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4331
4332 if (alt_pic_reg_used != INVALID_REGNUM)
4333 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4334
4335 pic_reg_used = true;
4336 }
4337
4338 if (pic_reg_used)
4339 {
4340 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4341
4342 /* Even with accurate pre-reload life analysis, we can wind up
4343 deleting all references to the pic register after reload.
4344 Consider if cross-jumping unifies two sides of a branch
4345 controlled by a comparison vs the only read from a global.
4346 In which case, allow the set_got to be deleted, though we're
4347 too late to do anything about the ebx save in the prologue. */
4348 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4349 }
4350
4351 /* Prevent function calls from being scheduled before the call to mcount.
4352 In the pic_reg_used case, make sure that the got load isn't deleted. */
4353 if (current_function_profile)
4354 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4355 }
4356
4357 /* Emit code to restore saved registers using MOV insns. First register
4358 is restored from POINTER + OFFSET. */
4359 static void
4360 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4361 rtx pointer;
4362 int offset;
4363 int maybe_eh_return;
4364 {
4365 int regno;
4366
4367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4368 if (ix86_save_reg (regno, maybe_eh_return))
4369 {
4370 emit_move_insn (gen_rtx_REG (Pmode, regno),
4371 adjust_address (gen_rtx_MEM (Pmode, pointer),
4372 Pmode, offset));
4373 offset += UNITS_PER_WORD;
4374 }
4375 }
4376
4377 /* Restore function stack, frame, and registers. */
4378
4379 void
4380 ix86_expand_epilogue (style)
4381 int style;
4382 {
4383 int regno;
4384 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4385 struct ix86_frame frame;
4386 HOST_WIDE_INT offset;
4387
4388 ix86_compute_frame_layout (&frame);
4389
4390 /* Calculate start of saved registers relative to ebp. Special care
4391 must be taken for the normal return case of a function using
4392 eh_return: the eax and edx registers are marked as saved, but not
4393 restored along this path. */
4394 offset = frame.nregs;
4395 if (current_function_calls_eh_return && style != 2)
4396 offset -= 2;
4397 offset *= -UNITS_PER_WORD;
4398
4399 /* If we're only restoring one register and sp is not valid, then
4400 use a move instruction to restore the register, since it's
4401 less work than reloading sp and popping the register.
4402
4403 The default code results in a stack adjustment using an add/lea instruction,
4404 while this code results in a LEAVE instruction (or discrete equivalent),
4405 so it is profitable in some other cases as well, especially when there
4406 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4407 and there is exactly one register to pop.  This heuristic may need some
4408 tuning in the future.  */
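/* Concretely (an added sketch): the move-based path restores each register
   with "movl disp(%ebp), %reg" and finishes with "leave; ret", while the
   default path rewinds the stack with an add and then pops, e.g.
   "addl $N, %esp; popl %ebx; popl %ebp; ret".  */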
4409 if ((!sp_valid && frame.nregs <= 1)
4410 || (TARGET_EPILOGUE_USING_MOVE
4411 && use_fast_prologue_epilogue
4412 && (frame.nregs > 1 || frame.to_allocate))
4413 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4414 || (frame_pointer_needed && TARGET_USE_LEAVE
4415 && use_fast_prologue_epilogue && frame.nregs == 1)
4416 || current_function_calls_eh_return)
4417 {
4418 /* Restore registers.  We can use ebp or esp to address the memory
4419 locations.  If both are available, default to ebp, since offsets
4420 are known to be small.  The only exception is esp pointing directly
4421 to the end of the block of saved registers, where we may simplify
4422 the addressing mode.  */
4423
4424 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4425 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4426 frame.to_allocate, style == 2);
4427 else
4428 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4429 offset, style == 2);
4430
4431 /* eh_return epilogues need %ecx added to the stack pointer. */
4432 if (style == 2)
4433 {
4434 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4435
4436 if (frame_pointer_needed)
4437 {
4438 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4439 tmp = plus_constant (tmp, UNITS_PER_WORD);
4440 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4441
4442 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4443 emit_move_insn (hard_frame_pointer_rtx, tmp);
4444
4445 emit_insn (gen_pro_epilogue_adjust_stack
4446 (stack_pointer_rtx, sa, const0_rtx));
4447 }
4448 else
4449 {
4450 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4451 tmp = plus_constant (tmp, (frame.to_allocate
4452 + frame.nregs * UNITS_PER_WORD));
4453 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4454 }
4455 }
4456 else if (!frame_pointer_needed)
4457 emit_insn (gen_pro_epilogue_adjust_stack
4458 (stack_pointer_rtx, stack_pointer_rtx,
4459 GEN_INT (frame.to_allocate
4460 + frame.nregs * UNITS_PER_WORD)));
4461 /* If not an i386, mov & pop is faster than "leave". */
4462 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4463 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4464 else
4465 {
4466 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4467 hard_frame_pointer_rtx,
4468 const0_rtx));
4469 if (TARGET_64BIT)
4470 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4471 else
4472 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4473 }
4474 }
4475 else
4476 {
4477 /* First step is to deallocate the stack frame so that we can
4478 pop the registers. */
4479 if (!sp_valid)
4480 {
4481 if (!frame_pointer_needed)
4482 abort ();
4483 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4484 hard_frame_pointer_rtx,
4485 GEN_INT (offset)));
4486 }
4487 else if (frame.to_allocate)
4488 emit_insn (gen_pro_epilogue_adjust_stack
4489 (stack_pointer_rtx, stack_pointer_rtx,
4490 GEN_INT (frame.to_allocate)));
4491
4492 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4493 if (ix86_save_reg (regno, false))
4494 {
4495 if (TARGET_64BIT)
4496 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4497 else
4498 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4499 }
4500 if (frame_pointer_needed)
4501 {
4502 /* Leave results in shorter dependency chains on CPUs that are
4503 able to grok it fast. */
4504 if (TARGET_USE_LEAVE)
4505 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4506 else if (TARGET_64BIT)
4507 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4508 else
4509 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4510 }
4511 }
4512
4513 /* Sibcall epilogues don't want a return instruction. */
4514 if (style == 0)
4515 return;
4516
4517 if (current_function_pops_args && current_function_args_size)
4518 {
4519 rtx popc = GEN_INT (current_function_pops_args);
4520
4521 /* i386 can only pop 64K bytes.  If asked to pop more, pop the
4522 return address, do an explicit add, and jump indirectly to the
4523 caller.  */
4524
4525 if (current_function_pops_args >= 65536)
4526 {
4527 rtx ecx = gen_rtx_REG (SImode, 2);
4528
4529 /* There is no "pascal" calling convention in the 64-bit ABI.  */
4530 if (TARGET_64BIT)
4531 abort ();
4532
4533 emit_insn (gen_popsi1 (ecx));
4534 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4535 emit_jump_insn (gen_return_indirect_internal (ecx));
4536 }
4537 else
4538 emit_jump_insn (gen_return_pop_internal (popc));
4539 }
4540 else
4541 emit_jump_insn (gen_return_internal ());
4542 }
4543
4544 /* Reset from the function's potential modifications. */
4545
4546 static void
4547 ix86_output_function_epilogue (file, size)
4548 FILE *file ATTRIBUTE_UNUSED;
4549 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4550 {
4551 if (pic_offset_table_rtx)
4552 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4553 }
4554 \f
4555 /* Extract the parts of an RTL expression that is a valid memory address
4556 for an instruction.  Return 0 if the structure of the address is
4557 grossly off.  Return -1 if the address contains ASHIFT, so it is not
4558 strictly valid but is still used for computing the length of a lea
4559 instruction.  */
4560
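/* A worked example (an added sketch): the address

	(plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
			  (reg:SI %ebp))
		 (const_int 12))

   decomposes into base = %ebp, index = %ebx, scale = 4, disp = 12, i.e.
   the operand 12(%ebp,%ebx,4), and the function returns 1.  */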
4561 static int
4562 ix86_decompose_address (addr, out)
4563 register rtx addr;
4564 struct ix86_address *out;
4565 {
4566 rtx base = NULL_RTX;
4567 rtx index = NULL_RTX;
4568 rtx disp = NULL_RTX;
4569 HOST_WIDE_INT scale = 1;
4570 rtx scale_rtx = NULL_RTX;
4571 int retval = 1;
4572
4573 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4574 base = addr;
4575 else if (GET_CODE (addr) == PLUS)
4576 {
4577 rtx op0 = XEXP (addr, 0);
4578 rtx op1 = XEXP (addr, 1);
4579 enum rtx_code code0 = GET_CODE (op0);
4580 enum rtx_code code1 = GET_CODE (op1);
4581
4582 if (code0 == REG || code0 == SUBREG)
4583 {
4584 if (code1 == REG || code1 == SUBREG)
4585 index = op0, base = op1; /* index + base */
4586 else
4587 base = op0, disp = op1; /* base + displacement */
4588 }
4589 else if (code0 == MULT)
4590 {
4591 index = XEXP (op0, 0);
4592 scale_rtx = XEXP (op0, 1);
4593 if (code1 == REG || code1 == SUBREG)
4594 base = op1; /* index*scale + base */
4595 else
4596 disp = op1; /* index*scale + disp */
4597 }
4598 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4599 {
4600 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4601 scale_rtx = XEXP (XEXP (op0, 0), 1);
4602 base = XEXP (op0, 1);
4603 disp = op1;
4604 }
4605 else if (code0 == PLUS)
4606 {
4607 index = XEXP (op0, 0); /* index + base + disp */
4608 base = XEXP (op0, 1);
4609 disp = op1;
4610 }
4611 else
4612 return 0;
4613 }
4614 else if (GET_CODE (addr) == MULT)
4615 {
4616 index = XEXP (addr, 0); /* index*scale */
4617 scale_rtx = XEXP (addr, 1);
4618 }
4619 else if (GET_CODE (addr) == ASHIFT)
4620 {
4621 rtx tmp;
4622
4623 /* We're called for lea too, which implements ashift on occasion. */
4624 index = XEXP (addr, 0);
4625 tmp = XEXP (addr, 1);
4626 if (GET_CODE (tmp) != CONST_INT)
4627 return 0;
4628 scale = INTVAL (tmp);
4629 if ((unsigned HOST_WIDE_INT) scale > 3)
4630 return 0;
4631 scale = 1 << scale;
4632 retval = -1;
4633 }
4634 else
4635 disp = addr; /* displacement */
4636
4637 /* Extract the integral value of scale. */
4638 if (scale_rtx)
4639 {
4640 if (GET_CODE (scale_rtx) != CONST_INT)
4641 return 0;
4642 scale = INTVAL (scale_rtx);
4643 }
4644
4645 /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
4646 if (base && index && scale == 1
4647 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4648 || index == stack_pointer_rtx))
4649 {
4650 rtx tmp = base;
4651 base = index;
4652 index = tmp;
4653 }
4654
4655 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4656 if ((base == hard_frame_pointer_rtx
4657 || base == frame_pointer_rtx
4658 || base == arg_pointer_rtx) && !disp)
4659 disp = const0_rtx;
4660
4661 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
4662 Avoid this by transforming to [%esi+0]. */
4663 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4664 && base && !index && !disp
4665 && REG_P (base)
4666 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4667 disp = const0_rtx;
4668
4669 /* Special case: encode reg+reg instead of reg*2. */
4670 if (!base && index && scale && scale == 2)
4671 base = index, scale = 1;
4672
4673 /* Special case: scaling cannot be encoded without base or displacement. */
4674 if (!base && !disp && index && scale != 1)
4675 disp = const0_rtx;
4676
4677 out->base = base;
4678 out->index = index;
4679 out->disp = disp;
4680 out->scale = scale;
4681
4682 return retval;
4683 }
4684 \f
4685 /* Return cost of the memory address x.
4686 For i386, it is better to use a complex address than let gcc copy
4687 the address into a reg and make a new pseudo. But not if the address
4688 requires two regs - that would mean more pseudos with longer
4689 lifetimes. */
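/* For instance (an added sketch): a bare pseudo register used as an address
   costs 1 + 1 = 2, while (plus (reg pseudo) (const_int 4)) costs only 1,
   since the nonzero displacement subtracts one and the single pseudo adds
   one; an address built from two distinct pseudos costs 3.  */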
4690 int
4691 ix86_address_cost (x)
4692 rtx x;
4693 {
4694 struct ix86_address parts;
4695 int cost = 1;
4696
4697 if (!ix86_decompose_address (x, &parts))
4698 abort ();
4699
4700 if (parts.base && GET_CODE (parts.base) == SUBREG)
4701 parts.base = SUBREG_REG (parts.base);
4702 if (parts.index && GET_CODE (parts.index) == SUBREG)
4703 parts.index = SUBREG_REG (parts.index);
4704
4705 /* More complex memory references are better. */
4706 if (parts.disp && parts.disp != const0_rtx)
4707 cost--;
4708
4709 /* Attempt to minimize number of registers in the address. */
4710 if ((parts.base
4711 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4712 || (parts.index
4713 && (!REG_P (parts.index)
4714 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4715 cost++;
4716
4717 if (parts.base
4718 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4719 && parts.index
4720 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4721 && parts.base != parts.index)
4722 cost++;
4723
4724 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4725 since its predecode logic can't detect the length of such instructions
4726 and decoding degenerates to the vector decoder.  Increase the cost of such
4727 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
4728 to split such addresses or even refuse them entirely.
4729
4730 The following addressing modes are affected:
4731 [base+scale*index]
4732 [scale*index+disp]
4733 [base+index]
4734
4735 The first and last cases may be avoidable by explicitly coding the zero
4736 into the memory address, but I don't have an AMD K6 machine handy to
4737 check this theory.  */
4738
4739 if (TARGET_K6
4740 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4741 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4742 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4743 cost += 10;
4744
4745 return cost;
4746 }
4747 \f
4748 /* If X is a machine specific address (i.e. a symbol or label being
4749 referenced as a displacement from the GOT implemented using an
4750 UNSPEC), then return the base term. Otherwise return X. */
4751
4752 rtx
4753 ix86_find_base_term (x)
4754 rtx x;
4755 {
4756 rtx term;
4757
4758 if (TARGET_64BIT)
4759 {
4760 if (GET_CODE (x) != CONST)
4761 return x;
4762 term = XEXP (x, 0);
4763 if (GET_CODE (term) == PLUS
4764 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4765 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4766 term = XEXP (term, 0);
4767 if (GET_CODE (term) != UNSPEC
4768 || XINT (term, 1) != UNSPEC_GOTPCREL)
4769 return x;
4770
4771 term = XVECEXP (term, 0, 0);
4772
4773 if (GET_CODE (term) != SYMBOL_REF
4774 && GET_CODE (term) != LABEL_REF)
4775 return x;
4776
4777 return term;
4778 }
4779
4780 if (GET_CODE (x) != PLUS
4781 || XEXP (x, 0) != pic_offset_table_rtx
4782 || GET_CODE (XEXP (x, 1)) != CONST)
4783 return x;
4784
4785 term = XEXP (XEXP (x, 1), 0);
4786
4787 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4788 term = XEXP (term, 0);
4789
4790 if (GET_CODE (term) != UNSPEC
4791 || XINT (term, 1) != UNSPEC_GOTOFF)
4792 return x;
4793
4794 term = XVECEXP (term, 0, 0);
4795
4796 if (GET_CODE (term) != SYMBOL_REF
4797 && GET_CODE (term) != LABEL_REF)
4798 return x;
4799
4800 return term;
4801 }
4802 \f
4803 /* Determine if a given RTX is a valid constant. We already know this
4804 satisfies CONSTANT_P. */
4805
4806 bool
4807 legitimate_constant_p (x)
4808 rtx x;
4809 {
4810 rtx inner;
4811
4812 switch (GET_CODE (x))
4813 {
4814 case SYMBOL_REF:
4815 /* TLS symbols are not constant. */
4816 if (tls_symbolic_operand (x, Pmode))
4817 return false;
4818 break;
4819
4820 case CONST:
4821 inner = XEXP (x, 0);
4822
4823 /* Offsets of TLS symbols are never valid.
4824 Discourage CSE from creating them. */
4825 if (GET_CODE (inner) == PLUS
4826 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4827 return false;
4828
4829 /* Only some unspecs are valid as "constants". */
4830 if (GET_CODE (inner) == UNSPEC)
4831 switch (XINT (inner, 1))
4832 {
4833 case UNSPEC_TPOFF:
4834 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4835 case UNSPEC_TP:
4836 return true;
4837 default:
4838 return false;
4839 }
4840 break;
4841
4842 default:
4843 break;
4844 }
4845
4846 /* Otherwise we handle everything else in the move patterns. */
4847 return true;
4848 }
4849
4850 /* Determine if a given RTX is a valid constant address. */
4851
4852 bool
4853 constant_address_p (x)
4854 rtx x;
4855 {
4856 switch (GET_CODE (x))
4857 {
4858 case LABEL_REF:
4859 case CONST_INT:
4860 return true;
4861
4862 case CONST_DOUBLE:
4863 return TARGET_64BIT;
4864
4865 case CONST:
4866 case SYMBOL_REF:
4867 return !flag_pic && legitimate_constant_p (x);
4868
4869 default:
4870 return false;
4871 }
4872 }
4873
4874 /* Nonzero if the constant value X is a legitimate general operand
4875 when generating PIC code. It is given that flag_pic is on and
4876 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4877
4878 bool
4879 legitimate_pic_operand_p (x)
4880 rtx x;
4881 {
4882 rtx inner;
4883
4884 switch (GET_CODE (x))
4885 {
4886 case CONST:
4887 inner = XEXP (x, 0);
4888
4889 /* Only some unspecs are valid as "constants". */
4890 if (GET_CODE (inner) == UNSPEC)
4891 switch (XINT (inner, 1))
4892 {
4893 case UNSPEC_TPOFF:
4894 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4895 case UNSPEC_TP:
4896 return true;
4897 default:
4898 return false;
4899 }
4900 /* FALLTHRU */
4901
4902 case SYMBOL_REF:
4903 case LABEL_REF:
4904 return legitimate_pic_address_disp_p (x);
4905
4906 default:
4907 return true;
4908 }
4909 }
4910
4911 /* Determine if a given CONST RTX is a valid memory displacement
4912 in PIC mode. */
4913
4914 int
4915 legitimate_pic_address_disp_p (disp)
4916 register rtx disp;
4917 {
4918 bool saw_plus;
4919
4920 /* In 64bit mode we can allow direct addresses of symbols and labels
4921 when they are not dynamic symbols. */
4922 if (TARGET_64BIT)
4923 {
4924 rtx x = disp;
4925 if (GET_CODE (disp) == CONST)
4926 x = XEXP (disp, 0);
4927 /* ??? Handle PIC code models */
4928 if (GET_CODE (x) == PLUS
4929 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4930 && ix86_cmodel == CM_SMALL_PIC
4931 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4932 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4933 x = XEXP (x, 0);
4934 if (local_symbolic_operand (x, Pmode))
4935 return 1;
4936 }
4937 if (GET_CODE (disp) != CONST)
4938 return 0;
4939 disp = XEXP (disp, 0);
4940
4941 if (TARGET_64BIT)
4942 {
4943 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
4944 of GOT tables.  We should not need these anyway.  */
4945 if (GET_CODE (disp) != UNSPEC
4946 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4947 return 0;
4948
4949 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4950 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4951 return 0;
4952 return 1;
4953 }
4954
4955 saw_plus = false;
4956 if (GET_CODE (disp) == PLUS)
4957 {
4958 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4959 return 0;
4960 disp = XEXP (disp, 0);
4961 saw_plus = true;
4962 }
4963
4964 if (GET_CODE (disp) != UNSPEC)
4965 return 0;
4966
4967 switch (XINT (disp, 1))
4968 {
4969 case UNSPEC_GOT:
4970 if (saw_plus)
4971 return false;
4972 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4973 case UNSPEC_GOTOFF:
4974 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4975 case UNSPEC_GOTTPOFF:
4976 if (saw_plus)
4977 return false;
4978 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4979 case UNSPEC_NTPOFF:
4980 /* ??? Could support offset here. */
4981 if (saw_plus)
4982 return false;
4983 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4984 case UNSPEC_DTPOFF:
4985 /* ??? Could support offset here. */
4986 if (saw_plus)
4987 return false;
4988 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4989 }
4990
4991 return 0;
4992 }
4993
4994 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4995 memory address for an instruction. The MODE argument is the machine mode
4996 for the MEM expression that wants to use this address.
4997
4998 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
4999 convert common non-canonical forms to canonical form so that they will
5000 be recognized. */
5001
5002 int
5003 legitimate_address_p (mode, addr, strict)
5004 enum machine_mode mode;
5005 register rtx addr;
5006 int strict;
5007 {
5008 struct ix86_address parts;
5009 rtx base, index, disp;
5010 HOST_WIDE_INT scale;
5011 const char *reason = NULL;
5012 rtx reason_rtx = NULL_RTX;
5013
5014 if (TARGET_DEBUG_ADDR)
5015 {
5016 fprintf (stderr,
5017 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5018 GET_MODE_NAME (mode), strict);
5019 debug_rtx (addr);
5020 }
5021
5022 if (ix86_decompose_address (addr, &parts) <= 0)
5023 {
5024 reason = "decomposition failed";
5025 goto report_error;
5026 }
5027
5028 base = parts.base;
5029 index = parts.index;
5030 disp = parts.disp;
5031 scale = parts.scale;
5032
5033 /* Validate base register.
5034
5035 Don't allow SUBREGs here; they can lead to spill failures when the base
5036 is one word out of a two word structure, which is represented internally
5037 as a DImode int. */
5038
5039 if (base)
5040 {
5041 rtx reg;
5042 reason_rtx = base;
5043
5044 if (GET_CODE (base) == SUBREG)
5045 reg = SUBREG_REG (base);
5046 else
5047 reg = base;
5048
5049 if (GET_CODE (reg) != REG)
5050 {
5051 reason = "base is not a register";
5052 goto report_error;
5053 }
5054
5055 if (GET_MODE (base) != Pmode)
5056 {
5057 reason = "base is not in Pmode";
5058 goto report_error;
5059 }
5060
5061 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5062 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5063 {
5064 reason = "base is not valid";
5065 goto report_error;
5066 }
5067 }
5068
5069 /* Validate index register.
5070
5071 Don't allow SUBREGs here; they can lead to spill failures when the index
5072 is one word out of a two word structure, which is represented internally
5073 as a DImode int. */
5074
5075 if (index)
5076 {
5077 rtx reg;
5078 reason_rtx = index;
5079
5080 if (GET_CODE (index) == SUBREG)
5081 reg = SUBREG_REG (index);
5082 else
5083 reg = index;
5084
5085 if (GET_CODE (reg) != REG)
5086 {
5087 reason = "index is not a register";
5088 goto report_error;
5089 }
5090
5091 if (GET_MODE (index) != Pmode)
5092 {
5093 reason = "index is not in Pmode";
5094 goto report_error;
5095 }
5096
5097 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5098 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5099 {
5100 reason = "index is not valid";
5101 goto report_error;
5102 }
5103 }
5104
5105 /* Validate scale factor. */
5106 if (scale != 1)
5107 {
5108 reason_rtx = GEN_INT (scale);
5109 if (!index)
5110 {
5111 reason = "scale without index";
5112 goto report_error;
5113 }
5114
5115 if (scale != 2 && scale != 4 && scale != 8)
5116 {
5117 reason = "scale is not a valid multiplier";
5118 goto report_error;
5119 }
5120 }
5121
5122 /* Validate displacement. */
5123 if (disp)
5124 {
5125 reason_rtx = disp;
5126
5127 if (TARGET_64BIT)
5128 {
5129 if (!x86_64_sign_extended_value (disp))
5130 {
5131 reason = "displacement is out of range";
5132 goto report_error;
5133 }
5134 }
5135 else
5136 {
5137 if (GET_CODE (disp) == CONST_DOUBLE)
5138 {
5139 reason = "displacement is a const_double";
5140 goto report_error;
5141 }
5142 }
5143
5144 if (GET_CODE (disp) == CONST
5145 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5146 switch (XINT (XEXP (disp, 0), 1))
5147 {
5148 case UNSPEC_GOT:
5149 case UNSPEC_GOTOFF:
5150 case UNSPEC_GOTPCREL:
5151 if (!flag_pic)
5152 abort ();
5153 goto is_legitimate_pic;
5154
5155 case UNSPEC_GOTTPOFF:
5156 case UNSPEC_NTPOFF:
5157 case UNSPEC_DTPOFF:
5158 break;
5159
5160 default:
5161 reason = "invalid address unspec";
5162 goto report_error;
5163 }
5164
5165 else if (flag_pic && SYMBOLIC_CONST (disp))
5166 {
5167 is_legitimate_pic:
5168 if (TARGET_64BIT && (index || base))
5169 {
5170 reason = "non-constant pic memory reference";
5171 goto report_error;
5172 }
5173 if (! legitimate_pic_address_disp_p (disp))
5174 {
5175 reason = "displacement is an invalid pic construct";
5176 goto report_error;
5177 }
5178
5179 /* This code used to verify that a symbolic pic displacement
5180 includes the pic_offset_table_rtx register.
5181
5182 While this is a good idea, unfortunately these constructs may
5183 be created by the "adds using lea" optimization for incorrect
5184 code like:
5185
5186 int a;
5187 int foo(int i)
5188 {
5189 return *(&a+i);
5190 }
5191
5192 This code is nonsensical, but results in addressing the
5193 GOT table with pic_offset_table_rtx as the base.  We can't
5194 easily refuse it, since it gets matched by the
5195 "addsi3" pattern, which later gets split to lea in the
5196 case where the output register differs from the input.  While
5197 this could be handled by a separate addsi pattern for this case
5198 that never results in lea, disabling this test seems to be the
5199 easier and correct fix for the crash.  */
5200 }
5201 else if (!CONSTANT_ADDRESS_P (disp))
5202 {
5203 reason = "displacement is not constant";
5204 goto report_error;
5205 }
5206 }
5207
5208 /* Everything looks valid. */
5209 if (TARGET_DEBUG_ADDR)
5210 fprintf (stderr, "Success.\n");
5211 return TRUE;
5212
5213 report_error:
5214 if (TARGET_DEBUG_ADDR)
5215 {
5216 fprintf (stderr, "Error: %s\n", reason);
5217 debug_rtx (reason_rtx);
5218 }
5219 return FALSE;
5220 }
5221 \f
5222 /* Return a unique alias set for the GOT.  */
5223
5224 static HOST_WIDE_INT
5225 ix86_GOT_alias_set ()
5226 {
5227 static HOST_WIDE_INT set = -1;
5228 if (set == -1)
5229 set = new_alias_set ();
5230 return set;
5231 }
5232
5233 /* Return a legitimate reference for ORIG (an address) using the
5234 register REG. If REG is 0, a new pseudo is generated.
5235
5236 There are two types of references that must be handled:
5237
5238 1. Global data references must load the address from the GOT, via
5239 the PIC reg. An insn is emitted to do this load, and the reg is
5240 returned.
5241
5242 2. Static data references, constant pool addresses, and code labels
5243 compute the address as an offset from the GOT, whose base is in
5244 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5245 differentiate them from global data objects. The returned
5246 address is the PIC reg + an unspec constant.
5247
5248 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5249 reg also appears in the address. */
5250
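/* A rough sketch of the two cases (added for illustration, 32-bit, with
   %ebx as the PIC register): a local symbol becomes

	(plus:SI (reg:SI %ebx) (const:SI (unspec:SI [sym] UNSPEC_GOTOFF)))

   i.e. sym@GOTOFF(%ebx), while a global symbol becomes a load

	(mem:SI (plus:SI (reg:SI %ebx) (const:SI (unspec:SI [sym] UNSPEC_GOT))))

   i.e. movl sym@GOT(%ebx), %reg; in 64-bit mode the global case uses the
   UNSPEC_GOTPCREL form instead.  */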
5251 rtx
5252 legitimize_pic_address (orig, reg)
5253 rtx orig;
5254 rtx reg;
5255 {
5256 rtx addr = orig;
5257 rtx new = orig;
5258 rtx base;
5259
5260 if (local_symbolic_operand (addr, Pmode))
5261 {
5262 /* In 64bit mode we can address such objects directly. */
5263 if (TARGET_64BIT)
5264 new = addr;
5265 else
5266 {
5267 /* This symbol may be referenced via a displacement from the PIC
5268 base address (@GOTOFF). */
5269
5270 if (reload_in_progress)
5271 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5272 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5273 new = gen_rtx_CONST (Pmode, new);
5274 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5275
5276 if (reg != 0)
5277 {
5278 emit_move_insn (reg, new);
5279 new = reg;
5280 }
5281 }
5282 }
5283 else if (GET_CODE (addr) == SYMBOL_REF)
5284 {
5285 if (TARGET_64BIT)
5286 {
5287 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5288 new = gen_rtx_CONST (Pmode, new);
5289 new = gen_rtx_MEM (Pmode, new);
5290 RTX_UNCHANGING_P (new) = 1;
5291 set_mem_alias_set (new, ix86_GOT_alias_set ());
5292
5293 if (reg == 0)
5294 reg = gen_reg_rtx (Pmode);
5295 /* Use gen_movsi directly; otherwise the address is loaded
5296 into a register for CSE.  We don't want to CSE these addresses;
5297 instead we CSE addresses from the GOT table, so skip this.  */
5298 emit_insn (gen_movsi (reg, new));
5299 new = reg;
5300 }
5301 else
5302 {
5303 /* This symbol must be referenced via a load from the
5304 Global Offset Table (@GOT). */
5305
5306 if (reload_in_progress)
5307 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5308 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5309 new = gen_rtx_CONST (Pmode, new);
5310 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5311 new = gen_rtx_MEM (Pmode, new);
5312 RTX_UNCHANGING_P (new) = 1;
5313 set_mem_alias_set (new, ix86_GOT_alias_set ());
5314
5315 if (reg == 0)
5316 reg = gen_reg_rtx (Pmode);
5317 emit_move_insn (reg, new);
5318 new = reg;
5319 }
5320 }
5321 else
5322 {
5323 if (GET_CODE (addr) == CONST)
5324 {
5325 addr = XEXP (addr, 0);
5326
5327 /* We must match stuff we generate before. Assume the only
5328 unspecs that can get here are ours. Not that we could do
5329 anything with them anyway... */
5330 if (GET_CODE (addr) == UNSPEC
5331 || (GET_CODE (addr) == PLUS
5332 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5333 return orig;
5334 if (GET_CODE (addr) != PLUS)
5335 abort ();
5336 }
5337 if (GET_CODE (addr) == PLUS)
5338 {
5339 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5340
5341 /* Check first to see if this is a constant offset from a @GOTOFF
5342 symbol reference. */
5343 if (local_symbolic_operand (op0, Pmode)
5344 && GET_CODE (op1) == CONST_INT)
5345 {
5346 if (!TARGET_64BIT)
5347 {
5348 if (reload_in_progress)
5349 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5350 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5351 UNSPEC_GOTOFF);
5352 new = gen_rtx_PLUS (Pmode, new, op1);
5353 new = gen_rtx_CONST (Pmode, new);
5354 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5355
5356 if (reg != 0)
5357 {
5358 emit_move_insn (reg, new);
5359 new = reg;
5360 }
5361 }
5362 else
5363 {
5364 /* ??? We need to limit offsets here. */
5365 }
5366 }
5367 else
5368 {
5369 base = legitimize_pic_address (XEXP (addr, 0), reg);
5370 new = legitimize_pic_address (XEXP (addr, 1),
5371 base == reg ? NULL_RTX : reg);
5372
5373 if (GET_CODE (new) == CONST_INT)
5374 new = plus_constant (base, INTVAL (new));
5375 else
5376 {
5377 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5378 {
5379 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5380 new = XEXP (new, 1);
5381 }
5382 new = gen_rtx_PLUS (Pmode, base, new);
5383 }
5384 }
5385 }
5386 }
5387 return new;
5388 }
5389
5390 static void
5391 ix86_encode_section_info (decl, first)
5392 tree decl;
5393 int first ATTRIBUTE_UNUSED;
5394 {
5395 bool local_p = (*targetm.binds_local_p) (decl);
5396 rtx rtl, symbol;
5397
5398 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5399 if (GET_CODE (rtl) != MEM)
5400 return;
5401 symbol = XEXP (rtl, 0);
5402 if (GET_CODE (symbol) != SYMBOL_REF)
5403 return;
5404
5405 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5406 symbol so that we may access it directly in the GOT. */
5407
5408 if (flag_pic)
5409 SYMBOL_REF_FLAG (symbol) = local_p;
5410
5411 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5412 "local dynamic", "initial exec" or "local exec" TLS models
5413 respectively. */
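/* For example (an added sketch, judging from the %[GLil] mapping above):
   a thread-local variable "foo" compiled without -fpic and not bound
   locally gets the initial-exec encoding "%ifoo", while a locally bound
   one gets the local-exec encoding "%lfoo"; ix86_strip_name_encoding
   removes the two-character prefix again when the name is printed.  */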
5414
5415 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5416 {
5417 const char *symbol_str;
5418 char *newstr;
5419 size_t len;
5420 enum tls_model kind;
5421
5422 if (!flag_pic)
5423 {
5424 if (local_p)
5425 kind = TLS_MODEL_LOCAL_EXEC;
5426 else
5427 kind = TLS_MODEL_INITIAL_EXEC;
5428 }
5429 /* Local dynamic is inefficient when we're not combining the
5430 parts of the address. */
5431 else if (optimize && local_p)
5432 kind = TLS_MODEL_LOCAL_DYNAMIC;
5433 else
5434 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5435 if (kind < flag_tls_default)
5436 kind = flag_tls_default;
5437
5438 symbol_str = XSTR (symbol, 0);
5439
5440 if (symbol_str[0] == '%')
5441 {
5442 if (symbol_str[1] == tls_model_chars[kind])
5443 return;
5444 symbol_str += 2;
5445 }
5446 len = strlen (symbol_str) + 1;
5447 newstr = alloca (len + 2);
5448
5449 newstr[0] = '%';
5450 newstr[1] = tls_model_chars[kind];
5451 memcpy (newstr + 2, symbol_str, len);
5452
5453 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5454 }
5455 }
5456
5457 /* Undo the above when printing symbol names. */
5458
5459 static const char *
5460 ix86_strip_name_encoding (str)
5461 const char *str;
5462 {
5463 if (str[0] == '%')
5464 str += 2;
5465 if (str [0] == '*')
5466 str += 1;
5467 return str;
5468 }
5469 \f
5470 /* Load the thread pointer into a register. */
5471
5472 static rtx
5473 get_thread_pointer ()
5474 {
5475 rtx tp;
5476
5477 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5478 tp = gen_rtx_CONST (Pmode, tp);
5479 tp = force_reg (Pmode, tp);
5480
5481 return tp;
5482 }
5483
5484 /* Try machine-dependent ways of modifying an illegitimate address
5485 to be legitimate. If we find one, return the new, valid address.
5486 This macro is used in only one place: `memory_address' in explow.c.
5487
5488 OLDX is the address as it was before break_out_memory_refs was called.
5489 In some cases it is useful to look at this to decide what needs to be done.
5490
5491 MODE and WIN are passed so that this macro can use
5492 GO_IF_LEGITIMATE_ADDRESS.
5493
5494 It is always safe for this macro to do nothing. It exists to recognize
5495 opportunities to optimize the output.
5496
5497 For the 80386, we handle X+REG by loading X into a register R and
5498 using R+REG. R will go in a general reg and indexing will be used.
5499 However, if REG is a broken-out memory address or multiplication,
5500 nothing needs to be done because REG can certainly go in a general reg.
5501
5502 When -fpic is used, special handling is needed for symbolic references.
5503 See comments by legitimize_pic_address in i386.c for details. */
5504
5505 rtx
5506 legitimize_address (x, oldx, mode)
5507 register rtx x;
5508 register rtx oldx ATTRIBUTE_UNUSED;
5509 enum machine_mode mode;
5510 {
5511 int changed = 0;
5512 unsigned log;
5513
5514 if (TARGET_DEBUG_ADDR)
5515 {
5516 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5517 GET_MODE_NAME (mode));
5518 debug_rtx (x);
5519 }
5520
5521 log = tls_symbolic_operand (x, mode);
5522 if (log)
5523 {
5524 rtx dest, base, off, pic;
5525
5526 switch (log)
5527 {
5528 case TLS_MODEL_GLOBAL_DYNAMIC:
5529 dest = gen_reg_rtx (Pmode);
5530 emit_insn (gen_tls_global_dynamic (dest, x));
5531 break;
5532
5533 case TLS_MODEL_LOCAL_DYNAMIC:
5534 base = gen_reg_rtx (Pmode);
5535 emit_insn (gen_tls_local_dynamic_base (base));
5536
5537 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5538 off = gen_rtx_CONST (Pmode, off);
5539
5540 return gen_rtx_PLUS (Pmode, base, off);
5541
5542 case TLS_MODEL_INITIAL_EXEC:
5543 if (flag_pic)
5544 {
5545 if (reload_in_progress)
5546 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5547 pic = pic_offset_table_rtx;
5548 }
5549 else
5550 {
5551 pic = gen_reg_rtx (Pmode);
5552 emit_insn (gen_set_got (pic));
5553 }
5554
5555 base = get_thread_pointer ();
5556
5557 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5558 off = gen_rtx_CONST (Pmode, off);
5559 off = gen_rtx_PLUS (Pmode, pic, off);
5560 off = gen_rtx_MEM (Pmode, off);
5561 RTX_UNCHANGING_P (off) = 1;
5562 set_mem_alias_set (off, ix86_GOT_alias_set ());
5563
5564 /* Damn Sun for specifying a set of dynamic relocations without
5565 considering the two-operand nature of the architecture!
5566 We'd be much better off with a "GOTNTPOFF" relocation that
5567 already contained the negated constant. */
5568 /* ??? Using negl and reg+reg addressing appears to be a loss
5569 size-wise. The negl is two bytes, just like the extra movl
5570 incurred by the two-operand subl, but reg+reg addressing
5571 uses the two-byte modrm form, unlike plain reg. */
5572
5573 dest = gen_reg_rtx (Pmode);
5574 emit_insn (gen_subsi3 (dest, base, off));
5575 break;
5576
5577 case TLS_MODEL_LOCAL_EXEC:
5578 base = get_thread_pointer ();
5579
5580 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5581 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5582 off = gen_rtx_CONST (Pmode, off);
5583
5584 if (TARGET_GNU_TLS)
5585 return gen_rtx_PLUS (Pmode, base, off);
5586 else
5587 {
5588 dest = gen_reg_rtx (Pmode);
5589 emit_insn (gen_subsi3 (dest, base, off));
5590 }
5591 break;
5592
5593 default:
5594 abort ();
5595 }
5596
5597 return dest;
5598 }
5599
5600 if (flag_pic && SYMBOLIC_CONST (x))
5601 return legitimize_pic_address (x, 0);
5602
5603 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5604 if (GET_CODE (x) == ASHIFT
5605 && GET_CODE (XEXP (x, 1)) == CONST_INT
5606 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
5607 {
5608 changed = 1;
5609 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5610 GEN_INT (1 << log));
5611 }
5612
5613 if (GET_CODE (x) == PLUS)
5614 {
5615 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5616
5617 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5618 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5619 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
5620 {
5621 changed = 1;
5622 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5623 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5624 GEN_INT (1 << log));
5625 }
5626
5627 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5628 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5629 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
5630 {
5631 changed = 1;
5632 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5633 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5634 GEN_INT (1 << log));
5635 }
5636
5637 /* Put multiply first if it isn't already. */
5638 if (GET_CODE (XEXP (x, 1)) == MULT)
5639 {
5640 rtx tmp = XEXP (x, 0);
5641 XEXP (x, 0) = XEXP (x, 1);
5642 XEXP (x, 1) = tmp;
5643 changed = 1;
5644 }
5645
5646 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5647 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5648 created by virtual register instantiation, register elimination, and
5649 similar optimizations. */
5650 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5651 {
5652 changed = 1;
5653 x = gen_rtx_PLUS (Pmode,
5654 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5655 XEXP (XEXP (x, 1), 0)),
5656 XEXP (XEXP (x, 1), 1));
5657 }
5658
5659 /* Canonicalize
5660 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5661 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5662 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5663 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5664 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5665 && CONSTANT_P (XEXP (x, 1)))
5666 {
5667 rtx constant;
5668 rtx other = NULL_RTX;
5669
5670 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5671 {
5672 constant = XEXP (x, 1);
5673 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5674 }
5675 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5676 {
5677 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5678 other = XEXP (x, 1);
5679 }
5680 else
5681 constant = 0;
5682
5683 if (constant)
5684 {
5685 changed = 1;
5686 x = gen_rtx_PLUS (Pmode,
5687 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5688 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5689 plus_constant (other, INTVAL (constant)));
5690 }
5691 }
5692
5693 if (changed && legitimate_address_p (mode, x, FALSE))
5694 return x;
5695
5696 if (GET_CODE (XEXP (x, 0)) == MULT)
5697 {
5698 changed = 1;
5699 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5700 }
5701
5702 if (GET_CODE (XEXP (x, 1)) == MULT)
5703 {
5704 changed = 1;
5705 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5706 }
5707
5708 if (changed
5709 && GET_CODE (XEXP (x, 1)) == REG
5710 && GET_CODE (XEXP (x, 0)) == REG)
5711 return x;
5712
5713 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5714 {
5715 changed = 1;
5716 x = legitimize_pic_address (x, 0);
5717 }
5718
5719 if (changed && legitimate_address_p (mode, x, FALSE))
5720 return x;
5721
5722 if (GET_CODE (XEXP (x, 0)) == REG)
5723 {
5724 register rtx temp = gen_reg_rtx (Pmode);
5725 register rtx val = force_operand (XEXP (x, 1), temp);
5726 if (val != temp)
5727 emit_move_insn (temp, val);
5728
5729 XEXP (x, 1) = temp;
5730 return x;
5731 }
5732
5733 else if (GET_CODE (XEXP (x, 1)) == REG)
5734 {
5735 register rtx temp = gen_reg_rtx (Pmode);
5736 register rtx val = force_operand (XEXP (x, 0), temp);
5737 if (val != temp)
5738 emit_move_insn (temp, val);
5739
5740 XEXP (x, 0) = temp;
5741 return x;
5742 }
5743 }
5744
5745 return x;
5746 }
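
/* Worked example (editorial sketch): the reassociation above turns an
   address such as
	(plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 16)))
   into
	(plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 16))
   which matches the index*scale + base + displacement shape of the
   i386 addressing modes and so passes legitimate_address_p.  */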
5747 \f
5748 /* Print an integer constant expression in assembler syntax. Addition
5749 and subtraction are the only arithmetic that may appear in these
5750 expressions. FILE is the stdio stream to write to, X is the rtx, and
5751 CODE is the operand print code from the output string. */
5752
5753 static void
5754 output_pic_addr_const (file, x, code)
5755 FILE *file;
5756 rtx x;
5757 int code;
5758 {
5759 char buf[256];
5760
5761 switch (GET_CODE (x))
5762 {
5763 case PC:
5764 if (flag_pic)
5765 putc ('.', file);
5766 else
5767 abort ();
5768 break;
5769
5770 case SYMBOL_REF:
5771 assemble_name (file, XSTR (x, 0));
5772 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5773 fputs ("@PLT", file);
5774 break;
5775
5776 case LABEL_REF:
5777 x = XEXP (x, 0);
5778 /* FALLTHRU */
5779 case CODE_LABEL:
5780 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5781 assemble_name (asm_out_file, buf);
5782 break;
5783
5784 case CONST_INT:
5785 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5786 break;
5787
5788 case CONST:
5789 /* This used to output parentheses around the expression,
5790 but that does not work on the 386 (either ATT or BSD assembler). */
5791 output_pic_addr_const (file, XEXP (x, 0), code);
5792 break;
5793
5794 case CONST_DOUBLE:
5795 if (GET_MODE (x) == VOIDmode)
5796 {
5797 /* We can use %d if the number is <32 bits and positive. */
5798 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5799 fprintf (file, "0x%lx%08lx",
5800 (unsigned long) CONST_DOUBLE_HIGH (x),
5801 (unsigned long) CONST_DOUBLE_LOW (x));
5802 else
5803 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5804 }
5805 else
5806 /* We can't handle floating point constants;
5807 PRINT_OPERAND must handle them. */
5808 output_operand_lossage ("floating constant misused");
5809 break;
5810
5811 case PLUS:
5812 /* Some assemblers need integer constants to appear first. */
5813 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5814 {
5815 output_pic_addr_const (file, XEXP (x, 0), code);
5816 putc ('+', file);
5817 output_pic_addr_const (file, XEXP (x, 1), code);
5818 }
5819 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5820 {
5821 output_pic_addr_const (file, XEXP (x, 1), code);
5822 putc ('+', file);
5823 output_pic_addr_const (file, XEXP (x, 0), code);
5824 }
5825 else
5826 abort ();
5827 break;
5828
5829 case MINUS:
5830 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5831 output_pic_addr_const (file, XEXP (x, 0), code);
5832 putc ('-', file);
5833 output_pic_addr_const (file, XEXP (x, 1), code);
5834 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5835 break;
5836
5837 case UNSPEC:
5838 if (XVECLEN (x, 0) != 1)
5839 abort ();
5840 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5841 switch (XINT (x, 1))
5842 {
5843 case UNSPEC_GOT:
5844 fputs ("@GOT", file);
5845 break;
5846 case UNSPEC_GOTOFF:
5847 fputs ("@GOTOFF", file);
5848 break;
5849 case UNSPEC_GOTPCREL:
5850 fputs ("@GOTPCREL(%rip)", file);
5851 break;
5852 case UNSPEC_GOTTPOFF:
5853 fputs ("@GOTTPOFF", file);
5854 break;
5855 case UNSPEC_TPOFF:
5856 fputs ("@TPOFF", file);
5857 break;
5858 case UNSPEC_NTPOFF:
5859 fputs ("@NTPOFF", file);
5860 break;
5861 case UNSPEC_DTPOFF:
5862 fputs ("@DTPOFF", file);
5863 break;
5864 default:
5865 output_operand_lossage ("invalid UNSPEC as operand");
5866 break;
5867 }
5868 break;
5869
5870 default:
5871 output_operand_lossage ("invalid expression as operand");
5872 }
5873 }
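
/* Example output (editorial note): for the RTL
	(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   the routine above prints "foo@GOTOFF"; for a plain
   (symbol_ref "bar") with operand code 'P' and SYMBOL_REF_FLAG
   clear it prints "bar@PLT".  */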
5874
5875 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5876 We need to handle our special PIC relocations. */
5877
5878 void
5879 i386_dwarf_output_addr_const (file, x)
5880 FILE *file;
5881 rtx x;
5882 {
5883 #ifdef ASM_QUAD
5884 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5885 #else
5886 if (TARGET_64BIT)
5887 abort ();
5888 fprintf (file, "%s", ASM_LONG);
5889 #endif
5890 if (flag_pic)
5891 output_pic_addr_const (file, x, '\0');
5892 else
5893 output_addr_const (file, x);
5894 fputc ('\n', file);
5895 }
5896
5897 /* In the name of slightly smaller debug output, and to cater to
5898 general assembler lossage, recognize PIC+GOTOFF and turn it back
5899 into a direct symbol reference. */
5900
5901 rtx
5902 i386_simplify_dwarf_addr (orig_x)
5903 rtx orig_x;
5904 {
5905 rtx x = orig_x, y;
5906
5907 if (GET_CODE (x) == MEM)
5908 x = XEXP (x, 0);
5909
5910 if (TARGET_64BIT)
5911 {
5912 if (GET_CODE (x) != CONST
5913 || GET_CODE (XEXP (x, 0)) != UNSPEC
5914 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5915 || GET_CODE (orig_x) != MEM)
5916 return orig_x;
5917 return XVECEXP (XEXP (x, 0), 0, 0);
5918 }
5919
5920 if (GET_CODE (x) != PLUS
5921 || GET_CODE (XEXP (x, 1)) != CONST)
5922 return orig_x;
5923
5924 if (GET_CODE (XEXP (x, 0)) == REG
5925 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5926 /* %ebx + GOT/GOTOFF */
5927 y = NULL;
5928 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5929 {
5930 /* %ebx + %reg * scale + GOT/GOTOFF */
5931 y = XEXP (x, 0);
5932 if (GET_CODE (XEXP (y, 0)) == REG
5933 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5934 y = XEXP (y, 1);
5935 else if (GET_CODE (XEXP (y, 1)) == REG
5936 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5937 y = XEXP (y, 0);
5938 else
5939 return orig_x;
5940 if (GET_CODE (y) != REG
5941 && GET_CODE (y) != MULT
5942 && GET_CODE (y) != ASHIFT)
5943 return orig_x;
5944 }
5945 else
5946 return orig_x;
5947
5948 x = XEXP (XEXP (x, 1), 0);
5949 if (GET_CODE (x) == UNSPEC
5950 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5951 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5952 {
5953 if (y)
5954 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5955 return XVECEXP (x, 0, 0);
5956 }
5957
5958 if (GET_CODE (x) == PLUS
5959 && GET_CODE (XEXP (x, 0)) == UNSPEC
5960 && GET_CODE (XEXP (x, 1)) == CONST_INT
5961 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5962 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5963 && GET_CODE (orig_x) != MEM)))
5964 {
5965 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5966 if (y)
5967 return gen_rtx_PLUS (Pmode, y, x);
5968 return x;
5969 }
5970
5971 return orig_x;
5972 }
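
/* Example (editorial note): the 32-bit PIC address
	(plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   simplifies back to (symbol_ref "foo") for debug output, and on
   64-bit targets a (mem (const (unspec [...] UNSPEC_GOTPCREL)))
   reference likewise yields the bare symbol_ref.  */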
5973 \f
5974 static void
5975 put_condition_code (code, mode, reverse, fp, file)
5976 enum rtx_code code;
5977 enum machine_mode mode;
5978 int reverse, fp;
5979 FILE *file;
5980 {
5981 const char *suffix;
5982
5983 if (mode == CCFPmode || mode == CCFPUmode)
5984 {
5985 enum rtx_code second_code, bypass_code;
5986 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5987 if (bypass_code != NIL || second_code != NIL)
5988 abort ();
5989 code = ix86_fp_compare_code_to_integer (code);
5990 mode = CCmode;
5991 }
5992 if (reverse)
5993 code = reverse_condition (code);
5994
5995 switch (code)
5996 {
5997 case EQ:
5998 suffix = "e";
5999 break;
6000 case NE:
6001 suffix = "ne";
6002 break;
6003 case GT:
6004 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6005 abort ();
6006 suffix = "g";
6007 break;
6008 case GTU:
6009 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6010 Those same assemblers have the same but opposite lossage on cmov. */
6011 if (mode != CCmode)
6012 abort ();
6013 suffix = fp ? "nbe" : "a";
6014 break;
6015 case LT:
6016 if (mode == CCNOmode || mode == CCGOCmode)
6017 suffix = "s";
6018 else if (mode == CCmode || mode == CCGCmode)
6019 suffix = "l";
6020 else
6021 abort ();
6022 break;
6023 case LTU:
6024 if (mode != CCmode)
6025 abort ();
6026 suffix = "b";
6027 break;
6028 case GE:
6029 if (mode == CCNOmode || mode == CCGOCmode)
6030 suffix = "ns";
6031 else if (mode == CCmode || mode == CCGCmode)
6032 suffix = "ge";
6033 else
6034 abort ();
6035 break;
6036 case GEU:
6037 /* ??? As above. */
6038 if (mode != CCmode)
6039 abort ();
6040 suffix = fp ? "nb" : "ae";
6041 break;
6042 case LE:
6043 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6044 abort ();
6045 suffix = "le";
6046 break;
6047 case LEU:
6048 if (mode != CCmode)
6049 abort ();
6050 suffix = "be";
6051 break;
6052 case UNORDERED:
6053 suffix = fp ? "u" : "p";
6054 break;
6055 case ORDERED:
6056 suffix = fp ? "nu" : "np";
6057 break;
6058 default:
6059 abort ();
6060 }
6061 fputs (suffix, file);
6062 }
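
/* Example (editorial note): a GE comparison in CCNOmode or CCGOCmode
   prints the suffix "ns" (only the sign flag is meaningful there),
   while the same comparison in CCmode or CCGCmode prints "ge"; a
   setcc user of this routine therefore emits "setns" or "setge"
   accordingly.  */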
6063
6064 void
6065 print_reg (x, code, file)
6066 rtx x;
6067 int code;
6068 FILE *file;
6069 {
6070 if (REGNO (x) == ARG_POINTER_REGNUM
6071 || REGNO (x) == FRAME_POINTER_REGNUM
6072 || REGNO (x) == FLAGS_REG
6073 || REGNO (x) == FPSR_REG)
6074 abort ();
6075
6076 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6077 putc ('%', file);
6078
6079 if (code == 'w' || MMX_REG_P (x))
6080 code = 2;
6081 else if (code == 'b')
6082 code = 1;
6083 else if (code == 'k')
6084 code = 4;
6085 else if (code == 'q')
6086 code = 8;
6087 else if (code == 'y')
6088 code = 3;
6089 else if (code == 'h')
6090 code = 0;
6091 else
6092 code = GET_MODE_SIZE (GET_MODE (x));
6093
6094 /* Irritatingly, the AMD extended registers use a different naming
6095 convention from the normal registers. */
6096 if (REX_INT_REG_P (x))
6097 {
6098 if (!TARGET_64BIT)
6099 abort ();
6100 switch (code)
6101 {
6102 case 0:
6103 error ("extended registers have no high halves");
6104 break;
6105 case 1:
6106 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6107 break;
6108 case 2:
6109 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6110 break;
6111 case 4:
6112 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6113 break;
6114 case 8:
6115 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6116 break;
6117 default:
6118 error ("unsupported operand size for extended register");
6119 break;
6120 }
6121 return;
6122 }
6123 switch (code)
6124 {
6125 case 3:
6126 if (STACK_TOP_P (x))
6127 {
6128 fputs ("st(0)", file);
6129 break;
6130 }
6131 /* FALLTHRU */
6132 case 8:
6133 case 4:
6134 case 12:
6135 if (! ANY_FP_REG_P (x))
6136 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6137 /* FALLTHRU */
6138 case 16:
6139 case 2:
6140 fputs (hi_reg_name[REGNO (x)], file);
6141 break;
6142 case 1:
6143 fputs (qi_reg_name[REGNO (x)], file);
6144 break;
6145 case 0:
6146 fputs (qi_high_reg_name[REGNO (x)], file);
6147 break;
6148 default:
6149 abort ();
6150 }
6151 }
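
/* Example (editorial note): the AMD extended register r9 prints as
   "r9b", "r9w", "r9d" or "r9" for codes 'b', 'w', 'k' and 'q'
   respectively, while a classic register such as %eax goes through
   qi_reg_name[]/hi_reg_name[], so code 'b' prints "al" (with a
   leading '%' in AT&T syntax).  */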
6152
6153 /* Locate some local-dynamic symbol still in use by this function
6154 so that we can print its name in some tls_local_dynamic_base
6155 pattern. */
6156
6157 static const char *
6158 get_some_local_dynamic_name ()
6159 {
6160 rtx insn;
6161
6162 if (cfun->machine->some_ld_name)
6163 return cfun->machine->some_ld_name;
6164
6165 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6166 if (INSN_P (insn)
6167 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6168 return cfun->machine->some_ld_name;
6169
6170 abort ();
6171 }
6172
6173 static int
6174 get_some_local_dynamic_name_1 (px, data)
6175 rtx *px;
6176 void *data ATTRIBUTE_UNUSED;
6177 {
6178 rtx x = *px;
6179
6180 if (GET_CODE (x) == SYMBOL_REF
6181 && local_dynamic_symbolic_operand (x, Pmode))
6182 {
6183 cfun->machine->some_ld_name = XSTR (x, 0);
6184 return 1;
6185 }
6186
6187 return 0;
6188 }
6189
6190 /* Meaning of CODE:
6191 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6192 C -- print opcode suffix for set/cmov insn.
6193 c -- like C, but print reversed condition
6194 F,f -- likewise, but for floating-point.
6195 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6196 nothing
6197 R -- print the prefix for register names.
6198 z -- print the opcode suffix for the size of the current operand.
6199 * -- print a star (in certain assembler syntax)
6200 A -- print an absolute memory reference.
6201 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6202 s -- print a shift double count, followed by the assembler's argument
6203 delimiter.
6204 b -- print the QImode name of the register for the indicated operand.
6205 %b0 would print %al if operands[0] is reg 0.
6206 w -- likewise, print the HImode name of the register.
6207 k -- likewise, print the SImode name of the register.
6208 q -- likewise, print the DImode name of the register.
6209 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6210 y -- print "st(0)" instead of "st" as a register.
6211 D -- print condition for SSE cmp instruction.
6212 P -- if PIC, print an @PLT suffix.
6213 X -- don't print any sort of PIC '@' suffix for a symbol.
6214 & -- print some in-use local-dynamic symbol name.
6215 */
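
/* Editorial example of how these codes are used by the output
   templates later in this file: "fistp%z0\t%0" uses %z0 to derive the
   387 size suffix from operand 0's mode, "fld\t%y1" uses %y to print
   "st(0)" rather than "st" for the stack top, and strings such as
   "ucomiss\t{%1, %0|%0, %1}" rely on the {att|intel} braces to select
   between the two assembler dialects.  */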
6216
6217 void
6218 print_operand (file, x, code)
6219 FILE *file;
6220 rtx x;
6221 int code;
6222 {
6223 if (code)
6224 {
6225 switch (code)
6226 {
6227 case '*':
6228 if (ASSEMBLER_DIALECT == ASM_ATT)
6229 putc ('*', file);
6230 return;
6231
6232 case '&':
6233 assemble_name (file, get_some_local_dynamic_name ());
6234 return;
6235
6236 case 'A':
6237 if (ASSEMBLER_DIALECT == ASM_ATT)
6238 putc ('*', file);
6239 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6240 {
6241 /* Intel syntax. For absolute addresses, registers should not
6242 be surrounded by brackets. */
6243 if (GET_CODE (x) != REG)
6244 {
6245 putc ('[', file);
6246 PRINT_OPERAND (file, x, 0);
6247 putc (']', file);
6248 return;
6249 }
6250 }
6251 else
6252 abort ();
6253
6254 PRINT_OPERAND (file, x, 0);
6255 return;
6256
6257
6258 case 'L':
6259 if (ASSEMBLER_DIALECT == ASM_ATT)
6260 putc ('l', file);
6261 return;
6262
6263 case 'W':
6264 if (ASSEMBLER_DIALECT == ASM_ATT)
6265 putc ('w', file);
6266 return;
6267
6268 case 'B':
6269 if (ASSEMBLER_DIALECT == ASM_ATT)
6270 putc ('b', file);
6271 return;
6272
6273 case 'Q':
6274 if (ASSEMBLER_DIALECT == ASM_ATT)
6275 putc ('l', file);
6276 return;
6277
6278 case 'S':
6279 if (ASSEMBLER_DIALECT == ASM_ATT)
6280 putc ('s', file);
6281 return;
6282
6283 case 'T':
6284 if (ASSEMBLER_DIALECT == ASM_ATT)
6285 putc ('t', file);
6286 return;
6287
6288 case 'z':
6289 /* 387 opcodes don't get size suffixes if the operands are
6290 registers. */
6291 if (STACK_REG_P (x))
6292 return;
6293
6294 /* Likewise if using Intel opcodes. */
6295 if (ASSEMBLER_DIALECT == ASM_INTEL)
6296 return;
6297
6298 /* This is the size of op from size of operand. */
6299 switch (GET_MODE_SIZE (GET_MODE (x)))
6300 {
6301 case 2:
6302 #ifdef HAVE_GAS_FILDS_FISTS
6303 putc ('s', file);
6304 #endif
6305 return;
6306
6307 case 4:
6308 if (GET_MODE (x) == SFmode)
6309 {
6310 putc ('s', file);
6311 return;
6312 }
6313 else
6314 putc ('l', file);
6315 return;
6316
6317 case 12:
6318 case 16:
6319 putc ('t', file);
6320 return;
6321
6322 case 8:
6323 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6324 {
6325 #ifdef GAS_MNEMONICS
6326 putc ('q', file);
6327 #else
6328 putc ('l', file);
6329 putc ('l', file);
6330 #endif
6331 }
6332 else
6333 putc ('l', file);
6334 return;
6335
6336 default:
6337 abort ();
6338 }
6339
6340 case 'b':
6341 case 'w':
6342 case 'k':
6343 case 'q':
6344 case 'h':
6345 case 'y':
6346 case 'X':
6347 case 'P':
6348 break;
6349
6350 case 's':
6351 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6352 {
6353 PRINT_OPERAND (file, x, 0);
6354 putc (',', file);
6355 }
6356 return;
6357
6358 case 'D':
6359 /* Little bit of braindamage here. The SSE compare instructions
6360 use completely different names for the comparisons than the
6361 fp conditional moves do. */
6362 switch (GET_CODE (x))
6363 {
6364 case EQ:
6365 case UNEQ:
6366 fputs ("eq", file);
6367 break;
6368 case LT:
6369 case UNLT:
6370 fputs ("lt", file);
6371 break;
6372 case LE:
6373 case UNLE:
6374 fputs ("le", file);
6375 break;
6376 case UNORDERED:
6377 fputs ("unord", file);
6378 break;
6379 case NE:
6380 case LTGT:
6381 fputs ("neq", file);
6382 break;
6383 case UNGE:
6384 case GE:
6385 fputs ("nlt", file);
6386 break;
6387 case UNGT:
6388 case GT:
6389 fputs ("nle", file);
6390 break;
6391 case ORDERED:
6392 fputs ("ord", file);
6393 break;
6394 default:
6395 abort ();
6396 break;
6397 }
6398 return;
6399 case 'O':
6400 #ifdef CMOV_SUN_AS_SYNTAX
6401 if (ASSEMBLER_DIALECT == ASM_ATT)
6402 {
6403 switch (GET_MODE (x))
6404 {
6405 case HImode: putc ('w', file); break;
6406 case SImode:
6407 case SFmode: putc ('l', file); break;
6408 case DImode:
6409 case DFmode: putc ('q', file); break;
6410 default: abort ();
6411 }
6412 putc ('.', file);
6413 }
6414 #endif
6415 return;
6416 case 'C':
6417 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6418 return;
6419 case 'F':
6420 #ifdef CMOV_SUN_AS_SYNTAX
6421 if (ASSEMBLER_DIALECT == ASM_ATT)
6422 putc ('.', file);
6423 #endif
6424 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6425 return;
6426
6427 /* Like above, but reverse condition */
6428 case 'c':
6429 /* Check to see if argument to %c is really a constant
6430 and not a condition code which needs to be reversed. */
6431 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6432 {
6433 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6434 return;
6435 }
6436 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6437 return;
6438 case 'f':
6439 #ifdef CMOV_SUN_AS_SYNTAX
6440 if (ASSEMBLER_DIALECT == ASM_ATT)
6441 putc ('.', file);
6442 #endif
6443 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6444 return;
6445 case '+':
6446 {
6447 rtx x;
6448
6449 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6450 return;
6451
6452 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6453 if (x)
6454 {
6455 int pred_val = INTVAL (XEXP (x, 0));
6456
6457 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6458 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6459 {
6460 int taken = pred_val > REG_BR_PROB_BASE / 2;
6461 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6462
6463 /* Emit hints only in the case the default branch prediction
6464 heuristics would fail. */
6465 if (taken != cputaken)
6466 {
6467 /* We use 3e (DS) prefix for taken branches and
6468 2e (CS) prefix for not taken branches. */
6469 if (taken)
6470 fputs ("ds ; ", file);
6471 else
6472 fputs ("cs ; ", file);
6473 }
6474 }
6475 }
6476 return;
6477 }
6478 default:
6479 output_operand_lossage ("invalid operand code `%c'", code);
6480 }
6481 }
6482
6483 if (GET_CODE (x) == REG)
6484 {
6485 PRINT_REG (x, code, file);
6486 }
6487
6488 else if (GET_CODE (x) == MEM)
6489 {
6490 /* No `byte ptr' prefix for call instructions. */
6491 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6492 {
6493 const char * size;
6494 switch (GET_MODE_SIZE (GET_MODE (x)))
6495 {
6496 case 1: size = "BYTE"; break;
6497 case 2: size = "WORD"; break;
6498 case 4: size = "DWORD"; break;
6499 case 8: size = "QWORD"; break;
6500 case 12: size = "XWORD"; break;
6501 case 16: size = "XMMWORD"; break;
6502 default:
6503 abort ();
6504 }
6505
6506 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6507 if (code == 'b')
6508 size = "BYTE";
6509 else if (code == 'w')
6510 size = "WORD";
6511 else if (code == 'k')
6512 size = "DWORD";
6513
6514 fputs (size, file);
6515 fputs (" PTR ", file);
6516 }
6517
6518 x = XEXP (x, 0);
6519 if (flag_pic && CONSTANT_ADDRESS_P (x))
6520 output_pic_addr_const (file, x, code);
6521 /* Avoid (%rip) for call operands. */
6522 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6523 && GET_CODE (x) != CONST_INT)
6524 output_addr_const (file, x);
6525 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6526 output_operand_lossage ("invalid constraints for operand");
6527 else
6528 output_address (x);
6529 }
6530
6531 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6532 {
6533 REAL_VALUE_TYPE r;
6534 long l;
6535
6536 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6537 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6538
6539 if (ASSEMBLER_DIALECT == ASM_ATT)
6540 putc ('$', file);
6541 fprintf (file, "0x%lx", l);
6542 }
6543
6544 /* These float cases don't actually occur as immediate operands. */
6545 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6546 {
6547 REAL_VALUE_TYPE r;
6548 char dstr[30];
6549
6550 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6551 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6552 fprintf (file, "%s", dstr);
6553 }
6554
6555 else if (GET_CODE (x) == CONST_DOUBLE
6556 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6557 {
6558 REAL_VALUE_TYPE r;
6559 char dstr[30];
6560
6561 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6562 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6563 fprintf (file, "%s", dstr);
6564 }
6565
6566 else if (GET_CODE (x) == CONST
6567 && GET_CODE (XEXP (x, 0)) == UNSPEC
6568 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6569 {
6570 if (ASSEMBLER_DIALECT == ASM_INTEL)
6571 fputs ("DWORD PTR ", file);
6572 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6573 putc ('%', file);
6574 fputs ("gs:0", file);
6575 }
6576
6577 else
6578 {
6579 if (code != 'P')
6580 {
6581 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6582 {
6583 if (ASSEMBLER_DIALECT == ASM_ATT)
6584 putc ('$', file);
6585 }
6586 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6587 || GET_CODE (x) == LABEL_REF)
6588 {
6589 if (ASSEMBLER_DIALECT == ASM_ATT)
6590 putc ('$', file);
6591 else
6592 fputs ("OFFSET FLAT:", file);
6593 }
6594 }
6595 if (GET_CODE (x) == CONST_INT)
6596 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6597 else if (flag_pic)
6598 output_pic_addr_const (file, x, code);
6599 else
6600 output_addr_const (file, x);
6601 }
6602 }
6603 \f
6604 /* Print a memory operand whose address is ADDR. */
6605
6606 void
6607 print_operand_address (file, addr)
6608 FILE *file;
6609 register rtx addr;
6610 {
6611 struct ix86_address parts;
6612 rtx base, index, disp;
6613 int scale;
6614
6615 if (! ix86_decompose_address (addr, &parts))
6616 abort ();
6617
6618 base = parts.base;
6619 index = parts.index;
6620 disp = parts.disp;
6621 scale = parts.scale;
6622
6623 if (!base && !index)
6624 {
6625 /* A displacement-only address requires special attention. */
6626
6627 if (GET_CODE (disp) == CONST_INT)
6628 {
6629 if (ASSEMBLER_DIALECT == ASM_INTEL)
6630 {
6631 if (USER_LABEL_PREFIX[0] == 0)
6632 putc ('%', file);
6633 fputs ("ds:", file);
6634 }
6635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6636 }
6637 else if (flag_pic)
6638 output_pic_addr_const (file, addr, 0);
6639 else
6640 output_addr_const (file, addr);
6641
6642 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6643 if (TARGET_64BIT
6644 && (GET_CODE (addr) == SYMBOL_REF
6645 || GET_CODE (addr) == LABEL_REF
6646 || (GET_CODE (addr) == CONST
6647 && GET_CODE (XEXP (addr, 0)) == PLUS
6648 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6649 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6650 fputs ("(%rip)", file);
6651 }
6652 else
6653 {
6654 if (ASSEMBLER_DIALECT == ASM_ATT)
6655 {
6656 if (disp)
6657 {
6658 if (flag_pic)
6659 output_pic_addr_const (file, disp, 0);
6660 else if (GET_CODE (disp) == LABEL_REF)
6661 output_asm_label (disp);
6662 else
6663 output_addr_const (file, disp);
6664 }
6665
6666 putc ('(', file);
6667 if (base)
6668 PRINT_REG (base, 0, file);
6669 if (index)
6670 {
6671 putc (',', file);
6672 PRINT_REG (index, 0, file);
6673 if (scale != 1)
6674 fprintf (file, ",%d", scale);
6675 }
6676 putc (')', file);
6677 }
6678 else
6679 {
6680 rtx offset = NULL_RTX;
6681
6682 if (disp)
6683 {
6684 /* Pull out the offset of a symbol; print any symbol itself. */
6685 if (GET_CODE (disp) == CONST
6686 && GET_CODE (XEXP (disp, 0)) == PLUS
6687 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6688 {
6689 offset = XEXP (XEXP (disp, 0), 1);
6690 disp = gen_rtx_CONST (VOIDmode,
6691 XEXP (XEXP (disp, 0), 0));
6692 }
6693
6694 if (flag_pic)
6695 output_pic_addr_const (file, disp, 0);
6696 else if (GET_CODE (disp) == LABEL_REF)
6697 output_asm_label (disp);
6698 else if (GET_CODE (disp) == CONST_INT)
6699 offset = disp;
6700 else
6701 output_addr_const (file, disp);
6702 }
6703
6704 putc ('[', file);
6705 if (base)
6706 {
6707 PRINT_REG (base, 0, file);
6708 if (offset)
6709 {
6710 if (INTVAL (offset) >= 0)
6711 putc ('+', file);
6712 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6713 }
6714 }
6715 else if (offset)
6716 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6717 else
6718 putc ('0', file);
6719
6720 if (index)
6721 {
6722 putc ('+', file);
6723 PRINT_REG (index, 0, file);
6724 if (scale != 1)
6725 fprintf (file, "*%d", scale);
6726 }
6727 putc (']', file);
6728 }
6729 }
6730 }
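
/* Example (editorial sketch): an address with base %ebx, index %eax,
   scale 4 and displacement 16 is printed as "16(%ebx,%eax,4)" in
   AT&T syntax and in the bracketed "[base+16+index*4]" form in Intel
   syntax; a bare symbolic displacement on a 64-bit target instead
   gets the shorter "(%rip)" form.  */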
6731
6732 bool
6733 output_addr_const_extra (file, x)
6734 FILE *file;
6735 rtx x;
6736 {
6737 rtx op;
6738
6739 if (GET_CODE (x) != UNSPEC)
6740 return false;
6741
6742 op = XVECEXP (x, 0, 0);
6743 switch (XINT (x, 1))
6744 {
6745 case UNSPEC_GOTTPOFF:
6746 output_addr_const (file, op);
6747 fputs ("@GOTTPOFF", file);
6748 break;
6749 case UNSPEC_TPOFF:
6750 output_addr_const (file, op);
6751 fputs ("@TPOFF", file);
6752 break;
6753 case UNSPEC_NTPOFF:
6754 output_addr_const (file, op);
6755 fputs ("@NTPOFF", file);
6756 break;
6757 case UNSPEC_DTPOFF:
6758 output_addr_const (file, op);
6759 fputs ("@DTPOFF", file);
6760 break;
6761
6762 default:
6763 return false;
6764 }
6765
6766 return true;
6767 }
6768 \f
6769 /* Split one or more DImode RTL references into pairs of SImode
6770 references. The RTL can be REG, offsettable MEM, integer constant, or
6771 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6772 split and "num" is its length. lo_half and hi_half are output arrays
6773 that parallel "operands". */
6774
6775 void
6776 split_di (operands, num, lo_half, hi_half)
6777 rtx operands[];
6778 int num;
6779 rtx lo_half[], hi_half[];
6780 {
6781 while (num--)
6782 {
6783 rtx op = operands[num];
6784
6785 /* simplify_subreg refuses to split volatile memory addresses,
6786 but we still have to handle them. */
6787 if (GET_CODE (op) == MEM)
6788 {
6789 lo_half[num] = adjust_address (op, SImode, 0);
6790 hi_half[num] = adjust_address (op, SImode, 4);
6791 }
6792 else
6793 {
6794 lo_half[num] = simplify_gen_subreg (SImode, op,
6795 GET_MODE (op) == VOIDmode
6796 ? DImode : GET_MODE (op), 0);
6797 hi_half[num] = simplify_gen_subreg (SImode, op,
6798 GET_MODE (op) == VOIDmode
6799 ? DImode : GET_MODE (op), 4);
6800 }
6801 }
6802 }
6803 /* Split one or more TImode RTL references into pairs of DImode
6804 references. The RTL can be REG, offsettable MEM, integer constant, or
6805 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6806 split and "num" is its length. lo_half and hi_half are output arrays
6807 that parallel "operands". */
6808
6809 void
6810 split_ti (operands, num, lo_half, hi_half)
6811 rtx operands[];
6812 int num;
6813 rtx lo_half[], hi_half[];
6814 {
6815 while (num--)
6816 {
6817 rtx op = operands[num];
6818
6819 /* simplify_subreg refuses to split volatile memory addresses, but we
6820 still have to handle them. */
6821 if (GET_CODE (op) == MEM)
6822 {
6823 lo_half[num] = adjust_address (op, DImode, 0);
6824 hi_half[num] = adjust_address (op, DImode, 8);
6825 }
6826 else
6827 {
6828 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6829 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6830 }
6831 }
6832 }
6833 \f
6834 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6835 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6836 is the expression of the binary operation. The output may either be
6837 emitted here, or returned to the caller, like all output_* functions.
6838
6839 There is no guarantee that the operands are the same mode, as they
6840 might be within FLOAT or FLOAT_EXTEND expressions. */
6841
6842 #ifndef SYSV386_COMPAT
6843 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6844 wants to fix the assemblers because that causes incompatibility
6845 with gcc. No-one wants to fix gcc because that causes
6846 incompatibility with assemblers... You can use the option of
6847 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6848 #define SYSV386_COMPAT 1
6849 #endif
6850
6851 const char *
6852 output_387_binary_op (insn, operands)
6853 rtx insn;
6854 rtx *operands;
6855 {
6856 static char buf[30];
6857 const char *p;
6858 const char *ssep;
6859 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6860
6861 #ifdef ENABLE_CHECKING
6862 /* Even if we do not want to check the inputs, this documents the input
6863 constraints, which helps in understanding the following code. */
6864 if (STACK_REG_P (operands[0])
6865 && ((REG_P (operands[1])
6866 && REGNO (operands[0]) == REGNO (operands[1])
6867 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6868 || (REG_P (operands[2])
6869 && REGNO (operands[0]) == REGNO (operands[2])
6870 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6871 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6872 ; /* ok */
6873 else if (!is_sse)
6874 abort ();
6875 #endif
6876
6877 switch (GET_CODE (operands[3]))
6878 {
6879 case PLUS:
6880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6882 p = "fiadd";
6883 else
6884 p = "fadd";
6885 ssep = "add";
6886 break;
6887
6888 case MINUS:
6889 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6890 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6891 p = "fisub";
6892 else
6893 p = "fsub";
6894 ssep = "sub";
6895 break;
6896
6897 case MULT:
6898 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6899 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6900 p = "fimul";
6901 else
6902 p = "fmul";
6903 ssep = "mul";
6904 break;
6905
6906 case DIV:
6907 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6908 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6909 p = "fidiv";
6910 else
6911 p = "fdiv";
6912 ssep = "div";
6913 break;
6914
6915 default:
6916 abort ();
6917 }
6918
6919 if (is_sse)
6920 {
6921 strcpy (buf, ssep);
6922 if (GET_MODE (operands[0]) == SFmode)
6923 strcat (buf, "ss\t{%2, %0|%0, %2}");
6924 else
6925 strcat (buf, "sd\t{%2, %0|%0, %2}");
6926 return buf;
6927 }
6928 strcpy (buf, p);
6929
6930 switch (GET_CODE (operands[3]))
6931 {
6932 case MULT:
6933 case PLUS:
6934 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6935 {
6936 rtx temp = operands[2];
6937 operands[2] = operands[1];
6938 operands[1] = temp;
6939 }
6940
6941 /* We know operands[0] == operands[1]. */
6942
6943 if (GET_CODE (operands[2]) == MEM)
6944 {
6945 p = "%z2\t%2";
6946 break;
6947 }
6948
6949 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6950 {
6951 if (STACK_TOP_P (operands[0]))
6952 /* How is it that we are storing to a dead operand[2]?
6953 Well, presumably operands[1] is dead too. We can't
6954 store the result to st(0) as st(0) gets popped on this
6955 instruction. Instead store to operands[2] (which I
6956 think has to be st(1)). st(1) will be popped later.
6957 gcc <= 2.8.1 didn't have this check and generated
6958 assembly code that the Unixware assembler rejected. */
6959 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6960 else
6961 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6962 break;
6963 }
6964
6965 if (STACK_TOP_P (operands[0]))
6966 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6967 else
6968 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6969 break;
6970
6971 case MINUS:
6972 case DIV:
6973 if (GET_CODE (operands[1]) == MEM)
6974 {
6975 p = "r%z1\t%1";
6976 break;
6977 }
6978
6979 if (GET_CODE (operands[2]) == MEM)
6980 {
6981 p = "%z2\t%2";
6982 break;
6983 }
6984
6985 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6986 {
6987 #if SYSV386_COMPAT
6988 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6989 derived assemblers, confusingly reverse the direction of
6990 the operation for fsub{r} and fdiv{r} when the
6991 destination register is not st(0). The Intel assembler
6992 doesn't have this brain damage. Read !SYSV386_COMPAT to
6993 figure out what the hardware really does. */
6994 if (STACK_TOP_P (operands[0]))
6995 p = "{p\t%0, %2|rp\t%2, %0}";
6996 else
6997 p = "{rp\t%2, %0|p\t%0, %2}";
6998 #else
6999 if (STACK_TOP_P (operands[0]))
7000 /* As above for fmul/fadd, we can't store to st(0). */
7001 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7002 else
7003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7004 #endif
7005 break;
7006 }
7007
7008 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7009 {
7010 #if SYSV386_COMPAT
7011 if (STACK_TOP_P (operands[0]))
7012 p = "{rp\t%0, %1|p\t%1, %0}";
7013 else
7014 p = "{p\t%1, %0|rp\t%0, %1}";
7015 #else
7016 if (STACK_TOP_P (operands[0]))
7017 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7018 else
7019 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7020 #endif
7021 break;
7022 }
7023
7024 if (STACK_TOP_P (operands[0]))
7025 {
7026 if (STACK_TOP_P (operands[1]))
7027 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7028 else
7029 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7030 break;
7031 }
7032 else if (STACK_TOP_P (operands[1]))
7033 {
7034 #if SYSV386_COMPAT
7035 p = "{\t%1, %0|r\t%0, %1}";
7036 #else
7037 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7038 #endif
7039 }
7040 else
7041 {
7042 #if SYSV386_COMPAT
7043 p = "{r\t%2, %0|\t%0, %2}";
7044 #else
7045 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7046 #endif
7047 }
7048 break;
7049
7050 default:
7051 abort ();
7052 }
7053
7054 strcat (buf, p);
7055 return buf;
7056 }
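
/* Example (editorial note): an SFmode addition with SSE operands
   returns "addss\t{%2, %0|%0, %2}"; a 387 addition where operand 0 is
   the stack top and operand 2 is another, still-live stack register
   returns "fadd\t{%y2, %0|%0, %y2}", i.e. st(0) = st(0) + st(r2).  */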
7057
7058 /* Output code to initialize the control word copies used by the
7059 trunc?f?i patterns. NORMAL is set to the current control word, while
7060 ROUND_DOWN is set to a control word that rounds towards zero (truncation). */
7061 void
7062 emit_i387_cw_initialization (normal, round_down)
7063 rtx normal, round_down;
7064 {
7065 rtx reg = gen_reg_rtx (HImode);
7066
7067 emit_insn (gen_x86_fnstcw_1 (normal));
7068 emit_move_insn (reg, normal);
7069 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7070 && !TARGET_64BIT)
7071 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7072 else
7073 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7074 emit_move_insn (round_down, reg);
7075 }
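
/* Editorial note, assuming the standard x87 control word layout:
   bits 10-11 of the control word form the rounding-control field, so
   OR-ing in 0xc00 sets that field to 11b, i.e. round towards zero
   (truncation), which is what the trunc?f?i patterns need.  The
   movsi_insv_1 variant is only used when the target has no partial
   register stalls, is not optimizing for size and is not 64-bit.  */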
7076
7077 /* Output code for INSN to convert a float to a signed int. OPERANDS
7078 are the insn operands. The output may be [HSD]Imode and the input
7079 operand may be [SDX]Fmode. */
7080
7081 const char *
7082 output_fix_trunc (insn, operands)
7083 rtx insn;
7084 rtx *operands;
7085 {
7086 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7087 int dimode_p = GET_MODE (operands[0]) == DImode;
7088
7089 /* Jump through a hoop or two for DImode, since the hardware has no
7090 non-popping instruction. We used to do this a different way, but
7091 that was somewhat fragile and broke with post-reload splitters. */
7092 if (dimode_p && !stack_top_dies)
7093 output_asm_insn ("fld\t%y1", operands);
7094
7095 if (!STACK_TOP_P (operands[1]))
7096 abort ();
7097
7098 if (GET_CODE (operands[0]) != MEM)
7099 abort ();
7100
7101 output_asm_insn ("fldcw\t%3", operands);
7102 if (stack_top_dies || dimode_p)
7103 output_asm_insn ("fistp%z0\t%0", operands);
7104 else
7105 output_asm_insn ("fist%z0\t%0", operands);
7106 output_asm_insn ("fldcw\t%2", operands);
7107
7108 return "";
7109 }
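
/* Example (editorial note): for an SImode destination whose 387 input
   dies on the stack top, the routine above emits

	fldcw	%3		(load the truncating control word)
	fistpl	%0		(pop and store the integer result)
	fldcw	%2		(restore the original control word)

   with %z0 expanding to the 'l' suffix for SImode.  */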
7110
7111 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7112 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7113 when fucom should be used. */
7114
7115 const char *
7116 output_fp_compare (insn, operands, eflags_p, unordered_p)
7117 rtx insn;
7118 rtx *operands;
7119 int eflags_p, unordered_p;
7120 {
7121 int stack_top_dies;
7122 rtx cmp_op0 = operands[0];
7123 rtx cmp_op1 = operands[1];
7124 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7125
7126 if (eflags_p == 2)
7127 {
7128 cmp_op0 = cmp_op1;
7129 cmp_op1 = operands[2];
7130 }
7131 if (is_sse)
7132 {
7133 if (GET_MODE (operands[0]) == SFmode)
7134 if (unordered_p)
7135 return "ucomiss\t{%1, %0|%0, %1}";
7136 else
7137 return "comiss\t{%1, %0|%0, %y}";
7138 else
7139 if (unordered_p)
7140 return "ucomisd\t{%1, %0|%0, %1}";
7141 else
7142 return "comisd\t{%1, %0|%0, %y}";
7143 }
7144
7145 if (! STACK_TOP_P (cmp_op0))
7146 abort ();
7147
7148 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7149
7150 if (STACK_REG_P (cmp_op1)
7151 && stack_top_dies
7152 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7153 && REGNO (cmp_op1) != FIRST_STACK_REG)
7154 {
7155 /* If the top of the 387 stack dies, and the other operand
7156 is also a stack register that dies, then this must be a
7157 `fcompp' float compare. */
7158
7159 if (eflags_p == 1)
7160 {
7161 /* There is no double popping fcomi variant. Fortunately,
7162 eflags is immune from the fstp's cc clobbering. */
7163 if (unordered_p)
7164 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7165 else
7166 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7167 return "fstp\t%y0";
7168 }
7169 else
7170 {
7171 if (eflags_p == 2)
7172 {
7173 if (unordered_p)
7174 return "fucompp\n\tfnstsw\t%0";
7175 else
7176 return "fcompp\n\tfnstsw\t%0";
7177 }
7178 else
7179 {
7180 if (unordered_p)
7181 return "fucompp";
7182 else
7183 return "fcompp";
7184 }
7185 }
7186 }
7187 else
7188 {
7189 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7190
7191 static const char * const alt[24] =
7192 {
7193 "fcom%z1\t%y1",
7194 "fcomp%z1\t%y1",
7195 "fucom%z1\t%y1",
7196 "fucomp%z1\t%y1",
7197
7198 "ficom%z1\t%y1",
7199 "ficomp%z1\t%y1",
7200 NULL,
7201 NULL,
7202
7203 "fcomi\t{%y1, %0|%0, %y1}",
7204 "fcomip\t{%y1, %0|%0, %y1}",
7205 "fucomi\t{%y1, %0|%0, %y1}",
7206 "fucomip\t{%y1, %0|%0, %y1}",
7207
7208 NULL,
7209 NULL,
7210 NULL,
7211 NULL,
7212
7213 "fcom%z2\t%y2\n\tfnstsw\t%0",
7214 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7215 "fucom%z2\t%y2\n\tfnstsw\t%0",
7216 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7217
7218 "ficom%z2\t%y2\n\tfnstsw\t%0",
7219 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7220 NULL,
7221 NULL
7222 };
7223
7224 int mask;
7225 const char *ret;
7226
7227 mask = eflags_p << 3;
7228 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7229 mask |= unordered_p << 1;
7230 mask |= stack_top_dies;
7231
7232 if (mask >= 24)
7233 abort ();
7234 ret = alt[mask];
7235 if (ret == NULL)
7236 abort ();
7237
7238 return ret;
7239 }
7240 }
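
/* Example (editorial note): with eflags_p == 1, a floating-point
   second operand, an ordered compare and a dying stack top, the mask
   computed above is (1 << 3) | 0 | 0 | 1 == 9, which selects
   "fcomip\t{%y1, %0|%0, %y1}" from the alt[] table.  */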
7241
7242 void
7243 ix86_output_addr_vec_elt (file, value)
7244 FILE *file;
7245 int value;
7246 {
7247 const char *directive = ASM_LONG;
7248
7249 if (TARGET_64BIT)
7250 {
7251 #ifdef ASM_QUAD
7252 directive = ASM_QUAD;
7253 #else
7254 abort ();
7255 #endif
7256 }
7257
7258 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7259 }
7260
7261 void
7262 ix86_output_addr_diff_elt (file, value, rel)
7263 FILE *file;
7264 int value, rel;
7265 {
7266 if (TARGET_64BIT)
7267 fprintf (file, "%s%s%d-%s%d\n",
7268 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7269 else if (HAVE_AS_GOTOFF_IN_DATA)
7270 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7271 else
7272 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7273 ASM_LONG, LPREFIX, value);
7274 }
7275 \f
7276 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7277 for the target. */
7278
7279 void
7280 ix86_expand_clear (dest)
7281 rtx dest;
7282 {
7283 rtx tmp;
7284
7285 /* We play register width games, which are only valid after reload. */
7286 if (!reload_completed)
7287 abort ();
7288
7289 /* Avoid HImode and its attendant prefix byte. */
7290 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7291 dest = gen_rtx_REG (SImode, REGNO (dest));
7292
7293 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7294
7295 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7296 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7297 {
7298 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7299 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7300 }
7301
7302 emit_insn (tmp);
7303 }
7304
7305 /* X is an unchanging MEM. If it is a constant pool reference, return
7306 the constant pool rtx, else NULL. */
7307
7308 static rtx
7309 maybe_get_pool_constant (x)
7310 rtx x;
7311 {
7312 x = XEXP (x, 0);
7313
7314 if (flag_pic)
7315 {
7316 if (GET_CODE (x) != PLUS)
7317 return NULL_RTX;
7318 if (XEXP (x, 0) != pic_offset_table_rtx)
7319 return NULL_RTX;
7320 x = XEXP (x, 1);
7321 if (GET_CODE (x) != CONST)
7322 return NULL_RTX;
7323 x = XEXP (x, 0);
7324 if (GET_CODE (x) != UNSPEC)
7325 return NULL_RTX;
7326 if (XINT (x, 1) != UNSPEC_GOTOFF)
7327 return NULL_RTX;
7328 x = XVECEXP (x, 0, 0);
7329 }
7330
7331 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7332 return get_pool_constant (x);
7333
7334 return NULL_RTX;
7335 }
7336
7337 void
7338 ix86_expand_move (mode, operands)
7339 enum machine_mode mode;
7340 rtx operands[];
7341 {
7342 int strict = (reload_in_progress || reload_completed);
7343 rtx insn, op0, op1, tmp;
7344
7345 op0 = operands[0];
7346 op1 = operands[1];
7347
7348 /* ??? We have a slight problem. We need to say that tls symbols are
7349 not legitimate constants so that reload does not helpfully reload
7350 these constants from a REG_EQUIV, which we cannot handle. (Recall
7351 that general- and local-dynamic address resolution requires a
7352 function call.)
7353
7354 However, if we say that tls symbols are not legitimate constants,
7355 then emit_move_insn will helpfully drop them into the constant pool.
7356
7357 It is far easier to work around emit_move_insn than reload. Recognize
7358 the MEM that we would have created and extract the symbol_ref. */
7359
7360 if (mode == Pmode
7361 && GET_CODE (op1) == MEM
7362 && RTX_UNCHANGING_P (op1))
7363 {
7364 tmp = maybe_get_pool_constant (op1);
7365 /* Note that we only care about symbolic constants here, which
7366 unlike CONST_INT will always have a proper mode. */
7367 if (tmp && GET_MODE (tmp) == Pmode)
7368 op1 = tmp;
7369 }
7370
7371 if (tls_symbolic_operand (op1, Pmode))
7372 {
7373 op1 = legitimize_address (op1, op1, VOIDmode);
7374 if (GET_CODE (op0) == MEM)
7375 {
7376 tmp = gen_reg_rtx (mode);
7377 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7378 op1 = tmp;
7379 }
7380 }
7381 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7382 {
7383 if (GET_CODE (op0) == MEM)
7384 op1 = force_reg (Pmode, op1);
7385 else
7386 {
7387 rtx temp = op0;
7388 if (GET_CODE (temp) != REG)
7389 temp = gen_reg_rtx (Pmode);
7390 temp = legitimize_pic_address (op1, temp);
7391 if (temp == op0)
7392 return;
7393 op1 = temp;
7394 }
7395 }
7396 else
7397 {
7398 if (GET_CODE (op0) == MEM
7399 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7400 || !push_operand (op0, mode))
7401 && GET_CODE (op1) == MEM)
7402 op1 = force_reg (mode, op1);
7403
7404 if (push_operand (op0, mode)
7405 && ! general_no_elim_operand (op1, mode))
7406 op1 = copy_to_mode_reg (mode, op1);
7407
7408 /* Force large constants in 64-bit compilation into a register
7409 to get them CSEed. */
7410 if (TARGET_64BIT && mode == DImode
7411 && immediate_operand (op1, mode)
7412 && !x86_64_zero_extended_value (op1)
7413 && !register_operand (op0, mode)
7414 && optimize && !reload_completed && !reload_in_progress)
7415 op1 = copy_to_mode_reg (mode, op1);
7416
7417 if (FLOAT_MODE_P (mode))
7418 {
7419 /* If we are loading a floating point constant to a register,
7420 force the value to memory now, since we'll get better code
7421 out of the back end. */
7422
7423 if (strict)
7424 ;
7425 else if (GET_CODE (op1) == CONST_DOUBLE
7426 && register_operand (op0, mode))
7427 op1 = validize_mem (force_const_mem (mode, op1));
7428 }
7429 }
7430
7431 insn = gen_rtx_SET (VOIDmode, op0, op1);
7432
7433 emit_insn (insn);
7434 }
7435
7436 void
7437 ix86_expand_vector_move (mode, operands)
7438 enum machine_mode mode;
7439 rtx operands[];
7440 {
7441 /* Force constants other than zero into memory. We do not know how
7442 the instructions used to build constants modify the upper 64 bits
7443 of the register; once we have that information we may be able
7444 to handle some of them more efficiently. */
7445 if ((reload_in_progress | reload_completed) == 0
7446 && register_operand (operands[0], mode)
7447 && CONSTANT_P (operands[1]))
7448 {
7449 rtx addr = gen_reg_rtx (Pmode);
7450 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7451 operands[1] = gen_rtx_MEM (mode, addr);
7452 }
7453
7454 /* Make operand1 a register if it isn't already. */
7455 if ((reload_in_progress | reload_completed) == 0
7456 && !register_operand (operands[0], mode)
7457 && !register_operand (operands[1], mode)
7458 && operands[1] != CONST0_RTX (mode))
7459 {
7460 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7461 emit_move_insn (operands[0], temp);
7462 return;
7463 }
7464
7465 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7466 }
7467
7468 /* Attempt to expand a binary operator. Make the expansion closer to the
7469 actual machine than just general_operand, which would allow 3 separate
7470 memory references (one output, two input) in a single insn. */
7471
7472 void
7473 ix86_expand_binary_operator (code, mode, operands)
7474 enum rtx_code code;
7475 enum machine_mode mode;
7476 rtx operands[];
7477 {
7478 int matching_memory;
7479 rtx src1, src2, dst, op, clob;
7480
7481 dst = operands[0];
7482 src1 = operands[1];
7483 src2 = operands[2];
7484
7485 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7486 if (GET_RTX_CLASS (code) == 'c'
7487 && (rtx_equal_p (dst, src2)
7488 || immediate_operand (src1, mode)))
7489 {
7490 rtx temp = src1;
7491 src1 = src2;
7492 src2 = temp;
7493 }
7494
7495 /* If the destination is memory, and we do not have matching source
7496 operands, do things in registers. */
7497 matching_memory = 0;
7498 if (GET_CODE (dst) == MEM)
7499 {
7500 if (rtx_equal_p (dst, src1))
7501 matching_memory = 1;
7502 else if (GET_RTX_CLASS (code) == 'c'
7503 && rtx_equal_p (dst, src2))
7504 matching_memory = 2;
7505 else
7506 dst = gen_reg_rtx (mode);
7507 }
7508
7509 /* Both source operands cannot be in memory. */
7510 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7511 {
7512 if (matching_memory != 2)
7513 src2 = force_reg (mode, src2);
7514 else
7515 src1 = force_reg (mode, src1);
7516 }
7517
7518 /* If the operation is not commutable, source 1 cannot be a constant
7519 or non-matching memory. */
7520 if ((CONSTANT_P (src1)
7521 || (!matching_memory && GET_CODE (src1) == MEM))
7522 && GET_RTX_CLASS (code) != 'c')
7523 src1 = force_reg (mode, src1);
7524
7525 /* If optimizing, copy to regs to improve CSE */
7526 if (optimize && ! no_new_pseudos)
7527 {
7528 if (GET_CODE (dst) == MEM)
7529 dst = gen_reg_rtx (mode);
7530 if (GET_CODE (src1) == MEM)
7531 src1 = force_reg (mode, src1);
7532 if (GET_CODE (src2) == MEM)
7533 src2 = force_reg (mode, src2);
7534 }
7535
7536 /* Emit the instruction. */
7537
7538 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7539 if (reload_in_progress)
7540 {
7541 /* Reload doesn't know about the flags register, and doesn't know that
7542 it doesn't want to clobber it. We can only do this with PLUS. */
7543 if (code != PLUS)
7544 abort ();
7545 emit_insn (op);
7546 }
7547 else
7548 {
7549 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7550 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7551 }
7552
7553 /* Fix up the destination if needed. */
7554 if (dst != operands[0])
7555 emit_move_insn (operands[0], dst);
7556 }
7557
7558 /* Return TRUE or FALSE depending on whether the binary operator meets the
7559 appropriate constraints. */
7560
7561 int
7562 ix86_binary_operator_ok (code, mode, operands)
7563 enum rtx_code code;
7564 enum machine_mode mode ATTRIBUTE_UNUSED;
7565 rtx operands[3];
7566 {
7567 /* Both source operands cannot be in memory. */
7568 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7569 return 0;
7570 /* If the operation is not commutable, source 1 cannot be a constant. */
7571 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7572 return 0;
7573 /* If the destination is memory, we must have a matching source operand. */
7574 if (GET_CODE (operands[0]) == MEM
7575 && ! (rtx_equal_p (operands[0], operands[1])
7576 || (GET_RTX_CLASS (code) == 'c'
7577 && rtx_equal_p (operands[0], operands[2]))))
7578 return 0;
7579 /* If the operation is not commutable and the source 1 is memory, we must
7580 have a matching destination. */
7581 if (GET_CODE (operands[1]) == MEM
7582 && GET_RTX_CLASS (code) != 'c'
7583 && ! rtx_equal_p (operands[0], operands[1]))
7584 return 0;
7585 return 1;
7586 }
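
/* Editorial example of the constraints checked above: an addition
   such as "addl %eax, (%ebx)" is acceptable because the memory
   destination matches one of the sources, whereas two memory sources,
   or a memory destination matching neither source, are rejected --
   no i386 ALU instruction takes more than one memory operand.  */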
7587
7588 /* Attempt to expand a unary operator. Make the expansion closer to the
7589 actual machine than just general_operand, which would allow 2 separate
7590 memory references (one output, one input) in a single insn. */
7591
7592 void
7593 ix86_expand_unary_operator (code, mode, operands)
7594 enum rtx_code code;
7595 enum machine_mode mode;
7596 rtx operands[];
7597 {
7598 int matching_memory;
7599 rtx src, dst, op, clob;
7600
7601 dst = operands[0];
7602 src = operands[1];
7603
7604 /* If the destination is memory, and we do not have matching source
7605 operands, do things in registers. */
7606 matching_memory = 0;
7607 if (GET_CODE (dst) == MEM)
7608 {
7609 if (rtx_equal_p (dst, src))
7610 matching_memory = 1;
7611 else
7612 dst = gen_reg_rtx (mode);
7613 }
7614
7615 /* When source operand is memory, destination must match. */
7616 if (!matching_memory && GET_CODE (src) == MEM)
7617 src = force_reg (mode, src);
7618
7619 /* If optimizing, copy to regs to improve CSE */
7620 if (optimize && ! no_new_pseudos)
7621 {
7622 if (GET_CODE (dst) == MEM)
7623 dst = gen_reg_rtx (mode);
7624 if (GET_CODE (src) == MEM)
7625 src = force_reg (mode, src);
7626 }
7627
7628 /* Emit the instruction. */
7629
7630 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7631 if (reload_in_progress || code == NOT)
7632 {
7633 /* Reload doesn't know about the flags register, and doesn't know that
7634 it doesn't want to clobber it. */
7635 if (code != NOT)
7636 abort ();
7637 emit_insn (op);
7638 }
7639 else
7640 {
7641 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7642 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7643 }
7644
7645 /* Fix up the destination if needed. */
7646 if (dst != operands[0])
7647 emit_move_insn (operands[0], dst);
7648 }
7649
7650 /* Return TRUE or FALSE depending on whether the unary operator meets the
7651 appropriate constraints. */
7652
7653 int
7654 ix86_unary_operator_ok (code, mode, operands)
7655 enum rtx_code code ATTRIBUTE_UNUSED;
7656 enum machine_mode mode ATTRIBUTE_UNUSED;
7657 rtx operands[2] ATTRIBUTE_UNUSED;
7658 {
7659 /* If one of operands is memory, source and destination must match. */
7660 if ((GET_CODE (operands[0]) == MEM
7661 || GET_CODE (operands[1]) == MEM)
7662 && ! rtx_equal_p (operands[0], operands[1]))
7663 return FALSE;
7664 return TRUE;
7665 }
7666
7667 /* Return TRUE or FALSE depending on whether the first SET in INSN
7668 has source and destination with matching CC modes, and whether the
7669 CC mode is at least as constrained as REQ_MODE. */
7670
7671 int
7672 ix86_match_ccmode (insn, req_mode)
7673 rtx insn;
7674 enum machine_mode req_mode;
7675 {
7676 rtx set;
7677 enum machine_mode set_mode;
7678
7679 set = PATTERN (insn);
7680 if (GET_CODE (set) == PARALLEL)
7681 set = XVECEXP (set, 0, 0);
7682 if (GET_CODE (set) != SET)
7683 abort ();
7684 if (GET_CODE (SET_SRC (set)) != COMPARE)
7685 abort ();
7686
7687 set_mode = GET_MODE (SET_DEST (set));
7688 switch (set_mode)
7689 {
7690 case CCNOmode:
7691 if (req_mode != CCNOmode
7692 && (req_mode != CCmode
7693 || XEXP (SET_SRC (set), 1) != const0_rtx))
7694 return 0;
7695 break;
7696 case CCmode:
7697 if (req_mode == CCGCmode)
7698 return 0;
7699 /* FALLTHRU */
7700 case CCGCmode:
7701 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7702 return 0;
7703 /* FALLTHRU */
7704 case CCGOCmode:
7705 if (req_mode == CCZmode)
7706 return 0;
7707 /* FALLTHRU */
7708 case CCZmode:
7709 break;
7710
7711 default:
7712 abort ();
7713 }
7714
7715 return (GET_MODE (SET_SRC (set)) == set_mode);
7716 }
7717
7718 /* Generate insn patterns to do an integer compare of OPERANDS. */
7719
7720 static rtx
7721 ix86_expand_int_compare (code, op0, op1)
7722 enum rtx_code code;
7723 rtx op0, op1;
7724 {
7725 enum machine_mode cmpmode;
7726 rtx tmp, flags;
7727
7728 cmpmode = SELECT_CC_MODE (code, op0, op1);
7729 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7730
7731 /* This is very simple, but making the interface the same as in the
7732 FP case makes the rest of the code easier. */
7733 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7734 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7735
7736 /* Return the test that should be put into the flags user, i.e.
7737 the bcc, scc, or cmov instruction. */
7738 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7739 }
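/* For example, for a signed (gt (reg:SI a) (reg:SI b)) this emits
   (set (reg:CCGC 17) (compare:CCGC (reg:SI a) (reg:SI b))) and returns
   (gt (reg:CCGC 17) (const_int 0)), which the caller then places into a
   bcc, scc or cmov pattern (register 17 being the flags register).  */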
7740
7741 /* Figure out whether to use ordered or unordered fp comparisons.
7742 Return the appropriate mode to use. */
7743
7744 enum machine_mode
7745 ix86_fp_compare_mode (code)
7746 enum rtx_code code ATTRIBUTE_UNUSED;
7747 {
7748 /* ??? In order to make all comparisons reversible, we do all comparisons
7749 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7750 all forms of trapping and nontrapping comparisons, we can make inequality
7751 comparisons trapping again, since it results in better code when using
7752 FCOM based compares. */
7753 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7754 }
7755
7756 enum machine_mode
7757 ix86_cc_mode (code, op0, op1)
7758 enum rtx_code code;
7759 rtx op0, op1;
7760 {
7761 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7762 return ix86_fp_compare_mode (code);
7763 switch (code)
7764 {
7765 /* Only zero flag is needed. */
7766 case EQ: /* ZF=0 */
7767 case NE: /* ZF!=0 */
7768 return CCZmode;
7769 /* Codes needing carry flag. */
7770 case GEU: /* CF=0 */
7771 case GTU: /* CF=0 & ZF=0 */
7772 case LTU: /* CF=1 */
7773 case LEU: /* CF=1 | ZF=1 */
7774 return CCmode;
7775 /* Codes possibly doable only with sign flag when
7776 comparing against zero. */
7777 case GE: /* SF=OF or SF=0 */
7778 case LT: /* SF<>OF or SF=1 */
7779 if (op1 == const0_rtx)
7780 return CCGOCmode;
7781 else
7782 /* For other cases the carry flag is not required. */
7783 return CCGCmode;
7784 /* Codes doable only with the sign flag when comparing
7785 against zero, but we lack a jump instruction for that,
7786 so we need to use relational tests against overflow,
7787 which thus needs to be zero. */
7788 case GT: /* ZF=0 & SF=OF */
7789 case LE: /* ZF=1 | SF<>OF */
7790 if (op1 == const0_rtx)
7791 return CCNOmode;
7792 else
7793 return CCGCmode;
7794 /* The strcmp pattern does (use flags), and combine may ask us for a
7795 proper mode. */
7796 case USE:
7797 return CCmode;
7798 default:
7799 abort ();
7800 }
7801 }
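/* So, for example, a signed (x > y) comparison selects CCGCmode, while
   (x > 0) selects CCNOmode and (x >= 0) selects CCGOCmode; the weaker
   modes give later passes more freedom, e.g. to reuse flags already set
   by a test or by an arithmetic instruction instead of a separate cmp.  */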
7802
7803 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7804
7805 int
7806 ix86_use_fcomi_compare (code)
7807 enum rtx_code code ATTRIBUTE_UNUSED;
7808 {
7809 enum rtx_code swapped_code = swap_condition (code);
7810 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7811 || (ix86_fp_comparison_cost (swapped_code)
7812 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7813 }
7814
7815 /* Swap, force into registers, or otherwise massage the two operands
7816 to an fp comparison. The operands are updated in place; the new
7817 comparison code is returned. */
7818
7819 static enum rtx_code
7820 ix86_prepare_fp_compare_args (code, pop0, pop1)
7821 enum rtx_code code;
7822 rtx *pop0, *pop1;
7823 {
7824 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7825 rtx op0 = *pop0, op1 = *pop1;
7826 enum machine_mode op_mode = GET_MODE (op0);
7827 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7828
7829 /* All of the unordered compare instructions only work on registers.
7830 The same is true of the XFmode compare instructions. The same is
7831 true of the fcomi compare instructions. */
7832
7833 if (!is_sse
7834 && (fpcmp_mode == CCFPUmode
7835 || op_mode == XFmode
7836 || op_mode == TFmode
7837 || ix86_use_fcomi_compare (code)))
7838 {
7839 op0 = force_reg (op_mode, op0);
7840 op1 = force_reg (op_mode, op1);
7841 }
7842 else
7843 {
7844 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7845 things around if they appear profitable, otherwise force op0
7846 into a register. */
7847
7848 if (standard_80387_constant_p (op0) == 0
7849 || (GET_CODE (op0) == MEM
7850 && ! (standard_80387_constant_p (op1) == 0
7851 || GET_CODE (op1) == MEM)))
7852 {
7853 rtx tmp;
7854 tmp = op0, op0 = op1, op1 = tmp;
7855 code = swap_condition (code);
7856 }
7857
7858 if (GET_CODE (op0) != REG)
7859 op0 = force_reg (op_mode, op0);
7860
7861 if (CONSTANT_P (op1))
7862 {
7863 if (standard_80387_constant_p (op1))
7864 op1 = force_reg (op_mode, op1);
7865 else
7866 op1 = validize_mem (force_const_mem (op_mode, op1));
7867 }
7868 }
7869
7870 /* Try to rearrange the comparison to make it cheaper. */
7871 if (ix86_fp_comparison_cost (code)
7872 > ix86_fp_comparison_cost (swap_condition (code))
7873 && (GET_CODE (op1) == REG || !no_new_pseudos))
7874 {
7875 rtx tmp;
7876 tmp = op0, op0 = op1, op1 = tmp;
7877 code = swap_condition (code);
7878 if (GET_CODE (op0) != REG)
7879 op0 = force_reg (op_mode, op0);
7880 }
7881
7882 *pop0 = op0;
7883 *pop1 = op1;
7884 return code;
7885 }
7886
7887 /* Convert the comparison codes we use to represent FP comparisons to the
7888 integer codes that will result in a proper branch. Return UNKNOWN if no
7889 such code is available. */
7890 static enum rtx_code
7891 ix86_fp_compare_code_to_integer (code)
7892 enum rtx_code code;
7893 {
7894 switch (code)
7895 {
7896 case GT:
7897 return GTU;
7898 case GE:
7899 return GEU;
7900 case ORDERED:
7901 case UNORDERED:
7902 return code;
7903 break;
7904 case UNEQ:
7905 return EQ;
7906 break;
7907 case UNLT:
7908 return LTU;
7909 break;
7910 case UNLE:
7911 return LEU;
7912 break;
7913 case LTGT:
7914 return NE;
7915 break;
7916 default:
7917 return UNKNOWN;
7918 }
7919 }
7920
7921 /* Split comparison code CODE into comparisons we can do using branch
7922 instructions. BYPASS_CODE is the comparison code for the branch that
7923 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
7924 is not required, its value is set to NIL.
7925 We never require more than two branches. */
7926 static void
7927 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7928 enum rtx_code code, *bypass_code, *first_code, *second_code;
7929 {
7930 *first_code = code;
7931 *bypass_code = NIL;
7932 *second_code = NIL;
7933
7934 /* The fcomi comparison sets flags as follows:
7935
7936 cmp ZF PF CF
7937 > 0 0 0
7938 < 0 0 1
7939 = 1 0 0
7940 un 1 1 1 */
7941
7942 switch (code)
7943 {
7944 case GT: /* GTU - CF=0 & ZF=0 */
7945 case GE: /* GEU - CF=0 */
7946 case ORDERED: /* PF=0 */
7947 case UNORDERED: /* PF=1 */
7948 case UNEQ: /* EQ - ZF=1 */
7949 case UNLT: /* LTU - CF=1 */
7950 case UNLE: /* LEU - CF=1 | ZF=1 */
7951 case LTGT: /* EQ - ZF=0 */
7952 break;
7953 case LT: /* LTU - CF=1 - fails on unordered */
7954 *first_code = UNLT;
7955 *bypass_code = UNORDERED;
7956 break;
7957 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7958 *first_code = UNLE;
7959 *bypass_code = UNORDERED;
7960 break;
7961 case EQ: /* EQ - ZF=1 - fails on unordered */
7962 *first_code = UNEQ;
7963 *bypass_code = UNORDERED;
7964 break;
7965 case NE: /* NE - ZF=0 - fails on unordered */
7966 *first_code = LTGT;
7967 *second_code = UNORDERED;
7968 break;
7969 case UNGE: /* GEU - CF=0 - fails on unordered */
7970 *first_code = GE;
7971 *second_code = UNORDERED;
7972 break;
7973 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7974 *first_code = GT;
7975 *second_code = UNORDERED;
7976 break;
7977 default:
7978 abort ();
7979 }
7980 if (!TARGET_IEEE_FP)
7981 {
7982 *second_code = NIL;
7983 *bypass_code = NIL;
7984 }
7985 }
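/* For example, under IEEE an LT comparison becomes first_code = UNLT with
   bypass_code = UNORDERED: the bypass branch is taken around the UNLT test
   so that unordered operands do not satisfy LT.  An NE comparison becomes
   first_code = LTGT with second_code = UNORDERED, since NE must also hold
   for unordered operands.  */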
7986
7987 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
7988 All of the following functions use the number of instructions as the cost metric.
7989 In the future this should be tweaked to compute bytes for optimize_size and
7990 take into account the performance of various instructions on various CPUs. */
7991 static int
7992 ix86_fp_comparison_arithmetics_cost (code)
7993 enum rtx_code code;
7994 {
7995 if (!TARGET_IEEE_FP)
7996 return 4;
7997 /* The cost of code output by ix86_expand_fp_compare. */
7998 switch (code)
7999 {
8000 case UNLE:
8001 case UNLT:
8002 case LTGT:
8003 case GT:
8004 case GE:
8005 case UNORDERED:
8006 case ORDERED:
8007 case UNEQ:
8008 return 4;
8009 break;
8010 case LT:
8011 case NE:
8012 case EQ:
8013 case UNGE:
8014 return 5;
8015 break;
8016 case LE:
8017 case UNGT:
8018 return 6;
8019 break;
8020 default:
8021 abort ();
8022 }
8023 }
8024
8025 /* Return cost of comparison done using fcomi operation.
8026 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8027 static int
8028 ix86_fp_comparison_fcomi_cost (code)
8029 enum rtx_code code;
8030 {
8031 enum rtx_code bypass_code, first_code, second_code;
8032 /* Return an arbitrarily high cost when the instruction is not supported - this
8033 prevents gcc from using it. */
8034 if (!TARGET_CMOVE)
8035 return 1024;
8036 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8037 return (bypass_code != NIL || second_code != NIL) + 2;
8038 }
8039
8040 /* Return cost of comparison done using sahf operation.
8041 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8042 static int
8043 ix86_fp_comparison_sahf_cost (code)
8044 enum rtx_code code;
8045 {
8046 enum rtx_code bypass_code, first_code, second_code;
8047 /* Return an arbitrarily high cost when the instruction is not preferred - this
8048 keeps gcc from using it. */
8049 if (!TARGET_USE_SAHF && !optimize_size)
8050 return 1024;
8051 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8052 return (bypass_code != NIL || second_code != NIL) + 3;
8053 }
8054
8055 /* Compute cost of the comparison done using any method.
8056 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8057 static int
8058 ix86_fp_comparison_cost (code)
8059 enum rtx_code code;
8060 {
8061 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8062 int min;
8063
8064 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8065 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8066
8067 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8068 if (min > sahf_cost)
8069 min = sahf_cost;
8070 if (min > fcomi_cost)
8071 min = fcomi_cost;
8072 return min;
8073 }
8074
8075 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8076
8077 static rtx
8078 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8079 enum rtx_code code;
8080 rtx op0, op1, scratch;
8081 rtx *second_test;
8082 rtx *bypass_test;
8083 {
8084 enum machine_mode fpcmp_mode, intcmp_mode;
8085 rtx tmp, tmp2;
8086 int cost = ix86_fp_comparison_cost (code);
8087 enum rtx_code bypass_code, first_code, second_code;
8088
8089 fpcmp_mode = ix86_fp_compare_mode (code);
8090 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8091
8092 if (second_test)
8093 *second_test = NULL_RTX;
8094 if (bypass_test)
8095 *bypass_test = NULL_RTX;
8096
8097 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8098
8099 /* Do fcomi/sahf based test when profitable. */
8100 if ((bypass_code == NIL || bypass_test)
8101 && (second_code == NIL || second_test)
8102 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8103 {
8104 if (TARGET_CMOVE)
8105 {
8106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8107 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8108 tmp);
8109 emit_insn (tmp);
8110 }
8111 else
8112 {
8113 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8114 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8115 if (!scratch)
8116 scratch = gen_reg_rtx (HImode);
8117 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8118 emit_insn (gen_x86_sahf_1 (scratch));
8119 }
8120
8121 /* The FP codes work out to act like unsigned. */
8122 intcmp_mode = fpcmp_mode;
8123 code = first_code;
8124 if (bypass_code != NIL)
8125 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8127 const0_rtx);
8128 if (second_code != NIL)
8129 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8130 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8131 const0_rtx);
8132 }
8133 else
8134 {
8135 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8136 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8137 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8138 if (!scratch)
8139 scratch = gen_reg_rtx (HImode);
8140 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8141
8142 /* In the unordered case, we have to check C2 for NaN's, which
8143 doesn't happen to work out to anything nice combination-wise.
8144 So do some bit twiddling on the value we've got in AH to come
8145 up with an appropriate set of condition codes. */
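/* After the fnstsw above, AH holds the FPU condition bits: C0 at 0x01,
   C2 at 0x04 and C3 at 0x40 (C0/C2/C3 play the roles of CF/PF/ZF).
   This is where the masks 0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 used
   below come from: e.g. 0x45 selects C3|C2|C0.  */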
8146
8147 intcmp_mode = CCNOmode;
8148 switch (code)
8149 {
8150 case GT:
8151 case UNGT:
8152 if (code == GT || !TARGET_IEEE_FP)
8153 {
8154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8155 code = EQ;
8156 }
8157 else
8158 {
8159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8160 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8161 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8162 intcmp_mode = CCmode;
8163 code = GEU;
8164 }
8165 break;
8166 case LT:
8167 case UNLT:
8168 if (code == LT && TARGET_IEEE_FP)
8169 {
8170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8171 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8172 intcmp_mode = CCmode;
8173 code = EQ;
8174 }
8175 else
8176 {
8177 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8178 code = NE;
8179 }
8180 break;
8181 case GE:
8182 case UNGE:
8183 if (code == GE || !TARGET_IEEE_FP)
8184 {
8185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8186 code = EQ;
8187 }
8188 else
8189 {
8190 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8191 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8192 GEN_INT (0x01)));
8193 code = NE;
8194 }
8195 break;
8196 case LE:
8197 case UNLE:
8198 if (code == LE && TARGET_IEEE_FP)
8199 {
8200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8201 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8202 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8203 intcmp_mode = CCmode;
8204 code = LTU;
8205 }
8206 else
8207 {
8208 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8209 code = NE;
8210 }
8211 break;
8212 case EQ:
8213 case UNEQ:
8214 if (code == EQ && TARGET_IEEE_FP)
8215 {
8216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8218 intcmp_mode = CCmode;
8219 code = EQ;
8220 }
8221 else
8222 {
8223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8224 code = NE;
8225 break;
8226 }
8227 break;
8228 case NE:
8229 case LTGT:
8230 if (code == NE && TARGET_IEEE_FP)
8231 {
8232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8233 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8234 GEN_INT (0x40)));
8235 code = NE;
8236 }
8237 else
8238 {
8239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8240 code = EQ;
8241 }
8242 break;
8243
8244 case UNORDERED:
8245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8246 code = NE;
8247 break;
8248 case ORDERED:
8249 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8250 code = EQ;
8251 break;
8252
8253 default:
8254 abort ();
8255 }
8256 }
8257
8258 /* Return the test that should be put into the flags user, i.e.
8259 the bcc, scc, or cmov instruction. */
8260 return gen_rtx_fmt_ee (code, VOIDmode,
8261 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8262 const0_rtx);
8263 }
8264
8265 rtx
8266 ix86_expand_compare (code, second_test, bypass_test)
8267 enum rtx_code code;
8268 rtx *second_test, *bypass_test;
8269 {
8270 rtx op0, op1, ret;
8271 op0 = ix86_compare_op0;
8272 op1 = ix86_compare_op1;
8273
8274 if (second_test)
8275 *second_test = NULL_RTX;
8276 if (bypass_test)
8277 *bypass_test = NULL_RTX;
8278
8279 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8280 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8281 second_test, bypass_test);
8282 else
8283 ret = ix86_expand_int_compare (code, op0, op1);
8284
8285 return ret;
8286 }
8287
8288 /* Return true if the CODE will result in a nontrivial jump sequence. */
8289 bool
8290 ix86_fp_jump_nontrivial_p (code)
8291 enum rtx_code code;
8292 {
8293 enum rtx_code bypass_code, first_code, second_code;
8294 if (!TARGET_CMOVE)
8295 return true;
8296 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8297 return bypass_code != NIL || second_code != NIL;
8298 }
8299
8300 void
8301 ix86_expand_branch (code, label)
8302 enum rtx_code code;
8303 rtx label;
8304 {
8305 rtx tmp;
8306
8307 switch (GET_MODE (ix86_compare_op0))
8308 {
8309 case QImode:
8310 case HImode:
8311 case SImode:
8312 simple:
8313 tmp = ix86_expand_compare (code, NULL, NULL);
8314 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8315 gen_rtx_LABEL_REF (VOIDmode, label),
8316 pc_rtx);
8317 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8318 return;
8319
8320 case SFmode:
8321 case DFmode:
8322 case XFmode:
8323 case TFmode:
8324 {
8325 rtvec vec;
8326 int use_fcomi;
8327 enum rtx_code bypass_code, first_code, second_code;
8328
8329 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8330 &ix86_compare_op1);
8331
8332 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8333
8334 /* Check whether we will use the natural sequence with one jump. If
8335 so, we can expand the jump early. Otherwise delay expansion by
8336 creating a compound insn so as not to confuse the optimizers. */
8337 if (bypass_code == NIL && second_code == NIL
8338 && TARGET_CMOVE)
8339 {
8340 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8341 gen_rtx_LABEL_REF (VOIDmode, label),
8342 pc_rtx, NULL_RTX);
8343 }
8344 else
8345 {
8346 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8347 ix86_compare_op0, ix86_compare_op1);
8348 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8349 gen_rtx_LABEL_REF (VOIDmode, label),
8350 pc_rtx);
8351 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8352
8353 use_fcomi = ix86_use_fcomi_compare (code);
8354 vec = rtvec_alloc (3 + !use_fcomi);
8355 RTVEC_ELT (vec, 0) = tmp;
8356 RTVEC_ELT (vec, 1)
8357 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8358 RTVEC_ELT (vec, 2)
8359 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8360 if (! use_fcomi)
8361 RTVEC_ELT (vec, 3)
8362 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8363
8364 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8365 }
8366 return;
8367 }
8368
8369 case DImode:
8370 if (TARGET_64BIT)
8371 goto simple;
8372 /* Expand DImode branch into multiple compare+branch. */
8373 {
8374 rtx lo[2], hi[2], label2;
8375 enum rtx_code code1, code2, code3;
8376
8377 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8378 {
8379 tmp = ix86_compare_op0;
8380 ix86_compare_op0 = ix86_compare_op1;
8381 ix86_compare_op1 = tmp;
8382 code = swap_condition (code);
8383 }
8384 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8385 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8386
8387 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8388 avoid two branches. This costs one extra insn, so disable when
8389 optimizing for size. */
8390
8391 if ((code == EQ || code == NE)
8392 && (!optimize_size
8393 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8394 {
8395 rtx xor0, xor1;
8396
8397 xor1 = hi[0];
8398 if (hi[1] != const0_rtx)
8399 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8400 NULL_RTX, 0, OPTAB_WIDEN);
8401
8402 xor0 = lo[0];
8403 if (lo[1] != const0_rtx)
8404 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8405 NULL_RTX, 0, OPTAB_WIDEN);
8406
8407 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8408 NULL_RTX, 0, OPTAB_WIDEN);
8409
8410 ix86_compare_op0 = tmp;
8411 ix86_compare_op1 = const0_rtx;
8412 ix86_expand_branch (code, label);
8413 return;
8414 }
8415
8416 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8417 op1 is a constant, and the low word is zero, then we can just
8418 examine the high word. */
8419
8420 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8421 switch (code)
8422 {
8423 case LT: case LTU: case GE: case GEU:
8424 ix86_compare_op0 = hi[0];
8425 ix86_compare_op1 = hi[1];
8426 ix86_expand_branch (code, label);
8427 return;
8428 default:
8429 break;
8430 }
8431
8432 /* Otherwise, we need two or three jumps. */
8433
8434 label2 = gen_label_rtx ();
8435
8436 code1 = code;
8437 code2 = swap_condition (code);
8438 code3 = unsigned_condition (code);
8439
8440 switch (code)
8441 {
8442 case LT: case GT: case LTU: case GTU:
8443 break;
8444
8445 case LE: code1 = LT; code2 = GT; break;
8446 case GE: code1 = GT; code2 = LT; break;
8447 case LEU: code1 = LTU; code2 = GTU; break;
8448 case GEU: code1 = GTU; code2 = LTU; break;
8449
8450 case EQ: code1 = NIL; code2 = NE; break;
8451 case NE: code2 = NIL; break;
8452
8453 default:
8454 abort ();
8455 }
8456
8457 /*
8458 * a < b =>
8459 * if (hi(a) < hi(b)) goto true;
8460 * if (hi(a) > hi(b)) goto false;
8461 * if (lo(a) < lo(b)) goto true;
8462 * false:
8463 */
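/*
* Similarly, with the code1/code2/code3 mapping above, a <= b becomes:
* if (hi(a) < hi(b)) goto true;
* if (hi(a) > hi(b)) goto false;
* if (lo(a) <= lo(b)) goto true; (unsigned)
* false:
* and a == b needs only two jumps:
* if (hi(a) != hi(b)) goto false;
* if (lo(a) == lo(b)) goto true;
* false:
*/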
8464
8465 ix86_compare_op0 = hi[0];
8466 ix86_compare_op1 = hi[1];
8467
8468 if (code1 != NIL)
8469 ix86_expand_branch (code1, label);
8470 if (code2 != NIL)
8471 ix86_expand_branch (code2, label2);
8472
8473 ix86_compare_op0 = lo[0];
8474 ix86_compare_op1 = lo[1];
8475 ix86_expand_branch (code3, label);
8476
8477 if (code2 != NIL)
8478 emit_label (label2);
8479 return;
8480 }
8481
8482 default:
8483 abort ();
8484 }
8485 }
8486
8487 /* Split branch based on floating point condition. */
8488 void
8489 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8490 enum rtx_code code;
8491 rtx op1, op2, target1, target2, tmp;
8492 {
8493 rtx second, bypass;
8494 rtx label = NULL_RTX;
8495 rtx condition;
8496 int bypass_probability = -1, second_probability = -1, probability = -1;
8497 rtx i;
8498
8499 if (target2 != pc_rtx)
8500 {
8501 rtx tmp = target2;
8502 code = reverse_condition_maybe_unordered (code);
8503 target2 = target1;
8504 target1 = tmp;
8505 }
8506
8507 condition = ix86_expand_fp_compare (code, op1, op2,
8508 tmp, &second, &bypass);
8509
8510 if (split_branch_probability >= 0)
8511 {
8512 /* Distribute the probabilities across the jumps.
8513 Assume that BYPASS and SECOND always test
8514 for UNORDERED. */
8515 probability = split_branch_probability;
8516
8517 /* A value of 1 is low enough that there is no need for the probability
8518 to be updated. Later we may run some experiments and see
8519 if unordered values are more frequent in practice. */
8520 if (bypass)
8521 bypass_probability = 1;
8522 if (second)
8523 second_probability = 1;
8524 }
8525 if (bypass != NULL_RTX)
8526 {
8527 label = gen_label_rtx ();
8528 i = emit_jump_insn (gen_rtx_SET
8529 (VOIDmode, pc_rtx,
8530 gen_rtx_IF_THEN_ELSE (VOIDmode,
8531 bypass,
8532 gen_rtx_LABEL_REF (VOIDmode,
8533 label),
8534 pc_rtx)));
8535 if (bypass_probability >= 0)
8536 REG_NOTES (i)
8537 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8538 GEN_INT (bypass_probability),
8539 REG_NOTES (i));
8540 }
8541 i = emit_jump_insn (gen_rtx_SET
8542 (VOIDmode, pc_rtx,
8543 gen_rtx_IF_THEN_ELSE (VOIDmode,
8544 condition, target1, target2)));
8545 if (probability >= 0)
8546 REG_NOTES (i)
8547 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8548 GEN_INT (probability),
8549 REG_NOTES (i));
8550 if (second != NULL_RTX)
8551 {
8552 i = emit_jump_insn (gen_rtx_SET
8553 (VOIDmode, pc_rtx,
8554 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8555 target2)));
8556 if (second_probability >= 0)
8557 REG_NOTES (i)
8558 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8559 GEN_INT (second_probability),
8560 REG_NOTES (i));
8561 }
8562 if (label != NULL_RTX)
8563 emit_label (label);
8564 }
8565
8566 int
8567 ix86_expand_setcc (code, dest)
8568 enum rtx_code code;
8569 rtx dest;
8570 {
8571 rtx ret, tmp, tmpreg;
8572 rtx second_test, bypass_test;
8573
8574 if (GET_MODE (ix86_compare_op0) == DImode
8575 && !TARGET_64BIT)
8576 return 0; /* FAIL */
8577
8578 if (GET_MODE (dest) != QImode)
8579 abort ();
8580
8581 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8582 PUT_MODE (ret, QImode);
8583
8584 tmp = dest;
8585 tmpreg = dest;
8586
8587 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8588 if (bypass_test || second_test)
8589 {
8590 rtx test = second_test;
8591 int bypass = 0;
8592 rtx tmp2 = gen_reg_rtx (QImode);
8593 if (bypass_test)
8594 {
8595 if (second_test)
8596 abort ();
8597 test = bypass_test;
8598 bypass = 1;
8599 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8600 }
8601 PUT_MODE (test, QImode);
8602 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8603
8604 if (bypass)
8605 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8606 else
8607 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8608 }
8609
8610 return 1; /* DONE */
8611 }
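/* Roughly, when the fcomi/sahf test above is profitable, an IEEE UNGE
   setcc is split into the GE test plus a second UNORDERED test that is
   OR-ed in ("setae; setp; orb"), while an IEEE LT uses a bypass test that
   is reversed to ORDERED and AND-ed in ("setb; setnp; andb"), so that NaN
   operands never satisfy LT.  */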
8612
8613 int
8614 ix86_expand_int_movcc (operands)
8615 rtx operands[];
8616 {
8617 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8618 rtx compare_seq, compare_op;
8619 rtx second_test, bypass_test;
8620 enum machine_mode mode = GET_MODE (operands[0]);
8621
8622 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8623 In case the comparison is done with an immediate, we can convert it to LTU
8624 or GEU by altering the integer. */
8625
8626 if ((code == LEU || code == GTU)
8627 && GET_CODE (ix86_compare_op1) == CONST_INT
8628 && mode != HImode
8629 && INTVAL (ix86_compare_op1) != -1
8630 /* For x86-64, the immediate field in the instruction is 32-bit
8631 signed, so we can't increment a DImode value above 0x7fffffff. */
8632 && (!TARGET_64BIT
8633 || GET_MODE (ix86_compare_op0) != DImode
8634 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8635 && GET_CODE (operands[2]) == CONST_INT
8636 && GET_CODE (operands[3]) == CONST_INT)
8637 {
8638 if (code == LEU)
8639 code = LTU;
8640 else
8641 code = GEU;
8642 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8643 GET_MODE (ix86_compare_op0));
8644 }
8645
8646 start_sequence ();
8647 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8648 compare_seq = get_insns ();
8649 end_sequence ();
8650
8651 compare_code = GET_CODE (compare_op);
8652
8653 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8654 HImode insns, we'd be swallowed in word prefix ops. */
8655
8656 if (mode != HImode
8657 && (mode != DImode || TARGET_64BIT)
8658 && GET_CODE (operands[2]) == CONST_INT
8659 && GET_CODE (operands[3]) == CONST_INT)
8660 {
8661 rtx out = operands[0];
8662 HOST_WIDE_INT ct = INTVAL (operands[2]);
8663 HOST_WIDE_INT cf = INTVAL (operands[3]);
8664 HOST_WIDE_INT diff;
8665
8666 if ((compare_code == LTU || compare_code == GEU)
8667 && !second_test && !bypass_test)
8668 {
8669
8670 /* Detect overlap between destination and compare sources. */
8671 rtx tmp = out;
8672
8673 /* To simplify the rest of the code, restrict to the GEU case. */
8674 if (compare_code == LTU)
8675 {
8676 int tmp = ct;
8677 ct = cf;
8678 cf = tmp;
8679 compare_code = reverse_condition (compare_code);
8680 code = reverse_condition (code);
8681 }
8682 diff = ct - cf;
8683
8684 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8685 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8686 tmp = gen_reg_rtx (mode);
8687
8688 emit_insn (compare_seq);
8689 if (mode == DImode)
8690 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8691 else
8692 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8693
8694 if (diff == 1)
8695 {
8696 /*
8697 * cmpl op0,op1
8698 * sbbl dest,dest
8699 * [addl dest, ct]
8700 *
8701 * Size 5 - 8.
8702 */
8703 if (ct)
8704 tmp = expand_simple_binop (mode, PLUS,
8705 tmp, GEN_INT (ct),
8706 tmp, 1, OPTAB_DIRECT);
8707 }
8708 else if (cf == -1)
8709 {
8710 /*
8711 * cmpl op0,op1
8712 * sbbl dest,dest
8713 * orl $ct, dest
8714 *
8715 * Size 8.
8716 */
8717 tmp = expand_simple_binop (mode, IOR,
8718 tmp, GEN_INT (ct),
8719 tmp, 1, OPTAB_DIRECT);
8720 }
8721 else if (diff == -1 && ct)
8722 {
8723 /*
8724 * cmpl op0,op1
8725 * sbbl dest,dest
8726 * xorl $-1, dest
8727 * [addl dest, cf]
8728 *
8729 * Size 8 - 11.
8730 */
8731 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8732 if (cf)
8733 tmp = expand_simple_binop (mode, PLUS,
8734 tmp, GEN_INT (cf),
8735 tmp, 1, OPTAB_DIRECT);
8736 }
8737 else
8738 {
8739 /*
8740 * cmpl op0,op1
8741 * sbbl dest,dest
8742 * andl cf - ct, dest
8743 * [addl dest, ct]
8744 *
8745 * Size 8 - 11.
8746 */
8747 tmp = expand_simple_binop (mode, AND,
8748 tmp,
8749 gen_int_mode (cf - ct, mode),
8750 tmp, 1, OPTAB_DIRECT);
8751 if (ct)
8752 tmp = expand_simple_binop (mode, PLUS,
8753 tmp, GEN_INT (ct),
8754 tmp, 1, OPTAB_DIRECT);
8755 }
8756
8757 if (tmp != out)
8758 emit_move_insn (out, tmp);
8759
8760 return 1; /* DONE */
8761 }
8762
8763 diff = ct - cf;
8764 if (diff < 0)
8765 {
8766 HOST_WIDE_INT tmp;
8767 tmp = ct, ct = cf, cf = tmp;
8768 diff = -diff;
8769 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8770 {
8771 /* We may be reversing an unordered compare to a normal compare, which
8772 is not valid in general (we may convert a non-trapping condition
8773 to a trapping one), however on i386 we currently emit all
8774 comparisons unordered. */
8775 compare_code = reverse_condition_maybe_unordered (compare_code);
8776 code = reverse_condition_maybe_unordered (code);
8777 }
8778 else
8779 {
8780 compare_code = reverse_condition (compare_code);
8781 code = reverse_condition (code);
8782 }
8783 }
8784
8785 compare_code = NIL;
8786 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8787 && GET_CODE (ix86_compare_op1) == CONST_INT)
8788 {
8789 if (ix86_compare_op1 == const0_rtx
8790 && (code == LT || code == GE))
8791 compare_code = code;
8792 else if (ix86_compare_op1 == constm1_rtx)
8793 {
8794 if (code == LE)
8795 compare_code = LT;
8796 else if (code == GT)
8797 compare_code = GE;
8798 }
8799 }
8800
8801 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8802 if (compare_code != NIL
8803 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8804 && (cf == -1 || ct == -1))
8805 {
8806 /* If the lea code below could be used, only optimize
8807 if it results in a 2-insn sequence. */
8808
8809 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8810 || diff == 3 || diff == 5 || diff == 9)
8811 || (compare_code == LT && ct == -1)
8812 || (compare_code == GE && cf == -1))
8813 {
8814 /*
8815 * notl op1 (if necessary)
8816 * sarl $31, op1
8817 * orl cf, op1
8818 */
8819 if (ct != -1)
8820 {
8821 cf = ct;
8822 ct = -1;
8823 code = reverse_condition (code);
8824 }
8825
8826 out = emit_store_flag (out, code, ix86_compare_op0,
8827 ix86_compare_op1, VOIDmode, 0, -1);
8828
8829 out = expand_simple_binop (mode, IOR,
8830 out, GEN_INT (cf),
8831 out, 1, OPTAB_DIRECT);
8832 if (out != operands[0])
8833 emit_move_insn (operands[0], out);
8834
8835 return 1; /* DONE */
8836 }
8837 }
8838
8839 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8840 || diff == 3 || diff == 5 || diff == 9)
8841 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8842 {
8843 /*
8844 * xorl dest,dest
8845 * cmpl op1,op2
8846 * setcc dest
8847 * lea cf(dest*(ct-cf)),dest
8848 *
8849 * Size 14.
8850 *
8851 * This also catches the degenerate setcc-only case.
8852 */
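/* The diff values 1, 2, 4, 8, 3, 5 and 9 are exactly those a single lea
   can form: dest = base + index*scale with scale 1, 2, 4 or 8, using the
   setcc result as the index (and, for 3, 5 and 9, also as the base),
   with cf as the displacement.  */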
8853
8854 rtx tmp;
8855 int nops;
8856
8857 out = emit_store_flag (out, code, ix86_compare_op0,
8858 ix86_compare_op1, VOIDmode, 0, 1);
8859
8860 nops = 0;
8861 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8862 arithmetic in the proper mode to match. */
8863 if (diff == 1)
8864 tmp = out;
8865 else
8866 {
8867 rtx out1;
8868 out1 = out;
8869 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8870 nops++;
8871 if (diff & 1)
8872 {
8873 tmp = gen_rtx_PLUS (mode, tmp, out1);
8874 nops++;
8875 }
8876 }
8877 if (cf != 0)
8878 {
8879 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8880 nops++;
8881 }
8882 if (tmp != out
8883 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8884 {
8885 if (nops == 1)
8886 {
8887 rtx clob;
8888
8889 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8890 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8891
8892 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8893 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8894 emit_insn (tmp);
8895 }
8896 else
8897 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8898 }
8899 if (out != operands[0])
8900 emit_move_insn (operands[0], out);
8901
8902 return 1; /* DONE */
8903 }
8904
8905 /*
8906 * General case: Jumpful:
8907 * xorl dest,dest cmpl op1, op2
8908 * cmpl op1, op2 movl ct, dest
8909 * setcc dest jcc 1f
8910 * decl dest movl cf, dest
8911 * andl (cf-ct),dest 1:
8912 * addl ct,dest
8913 *
8914 * Size 20. Size 14.
8915 *
8916 * This is reasonably steep, but branch mispredict costs are
8917 * high on modern cpus, so consider failing only if optimizing
8918 * for space.
8919 *
8920 * %%% Parameterize branch_cost on the tuning architecture, then
8921 * use that. The 80386 couldn't care less about mispredicts.
8922 */
8923
8924 if (!optimize_size && !TARGET_CMOVE)
8925 {
8926 if (ct == 0)
8927 {
8928 ct = cf;
8929 cf = 0;
8930 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8931 /* We may be reversing an unordered compare to a normal compare,
8932 which is not valid in general (we may convert a non-trapping
8933 condition to a trapping one), however on i386 we currently
8934 emit all comparisons unordered. */
8935 code = reverse_condition_maybe_unordered (code);
8936 else
8937 {
8938 code = reverse_condition (code);
8939 if (compare_code != NIL)
8940 compare_code = reverse_condition (compare_code);
8941 }
8942 }
8943
8944 if (compare_code != NIL)
8945 {
8946 /* notl op1 (if needed)
8947 sarl $31, op1
8948 andl (cf-ct), op1
8949 addl ct, op1
8950
8951 For x < 0 (resp. x <= -1) there will be no notl,
8952 so if possible swap the constants to get rid of the
8953 complement.
8954 True/false will be -1/0 while code below (store flag
8955 followed by decrement) is 0/-1, so the constants need
8956 to be exchanged once more. */
8957
8958 if (compare_code == GE || !cf)
8959 {
8960 code = reverse_condition (code);
8961 compare_code = LT;
8962 }
8963 else
8964 {
8965 HOST_WIDE_INT tmp = cf;
8966 cf = ct;
8967 ct = tmp;
8968 }
8969
8970 out = emit_store_flag (out, code, ix86_compare_op0,
8971 ix86_compare_op1, VOIDmode, 0, -1);
8972 }
8973 else
8974 {
8975 out = emit_store_flag (out, code, ix86_compare_op0,
8976 ix86_compare_op1, VOIDmode, 0, 1);
8977
8978 out = expand_simple_binop (mode, PLUS,
8979 out, constm1_rtx,
8980 out, 1, OPTAB_DIRECT);
8981 }
8982
8983 out = expand_simple_binop (mode, AND,
8984 out,
8985 gen_int_mode (cf - ct, mode),
8986 out, 1, OPTAB_DIRECT);
8987 out = expand_simple_binop (mode, PLUS,
8988 out, GEN_INT (ct),
8989 out, 1, OPTAB_DIRECT);
8990 if (out != operands[0])
8991 emit_move_insn (operands[0], out);
8992
8993 return 1; /* DONE */
8994 }
8995 }
8996
8997 if (!TARGET_CMOVE)
8998 {
8999 /* Try a few things more with specific constants and a variable. */
9000
9001 optab op;
9002 rtx var, orig_out, out, tmp;
9003
9004 if (optimize_size)
9005 return 0; /* FAIL */
9006
9007 /* If one of the two operands is an interesting constant, load a
9008 constant with the above and mask it in with a logical operation. */
9009
9010 if (GET_CODE (operands[2]) == CONST_INT)
9011 {
9012 var = operands[3];
9013 if (INTVAL (operands[2]) == 0)
9014 operands[3] = constm1_rtx, op = and_optab;
9015 else if (INTVAL (operands[2]) == -1)
9016 operands[3] = const0_rtx, op = ior_optab;
9017 else
9018 return 0; /* FAIL */
9019 }
9020 else if (GET_CODE (operands[3]) == CONST_INT)
9021 {
9022 var = operands[2];
9023 if (INTVAL (operands[3]) == 0)
9024 operands[2] = constm1_rtx, op = and_optab;
9025 else if (INTVAL (operands[3]) == -1)
9026 operands[2] = const0_rtx, op = ior_optab;
9027 else
9028 return 0; /* FAIL */
9029 }
9030 else
9031 return 0; /* FAIL */
9032
9033 orig_out = operands[0];
9034 tmp = gen_reg_rtx (mode);
9035 operands[0] = tmp;
9036
9037 /* Recurse to get the constant loaded. */
9038 if (ix86_expand_int_movcc (operands) == 0)
9039 return 0; /* FAIL */
9040
9041 /* Mask in the interesting variable. */
9042 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9043 OPTAB_WIDEN);
9044 if (out != orig_out)
9045 emit_move_insn (orig_out, out);
9046
9047 return 1; /* DONE */
9048 }
9049
9050 /*
9051 * For comparison with above,
9052 *
9053 * movl cf,dest
9054 * movl ct,tmp
9055 * cmpl op1,op2
9056 * cmovcc tmp,dest
9057 *
9058 * Size 15.
9059 */
9060
9061 if (! nonimmediate_operand (operands[2], mode))
9062 operands[2] = force_reg (mode, operands[2]);
9063 if (! nonimmediate_operand (operands[3], mode))
9064 operands[3] = force_reg (mode, operands[3]);
9065
9066 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9067 {
9068 rtx tmp = gen_reg_rtx (mode);
9069 emit_move_insn (tmp, operands[3]);
9070 operands[3] = tmp;
9071 }
9072 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9073 {
9074 rtx tmp = gen_reg_rtx (mode);
9075 emit_move_insn (tmp, operands[2]);
9076 operands[2] = tmp;
9077 }
9078 if (! register_operand (operands[2], VOIDmode)
9079 && ! register_operand (operands[3], VOIDmode))
9080 operands[2] = force_reg (mode, operands[2]);
9081
9082 emit_insn (compare_seq);
9083 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9084 gen_rtx_IF_THEN_ELSE (mode,
9085 compare_op, operands[2],
9086 operands[3])));
9087 if (bypass_test)
9088 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9089 gen_rtx_IF_THEN_ELSE (mode,
9090 bypass_test,
9091 operands[3],
9092 operands[0])));
9093 if (second_test)
9094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9095 gen_rtx_IF_THEN_ELSE (mode,
9096 second_test,
9097 operands[2],
9098 operands[0])));
9099
9100 return 1; /* DONE */
9101 }
9102
9103 int
9104 ix86_expand_fp_movcc (operands)
9105 rtx operands[];
9106 {
9107 enum rtx_code code;
9108 rtx tmp;
9109 rtx compare_op, second_test, bypass_test;
9110
9111 /* For SF/DFmode conditional moves based on comparisons
9112 in the same mode, we may want to use SSE min/max instructions. */
9113 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9114 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9115 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9116 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9117 && (!TARGET_IEEE_FP
9118 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9119 /* We may be called from the post-reload splitter. */
9120 && (!REG_P (operands[0])
9121 || SSE_REG_P (operands[0])
9122 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9123 {
9124 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9125 code = GET_CODE (operands[1]);
9126
9127 /* See if we have (cross) match between comparison operands and
9128 conditional move operands. */
9129 if (rtx_equal_p (operands[2], op1))
9130 {
9131 rtx tmp = op0;
9132 op0 = op1;
9133 op1 = tmp;
9134 code = reverse_condition_maybe_unordered (code);
9135 }
9136 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9137 {
9138 /* Check for min operation. */
9139 if (code == LT)
9140 {
9141 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9142 if (memory_operand (op0, VOIDmode))
9143 op0 = force_reg (GET_MODE (operands[0]), op0);
9144 if (GET_MODE (operands[0]) == SFmode)
9145 emit_insn (gen_minsf3 (operands[0], op0, op1));
9146 else
9147 emit_insn (gen_mindf3 (operands[0], op0, op1));
9148 return 1;
9149 }
9150 /* Check for max operation. */
9151 if (code == GT)
9152 {
9153 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9154 if (memory_operand (op0, VOIDmode))
9155 op0 = force_reg (GET_MODE (operands[0]), op0);
9156 if (GET_MODE (operands[0]) == SFmode)
9157 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9158 else
9159 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9160 return 1;
9161 }
9162 }
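/* E.g. an SFmode "x = (a < b) ? a : b", where the cmov operands match the
   comparison operands as tested above, is emitted as a single minss, and
   the corresponding GT form as maxss.  */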
9163 /* Arrange for the condition to be a valid sse_comparison_operator. In case
9164 we are in non-ieee mode, try to canonicalize the destination operand
9165 to be first in the comparison - this helps reload to avoid extra
9166 moves. */
9167 if (!sse_comparison_operator (operands[1], VOIDmode)
9168 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9169 {
9170 rtx tmp = ix86_compare_op0;
9171 ix86_compare_op0 = ix86_compare_op1;
9172 ix86_compare_op1 = tmp;
9173 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9174 VOIDmode, ix86_compare_op0,
9175 ix86_compare_op1);
9176 }
9177 /* Similarly, try to arrange for the result to be the first operand of the
9178 conditional move. We also don't support the NE comparison on SSE, so try
9179 to avoid it. */
9180 if ((rtx_equal_p (operands[0], operands[3])
9181 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9182 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9183 {
9184 rtx tmp = operands[2];
9185 operands[2] = operands[3];
9186 operands[3] = tmp;
9187 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9188 (GET_CODE (operands[1])),
9189 VOIDmode, ix86_compare_op0,
9190 ix86_compare_op1);
9191 }
9192 if (GET_MODE (operands[0]) == SFmode)
9193 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9194 operands[2], operands[3],
9195 ix86_compare_op0, ix86_compare_op1));
9196 else
9197 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9198 operands[2], operands[3],
9199 ix86_compare_op0, ix86_compare_op1));
9200 return 1;
9201 }
9202
9203 /* The floating point conditional move instructions don't directly
9204 support conditions resulting from a signed integer comparison. */
9205
9206 code = GET_CODE (operands[1]);
9207 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9208
9209 /* The floating point conditional move instructions don't directly
9210 support signed integer comparisons. */
9211
9212 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9213 {
9214 if (second_test != NULL || bypass_test != NULL)
9215 abort ();
9216 tmp = gen_reg_rtx (QImode);
9217 ix86_expand_setcc (code, tmp);
9218 code = NE;
9219 ix86_compare_op0 = tmp;
9220 ix86_compare_op1 = const0_rtx;
9221 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9222 }
9223 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9224 {
9225 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9226 emit_move_insn (tmp, operands[3]);
9227 operands[3] = tmp;
9228 }
9229 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9230 {
9231 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9232 emit_move_insn (tmp, operands[2]);
9233 operands[2] = tmp;
9234 }
9235
9236 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9237 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9238 compare_op,
9239 operands[2],
9240 operands[3])));
9241 if (bypass_test)
9242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9243 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9244 bypass_test,
9245 operands[3],
9246 operands[0])));
9247 if (second_test)
9248 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9249 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9250 second_test,
9251 operands[2],
9252 operands[0])));
9253
9254 return 1;
9255 }
9256
9257 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9258 works for floating point parameters and non-offsettable memories.
9259 For pushes, it returns just stack offsets; the values will be saved
9260 in the right order. At most three parts are generated. */
9261
9262 static int
9263 ix86_split_to_parts (operand, parts, mode)
9264 rtx operand;
9265 rtx *parts;
9266 enum machine_mode mode;
9267 {
9268 int size;
9269
9270 if (!TARGET_64BIT)
9271 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9272 else
9273 size = (GET_MODE_SIZE (mode) + 4) / 8;
9274
9275 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9276 abort ();
9277 if (size < 2 || size > 3)
9278 abort ();
9279
9280 /* Optimize a constant pool reference to immediates. This is used by fp
9281 moves, which force all constants to memory to allow combining. */
9282 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9283 {
9284 rtx tmp = maybe_get_pool_constant (operand);
9285 if (tmp)
9286 operand = tmp;
9287 }
9288
9289 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9290 {
9291 /* The only non-offsettable memories we handle are pushes. */
9292 if (! push_operand (operand, VOIDmode))
9293 abort ();
9294
9295 operand = copy_rtx (operand);
9296 PUT_MODE (operand, Pmode);
9297 parts[0] = parts[1] = parts[2] = operand;
9298 }
9299 else if (!TARGET_64BIT)
9300 {
9301 if (mode == DImode)
9302 split_di (&operand, 1, &parts[0], &parts[1]);
9303 else
9304 {
9305 if (REG_P (operand))
9306 {
9307 if (!reload_completed)
9308 abort ();
9309 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9310 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9311 if (size == 3)
9312 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9313 }
9314 else if (offsettable_memref_p (operand))
9315 {
9316 operand = adjust_address (operand, SImode, 0);
9317 parts[0] = operand;
9318 parts[1] = adjust_address (operand, SImode, 4);
9319 if (size == 3)
9320 parts[2] = adjust_address (operand, SImode, 8);
9321 }
9322 else if (GET_CODE (operand) == CONST_DOUBLE)
9323 {
9324 REAL_VALUE_TYPE r;
9325 long l[4];
9326
9327 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9328 switch (mode)
9329 {
9330 case XFmode:
9331 case TFmode:
9332 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9333 parts[2] = gen_int_mode (l[2], SImode);
9334 break;
9335 case DFmode:
9336 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9337 break;
9338 default:
9339 abort ();
9340 }
9341 parts[1] = gen_int_mode (l[1], SImode);
9342 parts[0] = gen_int_mode (l[0], SImode);
9343 }
9344 else
9345 abort ();
9346 }
9347 }
9348 else
9349 {
9350 if (mode == TImode)
9351 split_ti (&operand, 1, &parts[0], &parts[1]);
9352 if (mode == XFmode || mode == TFmode)
9353 {
9354 if (REG_P (operand))
9355 {
9356 if (!reload_completed)
9357 abort ();
9358 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9359 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9360 }
9361 else if (offsettable_memref_p (operand))
9362 {
9363 operand = adjust_address (operand, DImode, 0);
9364 parts[0] = operand;
9365 parts[1] = adjust_address (operand, SImode, 8);
9366 }
9367 else if (GET_CODE (operand) == CONST_DOUBLE)
9368 {
9369 REAL_VALUE_TYPE r;
9370 long l[3];
9371
9372 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9373 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9374 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9375 if (HOST_BITS_PER_WIDE_INT >= 64)
9376 parts[0]
9377 = gen_int_mode
9378 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9379 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9380 DImode);
9381 else
9382 parts[0] = immed_double_const (l[0], l[1], DImode);
9383 parts[1] = gen_int_mode (l[2], SImode);
9384 }
9385 else
9386 abort ();
9387 }
9388 }
9389
9390 return size;
9391 }
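/* As an example, on a 32bit target a DFmode constant 1.0 is split into
   parts[0] = 0x00000000 and parts[1] = 0x3ff00000 (low word first),
   matching the little-endian layout of an IEEE double in memory.  */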
9392
9393 /* Emit insns to perform a move or push of DI, DF, and XF values.
9394 The operand is split into word-sized parts. Operands 2-4 are filled with
9395 the destination parts in the correct order; operands 5-7 are filled with
9396 the corresponding source parts. */
9397
9398 void
9399 ix86_split_long_move (operands)
9400 rtx operands[];
9401 {
9402 rtx part[2][3];
9403 int nparts;
9404 int push = 0;
9405 int collisions = 0;
9406 enum machine_mode mode = GET_MODE (operands[0]);
9407
9408 /* The DFmode expanders may ask us to move a double.
9409 For a 64bit target this is a single move. By hiding the fact
9410 here we simplify the i386.md splitters. */
9411 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9412 {
9413 /* Optimize constant pool reference to immediates. This is used by
9414 fp moves, that force all constants to memory to allow combining. */
9415
9416 if (GET_CODE (operands[1]) == MEM
9417 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9418 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9419 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9420 if (push_operand (operands[0], VOIDmode))
9421 {
9422 operands[0] = copy_rtx (operands[0]);
9423 PUT_MODE (operands[0], Pmode);
9424 }
9425 else
9426 operands[0] = gen_lowpart (DImode, operands[0]);
9427 operands[1] = gen_lowpart (DImode, operands[1]);
9428 emit_move_insn (operands[0], operands[1]);
9429 return;
9430 }
9431
9432 /* The only non-offsettable memory we handle is push. */
9433 if (push_operand (operands[0], VOIDmode))
9434 push = 1;
9435 else if (GET_CODE (operands[0]) == MEM
9436 && ! offsettable_memref_p (operands[0]))
9437 abort ();
9438
9439 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9440 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9441
9442 /* When emitting a push, take care of source operands on the stack. */
9443 if (push && GET_CODE (operands[1]) == MEM
9444 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9445 {
9446 if (nparts == 3)
9447 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9448 XEXP (part[1][2], 0));
9449 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9450 XEXP (part[1][1], 0));
9451 }
9452
9453 /* We need to do the copy in the right order in case an address register
9454 of the source overlaps the destination. */
9455 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9456 {
9457 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9458 collisions++;
9459 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9460 collisions++;
9461 if (nparts == 3
9462 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9463 collisions++;
9464
9465 /* Collision in the middle part can be handled by reordering. */
9466 if (collisions == 1 && nparts == 3
9467 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9468 {
9469 rtx tmp;
9470 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9471 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9472 }
9473
9474 /* If there are more collisions, we can't handle them by reordering.
9475 Do an lea to the last part and use only one colliding move. */
9476 else if (collisions > 1)
9477 {
9478 collisions = 1;
9479 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9480 XEXP (part[1][0], 0)));
9481 part[1][0] = change_address (part[1][0],
9482 TARGET_64BIT ? DImode : SImode,
9483 part[0][nparts - 1]);
9484 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9485 if (nparts == 3)
9486 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9487 }
9488 }
9489
9490 if (push)
9491 {
9492 if (!TARGET_64BIT)
9493 {
9494 if (nparts == 3)
9495 {
9496 /* We use only the first 12 bytes of the TFmode value, but for pushing
9497 we are required to adjust the stack as if we were pushing a real
9498 16-byte value. */
9499 if (mode == TFmode && !TARGET_64BIT)
9500 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9501 GEN_INT (-4)));
9502 emit_move_insn (part[0][2], part[1][2]);
9503 }
9504 }
9505 else
9506 {
9507 /* In 64bit mode a 32bit push is not available. In case this is a
9508 register, that is OK - we will just use the larger counterpart. We also
9509 retype the memory - it comes from an attempt to avoid a REX prefix on
9510 moving the second half of a TFmode value. */
9511 if (GET_MODE (part[1][1]) == SImode)
9512 {
9513 if (GET_CODE (part[1][1]) == MEM)
9514 part[1][1] = adjust_address (part[1][1], DImode, 0);
9515 else if (REG_P (part[1][1]))
9516 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9517 else
9518 abort ();
9519 if (GET_MODE (part[1][0]) == SImode)
9520 part[1][0] = part[1][1];
9521 }
9522 }
9523 emit_move_insn (part[0][1], part[1][1]);
9524 emit_move_insn (part[0][0], part[1][0]);
9525 return;
9526 }
9527
9528 /* Choose correct order to not overwrite the source before it is copied. */
9529 if ((REG_P (part[0][0])
9530 && REG_P (part[1][1])
9531 && (REGNO (part[0][0]) == REGNO (part[1][1])
9532 || (nparts == 3
9533 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9534 || (collisions > 0
9535 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9536 {
9537 if (nparts == 3)
9538 {
9539 operands[2] = part[0][2];
9540 operands[3] = part[0][1];
9541 operands[4] = part[0][0];
9542 operands[5] = part[1][2];
9543 operands[6] = part[1][1];
9544 operands[7] = part[1][0];
9545 }
9546 else
9547 {
9548 operands[2] = part[0][1];
9549 operands[3] = part[0][0];
9550 operands[5] = part[1][1];
9551 operands[6] = part[1][0];
9552 }
9553 }
9554 else
9555 {
9556 if (nparts == 3)
9557 {
9558 operands[2] = part[0][0];
9559 operands[3] = part[0][1];
9560 operands[4] = part[0][2];
9561 operands[5] = part[1][0];
9562 operands[6] = part[1][1];
9563 operands[7] = part[1][2];
9564 }
9565 else
9566 {
9567 operands[2] = part[0][0];
9568 operands[3] = part[0][1];
9569 operands[5] = part[1][0];
9570 operands[6] = part[1][1];
9571 }
9572 }
9573 emit_move_insn (operands[2], operands[5]);
9574 emit_move_insn (operands[3], operands[6]);
9575 if (nparts == 3)
9576 emit_move_insn (operands[4], operands[7]);
9577
9578 return;
9579 }
9580
9581 void
9582 ix86_split_ashldi (operands, scratch)
9583 rtx *operands, scratch;
9584 {
9585 rtx low[2], high[2];
9586 int count;
9587
9588 if (GET_CODE (operands[2]) == CONST_INT)
9589 {
9590 split_di (operands, 2, low, high);
9591 count = INTVAL (operands[2]) & 63;
9592
9593 if (count >= 32)
9594 {
9595 emit_move_insn (high[0], low[1]);
9596 emit_move_insn (low[0], const0_rtx);
9597
9598 if (count > 32)
9599 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9600 }
9601 else
9602 {
9603 if (!rtx_equal_p (operands[0], operands[1]))
9604 emit_move_insn (operands[0], operands[1]);
9605 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9606 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9607 }
9608 }
9609 else
9610 {
9611 if (!rtx_equal_p (operands[0], operands[1]))
9612 emit_move_insn (operands[0], operands[1]);
9613
9614 split_di (operands, 1, low, high);
9615
9616 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9617 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9618
9619 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9620 {
9621 if (! no_new_pseudos)
9622 scratch = force_reg (SImode, const0_rtx);
9623 else
9624 emit_move_insn (scratch, const0_rtx);
9625
9626 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9627 scratch));
9628 }
9629 else
9630 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9631 }
9632 }
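/* For a constant count below 32, e.g. a DImode shift left by 10 with the
   value in edx:eax, this produces "shldl $10, %eax, %edx; sall $10, %eax";
   for a count of 32 or more the low word simply becomes the new high word
   (further shifted by count - 32) and the low word is cleared.  */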
9633
9634 void
9635 ix86_split_ashrdi (operands, scratch)
9636 rtx *operands, scratch;
9637 {
9638 rtx low[2], high[2];
9639 int count;
9640
9641 if (GET_CODE (operands[2]) == CONST_INT)
9642 {
9643 split_di (operands, 2, low, high);
9644 count = INTVAL (operands[2]) & 63;
9645
9646 if (count >= 32)
9647 {
9648 emit_move_insn (low[0], high[1]);
9649
9650 if (! reload_completed)
9651 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9652 else
9653 {
9654 emit_move_insn (high[0], low[0]);
9655 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9656 }
9657
9658 if (count > 32)
9659 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9660 }
9661 else
9662 {
9663 if (!rtx_equal_p (operands[0], operands[1]))
9664 emit_move_insn (operands[0], operands[1]);
9665 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9666 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9667 }
9668 }
9669 else
9670 {
9671 if (!rtx_equal_p (operands[0], operands[1]))
9672 emit_move_insn (operands[0], operands[1]);
9673
9674 split_di (operands, 1, low, high);
9675
9676 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9677 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9678
9679 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9680 {
9681 if (! no_new_pseudos)
9682 scratch = gen_reg_rtx (SImode);
9683 emit_move_insn (scratch, high[0]);
9684 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9685 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9686 scratch));
9687 }
9688 else
9689 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9690 }
9691 }
9692
9693 void
9694 ix86_split_lshrdi (operands, scratch)
9695 rtx *operands, scratch;
9696 {
9697 rtx low[2], high[2];
9698 int count;
9699
9700 if (GET_CODE (operands[2]) == CONST_INT)
9701 {
9702 split_di (operands, 2, low, high);
9703 count = INTVAL (operands[2]) & 63;
9704
9705 if (count >= 32)
9706 {
9707 emit_move_insn (low[0], high[1]);
9708 emit_move_insn (high[0], const0_rtx);
9709
9710 if (count > 32)
9711 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9712 }
9713 else
9714 {
9715 if (!rtx_equal_p (operands[0], operands[1]))
9716 emit_move_insn (operands[0], operands[1]);
9717 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9718 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9719 }
9720 }
9721 else
9722 {
9723 if (!rtx_equal_p (operands[0], operands[1]))
9724 emit_move_insn (operands[0], operands[1]);
9725
9726 split_di (operands, 1, low, high);
9727
9728 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9729 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9730
9731 /* Heh. By reversing the arguments, we can reuse this pattern. */
9732 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9733 {
9734 if (! no_new_pseudos)
9735 scratch = force_reg (SImode, const0_rtx);
9736 else
9737 emit_move_insn (scratch, const0_rtx);
9738
9739 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9740 scratch));
9741 }
9742 else
9743 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9744 }
9745 }
9746
9747 /* Helper function for the string operations below. Test whether the bits of
9748 VARIABLE selected by the mask VALUE are zero; if so, jump to the returned label. */
9749 static rtx
9750 ix86_expand_aligntest (variable, value)
9751 rtx variable;
9752 int value;
9753 {
9754 rtx label = gen_label_rtx ();
9755 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9756 if (GET_MODE (variable) == DImode)
9757 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9758 else
9759 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9760 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9761 1, label);
9762 return label;
9763 }
9764
9765 /* Decrease COUNTREG by VALUE bytes. */
9766 static void
9767 ix86_adjust_counter (countreg, value)
9768 rtx countreg;
9769 HOST_WIDE_INT value;
9770 {
9771 if (GET_MODE (countreg) == DImode)
9772 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9773 else
9774 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9775 }
9776
9777 /* Zero extend EXP, which may be SImode, into a Pmode register. */
9778 rtx
9779 ix86_zero_extend_to_Pmode (exp)
9780 rtx exp;
9781 {
9782 rtx r;
9783 if (GET_MODE (exp) == VOIDmode)
9784 return force_reg (Pmode, exp);
9785 if (GET_MODE (exp) == Pmode)
9786 return copy_to_mode_reg (Pmode, exp);
9787 r = gen_reg_rtx (Pmode);
9788 emit_insn (gen_zero_extendsidi2 (r, exp));
9789 return r;
9790 }
9791
9792 /* Expand a string move (memcpy) operation. Use i386 string operations when
9793 profitable. ix86_expand_clrstr contains similar code. */
9794 int
9795 ix86_expand_movstr (dst, src, count_exp, align_exp)
9796 rtx dst, src, count_exp, align_exp;
9797 {
9798 rtx srcreg, destreg, countreg;
9799 enum machine_mode counter_mode;
9800 HOST_WIDE_INT align = 0;
9801 unsigned HOST_WIDE_INT count = 0;
9802 rtx insns;
9803
9804 start_sequence ();
9805
9806 if (GET_CODE (align_exp) == CONST_INT)
9807 align = INTVAL (align_exp);
9808
9809 /* This simple hack avoids all inlining code and simplifies code below. */
9810 if (!TARGET_ALIGN_STRINGOPS)
9811 align = 64;
9812
9813 if (GET_CODE (count_exp) == CONST_INT)
9814 count = INTVAL (count_exp);
9815
9816 /* Figure out the proper mode for the counter. For 32-bit targets it is
9817 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
9818 Set count to the number of bytes copied when known at compile time. */
9819 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9820 || x86_64_zero_extended_value (count_exp))
9821 counter_mode = SImode;
9822 else
9823 counter_mode = DImode;
9824
9825 if (counter_mode != SImode && counter_mode != DImode)
9826 abort ();
9827
9828 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9829 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9830
9831 emit_insn (gen_cld ());
9832
9833 /* When optimizing for size, emit a simple rep ; movsb instruction for
9834 counts not divisible by 4. */
9835
9836 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9837 {
9838 countreg = ix86_zero_extend_to_Pmode (count_exp);
9839 if (TARGET_64BIT)
9840 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9841 destreg, srcreg, countreg));
9842 else
9843 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9844 destreg, srcreg, countreg));
9845 }
9846
9847 /* For constant aligned (or small unaligned) copies use rep movsl
9848 followed by code copying the rest. For PentiumPro ensure 8 byte
9849 alignment to allow rep movsl acceleration. */
9850
9851 else if (count != 0
9852 && (align >= 8
9853 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9854 || optimize_size || count < (unsigned int) 64))
9855 {
9856 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9857 if (count & ~(size - 1))
9858 {
9859 countreg = copy_to_mode_reg (counter_mode,
9860 GEN_INT ((count >> (size == 4 ? 2 : 3))
9861 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9862 countreg = ix86_zero_extend_to_Pmode (countreg);
9863 if (size == 4)
9864 {
9865 if (TARGET_64BIT)
9866 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9867 destreg, srcreg, countreg));
9868 else
9869 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9870 destreg, srcreg, countreg));
9871 }
9872 else
9873 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9874 destreg, srcreg, countreg));
9875 }
9876 if (size == 8 && (count & 0x04))
9877 emit_insn (gen_strmovsi (destreg, srcreg));
9878 if (count & 0x02)
9879 emit_insn (gen_strmovhi (destreg, srcreg));
9880 if (count & 0x01)
9881 emit_insn (gen_strmovqi (destreg, srcreg));
9882 }
9883 /* The generic code based on the glibc implementation:
9884 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9885 allowing accelerated copying there)
9886 - copy the data using rep movsl
9887 - copy the rest. */
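/* Very roughly, in the common 32-bit case with an unknown count and an
   unknown alignment, the sequence emitted below behaves like:

	if (count <= 3) goto tail;
	if (dest & 1) { movsb; count--; }
	if (dest & 2) { movsw; count -= 2; }
	ecx = count >> 2; rep movsl;
     tail:
	if (count & 2) movsw;
	if (count & 1) movsb;

   This is only a sketch of the generated code; the PentiumPro and 64-bit
   paths differ in the details.  */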
9888 else
9889 {
9890 rtx countreg2;
9891 rtx label = NULL;
9892 int desired_alignment = (TARGET_PENTIUMPRO
9893 && (count == 0 || count >= (unsigned int) 260)
9894 ? 8 : UNITS_PER_WORD);
9895
9896 /* In case we don't know anything about the alignment, default to the
9897 library version, since it is usually equally fast and results in
9898 shorter code. */
9899 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9900 {
9901 end_sequence ();
9902 return 0;
9903 }
9904
9905 if (TARGET_SINGLE_STRINGOP)
9906 emit_insn (gen_cld ());
9907
9908 countreg2 = gen_reg_rtx (Pmode);
9909 countreg = copy_to_mode_reg (counter_mode, count_exp);
9910
9911 /* We don't use loops to align destination and to copy parts smaller
9912 than 4 bytes, because gcc is able to optimize such code better (in
9913 the case the destination or the count really is aligned, gcc is often
9914 able to predict the branches) and also it is friendlier to the
9915 hardware branch prediction.
9916
9917 Using loops is beneficial for the generic case, because we can
9918 handle small counts using the loops. Many CPUs (such as the Athlon)
9919 have large REP prefix setup costs.
9920
9921 This is quite costly. Maybe we can revisit this decision later or
9922 add some customizability to this code. */
9923
9924 if (count == 0 && align < desired_alignment)
9925 {
9926 label = gen_label_rtx ();
9927 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9928 LEU, 0, counter_mode, 1, label);
9929 }
9930 if (align <= 1)
9931 {
9932 rtx label = ix86_expand_aligntest (destreg, 1);
9933 emit_insn (gen_strmovqi (destreg, srcreg));
9934 ix86_adjust_counter (countreg, 1);
9935 emit_label (label);
9936 LABEL_NUSES (label) = 1;
9937 }
9938 if (align <= 2)
9939 {
9940 rtx label = ix86_expand_aligntest (destreg, 2);
9941 emit_insn (gen_strmovhi (destreg, srcreg));
9942 ix86_adjust_counter (countreg, 2);
9943 emit_label (label);
9944 LABEL_NUSES (label) = 1;
9945 }
9946 if (align <= 4 && desired_alignment > 4)
9947 {
9948 rtx label = ix86_expand_aligntest (destreg, 4);
9949 emit_insn (gen_strmovsi (destreg, srcreg));
9950 ix86_adjust_counter (countreg, 4);
9951 emit_label (label);
9952 LABEL_NUSES (label) = 1;
9953 }
9954
9955 if (label && desired_alignment > 4 && !TARGET_64BIT)
9956 {
9957 emit_label (label);
9958 LABEL_NUSES (label) = 1;
9959 label = NULL_RTX;
9960 }
9961 if (!TARGET_SINGLE_STRINGOP)
9962 emit_insn (gen_cld ());
9963 if (TARGET_64BIT)
9964 {
9965 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9966 GEN_INT (3)));
9967 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9968 destreg, srcreg, countreg2));
9969 }
9970 else
9971 {
9972 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9973 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9974 destreg, srcreg, countreg2));
9975 }
9976
9977 if (label)
9978 {
9979 emit_label (label);
9980 LABEL_NUSES (label) = 1;
9981 }
9982 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9983 emit_insn (gen_strmovsi (destreg, srcreg));
9984 if ((align <= 4 || count == 0) && TARGET_64BIT)
9985 {
9986 rtx label = ix86_expand_aligntest (countreg, 4);
9987 emit_insn (gen_strmovsi (destreg, srcreg));
9988 emit_label (label);
9989 LABEL_NUSES (label) = 1;
9990 }
9991 if (align > 2 && count != 0 && (count & 2))
9992 emit_insn (gen_strmovhi (destreg, srcreg));
9993 if (align <= 2 || count == 0)
9994 {
9995 rtx label = ix86_expand_aligntest (countreg, 2);
9996 emit_insn (gen_strmovhi (destreg, srcreg));
9997 emit_label (label);
9998 LABEL_NUSES (label) = 1;
9999 }
10000 if (align > 1 && count != 0 && (count & 1))
10001 emit_insn (gen_strmovqi (destreg, srcreg));
10002 if (align <= 1 || count == 0)
10003 {
10004 rtx label = ix86_expand_aligntest (countreg, 1);
10005 emit_insn (gen_strmovqi (destreg, srcreg));
10006 emit_label (label);
10007 LABEL_NUSES (label) = 1;
10008 }
10009 }
10010
10011 insns = get_insns ();
10012 end_sequence ();
10013
10014 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10015 emit_insn (insns);
10016 return 1;
10017 }
10018
10019 /* Expand a string clear operation (bzero). Use i386 string operations when
10020 profitable. ix86_expand_movstr contains similar code. */
10021 int
10022 ix86_expand_clrstr (src, count_exp, align_exp)
10023 rtx src, count_exp, align_exp;
10024 {
10025 rtx destreg, zeroreg, countreg;
10026 enum machine_mode counter_mode;
10027 HOST_WIDE_INT align = 0;
10028 unsigned HOST_WIDE_INT count = 0;
10029
10030 if (GET_CODE (align_exp) == CONST_INT)
10031 align = INTVAL (align_exp);
10032
10033 /* This simple hack avoids all inlining code and simplifies code below. */
10034 if (!TARGET_ALIGN_STRINGOPS)
10035 align = 32;
10036
10037 if (GET_CODE (count_exp) == CONST_INT)
10038 count = INTVAL (count_exp);
10039 /* Figure out the proper mode for the counter. For 32-bit targets it is
10040 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10041 Set count to the number of bytes cleared when known at compile time. */
10042 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10043 || x86_64_zero_extended_value (count_exp))
10044 counter_mode = SImode;
10045 else
10046 counter_mode = DImode;
10047
10048 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10049
10050 emit_insn (gen_cld ());
10051
10052 /* When optimizing for size, emit a simple rep ; stosb instruction for
10053 counts not divisible by 4. */
10054
10055 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10056 {
10057 countreg = ix86_zero_extend_to_Pmode (count_exp);
10058 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10059 if (TARGET_64BIT)
10060 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10061 destreg, countreg));
10062 else
10063 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10064 destreg, countreg));
10065 }
10066 else if (count != 0
10067 && (align >= 8
10068 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10069 || optimize_size || count < (unsigned int) 64))
10070 {
10071 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10072 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10073 if (count & ~(size - 1))
10074 {
10075 countreg = copy_to_mode_reg (counter_mode,
10076 GEN_INT ((count >> (size == 4 ? 2 : 3))
10077 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10078 countreg = ix86_zero_extend_to_Pmode (countreg);
10079 if (size == 4)
10080 {
10081 if (TARGET_64BIT)
10082 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10083 destreg, countreg));
10084 else
10085 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10086 destreg, countreg));
10087 }
10088 else
10089 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10090 destreg, countreg));
10091 }
10092 if (size == 8 && (count & 0x04))
10093 emit_insn (gen_strsetsi (destreg,
10094 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10095 if (count & 0x02)
10096 emit_insn (gen_strsethi (destreg,
10097 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10098 if (count & 0x01)
10099 emit_insn (gen_strsetqi (destreg,
10100 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10101 }
10102 else
10103 {
10104 rtx countreg2;
10105 rtx label = NULL;
10106 /* Compute desired alignment of the string operation. */
10107 int desired_alignment = (TARGET_PENTIUMPRO
10108 && (count == 0 || count >= (unsigned int) 260)
10109 ? 8 : UNITS_PER_WORD);
10110
10111 /* In case we don't know anything about the alignment, default to the
10112 library version, since it is usually equally fast and results in
10113 shorter code. */
10114 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10115 return 0;
10116
10117 if (TARGET_SINGLE_STRINGOP)
10118 emit_insn (gen_cld ());
10119
10120 countreg2 = gen_reg_rtx (Pmode);
10121 countreg = copy_to_mode_reg (counter_mode, count_exp);
10122 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10123
10124 if (count == 0 && align < desired_alignment)
10125 {
10126 label = gen_label_rtx ();
10127 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10128 LEU, 0, counter_mode, 1, label);
10129 }
10130 if (align <= 1)
10131 {
10132 rtx label = ix86_expand_aligntest (destreg, 1);
10133 emit_insn (gen_strsetqi (destreg,
10134 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10135 ix86_adjust_counter (countreg, 1);
10136 emit_label (label);
10137 LABEL_NUSES (label) = 1;
10138 }
10139 if (align <= 2)
10140 {
10141 rtx label = ix86_expand_aligntest (destreg, 2);
10142 emit_insn (gen_strsethi (destreg,
10143 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10144 ix86_adjust_counter (countreg, 2);
10145 emit_label (label);
10146 LABEL_NUSES (label) = 1;
10147 }
10148 if (align <= 4 && desired_alignment > 4)
10149 {
10150 rtx label = ix86_expand_aligntest (destreg, 4);
10151 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10152 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10153 : zeroreg)));
10154 ix86_adjust_counter (countreg, 4);
10155 emit_label (label);
10156 LABEL_NUSES (label) = 1;
10157 }
10158
10159 if (label && desired_alignment > 4 && !TARGET_64BIT)
10160 {
10161 emit_label (label);
10162 LABEL_NUSES (label) = 1;
10163 label = NULL_RTX;
10164 }
10165
10166 if (!TARGET_SINGLE_STRINGOP)
10167 emit_insn (gen_cld ());
10168 if (TARGET_64BIT)
10169 {
10170 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10171 GEN_INT (3)));
10172 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10173 destreg, countreg2));
10174 }
10175 else
10176 {
10177 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10178 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10179 destreg, countreg2));
10180 }
10181 if (label)
10182 {
10183 emit_label (label);
10184 LABEL_NUSES (label) = 1;
10185 }
10186
10187 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10188 emit_insn (gen_strsetsi (destreg,
10189 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10190 if (TARGET_64BIT && (align <= 4 || count == 0))
10191 {
10192 rtx label = ix86_expand_aligntest (countreg, 4);
10193 emit_insn (gen_strsetsi (destreg,
10194 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10195 emit_label (label);
10196 LABEL_NUSES (label) = 1;
10197 }
10198 if (align > 2 && count != 0 && (count & 2))
10199 emit_insn (gen_strsethi (destreg,
10200 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10201 if (align <= 2 || count == 0)
10202 {
10203 rtx label = ix86_expand_aligntest (countreg, 2);
10204 emit_insn (gen_strsethi (destreg,
10205 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10206 emit_label (label);
10207 LABEL_NUSES (label) = 1;
10208 }
10209 if (align > 1 && count != 0 && (count & 1))
10210 emit_insn (gen_strsetqi (destreg,
10211 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10212 if (align <= 1 || count == 0)
10213 {
10214 rtx label = ix86_expand_aligntest (countreg, 1);
10215 emit_insn (gen_strsetqi (destreg,
10216 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10217 emit_label (label);
10218 LABEL_NUSES (label) = 1;
10219 }
10220 }
10221 return 1;
10222 }
10223 /* Expand strlen. */
10224 int
10225 ix86_expand_strlen (out, src, eoschar, align)
10226 rtx out, src, eoschar, align;
10227 {
10228 rtx addr, scratch1, scratch2, scratch3, scratch4;
10229
10230 /* The generic case of the strlen expander is long. Avoid expanding it
10231 unless TARGET_INLINE_ALL_STRINGOPS. */
10232
10233 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10234 && !TARGET_INLINE_ALL_STRINGOPS
10235 && !optimize_size
10236 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10237 return 0;
10238
10239 addr = force_reg (Pmode, XEXP (src, 0));
10240 scratch1 = gen_reg_rtx (Pmode);
10241
10242 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10243 && !optimize_size)
10244 {
10245 /* Well it seems that some optimizer does not combine a call like
10246 foo(strlen(bar), strlen(bar));
10247 when the move and the subtraction are done here. It does calculate
10248 the length just once when these instructions are done inside of
10249 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10250 often used and I use one fewer register for the lifetime of
10251 output_strlen_unroll() this is better. */
10252
10253 emit_move_insn (out, addr);
10254
10255 ix86_expand_strlensi_unroll_1 (out, align);
10256
10257 /* strlensi_unroll_1 returns the address of the zero at the end of
10258 the string, like memchr(), so compute the length by subtracting
10259 the start address. */
10260 if (TARGET_64BIT)
10261 emit_insn (gen_subdi3 (out, out, addr));
10262 else
10263 emit_insn (gen_subsi3 (out, out, addr));
10264 }
10265 else
10266 {
10267 scratch2 = gen_reg_rtx (Pmode);
10268 scratch3 = gen_reg_rtx (Pmode);
10269 scratch4 = force_reg (Pmode, constm1_rtx);
10270
10271 emit_move_insn (scratch3, addr);
10272 eoschar = force_reg (QImode, eoschar);
10273
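/* The scan below is the classic "repnz scasb" idiom: the count register
   starts at -1 (SCRATCH4), so after the scan it holds -(length + 2); the
   one's complement minus 1 computed afterwards recovers the string
   length.  */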
10274 emit_insn (gen_cld ());
10275 if (TARGET_64BIT)
10276 {
10277 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10278 align, scratch4, scratch3));
10279 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10280 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10281 }
10282 else
10283 {
10284 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10285 align, scratch4, scratch3));
10286 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10287 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10288 }
10289 }
10290 return 1;
10291 }
10292
10293 /* Expand the appropriate insns for doing strlen if not just doing
10294 repnz; scasb
10295
10296 out = result, initialized with the start address
10297 align_rtx = alignment of the address.
10298 scratch = scratch register, initialized with the start address when
10299 not aligned, otherwise undefined
10300
10301 This is just the body. It needs the initialisations mentioned above and
10302 some address computing at the end. These things are done in i386.md. */
10303
10304 static void
10305 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10306 rtx out, align_rtx;
10307 {
10308 int align;
10309 rtx tmp;
10310 rtx align_2_label = NULL_RTX;
10311 rtx align_3_label = NULL_RTX;
10312 rtx align_4_label = gen_label_rtx ();
10313 rtx end_0_label = gen_label_rtx ();
10314 rtx mem;
10315 rtx tmpreg = gen_reg_rtx (SImode);
10316 rtx scratch = gen_reg_rtx (SImode);
10317
10318 align = 0;
10319 if (GET_CODE (align_rtx) == CONST_INT)
10320 align = INTVAL (align_rtx);
10321
10322 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10323
10324 /* Is there a known alignment and is it less than 4? */
10325 if (align < 4)
10326 {
10327 rtx scratch1 = gen_reg_rtx (Pmode);
10328 emit_move_insn (scratch1, out);
10329 /* Is there a known alignment and is it not 2? */
10330 if (align != 2)
10331 {
10332 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10333 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10334
10335 /* Leave just the 3 lower bits. */
10336 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10337 NULL_RTX, 0, OPTAB_WIDEN);
10338
10339 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10340 Pmode, 1, align_4_label);
10341 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10342 Pmode, 1, align_2_label);
10343 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10344 Pmode, 1, align_3_label);
10345 }
10346 else
10347 {
10348 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10349 check whether it is aligned to a 4-byte boundary. */
10350
10351 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10352 NULL_RTX, 0, OPTAB_WIDEN);
10353
10354 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10355 Pmode, 1, align_4_label);
10356 }
10357
10358 mem = gen_rtx_MEM (QImode, out);
10359
10360 /* Now compare the bytes. */
10361
10362 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10363 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10364 QImode, 1, end_0_label);
10365
10366 /* Increment the address. */
10367 if (TARGET_64BIT)
10368 emit_insn (gen_adddi3 (out, out, const1_rtx));
10369 else
10370 emit_insn (gen_addsi3 (out, out, const1_rtx));
10371
10372 /* Not needed with an alignment of 2 */
10373 if (align != 2)
10374 {
10375 emit_label (align_2_label);
10376
10377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10378 end_0_label);
10379
10380 if (TARGET_64BIT)
10381 emit_insn (gen_adddi3 (out, out, const1_rtx));
10382 else
10383 emit_insn (gen_addsi3 (out, out, const1_rtx));
10384
10385 emit_label (align_3_label);
10386 }
10387
10388 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10389 end_0_label);
10390
10391 if (TARGET_64BIT)
10392 emit_insn (gen_adddi3 (out, out, const1_rtx));
10393 else
10394 emit_insn (gen_addsi3 (out, out, const1_rtx));
10395 }
10396
10397 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10398 align this loop: it only makes the program bigger and does not make it
10399 any faster. */
10400 emit_label (align_4_label);
10401
10402 mem = gen_rtx_MEM (SImode, out);
10403 emit_move_insn (scratch, mem);
10404 if (TARGET_64BIT)
10405 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10406 else
10407 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10408
10409 /* This formula yields a nonzero result iff one of the bytes is zero.
10410 This saves three branches inside the loop and many cycles. */
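/* In other words, the code below computes (X - 0x01010101) & ~X & 0x80808080
   for the word X just loaded into SCRATCH -- the usual "does X contain a
   zero byte" bit trick.  For example, X = 0x12003456 yields 0x00800000
   (nonzero, flagging the zero byte), while any word whose four bytes are
   all nonzero yields 0.  */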
10411
10412 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10413 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10414 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10415 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10416 gen_int_mode (0x80808080, SImode)));
10417 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10418 align_4_label);
10419
10420 if (TARGET_CMOVE)
10421 {
10422 rtx reg = gen_reg_rtx (SImode);
10423 rtx reg2 = gen_reg_rtx (Pmode);
10424 emit_move_insn (reg, tmpreg);
10425 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10426
10427 /* If zero is not in the first two bytes, move two bytes forward. */
10428 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10429 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10430 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10431 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10432 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10433 reg,
10434 tmpreg)));
10435 /* Emit lea manually to avoid clobbering of flags. */
10436 emit_insn (gen_rtx_SET (SImode, reg2,
10437 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10438
10439 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10440 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10441 emit_insn (gen_rtx_SET (VOIDmode, out,
10442 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10443 reg2,
10444 out)));
10445
10446 }
10447 else
10448 {
10449 rtx end_2_label = gen_label_rtx ();
10450 /* Is zero in the first two bytes? */
10451
10452 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10453 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10454 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10455 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10456 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10457 pc_rtx);
10458 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10459 JUMP_LABEL (tmp) = end_2_label;
10460
10461 /* Not in the first two. Move two bytes forward. */
10462 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10463 if (TARGET_64BIT)
10464 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10465 else
10466 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10467
10468 emit_label (end_2_label);
10469
10470 }
10471
10472 /* Avoid branch in fixing the byte. */
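/* At this point OUT points 4 bytes (or 6, if the zero was not in the low
   half-word) past the start of the word that contained the terminator, and
   the low byte of TMPREG is 0x80 exactly when the zero is the earlier byte
   of the remaining pair.  Adding TMPREG to itself moves that bit into the
   carry flag, so the subtract-with-borrow of 3 below leaves OUT pointing
   at the terminating zero byte.  */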
10473 tmpreg = gen_lowpart (QImode, tmpreg);
10474 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10475 if (TARGET_64BIT)
10476 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10477 else
10478 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10479
10480 emit_label (end_0_label);
10481 }
10482
10483 void
10484 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10485 rtx retval, fnaddr, callarg1, callarg2, pop;
10486 {
10487 rtx use = NULL, call;
10488
10489 if (pop == const0_rtx)
10490 pop = NULL;
10491 if (TARGET_64BIT && pop)
10492 abort ();
10493
10494 /* Static functions and indirect calls don't need the pic register. */
10495 if (! TARGET_64BIT && flag_pic
10496 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10497 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10498 use_reg (&use, pic_offset_table_rtx);
10499
10500 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10501 {
10502 rtx al = gen_rtx_REG (QImode, 0);
10503 emit_move_insn (al, callarg2);
10504 use_reg (&use, al);
10505 }
10506
10507 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10508 {
10509 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10510 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10511 }
10512
10513 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10514 if (retval)
10515 call = gen_rtx_SET (VOIDmode, retval, call);
10516 if (pop)
10517 {
10518 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10519 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10520 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10521 }
10522
10523 call = emit_call_insn (call);
10524 if (use)
10525 CALL_INSN_FUNCTION_USAGE (call) = use;
10526 }
10527
10528 \f
10529 /* Clear stack slot assignments remembered from previous functions.
10530 This is called from INIT_EXPANDERS once before RTL is emitted for each
10531 function. */
10532
10533 static struct machine_function *
10534 ix86_init_machine_status ()
10535 {
10536 return ggc_alloc_cleared (sizeof (struct machine_function));
10537 }
10538
10539 /* Return a MEM corresponding to a stack slot with mode MODE.
10540 Allocate a new slot if necessary.
10541
10542 The RTL for a function can have several slots available: N is
10543 which slot to use. */
10544
10545 rtx
10546 assign_386_stack_local (mode, n)
10547 enum machine_mode mode;
10548 int n;
10549 {
10550 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10551 abort ();
10552
10553 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10554 ix86_stack_locals[(int) mode][n]
10555 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10556
10557 return ix86_stack_locals[(int) mode][n];
10558 }
10559
10560 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10561
10562 static GTY(()) rtx ix86_tls_symbol;
10563 rtx
10564 ix86_tls_get_addr ()
10565 {
10566
10567 if (!ix86_tls_symbol)
10568 {
10569 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10570 ? "___tls_get_addr"
10571 : "__tls_get_addr"));
10572 }
10573
10574 return ix86_tls_symbol;
10575 }
10576 \f
10577 /* Calculate the length of the memory address in the instruction
10578 encoding. Does not include the one-byte modrm, opcode, or prefix. */
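/* For example, with 32-bit addressing: (%eax) takes 0 extra bytes, (%esp)
   and 4(%ebp) take 1, 123456(%eax) takes 4, and 4(%eax,%ebx,2) takes 2
   (SIB byte plus disp8).  */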
10579
10580 static int
10581 memory_address_length (addr)
10582 rtx addr;
10583 {
10584 struct ix86_address parts;
10585 rtx base, index, disp;
10586 int len;
10587
10588 if (GET_CODE (addr) == PRE_DEC
10589 || GET_CODE (addr) == POST_INC
10590 || GET_CODE (addr) == PRE_MODIFY
10591 || GET_CODE (addr) == POST_MODIFY)
10592 return 0;
10593
10594 if (! ix86_decompose_address (addr, &parts))
10595 abort ();
10596
10597 base = parts.base;
10598 index = parts.index;
10599 disp = parts.disp;
10600 len = 0;
10601
10602 /* Register Indirect. */
10603 if (base && !index && !disp)
10604 {
10605 /* Special cases: ebp and esp need the two-byte modrm form. */
10606 if (addr == stack_pointer_rtx
10607 || addr == arg_pointer_rtx
10608 || addr == frame_pointer_rtx
10609 || addr == hard_frame_pointer_rtx)
10610 len = 1;
10611 }
10612
10613 /* Direct Addressing. */
10614 else if (disp && !base && !index)
10615 len = 4;
10616
10617 else
10618 {
10619 /* Find the length of the displacement constant. */
10620 if (disp)
10621 {
10622 if (GET_CODE (disp) == CONST_INT
10623 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10624 len = 1;
10625 else
10626 len = 4;
10627 }
10628
10629 /* An index requires the two-byte modrm form. */
10630 if (index)
10631 len += 1;
10632 }
10633
10634 return len;
10635 }
10636
10637 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10638 is set, expect that the insn has an 8-bit immediate alternative. */
10639 int
10640 ix86_attr_length_immediate_default (insn, shortform)
10641 rtx insn;
10642 int shortform;
10643 {
10644 int len = 0;
10645 int i;
10646 extract_insn_cached (insn);
10647 for (i = recog_data.n_operands - 1; i >= 0; --i)
10648 if (CONSTANT_P (recog_data.operand[i]))
10649 {
10650 if (len)
10651 abort ();
10652 if (shortform
10653 && GET_CODE (recog_data.operand[i]) == CONST_INT
10654 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10655 len = 1;
10656 else
10657 {
10658 switch (get_attr_mode (insn))
10659 {
10660 case MODE_QI:
10661 len+=1;
10662 break;
10663 case MODE_HI:
10664 len+=2;
10665 break;
10666 case MODE_SI:
10667 len+=4;
10668 break;
10669 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10670 case MODE_DI:
10671 len+=4;
10672 break;
10673 default:
10674 fatal_insn ("unknown insn mode", insn);
10675 }
10676 }
10677 }
10678 return len;
10679 }
10680 /* Compute default value for "length_address" attribute. */
10681 int
10682 ix86_attr_length_address_default (insn)
10683 rtx insn;
10684 {
10685 int i;
10686 extract_insn_cached (insn);
10687 for (i = recog_data.n_operands - 1; i >= 0; --i)
10688 if (GET_CODE (recog_data.operand[i]) == MEM)
10690 return memory_address_length (XEXP (recog_data.operand[i], 0));
10693 return 0;
10694 }
10695 \f
10696 /* Return the maximum number of instructions a cpu can issue. */
10697
10698 static int
10699 ix86_issue_rate ()
10700 {
10701 switch (ix86_cpu)
10702 {
10703 case PROCESSOR_PENTIUM:
10704 case PROCESSOR_K6:
10705 return 2;
10706
10707 case PROCESSOR_PENTIUMPRO:
10708 case PROCESSOR_PENTIUM4:
10709 case PROCESSOR_ATHLON:
10710 return 3;
10711
10712 default:
10713 return 1;
10714 }
10715 }
10716
10717 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10718 by DEP_INSN and nothing else set by DEP_INSN. */
10719
10720 static int
10721 ix86_flags_dependant (insn, dep_insn, insn_type)
10722 rtx insn, dep_insn;
10723 enum attr_type insn_type;
10724 {
10725 rtx set, set2;
10726
10727 /* Simplify the test for uninteresting insns. */
10728 if (insn_type != TYPE_SETCC
10729 && insn_type != TYPE_ICMOV
10730 && insn_type != TYPE_FCMOV
10731 && insn_type != TYPE_IBR)
10732 return 0;
10733
10734 if ((set = single_set (dep_insn)) != 0)
10735 {
10736 set = SET_DEST (set);
10737 set2 = NULL_RTX;
10738 }
10739 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10740 && XVECLEN (PATTERN (dep_insn), 0) == 2
10741 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10742 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10743 {
10744 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10745 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10746 }
10747 else
10748 return 0;
10749
10750 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10751 return 0;
10752
10753 /* This test is true if the dependent insn reads the flags but
10754 not any other potentially set register. */
10755 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10756 return 0;
10757
10758 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10759 return 0;
10760
10761 return 1;
10762 }
10763
10764 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10765 address with operands set by DEP_INSN. */
10766
10767 static int
10768 ix86_agi_dependant (insn, dep_insn, insn_type)
10769 rtx insn, dep_insn;
10770 enum attr_type insn_type;
10771 {
10772 rtx addr;
10773
10774 if (insn_type == TYPE_LEA
10775 && TARGET_PENTIUM)
10776 {
10777 addr = PATTERN (insn);
10778 if (GET_CODE (addr) == SET)
10779 ;
10780 else if (GET_CODE (addr) == PARALLEL
10781 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10782 addr = XVECEXP (addr, 0, 0);
10783 else
10784 abort ();
10785 addr = SET_SRC (addr);
10786 }
10787 else
10788 {
10789 int i;
10790 extract_insn_cached (insn);
10791 for (i = recog_data.n_operands - 1; i >= 0; --i)
10792 if (GET_CODE (recog_data.operand[i]) == MEM)
10793 {
10794 addr = XEXP (recog_data.operand[i], 0);
10795 goto found;
10796 }
10797 return 0;
10798 found:;
10799 }
10800
10801 return modified_in_p (addr, dep_insn);
10802 }
10803
10804 static int
10805 ix86_adjust_cost (insn, link, dep_insn, cost)
10806 rtx insn, link, dep_insn;
10807 int cost;
10808 {
10809 enum attr_type insn_type, dep_insn_type;
10810 enum attr_memory memory, dep_memory;
10811 rtx set, set2;
10812 int dep_insn_code_number;
10813
10814 /* Anti and output dependencies have zero cost on all CPUs. */
10815 if (REG_NOTE_KIND (link) != 0)
10816 return 0;
10817
10818 dep_insn_code_number = recog_memoized (dep_insn);
10819
10820 /* If we can't recognize the insns, we can't really do anything. */
10821 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10822 return cost;
10823
10824 insn_type = get_attr_type (insn);
10825 dep_insn_type = get_attr_type (dep_insn);
10826
10827 switch (ix86_cpu)
10828 {
10829 case PROCESSOR_PENTIUM:
10830 /* Address Generation Interlock adds a cycle of latency. */
10831 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10832 cost += 1;
10833
10834 /* ??? Compares pair with jump/setcc. */
10835 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10836 cost = 0;
10837
10838 /* Floating point stores require the value to be ready one cycle earlier. */
10839 if (insn_type == TYPE_FMOV
10840 && get_attr_memory (insn) == MEMORY_STORE
10841 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10842 cost += 1;
10843 break;
10844
10845 case PROCESSOR_PENTIUMPRO:
10846 memory = get_attr_memory (insn);
10847 dep_memory = get_attr_memory (dep_insn);
10848
10849 /* Since we can't represent delayed latencies of load+operation,
10850 increase the cost here for non-imov insns. */
10851 if (dep_insn_type != TYPE_IMOV
10852 && dep_insn_type != TYPE_FMOV
10853 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10854 cost += 1;
10855
10856 /* INT->FP conversion is expensive. */
10857 if (get_attr_fp_int_src (dep_insn))
10858 cost += 5;
10859
10860 /* There is one cycle extra latency between an FP op and a store. */
10861 if (insn_type == TYPE_FMOV
10862 && (set = single_set (dep_insn)) != NULL_RTX
10863 && (set2 = single_set (insn)) != NULL_RTX
10864 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10865 && GET_CODE (SET_DEST (set2)) == MEM)
10866 cost += 1;
10867
10868 /* Show the ability of the reorder buffer to hide the latency of a load
10869 by executing it in parallel with the previous instruction, provided
10870 the previous instruction is not needed to compute the address. */
10871 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10872 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10873 {
10874 /* Claim moves take one cycle, as the core can issue one load
10875 at a time and the next load can start a cycle later. */
10876 if (dep_insn_type == TYPE_IMOV
10877 || dep_insn_type == TYPE_FMOV)
10878 cost = 1;
10879 else if (cost > 1)
10880 cost--;
10881 }
10882 break;
10883
10884 case PROCESSOR_K6:
10885 memory = get_attr_memory (insn);
10886 dep_memory = get_attr_memory (dep_insn);
10887 /* The esp dependency is resolved before the instruction is really
10888 finished. */
10889 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10890 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10891 return 1;
10892
10893 /* Since we can't represent delayed latencies of load+operation,
10894 increase the cost here for non-imov insns. */
10895 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10896 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10897
10898 /* INT->FP conversion is expensive. */
10899 if (get_attr_fp_int_src (dep_insn))
10900 cost += 5;
10901
10902 /* Show the ability of the reorder buffer to hide the latency of a load
10903 by executing it in parallel with the previous instruction, provided
10904 the previous instruction is not needed to compute the address. */
10905 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10906 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10907 {
10908 /* Claim moves take one cycle, as the core can issue one load
10909 at a time and the next load can start a cycle later. */
10910 if (dep_insn_type == TYPE_IMOV
10911 || dep_insn_type == TYPE_FMOV)
10912 cost = 1;
10913 else if (cost > 2)
10914 cost -= 2;
10915 else
10916 cost = 1;
10917 }
10918 break;
10919
10920 case PROCESSOR_ATHLON:
10921 memory = get_attr_memory (insn);
10922 dep_memory = get_attr_memory (dep_insn);
10923
10924 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10925 {
10926 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10927 cost += 2;
10928 else
10929 cost += 3;
10930 }
10931 /* Show the ability of the reorder buffer to hide the latency of a load
10932 by executing it in parallel with the previous instruction, provided
10933 the previous instruction is not needed to compute the address. */
10934 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10935 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10936 {
10937 /* Claim moves take one cycle, as the core can issue one load
10938 at a time and the next load can start a cycle later. */
10939 if (dep_insn_type == TYPE_IMOV
10940 || dep_insn_type == TYPE_FMOV)
10941 cost = 0;
10942 else if (cost >= 3)
10943 cost -= 3;
10944 else
10945 cost = 0;
10946 }
10947
10948 default:
10949 break;
10950 }
10951
10952 return cost;
10953 }
10954
10955 static union
10956 {
10957 struct ppro_sched_data
10958 {
10959 rtx decode[3];
10960 int issued_this_cycle;
10961 } ppro;
10962 } ix86_sched_data;
10963
10964 static enum attr_ppro_uops
10965 ix86_safe_ppro_uops (insn)
10966 rtx insn;
10967 {
10968 if (recog_memoized (insn) >= 0)
10969 return get_attr_ppro_uops (insn);
10970 else
10971 return PPRO_UOPS_MANY;
10972 }
10973
10974 static void
10975 ix86_dump_ppro_packet (dump)
10976 FILE *dump;
10977 {
10978 if (ix86_sched_data.ppro.decode[0])
10979 {
10980 fprintf (dump, "PPRO packet: %d",
10981 INSN_UID (ix86_sched_data.ppro.decode[0]));
10982 if (ix86_sched_data.ppro.decode[1])
10983 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10984 if (ix86_sched_data.ppro.decode[2])
10985 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10986 fputc ('\n', dump);
10987 }
10988 }
10989
10990 /* We're beginning a new block. Initialize data structures as necessary. */
10991
10992 static void
10993 ix86_sched_init (dump, sched_verbose, veclen)
10994 FILE *dump ATTRIBUTE_UNUSED;
10995 int sched_verbose ATTRIBUTE_UNUSED;
10996 int veclen ATTRIBUTE_UNUSED;
10997 {
10998 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10999 }
11000
11001 /* Shift INSN to SLOT, and shift everything else down. */
11002
11003 static void
11004 ix86_reorder_insn (insnp, slot)
11005 rtx *insnp, *slot;
11006 {
11007 if (insnp != slot)
11008 {
11009 rtx insn = *insnp;
11010 do
11011 insnp[0] = insnp[1];
11012 while (++insnp != slot);
11013 *insnp = insn;
11014 }
11015 }
11016
11017 static void
11018 ix86_sched_reorder_ppro (ready, e_ready)
11019 rtx *ready;
11020 rtx *e_ready;
11021 {
11022 rtx decode[3];
11023 enum attr_ppro_uops cur_uops;
11024 int issued_this_cycle;
11025 rtx *insnp;
11026 int i;
11027
11028 /* At this point .ppro.decode contains the state of the three
11029 decoders from last "cycle". That is, those insns that were
11030 actually independent. But here we're scheduling for the
11031 decoder, and we may find things that are decodable in the
11032 same cycle. */
11033
11034 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11035 issued_this_cycle = 0;
11036
11037 insnp = e_ready;
11038 cur_uops = ix86_safe_ppro_uops (*insnp);
11039
11040 /* If the decoders are empty, and we've a complex insn at the
11041 head of the priority queue, let it issue without complaint. */
11042 if (decode[0] == NULL)
11043 {
11044 if (cur_uops == PPRO_UOPS_MANY)
11045 {
11046 decode[0] = *insnp;
11047 goto ppro_done;
11048 }
11049
11050 /* Otherwise, search for a 2-4 uop insn to issue. */
11051 while (cur_uops != PPRO_UOPS_FEW)
11052 {
11053 if (insnp == ready)
11054 break;
11055 cur_uops = ix86_safe_ppro_uops (*--insnp);
11056 }
11057
11058 /* If so, move it to the head of the line. */
11059 if (cur_uops == PPRO_UOPS_FEW)
11060 ix86_reorder_insn (insnp, e_ready);
11061
11062 /* Issue the head of the queue. */
11063 issued_this_cycle = 1;
11064 decode[0] = *e_ready--;
11065 }
11066
11067 /* Look for simple insns to fill in the other two slots. */
11068 for (i = 1; i < 3; ++i)
11069 if (decode[i] == NULL)
11070 {
11071 if (ready > e_ready)
11072 goto ppro_done;
11073
11074 insnp = e_ready;
11075 cur_uops = ix86_safe_ppro_uops (*insnp);
11076 while (cur_uops != PPRO_UOPS_ONE)
11077 {
11078 if (insnp == ready)
11079 break;
11080 cur_uops = ix86_safe_ppro_uops (*--insnp);
11081 }
11082
11083 /* Found one. Move it to the head of the queue and issue it. */
11084 if (cur_uops == PPRO_UOPS_ONE)
11085 {
11086 ix86_reorder_insn (insnp, e_ready);
11087 decode[i] = *e_ready--;
11088 issued_this_cycle++;
11089 continue;
11090 }
11091
11092 /* ??? Didn't find one. Ideally, here we would do a lazy split
11093 of 2-uop insns, issue one and queue the other. */
11094 }
11095
11096 ppro_done:
11097 if (issued_this_cycle == 0)
11098 issued_this_cycle = 1;
11099 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11100 }
11101
11102 /* We are about to begin issuing insns for this clock cycle.
11103 Override the default sort algorithm to better slot instructions. */
11104 static int
11105 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11106 FILE *dump ATTRIBUTE_UNUSED;
11107 int sched_verbose ATTRIBUTE_UNUSED;
11108 rtx *ready;
11109 int *n_readyp;
11110 int clock_var ATTRIBUTE_UNUSED;
11111 {
11112 int n_ready = *n_readyp;
11113 rtx *e_ready = ready + n_ready - 1;
11114
11115 /* Make sure to go ahead and initialize key items in
11116 ix86_sched_data if we are not going to bother trying to
11117 reorder the ready queue. */
11118 if (n_ready < 2)
11119 {
11120 ix86_sched_data.ppro.issued_this_cycle = 1;
11121 goto out;
11122 }
11123
11124 switch (ix86_cpu)
11125 {
11126 default:
11127 break;
11128
11129 case PROCESSOR_PENTIUMPRO:
11130 ix86_sched_reorder_ppro (ready, e_ready);
11131 break;
11132 }
11133
11134 out:
11135 return ix86_issue_rate ();
11136 }
11137
11138 /* We are about to issue INSN. Return the number of insns left on the
11139 ready queue that can be issued this cycle. */
11140
11141 static int
11142 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11143 FILE *dump;
11144 int sched_verbose;
11145 rtx insn;
11146 int can_issue_more;
11147 {
11148 int i;
11149 switch (ix86_cpu)
11150 {
11151 default:
11152 return can_issue_more - 1;
11153
11154 case PROCESSOR_PENTIUMPRO:
11155 {
11156 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11157
11158 if (uops == PPRO_UOPS_MANY)
11159 {
11160 if (sched_verbose)
11161 ix86_dump_ppro_packet (dump);
11162 ix86_sched_data.ppro.decode[0] = insn;
11163 ix86_sched_data.ppro.decode[1] = NULL;
11164 ix86_sched_data.ppro.decode[2] = NULL;
11165 if (sched_verbose)
11166 ix86_dump_ppro_packet (dump);
11167 ix86_sched_data.ppro.decode[0] = NULL;
11168 }
11169 else if (uops == PPRO_UOPS_FEW)
11170 {
11171 if (sched_verbose)
11172 ix86_dump_ppro_packet (dump);
11173 ix86_sched_data.ppro.decode[0] = insn;
11174 ix86_sched_data.ppro.decode[1] = NULL;
11175 ix86_sched_data.ppro.decode[2] = NULL;
11176 }
11177 else
11178 {
11179 for (i = 0; i < 3; ++i)
11180 if (ix86_sched_data.ppro.decode[i] == NULL)
11181 {
11182 ix86_sched_data.ppro.decode[i] = insn;
11183 break;
11184 }
11185 if (i == 3)
11186 abort ();
11187 if (i == 2)
11188 {
11189 if (sched_verbose)
11190 ix86_dump_ppro_packet (dump);
11191 ix86_sched_data.ppro.decode[0] = NULL;
11192 ix86_sched_data.ppro.decode[1] = NULL;
11193 ix86_sched_data.ppro.decode[2] = NULL;
11194 }
11195 }
11196 }
11197 return --ix86_sched_data.ppro.issued_this_cycle;
11198 }
11199 }
11200
11201 static int
11202 ia32_use_dfa_pipeline_interface ()
11203 {
11204 if (ix86_cpu == PROCESSOR_PENTIUM)
11205 return 1;
11206 return 0;
11207 }
11208
11209 /* How many alternative schedules to try. This should be as wide as the
11210 scheduling freedom in the DFA, but no wider. Making this value too
11211 large results extra work for the scheduler. */
11212
11213 static int
11214 ia32_multipass_dfa_lookahead ()
11215 {
11216 if (ix86_cpu == PROCESSOR_PENTIUM)
11217 return 2;
11218 else
11219 return 0;
11220 }
11221
11222 \f
11223 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11224 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11225 appropriate. */
11226
11227 void
11228 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11229 rtx insns;
11230 rtx dstref, srcref, dstreg, srcreg;
11231 {
11232 rtx insn;
11233
11234 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11235 if (INSN_P (insn))
11236 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11237 dstreg, srcreg);
11238 }
11239
11240 /* Subroutine of above to actually do the updating by recursively walking
11241 the rtx. */
11242
11243 static void
11244 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11245 rtx x;
11246 rtx dstref, srcref, dstreg, srcreg;
11247 {
11248 enum rtx_code code = GET_CODE (x);
11249 const char *format_ptr = GET_RTX_FORMAT (code);
11250 int i, j;
11251
11252 if (code == MEM && XEXP (x, 0) == dstreg)
11253 MEM_COPY_ATTRIBUTES (x, dstref);
11254 else if (code == MEM && XEXP (x, 0) == srcreg)
11255 MEM_COPY_ATTRIBUTES (x, srcref);
11256
11257 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11258 {
11259 if (*format_ptr == 'e')
11260 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11261 dstreg, srcreg);
11262 else if (*format_ptr == 'E')
11263 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11264 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11265 dstreg, srcreg);
11266 }
11267 }
11268 \f
11269 /* Compute the alignment given to a constant that is being placed in memory.
11270 EXP is the constant and ALIGN is the alignment that the object would
11271 ordinarily have.
11272 The value of this function is used instead of that alignment to align
11273 the object. */
11274
11275 int
11276 ix86_constant_alignment (exp, align)
11277 tree exp;
11278 int align;
11279 {
11280 if (TREE_CODE (exp) == REAL_CST)
11281 {
11282 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11283 return 64;
11284 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11285 return 128;
11286 }
11287 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11288 && align < 256)
11289 return 256;
11290
11291 return align;
11292 }
11293
11294 /* Compute the alignment for a static variable.
11295 TYPE is the data type, and ALIGN is the alignment that
11296 the object would ordinarily have. The value of this function is used
11297 instead of that alignment to align the object. */
11298
11299 int
11300 ix86_data_alignment (type, align)
11301 tree type;
11302 int align;
11303 {
11304 if (AGGREGATE_TYPE_P (type)
11305 && TYPE_SIZE (type)
11306 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11307 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11308 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11309 return 256;
11310
11311 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11312 to a 16-byte boundary. */
11313 if (TARGET_64BIT)
11314 {
11315 if (AGGREGATE_TYPE_P (type)
11316 && TYPE_SIZE (type)
11317 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11318 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11319 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11320 return 128;
11321 }
11322
11323 if (TREE_CODE (type) == ARRAY_TYPE)
11324 {
11325 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11326 return 64;
11327 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11328 return 128;
11329 }
11330 else if (TREE_CODE (type) == COMPLEX_TYPE)
11331 {
11332
11333 if (TYPE_MODE (type) == DCmode && align < 64)
11334 return 64;
11335 if (TYPE_MODE (type) == XCmode && align < 128)
11336 return 128;
11337 }
11338 else if ((TREE_CODE (type) == RECORD_TYPE
11339 || TREE_CODE (type) == UNION_TYPE
11340 || TREE_CODE (type) == QUAL_UNION_TYPE)
11341 && TYPE_FIELDS (type))
11342 {
11343 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11344 return 64;
11345 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11346 return 128;
11347 }
11348 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11349 || TREE_CODE (type) == INTEGER_TYPE)
11350 {
11351 if (TYPE_MODE (type) == DFmode && align < 64)
11352 return 64;
11353 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11354 return 128;
11355 }
11356
11357 return align;
11358 }
11359
11360 /* Compute the alignment for a local variable.
11361 TYPE is the data type, and ALIGN is the alignment that
11362 the object would ordinarily have. The value of this macro is used
11363 instead of that alignment to align the object. */
11364
11365 int
11366 ix86_local_alignment (type, align)
11367 tree type;
11368 int align;
11369 {
11370 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11371 to a 16-byte boundary. */
11372 if (TARGET_64BIT)
11373 {
11374 if (AGGREGATE_TYPE_P (type)
11375 && TYPE_SIZE (type)
11376 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11377 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11378 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11379 return 128;
11380 }
11381 if (TREE_CODE (type) == ARRAY_TYPE)
11382 {
11383 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11384 return 64;
11385 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11386 return 128;
11387 }
11388 else if (TREE_CODE (type) == COMPLEX_TYPE)
11389 {
11390 if (TYPE_MODE (type) == DCmode && align < 64)
11391 return 64;
11392 if (TYPE_MODE (type) == XCmode && align < 128)
11393 return 128;
11394 }
11395 else if ((TREE_CODE (type) == RECORD_TYPE
11396 || TREE_CODE (type) == UNION_TYPE
11397 || TREE_CODE (type) == QUAL_UNION_TYPE)
11398 && TYPE_FIELDS (type))
11399 {
11400 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11401 return 64;
11402 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11403 return 128;
11404 }
11405 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11406 || TREE_CODE (type) == INTEGER_TYPE)
11407 {
11408
11409 if (TYPE_MODE (type) == DFmode && align < 64)
11410 return 64;
11411 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11412 return 128;
11413 }
11414 return align;
11415 }
11416 \f
11417 /* Emit RTL insns to initialize the variable parts of a trampoline.
11418 FNADDR is an RTX for the address of the function's pure code.
11419 CXT is an RTX for the static chain value for the function. */
11420 void
11421 x86_initialize_trampoline (tramp, fnaddr, cxt)
11422 rtx tramp, fnaddr, cxt;
11423 {
11424 if (!TARGET_64BIT)
11425 {
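      /* The 10 bytes stored below decode, roughly, as:
	   b9 <cxt:4>	movl  $CXT, %ecx
	   e9 <disp:4>	jmp   FNADDR  (PC-relative)
	 where DISP is FNADDR minus the address just past the jmp.  */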
11426 /* Compute offset from the end of the jmp to the target function. */
11427 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11428 plus_constant (tramp, 10),
11429 NULL_RTX, 1, OPTAB_DIRECT);
11430 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11431 gen_int_mode (0xb9, QImode));
11432 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11433 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11434 gen_int_mode (0xe9, QImode));
11435 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11436 }
11437 else
11438 {
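      /* The bytes stored below decode, roughly, as:
	   49 bb <fnaddr:8>	movabs $FNADDR, %r11   (or 41 bb <fnaddr:4>, movl)
	   49 ba <cxt:8>	movabs $CXT, %r10
	   49 ff e3		jmpq   *%r11  */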
11439 int offset = 0;
11440 /* Try to load the address using the shorter movl instead of movabs.
11441 We may want to support movq for kernel mode, but the kernel does not
11442 use trampolines at the moment. */
11443 if (x86_64_zero_extended_value (fnaddr))
11444 {
11445 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11446 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11447 gen_int_mode (0xbb41, HImode));
11448 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11449 gen_lowpart (SImode, fnaddr));
11450 offset += 6;
11451 }
11452 else
11453 {
11454 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11455 gen_int_mode (0xbb49, HImode));
11456 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11457 fnaddr);
11458 offset += 10;
11459 }
11460 /* Load static chain using movabs to r10. */
11461 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11462 gen_int_mode (0xba49, HImode));
11463 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11464 cxt);
11465 offset += 10;
11466 /* Jump to r11. */
11467 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11468 gen_int_mode (0xff49, HImode));
11469 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11470 gen_int_mode (0xe3, QImode));
11471 offset += 3;
11472 if (offset > TRAMPOLINE_SIZE)
11473 abort ();
11474 }
11475 }
11476 \f
11477 #define def_builtin(MASK, NAME, TYPE, CODE) \
11478 do { \
11479 if ((MASK) & target_flags) \
11480 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11481 NULL, NULL_TREE); \
11482 } while (0)
11483
11484 struct builtin_description
11485 {
11486 const unsigned int mask;
11487 const enum insn_code icode;
11488 const char *const name;
11489 const enum ix86_builtins code;
11490 const enum rtx_code comparison;
11491 const unsigned int flag;
11492 };
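/* A rough sketch of how these fields are used by the tables and expanders
   that follow: MASK is the set of target_flags bits that must be enabled
   for def_builtin to register the builtin; ICODE is the insn pattern used
   when the builtin is expanded; NAME is the __builtin_ia32_* identifier,
   or 0 when the builtin is registered separately with a bespoke type; CODE
   is the IX86_BUILTIN_* value dispatched on in ix86_expand_builtin;
   COMPARISON and FLAG are used by the comparison expanders, a nonzero FLAG
   meaning the operands are swapped, which is how the GT/GE entries below
   reuse the LT/LE comparisons.  */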
11493
11494 /* Used for builtins that are enabled both by -msse and -msse2. */
11495 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11496
11497 static const struct builtin_description bdesc_comi[] =
11498 {
11499 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11500 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11501 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11502 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11503 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11504 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11505 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11506 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11507 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11508 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11509 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11510 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11511 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11512 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11513 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11514 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11515 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11516 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11517 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11518 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11519 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11520 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11521 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11522 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11523 };
11524
11525 static const struct builtin_description bdesc_2arg[] =
11526 {
11527 /* SSE */
11528 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11529 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11530 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11531 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11532 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11533 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11534 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11535 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11536
11537 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11538 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11539 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11540 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11541 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11542 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11543 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11544 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11545 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11546 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11547 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11548 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11549 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11550 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11551 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11552 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11553 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11554 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11555 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11556 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11557 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11558 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11559 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11560 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11561
11562 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11563 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11564 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11565 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11566
11567 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11568 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11569 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11570 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11571 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11572
11573 /* MMX */
11574 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11575 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11576 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11577 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11578 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11579 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11580
11581 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11582 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11583 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11584 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11585 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11586 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11587 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11588 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11589
11590 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11591 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11592 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11593
11594 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11595 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11596 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11597 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11598
11599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11600 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11601
11602 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11603 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11604 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11605 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11606 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11607 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11608
11609 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11610 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11611 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11612 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11613
11614 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11615 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11616 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11617 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11618 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11619 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11620
11621 /* Special. */
11622 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11623 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11624 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11625
11626 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11627 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11628
11629 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11630 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11631 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11632 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11633 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11634 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11635
11636 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11637 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11638 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11639 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11640 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11641 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11642
11643 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11644 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11645 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11646 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11647
11648 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11649 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11650
11651 /* SSE2 */
11652 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11653 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11654 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11655 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11656 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11657 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11658 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11659 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11660
11661 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11662 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11663 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11664 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11665 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11666 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11667 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11668 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11669 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11670 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11671 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11672 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11673 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11674 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11675 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11676 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11677 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11678 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11679 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11680 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11681 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11682 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11683 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11684 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11685
11686 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11687 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11688 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11689 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11690
11691 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11692 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11693 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11694 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11695
11696 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11697 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11698 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11699
11700 /* SSE2 MMX */
11701 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11702 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11703 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11704 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11705 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11706 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11707 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11708 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11709
11710 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11711 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11712 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11713 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11714 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11715 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11716 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11717 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11718
11719 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11720 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11721 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11722 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11723
11724 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11725 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11726 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11728
11729 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11730 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11731
11732 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11733 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11735 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11736 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11737 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11738
11739 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11740 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11743
11744 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11745 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11750
11751 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11754
11755 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11756 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11757
11758 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11759 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11760 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11761 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11762 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11764
11765 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11766 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11767 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11768 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11769 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11770 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11771
11772 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11773 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11774 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11775 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11776
11777 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11778
11779 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11780 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11781 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11782 };
11783
11784 static const struct builtin_description bdesc_1arg[] =
11785 {
11786 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11787 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11788
11789 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11790 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11791 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11792
11793 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11794 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11795 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11796 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11797
11798 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11800 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11801
11802 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11803
11804 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11805 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11806
11807 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11812
11813 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11814
11815 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11817
11818 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11819 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11820 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11821 };
11822
11823 void
11824 ix86_init_builtins ()
11825 {
11826 if (TARGET_MMX)
11827 ix86_init_mmx_sse_builtins ();
11828 }
11829
11830 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11831 is zero. Otherwise, if TARGET_SSE is not set, only the MMX builtins
11832 are defined. */
11833 static void
11834 ix86_init_mmx_sse_builtins ()
11835 {
11836 const struct builtin_description * d;
11837 size_t i;
11838
11839 tree pchar_type_node = build_pointer_type (char_type_node);
11840 tree pfloat_type_node = build_pointer_type (float_type_node);
11841 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11842 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11843 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11844
11845 /* Comparisons. */
11846 tree int_ftype_v4sf_v4sf
11847 = build_function_type_list (integer_type_node,
11848 V4SF_type_node, V4SF_type_node, NULL_TREE);
11849 tree v4si_ftype_v4sf_v4sf
11850 = build_function_type_list (V4SI_type_node,
11851 V4SF_type_node, V4SF_type_node, NULL_TREE);
11852 /* MMX/SSE/integer conversions. */
11853 tree int_ftype_v4sf
11854 = build_function_type_list (integer_type_node,
11855 V4SF_type_node, NULL_TREE);
11856 tree int_ftype_v8qi
11857 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11858 tree v4sf_ftype_v4sf_int
11859 = build_function_type_list (V4SF_type_node,
11860 V4SF_type_node, integer_type_node, NULL_TREE);
11861 tree v4sf_ftype_v4sf_v2si
11862 = build_function_type_list (V4SF_type_node,
11863 V4SF_type_node, V2SI_type_node, NULL_TREE);
11864 tree int_ftype_v4hi_int
11865 = build_function_type_list (integer_type_node,
11866 V4HI_type_node, integer_type_node, NULL_TREE);
11867 tree v4hi_ftype_v4hi_int_int
11868 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11869 integer_type_node, integer_type_node,
11870 NULL_TREE);
11871 /* Miscellaneous. */
11872 tree v8qi_ftype_v4hi_v4hi
11873 = build_function_type_list (V8QI_type_node,
11874 V4HI_type_node, V4HI_type_node, NULL_TREE);
11875 tree v4hi_ftype_v2si_v2si
11876 = build_function_type_list (V4HI_type_node,
11877 V2SI_type_node, V2SI_type_node, NULL_TREE);
11878 tree v4sf_ftype_v4sf_v4sf_int
11879 = build_function_type_list (V4SF_type_node,
11880 V4SF_type_node, V4SF_type_node,
11881 integer_type_node, NULL_TREE);
11882 tree v2si_ftype_v4hi_v4hi
11883 = build_function_type_list (V2SI_type_node,
11884 V4HI_type_node, V4HI_type_node, NULL_TREE);
11885 tree v4hi_ftype_v4hi_int
11886 = build_function_type_list (V4HI_type_node,
11887 V4HI_type_node, integer_type_node, NULL_TREE);
11888 tree v4hi_ftype_v4hi_di
11889 = build_function_type_list (V4HI_type_node,
11890 V4HI_type_node, long_long_unsigned_type_node,
11891 NULL_TREE);
11892 tree v2si_ftype_v2si_di
11893 = build_function_type_list (V2SI_type_node,
11894 V2SI_type_node, long_long_unsigned_type_node,
11895 NULL_TREE);
11896 tree void_ftype_void
11897 = build_function_type (void_type_node, void_list_node);
11898 tree void_ftype_unsigned
11899 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
11900 tree unsigned_ftype_void
11901 = build_function_type (unsigned_type_node, void_list_node);
11902 tree di_ftype_void
11903 = build_function_type (long_long_unsigned_type_node, void_list_node);
11904 tree v4sf_ftype_void
11905 = build_function_type (V4SF_type_node, void_list_node);
11906 tree v2si_ftype_v4sf
11907 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
11908 /* Loads/stores. */
11909 tree void_ftype_v8qi_v8qi_pchar
11910 = build_function_type_list (void_type_node,
11911 V8QI_type_node, V8QI_type_node,
11912 pchar_type_node, NULL_TREE);
11913 tree v4sf_ftype_pfloat
11914 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
11915 /* @@@ the type is bogus */
11916 tree v4sf_ftype_v4sf_pv2si
11917 = build_function_type_list (V4SF_type_node,
11918 V4SF_type_node, pv2di_type_node, NULL_TREE);
11919 tree void_ftype_pv2si_v4sf
11920 = build_function_type_list (void_type_node,
11921 pv2di_type_node, V4SF_type_node, NULL_TREE);
11922 tree void_ftype_pfloat_v4sf
11923 = build_function_type_list (void_type_node,
11924 pfloat_type_node, V4SF_type_node, NULL_TREE);
11925 tree void_ftype_pdi_di
11926 = build_function_type_list (void_type_node,
11927 pdi_type_node, long_long_unsigned_type_node,
11928 NULL_TREE);
11929 tree void_ftype_pv2di_v2di
11930 = build_function_type_list (void_type_node,
11931 pv2di_type_node, V2DI_type_node, NULL_TREE);
11932 /* Normal vector unops. */
11933 tree v4sf_ftype_v4sf
11934 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
11935
11936 /* Normal vector binops. */
11937 tree v4sf_ftype_v4sf_v4sf
11938 = build_function_type_list (V4SF_type_node,
11939 V4SF_type_node, V4SF_type_node, NULL_TREE);
11940 tree v8qi_ftype_v8qi_v8qi
11941 = build_function_type_list (V8QI_type_node,
11942 V8QI_type_node, V8QI_type_node, NULL_TREE);
11943 tree v4hi_ftype_v4hi_v4hi
11944 = build_function_type_list (V4HI_type_node,
11945 V4HI_type_node, V4HI_type_node, NULL_TREE);
11946 tree v2si_ftype_v2si_v2si
11947 = build_function_type_list (V2SI_type_node,
11948 V2SI_type_node, V2SI_type_node, NULL_TREE);
11949 tree di_ftype_di_di
11950 = build_function_type_list (long_long_unsigned_type_node,
11951 long_long_unsigned_type_node,
11952 long_long_unsigned_type_node, NULL_TREE);
11953
11954 tree v2si_ftype_v2sf
11955 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
11956 tree v2sf_ftype_v2si
11957 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
11958 tree v2si_ftype_v2si
11959 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
11960 tree v2sf_ftype_v2sf
11961 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
11962 tree v2sf_ftype_v2sf_v2sf
11963 = build_function_type_list (V2SF_type_node,
11964 V2SF_type_node, V2SF_type_node, NULL_TREE);
11965 tree v2si_ftype_v2sf_v2sf
11966 = build_function_type_list (V2SI_type_node,
11967 V2SF_type_node, V2SF_type_node, NULL_TREE);
11968 tree pint_type_node = build_pointer_type (integer_type_node);
11969 tree pdouble_type_node = build_pointer_type (double_type_node);
11970 tree int_ftype_v2df_v2df
11971 = build_function_type_list (integer_type_node,
11972 V2DF_type_node, V2DF_type_node, NULL_TREE);
11973
11974 tree ti_ftype_void
11975 = build_function_type (intTI_type_node, void_list_node);
11976 tree ti_ftype_ti_ti
11977 = build_function_type_list (intTI_type_node,
11978 intTI_type_node, intTI_type_node, NULL_TREE);
11979 tree void_ftype_pvoid
11980 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
11981 tree v2di_ftype_di
11982 = build_function_type_list (V2DI_type_node,
11983 long_long_unsigned_type_node, NULL_TREE);
11984 tree v4sf_ftype_v4si
11985 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
11986 tree v4si_ftype_v4sf
11987 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
11988 tree v2df_ftype_v4si
11989 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
11990 tree v4si_ftype_v2df
11991 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
11992 tree v2si_ftype_v2df
11993 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
11994 tree v4sf_ftype_v2df
11995 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
11996 tree v2df_ftype_v2si
11997 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
11998 tree v2df_ftype_v4sf
11999 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12000 tree int_ftype_v2df
12001 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12002 tree v2df_ftype_v2df_int
12003 = build_function_type_list (V2DF_type_node,
12004 V2DF_type_node, integer_type_node, NULL_TREE);
12005 tree v4sf_ftype_v4sf_v2df
12006 = build_function_type_list (V4SF_type_node,
12007 V4SF_type_node, V2DF_type_node, NULL_TREE);
12008 tree v2df_ftype_v2df_v4sf
12009 = build_function_type_list (V2DF_type_node,
12010 V2DF_type_node, V4SF_type_node, NULL_TREE);
12011 tree v2df_ftype_v2df_v2df_int
12012 = build_function_type_list (V2DF_type_node,
12013 V2DF_type_node, V2DF_type_node,
12014 integer_type_node,
12015 NULL_TREE);
12016 tree v2df_ftype_v2df_pv2si
12017 = build_function_type_list (V2DF_type_node,
12018 V2DF_type_node, pv2si_type_node, NULL_TREE);
12019 tree void_ftype_pv2si_v2df
12020 = build_function_type_list (void_type_node,
12021 pv2si_type_node, V2DF_type_node, NULL_TREE);
12022 tree void_ftype_pdouble_v2df
12023 = build_function_type_list (void_type_node,
12024 pdouble_type_node, V2DF_type_node, NULL_TREE);
12025 tree void_ftype_pint_int
12026 = build_function_type_list (void_type_node,
12027 pint_type_node, integer_type_node, NULL_TREE);
12028 tree void_ftype_v16qi_v16qi_pchar
12029 = build_function_type_list (void_type_node,
12030 V16QI_type_node, V16QI_type_node,
12031 pchar_type_node, NULL_TREE);
12032 tree v2df_ftype_pdouble
12033 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12034 tree v2df_ftype_v2df_v2df
12035 = build_function_type_list (V2DF_type_node,
12036 V2DF_type_node, V2DF_type_node, NULL_TREE);
12037 tree v16qi_ftype_v16qi_v16qi
12038 = build_function_type_list (V16QI_type_node,
12039 V16QI_type_node, V16QI_type_node, NULL_TREE);
12040 tree v8hi_ftype_v8hi_v8hi
12041 = build_function_type_list (V8HI_type_node,
12042 V8HI_type_node, V8HI_type_node, NULL_TREE);
12043 tree v4si_ftype_v4si_v4si
12044 = build_function_type_list (V4SI_type_node,
12045 V4SI_type_node, V4SI_type_node, NULL_TREE);
12046 tree v2di_ftype_v2di_v2di
12047 = build_function_type_list (V2DI_type_node,
12048 V2DI_type_node, V2DI_type_node, NULL_TREE);
12049 tree v2di_ftype_v2df_v2df
12050 = build_function_type_list (V2DI_type_node,
12051 V2DF_type_node, V2DF_type_node, NULL_TREE);
12052 tree v2df_ftype_v2df
12053 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12054 tree v2df_ftype_double
12055 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12056 tree v2df_ftype_double_double
12057 = build_function_type_list (V2DF_type_node,
12058 double_type_node, double_type_node, NULL_TREE);
12059 tree int_ftype_v8hi_int
12060 = build_function_type_list (integer_type_node,
12061 V8HI_type_node, integer_type_node, NULL_TREE);
12062 tree v8hi_ftype_v8hi_int_int
12063 = build_function_type_list (V8HI_type_node,
12064 V8HI_type_node, integer_type_node,
12065 integer_type_node, NULL_TREE);
12066 tree v2di_ftype_v2di_int
12067 = build_function_type_list (V2DI_type_node,
12068 V2DI_type_node, integer_type_node, NULL_TREE);
12069 tree v4si_ftype_v4si_int
12070 = build_function_type_list (V4SI_type_node,
12071 V4SI_type_node, integer_type_node, NULL_TREE);
12072 tree v8hi_ftype_v8hi_int
12073 = build_function_type_list (V8HI_type_node,
12074 V8HI_type_node, integer_type_node, NULL_TREE);
12075 tree v8hi_ftype_v8hi_v2di
12076 = build_function_type_list (V8HI_type_node,
12077 V8HI_type_node, V2DI_type_node, NULL_TREE);
12078 tree v4si_ftype_v4si_v2di
12079 = build_function_type_list (V4SI_type_node,
12080 V4SI_type_node, V2DI_type_node, NULL_TREE);
12081 tree v4si_ftype_v8hi_v8hi
12082 = build_function_type_list (V4SI_type_node,
12083 V8HI_type_node, V8HI_type_node, NULL_TREE);
12084 tree di_ftype_v8qi_v8qi
12085 = build_function_type_list (long_long_unsigned_type_node,
12086 V8QI_type_node, V8QI_type_node, NULL_TREE);
12087 tree v2di_ftype_v16qi_v16qi
12088 = build_function_type_list (V2DI_type_node,
12089 V16QI_type_node, V16QI_type_node, NULL_TREE);
12090 tree int_ftype_v16qi
12091 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12092
12093 /* Add all builtins that are more or less simple operations on two
12094 operands. */
12095 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12096 {
12097 /* Use one of the operands; the target can have a different mode for
12098 mask-generating compares. */
12099 enum machine_mode mode;
12100 tree type;
12101
12102 if (d->name == 0)
12103 continue;
12104 mode = insn_data[d->icode].operand[1].mode;
12105
12106 switch (mode)
12107 {
12108 case V16QImode:
12109 type = v16qi_ftype_v16qi_v16qi;
12110 break;
12111 case V8HImode:
12112 type = v8hi_ftype_v8hi_v8hi;
12113 break;
12114 case V4SImode:
12115 type = v4si_ftype_v4si_v4si;
12116 break;
12117 case V2DImode:
12118 type = v2di_ftype_v2di_v2di;
12119 break;
12120 case V2DFmode:
12121 type = v2df_ftype_v2df_v2df;
12122 break;
12123 case TImode:
12124 type = ti_ftype_ti_ti;
12125 break;
12126 case V4SFmode:
12127 type = v4sf_ftype_v4sf_v4sf;
12128 break;
12129 case V8QImode:
12130 type = v8qi_ftype_v8qi_v8qi;
12131 break;
12132 case V4HImode:
12133 type = v4hi_ftype_v4hi_v4hi;
12134 break;
12135 case V2SImode:
12136 type = v2si_ftype_v2si_v2si;
12137 break;
12138 case DImode:
12139 type = di_ftype_di_di;
12140 break;
12141
12142 default:
12143 abort ();
12144 }
12145
12146 /* Override for comparisons. */
12147 if (d->icode == CODE_FOR_maskcmpv4sf3
12148 || d->icode == CODE_FOR_maskncmpv4sf3
12149 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12150 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12151 type = v4si_ftype_v4sf_v4sf;
12152
12153 if (d->icode == CODE_FOR_maskcmpv2df3
12154 || d->icode == CODE_FOR_maskncmpv2df3
12155 || d->icode == CODE_FOR_vmmaskcmpv2df3
12156 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12157 type = v2di_ftype_v2df_v2df;
12158
12159 def_builtin (d->mask, d->name, type, d->code);
12160 }
12161
12162 /* Add the remaining MMX insns with somewhat more complicated types. */
12163 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12164 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12165 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12166 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12167 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12168 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12169 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12170
12171 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12172 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12173 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12174
12175 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12176 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12177
12178 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12179 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12180
12181 /* comi/ucomi insns. */
12182 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12183 if (d->mask == MASK_SSE2)
12184 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12185 else
12186 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12187
12188 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12189 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12190 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12191
12192 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12193 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12194 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12195 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12196 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12197 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12198
12199 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12200 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12201 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12202 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12203
12204 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12205 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12206
12207 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12208
12209 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12210 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12211 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12212 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12213 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12214 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12215
12216 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12217 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12218 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12219 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12220
12221 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12222 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12223 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12224 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12225
12226 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12227
12228 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12229
12230 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12231 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12232 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12233 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12234 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12235 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12236
12237 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12238
12239 /* Original 3DNow! */
12240 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12241 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12242 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12243 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12244 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12245 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12256 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12257 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12258 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12259 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12260
12261 /* 3DNow! extension as used in the Athlon CPU. */
12262 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12263 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12264 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12265 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12266 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12267 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12268
12269 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12270
12271 /* SSE2 */
12272 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12273 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12274
12275 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12276 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12277
12278 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12279 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12280 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12281 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12282 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12283 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12284
12285 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12286 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12287 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12288 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12289
12290 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12291 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12292 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12293 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12294 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12295
12296 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12297 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12298 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12299 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12300
12301 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12302 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12303
12304 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12305
12306 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12307 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12308
12309 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12310 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12311 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12312 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12313 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12314
12315 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12316
12317 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12318 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12319
12320 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12321 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12322 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12323
12324 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12325 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12326 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12327
12328 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12329 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12330 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12331 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12332 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12333 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12334 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12335
12336 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12337 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12338 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12339
12340 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12341 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12342 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12343
12344 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12345 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12346 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12347
12348 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12349 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12350
12351 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12352 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12353 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12354
12355 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12356 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12357 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12358
12359 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12360 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12361
12362 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12363 }
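/* A minimal user-level sketch of how these builtins are reached in
   practice (assuming GCC's <xmmintrin.h> wrappers, which are not part of
   this file):

     #include <xmmintrin.h>

     __m128
     add4 (__m128 a, __m128 b)
     {
       return _mm_add_ps (a, b);   (wraps __builtin_ia32_addps)
     }

   With -msse the builtin is registered by ix86_init_mmx_sse_builtins above
   and later expanded through ix86_expand_builtin.  */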
12364
12365 /* Errors in the source file can cause expand_expr to return const0_rtx
12366 where we expect a vector. To avoid crashing, use one of the vector
12367 clear instructions. */
12368 static rtx
12369 safe_vector_operand (x, mode)
12370 rtx x;
12371 enum machine_mode mode;
12372 {
12373 if (x != const0_rtx)
12374 return x;
12375 x = gen_reg_rtx (mode);
12376
12377 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12378 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12379 : gen_rtx_SUBREG (DImode, x, 0)));
12380 else
12381 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12382 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12383 return x;
12384 }
12385
12386 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12387
12388 static rtx
12389 ix86_expand_binop_builtin (icode, arglist, target)
12390 enum insn_code icode;
12391 tree arglist;
12392 rtx target;
12393 {
12394 rtx pat;
12395 tree arg0 = TREE_VALUE (arglist);
12396 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12397 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12398 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12399 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12400 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12401 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12402
12403 if (VECTOR_MODE_P (mode0))
12404 op0 = safe_vector_operand (op0, mode0);
12405 if (VECTOR_MODE_P (mode1))
12406 op1 = safe_vector_operand (op1, mode1);
12407
12408 if (! target
12409 || GET_MODE (target) != tmode
12410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12411 target = gen_reg_rtx (tmode);
12412
12413 /* In case the insn wants input operands in modes different from
12414 the result, abort. */
12415 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12416 abort ();
12417
12418 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12419 op0 = copy_to_mode_reg (mode0, op0);
12420 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12421 op1 = copy_to_mode_reg (mode1, op1);
12422
12423 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12424 yet one of the two must not be a memory. This is normally enforced
12425 by expanders, but we didn't bother to create one here. */
12426 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12427 op0 = copy_to_mode_reg (mode0, op0);
12428
12429 pat = GEN_FCN (icode) (target, op0, op1);
12430 if (! pat)
12431 return 0;
12432 emit_insn (pat);
12433 return target;
12434 }
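/* A sketch of the common path: a two-operand builtin such as
   __builtin_ia32_addps, matched to CODE_FOR_addv4sf3 in the bdesc_2arg
   table, arrives here with two V4SF arguments.  Each operand is checked
   against the insn predicates, copied into a register of the right mode if
   necessary, and the named pattern is emitted with a fresh V4SFmode target,
   roughly

     __v4sf sum = __builtin_ia32_addps (a, b);

   (Illustrative; the exact predicates come from the machine description.) */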
12435
12436 /* In type_for_mode we restrict the ability to create TImode types
12437 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12438 to have a V4SFmode signature. Convert them in-place to TImode. */
12439
12440 static rtx
12441 ix86_expand_timode_binop_builtin (icode, arglist, target)
12442 enum insn_code icode;
12443 tree arglist;
12444 rtx target;
12445 {
12446 rtx pat;
12447 tree arg0 = TREE_VALUE (arglist);
12448 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12449 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12450 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12451
12452 op0 = gen_lowpart (TImode, op0);
12453 op1 = gen_lowpart (TImode, op1);
12454 target = gen_reg_rtx (TImode);
12455
12456 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12457 op0 = copy_to_mode_reg (TImode, op0);
12458 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12459 op1 = copy_to_mode_reg (TImode, op1);
12460
12461 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12462 yet one of the two must not be a memory. This is normally enforced
12463 by expanders, but we didn't bother to create one here. */
12464 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12465 op0 = copy_to_mode_reg (TImode, op0);
12466
12467 pat = GEN_FCN (icode) (target, op0, op1);
12468 if (! pat)
12469 return 0;
12470 emit_insn (pat);
12471
12472 return gen_lowpart (V4SFmode, target);
12473 }
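/* For example, __builtin_ia32_andps is declared with a V4SFmode signature
   but expanded through CODE_FOR_sse_andti3 (see the ANDPS case below).
   Schematically the routine above does

     op0 = gen_lowpart (TImode, op0);
     op1 = gen_lowpart (TImode, op1);
     emit_insn (gen_sse_andti3 (target, op0, op1));
     return gen_lowpart (V4SFmode, target);

   so no data is converted, only the mode label changes. */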
12474
12475 /* Subroutine of ix86_expand_builtin to take care of stores. */
12476
12477 static rtx
12478 ix86_expand_store_builtin (icode, arglist)
12479 enum insn_code icode;
12480 tree arglist;
12481 {
12482 rtx pat;
12483 tree arg0 = TREE_VALUE (arglist);
12484 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12485 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12486 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12487 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12488 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12489
12490 if (VECTOR_MODE_P (mode1))
12491 op1 = safe_vector_operand (op1, mode1);
12492
12493 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12494
12495 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12496 op1 = copy_to_mode_reg (mode1, op1);
12497
12498 pat = GEN_FCN (icode) (op0, op1);
12499 if (pat)
12500 emit_insn (pat);
12501 return 0;
12502 }
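/* For example, __builtin_ia32_storeaps (float *p, __v4sf v) reaches this
   routine with arg0 = p and arg1 = v; op0 is turned into a V4SFmode MEM at
   address p and the sse_movaps pattern stores v into it.  Nothing is
   returned, matching the void result of the store builtins.  (Sketch of the
   intent; the icode is supplied by the caller.) */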
12503
12504 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12505
12506 static rtx
12507 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12508 enum insn_code icode;
12509 tree arglist;
12510 rtx target;
12511 int do_load;
12512 {
12513 rtx pat;
12514 tree arg0 = TREE_VALUE (arglist);
12515 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12516 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12517 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12518
12519 if (! target
12520 || GET_MODE (target) != tmode
12521 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12522 target = gen_reg_rtx (tmode);
12523 if (do_load)
12524 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12525 else
12526 {
12527 if (VECTOR_MODE_P (mode0))
12528 op0 = safe_vector_operand (op0, mode0);
12529
12530 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12531 op0 = copy_to_mode_reg (mode0, op0);
12532 }
12533
12534 pat = GEN_FCN (icode) (target, op0);
12535 if (! pat)
12536 return 0;
12537 emit_insn (pat);
12538 return target;
12539 }
12540
12541 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12542 sqrtss, rsqrtss, rcpss. */
12543
12544 static rtx
12545 ix86_expand_unop1_builtin (icode, arglist, target)
12546 enum insn_code icode;
12547 tree arglist;
12548 rtx target;
12549 {
12550 rtx pat;
12551 tree arg0 = TREE_VALUE (arglist);
12552 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12553 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12554 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12555
12556 if (! target
12557 || GET_MODE (target) != tmode
12558 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12559 target = gen_reg_rtx (tmode);
12560
12561 if (VECTOR_MODE_P (mode0))
12562 op0 = safe_vector_operand (op0, mode0);
12563
12564 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12565 op0 = copy_to_mode_reg (mode0, op0);
12566
12567 op1 = op0;
12568 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12569 op1 = copy_to_mode_reg (mode0, op1);
12570
12571 pat = GEN_FCN (icode) (target, op0, op1);
12572 if (! pat)
12573 return 0;
12574 emit_insn (pat);
12575 return target;
12576 }
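/* The vm* patterns for sqrtss, rsqrtss and rcpss take two inputs because
   only the low element is computed while the upper elements are copied from
   the second operand.  Passing op0 twice, as above, therefore preserves the
   untouched elements of the argument, mirroring e.g.

     sqrtss %xmm0, %xmm0

   where elements 1 to 3 of %xmm0 are left unchanged.  (Illustrative.) */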
12577
12578 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12579
12580 static rtx
12581 ix86_expand_sse_compare (d, arglist, target)
12582 const struct builtin_description *d;
12583 tree arglist;
12584 rtx target;
12585 {
12586 rtx pat;
12587 tree arg0 = TREE_VALUE (arglist);
12588 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12589 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12590 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12591 rtx op2;
12592 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12593 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12594 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12595 enum rtx_code comparison = d->comparison;
12596
12597 if (VECTOR_MODE_P (mode0))
12598 op0 = safe_vector_operand (op0, mode0);
12599 if (VECTOR_MODE_P (mode1))
12600 op1 = safe_vector_operand (op1, mode1);
12601
12602 /* Swap operands if we have a comparison that isn't available in
12603 hardware. */
12604 if (d->flag)
12605 {
12606 rtx tmp = gen_reg_rtx (mode1);
12607 emit_move_insn (tmp, op1);
12608 op1 = op0;
12609 op0 = tmp;
12610 }
12611
12612 if (! target
12613 || GET_MODE (target) != tmode
12614 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12615 target = gen_reg_rtx (tmode);
12616
12617 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12618 op0 = copy_to_mode_reg (mode0, op0);
12619 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12620 op1 = copy_to_mode_reg (mode1, op1);
12621
12622 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12623 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12624 if (! pat)
12625 return 0;
12626 emit_insn (pat);
12627 return target;
12628 }
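/* The d->flag bit marks comparisons that only exist with the operands
   reversed.  For instance, there is no cmpgtps instruction; the GT builtin
   is expanded by swapping the operands and using the LT form, which is why
   op1 must first be copied into a register of its own above.  (Sketch; the
   flag values come from the bdesc_2arg table.) */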
12629
12630 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12631
12632 static rtx
12633 ix86_expand_sse_comi (d, arglist, target)
12634 const struct builtin_description *d;
12635 tree arglist;
12636 rtx target;
12637 {
12638 rtx pat;
12639 tree arg0 = TREE_VALUE (arglist);
12640 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12641 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12642 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12643 rtx op2;
12644 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12645 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12646 enum rtx_code comparison = d->comparison;
12647
12648 if (VECTOR_MODE_P (mode0))
12649 op0 = safe_vector_operand (op0, mode0);
12650 if (VECTOR_MODE_P (mode1))
12651 op1 = safe_vector_operand (op1, mode1);
12652
12653 /* Swap operands if we have a comparison that isn't available in
12654 hardware. */
12655 if (d->flag)
12656 {
12657 rtx tmp = op1;
12658 op1 = op0;
12659 op0 = tmp;
12660 }
12661
12662 target = gen_reg_rtx (SImode);
12663 emit_move_insn (target, const0_rtx);
12664 target = gen_rtx_SUBREG (QImode, target, 0);
12665
12666 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12667 op0 = copy_to_mode_reg (mode0, op0);
12668 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12669 op1 = copy_to_mode_reg (mode1, op1);
12670
12671 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12672 pat = GEN_FCN (d->icode) (op0, op1, op2);
12673 if (! pat)
12674 return 0;
12675 emit_insn (pat);
12676 emit_insn (gen_rtx_SET (VOIDmode,
12677 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12678 gen_rtx_fmt_ee (comparison, QImode,
12679 gen_rtx_REG (CCmode, FLAGS_REG),
12680 const0_rtx)));
12681
12682 return SUBREG_REG (target);
12683 }
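/* Schematically, a call such as

     int r = __builtin_ia32_comigt (a, b);

   emits a comiss instruction that sets the flags register, followed by a
   setcc into the low byte of an SImode register that was zeroed first, so
   the builtin yields 0 or 1 as an int.  Roughly:

     comiss %xmm1, %xmm0
     seta   %al

   (Illustrative assembly only; the actual condition code and registers
   depend on the comparison and the register allocator.) */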
12684
12685 /* Expand an expression EXP that calls a built-in function,
12686 with result going to TARGET if that's convenient
12687 (and in mode MODE if that's convenient).
12688 SUBTARGET may be used as the target for computing one of EXP's operands.
12689 IGNORE is nonzero if the value is to be ignored. */
12690
12691 rtx
12692 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12693 tree exp;
12694 rtx target;
12695 rtx subtarget ATTRIBUTE_UNUSED;
12696 enum machine_mode mode ATTRIBUTE_UNUSED;
12697 int ignore ATTRIBUTE_UNUSED;
12698 {
12699 const struct builtin_description *d;
12700 size_t i;
12701 enum insn_code icode;
12702 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12703 tree arglist = TREE_OPERAND (exp, 1);
12704 tree arg0, arg1, arg2;
12705 rtx op0, op1, op2, pat;
12706 enum machine_mode tmode, mode0, mode1, mode2;
12707 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12708
12709 switch (fcode)
12710 {
12711 case IX86_BUILTIN_EMMS:
12712 emit_insn (gen_emms ());
12713 return 0;
12714
12715 case IX86_BUILTIN_SFENCE:
12716 emit_insn (gen_sfence ());
12717 return 0;
12718
12719 case IX86_BUILTIN_PEXTRW:
12720 case IX86_BUILTIN_PEXTRW128:
12721 icode = (fcode == IX86_BUILTIN_PEXTRW
12722 ? CODE_FOR_mmx_pextrw
12723 : CODE_FOR_sse2_pextrw);
12724 arg0 = TREE_VALUE (arglist);
12725 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12726 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12727 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12728 tmode = insn_data[icode].operand[0].mode;
12729 mode0 = insn_data[icode].operand[1].mode;
12730 mode1 = insn_data[icode].operand[2].mode;
12731
12732 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12733 op0 = copy_to_mode_reg (mode0, op0);
12734 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12735 {
12736 /* @@@ better error message */
12737 error ("selector must be an immediate");
12738 return gen_reg_rtx (tmode);
12739 }
12740 if (target == 0
12741 || GET_MODE (target) != tmode
12742 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12743 target = gen_reg_rtx (tmode);
12744 pat = GEN_FCN (icode) (target, op0, op1);
12745 if (! pat)
12746 return 0;
12747 emit_insn (pat);
12748 return target;
12749
12750 case IX86_BUILTIN_PINSRW:
12751 case IX86_BUILTIN_PINSRW128:
12752 icode = (fcode == IX86_BUILTIN_PINSRW
12753 ? CODE_FOR_mmx_pinsrw
12754 : CODE_FOR_sse2_pinsrw);
12755 arg0 = TREE_VALUE (arglist);
12756 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12757 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12758 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12759 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12760 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12761 tmode = insn_data[icode].operand[0].mode;
12762 mode0 = insn_data[icode].operand[1].mode;
12763 mode1 = insn_data[icode].operand[2].mode;
12764 mode2 = insn_data[icode].operand[3].mode;
12765
12766 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12767 op0 = copy_to_mode_reg (mode0, op0);
12768 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12769 op1 = copy_to_mode_reg (mode1, op1);
12770 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12771 {
12772 /* @@@ better error message */
12773 error ("selector must be an immediate");
12774 return const0_rtx;
12775 }
12776 if (target == 0
12777 || GET_MODE (target) != tmode
12778 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12779 target = gen_reg_rtx (tmode);
12780 pat = GEN_FCN (icode) (target, op0, op1, op2);
12781 if (! pat)
12782 return 0;
12783 emit_insn (pat);
12784 return target;
12785
12786 case IX86_BUILTIN_MASKMOVQ:
12787 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12788 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12789 : CODE_FOR_sse2_maskmovdqu);
12790 /* Note the arg order is different from the operand order. */
12791 arg1 = TREE_VALUE (arglist);
12792 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12793 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12794 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12795 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12796 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12797 mode0 = insn_data[icode].operand[0].mode;
12798 mode1 = insn_data[icode].operand[1].mode;
12799 mode2 = insn_data[icode].operand[2].mode;
12800
12801 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12802 op0 = copy_to_mode_reg (mode0, op0);
12803 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12804 op1 = copy_to_mode_reg (mode1, op1);
12805 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12806 op2 = copy_to_mode_reg (mode2, op2);
12807 pat = GEN_FCN (icode) (op0, op1, op2);
12808 if (! pat)
12809 return 0;
12810 emit_insn (pat);
12811 return 0;
12812
12813 case IX86_BUILTIN_SQRTSS:
12814 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12815 case IX86_BUILTIN_RSQRTSS:
12816 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12817 case IX86_BUILTIN_RCPSS:
12818 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12819
12820 case IX86_BUILTIN_ANDPS:
12821 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12822 arglist, target);
12823 case IX86_BUILTIN_ANDNPS:
12824 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12825 arglist, target);
12826 case IX86_BUILTIN_ORPS:
12827 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12828 arglist, target);
12829 case IX86_BUILTIN_XORPS:
12830 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12831 arglist, target);
12832
12833 case IX86_BUILTIN_LOADAPS:
12834 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12835
12836 case IX86_BUILTIN_LOADUPS:
12837 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12838
12839 case IX86_BUILTIN_STOREAPS:
12840 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12841 case IX86_BUILTIN_STOREUPS:
12842 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12843
12844 case IX86_BUILTIN_LOADSS:
12845 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12846
12847 case IX86_BUILTIN_STORESS:
12848 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12849
12850 case IX86_BUILTIN_LOADHPS:
12851 case IX86_BUILTIN_LOADLPS:
12852 case IX86_BUILTIN_LOADHPD:
12853 case IX86_BUILTIN_LOADLPD:
12854 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12855 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12856 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12857 : CODE_FOR_sse2_movlpd);
12858 arg0 = TREE_VALUE (arglist);
12859 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12860 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12861 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12862 tmode = insn_data[icode].operand[0].mode;
12863 mode0 = insn_data[icode].operand[1].mode;
12864 mode1 = insn_data[icode].operand[2].mode;
12865
12866 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12867 op0 = copy_to_mode_reg (mode0, op0);
12868 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12869 if (target == 0
12870 || GET_MODE (target) != tmode
12871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12872 target = gen_reg_rtx (tmode);
12873 pat = GEN_FCN (icode) (target, op0, op1);
12874 if (! pat)
12875 return 0;
12876 emit_insn (pat);
12877 return target;
12878
12879 case IX86_BUILTIN_STOREHPS:
12880 case IX86_BUILTIN_STORELPS:
12881 case IX86_BUILTIN_STOREHPD:
12882 case IX86_BUILTIN_STORELPD:
12883 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12884 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12885 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12886 : CODE_FOR_sse2_movlpd);
12887 arg0 = TREE_VALUE (arglist);
12888 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12889 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12890 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12891 mode0 = insn_data[icode].operand[1].mode;
12892 mode1 = insn_data[icode].operand[2].mode;
12893
12894 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12895 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12896 op1 = copy_to_mode_reg (mode1, op1);
12897
12898 pat = GEN_FCN (icode) (op0, op0, op1);
12899 if (! pat)
12900 return 0;
12901 emit_insn (pat);
12902 return 0;
12903
12904 case IX86_BUILTIN_MOVNTPS:
12905 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12906 case IX86_BUILTIN_MOVNTQ:
12907 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12908
12909 case IX86_BUILTIN_LDMXCSR:
12910 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12911 target = assign_386_stack_local (SImode, 0);
12912 emit_move_insn (target, op0);
12913 emit_insn (gen_ldmxcsr (target));
12914 return 0;
12915
12916 case IX86_BUILTIN_STMXCSR:
12917 target = assign_386_stack_local (SImode, 0);
12918 emit_insn (gen_stmxcsr (target));
12919 return copy_to_mode_reg (SImode, target);
12920
12921 case IX86_BUILTIN_SHUFPS:
12922 case IX86_BUILTIN_SHUFPD:
12923 icode = (fcode == IX86_BUILTIN_SHUFPS
12924 ? CODE_FOR_sse_shufps
12925 : CODE_FOR_sse2_shufpd);
12926 arg0 = TREE_VALUE (arglist);
12927 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12928 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12929 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12930 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12931 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12932 tmode = insn_data[icode].operand[0].mode;
12933 mode0 = insn_data[icode].operand[1].mode;
12934 mode1 = insn_data[icode].operand[2].mode;
12935 mode2 = insn_data[icode].operand[3].mode;
12936
12937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12938 op0 = copy_to_mode_reg (mode0, op0);
12939 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12940 op1 = copy_to_mode_reg (mode1, op1);
12941 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12942 {
12943 /* @@@ better error message */
12944 error ("mask must be an immediate");
12945 return gen_reg_rtx (tmode);
12946 }
12947 if (target == 0
12948 || GET_MODE (target) != tmode
12949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12950 target = gen_reg_rtx (tmode);
12951 pat = GEN_FCN (icode) (target, op0, op1, op2);
12952 if (! pat)
12953 return 0;
12954 emit_insn (pat);
12955 return target;
12956
12957 case IX86_BUILTIN_PSHUFW:
12958 case IX86_BUILTIN_PSHUFD:
12959 case IX86_BUILTIN_PSHUFHW:
12960 case IX86_BUILTIN_PSHUFLW:
12961 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12962 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12963 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12964 : CODE_FOR_mmx_pshufw);
12965 arg0 = TREE_VALUE (arglist);
12966 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12967 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12968 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12969 tmode = insn_data[icode].operand[0].mode;
12970 mode1 = insn_data[icode].operand[1].mode;
12971 mode2 = insn_data[icode].operand[2].mode;
12972
12973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12974 op0 = copy_to_mode_reg (mode1, op0);
12975 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12976 {
12977 /* @@@ better error message */
12978 error ("mask must be an immediate");
12979 return const0_rtx;
12980 }
12981 if (target == 0
12982 || GET_MODE (target) != tmode
12983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12984 target = gen_reg_rtx (tmode);
12985 pat = GEN_FCN (icode) (target, op0, op1);
12986 if (! pat)
12987 return 0;
12988 emit_insn (pat);
12989 return target;
12990
12991 case IX86_BUILTIN_FEMMS:
12992 emit_insn (gen_femms ());
12993 return NULL_RTX;
12994
12995 case IX86_BUILTIN_PAVGUSB:
12996 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12997
12998 case IX86_BUILTIN_PF2ID:
12999 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13000
13001 case IX86_BUILTIN_PFACC:
13002 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13003
13004 case IX86_BUILTIN_PFADD:
13005 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13006
13007 case IX86_BUILTIN_PFCMPEQ:
13008 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13009
13010 case IX86_BUILTIN_PFCMPGE:
13011 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13012
13013 case IX86_BUILTIN_PFCMPGT:
13014 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13015
13016 case IX86_BUILTIN_PFMAX:
13017 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13018
13019 case IX86_BUILTIN_PFMIN:
13020 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13021
13022 case IX86_BUILTIN_PFMUL:
13023 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13024
13025 case IX86_BUILTIN_PFRCP:
13026 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13027
13028 case IX86_BUILTIN_PFRCPIT1:
13029 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13030
13031 case IX86_BUILTIN_PFRCPIT2:
13032 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13033
13034 case IX86_BUILTIN_PFRSQIT1:
13035 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13036
13037 case IX86_BUILTIN_PFRSQRT:
13038 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13039
13040 case IX86_BUILTIN_PFSUB:
13041 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13042
13043 case IX86_BUILTIN_PFSUBR:
13044 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13045
13046 case IX86_BUILTIN_PI2FD:
13047 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13048
13049 case IX86_BUILTIN_PMULHRW:
13050 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13051
13052 case IX86_BUILTIN_PF2IW:
13053 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13054
13055 case IX86_BUILTIN_PFNACC:
13056 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13057
13058 case IX86_BUILTIN_PFPNACC:
13059 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13060
13061 case IX86_BUILTIN_PI2FW:
13062 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13063
13064 case IX86_BUILTIN_PSWAPDSI:
13065 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13066
13067 case IX86_BUILTIN_PSWAPDSF:
13068 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13069
13070 case IX86_BUILTIN_SSE_ZERO:
13071 target = gen_reg_rtx (V4SFmode);
13072 emit_insn (gen_sse_clrv4sf (target));
13073 return target;
13074
13075 case IX86_BUILTIN_MMX_ZERO:
13076 target = gen_reg_rtx (DImode);
13077 emit_insn (gen_mmx_clrdi (target));
13078 return target;
13079
13080 case IX86_BUILTIN_SQRTSD:
13081 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13082 case IX86_BUILTIN_LOADAPD:
13083 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13084 case IX86_BUILTIN_LOADUPD:
13085 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13086
13087 case IX86_BUILTIN_STOREAPD:
13088 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13089 case IX86_BUILTIN_STOREUPD:
13090 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13091
13092 case IX86_BUILTIN_LOADSD:
13093 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13094
13095 case IX86_BUILTIN_STORESD:
13096 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13097
13098 case IX86_BUILTIN_SETPD1:
13099 target = assign_386_stack_local (DFmode, 0);
13100 arg0 = TREE_VALUE (arglist);
13101 emit_move_insn (adjust_address (target, DFmode, 0),
13102 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13103 op0 = gen_reg_rtx (V2DFmode);
13104 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13105 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13106 return op0;
13107
13108 case IX86_BUILTIN_SETPD:
13109 target = assign_386_stack_local (V2DFmode, 0);
13110 arg0 = TREE_VALUE (arglist);
13111 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13112 emit_move_insn (adjust_address (target, DFmode, 0),
13113 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13114 emit_move_insn (adjust_address (target, DFmode, 8),
13115 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13116 op0 = gen_reg_rtx (V2DFmode);
13117 emit_insn (gen_sse2_movapd (op0, target));
13118 return op0;
13119
13120 case IX86_BUILTIN_LOADRPD:
13121 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13122 gen_reg_rtx (V2DFmode), 1);
13123 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13124 return target;
13125
13126 case IX86_BUILTIN_LOADPD1:
13127 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13128 gen_reg_rtx (V2DFmode), 1);
13129 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13130 return target;
13131
13132 case IX86_BUILTIN_STOREPD1:
13133 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13134 case IX86_BUILTIN_STORERPD:
13135 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13136
13137 case IX86_BUILTIN_MFENCE:
13138 emit_insn (gen_sse2_mfence ());
13139 return 0;
13140 case IX86_BUILTIN_LFENCE:
13141 emit_insn (gen_sse2_lfence ());
13142 return 0;
13143
13144 case IX86_BUILTIN_CLFLUSH:
13145 arg0 = TREE_VALUE (arglist);
13146 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13147 icode = CODE_FOR_sse2_clflush;
13148 mode0 = insn_data[icode].operand[0].mode;
13149 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13150 op0 = copy_to_mode_reg (mode0, op0);
13151
13152 emit_insn (gen_sse2_clflush (op0));
13153 return 0;
13154
13155 case IX86_BUILTIN_MOVNTPD:
13156 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13157 case IX86_BUILTIN_MOVNTDQ:
13158 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13159 case IX86_BUILTIN_MOVNTI:
13160 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13161
13162 default:
13163 break;
13164 }
13165
13166 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13167 if (d->code == fcode)
13168 {
13169 /* Compares are treated specially. */
13170 if (d->icode == CODE_FOR_maskcmpv4sf3
13171 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13172 || d->icode == CODE_FOR_maskncmpv4sf3
13173 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13174 || d->icode == CODE_FOR_maskcmpv2df3
13175 || d->icode == CODE_FOR_vmmaskcmpv2df3
13176 || d->icode == CODE_FOR_maskncmpv2df3
13177 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13178 return ix86_expand_sse_compare (d, arglist, target);
13179
13180 return ix86_expand_binop_builtin (d->icode, arglist, target);
13181 }
13182
13183 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13184 if (d->code == fcode)
13185 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13186
13187 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13188 if (d->code == fcode)
13189 return ix86_expand_sse_comi (d, arglist, target);
13190
13191 /* @@@ Should really do something sensible here. */
13192 return 0;
13193 }
13194
13195 /* Store OPERAND to memory after reload is completed.  This means
13196 that we can't easily use assign_stack_local. */
13197 rtx
13198 ix86_force_to_memory (mode, operand)
13199 enum machine_mode mode;
13200 rtx operand;
13201 {
13202 rtx result;
13203 if (!reload_completed)
13204 abort ();
13205 if (TARGET_64BIT && TARGET_RED_ZONE)
13206 {
13207 result = gen_rtx_MEM (mode,
13208 gen_rtx_PLUS (Pmode,
13209 stack_pointer_rtx,
13210 GEN_INT (-RED_ZONE_SIZE)));
13211 emit_move_insn (result, operand);
13212 }
13213 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13214 {
13215 switch (mode)
13216 {
13217 case HImode:
13218 case SImode:
13219 operand = gen_lowpart (DImode, operand);
13220 /* FALLTHRU */
13221 case DImode:
13222 emit_insn (
13223 gen_rtx_SET (VOIDmode,
13224 gen_rtx_MEM (DImode,
13225 gen_rtx_PRE_DEC (DImode,
13226 stack_pointer_rtx)),
13227 operand));
13228 break;
13229 default:
13230 abort ();
13231 }
13232 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13233 }
13234 else
13235 {
13236 switch (mode)
13237 {
13238 case DImode:
13239 {
13240 rtx operands[2];
13241 split_di (&operand, 1, operands, operands + 1);
13242 emit_insn (
13243 gen_rtx_SET (VOIDmode,
13244 gen_rtx_MEM (SImode,
13245 gen_rtx_PRE_DEC (Pmode,
13246 stack_pointer_rtx)),
13247 operands[1]));
13248 emit_insn (
13249 gen_rtx_SET (VOIDmode,
13250 gen_rtx_MEM (SImode,
13251 gen_rtx_PRE_DEC (Pmode,
13252 stack_pointer_rtx)),
13253 operands[0]));
13254 }
13255 break;
13256 case HImode:
13257 /* It is better to store HImodes as SImodes. */
13258 if (!TARGET_PARTIAL_REG_STALL)
13259 operand = gen_lowpart (SImode, operand);
13260 /* FALLTHRU */
13261 case SImode:
13262 emit_insn (
13263 gen_rtx_SET (VOIDmode,
13264 gen_rtx_MEM (GET_MODE (operand),
13265 gen_rtx_PRE_DEC (SImode,
13266 stack_pointer_rtx)),
13267 operand));
13268 break;
13269 default:
13270 abort ();
13271 }
13272 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13273 }
13274 return result;
13275 }
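/* On 64-bit targets with a red zone, the store above simply uses the area
   below the stack pointer that the ABI guarantees will not be clobbered, so
   a DImode spill becomes roughly

     movq %rax, -128(%rsp)

   assuming RED_ZONE_SIZE is 128 as in the x86-64 ABI.  Without a red zone,
   or on 32-bit targets, the operand is pushed and the stack pointer is
   adjusted back in ix86_free_from_memory.  (Illustrative sketch.) */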
13276
13277 /* Free the operand from memory. */
13278 void
13279 ix86_free_from_memory (mode)
13280 enum machine_mode mode;
13281 {
13282 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13283 {
13284 int size;
13285
13286 if (mode == DImode || TARGET_64BIT)
13287 size = 8;
13288 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13289 size = 2;
13290 else
13291 size = 4;
13292 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
13293 to a pop or add instruction if registers are available. */
13294 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13295 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13296 GEN_INT (size))));
13297 }
13298 }
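/* A minimal sketch of the deallocation emitted above, assuming a 4-byte
   slot on a 32-bit target:

     lea 4(%esp), %esp

   LEA is used rather than ADD here, presumably because it leaves the flags
   untouched; peephole2 may later turn it into a pop through a scratch
   register or an add when the flags are known to be dead.  (Illustrative.) */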
13299
13300 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13301 QImode must go into class Q_REGS.
13302 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13303 movdf to do mem-to-mem moves through integer regs. */
13304 enum reg_class
13305 ix86_preferred_reload_class (x, class)
13306 rtx x;
13307 enum reg_class class;
13308 {
13309 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13310 {
13311 /* SSE can't load any constant directly yet. */
13312 if (SSE_CLASS_P (class))
13313 return NO_REGS;
13314 /* Floats can load 0 and 1. */
13315 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13316 {
13317 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13318 if (MAYBE_SSE_CLASS_P (class))
13319 return (reg_class_subset_p (class, GENERAL_REGS)
13320 ? GENERAL_REGS : FLOAT_REGS);
13321 else
13322 return class;
13323 }
13324 /* General regs can load everything. */
13325 if (reg_class_subset_p (class, GENERAL_REGS))
13326 return GENERAL_REGS;
13327 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13328 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13329 return NO_REGS;
13330 }
13331 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13332 return NO_REGS;
13333 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13334 return Q_REGS;
13335 return class;
13336 }
13337
13338 /* If we are copying between general and FP registers, we need a memory
13339 location. The same is true for SSE and MMX registers.
13340
13341 The macro can't work reliably when one of the CLASSES is a class containing
13342 registers from multiple units (SSE, MMX, integer).  We avoid this by never
13343 combining those units in a single alternative in the machine description.
13344 Ensure that this constraint holds to avoid unexpected surprises.
13345
13346 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13347 enforce these sanity checks. */
13348 int
13349 ix86_secondary_memory_needed (class1, class2, mode, strict)
13350 enum reg_class class1, class2;
13351 enum machine_mode mode;
13352 int strict;
13353 {
13354 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13355 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13356 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13357 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13358 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13359 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13360 {
13361 if (strict)
13362 abort ();
13363 else
13364 return 1;
13365 }
13366 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13367 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13368 && (mode) != SImode)
13369 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13370 && (mode) != SImode));
13371 }
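/* For example, moving a DFmode value between an SSE register and the x87
   stack, or between an MMX register and a general register pair, has no
   direct instruction on IA-32 and therefore needs a stack slot.  SImode is
   exempted because movd can transfer 32-bit values between the integer unit
   and the MMX/SSE units directly.  (Sketch of the intent; the exact class
   tests are above.) */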
13372 /* Return the cost of moving data from a register in class CLASS1 to
13373 one in class CLASS2.
13374
13375 It is not required that the cost always equal 2 when FROM is the same as TO;
13376 on some machines it is expensive to move between registers if they are not
13377 general registers. */
13378 int
13379 ix86_register_move_cost (mode, class1, class2)
13380 enum machine_mode mode;
13381 enum reg_class class1, class2;
13382 {
13383 /* If secondary memory is required, compute the cost of the store followed
13384 by the load.  When copying from a general purpose register we may emit
13385 multiple stores followed by a single load, causing a memory size mismatch
13386 stall.  Count this as an arbitrarily high cost of 20. */
13387 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13388 {
13389 int add_cost = 0;
13390 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13391 add_cost = 20;
13392 return (MEMORY_MOVE_COST (mode, class1, 0)
13393 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13394 }
13395 /* Moves between SSE/MMX and integer unit are expensive. */
13396 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13397 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13398 return ix86_cost->mmxsse_to_integer;
13399 if (MAYBE_FLOAT_CLASS_P (class1))
13400 return ix86_cost->fp_move;
13401 if (MAYBE_SSE_CLASS_P (class1))
13402 return ix86_cost->sse_move;
13403 if (MAYBE_MMX_CLASS_P (class1))
13404 return ix86_cost->mmx_move;
13405 return 2;
13406 }
13407
13408 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13409 int
13410 ix86_hard_regno_mode_ok (regno, mode)
13411 int regno;
13412 enum machine_mode mode;
13413 {
13414 /* The flags register can hold only CCmode values, and nothing else can hold them. */
13415 if (CC_REGNO_P (regno))
13416 return GET_MODE_CLASS (mode) == MODE_CC;
13417 if (GET_MODE_CLASS (mode) == MODE_CC
13418 || GET_MODE_CLASS (mode) == MODE_RANDOM
13419 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13420 return 0;
13421 if (FP_REGNO_P (regno))
13422 return VALID_FP_MODE_P (mode);
13423 if (SSE_REGNO_P (regno))
13424 return VALID_SSE_REG_MODE (mode);
13425 if (MMX_REGNO_P (regno))
13426 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13427 /* We handle both integer and floating point values in the general purpose
13428 registers.  In the future we should be able to handle vector modes as well. */
13429 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13430 return 0;
13431 /* Take care with QImode values - they can live in non-QI regs, but then
13432 they do cause partial register stalls. */
13433 if (regno < 4 || mode != QImode || TARGET_64BIT)
13434 return 1;
13435 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13436 }
13437
13438 /* Return the cost of moving data of mode M between a
13439 register and memory. A value of 2 is the default; this cost is
13440 relative to those in `REGISTER_MOVE_COST'.
13441
13442 If moving between registers and memory is more expensive than
13443 between two registers, you should define this macro to express the
13444 relative cost.
13445
13446 Also model the increased cost of moving QImode registers in
13447 non-Q_REGS classes.
13448 */
13449 int
13450 ix86_memory_move_cost (mode, class, in)
13451 enum machine_mode mode;
13452 enum reg_class class;
13453 int in;
13454 {
13455 if (FLOAT_CLASS_P (class))
13456 {
13457 int index;
13458 switch (mode)
13459 {
13460 case SFmode:
13461 index = 0;
13462 break;
13463 case DFmode:
13464 index = 1;
13465 break;
13466 case XFmode:
13467 case TFmode:
13468 index = 2;
13469 break;
13470 default:
13471 return 100;
13472 }
13473 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13474 }
13475 if (SSE_CLASS_P (class))
13476 {
13477 int index;
13478 switch (GET_MODE_SIZE (mode))
13479 {
13480 case 4:
13481 index = 0;
13482 break;
13483 case 8:
13484 index = 1;
13485 break;
13486 case 16:
13487 index = 2;
13488 break;
13489 default:
13490 return 100;
13491 }
13492 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13493 }
13494 if (MMX_CLASS_P (class))
13495 {
13496 int index;
13497 switch (GET_MODE_SIZE (mode))
13498 {
13499 case 4:
13500 index = 0;
13501 break;
13502 case 8:
13503 index = 1;
13504 break;
13505 default:
13506 return 100;
13507 }
13508 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13509 }
13510 switch (GET_MODE_SIZE (mode))
13511 {
13512 case 1:
13513 if (in)
13514 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13515 : ix86_cost->movzbl_load);
13516 else
13517 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13518 : ix86_cost->int_store[0] + 4);
13519 break;
13520 case 2:
13521 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13522 default:
13523 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode. */
13524 if (mode == TFmode)
13525 mode = XFmode;
13526 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13527 * (int) GET_MODE_SIZE (mode) / 4);
13528 }
13529 }
13530
13531 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13532 static void
13533 ix86_svr3_asm_out_constructor (symbol, priority)
13534 rtx symbol;
13535 int priority ATTRIBUTE_UNUSED;
13536 {
13537 init_section ();
13538 fputs ("\tpushl $", asm_out_file);
13539 assemble_name (asm_out_file, XSTR (symbol, 0));
13540 fputc ('\n', asm_out_file);
13541 }
13542 #endif
13543
13544 /* Order the registers for the register allocator. */
13545
13546 void
13547 x86_order_regs_for_local_alloc ()
13548 {
13549 int pos = 0;
13550 int i;
13551
13552 /* First allocate the local general purpose registers. */
13553 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13554 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13555 reg_alloc_order [pos++] = i;
13556
13557 /* Global general purpose registers. */
13558 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13559 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13560 reg_alloc_order [pos++] = i;
13561
13562 /* x87 registers come first in case we are doing FP math
13563 using them. */
13564 if (!TARGET_SSE_MATH)
13565 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13566 reg_alloc_order [pos++] = i;
13567
13568 /* SSE registers. */
13569 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13570 reg_alloc_order [pos++] = i;
13571 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13572 reg_alloc_order [pos++] = i;
13573
13574 /* x87 registers. */
13575 if (TARGET_SSE_MATH)
13576 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13577 reg_alloc_order [pos++] = i;
13578
13579 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13580 reg_alloc_order [pos++] = i;
13581
13582 /* Initialize the rest of the array, as some registers are never
13583 allocated at all. */
13584 while (pos < FIRST_PSEUDO_REGISTER)
13585 reg_alloc_order [pos++] = 0;
13586 }
13587
13588 void
13589 x86_output_mi_thunk (file, delta, function)
13590 FILE *file;
13591 int delta;
13592 tree function;
13593 {
13594 tree parm;
13595 rtx xops[3];
13596
13597 if (ix86_regparm > 0)
13598 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13599 else
13600 parm = NULL_TREE;
13601 for (; parm; parm = TREE_CHAIN (parm))
13602 if (TREE_VALUE (parm) == void_type_node)
13603 break;
13604
13605 xops[0] = GEN_INT (delta);
13606 if (TARGET_64BIT)
13607 {
13608 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13609 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13610 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13611 if (flag_pic)
13612 {
13613 fprintf (file, "\tjmp *");
13614 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13615 fprintf (file, "@GOTPCREL(%%rip)\n");
13616 }
13617 else
13618 {
13619 fprintf (file, "\tjmp ");
13620 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13621 fprintf (file, "\n");
13622 }
13623 }
13624 else
13625 {
13626 if (parm)
13627 xops[1] = gen_rtx_REG (SImode, 0);
13628 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13629 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13630 else
13631 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13632 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13633
13634 if (flag_pic)
13635 {
13636 xops[0] = pic_offset_table_rtx;
13637 xops[1] = gen_label_rtx ();
13638 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13639
13640 if (ix86_regparm > 2)
13641 abort ();
13642 output_asm_insn ("push{l}\t%0", xops);
13643 output_asm_insn ("call\t%P1", xops);
13644 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13645 output_asm_insn ("pop{l}\t%0", xops);
13646 output_asm_insn
13647 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13648 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13649 output_asm_insn
13650 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13651 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13652 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13653 }
13654 else
13655 {
13656 fprintf (file, "\tjmp ");
13657 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13658 fprintf (file, "\n");
13659 }
13660 }
13661 }
13662
13663 int
13664 x86_field_alignment (field, computed)
13665 tree field;
13666 int computed;
13667 {
13668 enum machine_mode mode;
13669 if (TARGET_64BIT || DECL_USER_ALIGN (field) || TARGET_ALIGN_DOUBLE)
13670 return computed;
13671 mode = TYPE_MODE (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
13672 ? get_inner_array_type (field) : TREE_TYPE (field));
13673 if ((mode == DFmode || mode == DCmode
13674 || mode == DImode || mode == CDImode)
13675 && !TARGET_ALIGN_DOUBLE)
13676 return MIN (32, computed);
13677 return computed;
13678 }
13679
13680 /* Implement machine specific optimizations.
13681 At the moment we implement a single transformation: the AMD Athlon works
13682 faster when RET is not the destination of a conditional jump and is not
13683 directly preceded by another jump instruction.  We avoid the penalty by
13684 inserting a NOP just before the RET instruction in such cases. */
13685 void
13686 x86_machine_dependent_reorg (first)
13687 rtx first ATTRIBUTE_UNUSED;
13688 {
13689 edge e;
13690
13691 if (!TARGET_ATHLON || !optimize || optimize_size)
13692 return;
13693 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13694 {
13695 basic_block bb = e->src;
13696 rtx ret = bb->end;
13697 rtx prev;
13698 bool insert = false;
13699
13700 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13701 continue;
13702 prev = prev_nonnote_insn (ret);
13703 if (prev && GET_CODE (prev) == CODE_LABEL)
13704 {
13705 edge e;
13706 for (e = bb->pred; e; e = e->pred_next)
13707 if (EDGE_FREQUENCY (e) && e->src->index > 0
13708 && !(e->flags & EDGE_FALLTHRU))
13709 insert = 1;
13710 }
13711 if (!insert)
13712 {
13713 prev = prev_real_insn (ret);
13714 if (prev && GET_CODE (prev) == JUMP_INSN
13715 && any_condjump_p (prev))
13716 insert = 1;
13717 }
13718 if (insert)
13719 emit_insn_before (gen_nop (), ret);
13720 }
13721 }
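/* A sketch of the transformation, assuming a hot block that ends in a
   return and is entered by a conditional jump:

     before:                       after:
        jne  .L2                      jne  .L2
        ret                           nop
                                      ret

   The extra NOP separates the RET from the preceding branch, avoiding the
   Athlon penalty described above.  (Illustrative; the real condition also
   checks edge frequencies and fallthrough edges.) */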
13722
13723 #include "gt-i386.h"