1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
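/* Usage sketch: the cost tables above are consumed through this pointer.
   override_options () below repoints it at the table matching -mcpu=
   (or at size_cost when optimizing for size), and the cost macros in
   i386.h are assumed to read fields such as ix86_cost->add or
   ix86_cost->shift_const when pricing RTL.  */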
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
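/* Sketch of how these masks are read: each constant is a bitmask over
   processor types, so a tuning decision is assumed to be tested as, e.g.,
   (x86_use_leave & (1 << ix86_cpu)), or via the CPUMASK shorthand used
   later in this file, as in (x86_accumulate_outgoing_args & CPUMASK).
   The corresponding TARGET_* wrappers are expected to live in i386.h.  */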
399
400 /* In case the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
413
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
416
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 {
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
438 };
439
440 /* The "default" register map used in 32bit mode. */
441
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
443 {
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 };
452
453 static int const x86_64_int_parameter_registers[6] =
454 {
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457 };
458
459 static int const x86_64_int_return_registers[4] =
460 {
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
462 };
463
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466 {
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474 };
475
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529 */
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
531 {
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
539 };
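/* Worked example of the comment above: %st(0) is gcc regno 8, and
   svr4_dbx_register_map[8] is 11, matching the stack-top-relative DWARF
   numbering listed there; likewise %eflags (gcc regno 17) maps to 9.  */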
540
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
546
547 /* The encoding characters for the four TLS models present in ELF. */
548
549 static char const tls_model_chars[] = " GLil";
550
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
554
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
557 {
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
562 };
563
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
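/* Access sketch: per-function data hangs off cfun->machine, so a stack
   temporary of mode MODE with index N is reached as
   ix86_stack_locals[(int) MODE][N]; the assign_386_stack_local helper
   elsewhere in this file is assumed to allocate these slots lazily.  */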
566
567 /* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586 struct ix86_frame
587 {
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601 };
602
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
615
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
618
619 /* Which cpu we are scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
623
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
628
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
631
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse;
634
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
637
638 /* Alignment to use for loops and jumps: */
639
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
642
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
645
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
648
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
651
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
655
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
658
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
662 \f
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
704
705 struct ix86_address
706 {
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709 };
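/* Decomposition sketch: ix86_decompose_address splits an x86 effective
   address of the general form base + index*scale + disp, so e.g.
   12(%ebx,%ecx,4) would come back as base = %ebx, index = %ecx,
   scale = 4, disp = 12.  */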
710
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
712
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
716
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
748
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
752
753 /* Register class used for passing a given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of the SSESF and SSEDF classes, which are basically the SSE class, except that
756 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
757
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (the upper half does contain padding).
760 */
761 enum x86_64_reg_class
762 {
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
773 };
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
776
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
787 \f
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
795
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
798
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
801
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
804
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
807
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
812
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
821
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
828
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
845
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
850
851 struct gcc_target targetm = TARGET_INITIALIZER;
852 \f
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
858
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
861
862 void
863 override_options ()
864 {
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
868
869 static struct ptt
870 {
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
880 }
881 const processor_target_table[PROCESSOR_max] =
882 {
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
890 };
891
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
894 {
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
898 {
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
906 }
907 const processor_alias_table[] =
908 {
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
915 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
916 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
917 {"i686", PROCESSOR_PENTIUMPRO, 0},
918 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
919 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
920 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
921 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
922 PTA_MMX | PTA_PREFETCH_SSE},
923 {"k6", PROCESSOR_K6, PTA_MMX},
924 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
925 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
926 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
927 | PTA_3DNOW_A},
928 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
929 | PTA_3DNOW | PTA_3DNOW_A},
930 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
935 | PTA_3DNOW_A | PTA_SSE},
936 };
937
938 int const pta_size = ARRAY_SIZE (processor_alias_table);
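/* Example reading of the alias table: -march=athlon-xp selects
   PROCESSOR_ATHLON and, through its PTA_* flags, turns on MMX, 3DNow!,
   the Athlon 3DNow! extensions and SSE in the loop below, unless the
   user overrode those masks explicitly.  */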
939
940 #ifdef SUBTARGET_OVERRIDE_OPTIONS
941 SUBTARGET_OVERRIDE_OPTIONS;
942 #endif
943
944 if (!ix86_cpu_string && ix86_arch_string)
945 ix86_cpu_string = ix86_arch_string;
946 if (!ix86_cpu_string)
947 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
948 if (!ix86_arch_string)
949 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
950
951 if (ix86_cmodel_string != 0)
952 {
953 if (!strcmp (ix86_cmodel_string, "small"))
954 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
955 else if (flag_pic)
956 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
957 else if (!strcmp (ix86_cmodel_string, "32"))
958 ix86_cmodel = CM_32;
959 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
960 ix86_cmodel = CM_KERNEL;
961 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
962 ix86_cmodel = CM_MEDIUM;
963 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
964 ix86_cmodel = CM_LARGE;
965 else
966 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
967 }
968 else
969 {
970 ix86_cmodel = CM_32;
971 if (TARGET_64BIT)
972 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
973 }
974 if (ix86_asm_string != 0)
975 {
976 if (!strcmp (ix86_asm_string, "intel"))
977 ix86_asm_dialect = ASM_INTEL;
978 else if (!strcmp (ix86_asm_string, "att"))
979 ix86_asm_dialect = ASM_ATT;
980 else
981 error ("bad value (%s) for -masm= switch", ix86_asm_string);
982 }
983 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
984 error ("code model `%s' not supported in the %s bit mode",
985 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
986 if (ix86_cmodel == CM_LARGE)
987 sorry ("code model `large' not supported yet");
988 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
989 sorry ("%i-bit mode not compiled in",
990 (target_flags & MASK_64BIT) ? 64 : 32);
991
992 for (i = 0; i < pta_size; i++)
993 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
994 {
995 ix86_arch = processor_alias_table[i].processor;
996 /* Default cpu tuning to the architecture. */
997 ix86_cpu = ix86_arch;
998 if (processor_alias_table[i].flags & PTA_MMX
999 && !(target_flags & MASK_MMX_SET))
1000 target_flags |= MASK_MMX;
1001 if (processor_alias_table[i].flags & PTA_3DNOW
1002 && !(target_flags & MASK_3DNOW_SET))
1003 target_flags |= MASK_3DNOW;
1004 if (processor_alias_table[i].flags & PTA_3DNOW_A
1005 && !(target_flags & MASK_3DNOW_A_SET))
1006 target_flags |= MASK_3DNOW_A;
1007 if (processor_alias_table[i].flags & PTA_SSE
1008 && !(target_flags & MASK_SSE_SET))
1009 target_flags |= MASK_SSE;
1010 if (processor_alias_table[i].flags & PTA_SSE2
1011 && !(target_flags & MASK_SSE2_SET))
1012 target_flags |= MASK_SSE2;
1013 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1014 x86_prefetch_sse = true;
1015 break;
1016 }
1017
1018 if (i == pta_size)
1019 error ("bad value (%s) for -march= switch", ix86_arch_string);
1020
1021 for (i = 0; i < pta_size; i++)
1022 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1023 {
1024 ix86_cpu = processor_alias_table[i].processor;
1025 break;
1026 }
1027 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1028 x86_prefetch_sse = true;
1029 if (i == pta_size)
1030 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1031
1032 if (optimize_size)
1033 ix86_cost = &size_cost;
1034 else
1035 ix86_cost = processor_target_table[ix86_cpu].cost;
1036 target_flags |= processor_target_table[ix86_cpu].target_enable;
1037 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1038
1039 /* Arrange to set up i386_stack_locals for all functions. */
1040 init_machine_status = ix86_init_machine_status;
1041
1042 /* Validate -mregparm= value. */
1043 if (ix86_regparm_string)
1044 {
1045 i = atoi (ix86_regparm_string);
1046 if (i < 0 || i > REGPARM_MAX)
1047 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1048 else
1049 ix86_regparm = i;
1050 }
1051 else
1052 if (TARGET_64BIT)
1053 ix86_regparm = REGPARM_MAX;
1054
1055 /* If the user has provided any of the -malign-* options,
1056 warn and use that value only if -falign-* is not set.
1057 Remove this code in GCC 3.2 or later. */
1058 if (ix86_align_loops_string)
1059 {
1060 warning ("-malign-loops is obsolete, use -falign-loops");
1061 if (align_loops == 0)
1062 {
1063 i = atoi (ix86_align_loops_string);
1064 if (i < 0 || i > MAX_CODE_ALIGN)
1065 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1066 else
1067 align_loops = 1 << i;
1068 }
1069 }
1070
1071 if (ix86_align_jumps_string)
1072 {
1073 warning ("-malign-jumps is obsolete, use -falign-jumps");
1074 if (align_jumps == 0)
1075 {
1076 i = atoi (ix86_align_jumps_string);
1077 if (i < 0 || i > MAX_CODE_ALIGN)
1078 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1079 else
1080 align_jumps = 1 << i;
1081 }
1082 }
1083
1084 if (ix86_align_funcs_string)
1085 {
1086 warning ("-malign-functions is obsolete, use -falign-functions");
1087 if (align_functions == 0)
1088 {
1089 i = atoi (ix86_align_funcs_string);
1090 if (i < 0 || i > MAX_CODE_ALIGN)
1091 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1092 else
1093 align_functions = 1 << i;
1094 }
1095 }
1096
1097 /* Default align_* from the processor table. */
1098 if (align_loops == 0)
1099 {
1100 align_loops = processor_target_table[ix86_cpu].align_loop;
1101 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1102 }
1103 if (align_jumps == 0)
1104 {
1105 align_jumps = processor_target_table[ix86_cpu].align_jump;
1106 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1107 }
1108 if (align_functions == 0)
1109 {
1110 align_functions = processor_target_table[ix86_cpu].align_func;
1111 }
1112
1113 /* Validate -mpreferred-stack-boundary= value, or provide default.
1114 The default of 128 bits is for Pentium III's SSE __m128, but we
1115 don't want additional code to keep the stack aligned when
1116 optimizing for code size. */
1117 ix86_preferred_stack_boundary = (optimize_size
1118 ? TARGET_64BIT ? 64 : 32
1119 : 128);
1120 if (ix86_preferred_stack_boundary_string)
1121 {
1122 i = atoi (ix86_preferred_stack_boundary_string);
1123 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1124 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1125 TARGET_64BIT ? 3 : 2);
1126 else
1127 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1128 }
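/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte aligned stack,
   which is also the non-size-optimized default chosen above.  */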
1129
1130 /* Validate -mbranch-cost= value, or provide default. */
1131 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1132 if (ix86_branch_cost_string)
1133 {
1134 i = atoi (ix86_branch_cost_string);
1135 if (i < 0 || i > 5)
1136 error ("-mbranch-cost=%d is not between 0 and 5", i);
1137 else
1138 ix86_branch_cost = i;
1139 }
1140
1141 if (ix86_tls_dialect_string)
1142 {
1143 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_GNU;
1145 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1146 ix86_tls_dialect = TLS_DIALECT_SUN;
1147 else
1148 error ("bad value (%s) for -mtls-dialect= switch",
1149 ix86_tls_dialect_string);
1150 }
1151
1152 if (profile_flag)
1153 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1154
1155 /* Keep nonleaf frame pointers. */
1156 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1157 flag_omit_frame_pointer = 1;
1158
1159 /* If we're doing fast math, we don't care about comparison order
1160 wrt NaNs. This lets us use a shorter comparison sequence. */
1161 if (flag_unsafe_math_optimizations)
1162 target_flags &= ~MASK_IEEE_FP;
1163
1164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1165 since the insns won't need emulation. */
1166 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1167 target_flags &= ~MASK_NO_FANCY_MATH_387;
1168
1169 if (TARGET_64BIT)
1170 {
1171 if (TARGET_ALIGN_DOUBLE)
1172 error ("-malign-double makes no sense in the 64bit mode");
1173 if (TARGET_RTD)
1174 error ("-mrtd calling convention not supported in the 64bit mode");
1175 /* Enable by default the SSE and MMX builtins. */
1176 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1177 ix86_fpmath = FPMATH_SSE;
1178 }
1179 else
1180 ix86_fpmath = FPMATH_387;
1181
1182 if (ix86_fpmath_string != 0)
1183 {
1184 if (! strcmp (ix86_fpmath_string, "387"))
1185 ix86_fpmath = FPMATH_387;
1186 else if (! strcmp (ix86_fpmath_string, "sse"))
1187 {
1188 if (!TARGET_SSE)
1189 {
1190 warning ("SSE instruction set disabled, using 387 arithmetics");
1191 ix86_fpmath = FPMATH_387;
1192 }
1193 else
1194 ix86_fpmath = FPMATH_SSE;
1195 }
1196 else if (! strcmp (ix86_fpmath_string, "387,sse")
1197 || ! strcmp (ix86_fpmath_string, "sse,387"))
1198 {
1199 if (!TARGET_SSE)
1200 {
1201 warning ("SSE instruction set disabled, using 387 arithmetics");
1202 ix86_fpmath = FPMATH_387;
1203 }
1204 else if (!TARGET_80387)
1205 {
1206 warning ("387 instruction set disabled, using SSE arithmetics");
1207 ix86_fpmath = FPMATH_SSE;
1208 }
1209 else
1210 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1211 }
1212 else
1213 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1214 }
1215
1216 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1217 on by -msse. */
1218 if (TARGET_SSE)
1219 {
1220 target_flags |= MASK_MMX;
1221 x86_prefetch_sse = true;
1222 }
1223
1224 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1225 if (TARGET_3DNOW)
1226 {
1227 target_flags |= MASK_MMX;
1228 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1229 extensions it adds. */
1230 if (x86_3dnow_a & (1 << ix86_arch))
1231 target_flags |= MASK_3DNOW_A;
1232 }
1233 if ((x86_accumulate_outgoing_args & CPUMASK)
1234 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1235 && !optimize_size)
1236 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1237
1238 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1239 {
1240 char *p;
1241 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1242 p = strchr (internal_label_prefix, 'X');
1243 internal_label_prefix_len = p - internal_label_prefix;
1244 *p = '\0';
1245 }
1246 }
1247 \f
1248 void
1249 optimization_options (level, size)
1250 int level;
1251 int size ATTRIBUTE_UNUSED;
1252 {
1253 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1254 make the problem with not enough registers even worse. */
1255 #ifdef INSN_SCHEDULING
1256 if (level > 1)
1257 flag_schedule_insns = 0;
1258 #endif
1259 if (TARGET_64BIT && optimize >= 1)
1260 flag_omit_frame_pointer = 1;
1261 if (TARGET_64BIT)
1262 {
1263 flag_pcc_struct_return = 0;
1264 flag_asynchronous_unwind_tables = 1;
1265 }
1266 if (profile_flag)
1267 flag_omit_frame_pointer = 0;
1268 }
1269 \f
1270 /* Table of valid machine attributes. */
1271 const struct attribute_spec ix86_attribute_table[] =
1272 {
1273 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1274 /* Stdcall attribute says callee is responsible for popping arguments
1275 if they are not variable. */
1276 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1277 /* Cdecl attribute says the callee is a normal C declaration */
1278 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1279 /* Regparm attribute specifies how many integer arguments are to be
1280 passed in registers. */
1281 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1282 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1283 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1284 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1285 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1286 #endif
1287 { NULL, 0, 0, false, false, false, NULL }
1288 };
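/* Usage sketch for the table above (hypothetical user code):
   int __attribute__ ((regparm (3))) f (int a, int b, int c);
   void __attribute__ ((stdcall)) g (int a);
   The handlers below validate the arguments and decide whether the
   attribute is attached.  */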
1289
1290 /* Handle a "cdecl" or "stdcall" attribute;
1291 arguments as in struct attribute_spec.handler. */
1292 static tree
1293 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1294 tree *node;
1295 tree name;
1296 tree args ATTRIBUTE_UNUSED;
1297 int flags ATTRIBUTE_UNUSED;
1298 bool *no_add_attrs;
1299 {
1300 if (TREE_CODE (*node) != FUNCTION_TYPE
1301 && TREE_CODE (*node) != METHOD_TYPE
1302 && TREE_CODE (*node) != FIELD_DECL
1303 && TREE_CODE (*node) != TYPE_DECL)
1304 {
1305 warning ("`%s' attribute only applies to functions",
1306 IDENTIFIER_POINTER (name));
1307 *no_add_attrs = true;
1308 }
1309
1310 if (TARGET_64BIT)
1311 {
1312 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1313 *no_add_attrs = true;
1314 }
1315
1316 return NULL_TREE;
1317 }
1318
1319 /* Handle a "regparm" attribute;
1320 arguments as in struct attribute_spec.handler. */
1321 static tree
1322 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1323 tree *node;
1324 tree name;
1325 tree args;
1326 int flags ATTRIBUTE_UNUSED;
1327 bool *no_add_attrs;
1328 {
1329 if (TREE_CODE (*node) != FUNCTION_TYPE
1330 && TREE_CODE (*node) != METHOD_TYPE
1331 && TREE_CODE (*node) != FIELD_DECL
1332 && TREE_CODE (*node) != TYPE_DECL)
1333 {
1334 warning ("`%s' attribute only applies to functions",
1335 IDENTIFIER_POINTER (name));
1336 *no_add_attrs = true;
1337 }
1338 else
1339 {
1340 tree cst;
1341
1342 cst = TREE_VALUE (args);
1343 if (TREE_CODE (cst) != INTEGER_CST)
1344 {
1345 warning ("`%s' attribute requires an integer constant argument",
1346 IDENTIFIER_POINTER (name));
1347 *no_add_attrs = true;
1348 }
1349 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1350 {
1351 warning ("argument to `%s' attribute larger than %d",
1352 IDENTIFIER_POINTER (name), REGPARM_MAX);
1353 *no_add_attrs = true;
1354 }
1355 }
1356
1357 return NULL_TREE;
1358 }
1359
1360 /* Return 0 if the attributes for two types are incompatible, 1 if they
1361 are compatible, and 2 if they are nearly compatible (which causes a
1362 warning to be generated). */
1363
1364 static int
1365 ix86_comp_type_attributes (type1, type2)
1366 tree type1;
1367 tree type2;
1368 {
1369 /* Check for mismatch of non-default calling convention. */
1370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1371
1372 if (TREE_CODE (type1) != FUNCTION_TYPE)
1373 return 1;
1374
1375 /* Check for mismatched return types (cdecl vs stdcall). */
1376 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1377 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1378 return 0;
1379 return 1;
1380 }
1381 \f
1382 /* Value is the number of bytes of arguments automatically
1383 popped when returning from a subroutine call.
1384 FUNDECL is the declaration node of the function (as a tree),
1385 FUNTYPE is the data type of the function (as a tree),
1386 or for a library call it is an identifier node for the subroutine name.
1387 SIZE is the number of bytes of arguments passed on the stack.
1388
1389 On the 80386, the RTD insn may be used to pop them if the number
1390 of args is fixed, but if the number is variable then the caller
1391 must pop them all. RTD can't be used for library calls now
1392 because the library is compiled with the Unix compiler.
1393 Use of RTD is a selectable option, since it is incompatible with
1394 standard Unix calling sequences. If the option is not selected,
1395 the caller must always pop the args.
1396
1397 The attribute stdcall is equivalent to RTD on a per module basis. */
1398
1399 int
1400 ix86_return_pops_args (fundecl, funtype, size)
1401 tree fundecl;
1402 tree funtype;
1403 int size;
1404 {
1405 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1406
1407 /* Cdecl functions override -mrtd, and never pop the stack. */
1408 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1409
1410 /* Stdcall functions will pop the stack if not variable args. */
1411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1412 rtd = 1;
1413
1414 if (rtd
1415 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1416 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1417 == void_type_node)))
1418 return size;
1419 }
1420
1421 /* Lose any fake structure return argument if it is passed on the stack. */
1422 if (aggregate_value_p (TREE_TYPE (funtype))
1423 && !TARGET_64BIT)
1424 {
1425 int nregs = ix86_regparm;
1426
1427 if (funtype)
1428 {
1429 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1430
1431 if (attr)
1432 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1433 }
1434
1435 if (!nregs)
1436 return GET_MODE_SIZE (Pmode);
1437 }
1438
1439 return 0;
1440 }
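/* Example: for a stdcall (or -mrtd) prototyped function taking two ints
   and no varargs, SIZE is 8, so the callee pops 8 bytes (a "ret $8");
   a cdecl or varargs function typically returns 0 and the caller pops.  */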
1441 \f
1442 /* Argument support functions. */
1443
1444 /* Return true when register may be used to pass function parameters. */
1445 bool
1446 ix86_function_arg_regno_p (regno)
1447 int regno;
1448 {
1449 int i;
1450 if (!TARGET_64BIT)
1451 return (regno < REGPARM_MAX
1452 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1453 if (SSE_REGNO_P (regno) && TARGET_SSE)
1454 return true;
1455 /* RAX is used as hidden argument to va_arg functions. */
1456 if (!regno)
1457 return true;
1458 for (i = 0; i < REGPARM_MAX; i++)
1459 if (regno == x86_64_int_parameter_registers[i])
1460 return true;
1461 return false;
1462 }
1463
1464 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1465 for a call to a function whose data type is FNTYPE.
1466 For a library call, FNTYPE is 0. */
1467
1468 void
1469 init_cumulative_args (cum, fntype, libname)
1470 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1471 tree fntype; /* tree ptr for function decl */
1472 rtx libname; /* SYMBOL_REF of library name or 0 */
1473 {
1474 static CUMULATIVE_ARGS zero_cum;
1475 tree param, next_param;
1476
1477 if (TARGET_DEBUG_ARG)
1478 {
1479 fprintf (stderr, "\ninit_cumulative_args (");
1480 if (fntype)
1481 fprintf (stderr, "fntype code = %s, ret code = %s",
1482 tree_code_name[(int) TREE_CODE (fntype)],
1483 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1484 else
1485 fprintf (stderr, "no fntype");
1486
1487 if (libname)
1488 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1489 }
1490
1491 *cum = zero_cum;
1492
1493 /* Set up the number of registers to use for passing arguments. */
1494 cum->nregs = ix86_regparm;
1495 cum->sse_nregs = SSE_REGPARM_MAX;
1496 if (fntype && !TARGET_64BIT)
1497 {
1498 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1499
1500 if (attr)
1501 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1502 }
1503 cum->maybe_vaarg = false;
1504
1505 /* Determine if this function has variable arguments. This is
1506 indicated by the last argument being 'void_type_node' if there
1507 are no variable arguments. If there are variable arguments, then
1508 we won't pass anything in registers. */
1509
1510 if (cum->nregs)
1511 {
1512 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1513 param != 0; param = next_param)
1514 {
1515 next_param = TREE_CHAIN (param);
1516 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1517 {
1518 if (!TARGET_64BIT)
1519 cum->nregs = 0;
1520 cum->maybe_vaarg = true;
1521 }
1522 }
1523 }
1524 if ((!fntype && !libname)
1525 || (fntype && !TYPE_ARG_TYPES (fntype)))
1526 cum->maybe_vaarg = 1;
1527
1528 if (TARGET_DEBUG_ARG)
1529 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1530
1531 return;
1532 }
1533
1534 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1535 of this code is to classify each 8 bytes of an incoming argument by register
1536 class and assign registers accordingly. */
1537
1538 /* Return the union class of CLASS1 and CLASS2.
1539 See the x86-64 PS ABI for details. */
1540
1541 static enum x86_64_reg_class
1542 merge_classes (class1, class2)
1543 enum x86_64_reg_class class1, class2;
1544 {
1545 /* Rule #1: If both classes are equal, this is the resulting class. */
1546 if (class1 == class2)
1547 return class1;
1548
1549 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1550 the other class. */
1551 if (class1 == X86_64_NO_CLASS)
1552 return class2;
1553 if (class2 == X86_64_NO_CLASS)
1554 return class1;
1555
1556 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1557 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1558 return X86_64_MEMORY_CLASS;
1559
1560 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1561 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1562 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1563 return X86_64_INTEGERSI_CLASS;
1564 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1565 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1566 return X86_64_INTEGER_CLASS;
1567
1568 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1569 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1570 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1571 return X86_64_MEMORY_CLASS;
1572
1573 /* Rule #6: Otherwise class SSE is used. */
1574 return X86_64_SSE_CLASS;
1575 }
1576
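/* Worked example of the merge rules above (illustration only): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS by rule #4, while merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS by rule #5.  */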
1577 /* Classify the argument of type TYPE and mode MODE.
1578 CLASSES will be filled by the register class used to pass each word
1579 of the operand. The number of words is returned. In case the parameter
1580 should be passed in memory, 0 is returned. As a special case for zero
1581 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1582
1583 BIT_OFFSET is used internally for handling records; it specifies the
1584 offset in bits modulo 256 to avoid overflow cases.
1585
1586 See the x86-64 PS ABI for details.
1587 */
1588
1589 static int
1590 classify_argument (mode, type, classes, bit_offset)
1591 enum machine_mode mode;
1592 tree type;
1593 enum x86_64_reg_class classes[MAX_CLASSES];
1594 int bit_offset;
1595 {
1596 int bytes =
1597 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1598 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1599
1600 /* Variable sized entities are always passed/returned in memory. */
1601 if (bytes < 0)
1602 return 0;
1603
1604 if (type && AGGREGATE_TYPE_P (type))
1605 {
1606 int i;
1607 tree field;
1608 enum x86_64_reg_class subclasses[MAX_CLASSES];
1609
1610 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1611 if (bytes > 16)
1612 return 0;
1613
1614 for (i = 0; i < words; i++)
1615 classes[i] = X86_64_NO_CLASS;
1616
1617 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1618 signal the memory class, so handle them as a special case. */
1619 if (!words)
1620 {
1621 classes[0] = X86_64_NO_CLASS;
1622 return 1;
1623 }
1624
1625 /* Classify each field of record and merge classes. */
1626 if (TREE_CODE (type) == RECORD_TYPE)
1627 {
1628 /* For C++ classes, first merge in the fields of the base classes. */
1629 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1630 {
1631 tree bases = TYPE_BINFO_BASETYPES (type);
1632 int n_bases = TREE_VEC_LENGTH (bases);
1633 int i;
1634
1635 for (i = 0; i < n_bases; ++i)
1636 {
1637 tree binfo = TREE_VEC_ELT (bases, i);
1638 int num;
1639 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1640 tree type = BINFO_TYPE (binfo);
1641
1642 num = classify_argument (TYPE_MODE (type),
1643 type, subclasses,
1644 (offset + bit_offset) % 256);
1645 if (!num)
1646 return 0;
1647 for (i = 0; i < num; i++)
1648 {
1649 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1650 classes[i + pos] =
1651 merge_classes (subclasses[i], classes[i + pos]);
1652 }
1653 }
1654 }
1655 /* And now merge the fields of the structure. */
1656 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1657 {
1658 if (TREE_CODE (field) == FIELD_DECL)
1659 {
1660 int num;
1661
1662 /* Bitfields are always classified as integer. Handle them
1663 early, since later code would consider them to be
1664 misaligned integers. */
1665 if (DECL_BIT_FIELD (field))
1666 {
1667 for (i = int_bit_position (field) / 8 / 8;
1668 i < (int_bit_position (field)
1669 + tree_low_cst (DECL_SIZE (field), 0)
1670 + 63) / 8 / 8; i++)
1671 classes[i] =
1672 merge_classes (X86_64_INTEGER_CLASS,
1673 classes[i]);
1674 }
1675 else
1676 {
1677 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1678 TREE_TYPE (field), subclasses,
1679 (int_bit_position (field)
1680 + bit_offset) % 256);
1681 if (!num)
1682 return 0;
1683 for (i = 0; i < num; i++)
1684 {
1685 int pos =
1686 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1687 classes[i + pos] =
1688 merge_classes (subclasses[i], classes[i + pos]);
1689 }
1690 }
1691 }
1692 }
1693 }
1694 /* Arrays are handled as small records. */
1695 else if (TREE_CODE (type) == ARRAY_TYPE)
1696 {
1697 int num;
1698 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1699 TREE_TYPE (type), subclasses, bit_offset);
1700 if (!num)
1701 return 0;
1702
1703 /* The partial classes are now full classes. */
1704 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1705 subclasses[0] = X86_64_SSE_CLASS;
1706 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1707 subclasses[0] = X86_64_INTEGER_CLASS;
1708
1709 for (i = 0; i < words; i++)
1710 classes[i] = subclasses[i % num];
1711 }
1712 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1713 else if (TREE_CODE (type) == UNION_TYPE
1714 || TREE_CODE (type) == QUAL_UNION_TYPE)
1715 {
1716 /* For C++ classes, first merge in the fields of the base classes. */
1717 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1718 {
1719 tree bases = TYPE_BINFO_BASETYPES (type);
1720 int n_bases = TREE_VEC_LENGTH (bases);
1721 int i;
1722
1723 for (i = 0; i < n_bases; ++i)
1724 {
1725 tree binfo = TREE_VEC_ELT (bases, i);
1726 int num;
1727 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1728 tree type = BINFO_TYPE (binfo);
1729
1730 num = classify_argument (TYPE_MODE (type),
1731 type, subclasses,
1732 (offset + (bit_offset % 64)) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
1737 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1738 classes[i + pos] =
1739 merge_classes (subclasses[i], classes[i + pos]);
1740 }
1741 }
1742 }
1743 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1744 {
1745 if (TREE_CODE (field) == FIELD_DECL)
1746 {
1747 int num;
1748 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1749 TREE_TYPE (field), subclasses,
1750 bit_offset);
1751 if (!num)
1752 return 0;
1753 for (i = 0; i < num; i++)
1754 classes[i] = merge_classes (subclasses[i], classes[i]);
1755 }
1756 }
1757 }
1758 else
1759 abort ();
1760
1761 /* Final merger cleanup. */
1762 for (i = 0; i < words; i++)
1763 {
1764 /* If one class is MEMORY, everything should be passed in
1765 memory. */
1766 if (classes[i] == X86_64_MEMORY_CLASS)
1767 return 0;
1768
1769 /* The X86_64_SSEUP_CLASS should always be preceded by
1770 X86_64_SSE_CLASS. */
1771 if (classes[i] == X86_64_SSEUP_CLASS
1772 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1773 classes[i] = X86_64_SSE_CLASS;
1774
1775 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1776 if (classes[i] == X86_64_X87UP_CLASS
1777 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1778 classes[i] = X86_64_SSE_CLASS;
1779 }
1780 return words;
1781 }
1782
1783 /* Compute the alignment needed. We align all types to natural boundaries,
1784 with the exception of XFmode, which is aligned to 64 bits. */
1785 if (mode != VOIDmode && mode != BLKmode)
1786 {
1787 int mode_alignment = GET_MODE_BITSIZE (mode);
1788
1789 if (mode == XFmode)
1790 mode_alignment = 128;
1791 else if (mode == XCmode)
1792 mode_alignment = 256;
1793 /* Misaligned fields are always returned in memory. */
1794 if (bit_offset % mode_alignment)
1795 return 0;
1796 }
1797
1798 /* Classification of atomic types. */
1799 switch (mode)
1800 {
1801 case DImode:
1802 case SImode:
1803 case HImode:
1804 case QImode:
1805 case CSImode:
1806 case CHImode:
1807 case CQImode:
1808 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1809 classes[0] = X86_64_INTEGERSI_CLASS;
1810 else
1811 classes[0] = X86_64_INTEGER_CLASS;
1812 return 1;
1813 case CDImode:
1814 case TImode:
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 return 2;
1817 case CTImode:
1818 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1819 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1820 return 4;
1821 case SFmode:
1822 if (!(bit_offset % 64))
1823 classes[0] = X86_64_SSESF_CLASS;
1824 else
1825 classes[0] = X86_64_SSE_CLASS;
1826 return 1;
1827 case DFmode:
1828 classes[0] = X86_64_SSEDF_CLASS;
1829 return 1;
1830 case TFmode:
1831 classes[0] = X86_64_X87_CLASS;
1832 classes[1] = X86_64_X87UP_CLASS;
1833 return 2;
1834 case TCmode:
1835 classes[0] = X86_64_X87_CLASS;
1836 classes[1] = X86_64_X87UP_CLASS;
1837 classes[2] = X86_64_X87_CLASS;
1838 classes[3] = X86_64_X87UP_CLASS;
1839 return 4;
1840 case DCmode:
1841 classes[0] = X86_64_SSEDF_CLASS;
1842 classes[1] = X86_64_SSEDF_CLASS;
1843 return 2;
1844 case SCmode:
1845 classes[0] = X86_64_SSE_CLASS;
1846 return 1;
1847 case V4SFmode:
1848 case V4SImode:
1849 case V16QImode:
1850 case V8HImode:
1851 case V2DFmode:
1852 case V2DImode:
1853 classes[0] = X86_64_SSE_CLASS;
1854 classes[1] = X86_64_SSEUP_CLASS;
1855 return 2;
1856 case V2SFmode:
1857 case V2SImode:
1858 case V4HImode:
1859 case V8QImode:
1860 classes[0] = X86_64_SSE_CLASS;
1861 return 1;
1862 case BLKmode:
1863 case VOIDmode:
1864 return 0;
1865 default:
1866 abort ();
1867 }
1868 }
1869
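/* Illustrative sketch of the classification above (hypothetical user
   type, not compiled in): each eightbyte of the structure below is
   classified separately -- the two ints share the first eightbyte
   (INTEGER class) and the double fills the second (SSE class), so the
   argument as a whole needs one integer and one SSE register.  */
#if 0
struct classify_example { int a; int b; double d; };  /* 16 bytes */
#endif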
1870 /* Examine the argument and set the number of registers required in each
1871 class. Return 0 iff the parameter should be passed in memory. */
1872 static int
1873 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1874 enum machine_mode mode;
1875 tree type;
1876 int *int_nregs, *sse_nregs;
1877 int in_return;
1878 {
1879 enum x86_64_reg_class class[MAX_CLASSES];
1880 int n = classify_argument (mode, type, class, 0);
1881
1882 *int_nregs = 0;
1883 *sse_nregs = 0;
1884 if (!n)
1885 return 0;
1886 for (n--; n >= 0; n--)
1887 switch (class[n])
1888 {
1889 case X86_64_INTEGER_CLASS:
1890 case X86_64_INTEGERSI_CLASS:
1891 (*int_nregs)++;
1892 break;
1893 case X86_64_SSE_CLASS:
1894 case X86_64_SSESF_CLASS:
1895 case X86_64_SSEDF_CLASS:
1896 (*sse_nregs)++;
1897 break;
1898 case X86_64_NO_CLASS:
1899 case X86_64_SSEUP_CLASS:
1900 break;
1901 case X86_64_X87_CLASS:
1902 case X86_64_X87UP_CLASS:
1903 if (!in_return)
1904 return 0;
1905 break;
1906 case X86_64_MEMORY_CLASS:
1907 abort ();
1908 }
1909 return 1;
1910 }
1911 /* Construct container for the argument used by GCC interface. See
1912 FUNCTION_ARG for the detailed description. */
1913 static rtx
1914 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1915 enum machine_mode mode;
1916 tree type;
1917 int in_return;
1918 int nintregs, nsseregs;
1919 const int * intreg;
1920 int sse_regno;
1921 {
1922 enum machine_mode tmpmode;
1923 int bytes =
1924 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1925 enum x86_64_reg_class class[MAX_CLASSES];
1926 int n;
1927 int i;
1928 int nexps = 0;
1929 int needed_sseregs, needed_intregs;
1930 rtx exp[MAX_CLASSES];
1931 rtx ret;
1932
1933 n = classify_argument (mode, type, class, 0);
1934 if (TARGET_DEBUG_ARG)
1935 {
1936 if (!n)
1937 fprintf (stderr, "Memory class\n");
1938 else
1939 {
1940 fprintf (stderr, "Classes:");
1941 for (i = 0; i < n; i++)
1942 {
1943 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1944 }
1945 fprintf (stderr, "\n");
1946 }
1947 }
1948 if (!n)
1949 return NULL;
1950 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1951 return NULL;
1952 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1953 return NULL;
1954
1955 /* First construct simple cases. Avoid SCmode, since we want to use
1956 a single register to pass this type. */
1957 if (n == 1 && mode != SCmode)
1958 switch (class[0])
1959 {
1960 case X86_64_INTEGER_CLASS:
1961 case X86_64_INTEGERSI_CLASS:
1962 return gen_rtx_REG (mode, intreg[0]);
1963 case X86_64_SSE_CLASS:
1964 case X86_64_SSESF_CLASS:
1965 case X86_64_SSEDF_CLASS:
1966 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1967 case X86_64_X87_CLASS:
1968 return gen_rtx_REG (mode, FIRST_STACK_REG);
1969 case X86_64_NO_CLASS:
1970 /* Zero sized array, struct or class. */
1971 return NULL;
1972 default:
1973 abort ();
1974 }
1975 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1976 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1977 if (n == 2
1978 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1979 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1980 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1981 && class[1] == X86_64_INTEGER_CLASS
1982 && (mode == CDImode || mode == TImode)
1983 && intreg[0] + 1 == intreg[1])
1984 return gen_rtx_REG (mode, intreg[0]);
1985 if (n == 4
1986 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1987 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1988 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1989
1990 /* Otherwise figure out the entries of the PARALLEL. */
1991 for (i = 0; i < n; i++)
1992 {
1993 switch (class[i])
1994 {
1995 case X86_64_NO_CLASS:
1996 break;
1997 case X86_64_INTEGER_CLASS:
1998 case X86_64_INTEGERSI_CLASS:
1999 /* Merge TImodes on aligned occasions here too. */
2000 if (i * 8 + 8 > bytes)
2001 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2002 else if (class[i] == X86_64_INTEGERSI_CLASS)
2003 tmpmode = SImode;
2004 else
2005 tmpmode = DImode;
2006 /* We've requested a size, such as 24 bits, for which there is no integer mode. Use DImode. */
2007 if (tmpmode == BLKmode)
2008 tmpmode = DImode;
2009 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2010 gen_rtx_REG (tmpmode, *intreg),
2011 GEN_INT (i*8));
2012 intreg++;
2013 break;
2014 case X86_64_SSESF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (SFmode,
2017 SSE_REGNO (sse_regno)),
2018 GEN_INT (i*8));
2019 sse_regno++;
2020 break;
2021 case X86_64_SSEDF_CLASS:
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (DFmode,
2024 SSE_REGNO (sse_regno)),
2025 GEN_INT (i*8));
2026 sse_regno++;
2027 break;
2028 case X86_64_SSE_CLASS:
2029 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2030 tmpmode = TImode, i++;
2031 else
2032 tmpmode = DImode;
2033 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2034 gen_rtx_REG (tmpmode,
2035 SSE_REGNO (sse_regno)),
2036 GEN_INT (i*8));
2037 sse_regno++;
2038 break;
2039 default:
2040 abort ();
2041 }
2042 }
2043 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2044 for (i = 0; i < nexps; i++)
2045 XVECEXP (ret, 0, i) = exp [i];
2046 return ret;
2047 }
2048
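/* For the 16 byte example structure sketched after classify_argument,
   the PARALLEL built here would pair a DImode integer register at
   offset 0 with a DFmode SSE register at offset 8, roughly
     (parallel [(expr_list (reg:DI <intreg>) (const_int 0))
                (expr_list (reg:DF <ssereg>) (const_int 8))])
   where the concrete hard registers depend on the preceding arguments;
   this is an illustration, not compiler output.  */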
2049 /* Update the data in CUM to advance over an argument
2050 of mode MODE and data type TYPE.
2051 (TYPE is null for libcalls where that information may not be available.) */
2052
2053 void
2054 function_arg_advance (cum, mode, type, named)
2055 CUMULATIVE_ARGS *cum; /* current arg information */
2056 enum machine_mode mode; /* current arg mode */
2057 tree type; /* type of the argument or 0 if lib support */
2058 int named; /* whether or not the argument was named */
2059 {
2060 int bytes =
2061 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2062 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2063
2064 if (TARGET_DEBUG_ARG)
2065 fprintf (stderr,
2066 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2067 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2068 if (TARGET_64BIT)
2069 {
2070 int int_nregs, sse_nregs;
2071 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2072 cum->words += words;
2073 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2074 {
2075 cum->nregs -= int_nregs;
2076 cum->sse_nregs -= sse_nregs;
2077 cum->regno += int_nregs;
2078 cum->sse_regno += sse_nregs;
2079 }
2080 else
2081 cum->words += words;
2082 }
2083 else
2084 {
2085 if (TARGET_SSE && mode == TImode)
2086 {
2087 cum->sse_words += words;
2088 cum->sse_nregs -= 1;
2089 cum->sse_regno += 1;
2090 if (cum->sse_nregs <= 0)
2091 {
2092 cum->sse_nregs = 0;
2093 cum->sse_regno = 0;
2094 }
2095 }
2096 else
2097 {
2098 cum->words += words;
2099 cum->nregs -= words;
2100 cum->regno += words;
2101
2102 if (cum->nregs <= 0)
2103 {
2104 cum->nregs = 0;
2105 cum->regno = 0;
2106 }
2107 }
2108 }
2109 return;
2110 }
2111
2112 /* Define where to put the arguments to a function.
2113 Value is zero to push the argument on the stack,
2114 or a hard register in which to store the argument.
2115
2116 MODE is the argument's machine mode.
2117 TYPE is the data type of the argument (as a tree).
2118 This is null for libcalls where that information may
2119 not be available.
2120 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2121 the preceding args and about the function being called.
2122 NAMED is nonzero if this argument is a named parameter
2123 (otherwise it is an extra parameter matching an ellipsis). */
2124
2125 rtx
2126 function_arg (cum, mode, type, named)
2127 CUMULATIVE_ARGS *cum; /* current arg information */
2128 enum machine_mode mode; /* current arg mode */
2129 tree type; /* type of the argument or 0 if lib support */
2130 int named; /* != 0 for normal args, == 0 for ... args */
2131 {
2132 rtx ret = NULL_RTX;
2133 int bytes =
2134 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2135 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2136
2137 /* Handle a hidden AL argument containing the number of registers for varargs
2138 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2139 any AL settings. */
2140 if (mode == VOIDmode)
2141 {
2142 if (TARGET_64BIT)
2143 return GEN_INT (cum->maybe_vaarg
2144 ? (cum->sse_nregs < 0
2145 ? SSE_REGPARM_MAX
2146 : cum->sse_regno)
2147 : -1);
2148 else
2149 return constm1_rtx;
2150 }
2151 if (TARGET_64BIT)
2152 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2153 &x86_64_int_parameter_registers [cum->regno],
2154 cum->sse_regno);
2155 else
2156 switch (mode)
2157 {
2158 /* For now, pass fp/complex values on the stack. */
2159 default:
2160 break;
2161
2162 case BLKmode:
2163 case DImode:
2164 case SImode:
2165 case HImode:
2166 case QImode:
2167 if (words <= cum->nregs)
2168 ret = gen_rtx_REG (mode, cum->regno);
2169 break;
2170 case TImode:
2171 if (cum->sse_nregs)
2172 ret = gen_rtx_REG (mode, cum->sse_regno);
2173 break;
2174 }
2175
2176 if (TARGET_DEBUG_ARG)
2177 {
2178 fprintf (stderr,
2179 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2180 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2181
2182 if (ret)
2183 print_simple_rtl (stderr, ret);
2184 else
2185 fprintf (stderr, ", stack");
2186
2187 fprintf (stderr, " )\n");
2188 }
2189
2190 return ret;
2191 }
2192
2193 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2194 and type. */
2195
2196 int
2197 ix86_function_arg_boundary (mode, type)
2198 enum machine_mode mode;
2199 tree type;
2200 {
2201 int align;
2202 if (!TARGET_64BIT)
2203 return PARM_BOUNDARY;
2204 if (type)
2205 align = TYPE_ALIGN (type);
2206 else
2207 align = GET_MODE_ALIGNMENT (mode);
2208 if (align < PARM_BOUNDARY)
2209 align = PARM_BOUNDARY;
2210 if (align > 128)
2211 align = 128;
2212 return align;
2213 }
2214
2215 /* Return true if N is a possible register number of function value. */
2216 bool
2217 ix86_function_value_regno_p (regno)
2218 int regno;
2219 {
2220 if (!TARGET_64BIT)
2221 {
2222 return ((regno) == 0
2223 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2224 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2225 }
2226 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2227 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2228 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2229 }
2230
2231 /* Define how to find the value returned by a function.
2232 VALTYPE is the data type of the value (as a tree).
2233 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2234 otherwise, FUNC is 0. */
2235 rtx
2236 ix86_function_value (valtype)
2237 tree valtype;
2238 {
2239 if (TARGET_64BIT)
2240 {
2241 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2242 REGPARM_MAX, SSE_REGPARM_MAX,
2243 x86_64_int_return_registers, 0);
2244 /* For zero sized structures, construct_container returns NULL, but we need
2245 to keep the rest of the compiler happy by returning a meaningful value. */
2246 if (!ret)
2247 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2248 return ret;
2249 }
2250 else
2251 return gen_rtx_REG (TYPE_MODE (valtype),
2252 ix86_value_regno (TYPE_MODE (valtype)));
2253 }
2254
2255 /* Return nonzero iff type is returned in memory. */
2256 int
2257 ix86_return_in_memory (type)
2258 tree type;
2259 {
2260 int needed_intregs, needed_sseregs;
2261 if (TARGET_64BIT)
2262 {
2263 return !examine_argument (TYPE_MODE (type), type, 1,
2264 &needed_intregs, &needed_sseregs);
2265 }
2266 else
2267 {
2268 if (TYPE_MODE (type) == BLKmode
2269 || (VECTOR_MODE_P (TYPE_MODE (type))
2270 && int_size_in_bytes (type) == 8)
2271 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2272 && TYPE_MODE (type) != TFmode
2273 && !VECTOR_MODE_P (TYPE_MODE (type))))
2274 return 1;
2275 return 0;
2276 }
2277 }
2278
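/* Illustrative consequence of the rules above (hypothetical user type):
   a 16 byte struct of four ints is returned in memory on ia32, since its
   size exceeds 12 bytes, while on x86-64 the same struct classifies as
   two INTEGER eightbytes and is returned in registers.  */
#if 0
struct big4 { int a, b, c, d; };        /* 16 bytes */
extern struct big4 make_big4 (void);
#endif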
2279 /* Define how to find the value returned by a library function
2280 assuming the value has mode MODE. */
2281 rtx
2282 ix86_libcall_value (mode)
2283 enum machine_mode mode;
2284 {
2285 if (TARGET_64BIT)
2286 {
2287 switch (mode)
2288 {
2289 case SFmode:
2290 case SCmode:
2291 case DFmode:
2292 case DCmode:
2293 return gen_rtx_REG (mode, FIRST_SSE_REG);
2294 case TFmode:
2295 case TCmode:
2296 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2297 default:
2298 return gen_rtx_REG (mode, 0);
2299 }
2300 }
2301 else
2302 return gen_rtx_REG (mode, ix86_value_regno (mode));
2303 }
2304
2305 /* Given a mode, return the register to use for a return value. */
2306
2307 static int
2308 ix86_value_regno (mode)
2309 enum machine_mode mode;
2310 {
2311 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2312 return FIRST_FLOAT_REG;
2313 if (mode == TImode || VECTOR_MODE_P (mode))
2314 return FIRST_SSE_REG;
2315 return 0;
2316 }
2317 \f
2318 /* Create the va_list data type. */
2319
2320 tree
2321 ix86_build_va_list ()
2322 {
2323 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2324
2325 /* For i386 we use plain pointer to argument area. */
2326 if (!TARGET_64BIT)
2327 return build_pointer_type (char_type_node);
2328
2329 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2330 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2331
2332 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2333 unsigned_type_node);
2334 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2335 unsigned_type_node);
2336 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2337 ptr_type_node);
2338 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2339 ptr_type_node);
2340
2341 DECL_FIELD_CONTEXT (f_gpr) = record;
2342 DECL_FIELD_CONTEXT (f_fpr) = record;
2343 DECL_FIELD_CONTEXT (f_ovf) = record;
2344 DECL_FIELD_CONTEXT (f_sav) = record;
2345
2346 TREE_CHAIN (record) = type_decl;
2347 TYPE_NAME (record) = type_decl;
2348 TYPE_FIELDS (record) = f_gpr;
2349 TREE_CHAIN (f_gpr) = f_fpr;
2350 TREE_CHAIN (f_fpr) = f_ovf;
2351 TREE_CHAIN (f_ovf) = f_sav;
2352
2353 layout_type (record);
2354
2355 /* The correct type is an array type of one element. */
2356 return build_array_type (record, build_index_type (size_zero_node));
2357 }
2358
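/* Written as user-level C, the record built above corresponds roughly to
   the following sketch (the struct and typedef names here are
   illustrative; the real type is built from the tree nodes above):  */
#if 0
typedef struct ix86_va_list_tag
{
  unsigned int gp_offset;
  unsigned int fp_offset;
  void *overflow_arg_area;
  void *reg_save_area;
} ix86_va_list_equiv[1];
#endif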
2359 /* Perform any actions needed for a function that is receiving a
2360 variable number of arguments.
2361
2362 CUM is as above.
2363
2364 MODE and TYPE are the mode and type of the current parameter.
2365
2366 PRETEND_SIZE is a variable that should be set to the amount of stack
2367 that must be pushed by the prolog to pretend that our caller pushed
2368 it.
2369
2370 Normally, this macro will push all remaining incoming registers on the
2371 stack and set PRETEND_SIZE to the length of the registers pushed. */
2372
2373 void
2374 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2375 CUMULATIVE_ARGS *cum;
2376 enum machine_mode mode;
2377 tree type;
2378 int *pretend_size ATTRIBUTE_UNUSED;
2379 int no_rtl;
2380
2381 {
2382 CUMULATIVE_ARGS next_cum;
2383 rtx save_area = NULL_RTX, mem;
2384 rtx label;
2385 rtx label_ref;
2386 rtx tmp_reg;
2387 rtx nsse_reg;
2388 int set;
2389 tree fntype;
2390 int stdarg_p;
2391 int i;
2392
2393 if (!TARGET_64BIT)
2394 return;
2395
2396 /* Indicate to allocate space on the stack for varargs save area. */
2397 ix86_save_varrargs_registers = 1;
2398
2399 fntype = TREE_TYPE (current_function_decl);
2400 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2401 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2402 != void_type_node));
2403
2404 /* For varargs, we do not want to skip the dummy va_dcl argument.
2405 For stdargs, we do want to skip the last named argument. */
2406 next_cum = *cum;
2407 if (stdarg_p)
2408 function_arg_advance (&next_cum, mode, type, 1);
2409
2410 if (!no_rtl)
2411 save_area = frame_pointer_rtx;
2412
2413 set = get_varargs_alias_set ();
2414
2415 for (i = next_cum.regno; i < ix86_regparm; i++)
2416 {
2417 mem = gen_rtx_MEM (Pmode,
2418 plus_constant (save_area, i * UNITS_PER_WORD));
2419 set_mem_alias_set (mem, set);
2420 emit_move_insn (mem, gen_rtx_REG (Pmode,
2421 x86_64_int_parameter_registers[i]));
2422 }
2423
2424 if (next_cum.sse_nregs)
2425 {
2426 /* Now emit code to save SSE registers. The AX parameter contains the number
2427 of SSE parameter registers used to call this function. We use the
2428 sse_prologue_save insn template, which produces a computed jump across
2429 the SSE saves. We need some preparation work to get this working. */
2430
2431 label = gen_label_rtx ();
2432 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2433
2434 /* Compute address to jump to :
2435 label - 5*eax + nnamed_sse_arguments*5 */
2436 tmp_reg = gen_reg_rtx (Pmode);
2437 nsse_reg = gen_reg_rtx (Pmode);
2438 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2439 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2440 gen_rtx_MULT (Pmode, nsse_reg,
2441 GEN_INT (4))));
2442 if (next_cum.sse_regno)
2443 emit_move_insn
2444 (nsse_reg,
2445 gen_rtx_CONST (DImode,
2446 gen_rtx_PLUS (DImode,
2447 label_ref,
2448 GEN_INT (next_cum.sse_regno * 4))));
2449 else
2450 emit_move_insn (nsse_reg, label_ref);
2451 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2452
2453 /* Compute the address of the memory block we save into. We always use a
2454 pointer pointing 127 bytes after the first byte to store - this is needed
2455 to keep the instruction size limited to 4 bytes. */
2456 tmp_reg = gen_reg_rtx (Pmode);
2457 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2458 plus_constant (save_area,
2459 8 * REGPARM_MAX + 127)));
2460 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2461 set_mem_alias_set (mem, set);
2462 set_mem_align (mem, BITS_PER_WORD);
2463
2464 /* And finally do the dirty job! */
2465 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2466 GEN_INT (next_cum.sse_regno), label));
2467 }
2468
2469 }
2470
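/* Sketch of the resulting register save area layout (offsets relative to
   the frame-pointer based save_area used above, assuming the usual
   REGPARM_MAX of 6): bytes 0..47 hold the six integer argument
   registers, one word each, and bytes 48 onward hold the SSE argument
   registers saved by sse_prologue_save, 16 bytes apiece.  */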
2471 /* Implement va_start. */
2472
2473 void
2474 ix86_va_start (valist, nextarg)
2475 tree valist;
2476 rtx nextarg;
2477 {
2478 HOST_WIDE_INT words, n_gpr, n_fpr;
2479 tree f_gpr, f_fpr, f_ovf, f_sav;
2480 tree gpr, fpr, ovf, sav, t;
2481
2482 /* Only 64bit target needs something special. */
2483 if (!TARGET_64BIT)
2484 {
2485 std_expand_builtin_va_start (valist, nextarg);
2486 return;
2487 }
2488
2489 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2490 f_fpr = TREE_CHAIN (f_gpr);
2491 f_ovf = TREE_CHAIN (f_fpr);
2492 f_sav = TREE_CHAIN (f_ovf);
2493
2494 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2495 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2496 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2497 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2498 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2499
2500 /* Count number of gp and fp argument registers used. */
2501 words = current_function_args_info.words;
2502 n_gpr = current_function_args_info.regno;
2503 n_fpr = current_function_args_info.sse_regno;
2504
2505 if (TARGET_DEBUG_ARG)
2506 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2507 (int) words, (int) n_gpr, (int) n_fpr);
2508
2509 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2510 build_int_2 (n_gpr * 8, 0));
2511 TREE_SIDE_EFFECTS (t) = 1;
2512 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2513
2514 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2515 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2518
2519 /* Find the overflow area. */
2520 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2521 if (words != 0)
2522 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2523 build_int_2 (words * UNITS_PER_WORD, 0));
2524 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2525 TREE_SIDE_EFFECTS (t) = 1;
2526 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2527
2528 /* Find the register save area.
2529 The prologue of the function saves it right above the stack frame. */
2530 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2531 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2532 TREE_SIDE_EFFECTS (t) = 1;
2533 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2534 }
2535
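/* Illustrative values (assumed example, not computed here): for a
   prototype like int f (int a, double b, ...), one integer and one SSE
   register are consumed by named arguments, so va_start above leaves
   gp_offset = 8 and fp_offset = REGPARM_MAX * 8 + 16, with
   overflow_arg_area pointing just past any named stack words.  */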
2536 /* Implement va_arg. */
2537 rtx
2538 ix86_va_arg (valist, type)
2539 tree valist, type;
2540 {
2541 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2542 tree f_gpr, f_fpr, f_ovf, f_sav;
2543 tree gpr, fpr, ovf, sav, t;
2544 int size, rsize;
2545 rtx lab_false, lab_over = NULL_RTX;
2546 rtx addr_rtx, r;
2547 rtx container;
2548
2549 /* Only 64bit target needs something special. */
2550 if (!TARGET_64BIT)
2551 {
2552 return std_expand_builtin_va_arg (valist, type);
2553 }
2554
2555 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2556 f_fpr = TREE_CHAIN (f_gpr);
2557 f_ovf = TREE_CHAIN (f_fpr);
2558 f_sav = TREE_CHAIN (f_ovf);
2559
2560 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2561 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2562 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2563 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2564 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2565
2566 size = int_size_in_bytes (type);
2567 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2568
2569 container = construct_container (TYPE_MODE (type), type, 0,
2570 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2571 /*
2572 * Pull the value out of the saved registers ...
2573 */
2574
2575 addr_rtx = gen_reg_rtx (Pmode);
2576
2577 if (container)
2578 {
2579 rtx int_addr_rtx, sse_addr_rtx;
2580 int needed_intregs, needed_sseregs;
2581 int need_temp;
2582
2583 lab_over = gen_label_rtx ();
2584 lab_false = gen_label_rtx ();
2585
2586 examine_argument (TYPE_MODE (type), type, 0,
2587 &needed_intregs, &needed_sseregs);
2588
2589
2590 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2591 || TYPE_ALIGN (type) > 128);
2592
2593 /* In case we are passing a structure, verify that it is a consecutive block
2594 in the register save area. If not, we need to do moves. */
2595 if (!need_temp && !REG_P (container))
2596 {
2597 /* Verify that all registers are strictly consecutive. */
2598 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2599 {
2600 int i;
2601
2602 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2603 {
2604 rtx slot = XVECEXP (container, 0, i);
2605 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2606 || INTVAL (XEXP (slot, 1)) != i * 16)
2607 need_temp = 1;
2608 }
2609 }
2610 else
2611 {
2612 int i;
2613
2614 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2615 {
2616 rtx slot = XVECEXP (container, 0, i);
2617 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2618 || INTVAL (XEXP (slot, 1)) != i * 8)
2619 need_temp = 1;
2620 }
2621 }
2622 }
2623 if (!need_temp)
2624 {
2625 int_addr_rtx = addr_rtx;
2626 sse_addr_rtx = addr_rtx;
2627 }
2628 else
2629 {
2630 int_addr_rtx = gen_reg_rtx (Pmode);
2631 sse_addr_rtx = gen_reg_rtx (Pmode);
2632 }
2633 /* First ensure that we fit completely in registers. */
2634 if (needed_intregs)
2635 {
2636 emit_cmp_and_jump_insns (expand_expr
2637 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2638 GEN_INT ((REGPARM_MAX - needed_intregs +
2639 1) * 8), GE, const1_rtx, SImode,
2640 1, lab_false);
2641 }
2642 if (needed_sseregs)
2643 {
2644 emit_cmp_and_jump_insns (expand_expr
2645 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2646 GEN_INT ((SSE_REGPARM_MAX -
2647 needed_sseregs + 1) * 16 +
2648 REGPARM_MAX * 8), GE, const1_rtx,
2649 SImode, 1, lab_false);
2650 }
2651
2652 /* Compute index to start of area used for integer regs. */
2653 if (needed_intregs)
2654 {
2655 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2656 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2657 if (r != int_addr_rtx)
2658 emit_move_insn (int_addr_rtx, r);
2659 }
2660 if (needed_sseregs)
2661 {
2662 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2663 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2664 if (r != sse_addr_rtx)
2665 emit_move_insn (sse_addr_rtx, r);
2666 }
2667 if (need_temp)
2668 {
2669 int i;
2670 rtx mem;
2671
2672 /* Never use the memory itself, as it has the alias set. */
2673 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2674 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2675 set_mem_alias_set (mem, get_varargs_alias_set ());
2676 set_mem_align (mem, BITS_PER_UNIT);
2677
2678 for (i = 0; i < XVECLEN (container, 0); i++)
2679 {
2680 rtx slot = XVECEXP (container, 0, i);
2681 rtx reg = XEXP (slot, 0);
2682 enum machine_mode mode = GET_MODE (reg);
2683 rtx src_addr;
2684 rtx src_mem;
2685 int src_offset;
2686 rtx dest_mem;
2687
2688 if (SSE_REGNO_P (REGNO (reg)))
2689 {
2690 src_addr = sse_addr_rtx;
2691 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2692 }
2693 else
2694 {
2695 src_addr = int_addr_rtx;
2696 src_offset = REGNO (reg) * 8;
2697 }
2698 src_mem = gen_rtx_MEM (mode, src_addr);
2699 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2700 src_mem = adjust_address (src_mem, mode, src_offset);
2701 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2702 emit_move_insn (dest_mem, src_mem);
2703 }
2704 }
2705
2706 if (needed_intregs)
2707 {
2708 t =
2709 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2710 build_int_2 (needed_intregs * 8, 0));
2711 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2712 TREE_SIDE_EFFECTS (t) = 1;
2713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2714 }
2715 if (needed_sseregs)
2716 {
2717 t =
2718 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2719 build_int_2 (needed_sseregs * 16, 0));
2720 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2721 TREE_SIDE_EFFECTS (t) = 1;
2722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2723 }
2724
2725 emit_jump_insn (gen_jump (lab_over));
2726 emit_barrier ();
2727 emit_label (lab_false);
2728 }
2729
2730 /* ... otherwise out of the overflow area. */
2731
2732 /* Care for on-stack alignment if needed. */
2733 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2734 t = ovf;
2735 else
2736 {
2737 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2738 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2739 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2740 }
2741 t = save_expr (t);
2742
2743 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2744 if (r != addr_rtx)
2745 emit_move_insn (addr_rtx, r);
2746
2747 t =
2748 build (PLUS_EXPR, TREE_TYPE (t), t,
2749 build_int_2 (rsize * UNITS_PER_WORD, 0));
2750 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2751 TREE_SIDE_EFFECTS (t) = 1;
2752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2753
2754 if (container)
2755 emit_label (lab_over);
2756
2757 return addr_rtx;
2758 }
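/* A minimal user-level sketch of the varargs pattern this expander
   services (hypothetical example, not part of this file): each va_arg
   below is first satisfied from the register save area via
   gp_offset/fp_offset and falls back to overflow_arg_area once the
   corresponding registers are exhausted.  */
#if 0
#include <stdarg.h>

double
sum_doubles (int count, ...)
{
  va_list ap;
  double total = 0.0;
  int i;

  va_start (ap, count);
  for (i = 0; i < count; i++)
    total += va_arg (ap, double);
  va_end (ap);
  return total;
}
#endif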
2759 \f
2760 /* Return nonzero if OP is either an i387 or an SSE fp register. */
2761 int
2762 any_fp_register_operand (op, mode)
2763 rtx op;
2764 enum machine_mode mode ATTRIBUTE_UNUSED;
2765 {
2766 return ANY_FP_REG_P (op);
2767 }
2768
2769 /* Return nonzero if OP is an i387 fp register. */
2770 int
2771 fp_register_operand (op, mode)
2772 rtx op;
2773 enum machine_mode mode ATTRIBUTE_UNUSED;
2774 {
2775 return FP_REG_P (op);
2776 }
2777
2778 /* Return nonzero if OP is a non-fp register_operand. */
2779 int
2780 register_and_not_any_fp_reg_operand (op, mode)
2781 rtx op;
2782 enum machine_mode mode;
2783 {
2784 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2785 }
2786
2787 /* Return nonzero if OP is a register operand other than an
2788 i387 fp register. */
2789 int
2790 register_and_not_fp_reg_operand (op, mode)
2791 rtx op;
2792 enum machine_mode mode;
2793 {
2794 return register_operand (op, mode) && !FP_REG_P (op);
2795 }
2796
2797 /* Return nonzero if OP is general operand representable on x86_64. */
2798
2799 int
2800 x86_64_general_operand (op, mode)
2801 rtx op;
2802 enum machine_mode mode;
2803 {
2804 if (!TARGET_64BIT)
2805 return general_operand (op, mode);
2806 if (nonimmediate_operand (op, mode))
2807 return 1;
2808 return x86_64_sign_extended_value (op);
2809 }
2810
2811 /* Return nonzero if OP is general operand representable on x86_64
2812 as either sign extended or zero extended constant. */
2813
2814 int
2815 x86_64_szext_general_operand (op, mode)
2816 rtx op;
2817 enum machine_mode mode;
2818 {
2819 if (!TARGET_64BIT)
2820 return general_operand (op, mode);
2821 if (nonimmediate_operand (op, mode))
2822 return 1;
2823 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2824 }
2825
2826 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2827
2828 int
2829 x86_64_nonmemory_operand (op, mode)
2830 rtx op;
2831 enum machine_mode mode;
2832 {
2833 if (!TARGET_64BIT)
2834 return nonmemory_operand (op, mode);
2835 if (register_operand (op, mode))
2836 return 1;
2837 return x86_64_sign_extended_value (op);
2838 }
2839
2840 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2841
2842 int
2843 x86_64_movabs_operand (op, mode)
2844 rtx op;
2845 enum machine_mode mode;
2846 {
2847 if (!TARGET_64BIT || !flag_pic)
2848 return nonmemory_operand (op, mode);
2849 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2850 return 1;
2851 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2852 return 1;
2853 return 0;
2854 }
2855
2856 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2857
2858 int
2859 x86_64_szext_nonmemory_operand (op, mode)
2860 rtx op;
2861 enum machine_mode mode;
2862 {
2863 if (!TARGET_64BIT)
2864 return nonmemory_operand (op, mode);
2865 if (register_operand (op, mode))
2866 return 1;
2867 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2868 }
2869
2870 /* Return nonzero if OP is immediate operand representable on x86_64. */
2871
2872 int
2873 x86_64_immediate_operand (op, mode)
2874 rtx op;
2875 enum machine_mode mode;
2876 {
2877 if (!TARGET_64BIT)
2878 return immediate_operand (op, mode);
2879 return x86_64_sign_extended_value (op);
2880 }
2881
2882 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
2883
2884 int
2885 x86_64_zext_immediate_operand (op, mode)
2886 rtx op;
2887 enum machine_mode mode ATTRIBUTE_UNUSED;
2888 {
2889 return x86_64_zero_extended_value (op);
2890 }
2891
2892 /* Return nonzero if OP is (const_int 1), else return zero. */
2893
2894 int
2895 const_int_1_operand (op, mode)
2896 rtx op;
2897 enum machine_mode mode ATTRIBUTE_UNUSED;
2898 {
2899 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2900 }
2901
2902 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2903 for shift & compare patterns, as shifting by 0 does not change flags),
2904 else return zero. */
2905
2906 int
2907 const_int_1_31_operand (op, mode)
2908 rtx op;
2909 enum machine_mode mode ATTRIBUTE_UNUSED;
2910 {
2911 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2912 }
2913
2914 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2915 reference and a constant. */
2916
2917 int
2918 symbolic_operand (op, mode)
2919 register rtx op;
2920 enum machine_mode mode ATTRIBUTE_UNUSED;
2921 {
2922 switch (GET_CODE (op))
2923 {
2924 case SYMBOL_REF:
2925 case LABEL_REF:
2926 return 1;
2927
2928 case CONST:
2929 op = XEXP (op, 0);
2930 if (GET_CODE (op) == SYMBOL_REF
2931 || GET_CODE (op) == LABEL_REF
2932 || (GET_CODE (op) == UNSPEC
2933 && (XINT (op, 1) == UNSPEC_GOT
2934 || XINT (op, 1) == UNSPEC_GOTOFF
2935 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2936 return 1;
2937 if (GET_CODE (op) != PLUS
2938 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2939 return 0;
2940
2941 op = XEXP (op, 0);
2942 if (GET_CODE (op) == SYMBOL_REF
2943 || GET_CODE (op) == LABEL_REF)
2944 return 1;
2945 /* Only @GOTOFF gets offsets. */
2946 if (GET_CODE (op) != UNSPEC
2947 || XINT (op, 1) != UNSPEC_GOTOFF)
2948 return 0;
2949
2950 op = XVECEXP (op, 0, 0);
2951 if (GET_CODE (op) == SYMBOL_REF
2952 || GET_CODE (op) == LABEL_REF)
2953 return 1;
2954 return 0;
2955
2956 default:
2957 return 0;
2958 }
2959 }
2960
2961 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2962
2963 int
2964 pic_symbolic_operand (op, mode)
2965 register rtx op;
2966 enum machine_mode mode ATTRIBUTE_UNUSED;
2967 {
2968 if (GET_CODE (op) != CONST)
2969 return 0;
2970 op = XEXP (op, 0);
2971 if (TARGET_64BIT)
2972 {
2973 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2974 return 1;
2975 }
2976 else
2977 {
2978 if (GET_CODE (op) == UNSPEC)
2979 return 1;
2980 if (GET_CODE (op) != PLUS
2981 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2982 return 0;
2983 op = XEXP (op, 0);
2984 if (GET_CODE (op) == UNSPEC)
2985 return 1;
2986 }
2987 return 0;
2988 }
2989
2990 /* Return true if OP is a symbolic operand that resolves locally. */
2991
2992 static int
2993 local_symbolic_operand (op, mode)
2994 rtx op;
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2996 {
2997 if (GET_CODE (op) == LABEL_REF)
2998 return 1;
2999
3000 if (GET_CODE (op) == CONST
3001 && GET_CODE (XEXP (op, 0)) == PLUS
3002 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3003 op = XEXP (XEXP (op, 0), 0);
3004
3005 if (GET_CODE (op) != SYMBOL_REF)
3006 return 0;
3007
3008 /* These we've been told are local by varasm and encode_section_info
3009 respectively. */
3010 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3011 return 1;
3012
3013 /* There is, however, a not insubstantial body of code in the rest of
3014 the compiler that assumes it can just stick the results of
3015 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3016 /* ??? This is a hack. Should update the body of the compiler to
3017 always create a DECL and invoke targetm.encode_section_info. */
3018 if (strncmp (XSTR (op, 0), internal_label_prefix,
3019 internal_label_prefix_len) == 0)
3020 return 1;
3021
3022 return 0;
3023 }
3024
3025 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3026
3027 int
3028 tls_symbolic_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3031 {
3032 const char *symbol_str;
3033
3034 if (GET_CODE (op) != SYMBOL_REF)
3035 return 0;
3036 symbol_str = XSTR (op, 0);
3037
3038 if (symbol_str[0] != '%')
3039 return 0;
3040 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3041 }
3042
3043 static int
3044 tls_symbolic_operand_1 (op, kind)
3045 rtx op;
3046 enum tls_model kind;
3047 {
3048 const char *symbol_str;
3049
3050 if (GET_CODE (op) != SYMBOL_REF)
3051 return 0;
3052 symbol_str = XSTR (op, 0);
3053
3054 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3055 }
3056
3057 int
3058 global_dynamic_symbolic_operand (op, mode)
3059 register rtx op;
3060 enum machine_mode mode ATTRIBUTE_UNUSED;
3061 {
3062 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3063 }
3064
3065 int
3066 local_dynamic_symbolic_operand (op, mode)
3067 register rtx op;
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3069 {
3070 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3071 }
3072
3073 int
3074 initial_exec_symbolic_operand (op, mode)
3075 register rtx op;
3076 enum machine_mode mode ATTRIBUTE_UNUSED;
3077 {
3078 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3079 }
3080
3081 int
3082 local_exec_symbolic_operand (op, mode)
3083 register rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 {
3086 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3087 }
3088
3089 /* Test for a valid operand for a call instruction. Don't allow the
3090 arg pointer register or virtual regs since they may decay into
3091 reg + const, which the patterns can't handle. */
3092
3093 int
3094 call_insn_operand (op, mode)
3095 rtx op;
3096 enum machine_mode mode ATTRIBUTE_UNUSED;
3097 {
3098 /* Disallow indirect through a virtual register. This leads to
3099 compiler aborts when trying to eliminate them. */
3100 if (GET_CODE (op) == REG
3101 && (op == arg_pointer_rtx
3102 || op == frame_pointer_rtx
3103 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3104 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3105 return 0;
3106
3107 /* Disallow `call 1234'. Due to varying assembler lameness this
3108 gets either rejected or translated to `call .+1234'. */
3109 if (GET_CODE (op) == CONST_INT)
3110 return 0;
3111
3112 /* Explicitly allow SYMBOL_REF even if pic. */
3113 if (GET_CODE (op) == SYMBOL_REF)
3114 return 1;
3115
3116 /* Otherwise we can allow any general_operand in the address. */
3117 return general_operand (op, Pmode);
3118 }
3119
3120 int
3121 constant_call_address_operand (op, mode)
3122 rtx op;
3123 enum machine_mode mode ATTRIBUTE_UNUSED;
3124 {
3125 if (GET_CODE (op) == CONST
3126 && GET_CODE (XEXP (op, 0)) == PLUS
3127 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3128 op = XEXP (XEXP (op, 0), 0);
3129 return GET_CODE (op) == SYMBOL_REF;
3130 }
3131
3132 /* Match exactly zero and one. */
3133
3134 int
3135 const0_operand (op, mode)
3136 register rtx op;
3137 enum machine_mode mode;
3138 {
3139 return op == CONST0_RTX (mode);
3140 }
3141
3142 int
3143 const1_operand (op, mode)
3144 register rtx op;
3145 enum machine_mode mode ATTRIBUTE_UNUSED;
3146 {
3147 return op == const1_rtx;
3148 }
3149
3150 /* Match 2, 4, or 8. Used for leal multiplicands. */
3151
3152 int
3153 const248_operand (op, mode)
3154 register rtx op;
3155 enum machine_mode mode ATTRIBUTE_UNUSED;
3156 {
3157 return (GET_CODE (op) == CONST_INT
3158 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3159 }
3160
3161 /* True if this is a constant appropriate for an increment or decrement. */
3162
3163 int
3164 incdec_operand (op, mode)
3165 register rtx op;
3166 enum machine_mode mode ATTRIBUTE_UNUSED;
3167 {
3168 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3169 flags register, since the carry flag is not set. */
3170 if (TARGET_PENTIUM4 && !optimize_size)
3171 return 0;
3172 return op == const1_rtx || op == constm1_rtx;
3173 }
3174
3175 /* Return nonzero if OP is acceptable as operand of DImode shift
3176 expander. */
3177
3178 int
3179 shiftdi_operand (op, mode)
3180 rtx op;
3181 enum machine_mode mode ATTRIBUTE_UNUSED;
3182 {
3183 if (TARGET_64BIT)
3184 return nonimmediate_operand (op, mode);
3185 else
3186 return register_operand (op, mode);
3187 }
3188
3189 /* Return false if this is the stack pointer, or any other fake
3190 register eliminable to the stack pointer. Otherwise, this is
3191 a register operand.
3192
3193 This is used to prevent esp from being used as an index reg,
3194 which would only happen in pathological cases. */
3195
3196 int
3197 reg_no_sp_operand (op, mode)
3198 register rtx op;
3199 enum machine_mode mode;
3200 {
3201 rtx t = op;
3202 if (GET_CODE (t) == SUBREG)
3203 t = SUBREG_REG (t);
3204 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3205 return 0;
3206
3207 return register_operand (op, mode);
3208 }
3209
3210 int
3211 mmx_reg_operand (op, mode)
3212 register rtx op;
3213 enum machine_mode mode ATTRIBUTE_UNUSED;
3214 {
3215 return MMX_REG_P (op);
3216 }
3217
3218 /* Return false if this is any eliminable register. Otherwise
3219 general_operand. */
3220
3221 int
3222 general_no_elim_operand (op, mode)
3223 register rtx op;
3224 enum machine_mode mode;
3225 {
3226 rtx t = op;
3227 if (GET_CODE (t) == SUBREG)
3228 t = SUBREG_REG (t);
3229 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3230 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3231 || t == virtual_stack_dynamic_rtx)
3232 return 0;
3233 if (REG_P (t)
3234 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3235 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3236 return 0;
3237
3238 return general_operand (op, mode);
3239 }
3240
3241 /* Return false if this is any eliminable register. Otherwise
3242 register_operand or const_int. */
3243
3244 int
3245 nonmemory_no_elim_operand (op, mode)
3246 register rtx op;
3247 enum machine_mode mode;
3248 {
3249 rtx t = op;
3250 if (GET_CODE (t) == SUBREG)
3251 t = SUBREG_REG (t);
3252 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3253 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3254 || t == virtual_stack_dynamic_rtx)
3255 return 0;
3256
3257 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3258 }
3259
3260 /* Return false if this is any eliminable register or stack register,
3261 otherwise work like register_operand. */
3262
3263 int
3264 index_register_operand (op, mode)
3265 register rtx op;
3266 enum machine_mode mode;
3267 {
3268 rtx t = op;
3269 if (GET_CODE (t) == SUBREG)
3270 t = SUBREG_REG (t);
3271 if (!REG_P (t))
3272 return 0;
3273 if (t == arg_pointer_rtx
3274 || t == frame_pointer_rtx
3275 || t == virtual_incoming_args_rtx
3276 || t == virtual_stack_vars_rtx
3277 || t == virtual_stack_dynamic_rtx
3278 || REGNO (t) == STACK_POINTER_REGNUM)
3279 return 0;
3280
3281 return general_operand (op, mode);
3282 }
3283
3284 /* Return true if op is a Q_REGS class register. */
3285
3286 int
3287 q_regs_operand (op, mode)
3288 register rtx op;
3289 enum machine_mode mode;
3290 {
3291 if (mode != VOIDmode && GET_MODE (op) != mode)
3292 return 0;
3293 if (GET_CODE (op) == SUBREG)
3294 op = SUBREG_REG (op);
3295 return ANY_QI_REG_P (op);
3296 }
3297
3298 /* Return true if op is a NON_Q_REGS class register. */
3299
3300 int
3301 non_q_regs_operand (op, mode)
3302 register rtx op;
3303 enum machine_mode mode;
3304 {
3305 if (mode != VOIDmode && GET_MODE (op) != mode)
3306 return 0;
3307 if (GET_CODE (op) == SUBREG)
3308 op = SUBREG_REG (op);
3309 return NON_QI_REG_P (op);
3310 }
3311
3312 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3313 insns. */
3314 int
3315 sse_comparison_operator (op, mode)
3316 rtx op;
3317 enum machine_mode mode ATTRIBUTE_UNUSED;
3318 {
3319 enum rtx_code code = GET_CODE (op);
3320 switch (code)
3321 {
3322 /* Operations supported directly. */
3323 case EQ:
3324 case LT:
3325 case LE:
3326 case UNORDERED:
3327 case NE:
3328 case UNGE:
3329 case UNGT:
3330 case ORDERED:
3331 return 1;
3332 /* These are equivalent to ones above in non-IEEE comparisons. */
3333 case UNEQ:
3334 case UNLT:
3335 case UNLE:
3336 case LTGT:
3337 case GE:
3338 case GT:
3339 return !TARGET_IEEE_FP;
3340 default:
3341 return 0;
3342 }
3343 }
3344 /* Return 1 if OP is a valid comparison operator in valid mode. */
3345 int
3346 ix86_comparison_operator (op, mode)
3347 register rtx op;
3348 enum machine_mode mode;
3349 {
3350 enum machine_mode inmode;
3351 enum rtx_code code = GET_CODE (op);
3352 if (mode != VOIDmode && GET_MODE (op) != mode)
3353 return 0;
3354 if (GET_RTX_CLASS (code) != '<')
3355 return 0;
3356 inmode = GET_MODE (XEXP (op, 0));
3357
3358 if (inmode == CCFPmode || inmode == CCFPUmode)
3359 {
3360 enum rtx_code second_code, bypass_code;
3361 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3362 return (bypass_code == NIL && second_code == NIL);
3363 }
3364 switch (code)
3365 {
3366 case EQ: case NE:
3367 return 1;
3368 case LT: case GE:
3369 if (inmode == CCmode || inmode == CCGCmode
3370 || inmode == CCGOCmode || inmode == CCNOmode)
3371 return 1;
3372 return 0;
3373 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3374 if (inmode == CCmode)
3375 return 1;
3376 return 0;
3377 case GT: case LE:
3378 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3379 return 1;
3380 return 0;
3381 default:
3382 return 0;
3383 }
3384 }
3385
3386 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3387
3388 int
3389 fcmov_comparison_operator (op, mode)
3390 register rtx op;
3391 enum machine_mode mode;
3392 {
3393 enum machine_mode inmode;
3394 enum rtx_code code = GET_CODE (op);
3395 if (mode != VOIDmode && GET_MODE (op) != mode)
3396 return 0;
3397 if (GET_RTX_CLASS (code) != '<')
3398 return 0;
3399 inmode = GET_MODE (XEXP (op, 0));
3400 if (inmode == CCFPmode || inmode == CCFPUmode)
3401 {
3402 enum rtx_code second_code, bypass_code;
3403 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3404 if (bypass_code != NIL || second_code != NIL)
3405 return 0;
3406 code = ix86_fp_compare_code_to_integer (code);
3407 }
3408 /* The i387 supports just a limited set of condition codes. */
3409 switch (code)
3410 {
3411 case LTU: case GTU: case LEU: case GEU:
3412 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3413 return 1;
3414 return 0;
3415 case ORDERED: case UNORDERED:
3416 case EQ: case NE:
3417 return 1;
3418 default:
3419 return 0;
3420 }
3421 }
3422
3423 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3424
3425 int
3426 promotable_binary_operator (op, mode)
3427 register rtx op;
3428 enum machine_mode mode ATTRIBUTE_UNUSED;
3429 {
3430 switch (GET_CODE (op))
3431 {
3432 case MULT:
3433 /* Modern CPUs have the same latency for HImode and SImode multiply,
3434 but the 386 and 486 do HImode multiply faster. */
3435 return ix86_cpu > PROCESSOR_I486;
3436 case PLUS:
3437 case AND:
3438 case IOR:
3439 case XOR:
3440 case ASHIFT:
3441 return 1;
3442 default:
3443 return 0;
3444 }
3445 }
3446
3447 /* Nearly general operand, but accept any const_double, since we wish
3448 to be able to drop them into memory rather than have them get pulled
3449 into registers. */
3450
3451 int
3452 cmp_fp_expander_operand (op, mode)
3453 register rtx op;
3454 enum machine_mode mode;
3455 {
3456 if (mode != VOIDmode && mode != GET_MODE (op))
3457 return 0;
3458 if (GET_CODE (op) == CONST_DOUBLE)
3459 return 1;
3460 return general_operand (op, mode);
3461 }
3462
3463 /* Match an SI or HImode register for a zero_extract. */
3464
3465 int
3466 ext_register_operand (op, mode)
3467 register rtx op;
3468 enum machine_mode mode ATTRIBUTE_UNUSED;
3469 {
3470 int regno;
3471 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3472 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3473 return 0;
3474
3475 if (!register_operand (op, VOIDmode))
3476 return 0;
3477
3478 /* Be careful to accept only registers having upper parts. */
3479 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3480 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3481 }
3482
3483 /* Return 1 if this is a valid binary floating-point operation.
3484 OP is the expression matched, and MODE is its mode. */
3485
3486 int
3487 binary_fp_operator (op, mode)
3488 register rtx op;
3489 enum machine_mode mode;
3490 {
3491 if (mode != VOIDmode && mode != GET_MODE (op))
3492 return 0;
3493
3494 switch (GET_CODE (op))
3495 {
3496 case PLUS:
3497 case MINUS:
3498 case MULT:
3499 case DIV:
3500 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3501
3502 default:
3503 return 0;
3504 }
3505 }
3506
3507 int
3508 mult_operator (op, mode)
3509 register rtx op;
3510 enum machine_mode mode ATTRIBUTE_UNUSED;
3511 {
3512 return GET_CODE (op) == MULT;
3513 }
3514
3515 int
3516 div_operator (op, mode)
3517 register rtx op;
3518 enum machine_mode mode ATTRIBUTE_UNUSED;
3519 {
3520 return GET_CODE (op) == DIV;
3521 }
3522
3523 int
3524 arith_or_logical_operator (op, mode)
3525 rtx op;
3526 enum machine_mode mode;
3527 {
3528 return ((mode == VOIDmode || GET_MODE (op) == mode)
3529 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3530 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3531 }
3532
3533 /* Returns 1 if OP is a memory operand with a displacement. */
3534
3535 int
3536 memory_displacement_operand (op, mode)
3537 register rtx op;
3538 enum machine_mode mode;
3539 {
3540 struct ix86_address parts;
3541
3542 if (! memory_operand (op, mode))
3543 return 0;
3544
3545 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3546 abort ();
3547
3548 return parts.disp != NULL_RTX;
3549 }
3550
3551 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3552 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3553
3554 ??? It seems likely that this will only work because cmpsi is an
3555 expander, and no actual insns use this. */
3556
3557 int
3558 cmpsi_operand (op, mode)
3559 rtx op;
3560 enum machine_mode mode;
3561 {
3562 if (nonimmediate_operand (op, mode))
3563 return 1;
3564
3565 if (GET_CODE (op) == AND
3566 && GET_MODE (op) == SImode
3567 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3568 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3569 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3570 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3571 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3572 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3573 return 1;
3574
3575 return 0;
3576 }
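
/* As a rough illustration, besides any nonimmediate operand this predicate
   also accepts an expression of the shape

     (and:SI (zero_extract:SI (reg:SI N) (const_int 8) (const_int 8))
	     (const_int MASK))

   which is what jump re-emits when testing the high QImode part of a
   register (the %ah-style subregister); N and MASK are just placeholders
   here.  */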
3577
3578 /* Returns 1 if OP is a memory operand that cannot be represented by the
3579 modRM array. */
3580
3581 int
3582 long_memory_operand (op, mode)
3583 register rtx op;
3584 enum machine_mode mode;
3585 {
3586 if (! memory_operand (op, mode))
3587 return 0;
3588
3589 return memory_address_length (op) != 0;
3590 }
3591
3592 /* Return nonzero if the rtx is known aligned. */
3593
3594 int
3595 aligned_operand (op, mode)
3596 rtx op;
3597 enum machine_mode mode;
3598 {
3599 struct ix86_address parts;
3600
3601 if (!general_operand (op, mode))
3602 return 0;
3603
3604 /* Registers and immediate operands are always "aligned". */
3605 if (GET_CODE (op) != MEM)
3606 return 1;
3607
3608 /* Don't even try to do any aligned optimizations with volatiles. */
3609 if (MEM_VOLATILE_P (op))
3610 return 0;
3611
3612 op = XEXP (op, 0);
3613
3614 /* Pushes and pops are only valid on the stack pointer. */
3615 if (GET_CODE (op) == PRE_DEC
3616 || GET_CODE (op) == POST_INC)
3617 return 1;
3618
3619 /* Decode the address. */
3620 if (! ix86_decompose_address (op, &parts))
3621 abort ();
3622
3623 if (parts.base && GET_CODE (parts.base) == SUBREG)
3624 parts.base = SUBREG_REG (parts.base);
3625 if (parts.index && GET_CODE (parts.index) == SUBREG)
3626 parts.index = SUBREG_REG (parts.index);
3627
3628 /* Look for some component that isn't known to be aligned. */
3629 if (parts.index)
3630 {
3631 if (parts.scale < 4
3632 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3633 return 0;
3634 }
3635 if (parts.base)
3636 {
3637 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3638 return 0;
3639 }
3640 if (parts.disp)
3641 {
3642 if (GET_CODE (parts.disp) != CONST_INT
3643 || (INTVAL (parts.disp) & 3) != 0)
3644 return 0;
3645 }
3646
3647 /* Didn't find one -- this must be an aligned address. */
3648 return 1;
3649 }
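
/* As a rough illustration, assuming REG is a pointer register whose
   REGNO_POINTER_ALIGN is at least 32:

     (mem:SI (plus:SI (reg:SI REG) (const_int 8)))   is considered aligned
     (mem:SI (plus:SI (reg:SI REG) (const_int 2)))   is not (disp & 3 != 0)

   and any address whose base, or whose index scaled by less than 4, is not
   known to be 32-bit aligned is likewise rejected.  */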
3650 \f
3651 /* Return true if the constant is something that can be loaded with
3652 a special instruction. Only handle 0.0 and 1.0; others are less
3653 worthwhile. */
3654
3655 int
3656 standard_80387_constant_p (x)
3657 rtx x;
3658 {
3659 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3660 return -1;
3661 /* Note that on the 80387 there are other constants, such as pi, that we could
3662 support too. On some machines, these are much slower to load as a standard
3663 constant than to load from a double in memory. */
3664 if (x == CONST0_RTX (GET_MODE (x)))
3665 return 1;
3666 if (x == CONST1_RTX (GET_MODE (x)))
3667 return 2;
3668 return 0;
3669 }
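
/* As a rough illustration of the return convention (DFmode assumed):

     standard_80387_constant_p (CONST0_RTX (DFmode)) == 1
     standard_80387_constant_p (CONST1_RTX (DFmode)) == 2

   Any other floating-point CONST_DOUBLE yields 0 and is left to be loaded
   from memory; the nonzero values are meant to select the single-instruction
   loads (approximately fldz and fld1, respectively).  */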
3670
3671 /* Return 1 if X is an FP constant we can load into an SSE register
3672 without using memory. */
3673 int
3674 standard_sse_constant_p (x)
3675 rtx x;
3676 {
3677 if (GET_CODE (x) != CONST_DOUBLE)
3678 return -1;
3679 return (x == CONST0_RTX (GET_MODE (x)));
3680 }
3681
3682 /* Returns 1 if OP contains a symbol reference */
3683
3684 int
3685 symbolic_reference_mentioned_p (op)
3686 rtx op;
3687 {
3688 register const char *fmt;
3689 register int i;
3690
3691 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3692 return 1;
3693
3694 fmt = GET_RTX_FORMAT (GET_CODE (op));
3695 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3696 {
3697 if (fmt[i] == 'E')
3698 {
3699 register int j;
3700
3701 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3702 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3703 return 1;
3704 }
3705
3706 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3707 return 1;
3708 }
3709
3710 return 0;
3711 }
3712
3713 /* Return 1 if it is appropriate to emit `ret' instructions in the
3714 body of a function. Do this only if the epilogue is simple, needing a
3715 couple of insns. Prior to reloading, we can't tell how many registers
3716 must be saved, so return 0 then. Return 0 if there is no frame
3717 marker to de-allocate.
3718
3719 If NON_SAVING_SETJMP is defined and true, then it is not possible
3720 for the epilogue to be simple, so return 0. This is a special case
3721 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3722 until final, but jump_optimize may need to know sooner if a
3723 `return' is OK. */
3724
3725 int
3726 ix86_can_use_return_insn_p ()
3727 {
3728 struct ix86_frame frame;
3729
3730 #ifdef NON_SAVING_SETJMP
3731 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3732 return 0;
3733 #endif
3734
3735 if (! reload_completed || frame_pointer_needed)
3736 return 0;
3737
3738 /* Don't allow popping more than 32K bytes of arguments, since that's all
3739 we handle with one instruction. */
3740 if (current_function_pops_args
3741 && current_function_args_size >= 32768)
3742 return 0;
3743
3744 ix86_compute_frame_layout (&frame);
3745 return frame.to_allocate == 0 && frame.nregs == 0;
3746 }
3747 \f
3748 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3749 int
3750 x86_64_sign_extended_value (value)
3751 rtx value;
3752 {
3753 switch (GET_CODE (value))
3754 {
3755 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3756 to be at least 32 and thus all acceptable constants are
3757 represented as CONST_INTs. */
3758 case CONST_INT:
3759 if (HOST_BITS_PER_WIDE_INT == 32)
3760 return 1;
3761 else
3762 {
3763 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3764 return trunc_int_for_mode (val, SImode) == val;
3765 }
3766 break;
3767
3768 /* For certain code models, the symbolic references are known to fit. */
3769 case SYMBOL_REF:
3770 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3771
3772 /* For certain code models, the code is near as well. */
3773 case LABEL_REF:
3774 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3775
3776 /* We may also accept offsetted memory references in certain special
3777 cases. */
3778 case CONST:
3779 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3780 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3781 return 1;
3782 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3783 {
3784 rtx op1 = XEXP (XEXP (value, 0), 0);
3785 rtx op2 = XEXP (XEXP (value, 0), 1);
3786 HOST_WIDE_INT offset;
3787
3788 if (ix86_cmodel == CM_LARGE)
3789 return 0;
3790 if (GET_CODE (op2) != CONST_INT)
3791 return 0;
3792 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3793 switch (GET_CODE (op1))
3794 {
3795 case SYMBOL_REF:
3796 /* For CM_SMALL assume that the latest object is 1MB before the
3797 end of the 31-bit boundary. We may also accept pretty
3798 large negative constants, knowing that all objects are
3799 in the positive half of the address space. */
3800 if (ix86_cmodel == CM_SMALL
3801 && offset < 1024*1024*1024
3802 && trunc_int_for_mode (offset, SImode) == offset)
3803 return 1;
3804 /* For CM_KERNEL we know that all objects reside in the
3805 negative half of the 32-bit address space. We may not
3806 accept negative offsets, since they may be just out of
3807 range, but we may accept pretty large positive ones. */
3808 if (ix86_cmodel == CM_KERNEL
3809 && offset > 0
3810 && trunc_int_for_mode (offset, SImode) == offset)
3811 return 1;
3812 break;
3813 case LABEL_REF:
3814 /* These conditions are similar to SYMBOL_REF ones, just the
3815 constraints for code models differ. */
3816 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3817 && offset < 1024*1024*1024
3818 && trunc_int_for_mode (offset, SImode) == offset)
3819 return 1;
3820 if (ix86_cmodel == CM_KERNEL
3821 && offset > 0
3822 && trunc_int_for_mode (offset, SImode) == offset)
3823 return 1;
3824 break;
3825 default:
3826 return 0;
3827 }
3828 }
3829 return 0;
3830 default:
3831 return 0;
3832 }
3833 }
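
/* As a rough illustration, for CONST_INTs the test above accepts exactly
   the values representable as a sign-extended 32-bit immediate, i.e.
   -0x80000000 .. 0x7fffffff: for instance (const_int 0x7fffffff) is
   accepted while (const_int 0x80000000) is not (assuming a 64-bit
   HOST_WIDE_INT host; a 32-bit host trivially accepts every CONST_INT).  */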
3834
3835 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3836 int
3837 x86_64_zero_extended_value (value)
3838 rtx value;
3839 {
3840 switch (GET_CODE (value))
3841 {
3842 case CONST_DOUBLE:
3843 if (HOST_BITS_PER_WIDE_INT == 32)
3844 return (GET_MODE (value) == VOIDmode
3845 && !CONST_DOUBLE_HIGH (value));
3846 else
3847 return 0;
3848 case CONST_INT:
3849 if (HOST_BITS_PER_WIDE_INT == 32)
3850 return INTVAL (value) >= 0;
3851 else
3852 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3853 break;
3854
3855 /* For certain code models, the symbolic references are known to fit. */
3856 case SYMBOL_REF:
3857 return ix86_cmodel == CM_SMALL;
3858
3859 /* For certain code models, the code is near as well. */
3860 case LABEL_REF:
3861 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3862
3863 /* We may also accept offsetted memory references in certain special
3864 cases. */
3865 case CONST:
3866 if (GET_CODE (XEXP (value, 0)) == PLUS)
3867 {
3868 rtx op1 = XEXP (XEXP (value, 0), 0);
3869 rtx op2 = XEXP (XEXP (value, 0), 1);
3870
3871 if (ix86_cmodel == CM_LARGE)
3872 return 0;
3873 switch (GET_CODE (op1))
3874 {
3875 case SYMBOL_REF:
3876 return 0;
3877 /* For small code model we may accept pretty large positive
3878 offsets, since one bit is available for free. Negative
3879 offsets are limited by the size of NULL pointer area
3880 specified by the ABI. */
3881 if (ix86_cmodel == CM_SMALL
3882 && GET_CODE (op2) == CONST_INT
3883 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3884 && (trunc_int_for_mode (INTVAL (op2), SImode)
3885 == INTVAL (op2)))
3886 return 1;
3887 /* ??? For the kernel, we may accept adjustment of
3888 -0x10000000, since we know that it will just convert
3889 negative address space to positive, but perhaps this
3890 is not worthwhile. */
3891 break;
3892 case LABEL_REF:
3893 /* These conditions are similar to SYMBOL_REF ones, just the
3894 constraints for code models differ. */
3895 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3896 && GET_CODE (op2) == CONST_INT
3897 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3898 && (trunc_int_for_mode (INTVAL (op2), SImode)
3899 == INTVAL (op2)))
3900 return 1;
3901 break;
3902 default:
3903 return 0;
3904 }
3905 }
3906 return 0;
3907 default:
3908 return 0;
3909 }
3910 }
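
/* Likewise, as a rough illustration for CONST_INTs: any value in the range
   0 .. 0xffffffff is accepted as a zero-extended immediate, while negative
   values and values needing more than 32 bits are rejected (again assuming
   a 64-bit HOST_WIDE_INT host; a 32-bit host accepts any non-negative
   CONST_INT).  */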
3911
3912 /* Value should be nonzero if functions must have frame pointers.
3913 Zero means the frame pointer need not be set up (and parms may
3914 be accessed via the stack pointer) in functions that seem suitable. */
3915
3916 int
3917 ix86_frame_pointer_required ()
3918 {
3919 /* If we accessed previous frames, then the generated code expects
3920 to be able to access the saved ebp value in our frame. */
3921 if (cfun->machine->accesses_prev_frame)
3922 return 1;
3923
3924 /* Several x86 OSes need a frame pointer for other reasons,
3925 usually pertaining to setjmp. */
3926 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3927 return 1;
3928
3929 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3930 the frame pointer by default. Turn it back on now if we've not
3931 got a leaf function. */
3932 if (TARGET_OMIT_LEAF_FRAME_POINTER
3933 && (!current_function_is_leaf || current_function_profile))
3934 return 1;
3935
3936 return 0;
3937 }
3938
3939 /* Record that the current function accesses previous call frames. */
3940
3941 void
3942 ix86_setup_frame_addresses ()
3943 {
3944 cfun->machine->accesses_prev_frame = 1;
3945 }
3946 \f
3947 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3948 # define USE_HIDDEN_LINKONCE 1
3949 #else
3950 # define USE_HIDDEN_LINKONCE 0
3951 #endif
3952
3953 static int pic_labels_used;
3954
3955 /* Fills in the label name that should be used for a pc thunk for
3956 the given register. */
3957
3958 static void
3959 get_pc_thunk_name (name, regno)
3960 char name[32];
3961 unsigned int regno;
3962 {
3963 if (USE_HIDDEN_LINKONCE)
3964 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3965 else
3966 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3967 }
3968
3969
3970 /* This function emits the pc thunks used for -fpic: each thunk loads
3971 its register with the return address of the caller and then returns. */
3972
3973 void
3974 ix86_asm_file_end (file)
3975 FILE *file;
3976 {
3977 rtx xops[2];
3978 int regno;
3979
3980 for (regno = 0; regno < 8; ++regno)
3981 {
3982 char name[32];
3983
3984 if (! ((pic_labels_used >> regno) & 1))
3985 continue;
3986
3987 get_pc_thunk_name (name, regno);
3988
3989 if (USE_HIDDEN_LINKONCE)
3990 {
3991 tree decl;
3992
3993 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3994 error_mark_node);
3995 TREE_PUBLIC (decl) = 1;
3996 TREE_STATIC (decl) = 1;
3997 DECL_ONE_ONLY (decl) = 1;
3998
3999 (*targetm.asm_out.unique_section) (decl, 0);
4000 named_section (decl, NULL, 0);
4001
4002 (*targetm.asm_out.globalize_label) (file, name);
4003 fputs ("\t.hidden\t", file);
4004 assemble_name (file, name);
4005 fputc ('\n', file);
4006 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4007 }
4008 else
4009 {
4010 text_section ();
4011 ASM_OUTPUT_LABEL (file, name);
4012 }
4013
4014 xops[0] = gen_rtx_REG (SImode, regno);
4015 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4016 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4017 output_asm_insn ("ret", xops);
4018 }
4019 }
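
/* As a rough illustration, the thunk emitted above for %ebx looks
   approximately like this in the hidden-linkonce case (AT&T syntax; the
   exact section and declaration directives depend on the target headers):

	.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
   __i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   i.e. it copies the return address (the address just after the caller's
   call instruction) into the register and returns.  */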
4020
4021 /* Emit code for the SET_GOT patterns. */
4022
4023 const char *
4024 output_set_got (dest)
4025 rtx dest;
4026 {
4027 rtx xops[3];
4028
4029 xops[0] = dest;
4030 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4031
4032 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4033 {
4034 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4035
4036 if (!flag_pic)
4037 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4038 else
4039 output_asm_insn ("call\t%a2", xops);
4040
4041 #if TARGET_MACHO
4042 /* Output the "canonical" label name ("Lxx$pb") here too. This
4043 is what will be referred to by the Mach-O PIC subsystem. */
4044 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4045 #endif
4046 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4047 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4048
4049 if (flag_pic)
4050 output_asm_insn ("pop{l}\t%0", xops);
4051 }
4052 else
4053 {
4054 char name[32];
4055 get_pc_thunk_name (name, REGNO (dest));
4056 pic_labels_used |= 1 << REGNO (dest);
4057
4058 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4059 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4060 output_asm_insn ("call\t%X2", xops);
4061 }
4062
4063 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4064 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4065 else if (!TARGET_MACHO)
4066 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4067
4068 return "";
4069 }
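
/* As a rough illustration, for -fpic with %ebx as the destination this
   expands to approximately (AT&T syntax; label names are placeholders):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   or, when TARGET_DEEP_BRANCH_PREDICTION is enabled, to a call to the
   __i686.get_pc_thunk.bx helper followed by

	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */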
4070
4071 /* Generate a "push" pattern for input ARG. */
4072
4073 static rtx
4074 gen_push (arg)
4075 rtx arg;
4076 {
4077 return gen_rtx_SET (VOIDmode,
4078 gen_rtx_MEM (Pmode,
4079 gen_rtx_PRE_DEC (Pmode,
4080 stack_pointer_rtx)),
4081 arg);
4082 }
4083
4084 /* Return >= 0 if there is an unused call-clobbered register available
4085 for the entire function. */
4086
4087 static unsigned int
4088 ix86_select_alt_pic_regnum ()
4089 {
4090 if (current_function_is_leaf && !current_function_profile)
4091 {
4092 int i;
4093 for (i = 2; i >= 0; --i)
4094 if (!regs_ever_live[i])
4095 return i;
4096 }
4097
4098 return INVALID_REGNUM;
4099 }
4100
4101 /* Return 1 if we need to save REGNO. */
4102 static int
4103 ix86_save_reg (regno, maybe_eh_return)
4104 unsigned int regno;
4105 int maybe_eh_return;
4106 {
4107 if (pic_offset_table_rtx
4108 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4109 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4110 || current_function_profile
4111 || current_function_calls_eh_return))
4112 {
4113 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4114 return 0;
4115 return 1;
4116 }
4117
4118 if (current_function_calls_eh_return && maybe_eh_return)
4119 {
4120 unsigned i;
4121 for (i = 0; ; i++)
4122 {
4123 unsigned test = EH_RETURN_DATA_REGNO (i);
4124 if (test == INVALID_REGNUM)
4125 break;
4126 if (test == regno)
4127 return 1;
4128 }
4129 }
4130
4131 return (regs_ever_live[regno]
4132 && !call_used_regs[regno]
4133 && !fixed_regs[regno]
4134 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4135 }
4136
4137 /* Return number of registers to be saved on the stack. */
4138
4139 static int
4140 ix86_nsaved_regs ()
4141 {
4142 int nregs = 0;
4143 int regno;
4144
4145 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4146 if (ix86_save_reg (regno, true))
4147 nregs++;
4148 return nregs;
4149 }
4150
4151 /* Return the offset between two registers, one to be eliminated, and the other
4152 its replacement, at the start of a routine. */
4153
4154 HOST_WIDE_INT
4155 ix86_initial_elimination_offset (from, to)
4156 int from;
4157 int to;
4158 {
4159 struct ix86_frame frame;
4160 ix86_compute_frame_layout (&frame);
4161
4162 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4163 return frame.hard_frame_pointer_offset;
4164 else if (from == FRAME_POINTER_REGNUM
4165 && to == HARD_FRAME_POINTER_REGNUM)
4166 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4167 else
4168 {
4169 if (to != STACK_POINTER_REGNUM)
4170 abort ();
4171 else if (from == ARG_POINTER_REGNUM)
4172 return frame.stack_pointer_offset;
4173 else if (from != FRAME_POINTER_REGNUM)
4174 abort ();
4175 else
4176 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4177 }
4178 }
4179
4180 /* Fill the ix86_frame structure describing the frame of the current function. */
4181
4182 static void
4183 ix86_compute_frame_layout (frame)
4184 struct ix86_frame *frame;
4185 {
4186 HOST_WIDE_INT total_size;
4187 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4188 int offset;
4189 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4190 HOST_WIDE_INT size = get_frame_size ();
4191
4192 frame->nregs = ix86_nsaved_regs ();
4193 total_size = size;
4194
4195 /* Skip return address and saved base pointer. */
4196 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4197
4198 frame->hard_frame_pointer_offset = offset;
4199
4200 /* Do some sanity checking of stack_alignment_needed and
4201 preferred_alignment, since the i386 port is the only one using these
4202 features, and they may break easily. */
4203
4204 if (size && !stack_alignment_needed)
4205 abort ();
4206 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4207 abort ();
4208 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4209 abort ();
4210 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4211 abort ();
4212
4213 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4214 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4215
4216 /* Register save area */
4217 offset += frame->nregs * UNITS_PER_WORD;
4218
4219 /* Va-arg area */
4220 if (ix86_save_varrargs_registers)
4221 {
4222 offset += X86_64_VARARGS_SIZE;
4223 frame->va_arg_size = X86_64_VARARGS_SIZE;
4224 }
4225 else
4226 frame->va_arg_size = 0;
4227
4228 /* Align start of frame for local function. */
4229 frame->padding1 = ((offset + stack_alignment_needed - 1)
4230 & -stack_alignment_needed) - offset;
4231
4232 offset += frame->padding1;
4233
4234 /* Frame pointer points here. */
4235 frame->frame_pointer_offset = offset;
4236
4237 offset += size;
4238
4239 /* Add outgoing arguments area. Can be skipped if we eliminated
4240 all the function calls as dead code. */
4241 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4242 {
4243 offset += current_function_outgoing_args_size;
4244 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4245 }
4246 else
4247 frame->outgoing_arguments_size = 0;
4248
4249 /* Align stack boundary. Only needed if we're calling another function
4250 or using alloca. */
4251 if (!current_function_is_leaf || current_function_calls_alloca)
4252 frame->padding2 = ((offset + preferred_alignment - 1)
4253 & -preferred_alignment) - offset;
4254 else
4255 frame->padding2 = 0;
4256
4257 offset += frame->padding2;
4258
4259 /* We've reached end of stack frame. */
4260 frame->stack_pointer_offset = offset;
4261
4262 /* Size prologue needs to allocate. */
4263 frame->to_allocate =
4264 (size + frame->padding1 + frame->padding2
4265 + frame->outgoing_arguments_size + frame->va_arg_size);
4266
4267 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4268 && current_function_is_leaf)
4269 {
4270 frame->red_zone_size = frame->to_allocate;
4271 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4272 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4273 }
4274 else
4275 frame->red_zone_size = 0;
4276 frame->to_allocate -= frame->red_zone_size;
4277 frame->stack_pointer_offset -= frame->red_zone_size;
4278 #if 0
4279 fprintf (stderr, "nregs: %i\n", frame->nregs);
4280 fprintf (stderr, "size: %i\n", size);
4281 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4282 fprintf (stderr, "padding1: %i\n", frame->padding1);
4283 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4284 fprintf (stderr, "padding2: %i\n", frame->padding2);
4285 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4286 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4287 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4288 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4289 frame->hard_frame_pointer_offset);
4290 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4291 #endif
4292 }
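
/* As a rough illustration, the frame laid out above looks approximately
   like this (the stack grows downwards; the offsets computed here measure
   the distance from the top of the frame down to the marked boundaries):

	[ return address          ]
	[ saved ebp, if needed    ]  <- hard_frame_pointer_offset
	[ saved registers         ]
	[ va-arg register save    ]
	[ padding1                ]  <- frame_pointer_offset
	[ local variables         ]
	[ outgoing argument area  ]
	[ padding2                ]  <- stack_pointer_offset

   For 64-bit leaf functions the red zone is then subtracted from
   to_allocate and stack_pointer_offset.  */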
4293
4294 /* Emit code to save registers in the prologue. */
4295
4296 static void
4297 ix86_emit_save_regs ()
4298 {
4299 register int regno;
4300 rtx insn;
4301
4302 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4303 if (ix86_save_reg (regno, true))
4304 {
4305 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4306 RTX_FRAME_RELATED_P (insn) = 1;
4307 }
4308 }
4309
4310 /* Emit code to save registers using MOV insns. The first register
4311 is saved at POINTER + OFFSET. */
4312 static void
4313 ix86_emit_save_regs_using_mov (pointer, offset)
4314 rtx pointer;
4315 HOST_WIDE_INT offset;
4316 {
4317 int regno;
4318 rtx insn;
4319
4320 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4321 if (ix86_save_reg (regno, true))
4322 {
4323 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4324 Pmode, offset),
4325 gen_rtx_REG (Pmode, regno));
4326 RTX_FRAME_RELATED_P (insn) = 1;
4327 offset += UNITS_PER_WORD;
4328 }
4329 }
4330
4331 /* Expand the prologue into a bunch of separate insns. */
4332
4333 void
4334 ix86_expand_prologue ()
4335 {
4336 rtx insn;
4337 bool pic_reg_used;
4338 struct ix86_frame frame;
4339 int use_mov = 0;
4340 HOST_WIDE_INT allocate;
4341
4342 if (!optimize_size)
4343 {
4344 use_fast_prologue_epilogue
4345 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4346 if (TARGET_PROLOGUE_USING_MOVE)
4347 use_mov = use_fast_prologue_epilogue;
4348 }
4349 ix86_compute_frame_layout (&frame);
4350
4351 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4352 slower on all targets. Also sdb doesn't like it. */
4353
4354 if (frame_pointer_needed)
4355 {
4356 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4357 RTX_FRAME_RELATED_P (insn) = 1;
4358
4359 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4360 RTX_FRAME_RELATED_P (insn) = 1;
4361 }
4362
4363 allocate = frame.to_allocate;
4364 /* In case we are dealing with only a single register and an empty frame,
4365 push is equivalent to the mov+add sequence. */
4366 if (allocate == 0 && frame.nregs <= 1)
4367 use_mov = 0;
4368
4369 if (!use_mov)
4370 ix86_emit_save_regs ();
4371 else
4372 allocate += frame.nregs * UNITS_PER_WORD;
4373
4374 if (allocate == 0)
4375 ;
4376 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4377 {
4378 insn = emit_insn (gen_pro_epilogue_adjust_stack
4379 (stack_pointer_rtx, stack_pointer_rtx,
4380 GEN_INT (-allocate)));
4381 RTX_FRAME_RELATED_P (insn) = 1;
4382 }
4383 else
4384 {
4385 /* ??? Is this only valid for Win32? */
4386
4387 rtx arg0, sym;
4388
4389 if (TARGET_64BIT)
4390 abort ();
4391
4392 arg0 = gen_rtx_REG (SImode, 0);
4393 emit_move_insn (arg0, GEN_INT (allocate));
4394
4395 sym = gen_rtx_MEM (FUNCTION_MODE,
4396 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4397 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4398
4399 CALL_INSN_FUNCTION_USAGE (insn)
4400 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4401 CALL_INSN_FUNCTION_USAGE (insn));
4402 }
4403 if (use_mov)
4404 {
4405 if (!frame_pointer_needed || !frame.to_allocate)
4406 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4407 else
4408 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4409 -frame.nregs * UNITS_PER_WORD);
4410 }
4411
4412 #ifdef SUBTARGET_PROLOGUE
4413 SUBTARGET_PROLOGUE;
4414 #endif
4415
4416 pic_reg_used = false;
4417 if (pic_offset_table_rtx
4418 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4419 || current_function_profile))
4420 {
4421 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4422
4423 if (alt_pic_reg_used != INVALID_REGNUM)
4424 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4425
4426 pic_reg_used = true;
4427 }
4428
4429 if (pic_reg_used)
4430 {
4431 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4432
4433 /* Even with accurate pre-reload life analysis, we can wind up
4434 deleting all references to the pic register after reload.
4435 Consider if cross-jumping unifies two sides of a branch
4436 controlled by a comparison against the only read from a global;
4437 in that case, allow the set_got to be deleted, though we're
4438 too late to do anything about the ebx save in the prologue. */
4439 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4440 }
4441
4442 /* Prevent function calls from being scheduled before the call to mcount.
4443 In the pic_reg_used case, make sure that the got load isn't deleted. */
4444 if (current_function_profile)
4445 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4446 }
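
/* As a rough illustration, for a function that needs a frame pointer,
   saves %ebx and allocates LOCALS bytes of frame (LOCALS standing for
   frame.to_allocate), the push-based expansion above is approximately:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$LOCALS, %esp

   The move-based prologue instead grows the stack once by the combined
   size and stores the registers with movl.  */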
4447
4448 /* Emit code to restore saved registers using MOV insns. The first register
4449 is restored from POINTER + OFFSET. */
4450 static void
4451 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4452 rtx pointer;
4453 int offset;
4454 int maybe_eh_return;
4455 {
4456 int regno;
4457
4458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4459 if (ix86_save_reg (regno, maybe_eh_return))
4460 {
4461 emit_move_insn (gen_rtx_REG (Pmode, regno),
4462 adjust_address (gen_rtx_MEM (Pmode, pointer),
4463 Pmode, offset));
4464 offset += UNITS_PER_WORD;
4465 }
4466 }
4467
4468 /* Restore function stack, frame, and registers. */
4469
4470 void
4471 ix86_expand_epilogue (style)
4472 int style;
4473 {
4474 int regno;
4475 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4476 struct ix86_frame frame;
4477 HOST_WIDE_INT offset;
4478
4479 ix86_compute_frame_layout (&frame);
4480
4481 /* Calculate start of saved registers relative to ebp. Special care
4482 must be taken for the normal return case of a function using
4483 eh_return: the eax and edx registers are marked as saved, but not
4484 restored along this path. */
4485 offset = frame.nregs;
4486 if (current_function_calls_eh_return && style != 2)
4487 offset -= 2;
4488 offset *= -UNITS_PER_WORD;
4489
4490 /* If we're only restoring one register and sp is not valid, then
4491 use a move instruction to restore the register, since it's
4492 less work than reloading sp and popping the register.
4493
4494 The default code results in a stack adjustment using an add/lea instruction,
4495 while this code results in a LEAVE instruction (or its discrete equivalent),
4496 so it is profitable in some other cases as well, especially when there
4497 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4498 is set and there is exactly one register to pop. This heuristic may need
4499 some tuning in the future. */
4500 if ((!sp_valid && frame.nregs <= 1)
4501 || (TARGET_EPILOGUE_USING_MOVE
4502 && use_fast_prologue_epilogue
4503 && (frame.nregs > 1 || frame.to_allocate))
4504 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4505 || (frame_pointer_needed && TARGET_USE_LEAVE
4506 && use_fast_prologue_epilogue && frame.nregs == 1)
4507 || current_function_calls_eh_return)
4508 {
4509 /* Restore registers. We can use ebp or esp to address the memory
4510 locations. If both are available, default to ebp, since offsets
4511 are known to be small. The only exception is esp pointing directly to
4512 the end of the block of saved registers, where we may simplify the
4513 addressing mode. */
4514
4515 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4516 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4517 frame.to_allocate, style == 2);
4518 else
4519 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4520 offset, style == 2);
4521
4522 /* eh_return epilogues need %ecx added to the stack pointer. */
4523 if (style == 2)
4524 {
4525 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4526
4527 if (frame_pointer_needed)
4528 {
4529 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4530 tmp = plus_constant (tmp, UNITS_PER_WORD);
4531 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4532
4533 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4534 emit_move_insn (hard_frame_pointer_rtx, tmp);
4535
4536 emit_insn (gen_pro_epilogue_adjust_stack
4537 (stack_pointer_rtx, sa, const0_rtx));
4538 }
4539 else
4540 {
4541 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4542 tmp = plus_constant (tmp, (frame.to_allocate
4543 + frame.nregs * UNITS_PER_WORD));
4544 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4545 }
4546 }
4547 else if (!frame_pointer_needed)
4548 emit_insn (gen_pro_epilogue_adjust_stack
4549 (stack_pointer_rtx, stack_pointer_rtx,
4550 GEN_INT (frame.to_allocate
4551 + frame.nregs * UNITS_PER_WORD)));
4552 /* If not an i386, mov & pop is faster than "leave". */
4553 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4554 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4555 else
4556 {
4557 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4558 hard_frame_pointer_rtx,
4559 const0_rtx));
4560 if (TARGET_64BIT)
4561 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4562 else
4563 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4564 }
4565 }
4566 else
4567 {
4568 /* First step is to deallocate the stack frame so that we can
4569 pop the registers. */
4570 if (!sp_valid)
4571 {
4572 if (!frame_pointer_needed)
4573 abort ();
4574 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4575 hard_frame_pointer_rtx,
4576 GEN_INT (offset)));
4577 }
4578 else if (frame.to_allocate)
4579 emit_insn (gen_pro_epilogue_adjust_stack
4580 (stack_pointer_rtx, stack_pointer_rtx,
4581 GEN_INT (frame.to_allocate)));
4582
4583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4584 if (ix86_save_reg (regno, false))
4585 {
4586 if (TARGET_64BIT)
4587 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4588 else
4589 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4590 }
4591 if (frame_pointer_needed)
4592 {
4593 /* Leave results in shorter dependency chains on CPUs that are
4594 able to grok it fast. */
4595 if (TARGET_USE_LEAVE)
4596 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4597 else if (TARGET_64BIT)
4598 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4599 else
4600 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4601 }
4602 }
4603
4604 /* Sibcall epilogues don't want a return instruction. */
4605 if (style == 0)
4606 return;
4607
4608 if (current_function_pops_args && current_function_args_size)
4609 {
4610 rtx popc = GEN_INT (current_function_pops_args);
4611
4612 /* i386 can only pop 64K bytes. If asked to pop more, pop
4613 return address, do explicit add, and jump indirectly to the
4614 caller. */
4615
4616 if (current_function_pops_args >= 65536)
4617 {
4618 rtx ecx = gen_rtx_REG (SImode, 2);
4619
4620 /* There is no "pascal" calling convention in the 64-bit ABI. */
4621 if (TARGET_64BIT)
4622 abort ();
4623
4624 emit_insn (gen_popsi1 (ecx));
4625 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4626 emit_jump_insn (gen_return_indirect_internal (ecx));
4627 }
4628 else
4629 emit_jump_insn (gen_return_pop_internal (popc));
4630 }
4631 else
4632 emit_jump_insn (gen_return_internal ());
4633 }
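
/* As a rough illustration, two common shapes produced above for a frame
   with one saved register (%ebx) and LOCALS bytes of locals are

	movl	-4(%ebp), %ebx		leave-based, move restores
	leave
	ret

   and

	addl	$LOCALS, %esp		pop-based restores
	popl	%ebx
	popl	%ebp
	ret

   with LOCALS again standing for frame.to_allocate.  */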
4634
4635 /* Reset from the function's potential modifications. */
4636
4637 static void
4638 ix86_output_function_epilogue (file, size)
4639 FILE *file ATTRIBUTE_UNUSED;
4640 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4641 {
4642 if (pic_offset_table_rtx)
4643 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4644 }
4645 \f
4646 /* Extract the parts of an RTL expression that is a valid memory address
4647 for an instruction. Return 0 if the structure of the address is
4648 grossly off. Return -1 if the address contains ASHIFT, so it is not
4649 strictly valid, but is still used for computing the length of a lea
4650 instruction. */
4651
4652 static int
4653 ix86_decompose_address (addr, out)
4654 register rtx addr;
4655 struct ix86_address *out;
4656 {
4657 rtx base = NULL_RTX;
4658 rtx index = NULL_RTX;
4659 rtx disp = NULL_RTX;
4660 HOST_WIDE_INT scale = 1;
4661 rtx scale_rtx = NULL_RTX;
4662 int retval = 1;
4663
4664 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4665 base = addr;
4666 else if (GET_CODE (addr) == PLUS)
4667 {
4668 rtx op0 = XEXP (addr, 0);
4669 rtx op1 = XEXP (addr, 1);
4670 enum rtx_code code0 = GET_CODE (op0);
4671 enum rtx_code code1 = GET_CODE (op1);
4672
4673 if (code0 == REG || code0 == SUBREG)
4674 {
4675 if (code1 == REG || code1 == SUBREG)
4676 index = op0, base = op1; /* index + base */
4677 else
4678 base = op0, disp = op1; /* base + displacement */
4679 }
4680 else if (code0 == MULT)
4681 {
4682 index = XEXP (op0, 0);
4683 scale_rtx = XEXP (op0, 1);
4684 if (code1 == REG || code1 == SUBREG)
4685 base = op1; /* index*scale + base */
4686 else
4687 disp = op1; /* index*scale + disp */
4688 }
4689 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4690 {
4691 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4692 scale_rtx = XEXP (XEXP (op0, 0), 1);
4693 base = XEXP (op0, 1);
4694 disp = op1;
4695 }
4696 else if (code0 == PLUS)
4697 {
4698 index = XEXP (op0, 0); /* index + base + disp */
4699 base = XEXP (op0, 1);
4700 disp = op1;
4701 }
4702 else
4703 return 0;
4704 }
4705 else if (GET_CODE (addr) == MULT)
4706 {
4707 index = XEXP (addr, 0); /* index*scale */
4708 scale_rtx = XEXP (addr, 1);
4709 }
4710 else if (GET_CODE (addr) == ASHIFT)
4711 {
4712 rtx tmp;
4713
4714 /* We're called for lea too, which implements ashift on occasion. */
4715 index = XEXP (addr, 0);
4716 tmp = XEXP (addr, 1);
4717 if (GET_CODE (tmp) != CONST_INT)
4718 return 0;
4719 scale = INTVAL (tmp);
4720 if ((unsigned HOST_WIDE_INT) scale > 3)
4721 return 0;
4722 scale = 1 << scale;
4723 retval = -1;
4724 }
4725 else
4726 disp = addr; /* displacement */
4727
4728 /* Extract the integral value of scale. */
4729 if (scale_rtx)
4730 {
4731 if (GET_CODE (scale_rtx) != CONST_INT)
4732 return 0;
4733 scale = INTVAL (scale_rtx);
4734 }
4735
4736 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4737 if (base && index && scale == 1
4738 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4739 || index == stack_pointer_rtx))
4740 {
4741 rtx tmp = base;
4742 base = index;
4743 index = tmp;
4744 }
4745
4746 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4747 if ((base == hard_frame_pointer_rtx
4748 || base == frame_pointer_rtx
4749 || base == arg_pointer_rtx) && !disp)
4750 disp = const0_rtx;
4751
4752 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4753 Avoid this by transforming to [%esi+0]. */
4754 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4755 && base && !index && !disp
4756 && REG_P (base)
4757 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4758 disp = const0_rtx;
4759
4760 /* Special case: encode reg+reg instead of reg*2. */
4761 if (!base && index && scale && scale == 2)
4762 base = index, scale = 1;
4763
4764 /* Special case: scaling cannot be encoded without base or displacement. */
4765 if (!base && !disp && index && scale != 1)
4766 disp = const0_rtx;
4767
4768 out->base = base;
4769 out->index = index;
4770 out->disp = disp;
4771 out->scale = scale;
4772
4773 return retval;
4774 }
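
/* As a rough illustration, an address such as

     (plus:SI (plus:SI (mult:SI (reg:SI 1) (const_int 4))
		       (reg:SI 2))
	      (const_int 12))

   decomposes into base = (reg:SI 2), index = (reg:SI 1), scale = 4 and
   disp = (const_int 12), i.e. the 12(%base,%index,4) addressing mode;
   the register numbers are only placeholders.  */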
4775 \f
4776 /* Return the cost of the memory address x.
4777 For i386, it is better to use a complex address than let gcc copy
4778 the address into a reg and make a new pseudo. But not if the address
4779 requires two regs - that would mean more pseudos with longer
4780 lifetimes. */
4781 int
4782 ix86_address_cost (x)
4783 rtx x;
4784 {
4785 struct ix86_address parts;
4786 int cost = 1;
4787
4788 if (!ix86_decompose_address (x, &parts))
4789 abort ();
4790
4791 if (parts.base && GET_CODE (parts.base) == SUBREG)
4792 parts.base = SUBREG_REG (parts.base);
4793 if (parts.index && GET_CODE (parts.index) == SUBREG)
4794 parts.index = SUBREG_REG (parts.index);
4795
4796 /* More complex memory references are better. */
4797 if (parts.disp && parts.disp != const0_rtx)
4798 cost--;
4799
4800 /* Attempt to minimize number of registers in the address. */
4801 if ((parts.base
4802 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4803 || (parts.index
4804 && (!REG_P (parts.index)
4805 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4806 cost++;
4807
4808 if (parts.base
4809 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4810 && parts.index
4811 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4812 && parts.base != parts.index)
4813 cost++;
4814
4815 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4816 since its predecode logic can't detect the length of such instructions
4817 and they degenerate to vector decoding. Increase the cost of such
4818 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4819 to split such addresses or even refuse them altogether.
4820
4821 The following addressing modes are affected:
4822 [base+scale*index]
4823 [scale*index+disp]
4824 [base+index]
4825
4826 The first and last cases may be avoidable by explicitly coding a zero
4827 displacement into the memory address, but I don't have an AMD-K6 machine
4828 handy to check this theory. */
4829
4830 if (TARGET_K6
4831 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4832 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4833 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4834 cost += 10;
4835
4836 return cost;
4837 }
4838 \f
4839 /* If X is a machine specific address (i.e. a symbol or label being
4840 referenced as a displacement from the GOT implemented using an
4841 UNSPEC), then return the base term. Otherwise return X. */
4842
4843 rtx
4844 ix86_find_base_term (x)
4845 rtx x;
4846 {
4847 rtx term;
4848
4849 if (TARGET_64BIT)
4850 {
4851 if (GET_CODE (x) != CONST)
4852 return x;
4853 term = XEXP (x, 0);
4854 if (GET_CODE (term) == PLUS
4855 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4856 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4857 term = XEXP (term, 0);
4858 if (GET_CODE (term) != UNSPEC
4859 || XINT (term, 1) != UNSPEC_GOTPCREL)
4860 return x;
4861
4862 term = XVECEXP (term, 0, 0);
4863
4864 if (GET_CODE (term) != SYMBOL_REF
4865 && GET_CODE (term) != LABEL_REF)
4866 return x;
4867
4868 return term;
4869 }
4870
4871 if (GET_CODE (x) != PLUS
4872 || XEXP (x, 0) != pic_offset_table_rtx
4873 || GET_CODE (XEXP (x, 1)) != CONST)
4874 return x;
4875
4876 term = XEXP (XEXP (x, 1), 0);
4877
4878 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4879 term = XEXP (term, 0);
4880
4881 if (GET_CODE (term) != UNSPEC
4882 || XINT (term, 1) != UNSPEC_GOTOFF)
4883 return x;
4884
4885 term = XVECEXP (term, 0, 0);
4886
4887 if (GET_CODE (term) != SYMBOL_REF
4888 && GET_CODE (term) != LABEL_REF)
4889 return x;
4890
4891 return term;
4892 }
4893 \f
4894 /* Determine if a given RTX is a valid constant. We already know this
4895 satisfies CONSTANT_P. */
4896
4897 bool
4898 legitimate_constant_p (x)
4899 rtx x;
4900 {
4901 rtx inner;
4902
4903 switch (GET_CODE (x))
4904 {
4905 case SYMBOL_REF:
4906 /* TLS symbols are not constant. */
4907 if (tls_symbolic_operand (x, Pmode))
4908 return false;
4909 break;
4910
4911 case CONST:
4912 inner = XEXP (x, 0);
4913
4914 /* Offsets of TLS symbols are never valid.
4915 Discourage CSE from creating them. */
4916 if (GET_CODE (inner) == PLUS
4917 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4918 return false;
4919
4920 /* Only some unspecs are valid as "constants". */
4921 if (GET_CODE (inner) == UNSPEC)
4922 switch (XINT (inner, 1))
4923 {
4924 case UNSPEC_TPOFF:
4925 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4926 default:
4927 return false;
4928 }
4929 break;
4930
4931 default:
4932 break;
4933 }
4934
4935 /* Otherwise we handle everything else in the move patterns. */
4936 return true;
4937 }
4938
4939 /* Determine if a given RTX is a valid constant address. */
4940
4941 bool
4942 constant_address_p (x)
4943 rtx x;
4944 {
4945 switch (GET_CODE (x))
4946 {
4947 case LABEL_REF:
4948 case CONST_INT:
4949 return true;
4950
4951 case CONST_DOUBLE:
4952 return TARGET_64BIT;
4953
4954 case CONST:
4955 /* For Mach-O, really believe the CONST. */
4956 if (TARGET_MACHO)
4957 return true;
4958 /* Otherwise fall through. */
4959 case SYMBOL_REF:
4960 return !flag_pic && legitimate_constant_p (x);
4961
4962 default:
4963 return false;
4964 }
4965 }
4966
4967 /* Nonzero if the constant value X is a legitimate general operand
4968 when generating PIC code. It is given that flag_pic is on and
4969 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4970
4971 bool
4972 legitimate_pic_operand_p (x)
4973 rtx x;
4974 {
4975 rtx inner;
4976
4977 switch (GET_CODE (x))
4978 {
4979 case CONST:
4980 inner = XEXP (x, 0);
4981
4982 /* Only some unspecs are valid as "constants". */
4983 if (GET_CODE (inner) == UNSPEC)
4984 switch (XINT (inner, 1))
4985 {
4986 case UNSPEC_TPOFF:
4987 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4988 default:
4989 return false;
4990 }
4991 /* FALLTHRU */
4992
4993 case SYMBOL_REF:
4994 case LABEL_REF:
4995 return legitimate_pic_address_disp_p (x);
4996
4997 default:
4998 return true;
4999 }
5000 }
5001
5002 /* Determine if a given CONST RTX is a valid memory displacement
5003 in PIC mode. */
5004
5005 int
5006 legitimate_pic_address_disp_p (disp)
5007 register rtx disp;
5008 {
5009 bool saw_plus;
5010
5011 /* In 64bit mode we can allow direct addresses of symbols and labels
5012 when they are not dynamic symbols. */
5013 if (TARGET_64BIT)
5014 {
5015 rtx x = disp;
5016 if (GET_CODE (disp) == CONST)
5017 x = XEXP (disp, 0);
5018 /* ??? Handle PIC code models */
5019 if (GET_CODE (x) == PLUS
5020 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5021 && ix86_cmodel == CM_SMALL_PIC
5022 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5023 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5024 x = XEXP (x, 0);
5025 if (local_symbolic_operand (x, Pmode))
5026 return 1;
5027 }
5028 if (GET_CODE (disp) != CONST)
5029 return 0;
5030 disp = XEXP (disp, 0);
5031
5032 if (TARGET_64BIT)
5033 {
5034 /* It is not safe to allow PLUS expressions here; this restriction limits
5035 the allowed distance of GOT references. We should not need these anyway. */
5036 if (GET_CODE (disp) != UNSPEC
5037 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5038 return 0;
5039
5040 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5041 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5042 return 0;
5043 return 1;
5044 }
5045
5046 saw_plus = false;
5047 if (GET_CODE (disp) == PLUS)
5048 {
5049 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5050 return 0;
5051 disp = XEXP (disp, 0);
5052 saw_plus = true;
5053 }
5054
5055 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5056 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5057 {
5058 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5059 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5060 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5061 {
5062 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5063 if (strstr (sym_name, "$pb") != 0)
5064 return 1;
5065 }
5066 }
5067
5068 if (GET_CODE (disp) != UNSPEC)
5069 return 0;
5070
5071 switch (XINT (disp, 1))
5072 {
5073 case UNSPEC_GOT:
5074 if (saw_plus)
5075 return false;
5076 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5077 case UNSPEC_GOTOFF:
5078 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5079 case UNSPEC_GOTTPOFF:
5080 case UNSPEC_GOTNTPOFF:
5081 case UNSPEC_INDNTPOFF:
5082 if (saw_plus)
5083 return false;
5084 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5085 case UNSPEC_NTPOFF:
5086 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5087 case UNSPEC_DTPOFF:
5088 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5089 }
5090
5091 return 0;
5092 }
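
/* As a rough illustration, in 32-bit PIC code a displacement such as

     (const (unspec [(symbol_ref "lvar")] UNSPEC_GOTOFF))

   is accepted when "lvar" is a local symbol, while a reference to a global
   object must instead use the UNSPEC_GOT form, which is only accepted
   without a PLUS offset; the symbol name is just a placeholder.  */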
5093
5094 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5095 memory address for an instruction. The MODE argument is the machine mode
5096 for the MEM expression that wants to use this address.
5097
5098 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5099 convert common non-canonical forms to canonical form so that they will
5100 be recognized. */
5101
5102 int
5103 legitimate_address_p (mode, addr, strict)
5104 enum machine_mode mode;
5105 register rtx addr;
5106 int strict;
5107 {
5108 struct ix86_address parts;
5109 rtx base, index, disp;
5110 HOST_WIDE_INT scale;
5111 const char *reason = NULL;
5112 rtx reason_rtx = NULL_RTX;
5113
5114 if (TARGET_DEBUG_ADDR)
5115 {
5116 fprintf (stderr,
5117 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5118 GET_MODE_NAME (mode), strict);
5119 debug_rtx (addr);
5120 }
5121
5122 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5123 {
5124 if (TARGET_DEBUG_ADDR)
5125 fprintf (stderr, "Success.\n");
5126 return TRUE;
5127 }
5128
5129 if (ix86_decompose_address (addr, &parts) <= 0)
5130 {
5131 reason = "decomposition failed";
5132 goto report_error;
5133 }
5134
5135 base = parts.base;
5136 index = parts.index;
5137 disp = parts.disp;
5138 scale = parts.scale;
5139
5140 /* Validate base register.
5141
5142 Don't allow SUBREGs here; it can lead to spill failures when the base
5143 is one word out of a two-word structure, which is represented internally
5144 as a DImode int. */
5145
5146 if (base)
5147 {
5148 rtx reg;
5149 reason_rtx = base;
5150
5151 if (GET_CODE (base) == SUBREG)
5152 reg = SUBREG_REG (base);
5153 else
5154 reg = base;
5155
5156 if (GET_CODE (reg) != REG)
5157 {
5158 reason = "base is not a register";
5159 goto report_error;
5160 }
5161
5162 if (GET_MODE (base) != Pmode)
5163 {
5164 reason = "base is not in Pmode";
5165 goto report_error;
5166 }
5167
5168 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5169 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5170 {
5171 reason = "base is not valid";
5172 goto report_error;
5173 }
5174 }
5175
5176 /* Validate index register.
5177
5178 Don't allow SUBREGs here; it can lead to spill failures when the index
5179 is one word out of a two-word structure, which is represented internally
5180 as a DImode int. */
5181
5182 if (index)
5183 {
5184 rtx reg;
5185 reason_rtx = index;
5186
5187 if (GET_CODE (index) == SUBREG)
5188 reg = SUBREG_REG (index);
5189 else
5190 reg = index;
5191
5192 if (GET_CODE (reg) != REG)
5193 {
5194 reason = "index is not a register";
5195 goto report_error;
5196 }
5197
5198 if (GET_MODE (index) != Pmode)
5199 {
5200 reason = "index is not in Pmode";
5201 goto report_error;
5202 }
5203
5204 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5205 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5206 {
5207 reason = "index is not valid";
5208 goto report_error;
5209 }
5210 }
5211
5212 /* Validate scale factor. */
5213 if (scale != 1)
5214 {
5215 reason_rtx = GEN_INT (scale);
5216 if (!index)
5217 {
5218 reason = "scale without index";
5219 goto report_error;
5220 }
5221
5222 if (scale != 2 && scale != 4 && scale != 8)
5223 {
5224 reason = "scale is not a valid multiplier";
5225 goto report_error;
5226 }
5227 }
5228
5229 /* Validate displacement. */
5230 if (disp)
5231 {
5232 reason_rtx = disp;
5233
5234 if (TARGET_64BIT)
5235 {
5236 if (!x86_64_sign_extended_value (disp))
5237 {
5238 reason = "displacement is out of range";
5239 goto report_error;
5240 }
5241 }
5242 else
5243 {
5244 if (GET_CODE (disp) == CONST_DOUBLE)
5245 {
5246 reason = "displacement is a const_double";
5247 goto report_error;
5248 }
5249 }
5250
5251 if (GET_CODE (disp) == CONST
5252 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5253 switch (XINT (XEXP (disp, 0), 1))
5254 {
5255 case UNSPEC_GOT:
5256 case UNSPEC_GOTOFF:
5257 case UNSPEC_GOTPCREL:
5258 if (!flag_pic)
5259 abort ();
5260 goto is_legitimate_pic;
5261
5262 case UNSPEC_GOTTPOFF:
5263 case UNSPEC_GOTNTPOFF:
5264 case UNSPEC_INDNTPOFF:
5265 case UNSPEC_NTPOFF:
5266 case UNSPEC_DTPOFF:
5267 break;
5268
5269 default:
5270 reason = "invalid address unspec";
5271 goto report_error;
5272 }
5273
5274 else if (flag_pic && (SYMBOLIC_CONST (disp)
5275 #if TARGET_MACHO
5276 && !machopic_operand_p (disp)
5277 #endif
5278 ))
5279 {
5280 is_legitimate_pic:
5281 if (TARGET_64BIT && (index || base))
5282 {
5283 reason = "non-constant pic memory reference";
5284 goto report_error;
5285 }
5286 if (! legitimate_pic_address_disp_p (disp))
5287 {
5288 reason = "displacement is an invalid pic construct";
5289 goto report_error;
5290 }
5291
5292 /* This code used to verify that a symbolic pic displacement
5293 includes the pic_offset_table_rtx register.
5294
5295 While this is a good idea, unfortunately these constructs may
5296 be created by the "adds using lea" optimization for incorrect
5297 code like:
5298
5299 int a;
5300 int foo(int i)
5301 {
5302 return *(&a+i);
5303 }
5304
5305 This code is nonsensical, but results in addressing the
5306 GOT table with a pic_offset_table_rtx base. We can't
5307 easily refuse it, since it gets matched by the
5308 "addsi3" pattern, which later gets split to lea in the
5309 case where the output register differs from the input. While this
5310 could be handled by a separate addsi pattern for this case
5311 that never results in lea, disabling this test seems to be the
5312 easier and correct fix for the crash. */
5313 }
5314 else if (!CONSTANT_ADDRESS_P (disp))
5315 {
5316 reason = "displacement is not constant";
5317 goto report_error;
5318 }
5319 }
5320
5321 /* Everything looks valid. */
5322 if (TARGET_DEBUG_ADDR)
5323 fprintf (stderr, "Success.\n");
5324 return TRUE;
5325
5326 report_error:
5327 if (TARGET_DEBUG_ADDR)
5328 {
5329 fprintf (stderr, "Error: %s\n", reason);
5330 debug_rtx (reason_rtx);
5331 }
5332 return FALSE;
5333 }
5334 \f
5335 /* Return a unique alias set for the GOT. */
5336
5337 static HOST_WIDE_INT
5338 ix86_GOT_alias_set ()
5339 {
5340 static HOST_WIDE_INT set = -1;
5341 if (set == -1)
5342 set = new_alias_set ();
5343 return set;
5344 }
5345
5346 /* Return a legitimate reference for ORIG (an address) using the
5347 register REG. If REG is 0, a new pseudo is generated.
5348
5349 There are two types of references that must be handled:
5350
5351 1. Global data references must load the address from the GOT, via
5352 the PIC reg. An insn is emitted to do this load, and the reg is
5353 returned.
5354
5355 2. Static data references, constant pool addresses, and code labels
5356 compute the address as an offset from the GOT, whose base is in
5357 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5358 differentiate them from global data objects. The returned
5359 address is the PIC reg + an unspec constant.
5360
5361 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5362 reg also appears in the address. */
5363
5364 rtx
5365 legitimize_pic_address (orig, reg)
5366 rtx orig;
5367 rtx reg;
5368 {
5369 rtx addr = orig;
5370 rtx new = orig;
5371 rtx base;
5372
5373 #if TARGET_MACHO
5374 if (reg == 0)
5375 reg = gen_reg_rtx (Pmode);
5376 /* Use the generic Mach-O PIC machinery. */
5377 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5378 #endif
5379
5380 if (local_symbolic_operand (addr, Pmode))
5381 {
5382 /* In 64bit mode we can address such objects directly. */
5383 if (TARGET_64BIT)
5384 new = addr;
5385 else
5386 {
5387 /* This symbol may be referenced via a displacement from the PIC
5388 base address (@GOTOFF). */
5389
5390 if (reload_in_progress)
5391 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5392 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5393 new = gen_rtx_CONST (Pmode, new);
5394 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5395
5396 if (reg != 0)
5397 {
5398 emit_move_insn (reg, new);
5399 new = reg;
5400 }
5401 }
5402 }
5403 else if (GET_CODE (addr) == SYMBOL_REF)
5404 {
5405 if (TARGET_64BIT)
5406 {
5407 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5408 new = gen_rtx_CONST (Pmode, new);
5409 new = gen_rtx_MEM (Pmode, new);
5410 RTX_UNCHANGING_P (new) = 1;
5411 set_mem_alias_set (new, ix86_GOT_alias_set ());
5412
5413 if (reg == 0)
5414 reg = gen_reg_rtx (Pmode);
5415 /* Use gen_movsi directly; otherwise the address is loaded
5416 into a register for CSE. We don't want to CSE these addresses,
5417 instead we CSE addresses from the GOT table, so skip this. */
5418 emit_insn (gen_movsi (reg, new));
5419 new = reg;
5420 }
5421 else
5422 {
5423 /* This symbol must be referenced via a load from the
5424 Global Offset Table (@GOT). */
5425
5426 if (reload_in_progress)
5427 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5428 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5429 new = gen_rtx_CONST (Pmode, new);
5430 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5431 new = gen_rtx_MEM (Pmode, new);
5432 RTX_UNCHANGING_P (new) = 1;
5433 set_mem_alias_set (new, ix86_GOT_alias_set ());
5434
5435 if (reg == 0)
5436 reg = gen_reg_rtx (Pmode);
5437 emit_move_insn (reg, new);
5438 new = reg;
5439 }
5440 }
5441 else
5442 {
5443 if (GET_CODE (addr) == CONST)
5444 {
5445 addr = XEXP (addr, 0);
5446
5447 /* We must match stuff we generate before. Assume the only
5448 unspecs that can get here are ours. Not that we could do
5449 anything with them anyway... */
5450 if (GET_CODE (addr) == UNSPEC
5451 || (GET_CODE (addr) == PLUS
5452 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5453 return orig;
5454 if (GET_CODE (addr) != PLUS)
5455 abort ();
5456 }
5457 if (GET_CODE (addr) == PLUS)
5458 {
5459 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5460
5461 /* Check first to see if this is a constant offset from a @GOTOFF
5462 symbol reference. */
5463 if (local_symbolic_operand (op0, Pmode)
5464 && GET_CODE (op1) == CONST_INT)
5465 {
5466 if (!TARGET_64BIT)
5467 {
5468 if (reload_in_progress)
5469 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5470 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5471 UNSPEC_GOTOFF);
5472 new = gen_rtx_PLUS (Pmode, new, op1);
5473 new = gen_rtx_CONST (Pmode, new);
5474 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5475
5476 if (reg != 0)
5477 {
5478 emit_move_insn (reg, new);
5479 new = reg;
5480 }
5481 }
5482 else
5483 {
5484 /* ??? We need to limit offsets here. */
5485 }
5486 }
5487 else
5488 {
5489 base = legitimize_pic_address (XEXP (addr, 0), reg);
5490 new = legitimize_pic_address (XEXP (addr, 1),
5491 base == reg ? NULL_RTX : reg);
5492
5493 if (GET_CODE (new) == CONST_INT)
5494 new = plus_constant (base, INTVAL (new));
5495 else
5496 {
5497 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5498 {
5499 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5500 new = XEXP (new, 1);
5501 }
5502 new = gen_rtx_PLUS (Pmode, base, new);
5503 }
5504 }
5505 }
5506 }
5507 return new;
5508 }
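/* For example, with -fpic on a 32-bit target a local symbol `sym' is
   rewritten above into roughly
       (plus %ebx (const (unspec [sym] UNSPEC_GOTOFF)))
   while a global symbol becomes a load through the GOT,
       (mem (plus %ebx (const (unspec [sym] UNSPEC_GOT)))).
   output_pic_addr_const later prints these unspecs as the @GOTOFF and
   @GOT relocations.  */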
5509
5510 static void
5511 ix86_encode_section_info (decl, first)
5512 tree decl;
5513 int first ATTRIBUTE_UNUSED;
5514 {
5515 bool local_p = (*targetm.binds_local_p) (decl);
5516 rtx rtl, symbol;
5517
5518 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5519 if (GET_CODE (rtl) != MEM)
5520 return;
5521 symbol = XEXP (rtl, 0);
5522 if (GET_CODE (symbol) != SYMBOL_REF)
5523 return;
5524
5525 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5526 symbol so that we may access it directly in the GOT. */
5527
5528 if (flag_pic)
5529 SYMBOL_REF_FLAG (symbol) = local_p;
5530
5531 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5532 "local dynamic", "initial exec" or "local exec" TLS models
5533 respectively. */
5534
5535 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5536 {
5537 const char *symbol_str;
5538 char *newstr;
5539 size_t len;
5540 enum tls_model kind;
5541
5542 if (!flag_pic)
5543 {
5544 if (local_p)
5545 kind = TLS_MODEL_LOCAL_EXEC;
5546 else
5547 kind = TLS_MODEL_INITIAL_EXEC;
5548 }
5549 /* Local dynamic is inefficient when we're not combining the
5550 parts of the address. */
5551 else if (optimize && local_p)
5552 kind = TLS_MODEL_LOCAL_DYNAMIC;
5553 else
5554 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5555 if (kind < flag_tls_default)
5556 kind = flag_tls_default;
5557
5558 symbol_str = XSTR (symbol, 0);
5559
5560 if (symbol_str[0] == '%')
5561 {
5562 if (symbol_str[1] == tls_model_chars[kind])
5563 return;
5564 symbol_str += 2;
5565 }
5566 len = strlen (symbol_str) + 1;
5567 newstr = alloca (len + 2);
5568
5569 newstr[0] = '%';
5570 newstr[1] = tls_model_chars[kind];
5571 memcpy (newstr + 2, symbol_str, len);
5572
5573 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5574 }
5575 }
5576
5577 /* Undo the above when printing symbol names. */
5578
5579 static const char *
5580 ix86_strip_name_encoding (str)
5581 const char *str;
5582 {
5583 if (str[0] == '%')
5584 str += 2;
5585 if (str [0] == '*')
5586 str += 1;
5587 return str;
5588 }
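/* The encoding above is nothing more than a two-character string prefix:
   '%' followed by the model letter (G, L, i or l).  A stand-alone sketch
   of the transformation and its inverse, kept out of the build with
   #if 0 and using illustrative helper names only, would look like this.  */
#if 0
#include <string.h>
#include <stdlib.h>

/* Prefix NAME with '%' and the TLS model letter C; the caller owns the
   returned string.  ix86_encode_section_info does the equivalent with
   alloca and ggc_alloc_string.  */
static char *
encode_tls_name (const char *name, char c)
{
  size_t len = strlen (name) + 1;
  char *p = malloc (len + 2);
  p[0] = '%';
  p[1] = c;
  memcpy (p + 2, name, len);
  return p;
}

/* Undo the prefix, as ix86_strip_name_encoding does.  */
static const char *
strip_tls_name (const char *str)
{
  if (str[0] == '%')
    str += 2;
  if (str[0] == '*')
    str += 1;
  return str;
}
#endif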
5589 \f
5590 /* Load the thread pointer into a register. */
5591
5592 static rtx
5593 get_thread_pointer ()
5594 {
5595 rtx tp;
5596
5597 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5598 tp = gen_rtx_MEM (Pmode, tp);
5599 RTX_UNCHANGING_P (tp) = 1;
5600 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5601 tp = force_reg (Pmode, tp);
5602
5603 return tp;
5604 }
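/* The UNSPEC_TP address built here is recognized by print_operand_address
   below and printed as %gs:0, so the load above comes out roughly as
       movl %gs:0, %reg
   in AT&T syntax (DWORD PTR gs:0 with -masm=intel).  */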
5605
5606 /* Try machine-dependent ways of modifying an illegitimate address
5607 to be legitimate. If we find one, return the new, valid address.
5608 This macro is used in only one place: `memory_address' in explow.c.
5609
5610 OLDX is the address as it was before break_out_memory_refs was called.
5611 In some cases it is useful to look at this to decide what needs to be done.
5612
5613 MODE and WIN are passed so that this macro can use
5614 GO_IF_LEGITIMATE_ADDRESS.
5615
5616 It is always safe for this macro to do nothing. It exists to recognize
5617 opportunities to optimize the output.
5618
5619 For the 80386, we handle X+REG by loading X into a register R and
5620 using R+REG. R will go in a general reg and indexing will be used.
5621 However, if REG is a broken-out memory address or multiplication,
5622 nothing needs to be done because REG can certainly go in a general reg.
5623
5624 When -fpic is used, special handling is needed for symbolic references.
5625 See comments by legitimize_pic_address in i386.c for details. */
5626
5627 rtx
5628 legitimize_address (x, oldx, mode)
5629 register rtx x;
5630 register rtx oldx ATTRIBUTE_UNUSED;
5631 enum machine_mode mode;
5632 {
5633 int changed = 0;
5634 unsigned log;
5635
5636 if (TARGET_DEBUG_ADDR)
5637 {
5638 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5639 GET_MODE_NAME (mode));
5640 debug_rtx (x);
5641 }
5642
5643 log = tls_symbolic_operand (x, mode);
5644 if (log)
5645 {
5646 rtx dest, base, off, pic;
5647
5648 switch (log)
5649 {
5650 case TLS_MODEL_GLOBAL_DYNAMIC:
5651 dest = gen_reg_rtx (Pmode);
5652 emit_insn (gen_tls_global_dynamic (dest, x));
5653 break;
5654
5655 case TLS_MODEL_LOCAL_DYNAMIC:
5656 base = gen_reg_rtx (Pmode);
5657 emit_insn (gen_tls_local_dynamic_base (base));
5658
5659 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5660 off = gen_rtx_CONST (Pmode, off);
5661
5662 return gen_rtx_PLUS (Pmode, base, off);
5663
5664 case TLS_MODEL_INITIAL_EXEC:
5665 if (flag_pic)
5666 {
5667 if (reload_in_progress)
5668 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5669 pic = pic_offset_table_rtx;
5670 }
5671 else if (!TARGET_GNU_TLS)
5672 {
5673 pic = gen_reg_rtx (Pmode);
5674 emit_insn (gen_set_got (pic));
5675 }
5676 else
5677 pic = NULL;
5678
5679 base = get_thread_pointer ();
5680
5681 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5682 !TARGET_GNU_TLS
5683 ? UNSPEC_GOTTPOFF
5684 : flag_pic ? UNSPEC_GOTNTPOFF
5685 : UNSPEC_INDNTPOFF);
5686 off = gen_rtx_CONST (Pmode, off);
5687 if (flag_pic || !TARGET_GNU_TLS)
5688 off = gen_rtx_PLUS (Pmode, pic, off);
5689 off = gen_rtx_MEM (Pmode, off);
5690 RTX_UNCHANGING_P (off) = 1;
5691 set_mem_alias_set (off, ix86_GOT_alias_set ());
5692 dest = gen_reg_rtx (Pmode);
5693
5694 if (TARGET_GNU_TLS)
5695 {
5696 emit_move_insn (dest, off);
5697 return gen_rtx_PLUS (Pmode, base, dest);
5698 }
5699 else
5700 emit_insn (gen_subsi3 (dest, base, off));
5701 break;
5702
5703 case TLS_MODEL_LOCAL_EXEC:
5704 base = get_thread_pointer ();
5705
5706 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5707 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5708 off = gen_rtx_CONST (Pmode, off);
5709
5710 if (TARGET_GNU_TLS)
5711 return gen_rtx_PLUS (Pmode, base, off);
5712 else
5713 {
5714 dest = gen_reg_rtx (Pmode);
5715 emit_insn (gen_subsi3 (dest, base, off));
5716 }
5717 break;
5718
5719 default:
5720 abort ();
5721 }
5722
5723 return dest;
5724 }
5725
5726 if (flag_pic && SYMBOLIC_CONST (x))
5727 return legitimize_pic_address (x, 0);
5728
5729 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5730 if (GET_CODE (x) == ASHIFT
5731 && GET_CODE (XEXP (x, 1)) == CONST_INT
5732 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5733 {
5734 changed = 1;
5735 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5736 GEN_INT (1 << log));
5737 }
5738
5739 if (GET_CODE (x) == PLUS)
5740 {
5741 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5742
5743 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5744 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5745 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5746 {
5747 changed = 1;
5748 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5749 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5750 GEN_INT (1 << log));
5751 }
5752
5753 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5754 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5755 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5756 {
5757 changed = 1;
5758 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5759 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5760 GEN_INT (1 << log));
5761 }
5762
5763 /* Put multiply first if it isn't already. */
5764 if (GET_CODE (XEXP (x, 1)) == MULT)
5765 {
5766 rtx tmp = XEXP (x, 0);
5767 XEXP (x, 0) = XEXP (x, 1);
5768 XEXP (x, 1) = tmp;
5769 changed = 1;
5770 }
5771
5772 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5773 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5774 created by virtual register instantiation, register elimination, and
5775 similar optimizations. */
5776 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5777 {
5778 changed = 1;
5779 x = gen_rtx_PLUS (Pmode,
5780 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5781 XEXP (XEXP (x, 1), 0)),
5782 XEXP (XEXP (x, 1), 1));
5783 }
5784
5785 /* Canonicalize
5786 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5787 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5788 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5789 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5790 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5791 && CONSTANT_P (XEXP (x, 1)))
5792 {
5793 rtx constant;
5794 rtx other = NULL_RTX;
5795
5796 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5797 {
5798 constant = XEXP (x, 1);
5799 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5800 }
5801 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5802 {
5803 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5804 other = XEXP (x, 1);
5805 }
5806 else
5807 constant = 0;
5808
5809 if (constant)
5810 {
5811 changed = 1;
5812 x = gen_rtx_PLUS (Pmode,
5813 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5814 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5815 plus_constant (other, INTVAL (constant)));
5816 }
5817 }
5818
5819 if (changed && legitimate_address_p (mode, x, FALSE))
5820 return x;
5821
5822 if (GET_CODE (XEXP (x, 0)) == MULT)
5823 {
5824 changed = 1;
5825 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5826 }
5827
5828 if (GET_CODE (XEXP (x, 1)) == MULT)
5829 {
5830 changed = 1;
5831 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5832 }
5833
5834 if (changed
5835 && GET_CODE (XEXP (x, 1)) == REG
5836 && GET_CODE (XEXP (x, 0)) == REG)
5837 return x;
5838
5839 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5840 {
5841 changed = 1;
5842 x = legitimize_pic_address (x, 0);
5843 }
5844
5845 if (changed && legitimate_address_p (mode, x, FALSE))
5846 return x;
5847
5848 if (GET_CODE (XEXP (x, 0)) == REG)
5849 {
5850 register rtx temp = gen_reg_rtx (Pmode);
5851 register rtx val = force_operand (XEXP (x, 1), temp);
5852 if (val != temp)
5853 emit_move_insn (temp, val);
5854
5855 XEXP (x, 1) = temp;
5856 return x;
5857 }
5858
5859 else if (GET_CODE (XEXP (x, 1)) == REG)
5860 {
5861 register rtx temp = gen_reg_rtx (Pmode);
5862 register rtx val = force_operand (XEXP (x, 0), temp);
5863 if (val != temp)
5864 emit_move_insn (temp, val);
5865
5866 XEXP (x, 0) = temp;
5867 return x;
5868 }
5869 }
5870
5871 return x;
5872 }
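/* As a concrete instance of the canonicalizations above, an address like
       (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   left behind by virtual register instantiation is re-associated into
       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   which is the base + index*scale + disp shape that legitimate_address_p
   accepts.  */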
5873 \f
5874 /* Print an integer constant expression in assembler syntax. Addition
5875 and subtraction are the only arithmetic that may appear in these
5876 expressions. FILE is the stdio stream to write to, X is the rtx, and
5877 CODE is the operand print code from the output string. */
5878
5879 static void
5880 output_pic_addr_const (file, x, code)
5881 FILE *file;
5882 rtx x;
5883 int code;
5884 {
5885 char buf[256];
5886
5887 switch (GET_CODE (x))
5888 {
5889 case PC:
5890 if (flag_pic)
5891 putc ('.', file);
5892 else
5893 abort ();
5894 break;
5895
5896 case SYMBOL_REF:
5897 assemble_name (file, XSTR (x, 0));
5898 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5899 fputs ("@PLT", file);
5900 break;
5901
5902 case LABEL_REF:
5903 x = XEXP (x, 0);
5904 /* FALLTHRU */
5905 case CODE_LABEL:
5906 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5907 assemble_name (asm_out_file, buf);
5908 break;
5909
5910 case CONST_INT:
5911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5912 break;
5913
5914 case CONST:
5915 /* This used to output parentheses around the expression,
5916 but that does not work on the 386 (either ATT or BSD assembler). */
5917 output_pic_addr_const (file, XEXP (x, 0), code);
5918 break;
5919
5920 case CONST_DOUBLE:
5921 if (GET_MODE (x) == VOIDmode)
5922 {
5923 /* We can use %d if the number is <32 bits and positive. */
5924 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5925 fprintf (file, "0x%lx%08lx",
5926 (unsigned long) CONST_DOUBLE_HIGH (x),
5927 (unsigned long) CONST_DOUBLE_LOW (x));
5928 else
5929 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5930 }
5931 else
5932 /* We can't handle floating point constants;
5933 PRINT_OPERAND must handle them. */
5934 output_operand_lossage ("floating constant misused");
5935 break;
5936
5937 case PLUS:
5938 /* Some assemblers need integer constants to appear first. */
5939 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5940 {
5941 output_pic_addr_const (file, XEXP (x, 0), code);
5942 putc ('+', file);
5943 output_pic_addr_const (file, XEXP (x, 1), code);
5944 }
5945 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5946 {
5947 output_pic_addr_const (file, XEXP (x, 1), code);
5948 putc ('+', file);
5949 output_pic_addr_const (file, XEXP (x, 0), code);
5950 }
5951 else
5952 abort ();
5953 break;
5954
5955 case MINUS:
5956 if (!TARGET_MACHO)
5957 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5958 output_pic_addr_const (file, XEXP (x, 0), code);
5959 putc ('-', file);
5960 output_pic_addr_const (file, XEXP (x, 1), code);
5961 if (!TARGET_MACHO)
5962 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5963 break;
5964
5965 case UNSPEC:
5966 if (XVECLEN (x, 0) != 1)
5967 abort ();
5968 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5969 switch (XINT (x, 1))
5970 {
5971 case UNSPEC_GOT:
5972 fputs ("@GOT", file);
5973 break;
5974 case UNSPEC_GOTOFF:
5975 fputs ("@GOTOFF", file);
5976 break;
5977 case UNSPEC_GOTPCREL:
5978 fputs ("@GOTPCREL(%rip)", file);
5979 break;
5980 case UNSPEC_GOTTPOFF:
5981 /* FIXME: This might be @TPOFF in Sun ld too. */
5982 fputs ("@GOTTPOFF", file);
5983 break;
5984 case UNSPEC_TPOFF:
5985 fputs ("@TPOFF", file);
5986 break;
5987 case UNSPEC_NTPOFF:
5988 fputs ("@NTPOFF", file);
5989 break;
5990 case UNSPEC_DTPOFF:
5991 fputs ("@DTPOFF", file);
5992 break;
5993 case UNSPEC_GOTNTPOFF:
5994 fputs ("@GOTNTPOFF", file);
5995 break;
5996 case UNSPEC_INDNTPOFF:
5997 fputs ("@INDNTPOFF", file);
5998 break;
5999 default:
6000 output_operand_lossage ("invalid UNSPEC as operand");
6001 break;
6002 }
6003 break;
6004
6005 default:
6006 output_operand_lossage ("invalid expression as operand");
6007 }
6008 }
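/* For example, (const (unspec [foo] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF", and on 64-bit targets (const (unspec [foo]
   UNSPEC_GOTPCREL)) comes out as "foo@GOTPCREL(%rip)".  */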
6009
6010 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6011 We need to handle our special PIC relocations. */
6012
6013 void
6014 i386_dwarf_output_addr_const (file, x)
6015 FILE *file;
6016 rtx x;
6017 {
6018 #ifdef ASM_QUAD
6019 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6020 #else
6021 if (TARGET_64BIT)
6022 abort ();
6023 fprintf (file, "%s", ASM_LONG);
6024 #endif
6025 if (flag_pic)
6026 output_pic_addr_const (file, x, '\0');
6027 else
6028 output_addr_const (file, x);
6029 fputc ('\n', file);
6030 }
6031
6032 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6033 We need to emit DTP-relative relocations. */
6034
6035 void
6036 i386_output_dwarf_dtprel (file, size, x)
6037 FILE *file;
6038 int size;
6039 rtx x;
6040 {
6041 switch (size)
6042 {
6043 case 4:
6044 fputs (ASM_LONG, file);
6045 break;
6046 case 8:
6047 #ifdef ASM_QUAD
6048 fputs (ASM_QUAD, file);
6049 break;
6050 #endif
6051 default:
6052 abort ();
6053 }
6054
6055 output_addr_const (file, x);
6056 fputs ("@DTPOFF", file);
6057 }
6058
6059 /* In the name of slightly smaller debug output, and to cater to
6060 general assembler lossage, recognize PIC+GOTOFF and turn it back
6061 into a direct symbol reference. */
6062
6063 rtx
6064 i386_simplify_dwarf_addr (orig_x)
6065 rtx orig_x;
6066 {
6067 rtx x = orig_x, y;
6068
6069 if (GET_CODE (x) == MEM)
6070 x = XEXP (x, 0);
6071
6072 if (TARGET_64BIT)
6073 {
6074 if (GET_CODE (x) != CONST
6075 || GET_CODE (XEXP (x, 0)) != UNSPEC
6076 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6077 || GET_CODE (orig_x) != MEM)
6078 return orig_x;
6079 return XVECEXP (XEXP (x, 0), 0, 0);
6080 }
6081
6082 if (GET_CODE (x) != PLUS
6083 || GET_CODE (XEXP (x, 1)) != CONST)
6084 return orig_x;
6085
6086 if (GET_CODE (XEXP (x, 0)) == REG
6087 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6088 /* %ebx + GOT/GOTOFF */
6089 y = NULL;
6090 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6091 {
6092 /* %ebx + %reg * scale + GOT/GOTOFF */
6093 y = XEXP (x, 0);
6094 if (GET_CODE (XEXP (y, 0)) == REG
6095 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6096 y = XEXP (y, 1);
6097 else if (GET_CODE (XEXP (y, 1)) == REG
6098 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6099 y = XEXP (y, 0);
6100 else
6101 return orig_x;
6102 if (GET_CODE (y) != REG
6103 && GET_CODE (y) != MULT
6104 && GET_CODE (y) != ASHIFT)
6105 return orig_x;
6106 }
6107 else
6108 return orig_x;
6109
6110 x = XEXP (XEXP (x, 1), 0);
6111 if (GET_CODE (x) == UNSPEC
6112 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6113 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6114 {
6115 if (y)
6116 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6117 return XVECEXP (x, 0, 0);
6118 }
6119
6120 if (GET_CODE (x) == PLUS
6121 && GET_CODE (XEXP (x, 0)) == UNSPEC
6122 && GET_CODE (XEXP (x, 1)) == CONST_INT
6123 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6124 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6125 && GET_CODE (orig_x) != MEM)))
6126 {
6127 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6128 if (y)
6129 return gen_rtx_PLUS (Pmode, y, x);
6130 return x;
6131 }
6132
6133 return orig_x;
6134 }
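/* E.g. the 32-bit PIC form (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF)))
   produced by legitimize_pic_address is turned back into plain `foo' here,
   which keeps the debug output free of GOT arithmetic.  */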
6135 \f
6136 static void
6137 put_condition_code (code, mode, reverse, fp, file)
6138 enum rtx_code code;
6139 enum machine_mode mode;
6140 int reverse, fp;
6141 FILE *file;
6142 {
6143 const char *suffix;
6144
6145 if (mode == CCFPmode || mode == CCFPUmode)
6146 {
6147 enum rtx_code second_code, bypass_code;
6148 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6149 if (bypass_code != NIL || second_code != NIL)
6150 abort ();
6151 code = ix86_fp_compare_code_to_integer (code);
6152 mode = CCmode;
6153 }
6154 if (reverse)
6155 code = reverse_condition (code);
6156
6157 switch (code)
6158 {
6159 case EQ:
6160 suffix = "e";
6161 break;
6162 case NE:
6163 suffix = "ne";
6164 break;
6165 case GT:
6166 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6167 abort ();
6168 suffix = "g";
6169 break;
6170 case GTU:
6171 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6172 Those same assemblers have the same but opposite lossage on cmov. */
6173 if (mode != CCmode)
6174 abort ();
6175 suffix = fp ? "nbe" : "a";
6176 break;
6177 case LT:
6178 if (mode == CCNOmode || mode == CCGOCmode)
6179 suffix = "s";
6180 else if (mode == CCmode || mode == CCGCmode)
6181 suffix = "l";
6182 else
6183 abort ();
6184 break;
6185 case LTU:
6186 if (mode != CCmode)
6187 abort ();
6188 suffix = "b";
6189 break;
6190 case GE:
6191 if (mode == CCNOmode || mode == CCGOCmode)
6192 suffix = "ns";
6193 else if (mode == CCmode || mode == CCGCmode)
6194 suffix = "ge";
6195 else
6196 abort ();
6197 break;
6198 case GEU:
6199 /* ??? As above. */
6200 if (mode != CCmode)
6201 abort ();
6202 suffix = fp ? "nb" : "ae";
6203 break;
6204 case LE:
6205 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6206 abort ();
6207 suffix = "le";
6208 break;
6209 case LEU:
6210 if (mode != CCmode)
6211 abort ();
6212 suffix = "be";
6213 break;
6214 case UNORDERED:
6215 suffix = fp ? "u" : "p";
6216 break;
6217 case ORDERED:
6218 suffix = fp ? "nu" : "np";
6219 break;
6220 default:
6221 abort ();
6222 }
6223 fputs (suffix, file);
6224 }
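/* For example, (gt ...) in CCGCmode prints "g" and unsigned (gtu ...)
   prints "a" (or "nbe" in the fcmov case noted above), yielding
   mnemonics such as setg/seta or cmovg/cmova when this is used as an
   opcode suffix.  */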
6225
6226 void
6227 print_reg (x, code, file)
6228 rtx x;
6229 int code;
6230 FILE *file;
6231 {
6232 if (REGNO (x) == ARG_POINTER_REGNUM
6233 || REGNO (x) == FRAME_POINTER_REGNUM
6234 || REGNO (x) == FLAGS_REG
6235 || REGNO (x) == FPSR_REG)
6236 abort ();
6237
6238 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6239 putc ('%', file);
6240
6241 if (code == 'w' || MMX_REG_P (x))
6242 code = 2;
6243 else if (code == 'b')
6244 code = 1;
6245 else if (code == 'k')
6246 code = 4;
6247 else if (code == 'q')
6248 code = 8;
6249 else if (code == 'y')
6250 code = 3;
6251 else if (code == 'h')
6252 code = 0;
6253 else
6254 code = GET_MODE_SIZE (GET_MODE (x));
6255
6256 /* Irritatingly, AMD extended registers use a different naming convention
6257 from the normal registers. */
6258 if (REX_INT_REG_P (x))
6259 {
6260 if (!TARGET_64BIT)
6261 abort ();
6262 switch (code)
6263 {
6264 case 0:
6265 error ("extended registers have no high halves");
6266 break;
6267 case 1:
6268 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6269 break;
6270 case 2:
6271 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6272 break;
6273 case 4:
6274 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6275 break;
6276 case 8:
6277 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6278 break;
6279 default:
6280 error ("unsupported operand size for extended register");
6281 break;
6282 }
6283 return;
6284 }
6285 switch (code)
6286 {
6287 case 3:
6288 if (STACK_TOP_P (x))
6289 {
6290 fputs ("st(0)", file);
6291 break;
6292 }
6293 /* FALLTHRU */
6294 case 8:
6295 case 4:
6296 case 12:
6297 if (! ANY_FP_REG_P (x))
6298 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6299 /* FALLTHRU */
6300 case 16:
6301 case 2:
6302 fputs (hi_reg_name[REGNO (x)], file);
6303 break;
6304 case 1:
6305 fputs (qi_reg_name[REGNO (x)], file);
6306 break;
6307 case 0:
6308 fputs (qi_high_reg_name[REGNO (x)], file);
6309 break;
6310 default:
6311 abort ();
6312 }
6313 }
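/* For example, the first REX register printed with code 'k' (SImode)
   comes out as "r8d", with code 'w' as "r8w" and with code 'q' as "r8",
   following the r%i[bwd] formats above.  */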
6314
6315 /* Locate some local-dynamic symbol still in use by this function
6316 so that we can print its name in some tls_local_dynamic_base
6317 pattern. */
6318
6319 static const char *
6320 get_some_local_dynamic_name ()
6321 {
6322 rtx insn;
6323
6324 if (cfun->machine->some_ld_name)
6325 return cfun->machine->some_ld_name;
6326
6327 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6328 if (INSN_P (insn)
6329 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6330 return cfun->machine->some_ld_name;
6331
6332 abort ();
6333 }
6334
6335 static int
6336 get_some_local_dynamic_name_1 (px, data)
6337 rtx *px;
6338 void *data ATTRIBUTE_UNUSED;
6339 {
6340 rtx x = *px;
6341
6342 if (GET_CODE (x) == SYMBOL_REF
6343 && local_dynamic_symbolic_operand (x, Pmode))
6344 {
6345 cfun->machine->some_ld_name = XSTR (x, 0);
6346 return 1;
6347 }
6348
6349 return 0;
6350 }
6351
6352 /* Meaning of CODE:
6353 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6354 C -- print opcode suffix for set/cmov insn.
6355 c -- like C, but print reversed condition
6356 F,f -- likewise, but for floating-point.
6357 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6358 nothing
6359 R -- print the prefix for register names.
6360 z -- print the opcode suffix for the size of the current operand.
6361 * -- print a star (in certain assembler syntax)
6362 A -- print an absolute memory reference.
6363 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6364 s -- print a shift double count, followed by the assembler's argument
6365 delimiter.
6366 b -- print the QImode name of the register for the indicated operand.
6367 %b0 would print %al if operands[0] is reg 0.
6368 w -- likewise, print the HImode name of the register.
6369 k -- likewise, print the SImode name of the register.
6370 q -- likewise, print the DImode name of the register.
6371 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6372 y -- print "st(0)" instead of "st" as a register.
6373 D -- print condition for SSE cmp instruction.
6374 P -- if PIC, print an @PLT suffix.
6375 X -- don't print any sort of PIC '@' suffix for a symbol.
6376 & -- print some in-use local-dynamic symbol name.
6377 */
6378
6379 void
6380 print_operand (file, x, code)
6381 FILE *file;
6382 rtx x;
6383 int code;
6384 {
6385 if (code)
6386 {
6387 switch (code)
6388 {
6389 case '*':
6390 if (ASSEMBLER_DIALECT == ASM_ATT)
6391 putc ('*', file);
6392 return;
6393
6394 case '&':
6395 assemble_name (file, get_some_local_dynamic_name ());
6396 return;
6397
6398 case 'A':
6399 if (ASSEMBLER_DIALECT == ASM_ATT)
6400 putc ('*', file);
6401 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6402 {
6403 /* Intel syntax. For absolute addresses, registers should not
6404 be surrounded by brackets. */
6405 if (GET_CODE (x) != REG)
6406 {
6407 putc ('[', file);
6408 PRINT_OPERAND (file, x, 0);
6409 putc (']', file);
6410 return;
6411 }
6412 }
6413 else
6414 abort ();
6415
6416 PRINT_OPERAND (file, x, 0);
6417 return;
6418
6419
6420 case 'L':
6421 if (ASSEMBLER_DIALECT == ASM_ATT)
6422 putc ('l', file);
6423 return;
6424
6425 case 'W':
6426 if (ASSEMBLER_DIALECT == ASM_ATT)
6427 putc ('w', file);
6428 return;
6429
6430 case 'B':
6431 if (ASSEMBLER_DIALECT == ASM_ATT)
6432 putc ('b', file);
6433 return;
6434
6435 case 'Q':
6436 if (ASSEMBLER_DIALECT == ASM_ATT)
6437 putc ('l', file);
6438 return;
6439
6440 case 'S':
6441 if (ASSEMBLER_DIALECT == ASM_ATT)
6442 putc ('s', file);
6443 return;
6444
6445 case 'T':
6446 if (ASSEMBLER_DIALECT == ASM_ATT)
6447 putc ('t', file);
6448 return;
6449
6450 case 'z':
6451 /* 387 opcodes don't get size suffixes if the operands are
6452 registers. */
6453 if (STACK_REG_P (x))
6454 return;
6455
6456 /* Likewise if using Intel opcodes. */
6457 if (ASSEMBLER_DIALECT == ASM_INTEL)
6458 return;
6459
6460 /* Derive the opcode suffix from the size of the operand. */
6461 switch (GET_MODE_SIZE (GET_MODE (x)))
6462 {
6463 case 2:
6464 #ifdef HAVE_GAS_FILDS_FISTS
6465 putc ('s', file);
6466 #endif
6467 return;
6468
6469 case 4:
6470 if (GET_MODE (x) == SFmode)
6471 {
6472 putc ('s', file);
6473 return;
6474 }
6475 else
6476 putc ('l', file);
6477 return;
6478
6479 case 12:
6480 case 16:
6481 putc ('t', file);
6482 return;
6483
6484 case 8:
6485 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6486 {
6487 #ifdef GAS_MNEMONICS
6488 putc ('q', file);
6489 #else
6490 putc ('l', file);
6491 putc ('l', file);
6492 #endif
6493 }
6494 else
6495 putc ('l', file);
6496 return;
6497
6498 default:
6499 abort ();
6500 }
6501
6502 case 'b':
6503 case 'w':
6504 case 'k':
6505 case 'q':
6506 case 'h':
6507 case 'y':
6508 case 'X':
6509 case 'P':
6510 break;
6511
6512 case 's':
6513 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6514 {
6515 PRINT_OPERAND (file, x, 0);
6516 putc (',', file);
6517 }
6518 return;
6519
6520 case 'D':
6521 /* A little bit of braindamage here. The SSE compare instructions
6522 use completely different names for the comparisons than the
6523 fp conditional moves do. */
6524 switch (GET_CODE (x))
6525 {
6526 case EQ:
6527 case UNEQ:
6528 fputs ("eq", file);
6529 break;
6530 case LT:
6531 case UNLT:
6532 fputs ("lt", file);
6533 break;
6534 case LE:
6535 case UNLE:
6536 fputs ("le", file);
6537 break;
6538 case UNORDERED:
6539 fputs ("unord", file);
6540 break;
6541 case NE:
6542 case LTGT:
6543 fputs ("neq", file);
6544 break;
6545 case UNGE:
6546 case GE:
6547 fputs ("nlt", file);
6548 break;
6549 case UNGT:
6550 case GT:
6551 fputs ("nle", file);
6552 break;
6553 case ORDERED:
6554 fputs ("ord", file);
6555 break;
6556 default:
6557 abort ();
6558 break;
6559 }
6560 return;
6561 case 'O':
6562 #ifdef CMOV_SUN_AS_SYNTAX
6563 if (ASSEMBLER_DIALECT == ASM_ATT)
6564 {
6565 switch (GET_MODE (x))
6566 {
6567 case HImode: putc ('w', file); break;
6568 case SImode:
6569 case SFmode: putc ('l', file); break;
6570 case DImode:
6571 case DFmode: putc ('q', file); break;
6572 default: abort ();
6573 }
6574 putc ('.', file);
6575 }
6576 #endif
6577 return;
6578 case 'C':
6579 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6580 return;
6581 case 'F':
6582 #ifdef CMOV_SUN_AS_SYNTAX
6583 if (ASSEMBLER_DIALECT == ASM_ATT)
6584 putc ('.', file);
6585 #endif
6586 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6587 return;
6588
6589 /* Like above, but reverse condition */
6590 case 'c':
6591 /* Check to see if argument to %c is really a constant
6592 and not a condition code which needs to be reversed. */
6593 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6594 {
6595 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6596 return;
6597 }
6598 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6599 return;
6600 case 'f':
6601 #ifdef CMOV_SUN_AS_SYNTAX
6602 if (ASSEMBLER_DIALECT == ASM_ATT)
6603 putc ('.', file);
6604 #endif
6605 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6606 return;
6607 case '+':
6608 {
6609 rtx x;
6610
6611 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6612 return;
6613
6614 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6615 if (x)
6616 {
6617 int pred_val = INTVAL (XEXP (x, 0));
6618
6619 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6620 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6621 {
6622 int taken = pred_val > REG_BR_PROB_BASE / 2;
6623 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6624
6625 /* Emit hints only in the case the default branch prediction
6626 heuristics would fail. */
6627 if (taken != cputaken)
6628 {
6629 /* We use 3e (DS) prefix for taken branches and
6630 2e (CS) prefix for not taken branches. */
6631 if (taken)
6632 fputs ("ds ; ", file);
6633 else
6634 fputs ("cs ; ", file);
6635 }
6636 }
6637 }
6638 return;
6639 }
6640 default:
6641 output_operand_lossage ("invalid operand code `%c'", code);
6642 }
6643 }
6644
6645 if (GET_CODE (x) == REG)
6646 {
6647 PRINT_REG (x, code, file);
6648 }
6649
6650 else if (GET_CODE (x) == MEM)
6651 {
6652 /* No `byte ptr' prefix for call instructions. */
6653 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6654 {
6655 const char * size;
6656 switch (GET_MODE_SIZE (GET_MODE (x)))
6657 {
6658 case 1: size = "BYTE"; break;
6659 case 2: size = "WORD"; break;
6660 case 4: size = "DWORD"; break;
6661 case 8: size = "QWORD"; break;
6662 case 12: size = "XWORD"; break;
6663 case 16: size = "XMMWORD"; break;
6664 default:
6665 abort ();
6666 }
6667
6668 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6669 if (code == 'b')
6670 size = "BYTE";
6671 else if (code == 'w')
6672 size = "WORD";
6673 else if (code == 'k')
6674 size = "DWORD";
6675
6676 fputs (size, file);
6677 fputs (" PTR ", file);
6678 }
6679
6680 x = XEXP (x, 0);
6681 if (flag_pic && CONSTANT_ADDRESS_P (x))
6682 output_pic_addr_const (file, x, code);
6683 /* Avoid (%rip) for call operands. */
6684 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6685 && GET_CODE (x) != CONST_INT)
6686 output_addr_const (file, x);
6687 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6688 output_operand_lossage ("invalid constraints for operand");
6689 else
6690 output_address (x);
6691 }
6692
6693 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6694 {
6695 REAL_VALUE_TYPE r;
6696 long l;
6697
6698 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6699 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6700
6701 if (ASSEMBLER_DIALECT == ASM_ATT)
6702 putc ('$', file);
6703 fprintf (file, "0x%lx", l);
6704 }
6705
6706 /* These float cases don't actually occur as immediate operands. */
6707 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6708 {
6709 REAL_VALUE_TYPE r;
6710 char dstr[30];
6711
6712 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6713 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6714 fprintf (file, "%s", dstr);
6715 }
6716
6717 else if (GET_CODE (x) == CONST_DOUBLE
6718 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6719 {
6720 REAL_VALUE_TYPE r;
6721 char dstr[30];
6722
6723 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6724 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6725 fprintf (file, "%s", dstr);
6726 }
6727
6728 else
6729 {
6730 if (code != 'P')
6731 {
6732 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6733 {
6734 if (ASSEMBLER_DIALECT == ASM_ATT)
6735 putc ('$', file);
6736 }
6737 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6738 || GET_CODE (x) == LABEL_REF)
6739 {
6740 if (ASSEMBLER_DIALECT == ASM_ATT)
6741 putc ('$', file);
6742 else
6743 fputs ("OFFSET FLAT:", file);
6744 }
6745 }
6746 if (GET_CODE (x) == CONST_INT)
6747 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6748 else if (flag_pic)
6749 output_pic_addr_const (file, x, code);
6750 else
6751 output_addr_const (file, x);
6752 }
6753 }
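/* A few examples of the codes above: "%z0" appends the 387 size suffix
   ('s' for an SFmode memory operand, 'l' for SImode or DFmode), "%k1"
   forces the SImode register name, and in Intel syntax a DFmode memory
   operand is prefixed with "QWORD PTR ".  */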
6754 \f
6755 /* Print a memory operand whose address is ADDR. */
6756
6757 void
6758 print_operand_address (file, addr)
6759 FILE *file;
6760 register rtx addr;
6761 {
6762 struct ix86_address parts;
6763 rtx base, index, disp;
6764 int scale;
6765
6766 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6767 {
6768 if (ASSEMBLER_DIALECT == ASM_INTEL)
6769 fputs ("DWORD PTR ", file);
6770 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6771 putc ('%', file);
6772 fputs ("gs:0", file);
6773 return;
6774 }
6775
6776 if (! ix86_decompose_address (addr, &parts))
6777 abort ();
6778
6779 base = parts.base;
6780 index = parts.index;
6781 disp = parts.disp;
6782 scale = parts.scale;
6783
6784 if (!base && !index)
6785 {
6786 /* A displacement-only address requires special attention. */
6787
6788 if (GET_CODE (disp) == CONST_INT)
6789 {
6790 if (ASSEMBLER_DIALECT == ASM_INTEL)
6791 {
6792 if (USER_LABEL_PREFIX[0] == 0)
6793 putc ('%', file);
6794 fputs ("ds:", file);
6795 }
6796 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6797 }
6798 else if (flag_pic)
6799 output_pic_addr_const (file, addr, 0);
6800 else
6801 output_addr_const (file, addr);
6802
6803 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6804 if (TARGET_64BIT
6805 && (GET_CODE (addr) == SYMBOL_REF
6806 || GET_CODE (addr) == LABEL_REF
6807 || (GET_CODE (addr) == CONST
6808 && GET_CODE (XEXP (addr, 0)) == PLUS
6809 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6810 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6811 fputs ("(%rip)", file);
6812 }
6813 else
6814 {
6815 if (ASSEMBLER_DIALECT == ASM_ATT)
6816 {
6817 if (disp)
6818 {
6819 if (flag_pic)
6820 output_pic_addr_const (file, disp, 0);
6821 else if (GET_CODE (disp) == LABEL_REF)
6822 output_asm_label (disp);
6823 else
6824 output_addr_const (file, disp);
6825 }
6826
6827 putc ('(', file);
6828 if (base)
6829 PRINT_REG (base, 0, file);
6830 if (index)
6831 {
6832 putc (',', file);
6833 PRINT_REG (index, 0, file);
6834 if (scale != 1)
6835 fprintf (file, ",%d", scale);
6836 }
6837 putc (')', file);
6838 }
6839 else
6840 {
6841 rtx offset = NULL_RTX;
6842
6843 if (disp)
6844 {
6845 /* Pull out the offset of a symbol; print any symbol itself. */
6846 if (GET_CODE (disp) == CONST
6847 && GET_CODE (XEXP (disp, 0)) == PLUS
6848 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6849 {
6850 offset = XEXP (XEXP (disp, 0), 1);
6851 disp = gen_rtx_CONST (VOIDmode,
6852 XEXP (XEXP (disp, 0), 0));
6853 }
6854
6855 if (flag_pic)
6856 output_pic_addr_const (file, disp, 0);
6857 else if (GET_CODE (disp) == LABEL_REF)
6858 output_asm_label (disp);
6859 else if (GET_CODE (disp) == CONST_INT)
6860 offset = disp;
6861 else
6862 output_addr_const (file, disp);
6863 }
6864
6865 putc ('[', file);
6866 if (base)
6867 {
6868 PRINT_REG (base, 0, file);
6869 if (offset)
6870 {
6871 if (INTVAL (offset) >= 0)
6872 putc ('+', file);
6873 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6874 }
6875 }
6876 else if (offset)
6877 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6878 else
6879 putc ('0', file);
6880
6881 if (index)
6882 {
6883 putc ('+', file);
6884 PRINT_REG (index, 0, file);
6885 if (scale != 1)
6886 fprintf (file, "*%d", scale);
6887 }
6888 putc (']', file);
6889 }
6890 }
6891 }
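/* For example, a base of %ebx, an index of %eax scaled by 4 and a
   displacement of 8 is printed roughly as "8(%ebx,%eax,4)" in AT&T
   syntax and as "[ebx+8+eax*4]" in Intel syntax, per the two branches
   above.  */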
6892
6893 bool
6894 output_addr_const_extra (file, x)
6895 FILE *file;
6896 rtx x;
6897 {
6898 rtx op;
6899
6900 if (GET_CODE (x) != UNSPEC)
6901 return false;
6902
6903 op = XVECEXP (x, 0, 0);
6904 switch (XINT (x, 1))
6905 {
6906 case UNSPEC_GOTTPOFF:
6907 output_addr_const (file, op);
6908 /* FIXME: This might be @TPOFF in Sun ld. */
6909 fputs ("@GOTTPOFF", file);
6910 break;
6911 case UNSPEC_TPOFF:
6912 output_addr_const (file, op);
6913 fputs ("@TPOFF", file);
6914 break;
6915 case UNSPEC_NTPOFF:
6916 output_addr_const (file, op);
6917 fputs ("@NTPOFF", file);
6918 break;
6919 case UNSPEC_DTPOFF:
6920 output_addr_const (file, op);
6921 fputs ("@DTPOFF", file);
6922 break;
6923 case UNSPEC_GOTNTPOFF:
6924 output_addr_const (file, op);
6925 fputs ("@GOTNTPOFF", file);
6926 break;
6927 case UNSPEC_INDNTPOFF:
6928 output_addr_const (file, op);
6929 fputs ("@INDNTPOFF", file);
6930 break;
6931
6932 default:
6933 return false;
6934 }
6935
6936 return true;
6937 }
6938 \f
6939 /* Split one or more DImode RTL references into pairs of SImode
6940 references. The RTL can be REG, offsettable MEM, integer constant, or
6941 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6942 split and "num" is its length. lo_half and hi_half are output arrays
6943 that parallel "operands". */
6944
6945 void
6946 split_di (operands, num, lo_half, hi_half)
6947 rtx operands[];
6948 int num;
6949 rtx lo_half[], hi_half[];
6950 {
6951 while (num--)
6952 {
6953 rtx op = operands[num];
6954
6955 /* simplify_subreg refuses to split volatile memory addresses,
6956 but we still have to handle them. */
6957 if (GET_CODE (op) == MEM)
6958 {
6959 lo_half[num] = adjust_address (op, SImode, 0);
6960 hi_half[num] = adjust_address (op, SImode, 4);
6961 }
6962 else
6963 {
6964 lo_half[num] = simplify_gen_subreg (SImode, op,
6965 GET_MODE (op) == VOIDmode
6966 ? DImode : GET_MODE (op), 0);
6967 hi_half[num] = simplify_gen_subreg (SImode, op,
6968 GET_MODE (op) == VOIDmode
6969 ? DImode : GET_MODE (op), 4);
6970 }
6971 }
6972 }
6973 /* Split one or more TImode RTL references into pairs of DImode
6974 references. The RTL can be REG, offsettable MEM, integer constant, or
6975 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6976 split and "num" is its length. lo_half and hi_half are output arrays
6977 that parallel "operands". */
6978
6979 void
6980 split_ti (operands, num, lo_half, hi_half)
6981 rtx operands[];
6982 int num;
6983 rtx lo_half[], hi_half[];
6984 {
6985 while (num--)
6986 {
6987 rtx op = operands[num];
6988
6989 /* simplify_subreg refuses to split volatile memory addresses, but we
6990 still have to handle them. */
6991 if (GET_CODE (op) == MEM)
6992 {
6993 lo_half[num] = adjust_address (op, DImode, 0);
6994 hi_half[num] = adjust_address (op, DImode, 8);
6995 }
6996 else
6997 {
6998 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6999 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7000 }
7001 }
7002 }
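/* For instance, split_di turns a DImode memory operand (mem:DI addr)
   into the SImode pair (mem:SI addr) and (mem:SI addr+4), while a
   DImode constant is split into its low and high 32-bit words via
   simplify_gen_subreg.  */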
7003 \f
7004 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7005 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7006 is the expression of the binary operation. The output may either be
7007 emitted here, or returned to the caller, like all output_* functions.
7008
7009 There is no guarantee that the operands are the same mode, as they
7010 might be within FLOAT or FLOAT_EXTEND expressions. */
7011
7012 #ifndef SYSV386_COMPAT
7013 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7014 wants to fix the assemblers because that causes incompatibility
7015 with gcc. No-one wants to fix gcc because that causes
7016 incompatibility with assemblers... You can use the option of
7017 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7018 #define SYSV386_COMPAT 1
7019 #endif
7020
7021 const char *
7022 output_387_binary_op (insn, operands)
7023 rtx insn;
7024 rtx *operands;
7025 {
7026 static char buf[30];
7027 const char *p;
7028 const char *ssep;
7029 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7030
7031 #ifdef ENABLE_CHECKING
7032 /* Even if we do not want to check the inputs, this documents the input
7033 constraints, which helps in understanding the following code. */
7034 if (STACK_REG_P (operands[0])
7035 && ((REG_P (operands[1])
7036 && REGNO (operands[0]) == REGNO (operands[1])
7037 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7038 || (REG_P (operands[2])
7039 && REGNO (operands[0]) == REGNO (operands[2])
7040 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7041 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7042 ; /* ok */
7043 else if (!is_sse)
7044 abort ();
7045 #endif
7046
7047 switch (GET_CODE (operands[3]))
7048 {
7049 case PLUS:
7050 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7051 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7052 p = "fiadd";
7053 else
7054 p = "fadd";
7055 ssep = "add";
7056 break;
7057
7058 case MINUS:
7059 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7060 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7061 p = "fisub";
7062 else
7063 p = "fsub";
7064 ssep = "sub";
7065 break;
7066
7067 case MULT:
7068 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7069 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7070 p = "fimul";
7071 else
7072 p = "fmul";
7073 ssep = "mul";
7074 break;
7075
7076 case DIV:
7077 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7078 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7079 p = "fidiv";
7080 else
7081 p = "fdiv";
7082 ssep = "div";
7083 break;
7084
7085 default:
7086 abort ();
7087 }
7088
7089 if (is_sse)
7090 {
7091 strcpy (buf, ssep);
7092 if (GET_MODE (operands[0]) == SFmode)
7093 strcat (buf, "ss\t{%2, %0|%0, %2}");
7094 else
7095 strcat (buf, "sd\t{%2, %0|%0, %2}");
7096 return buf;
7097 }
7098 strcpy (buf, p);
7099
7100 switch (GET_CODE (operands[3]))
7101 {
7102 case MULT:
7103 case PLUS:
7104 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7105 {
7106 rtx temp = operands[2];
7107 operands[2] = operands[1];
7108 operands[1] = temp;
7109 }
7110
7111 /* We now know that operands[0] == operands[1]. */
7112
7113 if (GET_CODE (operands[2]) == MEM)
7114 {
7115 p = "%z2\t%2";
7116 break;
7117 }
7118
7119 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7120 {
7121 if (STACK_TOP_P (operands[0]))
7122 /* How is it that we are storing to a dead operand[2]?
7123 Well, presumably operands[1] is dead too. We can't
7124 store the result to st(0) as st(0) gets popped on this
7125 instruction. Instead store to operands[2] (which I
7126 think has to be st(1)). st(1) will be popped later.
7127 gcc <= 2.8.1 didn't have this check and generated
7128 assembly code that the Unixware assembler rejected. */
7129 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7130 else
7131 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7132 break;
7133 }
7134
7135 if (STACK_TOP_P (operands[0]))
7136 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7137 else
7138 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7139 break;
7140
7141 case MINUS:
7142 case DIV:
7143 if (GET_CODE (operands[1]) == MEM)
7144 {
7145 p = "r%z1\t%1";
7146 break;
7147 }
7148
7149 if (GET_CODE (operands[2]) == MEM)
7150 {
7151 p = "%z2\t%2";
7152 break;
7153 }
7154
7155 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7156 {
7157 #if SYSV386_COMPAT
7158 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7159 derived assemblers, confusingly reverse the direction of
7160 the operation for fsub{r} and fdiv{r} when the
7161 destination register is not st(0). The Intel assembler
7162 doesn't have this brain damage. Read !SYSV386_COMPAT to
7163 figure out what the hardware really does. */
7164 if (STACK_TOP_P (operands[0]))
7165 p = "{p\t%0, %2|rp\t%2, %0}";
7166 else
7167 p = "{rp\t%2, %0|p\t%0, %2}";
7168 #else
7169 if (STACK_TOP_P (operands[0]))
7170 /* As above for fmul/fadd, we can't store to st(0). */
7171 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7172 else
7173 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7174 #endif
7175 break;
7176 }
7177
7178 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7179 {
7180 #if SYSV386_COMPAT
7181 if (STACK_TOP_P (operands[0]))
7182 p = "{rp\t%0, %1|p\t%1, %0}";
7183 else
7184 p = "{p\t%1, %0|rp\t%0, %1}";
7185 #else
7186 if (STACK_TOP_P (operands[0]))
7187 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7188 else
7189 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7190 #endif
7191 break;
7192 }
7193
7194 if (STACK_TOP_P (operands[0]))
7195 {
7196 if (STACK_TOP_P (operands[1]))
7197 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7198 else
7199 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7200 break;
7201 }
7202 else if (STACK_TOP_P (operands[1]))
7203 {
7204 #if SYSV386_COMPAT
7205 p = "{\t%1, %0|r\t%0, %1}";
7206 #else
7207 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7208 #endif
7209 }
7210 else
7211 {
7212 #if SYSV386_COMPAT
7213 p = "{r\t%2, %0|\t%0, %2}";
7214 #else
7215 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7216 #endif
7217 }
7218 break;
7219
7220 default:
7221 abort ();
7222 }
7223
7224 strcat (buf, p);
7225 return buf;
7226 }
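/* For example, an SFmode add on SSE registers returns the template
   "addss\t{%2, %0|%0, %2}", while a 387 add whose second source is a
   memory operand returns "fadd%z2\t%2" via the PLUS/MULT case above.  */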
7227
7228 /* Output code to initialize control word copies used by
7229 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
7230 is set to a control word that rounds downwards. */
7231 void
7232 emit_i387_cw_initialization (normal, round_down)
7233 rtx normal, round_down;
7234 {
7235 rtx reg = gen_reg_rtx (HImode);
7236
7237 emit_insn (gen_x86_fnstcw_1 (normal));
7238 emit_move_insn (reg, normal);
7239 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7240 && !TARGET_64BIT)
7241 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7242 else
7243 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7244 emit_move_insn (round_down, reg);
7245 }
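/* ORing 0xc00 into the saved control word sets both rounding-control
   bits (RC = 11, i.e. truncation toward zero), which is what the
   trunc?f?i patterns need; NORMAL keeps the original word so it can be
   restored afterwards, as output_fix_trunc does with its second fldcw.  */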
7246
7247 /* Output code for INSN to convert a float to a signed int. OPERANDS
7248 are the insn operands. The output may be [HSD]Imode and the input
7249 operand may be [SDX]Fmode. */
7250
7251 const char *
7252 output_fix_trunc (insn, operands)
7253 rtx insn;
7254 rtx *operands;
7255 {
7256 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7257 int dimode_p = GET_MODE (operands[0]) == DImode;
7258
7259 /* Jump through a hoop or two for DImode, since the hardware has no
7260 non-popping instruction. We used to do this a different way, but
7261 that was somewhat fragile and broke with post-reload splitters. */
7262 if (dimode_p && !stack_top_dies)
7263 output_asm_insn ("fld\t%y1", operands);
7264
7265 if (!STACK_TOP_P (operands[1]))
7266 abort ();
7267
7268 if (GET_CODE (operands[0]) != MEM)
7269 abort ();
7270
7271 output_asm_insn ("fldcw\t%3", operands);
7272 if (stack_top_dies || dimode_p)
7273 output_asm_insn ("fistp%z0\t%0", operands);
7274 else
7275 output_asm_insn ("fist%z0\t%0", operands);
7276 output_asm_insn ("fldcw\t%2", operands);
7277
7278 return "";
7279 }
7280
7281 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7282 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7283 when fucom should be used. */
7284
7285 const char *
7286 output_fp_compare (insn, operands, eflags_p, unordered_p)
7287 rtx insn;
7288 rtx *operands;
7289 int eflags_p, unordered_p;
7290 {
7291 int stack_top_dies;
7292 rtx cmp_op0 = operands[0];
7293 rtx cmp_op1 = operands[1];
7294 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7295
7296 if (eflags_p == 2)
7297 {
7298 cmp_op0 = cmp_op1;
7299 cmp_op1 = operands[2];
7300 }
7301 if (is_sse)
7302 {
7303 if (GET_MODE (operands[0]) == SFmode)
7304 if (unordered_p)
7305 return "ucomiss\t{%1, %0|%0, %1}";
7306 else
7307 return "comiss\t{%1, %0|%0, %y}";
7308 else
7309 if (unordered_p)
7310 return "ucomisd\t{%1, %0|%0, %1}";
7311 else
7312 return "comisd\t{%1, %0|%0, %y}";
7313 }
7314
7315 if (! STACK_TOP_P (cmp_op0))
7316 abort ();
7317
7318 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7319
7320 if (STACK_REG_P (cmp_op1)
7321 && stack_top_dies
7322 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7323 && REGNO (cmp_op1) != FIRST_STACK_REG)
7324 {
7325 /* If the top of the 387 stack dies, and the other operand
7326 is also a stack register that dies, then this must be a
7327 `fcompp' float compare. */
7328
7329 if (eflags_p == 1)
7330 {
7331 /* There is no double popping fcomi variant. Fortunately,
7332 eflags is immune from the fstp's cc clobbering. */
7333 if (unordered_p)
7334 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7335 else
7336 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7337 return "fstp\t%y0";
7338 }
7339 else
7340 {
7341 if (eflags_p == 2)
7342 {
7343 if (unordered_p)
7344 return "fucompp\n\tfnstsw\t%0";
7345 else
7346 return "fcompp\n\tfnstsw\t%0";
7347 }
7348 else
7349 {
7350 if (unordered_p)
7351 return "fucompp";
7352 else
7353 return "fcompp";
7354 }
7355 }
7356 }
7357 else
7358 {
7359 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7360
7361 static const char * const alt[24] =
7362 {
7363 "fcom%z1\t%y1",
7364 "fcomp%z1\t%y1",
7365 "fucom%z1\t%y1",
7366 "fucomp%z1\t%y1",
7367
7368 "ficom%z1\t%y1",
7369 "ficomp%z1\t%y1",
7370 NULL,
7371 NULL,
7372
7373 "fcomi\t{%y1, %0|%0, %y1}",
7374 "fcomip\t{%y1, %0|%0, %y1}",
7375 "fucomi\t{%y1, %0|%0, %y1}",
7376 "fucomip\t{%y1, %0|%0, %y1}",
7377
7378 NULL,
7379 NULL,
7380 NULL,
7381 NULL,
7382
7383 "fcom%z2\t%y2\n\tfnstsw\t%0",
7384 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7385 "fucom%z2\t%y2\n\tfnstsw\t%0",
7386 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7387
7388 "ficom%z2\t%y2\n\tfnstsw\t%0",
7389 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7390 NULL,
7391 NULL
7392 };
7393
7394 int mask;
7395 const char *ret;
7396
7397 mask = eflags_p << 3;
7398 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7399 mask |= unordered_p << 1;
7400 mask |= stack_top_dies;
7401
7402 if (mask >= 24)
7403 abort ();
7404 ret = alt[mask];
7405 if (ret == NULL)
7406 abort ();
7407
7408 return ret;
7409 }
7410 }
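/* The table index above is a simple 4-bit code.  E.g. an fcomi-style
   compare (eflags_p == 1) of two FP registers, ordered, with the top of
   stack dying gives
       mask = (1 << 3) | (0 << 2) | (0 << 1) | 1 = 9
   which selects "fcomip\t{%y1, %0|%0, %y1}".  */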
7411
7412 void
7413 ix86_output_addr_vec_elt (file, value)
7414 FILE *file;
7415 int value;
7416 {
7417 const char *directive = ASM_LONG;
7418
7419 if (TARGET_64BIT)
7420 {
7421 #ifdef ASM_QUAD
7422 directive = ASM_QUAD;
7423 #else
7424 abort ();
7425 #endif
7426 }
7427
7428 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7429 }
7430
7431 void
7432 ix86_output_addr_diff_elt (file, value, rel)
7433 FILE *file;
7434 int value, rel;
7435 {
7436 if (TARGET_64BIT)
7437 fprintf (file, "%s%s%d-%s%d\n",
7438 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7439 else if (HAVE_AS_GOTOFF_IN_DATA)
7440 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7441 #if TARGET_MACHO
7442 else if (TARGET_MACHO)
7443 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7444 machopic_function_base_name () + 1);
7445 #endif
7446 else
7447 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7448 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7449 }
7450 \f
7451 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7452 for the target. */
7453
7454 void
7455 ix86_expand_clear (dest)
7456 rtx dest;
7457 {
7458 rtx tmp;
7459
7460 /* We play register width games, which are only valid after reload. */
7461 if (!reload_completed)
7462 abort ();
7463
7464 /* Avoid HImode and its attendant prefix byte. */
7465 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7466 dest = gen_rtx_REG (SImode, REGNO (dest));
7467
7468 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7469
7470 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7471 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7472 {
7473 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7474 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7475 }
7476
7477 emit_insn (tmp);
7478 }
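/* So clearing, say, an HImode register after reload works on the widened
   SImode register: either a plain "mov $0" (when TARGET_USE_MOV0 is set
   and we are not optimizing for size) or an "xor reg,reg" together with
   the flags clobber added above, avoiding the 16-bit prefix byte.  */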
7479
7480 /* X is an unchanging MEM. If it is a constant pool reference, return
7481 the constant pool rtx, else NULL. */
7482
7483 static rtx
7484 maybe_get_pool_constant (x)
7485 rtx x;
7486 {
7487 x = XEXP (x, 0);
7488
7489 if (flag_pic)
7490 {
7491 if (GET_CODE (x) != PLUS)
7492 return NULL_RTX;
7493 if (XEXP (x, 0) != pic_offset_table_rtx)
7494 return NULL_RTX;
7495 x = XEXP (x, 1);
7496 if (GET_CODE (x) != CONST)
7497 return NULL_RTX;
7498 x = XEXP (x, 0);
7499 if (GET_CODE (x) != UNSPEC)
7500 return NULL_RTX;
7501 if (XINT (x, 1) != UNSPEC_GOTOFF)
7502 return NULL_RTX;
7503 x = XVECEXP (x, 0, 0);
7504 }
7505
7506 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7507 return get_pool_constant (x);
7508
7509 return NULL_RTX;
7510 }
7511
7512 void
7513 ix86_expand_move (mode, operands)
7514 enum machine_mode mode;
7515 rtx operands[];
7516 {
7517 int strict = (reload_in_progress || reload_completed);
7518 rtx insn, op0, op1, tmp;
7519
7520 op0 = operands[0];
7521 op1 = operands[1];
7522
7523 /* ??? We have a slight problem. We need to say that tls symbols are
7524 not legitimate constants so that reload does not helpfully reload
7525 these constants from a REG_EQUIV, which we cannot handle. (Recall
7526 that general- and local-dynamic address resolution requires a
7527 function call.)
7528
7529 However, if we say that tls symbols are not legitimate constants,
7530 then emit_move_insn helpfully drops them into the constant pool.
7531
7532 It is far easier to work around emit_move_insn than reload. Recognize
7533 the MEM that we would have created and extract the symbol_ref. */
7534
7535 if (mode == Pmode
7536 && GET_CODE (op1) == MEM
7537 && RTX_UNCHANGING_P (op1))
7538 {
7539 tmp = maybe_get_pool_constant (op1);
7540 /* Note that we only care about symbolic constants here, which
7541 unlike CONST_INT will always have a proper mode. */
7542 if (tmp && GET_MODE (tmp) == Pmode)
7543 op1 = tmp;
7544 }
7545
7546 if (tls_symbolic_operand (op1, Pmode))
7547 {
7548 op1 = legitimize_address (op1, op1, VOIDmode);
7549 if (GET_CODE (op0) == MEM)
7550 {
7551 tmp = gen_reg_rtx (mode);
7552 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7553 op1 = tmp;
7554 }
7555 }
7556 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7557 {
7558 #if TARGET_MACHO
7559 if (MACHOPIC_PURE)
7560 {
7561 rtx temp = ((reload_in_progress
7562 || ((op0 && GET_CODE (op0) == REG)
7563 && mode == Pmode))
7564 ? op0 : gen_reg_rtx (Pmode));
7565 op1 = machopic_indirect_data_reference (op1, temp);
7566 op1 = machopic_legitimize_pic_address (op1, mode,
7567 temp == op1 ? 0 : temp);
7568 }
7569 else
7570 {
7571 if (MACHOPIC_INDIRECT)
7572 op1 = machopic_indirect_data_reference (op1, 0);
7573 }
7574 if (op0 != op1)
7575 {
7576 insn = gen_rtx_SET (VOIDmode, op0, op1);
7577 emit_insn (insn);
7578 }
7579 return;
7580 #endif /* TARGET_MACHO */
7581 if (GET_CODE (op0) == MEM)
7582 op1 = force_reg (Pmode, op1);
7583 else
7584 {
7585 rtx temp = op0;
7586 if (GET_CODE (temp) != REG)
7587 temp = gen_reg_rtx (Pmode);
7588 temp = legitimize_pic_address (op1, temp);
7589 if (temp == op0)
7590 return;
7591 op1 = temp;
7592 }
7593 }
7594 else
7595 {
7596 if (GET_CODE (op0) == MEM
7597 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7598 || !push_operand (op0, mode))
7599 && GET_CODE (op1) == MEM)
7600 op1 = force_reg (mode, op1);
7601
7602 if (push_operand (op0, mode)
7603 && ! general_no_elim_operand (op1, mode))
7604 op1 = copy_to_mode_reg (mode, op1);
7605
7606 /* Force large constants in 64bit compilation into a register
7607 to get them CSEed. */
7608 if (TARGET_64BIT && mode == DImode
7609 && immediate_operand (op1, mode)
7610 && !x86_64_zero_extended_value (op1)
7611 && !register_operand (op0, mode)
7612 && optimize && !reload_completed && !reload_in_progress)
7613 op1 = copy_to_mode_reg (mode, op1);
7614
7615 if (FLOAT_MODE_P (mode))
7616 {
7617 /* If we are loading a floating point constant to a register,
7618 force the value to memory now, since we'll get better code
7619 out of the back end. */
7620
7621 if (strict)
7622 ;
7623 else if (GET_CODE (op1) == CONST_DOUBLE
7624 && register_operand (op0, mode))
7625 op1 = validize_mem (force_const_mem (mode, op1));
7626 }
7627 }
7628
7629 insn = gen_rtx_SET (VOIDmode, op0, op1);
7630
7631 emit_insn (insn);
7632 }
7633
7634 void
7635 ix86_expand_vector_move (mode, operands)
7636 enum machine_mode mode;
7637 rtx operands[];
7638 {
7639 /* Force constants other than zero into memory. We do not know how
7640 the instructions used to build constants modify the upper 64 bits
7641 of the register; once we have that information we may be able
7642 to handle some of them more efficiently. */
7643 if ((reload_in_progress | reload_completed) == 0
7644 && register_operand (operands[0], mode)
7645 && CONSTANT_P (operands[1]))
7646 {
7647 rtx addr = gen_reg_rtx (Pmode);
7648 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7649 operands[1] = gen_rtx_MEM (mode, addr);
7650 }
7651
7652 /* Make operand1 a register if it isn't already. */
7653 if ((reload_in_progress | reload_completed) == 0
7654 && !register_operand (operands[0], mode)
7655 && !register_operand (operands[1], mode)
7656 && operands[1] != CONST0_RTX (mode))
7657 {
7658 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7659 emit_move_insn (operands[0], temp);
7660 return;
7661 }
7662
7663 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7664 }
7665
7666 /* Attempt to expand a binary operator. Make the expansion closer to the
7667 actual machine than just general_operand, which would allow 3 separate
7668 memory references (one output, two input) in a single insn. */
7669
7670 void
7671 ix86_expand_binary_operator (code, mode, operands)
7672 enum rtx_code code;
7673 enum machine_mode mode;
7674 rtx operands[];
7675 {
7676 int matching_memory;
7677 rtx src1, src2, dst, op, clob;
7678
7679 dst = operands[0];
7680 src1 = operands[1];
7681 src2 = operands[2];
7682
7683 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7684 if (GET_RTX_CLASS (code) == 'c'
7685 && (rtx_equal_p (dst, src2)
7686 || immediate_operand (src1, mode)))
7687 {
7688 rtx temp = src1;
7689 src1 = src2;
7690 src2 = temp;
7691 }
7692
7693 /* If the destination is memory, and we do not have matching source
7694 operands, do things in registers. */
7695 matching_memory = 0;
7696 if (GET_CODE (dst) == MEM)
7697 {
7698 if (rtx_equal_p (dst, src1))
7699 matching_memory = 1;
7700 else if (GET_RTX_CLASS (code) == 'c'
7701 && rtx_equal_p (dst, src2))
7702 matching_memory = 2;
7703 else
7704 dst = gen_reg_rtx (mode);
7705 }
7706
7707 /* Both source operands cannot be in memory. */
7708 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7709 {
7710 if (matching_memory != 2)
7711 src2 = force_reg (mode, src2);
7712 else
7713 src1 = force_reg (mode, src1);
7714 }
7715
7716 /* If the operation is not commutative, source 1 cannot be a constant
7717 or non-matching memory. */
7718 if ((CONSTANT_P (src1)
7719 || (!matching_memory && GET_CODE (src1) == MEM))
7720 && GET_RTX_CLASS (code) != 'c')
7721 src1 = force_reg (mode, src1);
7722
7723 /* If optimizing, copy to regs to improve CSE */
7724 if (optimize && ! no_new_pseudos)
7725 {
7726 if (GET_CODE (dst) == MEM)
7727 dst = gen_reg_rtx (mode);
7728 if (GET_CODE (src1) == MEM)
7729 src1 = force_reg (mode, src1);
7730 if (GET_CODE (src2) == MEM)
7731 src2 = force_reg (mode, src2);
7732 }
7733
7734 /* Emit the instruction. */
7735
7736 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7737 if (reload_in_progress)
7738 {
7739 /* Reload doesn't know about the flags register, and doesn't know that
7740 it doesn't want to clobber it. We can only do this with PLUS. */
7741 if (code != PLUS)
7742 abort ();
7743 emit_insn (op);
7744 }
7745 else
7746 {
7747 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7748 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7749 }
7750
7751 /* Fix up the destination if needed. */
7752 if (dst != operands[0])
7753 emit_move_insn (operands[0], dst);
7754 }
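
/* A minimal sketch (illustrative only, not quoted from i386.md) of how a
   machine-description expander typically invokes the helper above and then
   finishes expansion itself; the PLUS/SImode choice is just an example:

       ix86_expand_binary_operator (PLUS, SImode, operands);
       DONE;
*/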
7755
7756 /* Return TRUE or FALSE depending on whether the binary operator meets the
7757 appropriate constraints. */
7758
7759 int
7760 ix86_binary_operator_ok (code, mode, operands)
7761 enum rtx_code code;
7762 enum machine_mode mode ATTRIBUTE_UNUSED;
7763 rtx operands[3];
7764 {
7765 /* Both source operands cannot be in memory. */
7766 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7767 return 0;
7768 /* If the operation is not commutative, source 1 cannot be a constant. */
7769 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7770 return 0;
7771 /* If the destination is memory, we must have a matching source operand. */
7772 if (GET_CODE (operands[0]) == MEM
7773 && ! (rtx_equal_p (operands[0], operands[1])
7774 || (GET_RTX_CLASS (code) == 'c'
7775 && rtx_equal_p (operands[0], operands[2]))))
7776 return 0;
7777 /* If the operation is not commutative and source 1 is memory, we must
7778 have a matching destination. */
7779 if (GET_CODE (operands[1]) == MEM
7780 && GET_RTX_CLASS (code) != 'c'
7781 && ! rtx_equal_p (operands[0], operands[1]))
7782 return 0;
7783 return 1;
7784 }
7785
7786 /* Attempt to expand a unary operator. Make the expansion closer to the
7787 actual machine than just general_operand, which will allow 2 separate
7788 memory references (one output, one input) in a single insn. */
7789
7790 void
7791 ix86_expand_unary_operator (code, mode, operands)
7792 enum rtx_code code;
7793 enum machine_mode mode;
7794 rtx operands[];
7795 {
7796 int matching_memory;
7797 rtx src, dst, op, clob;
7798
7799 dst = operands[0];
7800 src = operands[1];
7801
7802 /* If the destination is memory, and we do not have matching source
7803 operands, do things in registers. */
7804 matching_memory = 0;
7805 if (GET_CODE (dst) == MEM)
7806 {
7807 if (rtx_equal_p (dst, src))
7808 matching_memory = 1;
7809 else
7810 dst = gen_reg_rtx (mode);
7811 }
7812
7813 /* When source operand is memory, destination must match. */
7814 if (!matching_memory && GET_CODE (src) == MEM)
7815 src = force_reg (mode, src);
7816
7817 /* If optimizing, copy to regs to improve CSE */
7818 if (optimize && ! no_new_pseudos)
7819 {
7820 if (GET_CODE (dst) == MEM)
7821 dst = gen_reg_rtx (mode);
7822 if (GET_CODE (src) == MEM)
7823 src = force_reg (mode, src);
7824 }
7825
7826 /* Emit the instruction. */
7827
7828 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7829 if (reload_in_progress || code == NOT)
7830 {
7831 /* Reload doesn't know about the flags register, and doesn't know that
7832 it doesn't want to clobber it. */
7833 if (code != NOT)
7834 abort ();
7835 emit_insn (op);
7836 }
7837 else
7838 {
7839 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7840 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7841 }
7842
7843 /* Fix up the destination if needed. */
7844 if (dst != operands[0])
7845 emit_move_insn (operands[0], dst);
7846 }
7847
7848 /* Return TRUE or FALSE depending on whether the unary operator meets the
7849 appropriate constraints. */
7850
7851 int
7852 ix86_unary_operator_ok (code, mode, operands)
7853 enum rtx_code code ATTRIBUTE_UNUSED;
7854 enum machine_mode mode ATTRIBUTE_UNUSED;
7855 rtx operands[2] ATTRIBUTE_UNUSED;
7856 {
7857 /* If one of operands is memory, source and destination must match. */
7858 if ((GET_CODE (operands[0]) == MEM
7859 || GET_CODE (operands[1]) == MEM)
7860 && ! rtx_equal_p (operands[0], operands[1]))
7861 return FALSE;
7862 return TRUE;
7863 }
7864
7865 /* Return TRUE or FALSE depending on whether the first SET in INSN
7866 has source and destination with matching CC modes, and whether the
7867 CC mode is at least as constrained as REQ_MODE. */
7868
7869 int
7870 ix86_match_ccmode (insn, req_mode)
7871 rtx insn;
7872 enum machine_mode req_mode;
7873 {
7874 rtx set;
7875 enum machine_mode set_mode;
7876
7877 set = PATTERN (insn);
7878 if (GET_CODE (set) == PARALLEL)
7879 set = XVECEXP (set, 0, 0);
7880 if (GET_CODE (set) != SET)
7881 abort ();
7882 if (GET_CODE (SET_SRC (set)) != COMPARE)
7883 abort ();
7884
7885 set_mode = GET_MODE (SET_DEST (set));
7886 switch (set_mode)
7887 {
7888 case CCNOmode:
7889 if (req_mode != CCNOmode
7890 && (req_mode != CCmode
7891 || XEXP (SET_SRC (set), 1) != const0_rtx))
7892 return 0;
7893 break;
7894 case CCmode:
7895 if (req_mode == CCGCmode)
7896 return 0;
7897 /* FALLTHRU */
7898 case CCGCmode:
7899 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7900 return 0;
7901 /* FALLTHRU */
7902 case CCGOCmode:
7903 if (req_mode == CCZmode)
7904 return 0;
7905 /* FALLTHRU */
7906 case CCZmode:
7907 break;
7908
7909 default:
7910 abort ();
7911 }
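  /* Reading of the switch above (a summary, not normative): a compare that
     produced CCZmode satisfies any request; CCGOCmode satisfies anything but
     a CCZmode request; CCGCmode additionally fails CCGOCmode and CCNOmode
     requests; plain CCmode is accepted only for a CCmode request; and
     CCNOmode is accepted for a CCNOmode request, or for a CCmode request
     whose comparison is against const0_rtx.  */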
7912
7913 return (GET_MODE (SET_SRC (set)) == set_mode);
7914 }
7915
7916 /* Generate insn patterns to do an integer compare of OPERANDS. */
7917
7918 static rtx
7919 ix86_expand_int_compare (code, op0, op1)
7920 enum rtx_code code;
7921 rtx op0, op1;
7922 {
7923 enum machine_mode cmpmode;
7924 rtx tmp, flags;
7925
7926 cmpmode = SELECT_CC_MODE (code, op0, op1);
7927 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7928
7929 /* This is very simple, but making the interface the same as in the
7930 FP case makes the rest of the code easier. */
7931 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7932 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7933
7934 /* Return the test that should be put into the flags user, i.e.
7935 the bcc, scc, or cmov instruction. */
7936 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7937 }
7938
7939 /* Figure out whether to use ordered or unordered fp comparisons.
7940 Return the appropriate mode to use. */
7941
7942 enum machine_mode
7943 ix86_fp_compare_mode (code)
7944 enum rtx_code code ATTRIBUTE_UNUSED;
7945 {
7946 /* ??? In order to make all comparisons reversible, we do all comparisons
7947 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7948 between all forms of trapping and nontrapping comparisons, we can make inequality
7949 comparisons trapping again, since it results in better code when using
7950 FCOM based compares. */
7951 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7952 }
7953
7954 enum machine_mode
7955 ix86_cc_mode (code, op0, op1)
7956 enum rtx_code code;
7957 rtx op0, op1;
7958 {
7959 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7960 return ix86_fp_compare_mode (code);
7961 switch (code)
7962 {
7963 /* Only zero flag is needed. */
7964 case EQ: /* ZF=0 */
7965 case NE: /* ZF!=0 */
7966 return CCZmode;
7967 /* Codes needing carry flag. */
7968 case GEU: /* CF=0 */
7969 case GTU: /* CF=0 & ZF=0 */
7970 case LTU: /* CF=1 */
7971 case LEU: /* CF=1 | ZF=1 */
7972 return CCmode;
7973 /* Codes possibly doable only with sign flag when
7974 comparing against zero. */
7975 case GE: /* SF=OF or SF=0 */
7976 case LT: /* SF<>OF or SF=1 */
7977 if (op1 == const0_rtx)
7978 return CCGOCmode;
7979 else
7980 /* For other cases the carry flag is not required. */
7981 return CCGCmode;
7982 /* Codes doable only with sign flag when comparing
7983 against zero, but we lack a jump instruction for it,
7984 so we need to use relational tests against overflow,
7985 which thus needs to be zero. */
7986 case GT: /* ZF=0 & SF=OF */
7987 case LE: /* ZF=1 | SF<>OF */
7988 if (op1 == const0_rtx)
7989 return CCNOmode;
7990 else
7991 return CCGCmode;
7992 /* The strcmp pattern does (use flags), and combine may ask us for the
7993 proper mode. */
7994 case USE:
7995 return CCmode;
7996 default:
7997 abort ();
7998 }
7999 }
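
/* Two worked examples of the mapping above (illustrative only):
   (gtu x y) needs both the carry and zero flags, so it gets plain CCmode;
   (gt x (const_int 0)) can be decided from SF/OF alone and gets CCNOmode,
   while (gt x y) for a nonzero y falls back to CCGCmode.  */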
8000
8001 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8002
8003 int
8004 ix86_use_fcomi_compare (code)
8005 enum rtx_code code ATTRIBUTE_UNUSED;
8006 {
8007 enum rtx_code swapped_code = swap_condition (code);
8008 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8009 || (ix86_fp_comparison_cost (swapped_code)
8010 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8011 }
8012
8013 /* Swap, force into registers, or otherwise massage the two operands
8014 to a fp comparison. The operands are updated in place; the new
8015 comparison code is returned. */
8016
8017 static enum rtx_code
8018 ix86_prepare_fp_compare_args (code, pop0, pop1)
8019 enum rtx_code code;
8020 rtx *pop0, *pop1;
8021 {
8022 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8023 rtx op0 = *pop0, op1 = *pop1;
8024 enum machine_mode op_mode = GET_MODE (op0);
8025 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8026
8027 /* All of the unordered compare instructions only work on registers.
8028 The same is true of the XFmode compare instructions and of the
8029 fcomi compare instructions. */
8030
8031 if (!is_sse
8032 && (fpcmp_mode == CCFPUmode
8033 || op_mode == XFmode
8034 || op_mode == TFmode
8035 || ix86_use_fcomi_compare (code)))
8036 {
8037 op0 = force_reg (op_mode, op0);
8038 op1 = force_reg (op_mode, op1);
8039 }
8040 else
8041 {
8042 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8043 things around if they appear profitable, otherwise force op0
8044 into a register. */
8045
8046 if (standard_80387_constant_p (op0) == 0
8047 || (GET_CODE (op0) == MEM
8048 && ! (standard_80387_constant_p (op1) == 0
8049 || GET_CODE (op1) == MEM)))
8050 {
8051 rtx tmp;
8052 tmp = op0, op0 = op1, op1 = tmp;
8053 code = swap_condition (code);
8054 }
8055
8056 if (GET_CODE (op0) != REG)
8057 op0 = force_reg (op_mode, op0);
8058
8059 if (CONSTANT_P (op1))
8060 {
8061 if (standard_80387_constant_p (op1))
8062 op1 = force_reg (op_mode, op1);
8063 else
8064 op1 = validize_mem (force_const_mem (op_mode, op1));
8065 }
8066 }
8067
8068 /* Try to rearrange the comparison to make it cheaper. */
8069 if (ix86_fp_comparison_cost (code)
8070 > ix86_fp_comparison_cost (swap_condition (code))
8071 && (GET_CODE (op1) == REG || !no_new_pseudos))
8072 {
8073 rtx tmp;
8074 tmp = op0, op0 = op1, op1 = tmp;
8075 code = swap_condition (code);
8076 if (GET_CODE (op0) != REG)
8077 op0 = force_reg (op_mode, op0);
8078 }
8079
8080 *pop0 = op0;
8081 *pop1 = op1;
8082 return code;
8083 }
8084
8085 /* Convert comparison codes we use to represent FP comparison to integer
8086 code that will result in a proper branch. Return UNKNOWN if no such code
8087 is available. */
8088 static enum rtx_code
8089 ix86_fp_compare_code_to_integer (code)
8090 enum rtx_code code;
8091 {
8092 switch (code)
8093 {
8094 case GT:
8095 return GTU;
8096 case GE:
8097 return GEU;
8098 case ORDERED:
8099 case UNORDERED:
8100 return code;
8101 break;
8102 case UNEQ:
8103 return EQ;
8104 break;
8105 case UNLT:
8106 return LTU;
8107 break;
8108 case UNLE:
8109 return LEU;
8110 break;
8111 case LTGT:
8112 return NE;
8113 break;
8114 default:
8115 return UNKNOWN;
8116 }
8117 }
8118
8119 /* Split comparison code CODE into comparisons we can do using branch
8120 instructions. BYPASS_CODE is the comparison code for the branch that will
8121 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8122 is not required, its value is set to NIL.
8123 We never require more than two branches. */
8124 static void
8125 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8126 enum rtx_code code, *bypass_code, *first_code, *second_code;
8127 {
8128 *first_code = code;
8129 *bypass_code = NIL;
8130 *second_code = NIL;
8131
8132 /* The fcomi comparison sets flags as follows:
8133
8134 cmp ZF PF CF
8135 > 0 0 0
8136 < 0 0 1
8137 = 1 0 0
8138 un 1 1 1 */
8139
8140 switch (code)
8141 {
8142 case GT: /* GTU - CF=0 & ZF=0 */
8143 case GE: /* GEU - CF=0 */
8144 case ORDERED: /* PF=0 */
8145 case UNORDERED: /* PF=1 */
8146 case UNEQ: /* EQ - ZF=1 */
8147 case UNLT: /* LTU - CF=1 */
8148 case UNLE: /* LEU - CF=1 | ZF=1 */
8149 case LTGT: /* EQ - ZF=0 */
8150 break;
8151 case LT: /* LTU - CF=1 - fails on unordered */
8152 *first_code = UNLT;
8153 *bypass_code = UNORDERED;
8154 break;
8155 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8156 *first_code = UNLE;
8157 *bypass_code = UNORDERED;
8158 break;
8159 case EQ: /* EQ - ZF=1 - fails on unordered */
8160 *first_code = UNEQ;
8161 *bypass_code = UNORDERED;
8162 break;
8163 case NE: /* NE - ZF=0 - fails on unordered */
8164 *first_code = LTGT;
8165 *second_code = UNORDERED;
8166 break;
8167 case UNGE: /* GEU - CF=0 - fails on unordered */
8168 *first_code = GE;
8169 *second_code = UNORDERED;
8170 break;
8171 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8172 *first_code = GT;
8173 *second_code = UNORDERED;
8174 break;
8175 default:
8176 abort ();
8177 }
8178 if (!TARGET_IEEE_FP)
8179 {
8180 *second_code = NIL;
8181 *bypass_code = NIL;
8182 }
8183 }
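
/* A worked example of the splitting above (illustrative only): with
   TARGET_IEEE_FP an EQ comparison becomes FIRST_CODE = UNEQ with
   BYPASS_CODE = UNORDERED, so the bypass branch is taken whenever the
   operands compare unordered and a NaN can never reach the "equal" arm;
   without TARGET_IEEE_FP both extra codes are reset to NIL and a single
   branch suffices.  */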
8184
8185 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8186 All of the following functions use the number of instructions as the cost metric.
8187 In the future this should be tweaked to compute bytes for optimize_size and to
8188 take into account the performance of various instructions on various CPUs. */
8189 static int
8190 ix86_fp_comparison_arithmetics_cost (code)
8191 enum rtx_code code;
8192 {
8193 if (!TARGET_IEEE_FP)
8194 return 4;
8195 /* The cost of code output by ix86_expand_fp_compare. */
8196 switch (code)
8197 {
8198 case UNLE:
8199 case UNLT:
8200 case LTGT:
8201 case GT:
8202 case GE:
8203 case UNORDERED:
8204 case ORDERED:
8205 case UNEQ:
8206 return 4;
8207 break;
8208 case LT:
8209 case NE:
8210 case EQ:
8211 case UNGE:
8212 return 5;
8213 break;
8214 case LE:
8215 case UNGT:
8216 return 6;
8217 break;
8218 default:
8219 abort ();
8220 }
8221 }
8222
8223 /* Return cost of comparison done using fcomi operation.
8224 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8225 static int
8226 ix86_fp_comparison_fcomi_cost (code)
8227 enum rtx_code code;
8228 {
8229 enum rtx_code bypass_code, first_code, second_code;
8230 /* Return an arbitrarily high cost when the instruction is not supported - this
8231 prevents gcc from using it. */
8232 if (!TARGET_CMOVE)
8233 return 1024;
8234 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8235 return (bypass_code != NIL || second_code != NIL) + 2;
8236 }
8237
8238 /* Return cost of comparison done using sahf operation.
8239 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8240 static int
8241 ix86_fp_comparison_sahf_cost (code)
8242 enum rtx_code code;
8243 {
8244 enum rtx_code bypass_code, first_code, second_code;
8245 /* Return an arbitrarily high cost when the instruction is not preferred - this
8246 prevents gcc from using it. */
8247 if (!TARGET_USE_SAHF && !optimize_size)
8248 return 1024;
8249 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8250 return (bypass_code != NIL || second_code != NIL) + 3;
8251 }
8252
8253 /* Compute cost of the comparison done using any method.
8254 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8255 static int
8256 ix86_fp_comparison_cost (code)
8257 enum rtx_code code;
8258 {
8259 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8260 int min;
8261
8262 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8263 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8264
8265 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8266 if (min > sahf_cost)
8267 min = sahf_cost;
8268 if (min > fcomi_cost)
8269 min = fcomi_cost;
8270 return min;
8271 }
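
/* For reference, the costs combined above are: fcomi is 2 insns (3 when a
   bypass or second branch is needed), sahf is 3 (or 4), and the fcom +
   arithmetic sequence is 4-6 insns depending on the comparison code; the
   cheapest of the three is returned.  */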
8272
8273 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8274
8275 static rtx
8276 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8277 enum rtx_code code;
8278 rtx op0, op1, scratch;
8279 rtx *second_test;
8280 rtx *bypass_test;
8281 {
8282 enum machine_mode fpcmp_mode, intcmp_mode;
8283 rtx tmp, tmp2;
8284 int cost = ix86_fp_comparison_cost (code);
8285 enum rtx_code bypass_code, first_code, second_code;
8286
8287 fpcmp_mode = ix86_fp_compare_mode (code);
8288 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8289
8290 if (second_test)
8291 *second_test = NULL_RTX;
8292 if (bypass_test)
8293 *bypass_test = NULL_RTX;
8294
8295 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8296
8297 /* Do fcomi/sahf based test when profitable. */
8298 if ((bypass_code == NIL || bypass_test)
8299 && (second_code == NIL || second_test)
8300 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8301 {
8302 if (TARGET_CMOVE)
8303 {
8304 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8305 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8306 tmp);
8307 emit_insn (tmp);
8308 }
8309 else
8310 {
8311 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8312 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8313 if (!scratch)
8314 scratch = gen_reg_rtx (HImode);
8315 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8316 emit_insn (gen_x86_sahf_1 (scratch));
8317 }
8318
8319 /* The FP codes work out to act like unsigned. */
8320 intcmp_mode = fpcmp_mode;
8321 code = first_code;
8322 if (bypass_code != NIL)
8323 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8324 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8325 const0_rtx);
8326 if (second_code != NIL)
8327 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8328 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8329 const0_rtx);
8330 }
8331 else
8332 {
8333 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8334 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8335 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8336 if (!scratch)
8337 scratch = gen_reg_rtx (HImode);
8338 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8339
8340 /* In the unordered case, we have to check C2 for NaNs, which
8341 doesn't happen to work out to anything nice combination-wise.
8342 So do some bit twiddling on the value we've got in AH to come
8343 up with an appropriate set of condition codes. */
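      /* For reference (standard x87 status-word layout): the fnstsw above
         stores the status word into the HImode scratch, and its high byte
         (what would be AH) holds C0 = 0x01, C2 = 0x04 and C3 = 0x40, so a
         mask of 0x45 selects C0|C2|C3 and 0x44 selects C2|C3 in the tests
         below.  */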
8344
8345 intcmp_mode = CCNOmode;
8346 switch (code)
8347 {
8348 case GT:
8349 case UNGT:
8350 if (code == GT || !TARGET_IEEE_FP)
8351 {
8352 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8353 code = EQ;
8354 }
8355 else
8356 {
8357 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8358 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8359 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8360 intcmp_mode = CCmode;
8361 code = GEU;
8362 }
8363 break;
8364 case LT:
8365 case UNLT:
8366 if (code == LT && TARGET_IEEE_FP)
8367 {
8368 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8369 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8370 intcmp_mode = CCmode;
8371 code = EQ;
8372 }
8373 else
8374 {
8375 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8376 code = NE;
8377 }
8378 break;
8379 case GE:
8380 case UNGE:
8381 if (code == GE || !TARGET_IEEE_FP)
8382 {
8383 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8384 code = EQ;
8385 }
8386 else
8387 {
8388 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8389 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8390 GEN_INT (0x01)));
8391 code = NE;
8392 }
8393 break;
8394 case LE:
8395 case UNLE:
8396 if (code == LE && TARGET_IEEE_FP)
8397 {
8398 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8399 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8400 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8401 intcmp_mode = CCmode;
8402 code = LTU;
8403 }
8404 else
8405 {
8406 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8407 code = NE;
8408 }
8409 break;
8410 case EQ:
8411 case UNEQ:
8412 if (code == EQ && TARGET_IEEE_FP)
8413 {
8414 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8415 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8416 intcmp_mode = CCmode;
8417 code = EQ;
8418 }
8419 else
8420 {
8421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8422 code = NE;
8423 break;
8424 }
8425 break;
8426 case NE:
8427 case LTGT:
8428 if (code == NE && TARGET_IEEE_FP)
8429 {
8430 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8431 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8432 GEN_INT (0x40)));
8433 code = NE;
8434 }
8435 else
8436 {
8437 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8438 code = EQ;
8439 }
8440 break;
8441
8442 case UNORDERED:
8443 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8444 code = NE;
8445 break;
8446 case ORDERED:
8447 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8448 code = EQ;
8449 break;
8450
8451 default:
8452 abort ();
8453 }
8454 }
8455
8456 /* Return the test that should be put into the flags user, i.e.
8457 the bcc, scc, or cmov instruction. */
8458 return gen_rtx_fmt_ee (code, VOIDmode,
8459 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8460 const0_rtx);
8461 }
8462
8463 rtx
8464 ix86_expand_compare (code, second_test, bypass_test)
8465 enum rtx_code code;
8466 rtx *second_test, *bypass_test;
8467 {
8468 rtx op0, op1, ret;
8469 op0 = ix86_compare_op0;
8470 op1 = ix86_compare_op1;
8471
8472 if (second_test)
8473 *second_test = NULL_RTX;
8474 if (bypass_test)
8475 *bypass_test = NULL_RTX;
8476
8477 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8478 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8479 second_test, bypass_test);
8480 else
8481 ret = ix86_expand_int_compare (code, op0, op1);
8482
8483 return ret;
8484 }
8485
8486 /* Return true if the CODE will result in a nontrivial jump sequence. */
8487 bool
8488 ix86_fp_jump_nontrivial_p (code)
8489 enum rtx_code code;
8490 {
8491 enum rtx_code bypass_code, first_code, second_code;
8492 if (!TARGET_CMOVE)
8493 return true;
8494 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8495 return bypass_code != NIL || second_code != NIL;
8496 }
8497
8498 void
8499 ix86_expand_branch (code, label)
8500 enum rtx_code code;
8501 rtx label;
8502 {
8503 rtx tmp;
8504
8505 switch (GET_MODE (ix86_compare_op0))
8506 {
8507 case QImode:
8508 case HImode:
8509 case SImode:
8510 simple:
8511 tmp = ix86_expand_compare (code, NULL, NULL);
8512 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8513 gen_rtx_LABEL_REF (VOIDmode, label),
8514 pc_rtx);
8515 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8516 return;
8517
8518 case SFmode:
8519 case DFmode:
8520 case XFmode:
8521 case TFmode:
8522 {
8523 rtvec vec;
8524 int use_fcomi;
8525 enum rtx_code bypass_code, first_code, second_code;
8526
8527 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8528 &ix86_compare_op1);
8529
8530 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8531
8532 /* Check whether we will use the natural sequence with one jump. If
8533 so, we can expand the jump early. Otherwise delay expansion by
8534 creating a compound insn so as not to confuse the optimizers. */
8535 if (bypass_code == NIL && second_code == NIL
8536 && TARGET_CMOVE)
8537 {
8538 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8539 gen_rtx_LABEL_REF (VOIDmode, label),
8540 pc_rtx, NULL_RTX);
8541 }
8542 else
8543 {
8544 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8545 ix86_compare_op0, ix86_compare_op1);
8546 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8547 gen_rtx_LABEL_REF (VOIDmode, label),
8548 pc_rtx);
8549 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8550
8551 use_fcomi = ix86_use_fcomi_compare (code);
8552 vec = rtvec_alloc (3 + !use_fcomi);
8553 RTVEC_ELT (vec, 0) = tmp;
8554 RTVEC_ELT (vec, 1)
8555 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8556 RTVEC_ELT (vec, 2)
8557 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8558 if (! use_fcomi)
8559 RTVEC_ELT (vec, 3)
8560 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8561
8562 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8563 }
8564 return;
8565 }
8566
8567 case DImode:
8568 if (TARGET_64BIT)
8569 goto simple;
8570 /* Expand DImode branch into multiple compare+branch. */
8571 {
8572 rtx lo[2], hi[2], label2;
8573 enum rtx_code code1, code2, code3;
8574
8575 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8576 {
8577 tmp = ix86_compare_op0;
8578 ix86_compare_op0 = ix86_compare_op1;
8579 ix86_compare_op1 = tmp;
8580 code = swap_condition (code);
8581 }
8582 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8583 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8584
8585 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8586 avoid two branches. This costs one extra insn, so disable when
8587 optimizing for size. */
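      /* In C terms the identity being used is (illustrative only):
           (a == b)  <=>  ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0
         so a single test of the IOR result against zero replaces a pair of
         word-sized compare-and-branch sequences.  */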
8588
8589 if ((code == EQ || code == NE)
8590 && (!optimize_size
8591 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8592 {
8593 rtx xor0, xor1;
8594
8595 xor1 = hi[0];
8596 if (hi[1] != const0_rtx)
8597 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8598 NULL_RTX, 0, OPTAB_WIDEN);
8599
8600 xor0 = lo[0];
8601 if (lo[1] != const0_rtx)
8602 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8603 NULL_RTX, 0, OPTAB_WIDEN);
8604
8605 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8606 NULL_RTX, 0, OPTAB_WIDEN);
8607
8608 ix86_compare_op0 = tmp;
8609 ix86_compare_op1 = const0_rtx;
8610 ix86_expand_branch (code, label);
8611 return;
8612 }
8613
8614 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8615 op1 is a constant and the low word is zero, then we can just
8616 examine the high word. */
8617
8618 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8619 switch (code)
8620 {
8621 case LT: case LTU: case GE: case GEU:
8622 ix86_compare_op0 = hi[0];
8623 ix86_compare_op1 = hi[1];
8624 ix86_expand_branch (code, label);
8625 return;
8626 default:
8627 break;
8628 }
8629
8630 /* Otherwise, we need two or three jumps. */
8631
8632 label2 = gen_label_rtx ();
8633
8634 code1 = code;
8635 code2 = swap_condition (code);
8636 code3 = unsigned_condition (code);
8637
8638 switch (code)
8639 {
8640 case LT: case GT: case LTU: case GTU:
8641 break;
8642
8643 case LE: code1 = LT; code2 = GT; break;
8644 case GE: code1 = GT; code2 = LT; break;
8645 case LEU: code1 = LTU; code2 = GTU; break;
8646 case GEU: code1 = GTU; code2 = LTU; break;
8647
8648 case EQ: code1 = NIL; code2 = NE; break;
8649 case NE: code2 = NIL; break;
8650
8651 default:
8652 abort ();
8653 }
8654
8655 /*
8656 * a < b =>
8657 * if (hi(a) < hi(b)) goto true;
8658 * if (hi(a) > hi(b)) goto false;
8659 * if (lo(a) < lo(b)) goto true;
8660 * false:
8661 */
8662
8663 ix86_compare_op0 = hi[0];
8664 ix86_compare_op1 = hi[1];
8665
8666 if (code1 != NIL)
8667 ix86_expand_branch (code1, label);
8668 if (code2 != NIL)
8669 ix86_expand_branch (code2, label2);
8670
8671 ix86_compare_op0 = lo[0];
8672 ix86_compare_op1 = lo[1];
8673 ix86_expand_branch (code3, label);
8674
8675 if (code2 != NIL)
8676 emit_label (label2);
8677 return;
8678 }
8679
8680 default:
8681 abort ();
8682 }
8683 }
8684
8685 /* Split branch based on floating point condition. */
8686 void
8687 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8688 enum rtx_code code;
8689 rtx op1, op2, target1, target2, tmp;
8690 {
8691 rtx second, bypass;
8692 rtx label = NULL_RTX;
8693 rtx condition;
8694 int bypass_probability = -1, second_probability = -1, probability = -1;
8695 rtx i;
8696
8697 if (target2 != pc_rtx)
8698 {
8699 rtx tmp = target2;
8700 code = reverse_condition_maybe_unordered (code);
8701 target2 = target1;
8702 target1 = tmp;
8703 }
8704
8705 condition = ix86_expand_fp_compare (code, op1, op2,
8706 tmp, &second, &bypass);
8707
8708 if (split_branch_probability >= 0)
8709 {
8710 /* Distribute the probabilities across the jumps.
8711 Assume that BYPASS and SECOND always test
8712 for UNORDERED. */
8713 probability = split_branch_probability;
8714
8715 /* A value of 1 is low enough that the probability does not need
8716 to be updated. Later we may run some experiments and see
8717 if unordered values are more frequent in practice. */
8718 if (bypass)
8719 bypass_probability = 1;
8720 if (second)
8721 second_probability = 1;
8722 }
8723 if (bypass != NULL_RTX)
8724 {
8725 label = gen_label_rtx ();
8726 i = emit_jump_insn (gen_rtx_SET
8727 (VOIDmode, pc_rtx,
8728 gen_rtx_IF_THEN_ELSE (VOIDmode,
8729 bypass,
8730 gen_rtx_LABEL_REF (VOIDmode,
8731 label),
8732 pc_rtx)));
8733 if (bypass_probability >= 0)
8734 REG_NOTES (i)
8735 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8736 GEN_INT (bypass_probability),
8737 REG_NOTES (i));
8738 }
8739 i = emit_jump_insn (gen_rtx_SET
8740 (VOIDmode, pc_rtx,
8741 gen_rtx_IF_THEN_ELSE (VOIDmode,
8742 condition, target1, target2)));
8743 if (probability >= 0)
8744 REG_NOTES (i)
8745 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8746 GEN_INT (probability),
8747 REG_NOTES (i));
8748 if (second != NULL_RTX)
8749 {
8750 i = emit_jump_insn (gen_rtx_SET
8751 (VOIDmode, pc_rtx,
8752 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8753 target2)));
8754 if (second_probability >= 0)
8755 REG_NOTES (i)
8756 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8757 GEN_INT (second_probability),
8758 REG_NOTES (i));
8759 }
8760 if (label != NULL_RTX)
8761 emit_label (label);
8762 }
8763
8764 int
8765 ix86_expand_setcc (code, dest)
8766 enum rtx_code code;
8767 rtx dest;
8768 {
8769 rtx ret, tmp, tmpreg;
8770 rtx second_test, bypass_test;
8771
8772 if (GET_MODE (ix86_compare_op0) == DImode
8773 && !TARGET_64BIT)
8774 return 0; /* FAIL */
8775
8776 if (GET_MODE (dest) != QImode)
8777 abort ();
8778
8779 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8780 PUT_MODE (ret, QImode);
8781
8782 tmp = dest;
8783 tmpreg = dest;
8784
8785 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8786 if (bypass_test || second_test)
8787 {
8788 rtx test = second_test;
8789 int bypass = 0;
8790 rtx tmp2 = gen_reg_rtx (QImode);
8791 if (bypass_test)
8792 {
8793 if (second_test)
8794 abort ();
8795 test = bypass_test;
8796 bypass = 1;
8797 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8798 }
8799 PUT_MODE (test, QImode);
8800 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8801
8802 if (bypass)
8803 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8804 else
8805 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8806 }
8807
8808 return 1; /* DONE */
8809 }
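
/* Note on the combining step above (a summary, not normative): when the FP
   compare produces a SECOND test, the final byte is the IOR of the two
   setcc results; when it produces a BYPASS test, that test's condition is
   reversed and ANDed in, which zeroes the result whenever the bypass
   (typically UNORDERED) condition holds.  */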
8810
8811 int
8812 ix86_expand_int_movcc (operands)
8813 rtx operands[];
8814 {
8815 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8816 rtx compare_seq, compare_op;
8817 rtx second_test, bypass_test;
8818 enum machine_mode mode = GET_MODE (operands[0]);
8819
8820 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8821 In case the comparison is done with an immediate, we can convert it to LTU or
8822 GEU by altering the integer. */
8823
8824 if ((code == LEU || code == GTU)
8825 && GET_CODE (ix86_compare_op1) == CONST_INT
8826 && mode != HImode
8827 && INTVAL (ix86_compare_op1) != -1
8828 /* For x86-64, the immediate field in the instruction is 32-bit
8829 signed, so we can't increment a DImode value above 0x7fffffff. */
8830 && (!TARGET_64BIT
8831 || GET_MODE (ix86_compare_op0) != DImode
8832 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8833 && GET_CODE (operands[2]) == CONST_INT
8834 && GET_CODE (operands[3]) == CONST_INT)
8835 {
8836 if (code == LEU)
8837 code = LTU;
8838 else
8839 code = GEU;
8840 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8841 GET_MODE (ix86_compare_op0));
8842 }
8843
8844 start_sequence ();
8845 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8846 compare_seq = get_insns ();
8847 end_sequence ();
8848
8849 compare_code = GET_CODE (compare_op);
8850
8851 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8852 HImode insns, we'd be swallowed in word prefix ops. */
8853
8854 if (mode != HImode
8855 && (mode != DImode || TARGET_64BIT)
8856 && GET_CODE (operands[2]) == CONST_INT
8857 && GET_CODE (operands[3]) == CONST_INT)
8858 {
8859 rtx out = operands[0];
8860 HOST_WIDE_INT ct = INTVAL (operands[2]);
8861 HOST_WIDE_INT cf = INTVAL (operands[3]);
8862 HOST_WIDE_INT diff;
8863
8864 if ((compare_code == LTU || compare_code == GEU)
8865 && !second_test && !bypass_test)
8866 {
8867 /* Detect overlap between destination and compare sources. */
8868 rtx tmp = out;
8869
8870 /* To simplify rest of code, restrict to the GEU case. */
8871 if (compare_code == LTU)
8872 {
8873 int tmp = ct;
8874 ct = cf;
8875 cf = tmp;
8876 compare_code = reverse_condition (compare_code);
8877 code = reverse_condition (code);
8878 }
8879 diff = ct - cf;
8880
8881 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8882 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8883 tmp = gen_reg_rtx (mode);
8884
8885 emit_insn (compare_seq);
8886 if (mode == DImode)
8887 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8888 else
8889 emit_insn (gen_x86_movsicc_0_m1 (tmp));
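          /* At this point TMP was produced by an "sbb reg,reg" style insn:
             it is all ones when the compare set the carry flag (the LTU
             case) and zero otherwise (the GEU case, to which the code above
             restricted itself).  The arithmetic below only has to map this
             mask onto the two constants (0 -> CT, -1 -> CF).  */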
8890
8891 if (diff == 1)
8892 {
8893 /*
8894 * cmpl op0,op1
8895 * sbbl dest,dest
8896 * [addl dest, ct]
8897 *
8898 * Size 5 - 8.
8899 */
8900 if (ct)
8901 tmp = expand_simple_binop (mode, PLUS,
8902 tmp, GEN_INT (ct),
8903 tmp, 1, OPTAB_DIRECT);
8904 }
8905 else if (cf == -1)
8906 {
8907 /*
8908 * cmpl op0,op1
8909 * sbbl dest,dest
8910 * orl $ct, dest
8911 *
8912 * Size 8.
8913 */
8914 tmp = expand_simple_binop (mode, IOR,
8915 tmp, GEN_INT (ct),
8916 tmp, 1, OPTAB_DIRECT);
8917 }
8918 else if (diff == -1 && ct)
8919 {
8920 /*
8921 * cmpl op0,op1
8922 * sbbl dest,dest
8923 * notl dest
8924 * [addl dest, cf]
8925 *
8926 * Size 8 - 11.
8927 */
8928 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8929 if (cf)
8930 tmp = expand_simple_binop (mode, PLUS,
8931 tmp, GEN_INT (cf),
8932 tmp, 1, OPTAB_DIRECT);
8933 }
8934 else
8935 {
8936 /*
8937 * cmpl op0,op1
8938 * sbbl dest,dest
8939 * [notl dest]
8940 * andl cf - ct, dest
8941 * [addl dest, ct]
8942 *
8943 * Size 8 - 11.
8944 */
8945
8946 if (cf == 0)
8947 {
8948 cf = ct;
8949 ct = 0;
8950 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8951 }
8952
8953 tmp = expand_simple_binop (mode, AND,
8954 tmp,
8955 gen_int_mode (cf - ct, mode),
8956 tmp, 1, OPTAB_DIRECT);
8957 if (ct)
8958 tmp = expand_simple_binop (mode, PLUS,
8959 tmp, GEN_INT (ct),
8960 tmp, 1, OPTAB_DIRECT);
8961 }
8962
8963 if (tmp != out)
8964 emit_move_insn (out, tmp);
8965
8966 return 1; /* DONE */
8967 }
8968
8969 diff = ct - cf;
8970 if (diff < 0)
8971 {
8972 HOST_WIDE_INT tmp;
8973 tmp = ct, ct = cf, cf = tmp;
8974 diff = -diff;
8975 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8976 {
8977 /* We may be reversing an unordered compare to a normal compare, which
8978 is not valid in general (we may convert a non-trapping condition
8979 to a trapping one); however, on i386 we currently emit all
8980 comparisons unordered. */
8981 compare_code = reverse_condition_maybe_unordered (compare_code);
8982 code = reverse_condition_maybe_unordered (code);
8983 }
8984 else
8985 {
8986 compare_code = reverse_condition (compare_code);
8987 code = reverse_condition (code);
8988 }
8989 }
8990
8991 compare_code = NIL;
8992 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8993 && GET_CODE (ix86_compare_op1) == CONST_INT)
8994 {
8995 if (ix86_compare_op1 == const0_rtx
8996 && (code == LT || code == GE))
8997 compare_code = code;
8998 else if (ix86_compare_op1 == constm1_rtx)
8999 {
9000 if (code == LE)
9001 compare_code = LT;
9002 else if (code == GT)
9003 compare_code = GE;
9004 }
9005 }
9006
9007 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9008 if (compare_code != NIL
9009 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9010 && (cf == -1 || ct == -1))
9011 {
9012 /* If the lea code below could be used, only optimize
9013 if it results in a 2 insn sequence. */
9014
9015 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9016 || diff == 3 || diff == 5 || diff == 9)
9017 || (compare_code == LT && ct == -1)
9018 || (compare_code == GE && cf == -1))
9019 {
9020 /*
9021 * notl op1 (if necessary)
9022 * sarl $31, op1
9023 * orl cf, op1
9024 */
9025 if (ct != -1)
9026 {
9027 cf = ct;
9028 ct = -1;
9029 code = reverse_condition (code);
9030 }
9031
9032 out = emit_store_flag (out, code, ix86_compare_op0,
9033 ix86_compare_op1, VOIDmode, 0, -1);
9034
9035 out = expand_simple_binop (mode, IOR,
9036 out, GEN_INT (cf),
9037 out, 1, OPTAB_DIRECT);
9038 if (out != operands[0])
9039 emit_move_insn (operands[0], out);
9040
9041 return 1; /* DONE */
9042 }
9043 }
9044
9045 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9046 || diff == 3 || diff == 5 || diff == 9)
9047 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9048 {
9049 /*
9050 * xorl dest,dest
9051 * cmpl op1,op2
9052 * setcc dest
9053 * lea cf(dest*(ct-cf)),dest
9054 *
9055 * Size 14.
9056 *
9057 * This also catches the degenerate setcc-only case.
9058 */
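      /* A worked instance of the sequence above (illustrative numbers only):
         for ct = 12, cf = 7 we have diff = 5, so after setcc the register
         holds 0 or 1 and the lea computes 7 + dest*4 + dest, i.e. 7 when the
         condition is false and 12 when it is true.  */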
9059
9060 rtx tmp;
9061 int nops;
9062
9063 out = emit_store_flag (out, code, ix86_compare_op0,
9064 ix86_compare_op1, VOIDmode, 0, 1);
9065
9066 nops = 0;
9067 /* On x86_64 the lea instruction operates on Pmode, so we need
9068 to do the arithmetic in the proper mode to match. */
9069 if (diff == 1)
9070 tmp = out;
9071 else
9072 {
9073 rtx out1;
9074 out1 = out;
9075 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9076 nops++;
9077 if (diff & 1)
9078 {
9079 tmp = gen_rtx_PLUS (mode, tmp, out1);
9080 nops++;
9081 }
9082 }
9083 if (cf != 0)
9084 {
9085 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9086 nops++;
9087 }
9088 if (tmp != out
9089 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9090 {
9091 if (nops == 1)
9092 {
9093 rtx clob;
9094
9095 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9096 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9097
9098 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9099 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9100 emit_insn (tmp);
9101 }
9102 else
9103 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9104 }
9105 if (out != operands[0])
9106 emit_move_insn (operands[0], copy_rtx (out));
9107
9108 return 1; /* DONE */
9109 }
9110
9111 /*
9112 * General case: Jumpful:
9113 * xorl dest,dest cmpl op1, op2
9114 * cmpl op1, op2 movl ct, dest
9115 * setcc dest jcc 1f
9116 * decl dest movl cf, dest
9117 * andl (cf-ct),dest 1:
9118 * addl ct,dest
9119 *
9120 * Size 20. Size 14.
9121 *
9122 * This is reasonably steep, but branch mispredict costs are
9123 * high on modern cpus, so consider failing only if optimizing
9124 * for space.
9125 *
9126 * %%% Parameterize branch_cost on the tuning architecture, then
9127 * use that. The 80386 couldn't care less about mispredicts.
9128 */
9129
9130 if (!optimize_size && !TARGET_CMOVE)
9131 {
9132 if (cf == 0)
9133 {
9134 cf = ct;
9135 ct = 0;
9136 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9137 /* We may be reversing an unordered compare to a normal compare,
9138 which is not valid in general (we may convert a non-trapping
9139 condition to a trapping one); however, on i386 we currently
9140 emit all comparisons unordered. */
9141 code = reverse_condition_maybe_unordered (code);
9142 else
9143 {
9144 code = reverse_condition (code);
9145 if (compare_code != NIL)
9146 compare_code = reverse_condition (compare_code);
9147 }
9148 }
9149
9150 if (compare_code != NIL)
9151 {
9152 /* notl op1 (if needed)
9153 sarl $31, op1
9154 andl (cf-ct), op1
9155 addl ct, op1
9156
9157 For x < 0 (resp. x <= -1) there will be no notl,
9158 so if possible swap the constants to get rid of the
9159 complement.
9160 True/false will be -1/0 while code below (store flag
9161 followed by decrement) is 0/-1, so the constants need
9162 to be exchanged once more. */
9163
9164 if (compare_code == GE || !cf)
9165 {
9166 code = reverse_condition (code);
9167 compare_code = LT;
9168 }
9169 else
9170 {
9171 HOST_WIDE_INT tmp = cf;
9172 cf = ct;
9173 ct = tmp;
9174 }
9175
9176 out = emit_store_flag (out, code, ix86_compare_op0,
9177 ix86_compare_op1, VOIDmode, 0, -1);
9178 }
9179 else
9180 {
9181 out = emit_store_flag (out, code, ix86_compare_op0,
9182 ix86_compare_op1, VOIDmode, 0, 1);
9183
9184 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9185 out, 1, OPTAB_DIRECT);
9186 }
9187
9188 out = expand_simple_binop (mode, AND, out,
9189 gen_int_mode (cf - ct, mode),
9190 out, 1, OPTAB_DIRECT);
9191 if (ct)
9192 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9193 out, 1, OPTAB_DIRECT);
9194 if (out != operands[0])
9195 emit_move_insn (operands[0], out);
9196
9197 return 1; /* DONE */
9198 }
9199 }
9200
9201 if (!TARGET_CMOVE)
9202 {
9203 /* Try a few things more with specific constants and a variable. */
9204
9205 optab op;
9206 rtx var, orig_out, out, tmp;
9207
9208 if (optimize_size)
9209 return 0; /* FAIL */
9210
9211 /* If one of the two operands is an interesting constant, load a
9212 constant with the above and mask it in with a logical operation. */
9213
9214 if (GET_CODE (operands[2]) == CONST_INT)
9215 {
9216 var = operands[3];
9217 if (INTVAL (operands[2]) == 0)
9218 operands[3] = constm1_rtx, op = and_optab;
9219 else if (INTVAL (operands[2]) == -1)
9220 operands[3] = const0_rtx, op = ior_optab;
9221 else
9222 return 0; /* FAIL */
9223 }
9224 else if (GET_CODE (operands[3]) == CONST_INT)
9225 {
9226 var = operands[2];
9227 if (INTVAL (operands[3]) == 0)
9228 operands[2] = constm1_rtx, op = and_optab;
9229 else if (INTVAL (operands[3]) == -1)
9230 operands[2] = const0_rtx, op = ior_optab;
9231 else
9232 return 0; /* FAIL */
9233 }
9234 else
9235 return 0; /* FAIL */
9236
9237 orig_out = operands[0];
9238 tmp = gen_reg_rtx (mode);
9239 operands[0] = tmp;
9240
9241 /* Recurse to get the constant loaded. */
9242 if (ix86_expand_int_movcc (operands) == 0)
9243 return 0; /* FAIL */
9244
9245 /* Mask in the interesting variable. */
9246 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9247 OPTAB_WIDEN);
9248 if (out != orig_out)
9249 emit_move_insn (orig_out, out);
9250
9251 return 1; /* DONE */
9252 }
9253
9254 /*
9255 * For comparison with above,
9256 *
9257 * movl cf,dest
9258 * movl ct,tmp
9259 * cmpl op1,op2
9260 * cmovcc tmp,dest
9261 *
9262 * Size 15.
9263 */
9264
9265 if (! nonimmediate_operand (operands[2], mode))
9266 operands[2] = force_reg (mode, operands[2]);
9267 if (! nonimmediate_operand (operands[3], mode))
9268 operands[3] = force_reg (mode, operands[3]);
9269
9270 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9271 {
9272 rtx tmp = gen_reg_rtx (mode);
9273 emit_move_insn (tmp, operands[3]);
9274 operands[3] = tmp;
9275 }
9276 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9277 {
9278 rtx tmp = gen_reg_rtx (mode);
9279 emit_move_insn (tmp, operands[2]);
9280 operands[2] = tmp;
9281 }
9282 if (! register_operand (operands[2], VOIDmode)
9283 && ! register_operand (operands[3], VOIDmode))
9284 operands[2] = force_reg (mode, operands[2]);
9285
9286 emit_insn (compare_seq);
9287 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9288 gen_rtx_IF_THEN_ELSE (mode,
9289 compare_op, operands[2],
9290 operands[3])));
9291 if (bypass_test)
9292 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9293 gen_rtx_IF_THEN_ELSE (mode,
9294 bypass_test,
9295 operands[3],
9296 operands[0])));
9297 if (second_test)
9298 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9299 gen_rtx_IF_THEN_ELSE (mode,
9300 second_test,
9301 operands[2],
9302 operands[0])));
9303
9304 return 1; /* DONE */
9305 }
9306
9307 int
9308 ix86_expand_fp_movcc (operands)
9309 rtx operands[];
9310 {
9311 enum rtx_code code;
9312 rtx tmp;
9313 rtx compare_op, second_test, bypass_test;
9314
9315 /* For SF/DFmode conditional moves based on comparisons
9316 in the same mode, we may want to use SSE min/max instructions. */
9317 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9318 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9319 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9320 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9321 && (!TARGET_IEEE_FP
9322 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9323 /* We may be called from the post-reload splitter. */
9324 && (!REG_P (operands[0])
9325 || SSE_REG_P (operands[0])
9326 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9327 {
9328 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9329 code = GET_CODE (operands[1]);
9330
9331 /* See if we have a (cross) match between comparison operands and
9332 conditional move operands. */
9333 if (rtx_equal_p (operands[2], op1))
9334 {
9335 rtx tmp = op0;
9336 op0 = op1;
9337 op1 = tmp;
9338 code = reverse_condition_maybe_unordered (code);
9339 }
9340 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9341 {
9342 /* Check for min operation. */
9343 if (code == LT)
9344 {
9345 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9346 if (memory_operand (op0, VOIDmode))
9347 op0 = force_reg (GET_MODE (operands[0]), op0);
9348 if (GET_MODE (operands[0]) == SFmode)
9349 emit_insn (gen_minsf3 (operands[0], op0, op1));
9350 else
9351 emit_insn (gen_mindf3 (operands[0], op0, op1));
9352 return 1;
9353 }
9354 /* Check for max operation. */
9355 if (code == GT)
9356 {
9357 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9358 if (memory_operand (op0, VOIDmode))
9359 op0 = force_reg (GET_MODE (operands[0]), op0);
9360 if (GET_MODE (operands[0]) == SFmode)
9361 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9362 else
9363 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9364 return 1;
9365 }
9366 }
9367 /* Arrange for the condition to be an sse_comparison_operator. In case we are
9368 in non-IEEE mode, try to canonicalize the destination operand
9369 to be first in the comparison - this helps reload avoid extra
9370 moves. */
9371 if (!sse_comparison_operator (operands[1], VOIDmode)
9372 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9373 {
9374 rtx tmp = ix86_compare_op0;
9375 ix86_compare_op0 = ix86_compare_op1;
9376 ix86_compare_op1 = tmp;
9377 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9378 VOIDmode, ix86_compare_op0,
9379 ix86_compare_op1);
9380 }
9381 /* Similarly, try to arrange for the result to be the first operand of the conditional
9382 move. We also don't support the NE comparison on SSE, so try to
9383 avoid it. */
9384 if ((rtx_equal_p (operands[0], operands[3])
9385 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9386 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9387 {
9388 rtx tmp = operands[2];
9389 operands[2] = operands[3];
9390 operands[3] = tmp;
9391 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9392 (GET_CODE (operands[1])),
9393 VOIDmode, ix86_compare_op0,
9394 ix86_compare_op1);
9395 }
9396 if (GET_MODE (operands[0]) == SFmode)
9397 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9398 operands[2], operands[3],
9399 ix86_compare_op0, ix86_compare_op1));
9400 else
9401 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9402 operands[2], operands[3],
9403 ix86_compare_op0, ix86_compare_op1));
9404 return 1;
9405 }
9406
9407 /* The floating point conditional move instructions don't directly
9408 support conditions resulting from a signed integer comparison. */
9409
9410 code = GET_CODE (operands[1]);
9411 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9412
9413 /* The floating point conditional move instructions don't directly
9414 support signed integer comparisons. */
9415
9416 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9417 {
9418 if (second_test != NULL || bypass_test != NULL)
9419 abort ();
9420 tmp = gen_reg_rtx (QImode);
9421 ix86_expand_setcc (code, tmp);
9422 code = NE;
9423 ix86_compare_op0 = tmp;
9424 ix86_compare_op1 = const0_rtx;
9425 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9426 }
9427 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9428 {
9429 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9430 emit_move_insn (tmp, operands[3]);
9431 operands[3] = tmp;
9432 }
9433 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9434 {
9435 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9436 emit_move_insn (tmp, operands[2]);
9437 operands[2] = tmp;
9438 }
9439
9440 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9441 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9442 compare_op,
9443 operands[2],
9444 operands[3])));
9445 if (bypass_test)
9446 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9447 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9448 bypass_test,
9449 operands[3],
9450 operands[0])));
9451 if (second_test)
9452 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9453 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9454 second_test,
9455 operands[2],
9456 operands[0])));
9457
9458 return 1;
9459 }
9460
9461 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9462 works for floating point parameters and non-offsettable memories.
9463 For pushes, it returns just stack offsets; the values will be saved
9464 in the right order. At most three parts are generated. */
9465
9466 static int
9467 ix86_split_to_parts (operand, parts, mode)
9468 rtx operand;
9469 rtx *parts;
9470 enum machine_mode mode;
9471 {
9472 int size;
9473
9474 if (!TARGET_64BIT)
9475 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9476 else
9477 size = (GET_MODE_SIZE (mode) + 4) / 8;
9478
9479 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9480 abort ();
9481 if (size < 2 || size > 3)
9482 abort ();
9483
9484 /* Optimize constant pool references to immediates. This is used by fp
9485 moves, which force all constants to memory to allow combining. */
9486 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9487 {
9488 rtx tmp = maybe_get_pool_constant (operand);
9489 if (tmp)
9490 operand = tmp;
9491 }
9492
9493 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9494 {
9495 /* The only non-offsettable memories we handle are pushes. */
9496 if (! push_operand (operand, VOIDmode))
9497 abort ();
9498
9499 operand = copy_rtx (operand);
9500 PUT_MODE (operand, Pmode);
9501 parts[0] = parts[1] = parts[2] = operand;
9502 }
9503 else if (!TARGET_64BIT)
9504 {
9505 if (mode == DImode)
9506 split_di (&operand, 1, &parts[0], &parts[1]);
9507 else
9508 {
9509 if (REG_P (operand))
9510 {
9511 if (!reload_completed)
9512 abort ();
9513 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9514 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9515 if (size == 3)
9516 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9517 }
9518 else if (offsettable_memref_p (operand))
9519 {
9520 operand = adjust_address (operand, SImode, 0);
9521 parts[0] = operand;
9522 parts[1] = adjust_address (operand, SImode, 4);
9523 if (size == 3)
9524 parts[2] = adjust_address (operand, SImode, 8);
9525 }
9526 else if (GET_CODE (operand) == CONST_DOUBLE)
9527 {
9528 REAL_VALUE_TYPE r;
9529 long l[4];
9530
9531 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9532 switch (mode)
9533 {
9534 case XFmode:
9535 case TFmode:
9536 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9537 parts[2] = gen_int_mode (l[2], SImode);
9538 break;
9539 case DFmode:
9540 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9541 break;
9542 default:
9543 abort ();
9544 }
9545 parts[1] = gen_int_mode (l[1], SImode);
9546 parts[0] = gen_int_mode (l[0], SImode);
9547 }
9548 else
9549 abort ();
9550 }
9551 }
9552 else
9553 {
9554 if (mode == TImode)
9555 split_ti (&operand, 1, &parts[0], &parts[1]);
9556 if (mode == XFmode || mode == TFmode)
9557 {
9558 if (REG_P (operand))
9559 {
9560 if (!reload_completed)
9561 abort ();
9562 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9563 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9564 }
9565 else if (offsettable_memref_p (operand))
9566 {
9567 operand = adjust_address (operand, DImode, 0);
9568 parts[0] = operand;
9569 parts[1] = adjust_address (operand, SImode, 8);
9570 }
9571 else if (GET_CODE (operand) == CONST_DOUBLE)
9572 {
9573 REAL_VALUE_TYPE r;
9574 long l[3];
9575
9576 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9577 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9578 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9579 if (HOST_BITS_PER_WIDE_INT >= 64)
9580 parts[0]
9581 = gen_int_mode
9582 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9583 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9584 DImode);
9585 else
9586 parts[0] = immed_double_const (l[0], l[1], DImode);
9587 parts[1] = gen_int_mode (l[2], SImode);
9588 }
9589 else
9590 abort ();
9591 }
9592 }
9593
9594 return size;
9595 }
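
/* Size examples for the splitter above (illustrative only): on a 32-bit
   target DImode and DFmode split into two SImode parts and XFmode/TFmode
   into three, while on a 64-bit target TImode and XFmode/TFmode split into
   two parts, the second of which is SImode for the extended float modes.  */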
9596
9597 /* Emit insns to perform a move or push of DI, DF, and XF values.
9598 Return false when normal moves are needed; true when all required
9599 insns have been emitted. Operands 2-4 contain the input values
9600 in the correct order; operands 5-7 contain the output values. */
9601
9602 void
9603 ix86_split_long_move (operands)
9604 rtx operands[];
9605 {
9606 rtx part[2][3];
9607 int nparts;
9608 int push = 0;
9609 int collisions = 0;
9610 enum machine_mode mode = GET_MODE (operands[0]);
9611
9612 /* The DFmode expanders may ask us to move a double.
9613 For a 64-bit target this is a single move. By hiding the fact
9614 here we simplify the i386.md splitters. */
9615 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9616 {
9617 /* Optimize constant pool references to immediates. This is used by
9618 fp moves, which force all constants to memory to allow combining. */
9619
9620 if (GET_CODE (operands[1]) == MEM
9621 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9622 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9623 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9624 if (push_operand (operands[0], VOIDmode))
9625 {
9626 operands[0] = copy_rtx (operands[0]);
9627 PUT_MODE (operands[0], Pmode);
9628 }
9629 else
9630 operands[0] = gen_lowpart (DImode, operands[0]);
9631 operands[1] = gen_lowpart (DImode, operands[1]);
9632 emit_move_insn (operands[0], operands[1]);
9633 return;
9634 }
9635
9636 /* The only non-offsettable memory we handle is push. */
9637 if (push_operand (operands[0], VOIDmode))
9638 push = 1;
9639 else if (GET_CODE (operands[0]) == MEM
9640 && ! offsettable_memref_p (operands[0]))
9641 abort ();
9642
9643 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9644 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9645
9646 /* When emitting a push, take care of source operands on the stack. */
9647 if (push && GET_CODE (operands[1]) == MEM
9648 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9649 {
9650 if (nparts == 3)
9651 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9652 XEXP (part[1][2], 0));
9653 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9654 XEXP (part[1][1], 0));
9655 }
9656
9657 /* We need to do the copy in the right order in case an address register
9658 of the source overlaps the destination. */
9659 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9660 {
9661 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9662 collisions++;
9663 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9664 collisions++;
9665 if (nparts == 3
9666 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9667 collisions++;
9668
9669 /* Collision in the middle part can be handled by reordering. */
9670 if (collisions == 1 && nparts == 3
9671 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9672 {
9673 rtx tmp;
9674 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9675 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9676 }
9677
9678 /* If there are more collisions, we can't handle them by reordering.
9679 Do an lea to the last part and use only one colliding move. */
9680 else if (collisions > 1)
9681 {
9682 collisions = 1;
9683 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9684 XEXP (part[1][0], 0)));
9685 part[1][0] = change_address (part[1][0],
9686 TARGET_64BIT ? DImode : SImode,
9687 part[0][nparts - 1]);
9688 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9689 if (nparts == 3)
9690 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9691 }
9692 }
9693
9694 if (push)
9695 {
9696 if (!TARGET_64BIT)
9697 {
9698 if (nparts == 3)
9699 {
9700 /* We use only the first 12 bytes of the TFmode value, but for pushing
9701 we are required to adjust the stack as if we were pushing a real
9702 16-byte value. */
9703 if (mode == TFmode && !TARGET_64BIT)
9704 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9705 GEN_INT (-4)));
9706 emit_move_insn (part[0][2], part[1][2]);
9707 }
9708 }
9709 else
9710 {
9711 /* In 64-bit mode we don't have a 32-bit push available. If this is a
9712 register, that is OK - we just use the larger counterpart. We also
9713 retype the memory - this comes from an attempt to avoid a REX prefix
9714 when moving the second half of a TFmode value. */
9715 if (GET_MODE (part[1][1]) == SImode)
9716 {
9717 if (GET_CODE (part[1][1]) == MEM)
9718 part[1][1] = adjust_address (part[1][1], DImode, 0);
9719 else if (REG_P (part[1][1]))
9720 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9721 else
9722 abort ();
9723 if (GET_MODE (part[1][0]) == SImode)
9724 part[1][0] = part[1][1];
9725 }
9726 }
9727 emit_move_insn (part[0][1], part[1][1]);
9728 emit_move_insn (part[0][0], part[1][0]);
9729 return;
9730 }
9731
9732 /* Choose the correct order so we do not overwrite the source before it is copied. */
9733 if ((REG_P (part[0][0])
9734 && REG_P (part[1][1])
9735 && (REGNO (part[0][0]) == REGNO (part[1][1])
9736 || (nparts == 3
9737 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9738 || (collisions > 0
9739 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9740 {
9741 if (nparts == 3)
9742 {
9743 operands[2] = part[0][2];
9744 operands[3] = part[0][1];
9745 operands[4] = part[0][0];
9746 operands[5] = part[1][2];
9747 operands[6] = part[1][1];
9748 operands[7] = part[1][0];
9749 }
9750 else
9751 {
9752 operands[2] = part[0][1];
9753 operands[3] = part[0][0];
9754 operands[5] = part[1][1];
9755 operands[6] = part[1][0];
9756 }
9757 }
9758 else
9759 {
9760 if (nparts == 3)
9761 {
9762 operands[2] = part[0][0];
9763 operands[3] = part[0][1];
9764 operands[4] = part[0][2];
9765 operands[5] = part[1][0];
9766 operands[6] = part[1][1];
9767 operands[7] = part[1][2];
9768 }
9769 else
9770 {
9771 operands[2] = part[0][0];
9772 operands[3] = part[0][1];
9773 operands[5] = part[1][0];
9774 operands[6] = part[1][1];
9775 }
9776 }
9777 emit_move_insn (operands[2], operands[5]);
9778 emit_move_insn (operands[3], operands[6]);
9779 if (nparts == 3)
9780 emit_move_insn (operands[4], operands[7]);
9781
9782 return;
9783 }
9784
9785 void
9786 ix86_split_ashldi (operands, scratch)
9787 rtx *operands, scratch;
9788 {
9789 rtx low[2], high[2];
9790 int count;
9791
9792 if (GET_CODE (operands[2]) == CONST_INT)
9793 {
9794 split_di (operands, 2, low, high);
9795 count = INTVAL (operands[2]) & 63;
9796
9797 if (count >= 32)
9798 {
9799 emit_move_insn (high[0], low[1]);
9800 emit_move_insn (low[0], const0_rtx);
9801
9802 if (count > 32)
9803 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9804 }
9805 else
9806 {
9807 if (!rtx_equal_p (operands[0], operands[1]))
9808 emit_move_insn (operands[0], operands[1]);
9809 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9810 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9811 }
9812 }
9813 else
9814 {
9815 if (!rtx_equal_p (operands[0], operands[1]))
9816 emit_move_insn (operands[0], operands[1]);
9817
9818 split_di (operands, 1, low, high);
9819
9820 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9821 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9822
9823 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9824 {
9825 if (! no_new_pseudos)
9826 scratch = force_reg (SImode, const0_rtx);
9827 else
9828 emit_move_insn (scratch, const0_rtx);
9829
9830 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9831 scratch));
9832 }
9833 else
9834 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9835 }
9836 }
9837
9838 void
9839 ix86_split_ashrdi (operands, scratch)
9840 rtx *operands, scratch;
9841 {
9842 rtx low[2], high[2];
9843 int count;
9844
9845 if (GET_CODE (operands[2]) == CONST_INT)
9846 {
9847 split_di (operands, 2, low, high);
9848 count = INTVAL (operands[2]) & 63;
9849
9850 if (count >= 32)
9851 {
9852 emit_move_insn (low[0], high[1]);
9853
9854 if (! reload_completed)
9855 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9856 else
9857 {
9858 emit_move_insn (high[0], low[0]);
9859 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9860 }
9861
9862 if (count > 32)
9863 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9864 }
9865 else
9866 {
9867 if (!rtx_equal_p (operands[0], operands[1]))
9868 emit_move_insn (operands[0], operands[1]);
9869 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9870 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9871 }
9872 }
9873 else
9874 {
9875 if (!rtx_equal_p (operands[0], operands[1]))
9876 emit_move_insn (operands[0], operands[1]);
9877
9878 split_di (operands, 1, low, high);
9879
9880 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9881 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9882
9883 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9884 {
9885 if (! no_new_pseudos)
9886 scratch = gen_reg_rtx (SImode);
9887 emit_move_insn (scratch, high[0]);
9888 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9889 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9890 scratch));
9891 }
9892 else
9893 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9894 }
9895 }
9896
9897 void
9898 ix86_split_lshrdi (operands, scratch)
9899 rtx *operands, scratch;
9900 {
9901 rtx low[2], high[2];
9902 int count;
9903
9904 if (GET_CODE (operands[2]) == CONST_INT)
9905 {
9906 split_di (operands, 2, low, high);
9907 count = INTVAL (operands[2]) & 63;
9908
9909 if (count >= 32)
9910 {
9911 emit_move_insn (low[0], high[1]);
9912 emit_move_insn (high[0], const0_rtx);
9913
9914 if (count > 32)
9915 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9916 }
9917 else
9918 {
9919 if (!rtx_equal_p (operands[0], operands[1]))
9920 emit_move_insn (operands[0], operands[1]);
9921 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9922 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9923 }
9924 }
9925 else
9926 {
9927 if (!rtx_equal_p (operands[0], operands[1]))
9928 emit_move_insn (operands[0], operands[1]);
9929
9930 split_di (operands, 1, low, high);
9931
9932 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9933 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9934
9935 /* Heh. By reversing the arguments, we can reuse this pattern. */
9936 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9937 {
9938 if (! no_new_pseudos)
9939 scratch = force_reg (SImode, const0_rtx);
9940 else
9941 emit_move_insn (scratch, const0_rtx);
9942
9943 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9944 scratch));
9945 }
9946 else
9947 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9948 }
9949 }
9950
9951 /* Helper function for the string operations below. Test the VALUE bit of
9952 VARIABLE; the emitted code jumps to the returned label when it is clear. */
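/* Typical use by the string expanders below:

     rtx label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);

   i.e. copy a single byte only when DESTREG is odd, and otherwise jump
   straight to the label.  */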
9953 static rtx
9954 ix86_expand_aligntest (variable, value)
9955 rtx variable;
9956 int value;
9957 {
9958 rtx label = gen_label_rtx ();
9959 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9960 if (GET_MODE (variable) == DImode)
9961 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9962 else
9963 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9964 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9965 1, label);
9966 return label;
9967 }
9968
9969 /* Subtract VALUE from COUNTREG. */
9970 static void
9971 ix86_adjust_counter (countreg, value)
9972 rtx countreg;
9973 HOST_WIDE_INT value;
9974 {
9975 if (GET_MODE (countreg) == DImode)
9976 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9977 else
9978 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9979 }
9980
9981 /* Zero extend EXP, which may be in SImode, into a Pmode register. */
9982 rtx
9983 ix86_zero_extend_to_Pmode (exp)
9984 rtx exp;
9985 {
9986 rtx r;
9987 if (GET_MODE (exp) == VOIDmode)
9988 return force_reg (Pmode, exp);
9989 if (GET_MODE (exp) == Pmode)
9990 return copy_to_mode_reg (Pmode, exp);
9991 r = gen_reg_rtx (Pmode);
9992 emit_insn (gen_zero_extendsidi2 (r, exp));
9993 return r;
9994 }
9995
9996 /* Expand string move (memcpy) operation. Use i386 string operations when
9997 profitable. expand_clrstr contains similar code. */
9998 int
9999 ix86_expand_movstr (dst, src, count_exp, align_exp)
10000 rtx dst, src, count_exp, align_exp;
10001 {
10002 rtx srcreg, destreg, countreg;
10003 enum machine_mode counter_mode;
10004 HOST_WIDE_INT align = 0;
10005 unsigned HOST_WIDE_INT count = 0;
10006 rtx insns;
10007
10008 start_sequence ();
10009
10010 if (GET_CODE (align_exp) == CONST_INT)
10011 align = INTVAL (align_exp);
10012
10013 /* This simple hack avoids all inlining code and simplifies code below. */
10014 if (!TARGET_ALIGN_STRINGOPS)
10015 align = 64;
10016
10017 if (GET_CODE (count_exp) == CONST_INT)
10018 count = INTVAL (count_exp);
10019
10020 /* Figure out the proper mode for the counter. For 32-bit targets it is
10021 always SImode; for 64-bit targets use SImode when possible, otherwise
10022 DImode. COUNT holds the number of bytes when known at compile time. */
10023 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10024 || x86_64_zero_extended_value (count_exp))
10025 counter_mode = SImode;
10026 else
10027 counter_mode = DImode;
10028
10029 if (counter_mode != SImode && counter_mode != DImode)
10030 abort ();
10031
10032 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10033 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10034
10035 emit_insn (gen_cld ());
10036
10037 /* When optimizing for size emit simple rep ; movsb instruction for
10038 counts not divisible by 4. */
10039
10040 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10041 {
10042 countreg = ix86_zero_extend_to_Pmode (count_exp);
10043 if (TARGET_64BIT)
10044 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10045 destreg, srcreg, countreg));
10046 else
10047 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10048 destreg, srcreg, countreg));
10049 }
10050
10051 /* For constant aligned (or small unaligned) copies use rep movsl
10052 followed by code copying the rest. For PentiumPro ensure 8 byte
10053 alignment to allow rep movsl acceleration. */
10054
10055 else if (count != 0
10056 && (align >= 8
10057 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10058 || optimize_size || count < (unsigned int) 64))
10059 {
10060 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10061 if (count & ~(size - 1))
10062 {
10063 countreg = copy_to_mode_reg (counter_mode,
10064 GEN_INT ((count >> (size == 4 ? 2 : 3))
10065 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10066 countreg = ix86_zero_extend_to_Pmode (countreg);
10067 if (size == 4)
10068 {
10069 if (TARGET_64BIT)
10070 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10071 destreg, srcreg, countreg));
10072 else
10073 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10074 destreg, srcreg, countreg));
10075 }
10076 else
10077 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10078 destreg, srcreg, countreg));
10079 }
10080 if (size == 8 && (count & 0x04))
10081 emit_insn (gen_strmovsi (destreg, srcreg));
10082 if (count & 0x02)
10083 emit_insn (gen_strmovhi (destreg, srcreg));
10084 if (count & 0x01)
10085 emit_insn (gen_strmovqi (destreg, srcreg));
10086 }
10087 /* The generic code based on the glibc implementation:
10088 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10089 allowing accelerated copying there)
10090 - copy the data using rep movsl
10091 - copy the rest. */
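/* As a rough illustration, for an unknown count with unknown alignment on
   a 32-bit target the code below expands to approximately:

       if (dest & 1) { movsb; count--; }        align destination to 2
       if (dest & 2) { movsw; count -= 2; }     align destination to 4
       ecx = count >> 2; rep movsl              copy the bulk
       if (count & 2) movsw                     copy the remaining tail
       if (count & 1) movsb

   with an initial check that skips the alignment code when the count is
   smaller than the desired alignment.  */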
10092 else
10093 {
10094 rtx countreg2;
10095 rtx label = NULL;
10096 int desired_alignment = (TARGET_PENTIUMPRO
10097 && (count == 0 || count >= (unsigned int) 260)
10098 ? 8 : UNITS_PER_WORD);
10099
10100 /* In case we don't know anything about the alignment, default to
10101 library version, since it is usually equally fast and results in
10102 shorter code. */
10103 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10104 {
10105 end_sequence ();
10106 return 0;
10107 }
10108
10109 if (TARGET_SINGLE_STRINGOP)
10110 emit_insn (gen_cld ());
10111
10112 countreg2 = gen_reg_rtx (Pmode);
10113 countreg = copy_to_mode_reg (counter_mode, count_exp);
10114
10115 /* We don't use loops to align destination and to copy parts smaller
10116 than 4 bytes, because gcc is able to optimize such code better (in
10117 the case the destination or the count really is aligned, gcc is often
10118 able to predict the branches) and also it is friendlier to the
10119 hardware branch prediction.
10120
10121 Using loops is beneficial for the generic case, because we can
10122 handle small counts using the loops. Many CPUs (such as Athlon)
10123 have large REP prefix setup costs.
10124
10125 This is quite costly. Maybe we can revisit this decision later or
10126 add some customizability to this code. */
10127
10128 if (count == 0 && align < desired_alignment)
10129 {
10130 label = gen_label_rtx ();
10131 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10132 LEU, 0, counter_mode, 1, label);
10133 }
10134 if (align <= 1)
10135 {
10136 rtx label = ix86_expand_aligntest (destreg, 1);
10137 emit_insn (gen_strmovqi (destreg, srcreg));
10138 ix86_adjust_counter (countreg, 1);
10139 emit_label (label);
10140 LABEL_NUSES (label) = 1;
10141 }
10142 if (align <= 2)
10143 {
10144 rtx label = ix86_expand_aligntest (destreg, 2);
10145 emit_insn (gen_strmovhi (destreg, srcreg));
10146 ix86_adjust_counter (countreg, 2);
10147 emit_label (label);
10148 LABEL_NUSES (label) = 1;
10149 }
10150 if (align <= 4 && desired_alignment > 4)
10151 {
10152 rtx label = ix86_expand_aligntest (destreg, 4);
10153 emit_insn (gen_strmovsi (destreg, srcreg));
10154 ix86_adjust_counter (countreg, 4);
10155 emit_label (label);
10156 LABEL_NUSES (label) = 1;
10157 }
10158
10159 if (label && desired_alignment > 4 && !TARGET_64BIT)
10160 {
10161 emit_label (label);
10162 LABEL_NUSES (label) = 1;
10163 label = NULL_RTX;
10164 }
10165 if (!TARGET_SINGLE_STRINGOP)
10166 emit_insn (gen_cld ());
10167 if (TARGET_64BIT)
10168 {
10169 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10170 GEN_INT (3)));
10171 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10172 destreg, srcreg, countreg2));
10173 }
10174 else
10175 {
10176 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10177 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10178 destreg, srcreg, countreg2));
10179 }
10180
10181 if (label)
10182 {
10183 emit_label (label);
10184 LABEL_NUSES (label) = 1;
10185 }
10186 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10187 emit_insn (gen_strmovsi (destreg, srcreg));
10188 if ((align <= 4 || count == 0) && TARGET_64BIT)
10189 {
10190 rtx label = ix86_expand_aligntest (countreg, 4);
10191 emit_insn (gen_strmovsi (destreg, srcreg));
10192 emit_label (label);
10193 LABEL_NUSES (label) = 1;
10194 }
10195 if (align > 2 && count != 0 && (count & 2))
10196 emit_insn (gen_strmovhi (destreg, srcreg));
10197 if (align <= 2 || count == 0)
10198 {
10199 rtx label = ix86_expand_aligntest (countreg, 2);
10200 emit_insn (gen_strmovhi (destreg, srcreg));
10201 emit_label (label);
10202 LABEL_NUSES (label) = 1;
10203 }
10204 if (align > 1 && count != 0 && (count & 1))
10205 emit_insn (gen_strmovqi (destreg, srcreg));
10206 if (align <= 1 || count == 0)
10207 {
10208 rtx label = ix86_expand_aligntest (countreg, 1);
10209 emit_insn (gen_strmovqi (destreg, srcreg));
10210 emit_label (label);
10211 LABEL_NUSES (label) = 1;
10212 }
10213 }
10214
10215 insns = get_insns ();
10216 end_sequence ();
10217
10218 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10219 emit_insn (insns);
10220 return 1;
10221 }
10222
10223 /* Expand string clear operation (bzero). Use i386 string operations when
10224 profitable. expand_movstr contains similar code. */
10225 int
10226 ix86_expand_clrstr (src, count_exp, align_exp)
10227 rtx src, count_exp, align_exp;
10228 {
10229 rtx destreg, zeroreg, countreg;
10230 enum machine_mode counter_mode;
10231 HOST_WIDE_INT align = 0;
10232 unsigned HOST_WIDE_INT count = 0;
10233
10234 if (GET_CODE (align_exp) == CONST_INT)
10235 align = INTVAL (align_exp);
10236
10237 /* This simple hack avoids all inlining code and simplifies code below. */
10238 if (!TARGET_ALIGN_STRINGOPS)
10239 align = 32;
10240
10241 if (GET_CODE (count_exp) == CONST_INT)
10242 count = INTVAL (count_exp);
10243 /* Figure out the proper mode for the counter. For 32-bit targets it is
10244 always SImode; for 64-bit targets use SImode when possible, otherwise
10245 DImode. COUNT holds the number of bytes when known at compile time. */
10246 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10247 || x86_64_zero_extended_value (count_exp))
10248 counter_mode = SImode;
10249 else
10250 counter_mode = DImode;
10251
10252 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10253
10254 emit_insn (gen_cld ());
10255
10256 /* When optimizing for size, emit a simple rep ; stosb instruction for
10257 counts not divisible by 4. */
10258
10259 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10260 {
10261 countreg = ix86_zero_extend_to_Pmode (count_exp);
10262 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10263 if (TARGET_64BIT)
10264 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10265 destreg, countreg));
10266 else
10267 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10268 destreg, countreg));
10269 }
10270 else if (count != 0
10271 && (align >= 8
10272 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10273 || optimize_size || count < (unsigned int) 64))
10274 {
10275 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10276 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10277 if (count & ~(size - 1))
10278 {
10279 countreg = copy_to_mode_reg (counter_mode,
10280 GEN_INT ((count >> (size == 4 ? 2 : 3))
10281 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10282 countreg = ix86_zero_extend_to_Pmode (countreg);
10283 if (size == 4)
10284 {
10285 if (TARGET_64BIT)
10286 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10287 destreg, countreg));
10288 else
10289 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10290 destreg, countreg));
10291 }
10292 else
10293 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10294 destreg, countreg));
10295 }
10296 if (size == 8 && (count & 0x04))
10297 emit_insn (gen_strsetsi (destreg,
10298 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10299 if (count & 0x02)
10300 emit_insn (gen_strsethi (destreg,
10301 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10302 if (count & 0x01)
10303 emit_insn (gen_strsetqi (destreg,
10304 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10305 }
10306 else
10307 {
10308 rtx countreg2;
10309 rtx label = NULL;
10310 /* Compute desired alignment of the string operation. */
10311 int desired_alignment = (TARGET_PENTIUMPRO
10312 && (count == 0 || count >= (unsigned int) 260)
10313 ? 8 : UNITS_PER_WORD);
10314
10315 /* In case we don't know anything about the alignment, default to
10316 library version, since it is usually equally fast and results in
10317 shorter code. */
10318 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10319 return 0;
10320
10321 if (TARGET_SINGLE_STRINGOP)
10322 emit_insn (gen_cld ());
10323
10324 countreg2 = gen_reg_rtx (Pmode);
10325 countreg = copy_to_mode_reg (counter_mode, count_exp);
10326 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10327
10328 if (count == 0 && align < desired_alignment)
10329 {
10330 label = gen_label_rtx ();
10331 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10332 LEU, 0, counter_mode, 1, label);
10333 }
10334 if (align <= 1)
10335 {
10336 rtx label = ix86_expand_aligntest (destreg, 1);
10337 emit_insn (gen_strsetqi (destreg,
10338 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10339 ix86_adjust_counter (countreg, 1);
10340 emit_label (label);
10341 LABEL_NUSES (label) = 1;
10342 }
10343 if (align <= 2)
10344 {
10345 rtx label = ix86_expand_aligntest (destreg, 2);
10346 emit_insn (gen_strsethi (destreg,
10347 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10348 ix86_adjust_counter (countreg, 2);
10349 emit_label (label);
10350 LABEL_NUSES (label) = 1;
10351 }
10352 if (align <= 4 && desired_alignment > 4)
10353 {
10354 rtx label = ix86_expand_aligntest (destreg, 4);
10355 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10356 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10357 : zeroreg)));
10358 ix86_adjust_counter (countreg, 4);
10359 emit_label (label);
10360 LABEL_NUSES (label) = 1;
10361 }
10362
10363 if (label && desired_alignment > 4 && !TARGET_64BIT)
10364 {
10365 emit_label (label);
10366 LABEL_NUSES (label) = 1;
10367 label = NULL_RTX;
10368 }
10369
10370 if (!TARGET_SINGLE_STRINGOP)
10371 emit_insn (gen_cld ());
10372 if (TARGET_64BIT)
10373 {
10374 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10375 GEN_INT (3)));
10376 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10377 destreg, countreg2));
10378 }
10379 else
10380 {
10381 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10382 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10383 destreg, countreg2));
10384 }
10385 if (label)
10386 {
10387 emit_label (label);
10388 LABEL_NUSES (label) = 1;
10389 }
10390
10391 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10392 emit_insn (gen_strsetsi (destreg,
10393 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10394 if (TARGET_64BIT && (align <= 4 || count == 0))
10395 {
10396 rtx label = ix86_expand_aligntest (countreg, 4);
10397 emit_insn (gen_strsetsi (destreg,
10398 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10399 emit_label (label);
10400 LABEL_NUSES (label) = 1;
10401 }
10402 if (align > 2 && count != 0 && (count & 2))
10403 emit_insn (gen_strsethi (destreg,
10404 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10405 if (align <= 2 || count == 0)
10406 {
10407 rtx label = ix86_expand_aligntest (countreg, 2);
10408 emit_insn (gen_strsethi (destreg,
10409 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10410 emit_label (label);
10411 LABEL_NUSES (label) = 1;
10412 }
10413 if (align > 1 && count != 0 && (count & 1))
10414 emit_insn (gen_strsetqi (destreg,
10415 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10416 if (align <= 1 || count == 0)
10417 {
10418 rtx label = ix86_expand_aligntest (countreg, 1);
10419 emit_insn (gen_strsetqi (destreg,
10420 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10421 emit_label (label);
10422 LABEL_NUSES (label) = 1;
10423 }
10424 }
10425 return 1;
10426 }
10427 /* Expand strlen. */
10428 int
10429 ix86_expand_strlen (out, src, eoschar, align)
10430 rtx out, src, eoschar, align;
10431 {
10432 rtx addr, scratch1, scratch2, scratch3, scratch4;
10433
10434 /* The generic case of the strlen expander is long. Avoid expanding it
10435 unless TARGET_INLINE_ALL_STRINGOPS. */
10436
10437 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10438 && !TARGET_INLINE_ALL_STRINGOPS
10439 && !optimize_size
10440 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10441 return 0;
10442
10443 addr = force_reg (Pmode, XEXP (src, 0));
10444 scratch1 = gen_reg_rtx (Pmode);
10445
10446 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10447 && !optimize_size)
10448 {
10449 /* Well, it seems that some optimizers do not combine a call like
10450 foo (strlen (bar), strlen (bar));
10451 when the move and the subtraction are done here. They do calculate
10452 the length just once when these instructions are emitted inside
10453 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
10454 and this uses one fewer register for the lifetime of
10455 output_strlen_unroll(), this is better. */
10456
10457 emit_move_insn (out, addr);
10458
10459 ix86_expand_strlensi_unroll_1 (out, align);
10460
10461 /* strlensi_unroll_1 returns the address of the zero at the end of
10462 the string, like memchr(), so compute the length by subtracting
10463 the start address. */
10464 if (TARGET_64BIT)
10465 emit_insn (gen_subdi3 (out, out, addr));
10466 else
10467 emit_insn (gen_subsi3 (out, out, addr));
10468 }
10469 else
10470 {
10471 scratch2 = gen_reg_rtx (Pmode);
10472 scratch3 = gen_reg_rtx (Pmode);
10473 scratch4 = force_reg (Pmode, constm1_rtx);
10474
10475 emit_move_insn (scratch3, addr);
10476 eoschar = force_reg (QImode, eoschar);
10477
10478 emit_insn (gen_cld ());
10479 if (TARGET_64BIT)
10480 {
10481 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10482 align, scratch4, scratch3));
10483 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10484 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10485 }
10486 else
10487 {
10488 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10489 align, scratch4, scratch3));
10490 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10491 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10492 }
10493 }
10494 return 1;
10495 }
10496
10497 /* Expand the appropriate insns for doing strlen if not just doing
10498 repnz; scasb
10499
10500 out = result, initialized with the start address
10501 align_rtx = alignment of the address.
10502 scratch = scratch register, initialized with the start address when
10503 not aligned, otherwise undefined
10504
10505 This is just the body. It needs the initializations mentioned above
10506 and some address computation at the end. These things are done in i386.md. */
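/* Roughly, the emitted code has three stages: compare up to three leading
   bytes against zero one at a time until OUT is 4-byte aligned, then loop
   loading a word at a time and applying the zero-byte test described
   below, and finally adjust OUT so that it points at the terminating zero
   byte inside the last word loaded.  */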
10507
10508 static void
10509 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10510 rtx out, align_rtx;
10511 {
10512 int align;
10513 rtx tmp;
10514 rtx align_2_label = NULL_RTX;
10515 rtx align_3_label = NULL_RTX;
10516 rtx align_4_label = gen_label_rtx ();
10517 rtx end_0_label = gen_label_rtx ();
10518 rtx mem;
10519 rtx tmpreg = gen_reg_rtx (SImode);
10520 rtx scratch = gen_reg_rtx (SImode);
10521
10522 align = 0;
10523 if (GET_CODE (align_rtx) == CONST_INT)
10524 align = INTVAL (align_rtx);
10525
10526 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10527
10528 /* Is there a known alignment and is it less than 4? */
10529 if (align < 4)
10530 {
10531 rtx scratch1 = gen_reg_rtx (Pmode);
10532 emit_move_insn (scratch1, out);
10533 /* Is there a known alignment and is it not 2? */
10534 if (align != 2)
10535 {
10536 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10537 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10538
10539 /* Leave just the 3 lower bits. */
10540 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10541 NULL_RTX, 0, OPTAB_WIDEN);
10542
10543 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10544 Pmode, 1, align_4_label);
10545 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10546 Pmode, 1, align_2_label);
10547 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10548 Pmode, 1, align_3_label);
10549 }
10550 else
10551 {
10552 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10553 check whether it is aligned to a 4-byte boundary. */
10554
10555 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10556 NULL_RTX, 0, OPTAB_WIDEN);
10557
10558 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10559 Pmode, 1, align_4_label);
10560 }
10561
10562 mem = gen_rtx_MEM (QImode, out);
10563
10564 /* Now compare the bytes. */
10565
10566 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10567 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10568 QImode, 1, end_0_label);
10569
10570 /* Increment the address. */
10571 if (TARGET_64BIT)
10572 emit_insn (gen_adddi3 (out, out, const1_rtx));
10573 else
10574 emit_insn (gen_addsi3 (out, out, const1_rtx));
10575
10576 /* Not needed with an alignment of 2 */
10577 if (align != 2)
10578 {
10579 emit_label (align_2_label);
10580
10581 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10582 end_0_label);
10583
10584 if (TARGET_64BIT)
10585 emit_insn (gen_adddi3 (out, out, const1_rtx));
10586 else
10587 emit_insn (gen_addsi3 (out, out, const1_rtx));
10588
10589 emit_label (align_3_label);
10590 }
10591
10592 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10593 end_0_label);
10594
10595 if (TARGET_64BIT)
10596 emit_insn (gen_adddi3 (out, out, const1_rtx));
10597 else
10598 emit_insn (gen_addsi3 (out, out, const1_rtx));
10599 }
10600
10601 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10602 align this loop; that only makes the program larger and does not make
10603 it any faster. */
10604 emit_label (align_4_label);
10605
10606 mem = gen_rtx_MEM (SImode, out);
10607 emit_move_insn (scratch, mem);
10608 if (TARGET_64BIT)
10609 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10610 else
10611 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10612
10613 /* This formula yields a nonzero result iff one of the bytes is zero.
10614 This saves three branches inside the loop and many cycles. */
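/* The value computed below is ((x - 0x01010101) & ~x) & 0x80808080, which
   is nonzero iff some byte of x is zero: subtracting 1 from a zero byte
   borrows into bit 7 of that byte, while the ~x mask rejects bytes whose
   bit 7 was already set.  For example, x = 0x12005634 gives
   (0x10FF5533 & 0xEDFFA9CB) & 0x80808080 = 0x00800000, flagging the zero
   in the third byte.  */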
10615
10616 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10617 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10618 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10619 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10620 gen_int_mode (0x80808080, SImode)));
10621 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10622 align_4_label);
10623
10624 if (TARGET_CMOVE)
10625 {
10626 rtx reg = gen_reg_rtx (SImode);
10627 rtx reg2 = gen_reg_rtx (Pmode);
10628 emit_move_insn (reg, tmpreg);
10629 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10630
10631 /* If zero is not in the first two bytes, move two bytes forward. */
10632 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10633 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10634 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10635 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10636 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10637 reg,
10638 tmpreg)));
10639 /* Emit lea manually to avoid clobbering of flags. */
10640 emit_insn (gen_rtx_SET (SImode, reg2,
10641 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10642
10643 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10644 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10645 emit_insn (gen_rtx_SET (VOIDmode, out,
10646 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10647 reg2,
10648 out)));
10649
10650 }
10651 else
10652 {
10653 rtx end_2_label = gen_label_rtx ();
10654 /* Is zero in the first two bytes? */
10655
10656 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10657 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10658 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10659 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10660 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10661 pc_rtx);
10662 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10663 JUMP_LABEL (tmp) = end_2_label;
10664
10665 /* Not in the first two. Move two bytes forward. */
10666 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10667 if (TARGET_64BIT)
10668 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10669 else
10670 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10671
10672 emit_label (end_2_label);
10673
10674 }
10675
10676 /* Avoid branch in fixing the byte. */
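/* At this point OUT is either 4 or 6 bytes past the word that was loaded,
   and bit 7 of the low byte of TMPREG is set iff the zero byte is the
   earlier of the two remaining candidates.  Doubling that byte moves
   bit 7 into the carry flag, so the subtract-with-borrow below lowers
   OUT by 3 or 4 and leaves it pointing exactly at the zero byte.  */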
10677 tmpreg = gen_lowpart (QImode, tmpreg);
10678 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10679 if (TARGET_64BIT)
10680 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10681 else
10682 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10683
10684 emit_label (end_0_label);
10685 }
10686
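/* Emit the call sequence for a call to FNADDR.  CALLARG1 becomes the
   second operand of the generated CALL rtx; RETVAL, if nonzero, receives
   the return value; POP, if nonzero, is the callee-popped stack
   adjustment emitted in parallel with the call.  On x86-64, CALLARG2 is
   moved into %al, which the ABI uses to pass the number of SSE registers
   used by a varargs call.  */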
10687 void
10688 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10689 rtx retval, fnaddr, callarg1, callarg2, pop;
10690 {
10691 rtx use = NULL, call;
10692
10693 if (pop == const0_rtx)
10694 pop = NULL;
10695 if (TARGET_64BIT && pop)
10696 abort ();
10697
10698 #if TARGET_MACHO
10699 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10700 fnaddr = machopic_indirect_call_target (fnaddr);
10701 #else
10702 /* Static functions and indirect calls don't need the pic register. */
10703 if (! TARGET_64BIT && flag_pic
10704 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10705 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10706 use_reg (&use, pic_offset_table_rtx);
10707
10708 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10709 {
10710 rtx al = gen_rtx_REG (QImode, 0);
10711 emit_move_insn (al, callarg2);
10712 use_reg (&use, al);
10713 }
10714 #endif /* TARGET_MACHO */
10715
10716 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10717 {
10718 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10719 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10720 }
10721
10722 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10723 if (retval)
10724 call = gen_rtx_SET (VOIDmode, retval, call);
10725 if (pop)
10726 {
10727 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10728 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10729 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10730 }
10731
10732 call = emit_call_insn (call);
10733 if (use)
10734 CALL_INSN_FUNCTION_USAGE (call) = use;
10735 }
10736
10737 \f
10738 /* Clear stack slot assignments remembered from previous functions.
10739 This is called from INIT_EXPANDERS once before RTL is emitted for each
10740 function. */
10741
10742 static struct machine_function *
10743 ix86_init_machine_status ()
10744 {
10745 return ggc_alloc_cleared (sizeof (struct machine_function));
10746 }
10747
10748 /* Return a MEM corresponding to a stack slot with mode MODE.
10749 Allocate a new slot if necessary.
10750
10751 The RTL for a function can have several slots available: N is
10752 which slot to use. */
10753
10754 rtx
10755 assign_386_stack_local (mode, n)
10756 enum machine_mode mode;
10757 int n;
10758 {
10759 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10760 abort ();
10761
10762 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10763 ix86_stack_locals[(int) mode][n]
10764 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10765
10766 return ix86_stack_locals[(int) mode][n];
10767 }
10768
10769 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10770
10771 static GTY(()) rtx ix86_tls_symbol;
10772 rtx
10773 ix86_tls_get_addr ()
10774 {
10775
10776 if (!ix86_tls_symbol)
10777 {
10778 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10779 ? "___tls_get_addr"
10780 : "__tls_get_addr"));
10781 }
10782
10783 return ix86_tls_symbol;
10784 }
10785 \f
10786 /* Calculate the length of the memory address in the instruction
10787 encoding. Does not include the one-byte modrm, opcode, or prefix. */
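/* For example, (%eax) needs no extra bytes, 8(%eax) needs a one-byte
   displacement, 128(%eax) or a bare symbol needs a four-byte
   displacement, and any address with an index register such as
   (%eax,%ebx,4) needs the extra SIB byte.  */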
10788
10789 static int
10790 memory_address_length (addr)
10791 rtx addr;
10792 {
10793 struct ix86_address parts;
10794 rtx base, index, disp;
10795 int len;
10796
10797 if (GET_CODE (addr) == PRE_DEC
10798 || GET_CODE (addr) == POST_INC
10799 || GET_CODE (addr) == PRE_MODIFY
10800 || GET_CODE (addr) == POST_MODIFY)
10801 return 0;
10802
10803 if (! ix86_decompose_address (addr, &parts))
10804 abort ();
10805
10806 base = parts.base;
10807 index = parts.index;
10808 disp = parts.disp;
10809 len = 0;
10810
10811 /* Register Indirect. */
10812 if (base && !index && !disp)
10813 {
10814 /* Special cases: ebp and esp need the two-byte modrm form. */
10815 if (addr == stack_pointer_rtx
10816 || addr == arg_pointer_rtx
10817 || addr == frame_pointer_rtx
10818 || addr == hard_frame_pointer_rtx)
10819 len = 1;
10820 }
10821
10822 /* Direct Addressing. */
10823 else if (disp && !base && !index)
10824 len = 4;
10825
10826 else
10827 {
10828 /* Find the length of the displacement constant. */
10829 if (disp)
10830 {
10831 if (GET_CODE (disp) == CONST_INT
10832 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10833 len = 1;
10834 else
10835 len = 4;
10836 }
10837
10838 /* An index requires the two-byte modrm form. */
10839 if (index)
10840 len += 1;
10841 }
10842
10843 return len;
10844 }
10845
10846 /* Compute the default value for the "length_immediate" attribute. When
10847 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
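/* For example, "addl $123456, %eax" carries a four-byte immediate, while
   with SHORTFORM set "addl $4, %eax" can use the sign-extended eight-bit
   immediate form and so contributes only one byte.  */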
10848 int
10849 ix86_attr_length_immediate_default (insn, shortform)
10850 rtx insn;
10851 int shortform;
10852 {
10853 int len = 0;
10854 int i;
10855 extract_insn_cached (insn);
10856 for (i = recog_data.n_operands - 1; i >= 0; --i)
10857 if (CONSTANT_P (recog_data.operand[i]))
10858 {
10859 if (len)
10860 abort ();
10861 if (shortform
10862 && GET_CODE (recog_data.operand[i]) == CONST_INT
10863 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10864 len = 1;
10865 else
10866 {
10867 switch (get_attr_mode (insn))
10868 {
10869 case MODE_QI:
10870 len+=1;
10871 break;
10872 case MODE_HI:
10873 len+=2;
10874 break;
10875 case MODE_SI:
10876 len+=4;
10877 break;
10878 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10879 case MODE_DI:
10880 len+=4;
10881 break;
10882 default:
10883 fatal_insn ("unknown insn mode", insn);
10884 }
10885 }
10886 }
10887 return len;
10888 }
10889 /* Compute default value for "length_address" attribute. */
10890 int
10891 ix86_attr_length_address_default (insn)
10892 rtx insn;
10893 {
10894 int i;
10895 extract_insn_cached (insn);
10896 for (i = recog_data.n_operands - 1; i >= 0; --i)
10897 if (GET_CODE (recog_data.operand[i]) == MEM)
10898 {
10899 return memory_address_length (XEXP (recog_data.operand[i], 0));
10901 }
10902 return 0;
10903 }
10904 \f
10905 /* Return the maximum number of instructions a cpu can issue. */
10906
10907 static int
10908 ix86_issue_rate ()
10909 {
10910 switch (ix86_cpu)
10911 {
10912 case PROCESSOR_PENTIUM:
10913 case PROCESSOR_K6:
10914 return 2;
10915
10916 case PROCESSOR_PENTIUMPRO:
10917 case PROCESSOR_PENTIUM4:
10918 case PROCESSOR_ATHLON:
10919 return 3;
10920
10921 default:
10922 return 1;
10923 }
10924 }
10925
10926 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10927 by DEP_INSN and nothing else set by DEP_INSN. */
10928
10929 static int
10930 ix86_flags_dependant (insn, dep_insn, insn_type)
10931 rtx insn, dep_insn;
10932 enum attr_type insn_type;
10933 {
10934 rtx set, set2;
10935
10936 /* Simplify the test for uninteresting insns. */
10937 if (insn_type != TYPE_SETCC
10938 && insn_type != TYPE_ICMOV
10939 && insn_type != TYPE_FCMOV
10940 && insn_type != TYPE_IBR)
10941 return 0;
10942
10943 if ((set = single_set (dep_insn)) != 0)
10944 {
10945 set = SET_DEST (set);
10946 set2 = NULL_RTX;
10947 }
10948 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10949 && XVECLEN (PATTERN (dep_insn), 0) == 2
10950 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10951 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10952 {
10953 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10954 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10955 }
10956 else
10957 return 0;
10958
10959 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10960 return 0;
10961
10962 /* This test is true if the dependent insn reads the flags but
10963 not any other potentially set register. */
10964 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10965 return 0;
10966
10967 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10968 return 0;
10969
10970 return 1;
10971 }
10972
10973 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10974 address with operands set by DEP_INSN. */
10975
10976 static int
10977 ix86_agi_dependant (insn, dep_insn, insn_type)
10978 rtx insn, dep_insn;
10979 enum attr_type insn_type;
10980 {
10981 rtx addr;
10982
10983 if (insn_type == TYPE_LEA
10984 && TARGET_PENTIUM)
10985 {
10986 addr = PATTERN (insn);
10987 if (GET_CODE (addr) == SET)
10988 ;
10989 else if (GET_CODE (addr) == PARALLEL
10990 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10991 addr = XVECEXP (addr, 0, 0);
10992 else
10993 abort ();
10994 addr = SET_SRC (addr);
10995 }
10996 else
10997 {
10998 int i;
10999 extract_insn_cached (insn);
11000 for (i = recog_data.n_operands - 1; i >= 0; --i)
11001 if (GET_CODE (recog_data.operand[i]) == MEM)
11002 {
11003 addr = XEXP (recog_data.operand[i], 0);
11004 goto found;
11005 }
11006 return 0;
11007 found:;
11008 }
11009
11010 return modified_in_p (addr, dep_insn);
11011 }
11012
11013 static int
11014 ix86_adjust_cost (insn, link, dep_insn, cost)
11015 rtx insn, link, dep_insn;
11016 int cost;
11017 {
11018 enum attr_type insn_type, dep_insn_type;
11019 enum attr_memory memory, dep_memory;
11020 rtx set, set2;
11021 int dep_insn_code_number;
11022
11023 /* Anti and output dependencies have zero cost on all CPUs. */
11024 if (REG_NOTE_KIND (link) != 0)
11025 return 0;
11026
11027 dep_insn_code_number = recog_memoized (dep_insn);
11028
11029 /* If we can't recognize the insns, we can't really do anything. */
11030 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11031 return cost;
11032
11033 insn_type = get_attr_type (insn);
11034 dep_insn_type = get_attr_type (dep_insn);
11035
11036 switch (ix86_cpu)
11037 {
11038 case PROCESSOR_PENTIUM:
11039 /* Address Generation Interlock adds a cycle of latency. */
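/* For example, a load such as "movl (%eax), %ebx" issued in the cycle
   right after "addl $4, %eax" stalls for a cycle on the Pentium, because
   address generation needs %eax before the ALU result is available.  */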
11040 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11041 cost += 1;
11042
11043 /* ??? Compares pair with jump/setcc. */
11044 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11045 cost = 0;
11046
11047 /* Floating point stores require the value to be ready one cycle earlier. */
11048 if (insn_type == TYPE_FMOV
11049 && get_attr_memory (insn) == MEMORY_STORE
11050 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11051 cost += 1;
11052 break;
11053
11054 case PROCESSOR_PENTIUMPRO:
11055 memory = get_attr_memory (insn);
11056 dep_memory = get_attr_memory (dep_insn);
11057
11058 /* Since we can't represent delayed latencies of load+operation,
11059 increase the cost here for non-imov insns. */
11060 if (dep_insn_type != TYPE_IMOV
11061 && dep_insn_type != TYPE_FMOV
11062 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11063 cost += 1;
11064
11065 /* INT->FP conversion is expensive. */
11066 if (get_attr_fp_int_src (dep_insn))
11067 cost += 5;
11068
11069 /* There is one cycle extra latency between an FP op and a store. */
11070 if (insn_type == TYPE_FMOV
11071 && (set = single_set (dep_insn)) != NULL_RTX
11072 && (set2 = single_set (insn)) != NULL_RTX
11073 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11074 && GET_CODE (SET_DEST (set2)) == MEM)
11075 cost += 1;
11076
11077 /* Show the ability of the reorder buffer to hide the latency of a load by
11078 executing it in parallel with the previous instruction when the
11079 previous instruction is not needed to compute the address. */
11080 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11081 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11082 {
11083 /* Claim moves take one cycle, as the core can issue one load
11084 at a time and the next load can start a cycle later. */
11085 if (dep_insn_type == TYPE_IMOV
11086 || dep_insn_type == TYPE_FMOV)
11087 cost = 1;
11088 else if (cost > 1)
11089 cost--;
11090 }
11091 break;
11092
11093 case PROCESSOR_K6:
11094 memory = get_attr_memory (insn);
11095 dep_memory = get_attr_memory (dep_insn);
11096 /* The esp dependency is resolved before the instruction is really
11097 finished. */
11098 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11099 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11100 return 1;
11101
11102 /* Since we can't represent delayed latencies of load+operation,
11103 increase the cost here for non-imov insns. */
11104 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11105 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11106
11107 /* INT->FP conversion is expensive. */
11108 if (get_attr_fp_int_src (dep_insn))
11109 cost += 5;
11110
11111 /* Show the ability of the reorder buffer to hide the latency of a load by
11112 executing it in parallel with the previous instruction when the
11113 previous instruction is not needed to compute the address. */
11114 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11115 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11116 {
11117 /* Claim moves take one cycle, as the core can issue one load
11118 at a time and the next load can start a cycle later. */
11119 if (dep_insn_type == TYPE_IMOV
11120 || dep_insn_type == TYPE_FMOV)
11121 cost = 1;
11122 else if (cost > 2)
11123 cost -= 2;
11124 else
11125 cost = 1;
11126 }
11127 break;
11128
11129 case PROCESSOR_ATHLON:
11130 memory = get_attr_memory (insn);
11131 dep_memory = get_attr_memory (dep_insn);
11132
11133 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11134 {
11135 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11136 cost += 2;
11137 else
11138 cost += 3;
11139 }
11140 /* Show the ability of the reorder buffer to hide the latency of a load by
11141 executing it in parallel with the previous instruction when the
11142 previous instruction is not needed to compute the address. */
11143 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11144 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11145 {
11146 /* Claim moves take one cycle, as the core can issue one load
11147 at a time and the next load can start a cycle later. */
11148 if (dep_insn_type == TYPE_IMOV
11149 || dep_insn_type == TYPE_FMOV)
11150 cost = 0;
11151 else if (cost >= 3)
11152 cost -= 3;
11153 else
11154 cost = 0;
11155 }
11156
11157 default:
11158 break;
11159 }
11160
11161 return cost;
11162 }
11163
11164 static union
11165 {
11166 struct ppro_sched_data
11167 {
11168 rtx decode[3];
11169 int issued_this_cycle;
11170 } ppro;
11171 } ix86_sched_data;
11172
11173 static enum attr_ppro_uops
11174 ix86_safe_ppro_uops (insn)
11175 rtx insn;
11176 {
11177 if (recog_memoized (insn) >= 0)
11178 return get_attr_ppro_uops (insn);
11179 else
11180 return PPRO_UOPS_MANY;
11181 }
11182
11183 static void
11184 ix86_dump_ppro_packet (dump)
11185 FILE *dump;
11186 {
11187 if (ix86_sched_data.ppro.decode[0])
11188 {
11189 fprintf (dump, "PPRO packet: %d",
11190 INSN_UID (ix86_sched_data.ppro.decode[0]));
11191 if (ix86_sched_data.ppro.decode[1])
11192 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11193 if (ix86_sched_data.ppro.decode[2])
11194 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11195 fputc ('\n', dump);
11196 }
11197 }
11198
11199 /* We're beginning a new block. Initialize data structures as necessary. */
11200
11201 static void
11202 ix86_sched_init (dump, sched_verbose, veclen)
11203 FILE *dump ATTRIBUTE_UNUSED;
11204 int sched_verbose ATTRIBUTE_UNUSED;
11205 int veclen ATTRIBUTE_UNUSED;
11206 {
11207 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11208 }
11209
11210 /* Shift INSN to SLOT, and shift everything else down. */
11211
11212 static void
11213 ix86_reorder_insn (insnp, slot)
11214 rtx *insnp, *slot;
11215 {
11216 if (insnp != slot)
11217 {
11218 rtx insn = *insnp;
11219 do
11220 insnp[0] = insnp[1];
11221 while (++insnp != slot);
11222 *insnp = insn;
11223 }
11224 }
11225
11226 static void
11227 ix86_sched_reorder_ppro (ready, e_ready)
11228 rtx *ready;
11229 rtx *e_ready;
11230 {
11231 rtx decode[3];
11232 enum attr_ppro_uops cur_uops;
11233 int issued_this_cycle;
11234 rtx *insnp;
11235 int i;
11236
11237 /* At this point .ppro.decode contains the state of the three
11238 decoders from last "cycle". That is, those insns that were
11239 actually independent. But here we're scheduling for the
11240 decoder, and we may find things that are decodable in the
11241 same cycle. */
11242
11243 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11244 issued_this_cycle = 0;
11245
11246 insnp = e_ready;
11247 cur_uops = ix86_safe_ppro_uops (*insnp);
11248
11249 /* If the decoders are empty, and we've a complex insn at the
11250 head of the priority queue, let it issue without complaint. */
11251 if (decode[0] == NULL)
11252 {
11253 if (cur_uops == PPRO_UOPS_MANY)
11254 {
11255 decode[0] = *insnp;
11256 goto ppro_done;
11257 }
11258
11259 /* Otherwise, search for a 2-4 uop insn to issue. */
11260 while (cur_uops != PPRO_UOPS_FEW)
11261 {
11262 if (insnp == ready)
11263 break;
11264 cur_uops = ix86_safe_ppro_uops (*--insnp);
11265 }
11266
11267 /* If so, move it to the head of the line. */
11268 if (cur_uops == PPRO_UOPS_FEW)
11269 ix86_reorder_insn (insnp, e_ready);
11270
11271 /* Issue the head of the queue. */
11272 issued_this_cycle = 1;
11273 decode[0] = *e_ready--;
11274 }
11275
11276 /* Look for simple insns to fill in the other two slots. */
11277 for (i = 1; i < 3; ++i)
11278 if (decode[i] == NULL)
11279 {
11280 if (ready > e_ready)
11281 goto ppro_done;
11282
11283 insnp = e_ready;
11284 cur_uops = ix86_safe_ppro_uops (*insnp);
11285 while (cur_uops != PPRO_UOPS_ONE)
11286 {
11287 if (insnp == ready)
11288 break;
11289 cur_uops = ix86_safe_ppro_uops (*--insnp);
11290 }
11291
11292 /* Found one. Move it to the head of the queue and issue it. */
11293 if (cur_uops == PPRO_UOPS_ONE)
11294 {
11295 ix86_reorder_insn (insnp, e_ready);
11296 decode[i] = *e_ready--;
11297 issued_this_cycle++;
11298 continue;
11299 }
11300
11301 /* ??? Didn't find one. Ideally, here we would do a lazy split
11302 of 2-uop insns, issue one and queue the other. */
11303 }
11304
11305 ppro_done:
11306 if (issued_this_cycle == 0)
11307 issued_this_cycle = 1;
11308 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11309 }
11310
11311 /* We are about to begin issuing insns for this clock cycle.
11312 Override the default sort algorithm to better slot instructions. */
11313 static int
11314 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11315 FILE *dump ATTRIBUTE_UNUSED;
11316 int sched_verbose ATTRIBUTE_UNUSED;
11317 rtx *ready;
11318 int *n_readyp;
11319 int clock_var ATTRIBUTE_UNUSED;
11320 {
11321 int n_ready = *n_readyp;
11322 rtx *e_ready = ready + n_ready - 1;
11323
11324 /* Make sure to go ahead and initialize key items in
11325 ix86_sched_data if we are not going to bother trying to
11326 reorder the ready queue. */
11327 if (n_ready < 2)
11328 {
11329 ix86_sched_data.ppro.issued_this_cycle = 1;
11330 goto out;
11331 }
11332
11333 switch (ix86_cpu)
11334 {
11335 default:
11336 break;
11337
11338 case PROCESSOR_PENTIUMPRO:
11339 ix86_sched_reorder_ppro (ready, e_ready);
11340 break;
11341 }
11342
11343 out:
11344 return ix86_issue_rate ();
11345 }
11346
11347 /* We are about to issue INSN. Return the number of insns left on the
11348 ready queue that can be issued this cycle. */
11349
11350 static int
11351 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11352 FILE *dump;
11353 int sched_verbose;
11354 rtx insn;
11355 int can_issue_more;
11356 {
11357 int i;
11358 switch (ix86_cpu)
11359 {
11360 default:
11361 return can_issue_more - 1;
11362
11363 case PROCESSOR_PENTIUMPRO:
11364 {
11365 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11366
11367 if (uops == PPRO_UOPS_MANY)
11368 {
11369 if (sched_verbose)
11370 ix86_dump_ppro_packet (dump);
11371 ix86_sched_data.ppro.decode[0] = insn;
11372 ix86_sched_data.ppro.decode[1] = NULL;
11373 ix86_sched_data.ppro.decode[2] = NULL;
11374 if (sched_verbose)
11375 ix86_dump_ppro_packet (dump);
11376 ix86_sched_data.ppro.decode[0] = NULL;
11377 }
11378 else if (uops == PPRO_UOPS_FEW)
11379 {
11380 if (sched_verbose)
11381 ix86_dump_ppro_packet (dump);
11382 ix86_sched_data.ppro.decode[0] = insn;
11383 ix86_sched_data.ppro.decode[1] = NULL;
11384 ix86_sched_data.ppro.decode[2] = NULL;
11385 }
11386 else
11387 {
11388 for (i = 0; i < 3; ++i)
11389 if (ix86_sched_data.ppro.decode[i] == NULL)
11390 {
11391 ix86_sched_data.ppro.decode[i] = insn;
11392 break;
11393 }
11394 if (i == 3)
11395 abort ();
11396 if (i == 2)
11397 {
11398 if (sched_verbose)
11399 ix86_dump_ppro_packet (dump);
11400 ix86_sched_data.ppro.decode[0] = NULL;
11401 ix86_sched_data.ppro.decode[1] = NULL;
11402 ix86_sched_data.ppro.decode[2] = NULL;
11403 }
11404 }
11405 }
11406 return --ix86_sched_data.ppro.issued_this_cycle;
11407 }
11408 }
11409
11410 static int
11411 ia32_use_dfa_pipeline_interface ()
11412 {
11413 if (ix86_cpu == PROCESSOR_PENTIUM)
11414 return 1;
11415 return 0;
11416 }
11417
11418 /* How many alternative schedules to try. This should be as wide as the
11419 scheduling freedom in the DFA, but no wider. Making this value too
11420 large results in extra work for the scheduler. */
11421
11422 static int
11423 ia32_multipass_dfa_lookahead ()
11424 {
11425 if (ix86_cpu == PROCESSOR_PENTIUM)
11426 return 2;
11427 else
11428 return 0;
11429 }
11430
11431 \f
11432 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11433 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11434 appropriate. */
11435
11436 void
11437 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11438 rtx insns;
11439 rtx dstref, srcref, dstreg, srcreg;
11440 {
11441 rtx insn;
11442
11443 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11444 if (INSN_P (insn))
11445 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11446 dstreg, srcreg);
11447 }
11448
11449 /* Subroutine of above to actually do the updating by recursively walking
11450 the rtx. */
11451
11452 static void
11453 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11454 rtx x;
11455 rtx dstref, srcref, dstreg, srcreg;
11456 {
11457 enum rtx_code code = GET_CODE (x);
11458 const char *format_ptr = GET_RTX_FORMAT (code);
11459 int i, j;
11460
11461 if (code == MEM && XEXP (x, 0) == dstreg)
11462 MEM_COPY_ATTRIBUTES (x, dstref);
11463 else if (code == MEM && XEXP (x, 0) == srcreg)
11464 MEM_COPY_ATTRIBUTES (x, srcref);
11465
11466 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11467 {
11468 if (*format_ptr == 'e')
11469 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11470 dstreg, srcreg);
11471 else if (*format_ptr == 'E')
11472 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11473 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11474 dstreg, srcreg);
11475 }
11476 }
11477 \f
11478 /* Compute the alignment given to a constant that is being placed in memory.
11479 EXP is the constant and ALIGN is the alignment that the object would
11480 ordinarily have.
11481 The value of this function is used instead of that alignment to align
11482 the object. */
11483
11484 int
11485 ix86_constant_alignment (exp, align)
11486 tree exp;
11487 int align;
11488 {
11489 if (TREE_CODE (exp) == REAL_CST)
11490 {
11491 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11492 return 64;
11493 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11494 return 128;
11495 }
11496 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11497 && align < 256)
11498 return 256;
11499
11500 return align;
11501 }
11502
11503 /* Compute the alignment for a static variable.
11504 TYPE is the data type, and ALIGN is the alignment that
11505 the object would ordinarily have. The value of this function is used
11506 instead of that alignment to align the object. */
11507
11508 int
11509 ix86_data_alignment (type, align)
11510 tree type;
11511 int align;
11512 {
11513 if (AGGREGATE_TYPE_P (type)
11514 && TYPE_SIZE (type)
11515 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11516 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11517 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11518 return 256;
11519
11520 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
11521 to a 16-byte boundary. */
11522 if (TARGET_64BIT)
11523 {
11524 if (AGGREGATE_TYPE_P (type)
11525 && TYPE_SIZE (type)
11526 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11527 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11528 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11529 return 128;
11530 }
11531
11532 if (TREE_CODE (type) == ARRAY_TYPE)
11533 {
11534 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11535 return 64;
11536 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11537 return 128;
11538 }
11539 else if (TREE_CODE (type) == COMPLEX_TYPE)
11540 {
11541
11542 if (TYPE_MODE (type) == DCmode && align < 64)
11543 return 64;
11544 if (TYPE_MODE (type) == XCmode && align < 128)
11545 return 128;
11546 }
11547 else if ((TREE_CODE (type) == RECORD_TYPE
11548 || TREE_CODE (type) == UNION_TYPE
11549 || TREE_CODE (type) == QUAL_UNION_TYPE)
11550 && TYPE_FIELDS (type))
11551 {
11552 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11553 return 64;
11554 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11555 return 128;
11556 }
11557 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11558 || TREE_CODE (type) == INTEGER_TYPE)
11559 {
11560 if (TYPE_MODE (type) == DFmode && align < 64)
11561 return 64;
11562 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11563 return 128;
11564 }
11565
11566 return align;
11567 }
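/* As an illustration of the rules above: a static `char buf[64]'
   (512 bits) is given 256-bit (32-byte) alignment by the first test,
   and an array of doubles gets at least 64-bit alignment even though
   the default alignment of double on ia32 is only 32 bits.  */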
11568
11569 /* Compute the alignment for a local variable.
11570 TYPE is the data type, and ALIGN is the alignment that
11571 the object would ordinarily have. The value of this function is used
11572 instead of that alignment to align the object. */
11573
11574 int
11575 ix86_local_alignment (type, align)
11576 tree type;
11577 int align;
11578 {
11579 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
11580 to a 16-byte boundary. */
11581 if (TARGET_64BIT)
11582 {
11583 if (AGGREGATE_TYPE_P (type)
11584 && TYPE_SIZE (type)
11585 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11586 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11587 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11588 return 128;
11589 }
11590 if (TREE_CODE (type) == ARRAY_TYPE)
11591 {
11592 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11593 return 64;
11594 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11595 return 128;
11596 }
11597 else if (TREE_CODE (type) == COMPLEX_TYPE)
11598 {
11599 if (TYPE_MODE (type) == DCmode && align < 64)
11600 return 64;
11601 if (TYPE_MODE (type) == XCmode && align < 128)
11602 return 128;
11603 }
11604 else if ((TREE_CODE (type) == RECORD_TYPE
11605 || TREE_CODE (type) == UNION_TYPE
11606 || TREE_CODE (type) == QUAL_UNION_TYPE)
11607 && TYPE_FIELDS (type))
11608 {
11609 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11610 return 64;
11611 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11612 return 128;
11613 }
11614 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11615 || TREE_CODE (type) == INTEGER_TYPE)
11616 {
11617
11618 if (TYPE_MODE (type) == DFmode && align < 64)
11619 return 64;
11620 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11621 return 128;
11622 }
11623 return align;
11624 }
11625 \f
11626 /* Emit RTL insns to initialize the variable parts of a trampoline.
11627 FNADDR is an RTX for the address of the function's pure code.
11628 CXT is an RTX for the static chain value for the function. */
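/* For reference, the code emitted below for the 32-bit case is 10 bytes:

	b9 <cxt>	movl  $CXT, %ecx
	e9 <disp>	jmp   FNADDR		; disp = FNADDR - (TRAMP + 10)

   The 64-bit case loads FNADDR into %r11 (a 6-byte movl when the value
   is zero-extended, otherwise a 10-byte movabs), loads CXT into %r10
   with movabs, and finishes with an indirect jmp through %r11.  */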
11629 void
11630 x86_initialize_trampoline (tramp, fnaddr, cxt)
11631 rtx tramp, fnaddr, cxt;
11632 {
11633 if (!TARGET_64BIT)
11634 {
11635 /* Compute offset from the end of the jmp to the target function. */
11636 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11637 plus_constant (tramp, 10),
11638 NULL_RTX, 1, OPTAB_DIRECT);
11639 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11640 gen_int_mode (0xb9, QImode));
11641 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11642 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11643 gen_int_mode (0xe9, QImode));
11644 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11645 }
11646 else
11647 {
11648 int offset = 0;
11649 /* Try to load the address using the shorter movl instead of movabs.
11650 We may want to support movq for kernel mode, but the kernel does not
11651 use trampolines at the moment. */
11652 if (x86_64_zero_extended_value (fnaddr))
11653 {
11654 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11655 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11656 gen_int_mode (0xbb41, HImode));
11657 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11658 gen_lowpart (SImode, fnaddr));
11659 offset += 6;
11660 }
11661 else
11662 {
11663 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11664 gen_int_mode (0xbb49, HImode));
11665 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11666 fnaddr);
11667 offset += 10;
11668 }
11669 /* Load static chain using movabs to r10. */
11670 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11671 gen_int_mode (0xba49, HImode));
11672 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11673 cxt);
11674 offset += 10;
11675 /* Jump to r11. */
11676 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11677 gen_int_mode (0xff49, HImode));
11678 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11679 gen_int_mode (0xe3, QImode));
11680 offset += 3;
11681 if (offset > TRAMPOLINE_SIZE)
11682 abort ();
11683 }
11684 }
11685 \f
11686 #define def_builtin(MASK, NAME, TYPE, CODE) \
11687 do { \
11688 if ((MASK) & target_flags) \
11689 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11690 NULL, NULL_TREE); \
11691 } while (0)
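/* For example, the call

     def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd",
		  v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);

   further down only registers the builtin when MASK_SSE2 is set in
   target_flags (i.e. when -msse2 or an option implying it is in
   effect); otherwise the name is simply not made available.  */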
11692
11693 struct builtin_description
11694 {
11695 const unsigned int mask;
11696 const enum insn_code icode;
11697 const char *const name;
11698 const enum ix86_builtins code;
11699 const enum rtx_code comparison;
11700 const unsigned int flag;
11701 };
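/* In the tables that follow, a null NAME marks an entry whose builtin
   is registered by hand later with a more specific type.  For the
   comparison entries, COMPARISON is the rtx code handed to the
   expander and a nonzero FLAG tells it to swap the operands, which is
   how the GT/GE variants reuse the LT/LE patterns.  */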
11702
11703 /* Used for builtins that are enabled both by -msse and -msse2. */
11704 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11705
11706 static const struct builtin_description bdesc_comi[] =
11707 {
11708 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11709 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11710 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11711 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11712 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11713 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11714 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11715 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11716 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11717 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11718 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11719 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11720 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11721 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11722 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11723 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11724 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11725 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11726 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11728 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11729 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11730 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11731 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11732 };
11733
11734 static const struct builtin_description bdesc_2arg[] =
11735 {
11736 /* SSE */
11737 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11738 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11739 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11740 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11741 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11742 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11743 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11744 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11745
11746 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11747 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11748 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11749 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11750 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11751 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11752 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11753 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11754 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11755 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11756 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11757 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11758 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11759 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11760 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11761 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11762 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11763 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11764 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11765 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11766 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11767 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11768 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11769 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11770
11771 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11772 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11773 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11774 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11775
11776 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11779 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11780 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11781
11782 /* MMX */
11783 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11784 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11785 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11786 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11787 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11788 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11789
11790 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11791 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11792 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11793 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11794 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11795 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11796 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11797 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11798
11799 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11800 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11801 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11802
11803 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11804 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11805 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11806 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11807
11808 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11809 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11810
11811 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11812 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11813 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11814 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11815 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11816 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11817
11818 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11819 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11820 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11821 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11822
11823 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11824 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11825 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11826 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11827 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11828 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11829
11830 /* Special. */
11831 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11832 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11833 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11834
11835 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11836 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11837
11838 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11839 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11840 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11841 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11842 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11843 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11844
11845 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11846 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11847 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11848 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11849 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11850 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11851
11852 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11853 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11854 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11855 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11856
11857 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11858 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11859
11860 /* SSE2 */
11861 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11862 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11863 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11869
11870 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11871 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11872 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11873 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11874 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11875 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11876 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11877 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11878 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11879 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11880 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11881 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11882 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11883 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11884 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11885 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11886 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11887 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11888 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11889 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11890 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11891 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11892 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11893 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11894
11895 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11899
11900 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11904
11905 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11908
11909 /* SSE2 MMX */
11910 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11918
11919 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11920 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11921 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11922 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11923 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11924 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11925 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11926 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11927
11928 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11929 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11930 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11932
11933 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11934 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11937
11938 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11940
11941 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11942 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11946 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11947
11948 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11949 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11952
11953 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11956 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11959
11960 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11963
11964 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11966
11967 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11969 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11973
11974 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11977 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11978 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11979 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11980
11981 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11982 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11985
11986 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11987
11988 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11989 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11991 };
11992
11993 static const struct builtin_description bdesc_1arg[] =
11994 {
11995 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11996 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11997
11998 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11999 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12000 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12001
12002 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12003 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12004 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12005 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12006
12007 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12010
12011 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12012
12013 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12015
12016 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12020 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12021
12022 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12023
12024 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12026
12027 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12029 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
12030 };
12031
12032 void
12033 ix86_init_builtins ()
12034 {
12035 if (TARGET_MMX)
12036 ix86_init_mmx_sse_builtins ();
12037 }
12038
12039 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12040 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
12041 builtins are defined. */
12042 static void
12043 ix86_init_mmx_sse_builtins ()
12044 {
12045 const struct builtin_description * d;
12046 size_t i;
12047
12048 tree pchar_type_node = build_pointer_type (char_type_node);
12049 tree pfloat_type_node = build_pointer_type (float_type_node);
12050 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12051 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12052 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12053
12054 /* Comparisons. */
12055 tree int_ftype_v4sf_v4sf
12056 = build_function_type_list (integer_type_node,
12057 V4SF_type_node, V4SF_type_node, NULL_TREE);
12058 tree v4si_ftype_v4sf_v4sf
12059 = build_function_type_list (V4SI_type_node,
12060 V4SF_type_node, V4SF_type_node, NULL_TREE);
12061 /* MMX/SSE/integer conversions. */
12062 tree int_ftype_v4sf
12063 = build_function_type_list (integer_type_node,
12064 V4SF_type_node, NULL_TREE);
12065 tree int_ftype_v8qi
12066 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12067 tree v4sf_ftype_v4sf_int
12068 = build_function_type_list (V4SF_type_node,
12069 V4SF_type_node, integer_type_node, NULL_TREE);
12070 tree v4sf_ftype_v4sf_v2si
12071 = build_function_type_list (V4SF_type_node,
12072 V4SF_type_node, V2SI_type_node, NULL_TREE);
12073 tree int_ftype_v4hi_int
12074 = build_function_type_list (integer_type_node,
12075 V4HI_type_node, integer_type_node, NULL_TREE);
12076 tree v4hi_ftype_v4hi_int_int
12077 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12078 integer_type_node, integer_type_node,
12079 NULL_TREE);
12080 /* Miscellaneous. */
12081 tree v8qi_ftype_v4hi_v4hi
12082 = build_function_type_list (V8QI_type_node,
12083 V4HI_type_node, V4HI_type_node, NULL_TREE);
12084 tree v4hi_ftype_v2si_v2si
12085 = build_function_type_list (V4HI_type_node,
12086 V2SI_type_node, V2SI_type_node, NULL_TREE);
12087 tree v4sf_ftype_v4sf_v4sf_int
12088 = build_function_type_list (V4SF_type_node,
12089 V4SF_type_node, V4SF_type_node,
12090 integer_type_node, NULL_TREE);
12091 tree v2si_ftype_v4hi_v4hi
12092 = build_function_type_list (V2SI_type_node,
12093 V4HI_type_node, V4HI_type_node, NULL_TREE);
12094 tree v4hi_ftype_v4hi_int
12095 = build_function_type_list (V4HI_type_node,
12096 V4HI_type_node, integer_type_node, NULL_TREE);
12097 tree v4hi_ftype_v4hi_di
12098 = build_function_type_list (V4HI_type_node,
12099 V4HI_type_node, long_long_unsigned_type_node,
12100 NULL_TREE);
12101 tree v2si_ftype_v2si_di
12102 = build_function_type_list (V2SI_type_node,
12103 V2SI_type_node, long_long_unsigned_type_node,
12104 NULL_TREE);
12105 tree void_ftype_void
12106 = build_function_type (void_type_node, void_list_node);
12107 tree void_ftype_unsigned
12108 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12109 tree unsigned_ftype_void
12110 = build_function_type (unsigned_type_node, void_list_node);
12111 tree di_ftype_void
12112 = build_function_type (long_long_unsigned_type_node, void_list_node);
12113 tree v4sf_ftype_void
12114 = build_function_type (V4SF_type_node, void_list_node);
12115 tree v2si_ftype_v4sf
12116 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12117 /* Loads/stores. */
12118 tree void_ftype_v8qi_v8qi_pchar
12119 = build_function_type_list (void_type_node,
12120 V8QI_type_node, V8QI_type_node,
12121 pchar_type_node, NULL_TREE);
12122 tree v4sf_ftype_pfloat
12123 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12124 /* @@@ the type is bogus */
12125 tree v4sf_ftype_v4sf_pv2si
12126 = build_function_type_list (V4SF_type_node,
12127 V4SF_type_node, pv2di_type_node, NULL_TREE);
12128 tree void_ftype_pv2si_v4sf
12129 = build_function_type_list (void_type_node,
12130 pv2di_type_node, V4SF_type_node, NULL_TREE);
12131 tree void_ftype_pfloat_v4sf
12132 = build_function_type_list (void_type_node,
12133 pfloat_type_node, V4SF_type_node, NULL_TREE);
12134 tree void_ftype_pdi_di
12135 = build_function_type_list (void_type_node,
12136 pdi_type_node, long_long_unsigned_type_node,
12137 NULL_TREE);
12138 tree void_ftype_pv2di_v2di
12139 = build_function_type_list (void_type_node,
12140 pv2di_type_node, V2DI_type_node, NULL_TREE);
12141 /* Normal vector unops. */
12142 tree v4sf_ftype_v4sf
12143 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12144
12145 /* Normal vector binops. */
12146 tree v4sf_ftype_v4sf_v4sf
12147 = build_function_type_list (V4SF_type_node,
12148 V4SF_type_node, V4SF_type_node, NULL_TREE);
12149 tree v8qi_ftype_v8qi_v8qi
12150 = build_function_type_list (V8QI_type_node,
12151 V8QI_type_node, V8QI_type_node, NULL_TREE);
12152 tree v4hi_ftype_v4hi_v4hi
12153 = build_function_type_list (V4HI_type_node,
12154 V4HI_type_node, V4HI_type_node, NULL_TREE);
12155 tree v2si_ftype_v2si_v2si
12156 = build_function_type_list (V2SI_type_node,
12157 V2SI_type_node, V2SI_type_node, NULL_TREE);
12158 tree di_ftype_di_di
12159 = build_function_type_list (long_long_unsigned_type_node,
12160 long_long_unsigned_type_node,
12161 long_long_unsigned_type_node, NULL_TREE);
12162
12163 tree v2si_ftype_v2sf
12164 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12165 tree v2sf_ftype_v2si
12166 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12167 tree v2si_ftype_v2si
12168 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12169 tree v2sf_ftype_v2sf
12170 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12171 tree v2sf_ftype_v2sf_v2sf
12172 = build_function_type_list (V2SF_type_node,
12173 V2SF_type_node, V2SF_type_node, NULL_TREE);
12174 tree v2si_ftype_v2sf_v2sf
12175 = build_function_type_list (V2SI_type_node,
12176 V2SF_type_node, V2SF_type_node, NULL_TREE);
12177 tree pint_type_node = build_pointer_type (integer_type_node);
12178 tree pdouble_type_node = build_pointer_type (double_type_node);
12179 tree int_ftype_v2df_v2df
12180 = build_function_type_list (integer_type_node,
12181 V2DF_type_node, V2DF_type_node, NULL_TREE);
12182
12183 tree ti_ftype_void
12184 = build_function_type (intTI_type_node, void_list_node);
12185 tree ti_ftype_ti_ti
12186 = build_function_type_list (intTI_type_node,
12187 intTI_type_node, intTI_type_node, NULL_TREE);
12188 tree void_ftype_pvoid
12189 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12190 tree v2di_ftype_di
12191 = build_function_type_list (V2DI_type_node,
12192 long_long_unsigned_type_node, NULL_TREE);
12193 tree v4sf_ftype_v4si
12194 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12195 tree v4si_ftype_v4sf
12196 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12197 tree v2df_ftype_v4si
12198 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12199 tree v4si_ftype_v2df
12200 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12201 tree v2si_ftype_v2df
12202 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12203 tree v4sf_ftype_v2df
12204 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12205 tree v2df_ftype_v2si
12206 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12207 tree v2df_ftype_v4sf
12208 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12209 tree int_ftype_v2df
12210 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12211 tree v2df_ftype_v2df_int
12212 = build_function_type_list (V2DF_type_node,
12213 V2DF_type_node, integer_type_node, NULL_TREE);
12214 tree v4sf_ftype_v4sf_v2df
12215 = build_function_type_list (V4SF_type_node,
12216 V4SF_type_node, V2DF_type_node, NULL_TREE);
12217 tree v2df_ftype_v2df_v4sf
12218 = build_function_type_list (V2DF_type_node,
12219 V2DF_type_node, V4SF_type_node, NULL_TREE);
12220 tree v2df_ftype_v2df_v2df_int
12221 = build_function_type_list (V2DF_type_node,
12222 V2DF_type_node, V2DF_type_node,
12223 integer_type_node,
12224 NULL_TREE);
12225 tree v2df_ftype_v2df_pv2si
12226 = build_function_type_list (V2DF_type_node,
12227 V2DF_type_node, pv2si_type_node, NULL_TREE);
12228 tree void_ftype_pv2si_v2df
12229 = build_function_type_list (void_type_node,
12230 pv2si_type_node, V2DF_type_node, NULL_TREE);
12231 tree void_ftype_pdouble_v2df
12232 = build_function_type_list (void_type_node,
12233 pdouble_type_node, V2DF_type_node, NULL_TREE);
12234 tree void_ftype_pint_int
12235 = build_function_type_list (void_type_node,
12236 pint_type_node, integer_type_node, NULL_TREE);
12237 tree void_ftype_v16qi_v16qi_pchar
12238 = build_function_type_list (void_type_node,
12239 V16QI_type_node, V16QI_type_node,
12240 pchar_type_node, NULL_TREE);
12241 tree v2df_ftype_pdouble
12242 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12243 tree v2df_ftype_v2df_v2df
12244 = build_function_type_list (V2DF_type_node,
12245 V2DF_type_node, V2DF_type_node, NULL_TREE);
12246 tree v16qi_ftype_v16qi_v16qi
12247 = build_function_type_list (V16QI_type_node,
12248 V16QI_type_node, V16QI_type_node, NULL_TREE);
12249 tree v8hi_ftype_v8hi_v8hi
12250 = build_function_type_list (V8HI_type_node,
12251 V8HI_type_node, V8HI_type_node, NULL_TREE);
12252 tree v4si_ftype_v4si_v4si
12253 = build_function_type_list (V4SI_type_node,
12254 V4SI_type_node, V4SI_type_node, NULL_TREE);
12255 tree v2di_ftype_v2di_v2di
12256 = build_function_type_list (V2DI_type_node,
12257 V2DI_type_node, V2DI_type_node, NULL_TREE);
12258 tree v2di_ftype_v2df_v2df
12259 = build_function_type_list (V2DI_type_node,
12260 V2DF_type_node, V2DF_type_node, NULL_TREE);
12261 tree v2df_ftype_v2df
12262 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12263 tree v2df_ftype_double
12264 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12265 tree v2df_ftype_double_double
12266 = build_function_type_list (V2DF_type_node,
12267 double_type_node, double_type_node, NULL_TREE);
12268 tree int_ftype_v8hi_int
12269 = build_function_type_list (integer_type_node,
12270 V8HI_type_node, integer_type_node, NULL_TREE);
12271 tree v8hi_ftype_v8hi_int_int
12272 = build_function_type_list (V8HI_type_node,
12273 V8HI_type_node, integer_type_node,
12274 integer_type_node, NULL_TREE);
12275 tree v2di_ftype_v2di_int
12276 = build_function_type_list (V2DI_type_node,
12277 V2DI_type_node, integer_type_node, NULL_TREE);
12278 tree v4si_ftype_v4si_int
12279 = build_function_type_list (V4SI_type_node,
12280 V4SI_type_node, integer_type_node, NULL_TREE);
12281 tree v8hi_ftype_v8hi_int
12282 = build_function_type_list (V8HI_type_node,
12283 V8HI_type_node, integer_type_node, NULL_TREE);
12284 tree v8hi_ftype_v8hi_v2di
12285 = build_function_type_list (V8HI_type_node,
12286 V8HI_type_node, V2DI_type_node, NULL_TREE);
12287 tree v4si_ftype_v4si_v2di
12288 = build_function_type_list (V4SI_type_node,
12289 V4SI_type_node, V2DI_type_node, NULL_TREE);
12290 tree v4si_ftype_v8hi_v8hi
12291 = build_function_type_list (V4SI_type_node,
12292 V8HI_type_node, V8HI_type_node, NULL_TREE);
12293 tree di_ftype_v8qi_v8qi
12294 = build_function_type_list (long_long_unsigned_type_node,
12295 V8QI_type_node, V8QI_type_node, NULL_TREE);
12296 tree v2di_ftype_v16qi_v16qi
12297 = build_function_type_list (V2DI_type_node,
12298 V16QI_type_node, V16QI_type_node, NULL_TREE);
12299 tree int_ftype_v16qi
12300 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12301
12302 /* Add all builtins that are more or less simple operations on two
12303 operands. */
12304 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12305 {
12306 /* Use one of the operands; the target can have a different mode for
12307 mask-generating compares. */
12308 enum machine_mode mode;
12309 tree type;
12310
12311 if (d->name == 0)
12312 continue;
12313 mode = insn_data[d->icode].operand[1].mode;
12314
12315 switch (mode)
12316 {
12317 case V16QImode:
12318 type = v16qi_ftype_v16qi_v16qi;
12319 break;
12320 case V8HImode:
12321 type = v8hi_ftype_v8hi_v8hi;
12322 break;
12323 case V4SImode:
12324 type = v4si_ftype_v4si_v4si;
12325 break;
12326 case V2DImode:
12327 type = v2di_ftype_v2di_v2di;
12328 break;
12329 case V2DFmode:
12330 type = v2df_ftype_v2df_v2df;
12331 break;
12332 case TImode:
12333 type = ti_ftype_ti_ti;
12334 break;
12335 case V4SFmode:
12336 type = v4sf_ftype_v4sf_v4sf;
12337 break;
12338 case V8QImode:
12339 type = v8qi_ftype_v8qi_v8qi;
12340 break;
12341 case V4HImode:
12342 type = v4hi_ftype_v4hi_v4hi;
12343 break;
12344 case V2SImode:
12345 type = v2si_ftype_v2si_v2si;
12346 break;
12347 case DImode:
12348 type = di_ftype_di_di;
12349 break;
12350
12351 default:
12352 abort ();
12353 }
12354
12355 /* Override for comparisons. */
12356 if (d->icode == CODE_FOR_maskcmpv4sf3
12357 || d->icode == CODE_FOR_maskncmpv4sf3
12358 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12359 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12360 type = v4si_ftype_v4sf_v4sf;
12361
12362 if (d->icode == CODE_FOR_maskcmpv2df3
12363 || d->icode == CODE_FOR_maskncmpv2df3
12364 || d->icode == CODE_FOR_vmmaskcmpv2df3
12365 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12366 type = v2di_ftype_v2df_v2df;
12367
12368 def_builtin (d->mask, d->name, type, d->code);
12369 }
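/* For instance, the "__builtin_ia32_addps" entry uses CODE_FOR_addv4sf3,
   whose operand 1 has mode V4SFmode, so it is registered with type
   v4sf_ftype_v4sf_v4sf; the packed compares would get the same type
   from their operands, which is why they are overridden above to
   return an integer vector instead.  */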
12370
12371 /* Add the remaining MMX insns with somewhat more complicated types. */
12372 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12373 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12374 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12375 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12376 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12377 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12378 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12379
12380 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12381 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12382 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12383
12384 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12385 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12386
12387 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12388 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12389
12390 /* comi/ucomi insns. */
12391 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12392 if (d->mask == MASK_SSE2)
12393 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12394 else
12395 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12396
12397 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12398 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12399 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12400
12401 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12402 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12403 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12404 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12405 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12406 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12407
12408 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12409 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12410 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12411 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12412
12413 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12414 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12415
12416 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12417
12418 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12419 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12420 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12421 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12422 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12423 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12424
12425 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12426 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12427 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12428 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12429
12430 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12431 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12432 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12433 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12434
12435 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12436
12437 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12438
12439 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12440 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12441 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12442 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12443 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12444 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12445
12446 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12447
12448 /* Original 3DNow! */
12449 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12450 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12451 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12452 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12453 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12454 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12455 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12467 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12468 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12469
12470 /* 3DNow! extension as used in the Athlon CPU. */
12471 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12472 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12473 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12474 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12475 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12476 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12477
12478 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12479
12480 /* SSE2 */
12481 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12483
12484 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12485 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12486
12487 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12488 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12490 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12491 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12492 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12493
12494 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12496 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12498
12499 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12501 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12502 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12503 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12504
12505 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12506 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12507 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12508 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12509
12510 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12511 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12512
12513 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12514
12515 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12517
12518 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12519 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12520 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12522 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12523
12524 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12525
12526 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12528
12529 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12532
12533 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12534 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12536
12537 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12539 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12542 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12544
12545 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12548
12549 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12550 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12552
12553 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12556
12557 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12559
12560 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12561 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12563
12564 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12565 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12567
12568 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12569 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12570
12571 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12572 }
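/* Illustrative note (not from the original sources): these builtins are
   normally reached through intrinsic wrapper headers such as
   <xmmintrin.h> rather than called directly.  A direct use matching the
   signatures registered above might look roughly like

       __v2df x = __builtin_ia32_loadupd (p);
       __builtin_ia32_storeupd (q, x);

   where the __v2df vector typedef and the double pointers p and q are
   assumed for the sake of the example.  */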
12573
12574 /* Errors in the source file can cause expand_expr to return const0_rtx
12575 where we expect a vector. To avoid crashing, use one of the vector
12576 clear instructions. */
12577 static rtx
12578 safe_vector_operand (x, mode)
12579 rtx x;
12580 enum machine_mode mode;
12581 {
12582 if (x != const0_rtx)
12583 return x;
12584 x = gen_reg_rtx (mode);
12585
12586 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12587 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12588 : gen_rtx_SUBREG (DImode, x, 0)));
12589 else
12590 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12591 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12592 return x;
12593 }
12594
12595 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12596
12597 static rtx
12598 ix86_expand_binop_builtin (icode, arglist, target)
12599 enum insn_code icode;
12600 tree arglist;
12601 rtx target;
12602 {
12603 rtx pat;
12604 tree arg0 = TREE_VALUE (arglist);
12605 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12606 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12607 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12608 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12609 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12610 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12611
12612 if (VECTOR_MODE_P (mode0))
12613 op0 = safe_vector_operand (op0, mode0);
12614 if (VECTOR_MODE_P (mode1))
12615 op1 = safe_vector_operand (op1, mode1);
12616
12617 if (! target
12618 || GET_MODE (target) != tmode
12619 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12620 target = gen_reg_rtx (tmode);
12621
12622 /* In case the insn wants input operands in modes different from
12623 the result, abort. */
12624 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12625 abort ();
12626
12627 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12628 op0 = copy_to_mode_reg (mode0, op0);
12629 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12630 op1 = copy_to_mode_reg (mode1, op1);
12631
12632 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12633 yet one of the two must not be a memory. This is normally enforced
12634 by expanders, but we didn't bother to create one here. */
12635 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12636 op0 = copy_to_mode_reg (mode0, op0);
12637
12638 pat = GEN_FCN (icode) (target, op0, op1);
12639 if (! pat)
12640 return 0;
12641 emit_insn (pat);
12642 return target;
12643 }
12644
12645 /* In type_for_mode we restrict the ability to create TImode types
12646 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12647 to have a V4SFmode signature. Convert them in-place to TImode. */
12648
12649 static rtx
12650 ix86_expand_timode_binop_builtin (icode, arglist, target)
12651 enum insn_code icode;
12652 tree arglist;
12653 rtx target;
12654 {
12655 rtx pat;
12656 tree arg0 = TREE_VALUE (arglist);
12657 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12658 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12659 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12660
12661 op0 = gen_lowpart (TImode, op0);
12662 op1 = gen_lowpart (TImode, op1);
12663 target = gen_reg_rtx (TImode);
12664
12665 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12666 op0 = copy_to_mode_reg (TImode, op0);
12667 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12668 op1 = copy_to_mode_reg (TImode, op1);
12669
12670 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12671 yet one of the two must not be a memory. This is normally enforced
12672 by expanders, but we didn't bother to create one here. */
12673 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12674 op0 = copy_to_mode_reg (TImode, op0);
12675
12676 pat = GEN_FCN (icode) (target, op0, op1);
12677 if (! pat)
12678 return 0;
12679 emit_insn (pat);
12680
12681 return gen_lowpart (V4SFmode, target);
12682 }
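/* Note: the SSE logical builtins (ANDPS, ANDNPS, ORPS, XORPS) reach the
   function above with V4SFmode arguments; gen_lowpart reinterprets the
   same 128-bit values as TImode so that the TImode logical patterns can
   be used, and the result is viewed back as V4SFmode on return.  */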
12683
12684 /* Subroutine of ix86_expand_builtin to take care of stores. */
12685
12686 static rtx
12687 ix86_expand_store_builtin (icode, arglist)
12688 enum insn_code icode;
12689 tree arglist;
12690 {
12691 rtx pat;
12692 tree arg0 = TREE_VALUE (arglist);
12693 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12694 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12695 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12696 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12697 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12698
12699 if (VECTOR_MODE_P (mode1))
12700 op1 = safe_vector_operand (op1, mode1);
12701
12702 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12703
12704 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12705 op1 = copy_to_mode_reg (mode1, op1);
12706
12707 pat = GEN_FCN (icode) (op0, op1);
12708 if (pat)
12709 emit_insn (pat);
12710 return 0;
12711 }
12712
12713 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12714
12715 static rtx
12716 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12717 enum insn_code icode;
12718 tree arglist;
12719 rtx target;
12720 int do_load;
12721 {
12722 rtx pat;
12723 tree arg0 = TREE_VALUE (arglist);
12724 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12725 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12726 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12727
12728 if (! target
12729 || GET_MODE (target) != tmode
12730 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12731 target = gen_reg_rtx (tmode);
12732 if (do_load)
12733 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12734 else
12735 {
12736 if (VECTOR_MODE_P (mode0))
12737 op0 = safe_vector_operand (op0, mode0);
12738
12739 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12740 op0 = copy_to_mode_reg (mode0, op0);
12741 }
12742
12743 pat = GEN_FCN (icode) (target, op0);
12744 if (! pat)
12745 return 0;
12746 emit_insn (pat);
12747 return target;
12748 }
12749
12750 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12751 sqrtss, rsqrtss, rcpss. */
12752
12753 static rtx
12754 ix86_expand_unop1_builtin (icode, arglist, target)
12755 enum insn_code icode;
12756 tree arglist;
12757 rtx target;
12758 {
12759 rtx pat;
12760 tree arg0 = TREE_VALUE (arglist);
12761 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12762 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12763 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12764
12765 if (! target
12766 || GET_MODE (target) != tmode
12767 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12768 target = gen_reg_rtx (tmode);
12769
12770 if (VECTOR_MODE_P (mode0))
12771 op0 = safe_vector_operand (op0, mode0);
12772
12773 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12774 op0 = copy_to_mode_reg (mode0, op0);
12775
12776 op1 = op0;
12777 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12778 op1 = copy_to_mode_reg (mode0, op1);
12779
12780 pat = GEN_FCN (icode) (target, op0, op1);
12781 if (! pat)
12782 return 0;
12783 emit_insn (pat);
12784 return target;
12785 }
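/* Note: the vm* patterns used above take two vector operands, the second
   supplying the elements the scalar operation leaves untouched.  Passing
   the same value for both, as this function does, means the upper
   elements of the result are simply copied from the input.  */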
12786
12787 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12788
12789 static rtx
12790 ix86_expand_sse_compare (d, arglist, target)
12791 const struct builtin_description *d;
12792 tree arglist;
12793 rtx target;
12794 {
12795 rtx pat;
12796 tree arg0 = TREE_VALUE (arglist);
12797 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12798 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12799 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12800 rtx op2;
12801 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12802 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12803 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12804 enum rtx_code comparison = d->comparison;
12805
12806 if (VECTOR_MODE_P (mode0))
12807 op0 = safe_vector_operand (op0, mode0);
12808 if (VECTOR_MODE_P (mode1))
12809 op1 = safe_vector_operand (op1, mode1);
12810
12811 /* Swap operands if we have a comparison that isn't available in
12812 hardware. */
12813 if (d->flag)
12814 {
12815 rtx tmp = gen_reg_rtx (mode1);
12816 emit_move_insn (tmp, op1);
12817 op1 = op0;
12818 op0 = tmp;
12819 }
12820
12821 if (! target
12822 || GET_MODE (target) != tmode
12823 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12824 target = gen_reg_rtx (tmode);
12825
12826 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12827 op0 = copy_to_mode_reg (mode0, op0);
12828 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12829 op1 = copy_to_mode_reg (mode1, op1);
12830
12831 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12832 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12833 if (! pat)
12834 return 0;
12835 emit_insn (pat);
12836 return target;
12837 }
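/* Note: d->flag marks comparisons such as GT and GE that the hardware
   only provides in the swapped (LT/LE) form; the operand swap above lets
   the available instruction produce the requested result.  */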
12838
12839 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12840
12841 static rtx
12842 ix86_expand_sse_comi (d, arglist, target)
12843 const struct builtin_description *d;
12844 tree arglist;
12845 rtx target;
12846 {
12847 rtx pat;
12848 tree arg0 = TREE_VALUE (arglist);
12849 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12850 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12851 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12852 rtx op2;
12853 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12854 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12855 enum rtx_code comparison = d->comparison;
12856
12857 if (VECTOR_MODE_P (mode0))
12858 op0 = safe_vector_operand (op0, mode0);
12859 if (VECTOR_MODE_P (mode1))
12860 op1 = safe_vector_operand (op1, mode1);
12861
12862 /* Swap operands if we have a comparison that isn't available in
12863 hardware. */
12864 if (d->flag)
12865 {
12866 rtx tmp = op1;
12867 op1 = op0;
12868 op0 = tmp;
12869 }
12870
12871 target = gen_reg_rtx (SImode);
12872 emit_move_insn (target, const0_rtx);
12873 target = gen_rtx_SUBREG (QImode, target, 0);
12874
12875 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12876 op0 = copy_to_mode_reg (mode0, op0);
12877 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12878 op1 = copy_to_mode_reg (mode1, op1);
12879
12880 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12881 pat = GEN_FCN (d->icode) (op0, op1, op2);
12882 if (! pat)
12883 return 0;
12884 emit_insn (pat);
12885 emit_insn (gen_rtx_SET (VOIDmode,
12886 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12887 gen_rtx_fmt_ee (comparison, QImode,
12888 gen_rtx_REG (CCmode, FLAGS_REG),
12889 const0_rtx)));
12890
12891 return SUBREG_REG (target);
12892 }
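/* Note: the comi patterns only set the flags register, so the code above
   zeroes an SImode register and then sets its low QImode part from the
   requested comparison of the flags with zero, producing the 0/1 value
   of the builtin.  */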
12893
12894 /* Expand an expression EXP that calls a built-in function,
12895 with result going to TARGET if that's convenient
12896 (and in mode MODE if that's convenient).
12897 SUBTARGET may be used as the target for computing one of EXP's operands.
12898 IGNORE is nonzero if the value is to be ignored. */
12899
12900 rtx
12901 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12902 tree exp;
12903 rtx target;
12904 rtx subtarget ATTRIBUTE_UNUSED;
12905 enum machine_mode mode ATTRIBUTE_UNUSED;
12906 int ignore ATTRIBUTE_UNUSED;
12907 {
12908 const struct builtin_description *d;
12909 size_t i;
12910 enum insn_code icode;
12911 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12912 tree arglist = TREE_OPERAND (exp, 1);
12913 tree arg0, arg1, arg2;
12914 rtx op0, op1, op2, pat;
12915 enum machine_mode tmode, mode0, mode1, mode2;
12916 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12917
12918 switch (fcode)
12919 {
12920 case IX86_BUILTIN_EMMS:
12921 emit_insn (gen_emms ());
12922 return 0;
12923
12924 case IX86_BUILTIN_SFENCE:
12925 emit_insn (gen_sfence ());
12926 return 0;
12927
12928 case IX86_BUILTIN_PEXTRW:
12929 case IX86_BUILTIN_PEXTRW128:
12930 icode = (fcode == IX86_BUILTIN_PEXTRW
12931 ? CODE_FOR_mmx_pextrw
12932 : CODE_FOR_sse2_pextrw);
12933 arg0 = TREE_VALUE (arglist);
12934 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12935 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12936 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12937 tmode = insn_data[icode].operand[0].mode;
12938 mode0 = insn_data[icode].operand[1].mode;
12939 mode1 = insn_data[icode].operand[2].mode;
12940
12941 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12942 op0 = copy_to_mode_reg (mode0, op0);
12943 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12944 {
12945 /* @@@ better error message */
12946 error ("selector must be an immediate");
12947 return gen_reg_rtx (tmode);
12948 }
12949 if (target == 0
12950 || GET_MODE (target) != tmode
12951 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12952 target = gen_reg_rtx (tmode);
12953 pat = GEN_FCN (icode) (target, op0, op1);
12954 if (! pat)
12955 return 0;
12956 emit_insn (pat);
12957 return target;
12958
12959 case IX86_BUILTIN_PINSRW:
12960 case IX86_BUILTIN_PINSRW128:
12961 icode = (fcode == IX86_BUILTIN_PINSRW
12962 ? CODE_FOR_mmx_pinsrw
12963 : CODE_FOR_sse2_pinsrw);
12964 arg0 = TREE_VALUE (arglist);
12965 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12966 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12967 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12968 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12969 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12970 tmode = insn_data[icode].operand[0].mode;
12971 mode0 = insn_data[icode].operand[1].mode;
12972 mode1 = insn_data[icode].operand[2].mode;
12973 mode2 = insn_data[icode].operand[3].mode;
12974
12975 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12976 op0 = copy_to_mode_reg (mode0, op0);
12977 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12978 op1 = copy_to_mode_reg (mode1, op1);
12979 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12980 {
12981 /* @@@ better error message */
12982 error ("selector must be an immediate");
12983 return const0_rtx;
12984 }
12985 if (target == 0
12986 || GET_MODE (target) != tmode
12987 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12988 target = gen_reg_rtx (tmode);
12989 pat = GEN_FCN (icode) (target, op0, op1, op2);
12990 if (! pat)
12991 return 0;
12992 emit_insn (pat);
12993 return target;
12994
12995 case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
12996 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12997 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12998 : CODE_FOR_sse2_maskmovdqu);
12999 /* Note the arg order is different from the operand order. */
13000 arg1 = TREE_VALUE (arglist);
13001 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13002 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13003 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13004 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13005 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13006 mode0 = insn_data[icode].operand[0].mode;
13007 mode1 = insn_data[icode].operand[1].mode;
13008 mode2 = insn_data[icode].operand[2].mode;
13009
13010 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13011 op0 = copy_to_mode_reg (mode0, op0);
13012 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13013 op1 = copy_to_mode_reg (mode1, op1);
13014 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13015 op2 = copy_to_mode_reg (mode2, op2);
13016 pat = GEN_FCN (icode) (op0, op1, op2);
13017 if (! pat)
13018 return 0;
13019 emit_insn (pat);
13020 return 0;
13021
13022 case IX86_BUILTIN_SQRTSS:
13023 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13024 case IX86_BUILTIN_RSQRTSS:
13025 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13026 case IX86_BUILTIN_RCPSS:
13027 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13028
13029 case IX86_BUILTIN_ANDPS:
13030 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
13031 arglist, target);
13032 case IX86_BUILTIN_ANDNPS:
13033 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
13034 arglist, target);
13035 case IX86_BUILTIN_ORPS:
13036 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
13037 arglist, target);
13038 case IX86_BUILTIN_XORPS:
13039 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
13040 arglist, target);
13041
13042 case IX86_BUILTIN_LOADAPS:
13043 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13044
13045 case IX86_BUILTIN_LOADUPS:
13046 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13047
13048 case IX86_BUILTIN_STOREAPS:
13049 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13050 case IX86_BUILTIN_STOREUPS:
13051 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13052
13053 case IX86_BUILTIN_LOADSS:
13054 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13055
13056 case IX86_BUILTIN_STORESS:
13057 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13058
13059 case IX86_BUILTIN_LOADHPS:
13060 case IX86_BUILTIN_LOADLPS:
13061 case IX86_BUILTIN_LOADHPD:
13062 case IX86_BUILTIN_LOADLPD:
13063 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13064 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13065 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13066 : CODE_FOR_sse2_movlpd);
13067 arg0 = TREE_VALUE (arglist);
13068 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13069 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13070 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13071 tmode = insn_data[icode].operand[0].mode;
13072 mode0 = insn_data[icode].operand[1].mode;
13073 mode1 = insn_data[icode].operand[2].mode;
13074
13075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13076 op0 = copy_to_mode_reg (mode0, op0);
13077 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13078 if (target == 0
13079 || GET_MODE (target) != tmode
13080 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13081 target = gen_reg_rtx (tmode);
13082 pat = GEN_FCN (icode) (target, op0, op1);
13083 if (! pat)
13084 return 0;
13085 emit_insn (pat);
13086 return target;
13087
13088 case IX86_BUILTIN_STOREHPS:
13089 case IX86_BUILTIN_STORELPS:
13090 case IX86_BUILTIN_STOREHPD:
13091 case IX86_BUILTIN_STORELPD:
13092 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13093 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13094 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13095 : CODE_FOR_sse2_movlpd);
13096 arg0 = TREE_VALUE (arglist);
13097 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13098 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13099 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13100 mode0 = insn_data[icode].operand[1].mode;
13101 mode1 = insn_data[icode].operand[2].mode;
13102
13103 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13104 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13105 op1 = copy_to_mode_reg (mode1, op1);
13106
13107 pat = GEN_FCN (icode) (op0, op0, op1);
13108 if (! pat)
13109 return 0;
13110 emit_insn (pat);
13111 return 0;
13112
13113 case IX86_BUILTIN_MOVNTPS:
13114 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13115 case IX86_BUILTIN_MOVNTQ:
13116 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13117
13118 case IX86_BUILTIN_LDMXCSR:
13119 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13120 target = assign_386_stack_local (SImode, 0);
13121 emit_move_insn (target, op0);
13122 emit_insn (gen_ldmxcsr (target));
13123 return 0;
13124
13125 case IX86_BUILTIN_STMXCSR:
13126 target = assign_386_stack_local (SImode, 0);
13127 emit_insn (gen_stmxcsr (target));
13128 return copy_to_mode_reg (SImode, target);
13129
13130 case IX86_BUILTIN_SHUFPS:
13131 case IX86_BUILTIN_SHUFPD:
13132 icode = (fcode == IX86_BUILTIN_SHUFPS
13133 ? CODE_FOR_sse_shufps
13134 : CODE_FOR_sse2_shufpd);
13135 arg0 = TREE_VALUE (arglist);
13136 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13137 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13138 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13139 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13140 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13141 tmode = insn_data[icode].operand[0].mode;
13142 mode0 = insn_data[icode].operand[1].mode;
13143 mode1 = insn_data[icode].operand[2].mode;
13144 mode2 = insn_data[icode].operand[3].mode;
13145
13146 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13147 op0 = copy_to_mode_reg (mode0, op0);
13148 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13149 op1 = copy_to_mode_reg (mode1, op1);
13150 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13151 {
13152 /* @@@ better error message */
13153 error ("mask must be an immediate");
13154 return gen_reg_rtx (tmode);
13155 }
13156 if (target == 0
13157 || GET_MODE (target) != tmode
13158 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13159 target = gen_reg_rtx (tmode);
13160 pat = GEN_FCN (icode) (target, op0, op1, op2);
13161 if (! pat)
13162 return 0;
13163 emit_insn (pat);
13164 return target;
13165
13166 case IX86_BUILTIN_PSHUFW:
13167 case IX86_BUILTIN_PSHUFD:
13168 case IX86_BUILTIN_PSHUFHW:
13169 case IX86_BUILTIN_PSHUFLW:
13170 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13171 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13172 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13173 : CODE_FOR_mmx_pshufw);
13174 arg0 = TREE_VALUE (arglist);
13175 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13176 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13177 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13178 tmode = insn_data[icode].operand[0].mode;
13179 mode1 = insn_data[icode].operand[1].mode;
13180 mode2 = insn_data[icode].operand[2].mode;
13181
13182 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13183 op0 = copy_to_mode_reg (mode1, op0);
13184 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13185 {
13186 /* @@@ better error message */
13187 error ("mask must be an immediate");
13188 return const0_rtx;
13189 }
13190 if (target == 0
13191 || GET_MODE (target) != tmode
13192 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13193 target = gen_reg_rtx (tmode);
13194 pat = GEN_FCN (icode) (target, op0, op1);
13195 if (! pat)
13196 return 0;
13197 emit_insn (pat);
13198 return target;
13199
13200 case IX86_BUILTIN_FEMMS:
13201 emit_insn (gen_femms ());
13202 return NULL_RTX;
13203
13204 case IX86_BUILTIN_PAVGUSB:
13205 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13206
13207 case IX86_BUILTIN_PF2ID:
13208 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13209
13210 case IX86_BUILTIN_PFACC:
13211 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13212
13213 case IX86_BUILTIN_PFADD:
13214 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13215
13216 case IX86_BUILTIN_PFCMPEQ:
13217 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13218
13219 case IX86_BUILTIN_PFCMPGE:
13220 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13221
13222 case IX86_BUILTIN_PFCMPGT:
13223 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13224
13225 case IX86_BUILTIN_PFMAX:
13226 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13227
13228 case IX86_BUILTIN_PFMIN:
13229 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13230
13231 case IX86_BUILTIN_PFMUL:
13232 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13233
13234 case IX86_BUILTIN_PFRCP:
13235 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13236
13237 case IX86_BUILTIN_PFRCPIT1:
13238 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13239
13240 case IX86_BUILTIN_PFRCPIT2:
13241 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13242
13243 case IX86_BUILTIN_PFRSQIT1:
13244 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13245
13246 case IX86_BUILTIN_PFRSQRT:
13247 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13248
13249 case IX86_BUILTIN_PFSUB:
13250 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13251
13252 case IX86_BUILTIN_PFSUBR:
13253 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13254
13255 case IX86_BUILTIN_PI2FD:
13256 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13257
13258 case IX86_BUILTIN_PMULHRW:
13259 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13260
13261 case IX86_BUILTIN_PF2IW:
13262 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13263
13264 case IX86_BUILTIN_PFNACC:
13265 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13266
13267 case IX86_BUILTIN_PFPNACC:
13268 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13269
13270 case IX86_BUILTIN_PI2FW:
13271 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13272
13273 case IX86_BUILTIN_PSWAPDSI:
13274 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13275
13276 case IX86_BUILTIN_PSWAPDSF:
13277 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13278
13279 case IX86_BUILTIN_SSE_ZERO:
13280 target = gen_reg_rtx (V4SFmode);
13281 emit_insn (gen_sse_clrv4sf (target));
13282 return target;
13283
13284 case IX86_BUILTIN_MMX_ZERO:
13285 target = gen_reg_rtx (DImode);
13286 emit_insn (gen_mmx_clrdi (target));
13287 return target;
13288
13289 case IX86_BUILTIN_SQRTSD:
13290 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13291 case IX86_BUILTIN_LOADAPD:
13292 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13293 case IX86_BUILTIN_LOADUPD:
13294 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13295
13296 case IX86_BUILTIN_STOREAPD:
13297 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13298 case IX86_BUILTIN_STOREUPD:
13299 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13300
13301 case IX86_BUILTIN_LOADSD:
13302 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13303
13304 case IX86_BUILTIN_STORESD:
13305 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13306
13307 case IX86_BUILTIN_SETPD1:
13308 target = assign_386_stack_local (DFmode, 0);
13309 arg0 = TREE_VALUE (arglist);
13310 emit_move_insn (adjust_address (target, DFmode, 0),
13311 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13312 op0 = gen_reg_rtx (V2DFmode);
13313 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13314 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13315 return op0;
13316
13317 case IX86_BUILTIN_SETPD:
13318 target = assign_386_stack_local (V2DFmode, 0);
13319 arg0 = TREE_VALUE (arglist);
13320 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13321 emit_move_insn (adjust_address (target, DFmode, 0),
13322 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13323 emit_move_insn (adjust_address (target, DFmode, 8),
13324 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13325 op0 = gen_reg_rtx (V2DFmode);
13326 emit_insn (gen_sse2_movapd (op0, target));
13327 return op0;
13328
13329 case IX86_BUILTIN_LOADRPD:
13330 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13331 gen_reg_rtx (V2DFmode), 1);
13332 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13333 return target;
13334
13335 case IX86_BUILTIN_LOADPD1:
13336 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13337 gen_reg_rtx (V2DFmode), 1);
13338 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13339 return target;
13340
13341 case IX86_BUILTIN_STOREPD1:
13342 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13343 case IX86_BUILTIN_STORERPD:
13344 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13345
13346 case IX86_BUILTIN_MFENCE:
13347 emit_insn (gen_sse2_mfence ());
13348 return 0;
13349 case IX86_BUILTIN_LFENCE:
13350 emit_insn (gen_sse2_lfence ());
13351 return 0;
13352
13353 case IX86_BUILTIN_CLFLUSH:
13354 arg0 = TREE_VALUE (arglist);
13355 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13356 icode = CODE_FOR_sse2_clflush;
13357 mode0 = insn_data[icode].operand[0].mode;
13358 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13359 op0 = copy_to_mode_reg (mode0, op0);
13360
13361 emit_insn (gen_sse2_clflush (op0));
13362 return 0;
13363
13364 case IX86_BUILTIN_MOVNTPD:
13365 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13366 case IX86_BUILTIN_MOVNTDQ:
13367 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13368 case IX86_BUILTIN_MOVNTI:
13369 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13370
13371 default:
13372 break;
13373 }
13374
13375 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13376 if (d->code == fcode)
13377 {
13378 /* Compares are treated specially. */
13379 if (d->icode == CODE_FOR_maskcmpv4sf3
13380 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13381 || d->icode == CODE_FOR_maskncmpv4sf3
13382 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13383 || d->icode == CODE_FOR_maskcmpv2df3
13384 || d->icode == CODE_FOR_vmmaskcmpv2df3
13385 || d->icode == CODE_FOR_maskncmpv2df3
13386 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13387 return ix86_expand_sse_compare (d, arglist, target);
13388
13389 return ix86_expand_binop_builtin (d->icode, arglist, target);
13390 }
13391
13392 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13393 if (d->code == fcode)
13394 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13395
13396 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13397 if (d->code == fcode)
13398 return ix86_expand_sse_comi (d, arglist, target);
13399
13400 /* @@@ Should really do something sensible here. */
13401 return 0;
13402 }
13403
13404 /* Store OPERAND to memory after reload has completed. This means
13405 that we can't easily use assign_stack_local. */
13406 rtx
13407 ix86_force_to_memory (mode, operand)
13408 enum machine_mode mode;
13409 rtx operand;
13410 {
13411 rtx result;
13412 if (!reload_completed)
13413 abort ();
13414 if (TARGET_64BIT && TARGET_RED_ZONE)
13415 {
13416 result = gen_rtx_MEM (mode,
13417 gen_rtx_PLUS (Pmode,
13418 stack_pointer_rtx,
13419 GEN_INT (-RED_ZONE_SIZE)));
13420 emit_move_insn (result, operand);
13421 }
13422 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13423 {
13424 switch (mode)
13425 {
13426 case HImode:
13427 case SImode:
13428 operand = gen_lowpart (DImode, operand);
13429 /* FALLTHRU */
13430 case DImode:
13431 emit_insn (
13432 gen_rtx_SET (VOIDmode,
13433 gen_rtx_MEM (DImode,
13434 gen_rtx_PRE_DEC (DImode,
13435 stack_pointer_rtx)),
13436 operand));
13437 break;
13438 default:
13439 abort ();
13440 }
13441 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13442 }
13443 else
13444 {
13445 switch (mode)
13446 {
13447 case DImode:
13448 {
13449 rtx operands[2];
13450 split_di (&operand, 1, operands, operands + 1);
13451 emit_insn (
13452 gen_rtx_SET (VOIDmode,
13453 gen_rtx_MEM (SImode,
13454 gen_rtx_PRE_DEC (Pmode,
13455 stack_pointer_rtx)),
13456 operands[1]));
13457 emit_insn (
13458 gen_rtx_SET (VOIDmode,
13459 gen_rtx_MEM (SImode,
13460 gen_rtx_PRE_DEC (Pmode,
13461 stack_pointer_rtx)),
13462 operands[0]));
13463 }
13464 break;
13465 case HImode:
13466 /* It is better to store HImodes as SImodes. */
13467 if (!TARGET_PARTIAL_REG_STALL)
13468 operand = gen_lowpart (SImode, operand);
13469 /* FALLTHRU */
13470 case SImode:
13471 emit_insn (
13472 gen_rtx_SET (VOIDmode,
13473 gen_rtx_MEM (GET_MODE (operand),
13474 gen_rtx_PRE_DEC (SImode,
13475 stack_pointer_rtx)),
13476 operand));
13477 break;
13478 default:
13479 abort ();
13480 }
13481 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13482 }
13483 return result;
13484 }
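/* For example, on 32-bit targets a DImode operand is spilled with two
   pushes (the high word first, then the low word, via the PRE_DEC
   addresses above), and the returned MEM then describes the new top of
   the stack.  */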
13485
13486 /* Free the stack slot allocated by ix86_force_to_memory. */
13487 void
13488 ix86_free_from_memory (mode)
13489 enum machine_mode mode;
13490 {
13491 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13492 {
13493 int size;
13494
13495 if (mode == DImode || TARGET_64BIT)
13496 size = 8;
13497 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13498 size = 2;
13499 else
13500 size = 4;
13501 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13502 to a pop or add instruction if registers are available. */
13503 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13504 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13505 GEN_INT (size))));
13506 }
13507 }
13508
13509 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13510 QImode must go into class Q_REGS.
13511 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
13512 movdf to do mem-to-mem moves through integer regs. */
13513 enum reg_class
13514 ix86_preferred_reload_class (x, class)
13515 rtx x;
13516 enum reg_class class;
13517 {
13518 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13519 {
13520 /* SSE can't load any constant directly yet. */
13521 if (SSE_CLASS_P (class))
13522 return NO_REGS;
13523 /* Floats can load 0 and 1. */
13524 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13525 {
13526 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13527 if (MAYBE_SSE_CLASS_P (class))
13528 return (reg_class_subset_p (class, GENERAL_REGS)
13529 ? GENERAL_REGS : FLOAT_REGS);
13530 else
13531 return class;
13532 }
13533 /* General regs can load everything. */
13534 if (reg_class_subset_p (class, GENERAL_REGS))
13535 return GENERAL_REGS;
13536 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13537 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13538 return NO_REGS;
13539 }
13540 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13541 return NO_REGS;
13542 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13543 return Q_REGS;
13544 return class;
13545 }
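/* Note: returning NO_REGS for an SSE class above forces reload to place
   a floating-point CONST_DOUBLE in memory (the constant pool), since SSE
   has no way to load such a constant directly.  */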
13546
13547 /* If we are copying between general and FP registers, we need a memory
13548 location. The same is true for SSE and MMX registers.
13549
13550 The macro can't work reliably when one of the CLASSES is a class containing
13551 registers from multiple units (SSE, MMX, integer). We avoid this by never
13552 combining those units in a single alternative in the machine description.
13553 Ensure that this constraint holds to avoid unexpected surprises.
13554
13555 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13556 enforce these sanity checks. */
13557 int
13558 ix86_secondary_memory_needed (class1, class2, mode, strict)
13559 enum reg_class class1, class2;
13560 enum machine_mode mode;
13561 int strict;
13562 {
13563 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13564 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13565 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13566 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13567 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13568 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13569 {
13570 if (strict)
13571 abort ();
13572 else
13573 return 1;
13574 }
13575 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13576 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13577 && (mode) != SImode)
13578 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13579 && (mode) != SImode));
13580 }
13581 /* Return the cost of moving data from a register in class CLASS1 to
13582 one in class CLASS2.
13583
13584 It is not required that the cost always equal 2 when FROM is the same as TO;
13585 on some machines it is expensive to move between registers if they are not
13586 general registers. */
13587 int
13588 ix86_register_move_cost (mode, class1, class2)
13589 enum machine_mode mode;
13590 enum reg_class class1, class2;
13591 {
13592 /* In case we require secondary memory, compute the cost of the store followed
13593 by the load. When copying from a general purpose register we may emit
13594 multiple stores followed by a single load, causing a memory size mismatch
13595 stall. Count this as an arbitrarily high cost of 20. */
13596 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13597 {
13598 int add_cost = 0;
13599 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13600 add_cost = 20;
13601 return (MEMORY_MOVE_COST (mode, class1, 0)
13602 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13603 }
13604 /* Moves between SSE/MMX and integer unit are expensive. */
13605 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13606 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13607 return ix86_cost->mmxsse_to_integer;
13608 if (MAYBE_FLOAT_CLASS_P (class1))
13609 return ix86_cost->fp_move;
13610 if (MAYBE_SSE_CLASS_P (class1))
13611 return ix86_cost->sse_move;
13612 if (MAYBE_MMX_CLASS_P (class1))
13613 return ix86_cost->mmx_move;
13614 return 2;
13615 }
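/* For example, when a secondary memory location is needed the move is
   costed as a store plus a load (MEMORY_MOVE_COST in both directions),
   with an extra penalty of 20 when the source class needs more hard
   registers than the destination, reflecting the memory size mismatch
   stall described above.  */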
13616
13617 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13618 int
13619 ix86_hard_regno_mode_ok (regno, mode)
13620 int regno;
13621 enum machine_mode mode;
13622 {
13623 /* Flags can hold only CCmode values, and only flags can hold them. */
13624 if (CC_REGNO_P (regno))
13625 return GET_MODE_CLASS (mode) == MODE_CC;
13626 if (GET_MODE_CLASS (mode) == MODE_CC
13627 || GET_MODE_CLASS (mode) == MODE_RANDOM
13628 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13629 return 0;
13630 if (FP_REGNO_P (regno))
13631 return VALID_FP_MODE_P (mode);
13632 if (SSE_REGNO_P (regno))
13633 return VALID_SSE_REG_MODE (mode);
13634 if (MMX_REGNO_P (regno))
13635 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13636 /* We handle both integers and floats in the general purpose registers.
13637 In the future we should be able to handle vector modes as well. */
13638 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13639 return 0;
13640 /* Take care with QImode values - they can live in non-QI regs, but then
13641 they cause partial register stalls. */
13642 if (regno < 4 || mode != QImode || TARGET_64BIT)
13643 return 1;
13644 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13645 }
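/* Note: only the first four general registers have directly addressable
   low-byte registers in 32-bit mode, so QImode values in the other
   general registers are accepted above only when partial register stalls
   are not a concern, or while reload is in progress or completed.  */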
13646
13647 /* Return the cost of moving data of mode M between a
13648 register and memory. A value of 2 is the default; this cost is
13649 relative to those in `REGISTER_MOVE_COST'.
13650
13651 If moving between registers and memory is more expensive than
13652 between two registers, you should define this macro to express the
13653 relative cost.
13654
13655 Also model the increased cost of moving QImode values in
13656 non-Q_REGS classes.
13657 */
13658 int
13659 ix86_memory_move_cost (mode, class, in)
13660 enum machine_mode mode;
13661 enum reg_class class;
13662 int in;
13663 {
13664 if (FLOAT_CLASS_P (class))
13665 {
13666 int index;
13667 switch (mode)
13668 {
13669 case SFmode:
13670 index = 0;
13671 break;
13672 case DFmode:
13673 index = 1;
13674 break;
13675 case XFmode:
13676 case TFmode:
13677 index = 2;
13678 break;
13679 default:
13680 return 100;
13681 }
13682 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13683 }
13684 if (SSE_CLASS_P (class))
13685 {
13686 int index;
13687 switch (GET_MODE_SIZE (mode))
13688 {
13689 case 4:
13690 index = 0;
13691 break;
13692 case 8:
13693 index = 1;
13694 break;
13695 case 16:
13696 index = 2;
13697 break;
13698 default:
13699 return 100;
13700 }
13701 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13702 }
13703 if (MMX_CLASS_P (class))
13704 {
13705 int index;
13706 switch (GET_MODE_SIZE (mode))
13707 {
13708 case 4:
13709 index = 0;
13710 break;
13711 case 8:
13712 index = 1;
13713 break;
13714 default:
13715 return 100;
13716 }
13717 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13718 }
13719 switch (GET_MODE_SIZE (mode))
13720 {
13721 case 1:
13722 if (in)
13723 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13724 : ix86_cost->movzbl_load);
13725 else
13726 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13727 : ix86_cost->int_store[0] + 4);
13728 break;
13729 case 2:
13730 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13731 default:
13732 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13733 if (mode == TFmode)
13734 mode = XFmode;
13735 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13736 * (int) GET_MODE_SIZE (mode) / 4);
13737 }
13738 }
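/* Note: integer moves wider than 4 bytes are costed above as the
   corresponding number of 32-bit moves, and TFmode is treated as XFmode
   for that purpose.  */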
13739
13740 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13741 static void
13742 ix86_svr3_asm_out_constructor (symbol, priority)
13743 rtx symbol;
13744 int priority ATTRIBUTE_UNUSED;
13745 {
13746 init_section ();
13747 fputs ("\tpushl $", asm_out_file);
13748 assemble_name (asm_out_file, XSTR (symbol, 0));
13749 fputc ('\n', asm_out_file);
13750 }
13751 #endif
13752
13753 #if TARGET_MACHO
13754
13755 static int current_machopic_label_num;
13756
13757 /* Given a symbol name and its associated stub, write out the
13758 definition of the stub. */
13759
13760 void
13761 machopic_output_stub (file, symb, stub)
13762 FILE *file;
13763 const char *symb, *stub;
13764 {
13765 unsigned int length;
13766 char *binder_name, *symbol_name, lazy_ptr_name[32];
13767 int label = ++current_machopic_label_num;
13768
13769 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13770 symb = (*targetm.strip_name_encoding) (symb);
13771
13772 length = strlen (stub);
13773 binder_name = alloca (length + 32);
13774 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13775
13776 length = strlen (symb);
13777 symbol_name = alloca (length + 32);
13778 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13779
13780 sprintf (lazy_ptr_name, "L%d$lz", label);
13781
13782 if (MACHOPIC_PURE)
13783 machopic_picsymbol_stub_section ();
13784 else
13785 machopic_symbol_stub_section ();
13786
13787 fprintf (file, "%s:\n", stub);
13788 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13789
13790 if (MACHOPIC_PURE)
13791 {
13792 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13793 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13794 fprintf (file, "\tjmp %%edx\n");
13795 }
13796 else
13797 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13798
13799 fprintf (file, "%s:\n", binder_name);
13800
13801 if (MACHOPIC_PURE)
13802 {
13803 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13804 fprintf (file, "\tpushl %%eax\n");
13805 }
13806 else
13807 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13808
13809 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13810
13811 machopic_lazy_symbol_ptr_section ();
13812 fprintf (file, "%s:\n", lazy_ptr_name);
13813 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13814 fprintf (file, "\t.long %s\n", binder_name);
13815 }
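/* As a rough illustration, for MACHOPIC_PURE the routine above emits a
   stub of roughly this shape (label and symbol names invented for the
   example):

       Lfoo$stub:
               .indirect_symbol _foo
               call    LPC$1
       LPC$1:  popl    %eax
               movl    L1$lz-LPC$1(%eax),%edx
               jmp     %edx

   followed by the binder fragment, which pushes the lazy pointer address
   and jumps to dyld_stub_binding_helper, and by the lazy pointer entry
   itself in its own section.  */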
13816 #endif /* TARGET_MACHO */
13817
13818 /* Order the registers for register allocator. */
13819
13820 void
13821 x86_order_regs_for_local_alloc ()
13822 {
13823 int pos = 0;
13824 int i;
13825
13826 /* First allocate the local general purpose registers. */
13827 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13828 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13829 reg_alloc_order [pos++] = i;
13830
13831 /* Global general purpose registers. */
13832 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13833 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13834 reg_alloc_order [pos++] = i;
13835
13836 /* x87 registers come first in case we are doing FP math
13837 using them. */
13838 if (!TARGET_SSE_MATH)
13839 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13840 reg_alloc_order [pos++] = i;
13841
13842 /* SSE registers. */
13843 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13844 reg_alloc_order [pos++] = i;
13845 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13846 reg_alloc_order [pos++] = i;
13847
13848 /* x87 registers. */
13849 if (TARGET_SSE_MATH)
13850 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13851 reg_alloc_order [pos++] = i;
13852
13853 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13854 reg_alloc_order [pos++] = i;
13855
13856 /* Initialize the rest of the array, as some registers are never
13857 allocated at all. */
13858 while (pos < FIRST_PSEUDO_REGISTER)
13859 reg_alloc_order [pos++] = 0;
13860 }
13861
13862 void
13863 x86_output_mi_thunk (file, delta, function)
13864 FILE *file;
13865 int delta;
13866 tree function;
13867 {
13868 tree parm;
13869 rtx xops[3];
13870
13871 if (ix86_regparm > 0)
13872 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13873 else
13874 parm = NULL_TREE;
13875 for (; parm; parm = TREE_CHAIN (parm))
13876 if (TREE_VALUE (parm) == void_type_node)
13877 break;
13878
13879 xops[0] = GEN_INT (delta);
13880 if (TARGET_64BIT)
13881 {
13882 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13883 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13884 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13885 if (flag_pic)
13886 {
13887 fprintf (file, "\tjmp *");
13888 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13889 fprintf (file, "@GOTPCREL(%%rip)\n");
13890 }
13891 else
13892 {
13893 fprintf (file, "\tjmp ");
13894 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13895 fprintf (file, "\n");
13896 }
13897 }
13898 else
13899 {
13900 if (parm)
13901 xops[1] = gen_rtx_REG (SImode, 0);
13902 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13903 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13904 else
13905 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13906 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13907
13908 if (flag_pic)
13909 {
13910 xops[0] = pic_offset_table_rtx;
13911 xops[1] = gen_label_rtx ();
13912 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13913
13914 if (ix86_regparm > 2)
13915 abort ();
13916 output_asm_insn ("push{l}\t%0", xops);
13917 output_asm_insn ("call\t%P1", xops);
13918 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13919 output_asm_insn ("pop{l}\t%0", xops);
13920 output_asm_insn
13921 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13922 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13923 output_asm_insn
13924 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13925 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13926 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13927 }
13928 else
13929 {
13930 fprintf (file, "\tjmp ");
13931 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13932 fprintf (file, "\n");
13933 }
13934 }
13935 }
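/* Note: the thunk emitted above simply adds DELTA to the incoming `this'
   pointer (found either in a register or in its stack slot, depending on
   the ABI and on whether the result is returned in memory) and then
   tail-jumps to FUNCTION, going through the GOT when compiling PIC.  */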
13936
13937 int
13938 x86_field_alignment (field, computed)
13939 tree field;
13940 int computed;
13941 {
13942 enum machine_mode mode;
13943 tree type = TREE_TYPE (field);
13944
13945 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13946 return computed;
13947 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13948 ? get_inner_array_type (type) : type);
13949 if (mode == DFmode || mode == DCmode
13950 || GET_MODE_CLASS (mode) == MODE_INT
13951 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13952 return MIN (32, computed);
13953 return computed;
13954 }
13955
13956 /* Implement machine specific optimizations.
13957 At the moment we implement a single transformation: AMD Athlon works faster
13958 when RET is not the destination of a conditional jump and is not directly
13959 preceded by another jump instruction. We avoid the penalty by inserting a
13960 NOP just before the RET instruction in such cases. */
13961 void
13962 x86_machine_dependent_reorg (first)
13963 rtx first ATTRIBUTE_UNUSED;
13964 {
13965 edge e;
13966
13967 if (!TARGET_ATHLON || !optimize || optimize_size)
13968 return;
13969 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13970 {
13971 basic_block bb = e->src;
13972 rtx ret = bb->end;
13973 rtx prev;
13974 bool insert = false;
13975
13976 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13977 continue;
13978 prev = prev_nonnote_insn (ret);
13979 if (prev && GET_CODE (prev) == CODE_LABEL)
13980 {
13981 edge e;
13982 for (e = bb->pred; e; e = e->pred_next)
13983 if (EDGE_FREQUENCY (e) && e->src->index > 0
13984 && !(e->flags & EDGE_FALLTHRU))
13985 insert = 1;
13986 }
13987 if (!insert)
13988 {
13989 prev = prev_real_insn (ret);
13990 if (prev && GET_CODE (prev) == JUMP_INSN
13991 && any_condjump_p (prev))
13992 insert = 1;
13993 }
13994 if (insert)
13995 emit_insn_before (gen_nop (), ret);
13996 }
13997 }
13998
13999 #include "gt-i386.h"