[gcc.git] / gcc / config / i386 / i386.c (blob 252c724f2dec9f20a2f8d6bb449679d63cee11dd)
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
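/* The active cost table above is always read through ix86_cost.  As a
   rough usage sketch (not code from this file; the real consumers are the
   cost macros in i386.h and the rtx-cost code below), a query looks
   something like:

     int mult_cost = ix86_cost->mult_init        /* starting a multiply */
                     + ix86_cost->mult_bit * bits_set;

   where mult_init and mult_bit are assumed field names matching the
   comments in the tables above.  */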
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
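/* Each of the tuning bitmasks above is indexed by processor.  As an
   illustrative sketch (the actual TARGET_* wrappers live in i386.h and may
   be spelled slightly differently), a feature test looks like:

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so x86_use_leave = m_386 | m_K6 | m_ATHLON means "emit leave when tuning
   for the 386, K6 or Athlon".  */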
399
400 /* In case the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 /* Names for the 8-bit (low part), 8-bit (high part), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
413
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
416
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 {
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
438 };
439
440 /* The "default" register map used in 32bit mode. */
441
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
443 {
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 };
452
453 static int const x86_64_int_parameter_registers[6] =
454 {
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457 };
458
459 static int const x86_64_int_return_registers[4] =
460 {
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
462 };
463
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466 {
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474 };
475
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529 */
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
531 {
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
539 };
540
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
546
547 /* The encoding characters for the four TLS models present in ELF. */
548
549 static char const tls_model_chars[] = " GLil";
550
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
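/* For illustration: with the usual x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 (assumed here; the real
   values come from i386.h), X86_64_VARARGS_SIZE evaluates to
   6 * 8 + 8 * 16 = 176 bytes, the size of the varargs register save area.  */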
554
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
557 {
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
562 };
563
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
566
567 /* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586 struct ix86_frame
587 {
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601 };
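/* Reading the diagram above: to_allocate is roughly the space between the
   saved registers and FRAME_POINTER, i.e. approximately
   padding1 + va_arg_size + frame + padding2 (a sketch of the intent; the
   exact accounting, including outgoing arguments and the red zone, is done
   by ix86_compute_frame_layout below).  */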
602
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
615
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
618
620 /* Which cpu we are scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
623
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
628
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
631
632 /* True if the SSE prefetch instruction is not a NOP. */
633 int x86_prefetch_sse;
634
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
637
638 /* Alignment to use for loops and jumps: */
639
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
642
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
645
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
648
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
651
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
655
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
658
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
662 \f
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
704
705 struct ix86_address
706 {
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709 };
710
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
712
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
716
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
748
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
752
753 /* Register class used for passing a given 64-bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
756 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
757 penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (the upper half contains only padding).
760 */
761 enum x86_64_reg_class
762 {
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
773 };
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
776
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
787 \f
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
795
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
798
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
801
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
804
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
807
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
812
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
821
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
828
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
845
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
850
851 struct gcc_target targetm = TARGET_INITIALIZER;
852 \f
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
858
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
861
862 void
863 override_options ()
864 {
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
868
869 static struct ptt
870 {
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
880 }
881 const processor_target_table[PROCESSOR_max] =
882 {
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
890 };
891
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
894 {
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
898 {
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
906 }
907 const processor_alias_table[] =
908 {
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
915 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
916 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
917 {"i686", PROCESSOR_PENTIUMPRO, 0},
918 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
919 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
920 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
921 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
922 PTA_MMX | PTA_PREFETCH_SSE},
923 {"k6", PROCESSOR_K6, PTA_MMX},
924 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
925 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
926 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
927 | PTA_3DNOW_A},
928 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
929 | PTA_3DNOW | PTA_3DNOW_A},
930 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
935 | PTA_3DNOW_A | PTA_SSE},
936 };
937
938 int const pta_size = ARRAY_SIZE (processor_alias_table);
939
940 /* By default our XFmode is the 80-bit extended format. If we are asked to
941 use TFmode instead, it's also the 80-bit format, but with padding. */
942 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
943 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
944
945 #ifdef SUBTARGET_OVERRIDE_OPTIONS
946 SUBTARGET_OVERRIDE_OPTIONS;
947 #endif
948
949 if (!ix86_cpu_string && ix86_arch_string)
950 ix86_cpu_string = ix86_arch_string;
951 if (!ix86_cpu_string)
952 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
953 if (!ix86_arch_string)
954 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
955
956 if (ix86_cmodel_string != 0)
957 {
958 if (!strcmp (ix86_cmodel_string, "small"))
959 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
960 else if (flag_pic)
961 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
962 else if (!strcmp (ix86_cmodel_string, "32"))
963 ix86_cmodel = CM_32;
964 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
965 ix86_cmodel = CM_KERNEL;
966 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
967 ix86_cmodel = CM_MEDIUM;
968 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
969 ix86_cmodel = CM_LARGE;
970 else
971 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
972 }
973 else
974 {
975 ix86_cmodel = CM_32;
976 if (TARGET_64BIT)
977 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
978 }
979 if (ix86_asm_string != 0)
980 {
981 if (!strcmp (ix86_asm_string, "intel"))
982 ix86_asm_dialect = ASM_INTEL;
983 else if (!strcmp (ix86_asm_string, "att"))
984 ix86_asm_dialect = ASM_ATT;
985 else
986 error ("bad value (%s) for -masm= switch", ix86_asm_string);
987 }
988 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
989 error ("code model `%s' not supported in the %s bit mode",
990 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
991 if (ix86_cmodel == CM_LARGE)
992 sorry ("code model `large' not supported yet");
993 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
994 sorry ("%i-bit mode not compiled in",
995 (target_flags & MASK_64BIT) ? 64 : 32);
996
997 for (i = 0; i < pta_size; i++)
998 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
999 {
1000 ix86_arch = processor_alias_table[i].processor;
1001 /* Default cpu tuning to the architecture. */
1002 ix86_cpu = ix86_arch;
1003 if (processor_alias_table[i].flags & PTA_MMX
1004 && !(target_flags & MASK_MMX_SET))
1005 target_flags |= MASK_MMX;
1006 if (processor_alias_table[i].flags & PTA_3DNOW
1007 && !(target_flags & MASK_3DNOW_SET))
1008 target_flags |= MASK_3DNOW;
1009 if (processor_alias_table[i].flags & PTA_3DNOW_A
1010 && !(target_flags & MASK_3DNOW_A_SET))
1011 target_flags |= MASK_3DNOW_A;
1012 if (processor_alias_table[i].flags & PTA_SSE
1013 && !(target_flags & MASK_SSE_SET))
1014 target_flags |= MASK_SSE;
1015 if (processor_alias_table[i].flags & PTA_SSE2
1016 && !(target_flags & MASK_SSE2_SET))
1017 target_flags |= MASK_SSE2;
1018 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1019 x86_prefetch_sse = true;
1020 break;
1021 }
1022
1023 if (i == pta_size)
1024 error ("bad value (%s) for -march= switch", ix86_arch_string);
1025
1026 for (i = 0; i < pta_size; i++)
1027 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1028 {
1029 ix86_cpu = processor_alias_table[i].processor;
1030 break;
1031 }
1032 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1033 x86_prefetch_sse = true;
1034 if (i == pta_size)
1035 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1036
1037 if (optimize_size)
1038 ix86_cost = &size_cost;
1039 else
1040 ix86_cost = processor_target_table[ix86_cpu].cost;
1041 target_flags |= processor_target_table[ix86_cpu].target_enable;
1042 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1043
1044 /* Arrange to set up i386_stack_locals for all functions. */
1045 init_machine_status = ix86_init_machine_status;
1046
1047 /* Validate -mregparm= value. */
1048 if (ix86_regparm_string)
1049 {
1050 i = atoi (ix86_regparm_string);
1051 if (i < 0 || i > REGPARM_MAX)
1052 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1053 else
1054 ix86_regparm = i;
1055 }
1056 else
1057 if (TARGET_64BIT)
1058 ix86_regparm = REGPARM_MAX;
1059
1060 /* If the user has provided any of the -malign-* options,
1061 warn and use that value only if -falign-* is not set.
1062 Remove this code in GCC 3.2 or later. */
1063 if (ix86_align_loops_string)
1064 {
1065 warning ("-malign-loops is obsolete, use -falign-loops");
1066 if (align_loops == 0)
1067 {
1068 i = atoi (ix86_align_loops_string);
1069 if (i < 0 || i > MAX_CODE_ALIGN)
1070 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1071 else
1072 align_loops = 1 << i;
1073 }
1074 }
1075
1076 if (ix86_align_jumps_string)
1077 {
1078 warning ("-malign-jumps is obsolete, use -falign-jumps");
1079 if (align_jumps == 0)
1080 {
1081 i = atoi (ix86_align_jumps_string);
1082 if (i < 0 || i > MAX_CODE_ALIGN)
1083 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1084 else
1085 align_jumps = 1 << i;
1086 }
1087 }
1088
1089 if (ix86_align_funcs_string)
1090 {
1091 warning ("-malign-functions is obsolete, use -falign-functions");
1092 if (align_functions == 0)
1093 {
1094 i = atoi (ix86_align_funcs_string);
1095 if (i < 0 || i > MAX_CODE_ALIGN)
1096 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1097 else
1098 align_functions = 1 << i;
1099 }
1100 }
1101
1102 /* Default align_* from the processor table. */
1103 if (align_loops == 0)
1104 {
1105 align_loops = processor_target_table[ix86_cpu].align_loop;
1106 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1107 }
1108 if (align_jumps == 0)
1109 {
1110 align_jumps = processor_target_table[ix86_cpu].align_jump;
1111 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1112 }
1113 if (align_functions == 0)
1114 {
1115 align_functions = processor_target_table[ix86_cpu].align_func;
1116 }
1117
1118 /* Validate -mpreferred-stack-boundary= value, or provide default.
1119 The default of 128 bits is for Pentium III's SSE __m128, but we
1120 don't want additional code to keep the stack aligned when
1121 optimizing for code size. */
1122 ix86_preferred_stack_boundary = (optimize_size
1123 ? TARGET_64BIT ? 64 : 32
1124 : 128);
1125 if (ix86_preferred_stack_boundary_string)
1126 {
1127 i = atoi (ix86_preferred_stack_boundary_string);
1128 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1129 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1130 TARGET_64BIT ? 3 : 2);
1131 else
1132 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1133 }
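/* For example, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
   alignment (assuming the usual BITS_PER_UNIT of 8).  */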
1134
1135 /* Validate -mbranch-cost= value, or provide default. */
1136 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1137 if (ix86_branch_cost_string)
1138 {
1139 i = atoi (ix86_branch_cost_string);
1140 if (i < 0 || i > 5)
1141 error ("-mbranch-cost=%d is not between 0 and 5", i);
1142 else
1143 ix86_branch_cost = i;
1144 }
1145
1146 if (ix86_tls_dialect_string)
1147 {
1148 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1149 ix86_tls_dialect = TLS_DIALECT_GNU;
1150 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1151 ix86_tls_dialect = TLS_DIALECT_SUN;
1152 else
1153 error ("bad value (%s) for -mtls-dialect= switch",
1154 ix86_tls_dialect_string);
1155 }
1156
1157 if (profile_flag)
1158 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1159
1160 /* Keep nonleaf frame pointers. */
1161 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1162 flag_omit_frame_pointer = 1;
1163
1164 /* If we're doing fast math, we don't care about comparison order
1165 wrt NaNs. This lets us use a shorter comparison sequence. */
1166 if (flag_unsafe_math_optimizations)
1167 target_flags &= ~MASK_IEEE_FP;
1168
1169 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1170 since the insns won't need emulation. */
1171 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1172 target_flags &= ~MASK_NO_FANCY_MATH_387;
1173
1174 if (TARGET_64BIT)
1175 {
1176 if (TARGET_ALIGN_DOUBLE)
1177 error ("-malign-double makes no sense in the 64bit mode");
1178 if (TARGET_RTD)
1179 error ("-mrtd calling convention not supported in the 64bit mode");
1180 /* Enable by default the SSE and MMX builtins. */
1181 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1182 ix86_fpmath = FPMATH_SSE;
1183 }
1184 else
1185 ix86_fpmath = FPMATH_387;
1186
1187 if (ix86_fpmath_string != 0)
1188 {
1189 if (! strcmp (ix86_fpmath_string, "387"))
1190 ix86_fpmath = FPMATH_387;
1191 else if (! strcmp (ix86_fpmath_string, "sse"))
1192 {
1193 if (!TARGET_SSE)
1194 {
1195 warning ("SSE instruction set disabled, using 387 arithmetics");
1196 ix86_fpmath = FPMATH_387;
1197 }
1198 else
1199 ix86_fpmath = FPMATH_SSE;
1200 }
1201 else if (! strcmp (ix86_fpmath_string, "387,sse")
1202 || ! strcmp (ix86_fpmath_string, "sse,387"))
1203 {
1204 if (!TARGET_SSE)
1205 {
1206 warning ("SSE instruction set disabled, using 387 arithmetics");
1207 ix86_fpmath = FPMATH_387;
1208 }
1209 else if (!TARGET_80387)
1210 {
1211 warning ("387 instruction set disabled, using SSE arithmetics");
1212 ix86_fpmath = FPMATH_SSE;
1213 }
1214 else
1215 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1216 }
1217 else
1218 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1219 }
1220
1221 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1222 on by -msse. */
1223 if (TARGET_SSE)
1224 {
1225 target_flags |= MASK_MMX;
1226 x86_prefetch_sse = true;
1227 }
1228
1229 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1230 if (TARGET_3DNOW)
1231 {
1232 target_flags |= MASK_MMX;
1233 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1234 extensions it adds. */
1235 if (x86_3dnow_a & (1 << ix86_arch))
1236 target_flags |= MASK_3DNOW_A;
1237 }
1238 if ((x86_accumulate_outgoing_args & CPUMASK)
1239 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1240 && !optimize_size)
1241 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1242
1243 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1244 {
1245 char *p;
1246 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1247 p = strchr (internal_label_prefix, 'X');
1248 internal_label_prefix_len = p - internal_label_prefix;
1249 *p = '\0';
1250 }
1251 }
1252 \f
1253 void
1254 optimization_options (level, size)
1255 int level;
1256 int size ATTRIBUTE_UNUSED;
1257 {
1258 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1259 make the problem with not enough registers even worse. */
1260 #ifdef INSN_SCHEDULING
1261 if (level > 1)
1262 flag_schedule_insns = 0;
1263 #endif
1264 if (TARGET_64BIT && optimize >= 1)
1265 flag_omit_frame_pointer = 1;
1266 if (TARGET_64BIT)
1267 {
1268 flag_pcc_struct_return = 0;
1269 flag_asynchronous_unwind_tables = 1;
1270 }
1271 if (profile_flag)
1272 flag_omit_frame_pointer = 0;
1273 }
1274 \f
1275 /* Table of valid machine attributes. */
1276 const struct attribute_spec ix86_attribute_table[] =
1277 {
1278 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1279 /* Stdcall attribute says callee is responsible for popping arguments
1280 if they are not variable. */
1281 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1282 /* Cdecl attribute says the callee is a normal C declaration */
1283 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1284 /* Regparm attribute specifies how many integer arguments are to be
1285 passed in registers. */
1286 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1287 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1288 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1289 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1290 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1291 #endif
1292 { NULL, 0, 0, false, false, false, NULL }
1293 };
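/* For illustration, these attributes appear on declarations like the
   following (a usage sketch, not code from this file):

     int __attribute__ ((stdcall)) win_callback (int x);
     int __attribute__ ((regparm (3))) fast_add (int a, int b, int c);

   "stdcall" makes the callee pop its fixed arguments; "regparm (3)" asks for
   up to three integer arguments to be passed in registers.  */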
1294
1295 /* Handle a "cdecl" or "stdcall" attribute;
1296 arguments as in struct attribute_spec.handler. */
1297 static tree
1298 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1299 tree *node;
1300 tree name;
1301 tree args ATTRIBUTE_UNUSED;
1302 int flags ATTRIBUTE_UNUSED;
1303 bool *no_add_attrs;
1304 {
1305 if (TREE_CODE (*node) != FUNCTION_TYPE
1306 && TREE_CODE (*node) != METHOD_TYPE
1307 && TREE_CODE (*node) != FIELD_DECL
1308 && TREE_CODE (*node) != TYPE_DECL)
1309 {
1310 warning ("`%s' attribute only applies to functions",
1311 IDENTIFIER_POINTER (name));
1312 *no_add_attrs = true;
1313 }
1314
1315 if (TARGET_64BIT)
1316 {
1317 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1318 *no_add_attrs = true;
1319 }
1320
1321 return NULL_TREE;
1322 }
1323
1324 /* Handle a "regparm" attribute;
1325 arguments as in struct attribute_spec.handler. */
1326 static tree
1327 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1328 tree *node;
1329 tree name;
1330 tree args;
1331 int flags ATTRIBUTE_UNUSED;
1332 bool *no_add_attrs;
1333 {
1334 if (TREE_CODE (*node) != FUNCTION_TYPE
1335 && TREE_CODE (*node) != METHOD_TYPE
1336 && TREE_CODE (*node) != FIELD_DECL
1337 && TREE_CODE (*node) != TYPE_DECL)
1338 {
1339 warning ("`%s' attribute only applies to functions",
1340 IDENTIFIER_POINTER (name));
1341 *no_add_attrs = true;
1342 }
1343 else
1344 {
1345 tree cst;
1346
1347 cst = TREE_VALUE (args);
1348 if (TREE_CODE (cst) != INTEGER_CST)
1349 {
1350 warning ("`%s' attribute requires an integer constant argument",
1351 IDENTIFIER_POINTER (name));
1352 *no_add_attrs = true;
1353 }
1354 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1355 {
1356 warning ("argument to `%s' attribute larger than %d",
1357 IDENTIFIER_POINTER (name), REGPARM_MAX);
1358 *no_add_attrs = true;
1359 }
1360 }
1361
1362 return NULL_TREE;
1363 }
1364
1365 /* Return 0 if the attributes for two types are incompatible, 1 if they
1366 are compatible, and 2 if they are nearly compatible (which causes a
1367 warning to be generated). */
1368
1369 static int
1370 ix86_comp_type_attributes (type1, type2)
1371 tree type1;
1372 tree type2;
1373 {
1374 /* Check for mismatch of non-default calling convention. */
1375 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1376
1377 if (TREE_CODE (type1) != FUNCTION_TYPE)
1378 return 1;
1379
1380 /* Check for mismatched return types (cdecl vs stdcall). */
1381 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1382 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1383 return 0;
1384 return 1;
1385 }
1386 \f
1387 /* Value is the number of bytes of arguments automatically
1388 popped when returning from a subroutine call.
1389 FUNDECL is the declaration node of the function (as a tree),
1390 FUNTYPE is the data type of the function (as a tree),
1391 or for a library call it is an identifier node for the subroutine name.
1392 SIZE is the number of bytes of arguments passed on the stack.
1393
1394 On the 80386, the RTD insn may be used to pop them if the number
1395 of args is fixed, but if the number is variable then the caller
1396 must pop them all. RTD can't be used for library calls now
1397 because the library is compiled with the Unix compiler.
1398 Use of RTD is a selectable option, since it is incompatible with
1399 standard Unix calling sequences. If the option is not selected,
1400 the caller must always pop the args.
1401
1402 The attribute stdcall is equivalent to RTD on a per module basis. */
1403
1404 int
1405 ix86_return_pops_args (fundecl, funtype, size)
1406 tree fundecl;
1407 tree funtype;
1408 int size;
1409 {
1410 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1411
1412 /* Cdecl functions override -mrtd, and never pop the stack. */
1413 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1414
1415 /* Stdcall functions will pop the stack if not variable args. */
1416 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1417 rtd = 1;
1418
1419 if (rtd
1420 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1421 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1422 == void_type_node)))
1423 return size;
1424 }
1425
1426 /* Lose any fake structure return argument if it is passed on the stack. */
1427 if (aggregate_value_p (TREE_TYPE (funtype))
1428 && !TARGET_64BIT)
1429 {
1430 int nregs = ix86_regparm;
1431
1432 if (funtype)
1433 {
1434 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1435
1436 if (attr)
1437 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1438 }
1439
1440 if (!nregs)
1441 return GET_MODE_SIZE (Pmode);
1442 }
1443
1444 return 0;
1445 }
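/* As a worked example (declarations assumed for illustration): for
   `void __attribute__ ((stdcall)) f (int a, int b);' the callee pops its
   8 bytes of arguments (e.g. via `ret $8'), so this function returns 8;
   for a plain cdecl or varargs function it returns 0 and the caller pops.  */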
1446 \f
1447 /* Argument support functions. */
1448
1449 /* Return true when register may be used to pass function parameters. */
1450 bool
1451 ix86_function_arg_regno_p (regno)
1452 int regno;
1453 {
1454 int i;
1455 if (!TARGET_64BIT)
1456 return (regno < REGPARM_MAX
1457 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1458 if (SSE_REGNO_P (regno) && TARGET_SSE)
1459 return true;
1460 /* RAX is used as a hidden argument to va_arg functions. */
1461 if (!regno)
1462 return true;
1463 for (i = 0; i < REGPARM_MAX; i++)
1464 if (regno == x86_64_int_parameter_registers[i])
1465 return true;
1466 return false;
1467 }
1468
1469 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1470 for a call to a function whose data type is FNTYPE.
1471 For a library call, FNTYPE is 0. */
1472
1473 void
1474 init_cumulative_args (cum, fntype, libname)
1475 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1476 tree fntype; /* tree ptr for function decl */
1477 rtx libname; /* SYMBOL_REF of library name or 0 */
1478 {
1479 static CUMULATIVE_ARGS zero_cum;
1480 tree param, next_param;
1481
1482 if (TARGET_DEBUG_ARG)
1483 {
1484 fprintf (stderr, "\ninit_cumulative_args (");
1485 if (fntype)
1486 fprintf (stderr, "fntype code = %s, ret code = %s",
1487 tree_code_name[(int) TREE_CODE (fntype)],
1488 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1489 else
1490 fprintf (stderr, "no fntype");
1491
1492 if (libname)
1493 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1494 }
1495
1496 *cum = zero_cum;
1497
1498 /* Set up the number of registers to use for passing arguments. */
1499 cum->nregs = ix86_regparm;
1500 cum->sse_nregs = SSE_REGPARM_MAX;
1501 if (fntype && !TARGET_64BIT)
1502 {
1503 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1504
1505 if (attr)
1506 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1507 }
1508 cum->maybe_vaarg = false;
1509
1510 /* Determine if this function has variable arguments. This is
1511 indicated by the last argument being 'void_type_node' if there
1512 are no variable arguments. If there are variable arguments, then
1513 we won't pass anything in registers. */
1514
1515 if (cum->nregs)
1516 {
1517 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1518 param != 0; param = next_param)
1519 {
1520 next_param = TREE_CHAIN (param);
1521 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1522 {
1523 if (!TARGET_64BIT)
1524 cum->nregs = 0;
1525 cum->maybe_vaarg = true;
1526 }
1527 }
1528 }
1529 if ((!fntype && !libname)
1530 || (fntype && !TYPE_ARG_TYPES (fntype)))
1531 cum->maybe_vaarg = 1;
1532
1533 if (TARGET_DEBUG_ARG)
1534 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1535
1536 return;
1537 }
1538
1539 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
1540 goal of this code is to classify each 8-byte chunk of an incoming argument by
1541 register class and assign registers accordingly. */
1542
1543 /* Return the union class of CLASS1 and CLASS2.
1544 See the x86-64 PS ABI for details. */
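/* A hedged example of how the rules below combine (illustrative only):
   merge_classes (X86_64_INTEGERSI_CLASS, X86_64_NO_CLASS) yields
   X86_64_INTEGERSI_CLASS by rule #2, while
   merge_classes (X86_64_SSE_CLASS, X86_64_INTEGERSI_CLASS) yields
   X86_64_INTEGER_CLASS by rule #4, so an 8 byte chunk containing both an
   int and a float ends up in a general purpose register.  */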
1545
1546 static enum x86_64_reg_class
1547 merge_classes (class1, class2)
1548 enum x86_64_reg_class class1, class2;
1549 {
1550 /* Rule #1: If both classes are equal, this is the resulting class. */
1551 if (class1 == class2)
1552 return class1;
1553
1554 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1555 the other class. */
1556 if (class1 == X86_64_NO_CLASS)
1557 return class2;
1558 if (class2 == X86_64_NO_CLASS)
1559 return class1;
1560
1561 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1562 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1563 return X86_64_MEMORY_CLASS;
1564
1565 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1566 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1567 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1568 return X86_64_INTEGERSI_CLASS;
1569 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1570 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1571 return X86_64_INTEGER_CLASS;
1572
1573 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1574 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1575 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1576 return X86_64_MEMORY_CLASS;
1577
1578 /* Rule #6: Otherwise class SSE is used. */
1579 return X86_64_SSE_CLASS;
1580 }
1581
1582 /* Classify the argument of type TYPE and mode MODE.
1583 CLASSES will be filled by the register class used to pass each word
1584 of the operand. The number of words is returned. In case the parameter
1585 should be passed in memory, 0 is returned. As a special case for zero
1586 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1587
1588 BIT_OFFSET is used internally for handling records; it gives the offset
1589 in bits, modulo 256, to avoid overflow cases.
1590
1591 See the x86-64 PS ABI for details.
1592 */
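/* Worked examples (an illustrative sketch of the algorithm below, assuming
   the usual psABI layout):

     struct { float x; float y; }    -> 1 word,  classes[0] = X86_64_SSE_CLASS
     struct { double a; double b; }  -> 2 words, both X86_64_SSEDF_CLASS
     struct { char c[24]; }          -> larger than 16 bytes, so 0 is
                                        returned and it is passed in memory. */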
1593
1594 static int
1595 classify_argument (mode, type, classes, bit_offset)
1596 enum machine_mode mode;
1597 tree type;
1598 enum x86_64_reg_class classes[MAX_CLASSES];
1599 int bit_offset;
1600 {
1601 int bytes =
1602 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1603 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1604
1605 /* Variable sized entities are always passed/returned in memory. */
1606 if (bytes < 0)
1607 return 0;
1608
1609 if (type && AGGREGATE_TYPE_P (type))
1610 {
1611 int i;
1612 tree field;
1613 enum x86_64_reg_class subclasses[MAX_CLASSES];
1614
1615 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1616 if (bytes > 16)
1617 return 0;
1618
1619 for (i = 0; i < words; i++)
1620 classes[i] = X86_64_NO_CLASS;
1621
1622 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1623 signal the memory class, so handle it as a special case. */
1624 if (!words)
1625 {
1626 classes[0] = X86_64_NO_CLASS;
1627 return 1;
1628 }
1629
1630 /* Classify each field of record and merge classes. */
1631 if (TREE_CODE (type) == RECORD_TYPE)
1632 {
1633 /* For C++ classes, first merge in the fields of the base classes. */
1634 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1635 {
1636 tree bases = TYPE_BINFO_BASETYPES (type);
1637 int n_bases = TREE_VEC_LENGTH (bases);
1638 int i;
1639
1640 for (i = 0; i < n_bases; ++i)
1641 {
1642 tree binfo = TREE_VEC_ELT (bases, i);
1643 int num;
1644 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1645 tree type = BINFO_TYPE (binfo);
1646
1647 num = classify_argument (TYPE_MODE (type),
1648 type, subclasses,
1649 (offset + bit_offset) % 256);
1650 if (!num)
1651 return 0;
1652 for (i = 0; i < num; i++)
1653 {
1654 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1655 classes[i + pos] =
1656 merge_classes (subclasses[i], classes[i + pos]);
1657 }
1658 }
1659 }
1660 /* And now merge the fields of structure. */
1661 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1662 {
1663 if (TREE_CODE (field) == FIELD_DECL)
1664 {
1665 int num;
1666
1667 /* Bitfields are always classified as integer. Handle them
1668 early, since later code would consider them to be
1669 misaligned integers. */
1670 if (DECL_BIT_FIELD (field))
1671 {
1672 for (i = int_bit_position (field) / 8 / 8;
1673 i < (int_bit_position (field)
1674 + tree_low_cst (DECL_SIZE (field), 0)
1675 + 63) / 8 / 8; i++)
1676 classes[i] =
1677 merge_classes (X86_64_INTEGER_CLASS,
1678 classes[i]);
1679 }
1680 else
1681 {
1682 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1683 TREE_TYPE (field), subclasses,
1684 (int_bit_position (field)
1685 + bit_offset) % 256);
1686 if (!num)
1687 return 0;
1688 for (i = 0; i < num; i++)
1689 {
1690 int pos =
1691 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1692 classes[i + pos] =
1693 merge_classes (subclasses[i], classes[i + pos]);
1694 }
1695 }
1696 }
1697 }
1698 }
1699 /* Arrays are handled as small records. */
1700 else if (TREE_CODE (type) == ARRAY_TYPE)
1701 {
1702 int num;
1703 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1704 TREE_TYPE (type), subclasses, bit_offset);
1705 if (!num)
1706 return 0;
1707
1708 /* The partial classes are now full classes. */
1709 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1710 subclasses[0] = X86_64_SSE_CLASS;
1711 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1712 subclasses[0] = X86_64_INTEGER_CLASS;
1713
1714 for (i = 0; i < words; i++)
1715 classes[i] = subclasses[i % num];
1716 }
1717 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1718 else if (TREE_CODE (type) == UNION_TYPE
1719 || TREE_CODE (type) == QUAL_UNION_TYPE)
1720 {
1721 /* For C++ classes, first merge in the fields of the base classes. */
1722 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1723 {
1724 tree bases = TYPE_BINFO_BASETYPES (type);
1725 int n_bases = TREE_VEC_LENGTH (bases);
1726 int i;
1727
1728 for (i = 0; i < n_bases; ++i)
1729 {
1730 tree binfo = TREE_VEC_ELT (bases, i);
1731 int num;
1732 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1733 tree type = BINFO_TYPE (binfo);
1734
1735 num = classify_argument (TYPE_MODE (type),
1736 type, subclasses,
1737 (offset + (bit_offset % 64)) % 256);
1738 if (!num)
1739 return 0;
1740 for (i = 0; i < num; i++)
1741 {
1742 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1743 classes[i + pos] =
1744 merge_classes (subclasses[i], classes[i + pos]);
1745 }
1746 }
1747 }
1748 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1749 {
1750 if (TREE_CODE (field) == FIELD_DECL)
1751 {
1752 int num;
1753 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1754 TREE_TYPE (field), subclasses,
1755 bit_offset);
1756 if (!num)
1757 return 0;
1758 for (i = 0; i < num; i++)
1759 classes[i] = merge_classes (subclasses[i], classes[i]);
1760 }
1761 }
1762 }
1763 else
1764 abort ();
1765
1766 /* Final merger cleanup. */
1767 for (i = 0; i < words; i++)
1768 {
1769 /* If one class is MEMORY, everything should be passed in
1770 memory. */
1771 if (classes[i] == X86_64_MEMORY_CLASS)
1772 return 0;
1773
1774 /* The X86_64_SSEUP_CLASS should always be preceded by
1775 X86_64_SSE_CLASS. */
1776 if (classes[i] == X86_64_SSEUP_CLASS
1777 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1778 classes[i] = X86_64_SSE_CLASS;
1779
1780 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1781 if (classes[i] == X86_64_X87UP_CLASS
1782 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1783 classes[i] = X86_64_SSE_CLASS;
1784 }
1785 return words;
1786 }
1787
1788 /* Compute alignment needed. We align all types to natural boundaries with
1789 exception of XFmode that is aligned to 64bits. */
1790 if (mode != VOIDmode && mode != BLKmode)
1791 {
1792 int mode_alignment = GET_MODE_BITSIZE (mode);
1793
1794 if (mode == XFmode)
1795 mode_alignment = 128;
1796 else if (mode == XCmode)
1797 mode_alignment = 256;
1798 /* Misaligned fields are always returned in memory. */
1799 if (bit_offset % mode_alignment)
1800 return 0;
1801 }
1802
1803 /* Classification of atomic types. */
1804 switch (mode)
1805 {
1806 case DImode:
1807 case SImode:
1808 case HImode:
1809 case QImode:
1810 case CSImode:
1811 case CHImode:
1812 case CQImode:
1813 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1814 classes[0] = X86_64_INTEGERSI_CLASS;
1815 else
1816 classes[0] = X86_64_INTEGER_CLASS;
1817 return 1;
1818 case CDImode:
1819 case TImode:
1820 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1821 return 2;
1822 case CTImode:
1823 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1824 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1825 return 4;
1826 case SFmode:
1827 if (!(bit_offset % 64))
1828 classes[0] = X86_64_SSESF_CLASS;
1829 else
1830 classes[0] = X86_64_SSE_CLASS;
1831 return 1;
1832 case DFmode:
1833 classes[0] = X86_64_SSEDF_CLASS;
1834 return 1;
1835 case TFmode:
1836 classes[0] = X86_64_X87_CLASS;
1837 classes[1] = X86_64_X87UP_CLASS;
1838 return 2;
1839 case TCmode:
1840 classes[0] = X86_64_X87_CLASS;
1841 classes[1] = X86_64_X87UP_CLASS;
1842 classes[2] = X86_64_X87_CLASS;
1843 classes[3] = X86_64_X87UP_CLASS;
1844 return 4;
1845 case DCmode:
1846 classes[0] = X86_64_SSEDF_CLASS;
1847 classes[1] = X86_64_SSEDF_CLASS;
1848 return 2;
1849 case SCmode:
1850 classes[0] = X86_64_SSE_CLASS;
1851 return 1;
1852 case V4SFmode:
1853 case V4SImode:
1854 case V16QImode:
1855 case V8HImode:
1856 case V2DFmode:
1857 case V2DImode:
1858 classes[0] = X86_64_SSE_CLASS;
1859 classes[1] = X86_64_SSEUP_CLASS;
1860 return 2;
1861 case V2SFmode:
1862 case V2SImode:
1863 case V4HImode:
1864 case V8QImode:
1865 classes[0] = X86_64_SSE_CLASS;
1866 return 1;
1867 case BLKmode:
1868 case VOIDmode:
1869 return 0;
1870 default:
1871 abort ();
1872 }
1873 }
1874
1875 /* Examine the argument and set the number of registers required in each
1876 class. Return 0 iff the parameter should be passed in memory. */
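/* For instance (illustrative, not exhaustive): a structure of two doubles
   sets *int_nregs = 0 and *sse_nregs = 2, while a TFmode long double
   (classified X87/X87UP) makes this return 0 for arguments, since x87
   registers are usable only for return values here.  */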
1877 static int
1878 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1879 enum machine_mode mode;
1880 tree type;
1881 int *int_nregs, *sse_nregs;
1882 int in_return;
1883 {
1884 enum x86_64_reg_class class[MAX_CLASSES];
1885 int n = classify_argument (mode, type, class, 0);
1886
1887 *int_nregs = 0;
1888 *sse_nregs = 0;
1889 if (!n)
1890 return 0;
1891 for (n--; n >= 0; n--)
1892 switch (class[n])
1893 {
1894 case X86_64_INTEGER_CLASS:
1895 case X86_64_INTEGERSI_CLASS:
1896 (*int_nregs)++;
1897 break;
1898 case X86_64_SSE_CLASS:
1899 case X86_64_SSESF_CLASS:
1900 case X86_64_SSEDF_CLASS:
1901 (*sse_nregs)++;
1902 break;
1903 case X86_64_NO_CLASS:
1904 case X86_64_SSEUP_CLASS:
1905 break;
1906 case X86_64_X87_CLASS:
1907 case X86_64_X87UP_CLASS:
1908 if (!in_return)
1909 return 0;
1910 break;
1911 case X86_64_MEMORY_CLASS:
1912 abort ();
1913 }
1914 return 1;
1915 }
1916 /* Construct the container for the argument used by the GCC interface. See
1917 FUNCTION_ARG for the detailed description. */
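/* A hedged sketch of the output: for something like struct { double d;
   long l; }, classified as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS },
   the loop below builds a PARALLEL whose first element is a DFmode SSE
   register at byte offset 0 and whose second is a DImode integer register
   at byte offset 8; the exact hard registers depend on the INTREG array
   and SSE_REGNO passed in by the caller.  */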
1918 static rtx
1919 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1920 enum machine_mode mode;
1921 tree type;
1922 int in_return;
1923 int nintregs, nsseregs;
1924 const int * intreg;
1925 int sse_regno;
1926 {
1927 enum machine_mode tmpmode;
1928 int bytes =
1929 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1930 enum x86_64_reg_class class[MAX_CLASSES];
1931 int n;
1932 int i;
1933 int nexps = 0;
1934 int needed_sseregs, needed_intregs;
1935 rtx exp[MAX_CLASSES];
1936 rtx ret;
1937
1938 n = classify_argument (mode, type, class, 0);
1939 if (TARGET_DEBUG_ARG)
1940 {
1941 if (!n)
1942 fprintf (stderr, "Memory class\n");
1943 else
1944 {
1945 fprintf (stderr, "Classes:");
1946 for (i = 0; i < n; i++)
1947 {
1948 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1949 }
1950 fprintf (stderr, "\n");
1951 }
1952 }
1953 if (!n)
1954 return NULL;
1955 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1956 return NULL;
1957 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1958 return NULL;
1959
1960 /* First construct simple cases. Avoid SCmode, since we want to use a
1961 single register to pass this type. */
1962 if (n == 1 && mode != SCmode)
1963 switch (class[0])
1964 {
1965 case X86_64_INTEGER_CLASS:
1966 case X86_64_INTEGERSI_CLASS:
1967 return gen_rtx_REG (mode, intreg[0]);
1968 case X86_64_SSE_CLASS:
1969 case X86_64_SSESF_CLASS:
1970 case X86_64_SSEDF_CLASS:
1971 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1972 case X86_64_X87_CLASS:
1973 return gen_rtx_REG (mode, FIRST_STACK_REG);
1974 case X86_64_NO_CLASS:
1975 /* Zero sized array, struct or class. */
1976 return NULL;
1977 default:
1978 abort ();
1979 }
1980 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1981 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1982 if (n == 2
1983 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1984 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1985 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1986 && class[1] == X86_64_INTEGER_CLASS
1987 && (mode == CDImode || mode == TImode)
1988 && intreg[0] + 1 == intreg[1])
1989 return gen_rtx_REG (mode, intreg[0]);
1990 if (n == 4
1991 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1992 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1993 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1994
1995 /* Otherwise figure out the entries of the PARALLEL. */
1996 for (i = 0; i < n; i++)
1997 {
1998 switch (class[i])
1999 {
2000 case X86_64_NO_CLASS:
2001 break;
2002 case X86_64_INTEGER_CLASS:
2003 case X86_64_INTEGERSI_CLASS:
2004 /* Merge TImodes on aligned occasions here too. */
2005 if (i * 8 + 8 > bytes)
2006 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2007 else if (class[i] == X86_64_INTEGERSI_CLASS)
2008 tmpmode = SImode;
2009 else
2010 tmpmode = DImode;
2011 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2012 if (tmpmode == BLKmode)
2013 tmpmode = DImode;
2014 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2015 gen_rtx_REG (tmpmode, *intreg),
2016 GEN_INT (i*8));
2017 intreg++;
2018 break;
2019 case X86_64_SSESF_CLASS:
2020 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2021 gen_rtx_REG (SFmode,
2022 SSE_REGNO (sse_regno)),
2023 GEN_INT (i*8));
2024 sse_regno++;
2025 break;
2026 case X86_64_SSEDF_CLASS:
2027 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2028 gen_rtx_REG (DFmode,
2029 SSE_REGNO (sse_regno)),
2030 GEN_INT (i*8));
2031 sse_regno++;
2032 break;
2033 case X86_64_SSE_CLASS:
2034 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2035 tmpmode = TImode, i++;
2036 else
2037 tmpmode = DImode;
2038 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2039 gen_rtx_REG (tmpmode,
2040 SSE_REGNO (sse_regno)),
2041 GEN_INT (i*8));
2042 sse_regno++;
2043 break;
2044 default:
2045 abort ();
2046 }
2047 }
2048 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2049 for (i = 0; i < nexps; i++)
2050 XVECEXP (ret, 0, i) = exp [i];
2051 return ret;
2052 }
2053
2054 /* Update the data in CUM to advance over an argument
2055 of mode MODE and data type TYPE.
2056 (TYPE is null for libcalls where that information may not be available.) */
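/* A hedged example of the bookkeeping below: on x86-64 an argument that
   needs one integer and one SSE register decrements cum->nregs and
   cum->sse_nregs by one each and bumps the matching regno counters; if it
   no longer fits in the remaining registers, only cum->words is advanced
   and the argument is passed on the stack.  */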
2057
2058 void
2059 function_arg_advance (cum, mode, type, named)
2060 CUMULATIVE_ARGS *cum; /* current arg information */
2061 enum machine_mode mode; /* current arg mode */
2062 tree type; /* type of the argument or 0 if lib support */
2063 int named; /* whether or not the argument was named */
2064 {
2065 int bytes =
2066 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2067 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2068
2069 if (TARGET_DEBUG_ARG)
2070 fprintf (stderr,
2071 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2072 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2073 if (TARGET_64BIT)
2074 {
2075 int int_nregs, sse_nregs;
2076 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2077 cum->words += words;
2078 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2079 {
2080 cum->nregs -= int_nregs;
2081 cum->sse_nregs -= sse_nregs;
2082 cum->regno += int_nregs;
2083 cum->sse_regno += sse_nregs;
2084 }
2085 else
2086 cum->words += words;
2087 }
2088 else
2089 {
2090 if (TARGET_SSE && mode == TImode)
2091 {
2092 cum->sse_words += words;
2093 cum->sse_nregs -= 1;
2094 cum->sse_regno += 1;
2095 if (cum->sse_nregs <= 0)
2096 {
2097 cum->sse_nregs = 0;
2098 cum->sse_regno = 0;
2099 }
2100 }
2101 else
2102 {
2103 cum->words += words;
2104 cum->nregs -= words;
2105 cum->regno += words;
2106
2107 if (cum->nregs <= 0)
2108 {
2109 cum->nregs = 0;
2110 cum->regno = 0;
2111 }
2112 }
2113 }
2114 return;
2115 }
2116
2117 /* Define where to put the arguments to a function.
2118 Value is zero to push the argument on the stack,
2119 or a hard register in which to store the argument.
2120
2121 MODE is the argument's machine mode.
2122 TYPE is the data type of the argument (as a tree).
2123 This is null for libcalls where that information may
2124 not be available.
2125 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2126 the preceding args and about the function being called.
2127 NAMED is nonzero if this argument is a named parameter
2128 (otherwise it is an extra parameter matching an ellipsis). */
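/* Illustrative note on the hidden AL value handled first below: on x86-64
   the CONST_INT returned for a VOIDmode query is the number of SSE
   registers used by a varargs call (per the psABI the caller places this
   count in %al), while -1 means that no AL setup is needed.  */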
2129
2130 rtx
2131 function_arg (cum, mode, type, named)
2132 CUMULATIVE_ARGS *cum; /* current arg information */
2133 enum machine_mode mode; /* current arg mode */
2134 tree type; /* type of the argument or 0 if lib support */
2135 int named; /* != 0 for normal args, == 0 for ... args */
2136 {
2137 rtx ret = NULL_RTX;
2138 int bytes =
2139 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2140 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2141
2142 /* Handle a hidden AL argument containing the number of registers used for
2143 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2144 avoid any AL settings. */
2145 if (mode == VOIDmode)
2146 {
2147 if (TARGET_64BIT)
2148 return GEN_INT (cum->maybe_vaarg
2149 ? (cum->sse_nregs < 0
2150 ? SSE_REGPARM_MAX
2151 : cum->sse_regno)
2152 : -1);
2153 else
2154 return constm1_rtx;
2155 }
2156 if (TARGET_64BIT)
2157 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2158 &x86_64_int_parameter_registers [cum->regno],
2159 cum->sse_regno);
2160 else
2161 switch (mode)
2162 {
2163 /* For now, pass fp/complex values on the stack. */
2164 default:
2165 break;
2166
2167 case BLKmode:
2168 case DImode:
2169 case SImode:
2170 case HImode:
2171 case QImode:
2172 if (words <= cum->nregs)
2173 ret = gen_rtx_REG (mode, cum->regno);
2174 break;
2175 case TImode:
2176 if (cum->sse_nregs)
2177 ret = gen_rtx_REG (mode, cum->sse_regno);
2178 break;
2179 }
2180
2181 if (TARGET_DEBUG_ARG)
2182 {
2183 fprintf (stderr,
2184 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2185 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2186
2187 if (ret)
2188 print_simple_rtl (stderr, ret);
2189 else
2190 fprintf (stderr, ", stack");
2191
2192 fprintf (stderr, " )\n");
2193 }
2194
2195 return ret;
2196 }
2197
2198 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2199 and type. */
2200
2201 int
2202 ix86_function_arg_boundary (mode, type)
2203 enum machine_mode mode;
2204 tree type;
2205 {
2206 int align;
2207 if (!TARGET_64BIT)
2208 return PARM_BOUNDARY;
2209 if (type)
2210 align = TYPE_ALIGN (type);
2211 else
2212 align = GET_MODE_ALIGNMENT (mode);
2213 if (align < PARM_BOUNDARY)
2214 align = PARM_BOUNDARY;
2215 if (align > 128)
2216 align = 128;
2217 return align;
2218 }
2219
2220 /* Return true if REGNO is a possible register number for a function value. */
2221 bool
2222 ix86_function_value_regno_p (regno)
2223 int regno;
2224 {
2225 if (!TARGET_64BIT)
2226 {
2227 return ((regno) == 0
2228 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2229 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2230 }
2231 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2232 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2233 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2234 }
2235
2236 /* Define how to find the value returned by a function.
2237 VALTYPE is the data type of the value (as a tree).
2238 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2239 otherwise, FUNC is 0. */
2240 rtx
2241 ix86_function_value (valtype)
2242 tree valtype;
2243 {
2244 if (TARGET_64BIT)
2245 {
2246 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2247 REGPARM_MAX, SSE_REGPARM_MAX,
2248 x86_64_int_return_registers, 0);
2249 /* For zero sized structures, construct_container returns NULL, but we need
2250 to keep the rest of the compiler happy by returning a meaningful value. */
2251 if (!ret)
2252 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2253 return ret;
2254 }
2255 else
2256 return gen_rtx_REG (TYPE_MODE (valtype),
2257 ix86_value_regno (TYPE_MODE (valtype)));
2258 }
2259
2260 /* Return nonzero iff TYPE is returned in memory. */
2261 int
2262 ix86_return_in_memory (type)
2263 tree type;
2264 {
2265 int needed_intregs, needed_sseregs;
2266 if (TARGET_64BIT)
2267 {
2268 return !examine_argument (TYPE_MODE (type), type, 1,
2269 &needed_intregs, &needed_sseregs);
2270 }
2271 else
2272 {
2273 if (TYPE_MODE (type) == BLKmode
2274 || (VECTOR_MODE_P (TYPE_MODE (type))
2275 && int_size_in_bytes (type) == 8)
2276 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2277 && TYPE_MODE (type) != TFmode
2278 && !VECTOR_MODE_P (TYPE_MODE (type))))
2279 return 1;
2280 return 0;
2281 }
2282 }
2283
2284 /* Define how to find the value returned by a library function
2285 assuming the value has mode MODE. */
2286 rtx
2287 ix86_libcall_value (mode)
2288 enum machine_mode mode;
2289 {
2290 if (TARGET_64BIT)
2291 {
2292 switch (mode)
2293 {
2294 case SFmode:
2295 case SCmode:
2296 case DFmode:
2297 case DCmode:
2298 return gen_rtx_REG (mode, FIRST_SSE_REG);
2299 case TFmode:
2300 case TCmode:
2301 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2302 default:
2303 return gen_rtx_REG (mode, 0);
2304 }
2305 }
2306 else
2307 return gen_rtx_REG (mode, ix86_value_regno (mode));
2308 }
2309
2310 /* Given a mode, return the register to use for a return value. */
2311
2312 static int
2313 ix86_value_regno (mode)
2314 enum machine_mode mode;
2315 {
2316 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2317 return FIRST_FLOAT_REG;
2318 if (mode == TImode || VECTOR_MODE_P (mode))
2319 return FIRST_SSE_REG;
2320 return 0;
2321 }
2322 \f
2323 /* Create the va_list data type. */
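/* The record built below corresponds, roughly, to this C declaration from
   the x86-64 psABI (shown here only as an illustration):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];  */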
2324
2325 tree
2326 ix86_build_va_list ()
2327 {
2328 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2329
2330 /* For i386 we use plain pointer to argument area. */
2331 if (!TARGET_64BIT)
2332 return build_pointer_type (char_type_node);
2333
2334 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2335 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2336
2337 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2338 unsigned_type_node);
2339 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2340 unsigned_type_node);
2341 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2342 ptr_type_node);
2343 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2344 ptr_type_node);
2345
2346 DECL_FIELD_CONTEXT (f_gpr) = record;
2347 DECL_FIELD_CONTEXT (f_fpr) = record;
2348 DECL_FIELD_CONTEXT (f_ovf) = record;
2349 DECL_FIELD_CONTEXT (f_sav) = record;
2350
2351 TREE_CHAIN (record) = type_decl;
2352 TYPE_NAME (record) = type_decl;
2353 TYPE_FIELDS (record) = f_gpr;
2354 TREE_CHAIN (f_gpr) = f_fpr;
2355 TREE_CHAIN (f_fpr) = f_ovf;
2356 TREE_CHAIN (f_ovf) = f_sav;
2357
2358 layout_type (record);
2359
2360 /* The correct type is an array type of one element. */
2361 return build_array_type (record, build_index_type (size_zero_node));
2362 }
2363
2364 /* Perform any actions needed for a function that is receiving a
2365 variable number of arguments.
2366
2367 CUM is as above.
2368
2369 MODE and TYPE are the mode and type of the current parameter.
2370
2371 PRETEND_SIZE is a variable that should be set to the amount of stack
2372 that must be pushed by the prolog to pretend that our caller pushed
2373 it.
2374
2375 Normally, this macro will push all remaining incoming registers on the
2376 stack and set PRETEND_SIZE to the length of the registers pushed. */
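/* A hedged sketch of the register save area laid out below (assuming the
   usual REGPARM_MAX of 6 on x86-64): general purpose argument register I
   is stored at byte offset 8 * I, and the SSE argument registers follow
   in 16 byte slots starting at byte offset 8 * REGPARM_MAX.  */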
2377
2378 void
2379 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2380 CUMULATIVE_ARGS *cum;
2381 enum machine_mode mode;
2382 tree type;
2383 int *pretend_size ATTRIBUTE_UNUSED;
2384 int no_rtl;
2385
2386 {
2387 CUMULATIVE_ARGS next_cum;
2388 rtx save_area = NULL_RTX, mem;
2389 rtx label;
2390 rtx label_ref;
2391 rtx tmp_reg;
2392 rtx nsse_reg;
2393 int set;
2394 tree fntype;
2395 int stdarg_p;
2396 int i;
2397
2398 if (!TARGET_64BIT)
2399 return;
2400
2401 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2402 ix86_save_varrargs_registers = 1;
2403
2404 fntype = TREE_TYPE (current_function_decl);
2405 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2406 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2407 != void_type_node));
2408
2409 /* For varargs, we do not want to skip the dummy va_dcl argument.
2410 For stdargs, we do want to skip the last named argument. */
2411 next_cum = *cum;
2412 if (stdarg_p)
2413 function_arg_advance (&next_cum, mode, type, 1);
2414
2415 if (!no_rtl)
2416 save_area = frame_pointer_rtx;
2417
2418 set = get_varargs_alias_set ();
2419
2420 for (i = next_cum.regno; i < ix86_regparm; i++)
2421 {
2422 mem = gen_rtx_MEM (Pmode,
2423 plus_constant (save_area, i * UNITS_PER_WORD));
2424 set_mem_alias_set (mem, set);
2425 emit_move_insn (mem, gen_rtx_REG (Pmode,
2426 x86_64_int_parameter_registers[i]));
2427 }
2428
2429 if (next_cum.sse_nregs)
2430 {
2431 /* Now emit code to save SSE registers. The AX parameter contains the
2432 number of SSE parameter registers used to call this function. We use
2433 the sse_prologue_save insn template that produces a computed jump across
2434 the SSE saves. We need some preparation work to get this working. */
2435
2436 label = gen_label_rtx ();
2437 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2438
2439 /* Compute address to jump to :
2440 label - 5*eax + nnamed_sse_arguments*5 */
2441 tmp_reg = gen_reg_rtx (Pmode);
2442 nsse_reg = gen_reg_rtx (Pmode);
2443 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2444 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2445 gen_rtx_MULT (Pmode, nsse_reg,
2446 GEN_INT (4))));
2447 if (next_cum.sse_regno)
2448 emit_move_insn
2449 (nsse_reg,
2450 gen_rtx_CONST (DImode,
2451 gen_rtx_PLUS (DImode,
2452 label_ref,
2453 GEN_INT (next_cum.sse_regno * 4))));
2454 else
2455 emit_move_insn (nsse_reg, label_ref);
2456 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2457
2458 /* Compute the address of the memory block we save into. We always use a
2459 pointer pointing 127 bytes after the first byte to store - this is
2460 needed to keep the instruction size limited to 4 bytes. */
2461 tmp_reg = gen_reg_rtx (Pmode);
2462 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2463 plus_constant (save_area,
2464 8 * REGPARM_MAX + 127)));
2465 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2466 set_mem_alias_set (mem, set);
2467 set_mem_align (mem, BITS_PER_WORD);
2468
2469 /* And finally do the dirty job! */
2470 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2471 GEN_INT (next_cum.sse_regno), label));
2472 }
2473
2474 }
2475
2476 /* Implement va_start. */
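/* A rough sketch of what the expansion below stores: gp_offset becomes
   8 * <integer registers already used>, fp_offset becomes
   8 * REGPARM_MAX + 16 * <SSE registers already used>, overflow_arg_area
   points at the first stack-passed argument, and reg_save_area points at
   the block saved by the prologue.  */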
2477
2478 void
2479 ix86_va_start (valist, nextarg)
2480 tree valist;
2481 rtx nextarg;
2482 {
2483 HOST_WIDE_INT words, n_gpr, n_fpr;
2484 tree f_gpr, f_fpr, f_ovf, f_sav;
2485 tree gpr, fpr, ovf, sav, t;
2486
2487 /* Only the 64-bit target needs something special. */
2488 if (!TARGET_64BIT)
2489 {
2490 std_expand_builtin_va_start (valist, nextarg);
2491 return;
2492 }
2493
2494 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2495 f_fpr = TREE_CHAIN (f_gpr);
2496 f_ovf = TREE_CHAIN (f_fpr);
2497 f_sav = TREE_CHAIN (f_ovf);
2498
2499 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2500 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2501 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2502 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2503 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2504
2505 /* Count number of gp and fp argument registers used. */
2506 words = current_function_args_info.words;
2507 n_gpr = current_function_args_info.regno;
2508 n_fpr = current_function_args_info.sse_regno;
2509
2510 if (TARGET_DEBUG_ARG)
2511 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2512 (int) words, (int) n_gpr, (int) n_fpr);
2513
2514 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2515 build_int_2 (n_gpr * 8, 0));
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2518
2519 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2520 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2521 TREE_SIDE_EFFECTS (t) = 1;
2522 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2523
2524 /* Find the overflow area. */
2525 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2526 if (words != 0)
2527 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2528 build_int_2 (words * UNITS_PER_WORD, 0));
2529 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2530 TREE_SIDE_EFFECTS (t) = 1;
2531 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2532
2533 /* Find the register save area.
2534 The function prologue saves it right above the stack frame. */
2535 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2536 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2537 TREE_SIDE_EFFECTS (t) = 1;
2538 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2539 }
2540
2541 /* Implement va_arg. */
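/* A condensed sketch of the expansion below (the code is authoritative):
   classify the type; if it can live in registers, compare gp_offset and
   fp_offset against their limits and, when the argument still fits, fetch
   it from reg_save_area (copying through a temporary when the pieces are
   not consecutive) and bump the offsets; otherwise fall through to the
   overflow area, align it if required, and advance it past the
   argument.  */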
2542 rtx
2543 ix86_va_arg (valist, type)
2544 tree valist, type;
2545 {
2546 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2547 tree f_gpr, f_fpr, f_ovf, f_sav;
2548 tree gpr, fpr, ovf, sav, t;
2549 int size, rsize;
2550 rtx lab_false, lab_over = NULL_RTX;
2551 rtx addr_rtx, r;
2552 rtx container;
2553
2554 /* Only the 64-bit target needs something special. */
2555 if (!TARGET_64BIT)
2556 {
2557 return std_expand_builtin_va_arg (valist, type);
2558 }
2559
2560 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2561 f_fpr = TREE_CHAIN (f_gpr);
2562 f_ovf = TREE_CHAIN (f_fpr);
2563 f_sav = TREE_CHAIN (f_ovf);
2564
2565 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2566 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2567 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2568 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2569 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2570
2571 size = int_size_in_bytes (type);
2572 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2573
2574 container = construct_container (TYPE_MODE (type), type, 0,
2575 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2576 /*
2577 * Pull the value out of the saved registers ...
2578 */
2579
2580 addr_rtx = gen_reg_rtx (Pmode);
2581
2582 if (container)
2583 {
2584 rtx int_addr_rtx, sse_addr_rtx;
2585 int needed_intregs, needed_sseregs;
2586 int need_temp;
2587
2588 lab_over = gen_label_rtx ();
2589 lab_false = gen_label_rtx ();
2590
2591 examine_argument (TYPE_MODE (type), type, 0,
2592 &needed_intregs, &needed_sseregs);
2593
2594
2595 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2596 || TYPE_ALIGN (type) > 128);
2597
2598 /* In case we are passing a structure, verify that it is a consecutive block
2599 in the register save area. If not, we need to do moves. */
2600 if (!need_temp && !REG_P (container))
2601 {
2602 /* Verify that all registers are strictly consecutive. */
2603 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2604 {
2605 int i;
2606
2607 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2608 {
2609 rtx slot = XVECEXP (container, 0, i);
2610 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2611 || INTVAL (XEXP (slot, 1)) != i * 16)
2612 need_temp = 1;
2613 }
2614 }
2615 else
2616 {
2617 int i;
2618
2619 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2620 {
2621 rtx slot = XVECEXP (container, 0, i);
2622 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2623 || INTVAL (XEXP (slot, 1)) != i * 8)
2624 need_temp = 1;
2625 }
2626 }
2627 }
2628 if (!need_temp)
2629 {
2630 int_addr_rtx = addr_rtx;
2631 sse_addr_rtx = addr_rtx;
2632 }
2633 else
2634 {
2635 int_addr_rtx = gen_reg_rtx (Pmode);
2636 sse_addr_rtx = gen_reg_rtx (Pmode);
2637 }
2638 /* First ensure that we fit completely in registers. */
2639 if (needed_intregs)
2640 {
2641 emit_cmp_and_jump_insns (expand_expr
2642 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2643 GEN_INT ((REGPARM_MAX - needed_intregs +
2644 1) * 8), GE, const1_rtx, SImode,
2645 1, lab_false);
2646 }
2647 if (needed_sseregs)
2648 {
2649 emit_cmp_and_jump_insns (expand_expr
2650 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2651 GEN_INT ((SSE_REGPARM_MAX -
2652 needed_sseregs + 1) * 16 +
2653 REGPARM_MAX * 8), GE, const1_rtx,
2654 SImode, 1, lab_false);
2655 }
2656
2657 /* Compute index to start of area used for integer regs. */
2658 if (needed_intregs)
2659 {
2660 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2661 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2662 if (r != int_addr_rtx)
2663 emit_move_insn (int_addr_rtx, r);
2664 }
2665 if (needed_sseregs)
2666 {
2667 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2668 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2669 if (r != sse_addr_rtx)
2670 emit_move_insn (sse_addr_rtx, r);
2671 }
2672 if (need_temp)
2673 {
2674 int i;
2675 rtx mem;
2676
2677 /* Never use the memory itself, as it has the alias set. */
2678 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2679 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2680 set_mem_alias_set (mem, get_varargs_alias_set ());
2681 set_mem_align (mem, BITS_PER_UNIT);
2682
2683 for (i = 0; i < XVECLEN (container, 0); i++)
2684 {
2685 rtx slot = XVECEXP (container, 0, i);
2686 rtx reg = XEXP (slot, 0);
2687 enum machine_mode mode = GET_MODE (reg);
2688 rtx src_addr;
2689 rtx src_mem;
2690 int src_offset;
2691 rtx dest_mem;
2692
2693 if (SSE_REGNO_P (REGNO (reg)))
2694 {
2695 src_addr = sse_addr_rtx;
2696 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2697 }
2698 else
2699 {
2700 src_addr = int_addr_rtx;
2701 src_offset = REGNO (reg) * 8;
2702 }
2703 src_mem = gen_rtx_MEM (mode, src_addr);
2704 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2705 src_mem = adjust_address (src_mem, mode, src_offset);
2706 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2707 emit_move_insn (dest_mem, src_mem);
2708 }
2709 }
2710
2711 if (needed_intregs)
2712 {
2713 t =
2714 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2715 build_int_2 (needed_intregs * 8, 0));
2716 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2717 TREE_SIDE_EFFECTS (t) = 1;
2718 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2719 }
2720 if (needed_sseregs)
2721 {
2722 t =
2723 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2724 build_int_2 (needed_sseregs * 16, 0));
2725 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2726 TREE_SIDE_EFFECTS (t) = 1;
2727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2728 }
2729
2730 emit_jump_insn (gen_jump (lab_over));
2731 emit_barrier ();
2732 emit_label (lab_false);
2733 }
2734
2735 /* ... otherwise out of the overflow area. */
2736
2737 /* Care for on-stack alignment if needed. */
2738 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2739 t = ovf;
2740 else
2741 {
2742 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2743 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2744 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2745 }
2746 t = save_expr (t);
2747
2748 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2749 if (r != addr_rtx)
2750 emit_move_insn (addr_rtx, r);
2751
2752 t =
2753 build (PLUS_EXPR, TREE_TYPE (t), t,
2754 build_int_2 (rsize * UNITS_PER_WORD, 0));
2755 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2756 TREE_SIDE_EFFECTS (t) = 1;
2757 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2758
2759 if (container)
2760 emit_label (lab_over);
2761
2762 return addr_rtx;
2763 }
2764 \f
2765 /* Return nonzero if OP is either an i387 or an SSE fp register. */
2766 int
2767 any_fp_register_operand (op, mode)
2768 rtx op;
2769 enum machine_mode mode ATTRIBUTE_UNUSED;
2770 {
2771 return ANY_FP_REG_P (op);
2772 }
2773
2774 /* Return nonzero if OP is an i387 fp register. */
2775 int
2776 fp_register_operand (op, mode)
2777 rtx op;
2778 enum machine_mode mode ATTRIBUTE_UNUSED;
2779 {
2780 return FP_REG_P (op);
2781 }
2782
2783 /* Return nonzero if OP is a non-fp register_operand. */
2784 int
2785 register_and_not_any_fp_reg_operand (op, mode)
2786 rtx op;
2787 enum machine_mode mode;
2788 {
2789 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2790 }
2791
2792 /* Return nonzero if OP is a register operand other than an
2793 i387 fp register. */
2794 int
2795 register_and_not_fp_reg_operand (op, mode)
2796 rtx op;
2797 enum machine_mode mode;
2798 {
2799 return register_operand (op, mode) && !FP_REG_P (op);
2800 }
2801
2802 /* Return nonzero if OP is a general operand representable on x86_64. */
2803
2804 int
2805 x86_64_general_operand (op, mode)
2806 rtx op;
2807 enum machine_mode mode;
2808 {
2809 if (!TARGET_64BIT)
2810 return general_operand (op, mode);
2811 if (nonimmediate_operand (op, mode))
2812 return 1;
2813 return x86_64_sign_extended_value (op);
2814 }
2815
2816 /* Return nonzero if OP is a general operand representable on x86_64
2817 as either a sign extended or zero extended constant. */
2818
2819 int
2820 x86_64_szext_general_operand (op, mode)
2821 rtx op;
2822 enum machine_mode mode;
2823 {
2824 if (!TARGET_64BIT)
2825 return general_operand (op, mode);
2826 if (nonimmediate_operand (op, mode))
2827 return 1;
2828 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2829 }
2830
2831 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2832
2833 int
2834 x86_64_nonmemory_operand (op, mode)
2835 rtx op;
2836 enum machine_mode mode;
2837 {
2838 if (!TARGET_64BIT)
2839 return nonmemory_operand (op, mode);
2840 if (register_operand (op, mode))
2841 return 1;
2842 return x86_64_sign_extended_value (op);
2843 }
2844
2845 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2846
2847 int
2848 x86_64_movabs_operand (op, mode)
2849 rtx op;
2850 enum machine_mode mode;
2851 {
2852 if (!TARGET_64BIT || !flag_pic)
2853 return nonmemory_operand (op, mode);
2854 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2855 return 1;
2856 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2857 return 1;
2858 return 0;
2859 }
2860
2861 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign extended or zero extended constant. */
2862
2863 int
2864 x86_64_szext_nonmemory_operand (op, mode)
2865 rtx op;
2866 enum machine_mode mode;
2867 {
2868 if (!TARGET_64BIT)
2869 return nonmemory_operand (op, mode);
2870 if (register_operand (op, mode))
2871 return 1;
2872 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2873 }
2874
2875 /* Return nonzero if OP is an immediate operand representable on x86_64. */
2876
2877 int
2878 x86_64_immediate_operand (op, mode)
2879 rtx op;
2880 enum machine_mode mode;
2881 {
2882 if (!TARGET_64BIT)
2883 return immediate_operand (op, mode);
2884 return x86_64_sign_extended_value (op);
2885 }
2886
2887 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
2888
2889 int
2890 x86_64_zext_immediate_operand (op, mode)
2891 rtx op;
2892 enum machine_mode mode ATTRIBUTE_UNUSED;
2893 {
2894 return x86_64_zero_extended_value (op);
2895 }
2896
2897 /* Return nonzero if OP is (const_int 1), else return zero. */
2898
2899 int
2900 const_int_1_operand (op, mode)
2901 rtx op;
2902 enum machine_mode mode ATTRIBUTE_UNUSED;
2903 {
2904 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2905 }
2906
2907 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2908 for shift & compare patterns, as shifting by 0 does not change flags),
2909 else return zero. */
2910
2911 int
2912 const_int_1_31_operand (op, mode)
2913 rtx op;
2914 enum machine_mode mode ATTRIBUTE_UNUSED;
2915 {
2916 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2917 }
2918
2919 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2920 reference and a constant. */
2921
2922 int
2923 symbolic_operand (op, mode)
2924 register rtx op;
2925 enum machine_mode mode ATTRIBUTE_UNUSED;
2926 {
2927 switch (GET_CODE (op))
2928 {
2929 case SYMBOL_REF:
2930 case LABEL_REF:
2931 return 1;
2932
2933 case CONST:
2934 op = XEXP (op, 0);
2935 if (GET_CODE (op) == SYMBOL_REF
2936 || GET_CODE (op) == LABEL_REF
2937 || (GET_CODE (op) == UNSPEC
2938 && (XINT (op, 1) == UNSPEC_GOT
2939 || XINT (op, 1) == UNSPEC_GOTOFF
2940 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2941 return 1;
2942 if (GET_CODE (op) != PLUS
2943 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2944 return 0;
2945
2946 op = XEXP (op, 0);
2947 if (GET_CODE (op) == SYMBOL_REF
2948 || GET_CODE (op) == LABEL_REF)
2949 return 1;
2950 /* Only @GOTOFF gets offsets. */
2951 if (GET_CODE (op) != UNSPEC
2952 || XINT (op, 1) != UNSPEC_GOTOFF)
2953 return 0;
2954
2955 op = XVECEXP (op, 0, 0);
2956 if (GET_CODE (op) == SYMBOL_REF
2957 || GET_CODE (op) == LABEL_REF)
2958 return 1;
2959 return 0;
2960
2961 default:
2962 return 0;
2963 }
2964 }
2965
2966 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2967
2968 int
2969 pic_symbolic_operand (op, mode)
2970 register rtx op;
2971 enum machine_mode mode ATTRIBUTE_UNUSED;
2972 {
2973 if (GET_CODE (op) != CONST)
2974 return 0;
2975 op = XEXP (op, 0);
2976 if (TARGET_64BIT)
2977 {
2978 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2979 return 1;
2980 }
2981 else
2982 {
2983 if (GET_CODE (op) == UNSPEC)
2984 return 1;
2985 if (GET_CODE (op) != PLUS
2986 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2987 return 0;
2988 op = XEXP (op, 0);
2989 if (GET_CODE (op) == UNSPEC)
2990 return 1;
2991 }
2992 return 0;
2993 }
2994
2995 /* Return true if OP is a symbolic operand that resolves locally. */
2996
2997 static int
2998 local_symbolic_operand (op, mode)
2999 rtx op;
3000 enum machine_mode mode ATTRIBUTE_UNUSED;
3001 {
3002 if (GET_CODE (op) == LABEL_REF)
3003 return 1;
3004
3005 if (GET_CODE (op) == CONST
3006 && GET_CODE (XEXP (op, 0)) == PLUS
3007 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3008 op = XEXP (XEXP (op, 0), 0);
3009
3010 if (GET_CODE (op) != SYMBOL_REF)
3011 return 0;
3012
3013 /* These we've been told are local by varasm and encode_section_info
3014 respectively. */
3015 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3016 return 1;
3017
3018 /* There is, however, a not insubstantial body of code in the rest of
3019 the compiler that assumes it can just stick the results of
3020 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3021 /* ??? This is a hack. Should update the body of the compiler to
3022 always create a DECL and invoke targetm.encode_section_info. */
3023 if (strncmp (XSTR (op, 0), internal_label_prefix,
3024 internal_label_prefix_len) == 0)
3025 return 1;
3026
3027 return 0;
3028 }
3029
3030 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3031
3032 int
3033 tls_symbolic_operand (op, mode)
3034 register rtx op;
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3036 {
3037 const char *symbol_str;
3038
3039 if (GET_CODE (op) != SYMBOL_REF)
3040 return 0;
3041 symbol_str = XSTR (op, 0);
3042
3043 if (symbol_str[0] != '%')
3044 return 0;
3045 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3046 }
3047
3048 static int
3049 tls_symbolic_operand_1 (op, kind)
3050 rtx op;
3051 enum tls_model kind;
3052 {
3053 const char *symbol_str;
3054
3055 if (GET_CODE (op) != SYMBOL_REF)
3056 return 0;
3057 symbol_str = XSTR (op, 0);
3058
3059 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3060 }
3061
3062 int
3063 global_dynamic_symbolic_operand (op, mode)
3064 register rtx op;
3065 enum machine_mode mode ATTRIBUTE_UNUSED;
3066 {
3067 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3068 }
3069
3070 int
3071 local_dynamic_symbolic_operand (op, mode)
3072 register rtx op;
3073 enum machine_mode mode ATTRIBUTE_UNUSED;
3074 {
3075 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3076 }
3077
3078 int
3079 initial_exec_symbolic_operand (op, mode)
3080 register rtx op;
3081 enum machine_mode mode ATTRIBUTE_UNUSED;
3082 {
3083 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3084 }
3085
3086 int
3087 local_exec_symbolic_operand (op, mode)
3088 register rtx op;
3089 enum machine_mode mode ATTRIBUTE_UNUSED;
3090 {
3091 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3092 }
3093
3094 /* Test for a valid operand for a call instruction. Don't allow the
3095 arg pointer register or virtual regs since they may decay into
3096 reg + const, which the patterns can't handle. */
3097
3098 int
3099 call_insn_operand (op, mode)
3100 rtx op;
3101 enum machine_mode mode ATTRIBUTE_UNUSED;
3102 {
3103 /* Disallow indirect through a virtual register. This leads to
3104 compiler aborts when trying to eliminate them. */
3105 if (GET_CODE (op) == REG
3106 && (op == arg_pointer_rtx
3107 || op == frame_pointer_rtx
3108 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3109 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3110 return 0;
3111
3112 /* Disallow `call 1234'. Due to varying assembler lameness this
3113 gets either rejected or translated to `call .+1234'. */
3114 if (GET_CODE (op) == CONST_INT)
3115 return 0;
3116
3117 /* Explicitly allow SYMBOL_REF even if pic. */
3118 if (GET_CODE (op) == SYMBOL_REF)
3119 return 1;
3120
3121 /* Otherwise we can allow any general_operand in the address. */
3122 return general_operand (op, Pmode);
3123 }
3124
3125 int
3126 constant_call_address_operand (op, mode)
3127 rtx op;
3128 enum machine_mode mode ATTRIBUTE_UNUSED;
3129 {
3130 if (GET_CODE (op) == CONST
3131 && GET_CODE (XEXP (op, 0)) == PLUS
3132 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3133 op = XEXP (XEXP (op, 0), 0);
3134 return GET_CODE (op) == SYMBOL_REF;
3135 }
3136
3137 /* Match exactly zero and one. */
3138
3139 int
3140 const0_operand (op, mode)
3141 register rtx op;
3142 enum machine_mode mode;
3143 {
3144 return op == CONST0_RTX (mode);
3145 }
3146
3147 int
3148 const1_operand (op, mode)
3149 register rtx op;
3150 enum machine_mode mode ATTRIBUTE_UNUSED;
3151 {
3152 return op == const1_rtx;
3153 }
3154
3155 /* Match 2, 4, or 8. Used for leal multiplicands. */
3156
3157 int
3158 const248_operand (op, mode)
3159 register rtx op;
3160 enum machine_mode mode ATTRIBUTE_UNUSED;
3161 {
3162 return (GET_CODE (op) == CONST_INT
3163 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3164 }
3165
3166 /* True if this is a constant appropriate for an increment or decrement. */
3167
3168 int
3169 incdec_operand (op, mode)
3170 register rtx op;
3171 enum machine_mode mode ATTRIBUTE_UNUSED;
3172 {
3173 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3174 the flags register, since the carry flag is not set. */
3175 if (TARGET_PENTIUM4 && !optimize_size)
3176 return 0;
3177 return op == const1_rtx || op == constm1_rtx;
3178 }
3179
3180 /* Return nonzero if OP is acceptable as an operand of the DImode shift
3181 expander. */
3182
3183 int
3184 shiftdi_operand (op, mode)
3185 rtx op;
3186 enum machine_mode mode ATTRIBUTE_UNUSED;
3187 {
3188 if (TARGET_64BIT)
3189 return nonimmediate_operand (op, mode);
3190 else
3191 return register_operand (op, mode);
3192 }
3193
3194 /* Return false if this is the stack pointer, or any other fake
3195 register eliminable to the stack pointer. Otherwise, this is
3196 a register operand.
3197
3198 This is used to prevent esp from being used as an index reg,
3199 which would only happen in pathological cases. */
3200
3201 int
3202 reg_no_sp_operand (op, mode)
3203 register rtx op;
3204 enum machine_mode mode;
3205 {
3206 rtx t = op;
3207 if (GET_CODE (t) == SUBREG)
3208 t = SUBREG_REG (t);
3209 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3210 return 0;
3211
3212 return register_operand (op, mode);
3213 }
3214
3215 int
3216 mmx_reg_operand (op, mode)
3217 register rtx op;
3218 enum machine_mode mode ATTRIBUTE_UNUSED;
3219 {
3220 return MMX_REG_P (op);
3221 }
3222
3223 /* Return false if this is any eliminable register. Otherwise
3224 general_operand. */
3225
3226 int
3227 general_no_elim_operand (op, mode)
3228 register rtx op;
3229 enum machine_mode mode;
3230 {
3231 rtx t = op;
3232 if (GET_CODE (t) == SUBREG)
3233 t = SUBREG_REG (t);
3234 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3235 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3236 || t == virtual_stack_dynamic_rtx)
3237 return 0;
3238 if (REG_P (t)
3239 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3240 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3241 return 0;
3242
3243 return general_operand (op, mode);
3244 }
3245
3246 /* Return false if this is any eliminable register. Otherwise
3247 register_operand or const_int. */
3248
3249 int
3250 nonmemory_no_elim_operand (op, mode)
3251 register rtx op;
3252 enum machine_mode mode;
3253 {
3254 rtx t = op;
3255 if (GET_CODE (t) == SUBREG)
3256 t = SUBREG_REG (t);
3257 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3258 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3259 || t == virtual_stack_dynamic_rtx)
3260 return 0;
3261
3262 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3263 }
3264
3265 /* Return false if this is any eliminable register or stack register,
3266 otherwise work like register_operand. */
3267
3268 int
3269 index_register_operand (op, mode)
3270 register rtx op;
3271 enum machine_mode mode;
3272 {
3273 rtx t = op;
3274 if (GET_CODE (t) == SUBREG)
3275 t = SUBREG_REG (t);
3276 if (!REG_P (t))
3277 return 0;
3278 if (t == arg_pointer_rtx
3279 || t == frame_pointer_rtx
3280 || t == virtual_incoming_args_rtx
3281 || t == virtual_stack_vars_rtx
3282 || t == virtual_stack_dynamic_rtx
3283 || REGNO (t) == STACK_POINTER_REGNUM)
3284 return 0;
3285
3286 return general_operand (op, mode);
3287 }
3288
3289 /* Return true if op is a Q_REGS class register. */
3290
3291 int
3292 q_regs_operand (op, mode)
3293 register rtx op;
3294 enum machine_mode mode;
3295 {
3296 if (mode != VOIDmode && GET_MODE (op) != mode)
3297 return 0;
3298 if (GET_CODE (op) == SUBREG)
3299 op = SUBREG_REG (op);
3300 return ANY_QI_REG_P (op);
3301 }
3302
3303 /* Return true if op is a NON_Q_REGS class register. */
3304
3305 int
3306 non_q_regs_operand (op, mode)
3307 register rtx op;
3308 enum machine_mode mode;
3309 {
3310 if (mode != VOIDmode && GET_MODE (op) != mode)
3311 return 0;
3312 if (GET_CODE (op) == SUBREG)
3313 op = SUBREG_REG (op);
3314 return NON_QI_REG_P (op);
3315 }
3316
3317 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3318 insns. */
3319 int
3320 sse_comparison_operator (op, mode)
3321 rtx op;
3322 enum machine_mode mode ATTRIBUTE_UNUSED;
3323 {
3324 enum rtx_code code = GET_CODE (op);
3325 switch (code)
3326 {
3327 /* Operations supported directly. */
3328 case EQ:
3329 case LT:
3330 case LE:
3331 case UNORDERED:
3332 case NE:
3333 case UNGE:
3334 case UNGT:
3335 case ORDERED:
3336 return 1;
3337 /* These are equivalent to the ones above in non-IEEE comparisons. */
3338 case UNEQ:
3339 case UNLT:
3340 case UNLE:
3341 case LTGT:
3342 case GE:
3343 case GT:
3344 return !TARGET_IEEE_FP;
3345 default:
3346 return 0;
3347 }
3348 }
3349 /* Return 1 if OP is a valid comparison operator in valid mode. */
3350 int
3351 ix86_comparison_operator (op, mode)
3352 register rtx op;
3353 enum machine_mode mode;
3354 {
3355 enum machine_mode inmode;
3356 enum rtx_code code = GET_CODE (op);
3357 if (mode != VOIDmode && GET_MODE (op) != mode)
3358 return 0;
3359 if (GET_RTX_CLASS (code) != '<')
3360 return 0;
3361 inmode = GET_MODE (XEXP (op, 0));
3362
3363 if (inmode == CCFPmode || inmode == CCFPUmode)
3364 {
3365 enum rtx_code second_code, bypass_code;
3366 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3367 return (bypass_code == NIL && second_code == NIL);
3368 }
3369 switch (code)
3370 {
3371 case EQ: case NE:
3372 return 1;
3373 case LT: case GE:
3374 if (inmode == CCmode || inmode == CCGCmode
3375 || inmode == CCGOCmode || inmode == CCNOmode)
3376 return 1;
3377 return 0;
3378 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3379 if (inmode == CCmode)
3380 return 1;
3381 return 0;
3382 case GT: case LE:
3383 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3384 return 1;
3385 return 0;
3386 default:
3387 return 0;
3388 }
3389 }
3390
3391 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3392
3393 int
3394 fcmov_comparison_operator (op, mode)
3395 register rtx op;
3396 enum machine_mode mode;
3397 {
3398 enum machine_mode inmode;
3399 enum rtx_code code = GET_CODE (op);
3400 if (mode != VOIDmode && GET_MODE (op) != mode)
3401 return 0;
3402 if (GET_RTX_CLASS (code) != '<')
3403 return 0;
3404 inmode = GET_MODE (XEXP (op, 0));
3405 if (inmode == CCFPmode || inmode == CCFPUmode)
3406 {
3407 enum rtx_code second_code, bypass_code;
3408 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3409 if (bypass_code != NIL || second_code != NIL)
3410 return 0;
3411 code = ix86_fp_compare_code_to_integer (code);
3412 }
3413 /* The i387 supports just a limited set of condition codes. */
3414 switch (code)
3415 {
3416 case LTU: case GTU: case LEU: case GEU:
3417 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3418 return 1;
3419 return 0;
3420 case ORDERED: case UNORDERED:
3421 case EQ: case NE:
3422 return 1;
3423 default:
3424 return 0;
3425 }
3426 }
3427
3428 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3429
3430 int
3431 promotable_binary_operator (op, mode)
3432 register rtx op;
3433 enum machine_mode mode ATTRIBUTE_UNUSED;
3434 {
3435 switch (GET_CODE (op))
3436 {
3437 case MULT:
3438 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3439 but the 386 and 486 do HImode multiplies faster. */
3440 return ix86_cpu > PROCESSOR_I486;
3441 case PLUS:
3442 case AND:
3443 case IOR:
3444 case XOR:
3445 case ASHIFT:
3446 return 1;
3447 default:
3448 return 0;
3449 }
3450 }
3451
3452 /* Nearly general operand, but accept any const_double, since we wish
3453 to be able to drop them into memory rather than have them get pulled
3454 into registers. */
3455
3456 int
3457 cmp_fp_expander_operand (op, mode)
3458 register rtx op;
3459 enum machine_mode mode;
3460 {
3461 if (mode != VOIDmode && mode != GET_MODE (op))
3462 return 0;
3463 if (GET_CODE (op) == CONST_DOUBLE)
3464 return 1;
3465 return general_operand (op, mode);
3466 }
3467
3468 /* Match an SI or HImode register for a zero_extract. */
3469
3470 int
3471 ext_register_operand (op, mode)
3472 register rtx op;
3473 enum machine_mode mode ATTRIBUTE_UNUSED;
3474 {
3475 int regno;
3476 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3477 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3478 return 0;
3479
3480 if (!register_operand (op, VOIDmode))
3481 return 0;
3482
3483 /* Be careful to accept only registers having upper parts. */
3484 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3485 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3486 }
3487
3488 /* Return 1 if this is a valid binary floating-point operation.
3489 OP is the expression matched, and MODE is its mode. */
3490
3491 int
3492 binary_fp_operator (op, mode)
3493 register rtx op;
3494 enum machine_mode mode;
3495 {
3496 if (mode != VOIDmode && mode != GET_MODE (op))
3497 return 0;
3498
3499 switch (GET_CODE (op))
3500 {
3501 case PLUS:
3502 case MINUS:
3503 case MULT:
3504 case DIV:
3505 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3506
3507 default:
3508 return 0;
3509 }
3510 }
3511
3512 int
3513 mult_operator (op, mode)
3514 register rtx op;
3515 enum machine_mode mode ATTRIBUTE_UNUSED;
3516 {
3517 return GET_CODE (op) == MULT;
3518 }
3519
3520 int
3521 div_operator (op, mode)
3522 register rtx op;
3523 enum machine_mode mode ATTRIBUTE_UNUSED;
3524 {
3525 return GET_CODE (op) == DIV;
3526 }
3527
3528 int
3529 arith_or_logical_operator (op, mode)
3530 rtx op;
3531 enum machine_mode mode;
3532 {
3533 return ((mode == VOIDmode || GET_MODE (op) == mode)
3534 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3535 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3536 }
3537
3538 /* Returns 1 if OP is memory operand with a displacement. */
3539
3540 int
3541 memory_displacement_operand (op, mode)
3542 register rtx op;
3543 enum machine_mode mode;
3544 {
3545 struct ix86_address parts;
3546
3547 if (! memory_operand (op, mode))
3548 return 0;
3549
3550 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3551 abort ();
3552
3553 return parts.disp != NULL_RTX;
3554 }
3555
3556 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3557 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3558
3559 ??? It seems likely that this will only work because cmpsi is an
3560 expander, and no actual insns use this. */
3561
3562 int
3563 cmpsi_operand (op, mode)
3564 rtx op;
3565 enum machine_mode mode;
3566 {
3567 if (nonimmediate_operand (op, mode))
3568 return 1;
3569
3570 if (GET_CODE (op) == AND
3571 && GET_MODE (op) == SImode
3572 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3573 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3574 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3575 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3576 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3577 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3578 return 1;
3579
3580 return 0;
3581 }
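/* Illustration (hedged; not part of the original sources): besides any
   nonimmediate operand, the special form accepted above is

     (and:SI (zero_extract:SI (reg) (const_int 8) (const_int 8))
             (const_int MASK))

   i.e. a masked test of bits 8..15 of a register, as produced when
   jump re-emits a comparison such as testqi_ext_ccno_0.  MASK stands
   for an arbitrary CONST_INT.  */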
3582
3583 /* Returns 1 if OP is a memory operand that cannot be represented by the
3584 modRM array.  */
3585
3586 int
3587 long_memory_operand (op, mode)
3588 register rtx op;
3589 enum machine_mode mode;
3590 {
3591 if (! memory_operand (op, mode))
3592 return 0;
3593
3594 return memory_address_length (op) != 0;
3595 }
3596
3597 /* Return nonzero if the rtx is known to be aligned.  */
3598
3599 int
3600 aligned_operand (op, mode)
3601 rtx op;
3602 enum machine_mode mode;
3603 {
3604 struct ix86_address parts;
3605
3606 if (!general_operand (op, mode))
3607 return 0;
3608
3609 /* Registers and immediate operands are always "aligned". */
3610 if (GET_CODE (op) != MEM)
3611 return 1;
3612
3613 /* Don't even try to do any aligned optimizations with volatiles. */
3614 if (MEM_VOLATILE_P (op))
3615 return 0;
3616
3617 op = XEXP (op, 0);
3618
3619 /* Pushes and pops are only valid on the stack pointer. */
3620 if (GET_CODE (op) == PRE_DEC
3621 || GET_CODE (op) == POST_INC)
3622 return 1;
3623
3624 /* Decode the address. */
3625 if (! ix86_decompose_address (op, &parts))
3626 abort ();
3627
3628 if (parts.base && GET_CODE (parts.base) == SUBREG)
3629 parts.base = SUBREG_REG (parts.base);
3630 if (parts.index && GET_CODE (parts.index) == SUBREG)
3631 parts.index = SUBREG_REG (parts.index);
3632
3633 /* Look for some component that isn't known to be aligned. */
3634 if (parts.index)
3635 {
3636 if (parts.scale < 4
3637 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3638 return 0;
3639 }
3640 if (parts.base)
3641 {
3642 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3643 return 0;
3644 }
3645 if (parts.disp)
3646 {
3647 if (GET_CODE (parts.disp) != CONST_INT
3648 || (INTVAL (parts.disp) & 3) != 0)
3649 return 0;
3650 }
3651
3652 /* Didn't find one -- this must be an aligned address. */
3653 return 1;
3654 }
3655 \f
3656 /* Return true if the constant is something that can be loaded with
3657 a special instruction. Only handle 0.0 and 1.0; others are less
3658 worthwhile. */
3659
3660 int
3661 standard_80387_constant_p (x)
3662 rtx x;
3663 {
3664 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3665 return -1;
3666 /* Note that the 80387 supports other constants, such as pi, that we should
3667 support too.  On some machines, these are much slower to load as a standard
3668 constant than to load from doubles in memory.  */
3669 if (x == CONST0_RTX (GET_MODE (x)))
3670 return 1;
3671 if (x == CONST1_RTX (GET_MODE (x)))
3672 return 2;
3673 return 0;
3674 }
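/* Usage note (hedged; not part of the original sources): a caller can
   key off the return value to pick the i387 load instruction -- a
   result of 1 (the constant 0.0) corresponds to fldz and a result of 2
   (the constant 1.0) corresponds to fld1, while 0 means the constant
   must be loaded from memory and -1 means the operand was not a
   floating-point CONST_DOUBLE at all.  */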
3675
3676 /* Return 1 if X is an FP constant that we can load into an SSE register
3677 without using memory.  */
3678 int
3679 standard_sse_constant_p (x)
3680 rtx x;
3681 {
3682 if (GET_CODE (x) != CONST_DOUBLE)
3683 return -1;
3684 return (x == CONST0_RTX (GET_MODE (x)));
3685 }
3686
3687 /* Returns 1 if OP contains a symbol reference.  */
3688
3689 int
3690 symbolic_reference_mentioned_p (op)
3691 rtx op;
3692 {
3693 register const char *fmt;
3694 register int i;
3695
3696 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3697 return 1;
3698
3699 fmt = GET_RTX_FORMAT (GET_CODE (op));
3700 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3701 {
3702 if (fmt[i] == 'E')
3703 {
3704 register int j;
3705
3706 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3707 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3708 return 1;
3709 }
3710
3711 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3712 return 1;
3713 }
3714
3715 return 0;
3716 }
3717
3718 /* Return 1 if it is appropriate to emit `ret' instructions in the
3719 body of a function. Do this only if the epilogue is simple, needing a
3720 couple of insns. Prior to reloading, we can't tell how many registers
3721 must be saved, so return 0 then. Return 0 if there is no frame
3722 marker to de-allocate.
3723
3724 If NON_SAVING_SETJMP is defined and true, then it is not possible
3725 for the epilogue to be simple, so return 0. This is a special case
3726 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3727 until final, but jump_optimize may need to know sooner if a
3728 `return' is OK. */
3729
3730 int
3731 ix86_can_use_return_insn_p ()
3732 {
3733 struct ix86_frame frame;
3734
3735 #ifdef NON_SAVING_SETJMP
3736 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3737 return 0;
3738 #endif
3739
3740 if (! reload_completed || frame_pointer_needed)
3741 return 0;
3742
3743 /* Don't allow more than 32768 bytes of arguments to be popped, since
3744 that's all we can do with one instruction.  */
3745 if (current_function_pops_args
3746 && current_function_args_size >= 32768)
3747 return 0;
3748
3749 ix86_compute_frame_layout (&frame);
3750 return frame.to_allocate == 0 && frame.nregs == 0;
3751 }
3752 \f
3753 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3754 int
3755 x86_64_sign_extended_value (value)
3756 rtx value;
3757 {
3758 switch (GET_CODE (value))
3759 {
3760 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3761 to be at least 32, so all acceptable constants are
3762 represented as CONST_INTs.  */
3763 case CONST_INT:
3764 if (HOST_BITS_PER_WIDE_INT == 32)
3765 return 1;
3766 else
3767 {
3768 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3769 return trunc_int_for_mode (val, SImode) == val;
3770 }
3771 break;
3772
3773 /* For certain code models, the symbolic references are known to fit. */
3774 case SYMBOL_REF:
3775 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3776
3777 /* For certain code models, the code is near as well. */
3778 case LABEL_REF:
3779 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3780
3781 /* We may also accept offsetted memory references in certain special
3782 cases.  */
3783 case CONST:
3784 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3785 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3786 return 1;
3787 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3788 {
3789 rtx op1 = XEXP (XEXP (value, 0), 0);
3790 rtx op2 = XEXP (XEXP (value, 0), 1);
3791 HOST_WIDE_INT offset;
3792
3793 if (ix86_cmodel == CM_LARGE)
3794 return 0;
3795 if (GET_CODE (op2) != CONST_INT)
3796 return 0;
3797 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3798 switch (GET_CODE (op1))
3799 {
3800 case SYMBOL_REF:
3801 /* For CM_SMALL assume that the latest object is 1MB before the
3802 end of the 31-bit boundary.  We may also accept pretty
3803 large negative constants, knowing that all objects are
3804 in the positive half of the address space.  */
3805 if (ix86_cmodel == CM_SMALL
3806 && offset < 1024*1024*1024
3807 && trunc_int_for_mode (offset, SImode) == offset)
3808 return 1;
3809 /* For CM_KERNEL we know that all objects reside in the
3810 negative half of the 32-bit address space.  We must not
3811 accept negative offsets, since they may take the address
3812 out of that range, but we may accept pretty large positive ones.  */
3813 if (ix86_cmodel == CM_KERNEL
3814 && offset > 0
3815 && trunc_int_for_mode (offset, SImode) == offset)
3816 return 1;
3817 break;
3818 case LABEL_REF:
3819 /* These conditions are similar to SYMBOL_REF ones, just the
3820 constraints for code models differ. */
3821 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3822 && offset < 1024*1024*1024
3823 && trunc_int_for_mode (offset, SImode) == offset)
3824 return 1;
3825 if (ix86_cmodel == CM_KERNEL
3826 && offset > 0
3827 && trunc_int_for_mode (offset, SImode) == offset)
3828 return 1;
3829 break;
3830 default:
3831 return 0;
3832 }
3833 }
3834 return 0;
3835 default:
3836 return 0;
3837 }
3838 }
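/* Worked example (hedged; not part of the original sources): with a
   64-bit HOST_WIDE_INT, (const_int 0x7fffffff) is accepted because it
   is unchanged by truncation to SImode, while (const_int 0x80000000)
   is rejected -- sign-extending its low 32 bits would give
   0xffffffff80000000, so it cannot be used where the hardware
   sign-extends a 32-bit immediate.  */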
3839
3840 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3841 int
3842 x86_64_zero_extended_value (value)
3843 rtx value;
3844 {
3845 switch (GET_CODE (value))
3846 {
3847 case CONST_DOUBLE:
3848 if (HOST_BITS_PER_WIDE_INT == 32)
3849 return (GET_MODE (value) == VOIDmode
3850 && !CONST_DOUBLE_HIGH (value));
3851 else
3852 return 0;
3853 case CONST_INT:
3854 if (HOST_BITS_PER_WIDE_INT == 32)
3855 return INTVAL (value) >= 0;
3856 else
3857 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3858 break;
3859
3860 /* For certain code models, the symbolic references are known to fit. */
3861 case SYMBOL_REF:
3862 return ix86_cmodel == CM_SMALL;
3863
3864 /* For certain code models, the code is near as well. */
3865 case LABEL_REF:
3866 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3867
3868 /* We may also accept offsetted memory references in certain special
3869 cases.  */
3870 case CONST:
3871 if (GET_CODE (XEXP (value, 0)) == PLUS)
3872 {
3873 rtx op1 = XEXP (XEXP (value, 0), 0);
3874 rtx op2 = XEXP (XEXP (value, 0), 1);
3875
3876 if (ix86_cmodel == CM_LARGE)
3877 return 0;
3878 switch (GET_CODE (op1))
3879 {
3880 case SYMBOL_REF:
3881 return 0;
3882 /* For the small code model we may accept pretty large positive
3883 offsets, since one bit is available for free.  Negative
3884 offsets are limited by the size of the NULL pointer area
3885 specified by the ABI.  */
3886 if (ix86_cmodel == CM_SMALL
3887 && GET_CODE (op2) == CONST_INT
3888 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3889 && (trunc_int_for_mode (INTVAL (op2), SImode)
3890 == INTVAL (op2)))
3891 return 1;
3892 /* ??? For the kernel, we may accept adjustment of
3893 -0x10000000, since we know that it will just convert
3894 negative address space to positive, but perhaps this
3895 is not worthwhile. */
3896 break;
3897 case LABEL_REF:
3898 /* These conditions are similar to SYMBOL_REF ones, just the
3899 constraints for code models differ. */
3900 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3901 && GET_CODE (op2) == CONST_INT
3902 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3903 && (trunc_int_for_mode (INTVAL (op2), SImode)
3904 == INTVAL (op2)))
3905 return 1;
3906 break;
3907 default:
3908 return 0;
3909 }
3910 }
3911 return 0;
3912 default:
3913 return 0;
3914 }
3915 }
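/* Worked example (hedged; not part of the original sources): with a
   64-bit HOST_WIDE_INT, (const_int 0xffffffff) is accepted since no
   bits above bit 31 are set, whereas (const_int 0x100000000) and any
   negative CONST_INT are rejected because they cannot be represented
   as a zero-extended 32-bit immediate.  */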
3916
3917 /* Value should be nonzero if functions must have frame pointers.
3918 Zero means the frame pointer need not be set up (and parms may
3919 be accessed via the stack pointer) in functions that seem suitable. */
3920
3921 int
3922 ix86_frame_pointer_required ()
3923 {
3924 /* If we accessed previous frames, then the generated code expects
3925 to be able to access the saved ebp value in our frame. */
3926 if (cfun->machine->accesses_prev_frame)
3927 return 1;
3928
3929 /* Several x86 OSes need a frame pointer for other reasons,
3930 usually pertaining to setjmp.  */
3931 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3932 return 1;
3933
3934 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3935 the frame pointer by default. Turn it back on now if we've not
3936 got a leaf function. */
3937 if (TARGET_OMIT_LEAF_FRAME_POINTER
3938 && (!current_function_is_leaf || current_function_profile))
3939 return 1;
3940
3941 return 0;
3942 }
3943
3944 /* Record that the current function accesses previous call frames. */
3945
3946 void
3947 ix86_setup_frame_addresses ()
3948 {
3949 cfun->machine->accesses_prev_frame = 1;
3950 }
3951 \f
3952 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3953 # define USE_HIDDEN_LINKONCE 1
3954 #else
3955 # define USE_HIDDEN_LINKONCE 0
3956 #endif
3957
3958 static int pic_labels_used;
3959
3960 /* Fills in the label name that should be used for a pc thunk for
3961 the given register. */
3962
3963 static void
3964 get_pc_thunk_name (name, regno)
3965 char name[32];
3966 unsigned int regno;
3967 {
3968 if (USE_HIDDEN_LINKONCE)
3969 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3970 else
3971 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3972 }
3973
3974
3975 /* Output, at the end of the assembly file, the pc thunks used for -fpic:
3976 each thunk loads its register with the return address of the caller and
3977 then returns.  */
3977
3978 void
3979 ix86_asm_file_end (file)
3980 FILE *file;
3981 {
3982 rtx xops[2];
3983 int regno;
3984
3985 for (regno = 0; regno < 8; ++regno)
3986 {
3987 char name[32];
3988
3989 if (! ((pic_labels_used >> regno) & 1))
3990 continue;
3991
3992 get_pc_thunk_name (name, regno);
3993
3994 if (USE_HIDDEN_LINKONCE)
3995 {
3996 tree decl;
3997
3998 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3999 error_mark_node);
4000 TREE_PUBLIC (decl) = 1;
4001 TREE_STATIC (decl) = 1;
4002 DECL_ONE_ONLY (decl) = 1;
4003
4004 (*targetm.asm_out.unique_section) (decl, 0);
4005 named_section (decl, NULL, 0);
4006
4007 (*targetm.asm_out.globalize_label) (file, name);
4008 fputs ("\t.hidden\t", file);
4009 assemble_name (file, name);
4010 fputc ('\n', file);
4011 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4012 }
4013 else
4014 {
4015 text_section ();
4016 ASM_OUTPUT_LABEL (file, name);
4017 }
4018
4019 xops[0] = gen_rtx_REG (SImode, regno);
4020 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4021 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4022 output_asm_insn ("ret", xops);
4023 }
4024 }
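/* Illustration (hedged; not part of the original sources): with
   USE_HIDDEN_LINKONCE, the loop above emits for %ebx a thunk along
   the lines of

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   which a caller invokes with "call __i686.get_pc_thunk.bx" to obtain
   its own return address -- and hence a program-counter-relative
   anchor -- in %ebx.  Without USE_HIDDEN_LINKONCE the same body is
   emitted under an internal "LPR" label in the text section.  */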
4025
4026 /* Emit code for the SET_GOT patterns. */
4027
4028 const char *
4029 output_set_got (dest)
4030 rtx dest;
4031 {
4032 rtx xops[3];
4033
4034 xops[0] = dest;
4035 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4036
4037 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4038 {
4039 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4040
4041 if (!flag_pic)
4042 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4043 else
4044 output_asm_insn ("call\t%a2", xops);
4045
4046 #if TARGET_MACHO
4047 /* Output the "canonical" label name ("Lxx$pb") here too. This
4048 is what will be referred to by the Mach-O PIC subsystem. */
4049 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4050 #endif
4051 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4052 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4053
4054 if (flag_pic)
4055 output_asm_insn ("pop{l}\t%0", xops);
4056 }
4057 else
4058 {
4059 char name[32];
4060 get_pc_thunk_name (name, REGNO (dest));
4061 pic_labels_used |= 1 << REGNO (dest);
4062
4063 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4064 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4065 output_asm_insn ("call\t%X2", xops);
4066 }
4067
4068 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4069 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4070 else if (!TARGET_MACHO)
4071 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4072
4073 return "";
4074 }
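/* Illustration (hedged; not part of the original sources): for PIC
   without deep branch prediction, the sequence emitted above for %ebx
   is roughly

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with TARGET_DEEP_BRANCH_PREDICTION the call/pop pair is
   replaced by a call to the __i686.get_pc_thunk.bx thunk followed by
   an addl of the GOT symbol.  The exact label spelling depends on the
   assembler dialect.  */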
4075
4076 /* Generate a "push" pattern for input ARG.  */
4077
4078 static rtx
4079 gen_push (arg)
4080 rtx arg;
4081 {
4082 return gen_rtx_SET (VOIDmode,
4083 gen_rtx_MEM (Pmode,
4084 gen_rtx_PRE_DEC (Pmode,
4085 stack_pointer_rtx)),
4086 arg);
4087 }
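/* Illustration (hedged; not part of the original sources): on a
   32-bit target, gen_push (gen_rtx_REG (SImode, 0)) builds

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI 0))

   which the push patterns in i386.md are expected to match and emit
   as a single "pushl" instruction.  */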
4088
4089 /* Return >= 0 if there is an unused call-clobbered register available
4090 for the entire function. */
4091
4092 static unsigned int
4093 ix86_select_alt_pic_regnum ()
4094 {
4095 if (current_function_is_leaf && !current_function_profile)
4096 {
4097 int i;
4098 for (i = 2; i >= 0; --i)
4099 if (!regs_ever_live[i])
4100 return i;
4101 }
4102
4103 return INVALID_REGNUM;
4104 }
4105
4106 /* Return 1 if we need to save REGNO. */
4107 static int
4108 ix86_save_reg (regno, maybe_eh_return)
4109 unsigned int regno;
4110 int maybe_eh_return;
4111 {
4112 if (pic_offset_table_rtx
4113 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4114 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4115 || current_function_profile
4116 || current_function_calls_eh_return))
4117 {
4118 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4119 return 0;
4120 return 1;
4121 }
4122
4123 if (current_function_calls_eh_return && maybe_eh_return)
4124 {
4125 unsigned i;
4126 for (i = 0; ; i++)
4127 {
4128 unsigned test = EH_RETURN_DATA_REGNO (i);
4129 if (test == INVALID_REGNUM)
4130 break;
4131 if (test == regno)
4132 return 1;
4133 }
4134 }
4135
4136 return (regs_ever_live[regno]
4137 && !call_used_regs[regno]
4138 && !fixed_regs[regno]
4139 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4140 }
4141
4142 /* Return number of registers to be saved on the stack. */
4143
4144 static int
4145 ix86_nsaved_regs ()
4146 {
4147 int nregs = 0;
4148 int regno;
4149
4150 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4151 if (ix86_save_reg (regno, true))
4152 nregs++;
4153 return nregs;
4154 }
4155
4156 /* Return the offset between two registers, one to be eliminated, and the other
4157 its replacement, at the start of a routine. */
4158
4159 HOST_WIDE_INT
4160 ix86_initial_elimination_offset (from, to)
4161 int from;
4162 int to;
4163 {
4164 struct ix86_frame frame;
4165 ix86_compute_frame_layout (&frame);
4166
4167 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4168 return frame.hard_frame_pointer_offset;
4169 else if (from == FRAME_POINTER_REGNUM
4170 && to == HARD_FRAME_POINTER_REGNUM)
4171 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4172 else
4173 {
4174 if (to != STACK_POINTER_REGNUM)
4175 abort ();
4176 else if (from == ARG_POINTER_REGNUM)
4177 return frame.stack_pointer_offset;
4178 else if (from != FRAME_POINTER_REGNUM)
4179 abort ();
4180 else
4181 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4182 }
4183 }
4184
4185 /* Fill in the ix86_frame structure describing the frame of the function currently being compiled.  */
4186
4187 static void
4188 ix86_compute_frame_layout (frame)
4189 struct ix86_frame *frame;
4190 {
4191 HOST_WIDE_INT total_size;
4192 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4193 int offset;
4194 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4195 HOST_WIDE_INT size = get_frame_size ();
4196
4197 frame->nregs = ix86_nsaved_regs ();
4198 total_size = size;
4199
4200 /* Skip return address and saved base pointer. */
4201 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4202
4203 frame->hard_frame_pointer_offset = offset;
4204
4205 /* Do some sanity checking of stack_alignment_needed and
4206 preferred_alignment, since the i386 port is the only one using these
4207 features and they may break easily.  */
4208
4209 if (size && !stack_alignment_needed)
4210 abort ();
4211 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4212 abort ();
4213 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4214 abort ();
4215 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4216 abort ();
4217
4218 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4219 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4220
4221 /* Register save area */
4222 offset += frame->nregs * UNITS_PER_WORD;
4223
4224 /* Va-arg area */
4225 if (ix86_save_varrargs_registers)
4226 {
4227 offset += X86_64_VARARGS_SIZE;
4228 frame->va_arg_size = X86_64_VARARGS_SIZE;
4229 }
4230 else
4231 frame->va_arg_size = 0;
4232
4233 /* Align start of frame for local function. */
4234 frame->padding1 = ((offset + stack_alignment_needed - 1)
4235 & -stack_alignment_needed) - offset;
4236
4237 offset += frame->padding1;
4238
4239 /* Frame pointer points here. */
4240 frame->frame_pointer_offset = offset;
4241
4242 offset += size;
4243
4244 /* Add outgoing arguments area. Can be skipped if we eliminated
4245 all the function calls as dead code. */
4246 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4247 {
4248 offset += current_function_outgoing_args_size;
4249 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4250 }
4251 else
4252 frame->outgoing_arguments_size = 0;
4253
4254 /* Align stack boundary. Only needed if we're calling another function
4255 or using alloca. */
4256 if (!current_function_is_leaf || current_function_calls_alloca)
4257 frame->padding2 = ((offset + preferred_alignment - 1)
4258 & -preferred_alignment) - offset;
4259 else
4260 frame->padding2 = 0;
4261
4262 offset += frame->padding2;
4263
4264 /* We've reached end of stack frame. */
4265 frame->stack_pointer_offset = offset;
4266
4267 /* Size the prologue needs to allocate.  */
4268 frame->to_allocate =
4269 (size + frame->padding1 + frame->padding2
4270 + frame->outgoing_arguments_size + frame->va_arg_size);
4271
4272 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4273 && current_function_is_leaf)
4274 {
4275 frame->red_zone_size = frame->to_allocate;
4276 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4277 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4278 }
4279 else
4280 frame->red_zone_size = 0;
4281 frame->to_allocate -= frame->red_zone_size;
4282 frame->stack_pointer_offset -= frame->red_zone_size;
4283 #if 0
4284 fprintf (stderr, "nregs: %i\n", frame->nregs);
4285 fprintf (stderr, "size: %i\n", size);
4286 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4287 fprintf (stderr, "padding1: %i\n", frame->padding1);
4288 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4289 fprintf (stderr, "padding2: %i\n", frame->padding2);
4290 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4291 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4292 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4293 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4294 frame->hard_frame_pointer_offset);
4295 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4296 #endif
4297 }
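/* Worked example (hedged; not part of the original sources): for a
   32-bit leaf function that needs a frame pointer, saves two
   call-saved registers, has 20 bytes of locals, no va-arg area, no
   outgoing arguments, and only needs the 4-byte STACK_BOUNDARY
   alignment, the computation above gives roughly

     offset = 8                     return address + saved %ebp
     hard_frame_pointer_offset = 8
     offset += 2*4 = 16             register save area
     padding1 = 0, frame_pointer_offset = 16
     offset += 20 = 36, padding2 = 0
     stack_pointer_offset = 36
     to_allocate = 20               locals only; the saved registers
                                    are pushed separately by the prologue

   (On 32-bit targets the red-zone adjustment at the end does not
   apply.)  */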
4298
4299 /* Emit code to save registers in the prologue. */
4300
4301 static void
4302 ix86_emit_save_regs ()
4303 {
4304 register int regno;
4305 rtx insn;
4306
4307 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4308 if (ix86_save_reg (regno, true))
4309 {
4310 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4311 RTX_FRAME_RELATED_P (insn) = 1;
4312 }
4313 }
4314
4315 /* Emit code to save registers using MOV insns.  The first register
4316 is saved at POINTER + OFFSET.  */
4317 static void
4318 ix86_emit_save_regs_using_mov (pointer, offset)
4319 rtx pointer;
4320 HOST_WIDE_INT offset;
4321 {
4322 int regno;
4323 rtx insn;
4324
4325 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4326 if (ix86_save_reg (regno, true))
4327 {
4328 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4329 Pmode, offset),
4330 gen_rtx_REG (Pmode, regno));
4331 RTX_FRAME_RELATED_P (insn) = 1;
4332 offset += UNITS_PER_WORD;
4333 }
4334 }
4335
4336 /* Expand the prologue into a bunch of separate insns. */
4337
4338 void
4339 ix86_expand_prologue ()
4340 {
4341 rtx insn;
4342 bool pic_reg_used;
4343 struct ix86_frame frame;
4344 int use_mov = 0;
4345 HOST_WIDE_INT allocate;
4346
4347 if (!optimize_size)
4348 {
4349 use_fast_prologue_epilogue
4350 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4351 if (TARGET_PROLOGUE_USING_MOVE)
4352 use_mov = use_fast_prologue_epilogue;
4353 }
4354 ix86_compute_frame_layout (&frame);
4355
4356 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4357 slower on all targets. Also sdb doesn't like it. */
4358
4359 if (frame_pointer_needed)
4360 {
4361 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4362 RTX_FRAME_RELATED_P (insn) = 1;
4363
4364 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4365 RTX_FRAME_RELATED_P (insn) = 1;
4366 }
4367
4368 allocate = frame.to_allocate;
4369 /* When we are dealing with only a single register and an empty frame,
4370 a push is equivalent to the mov+add sequence.  */
4371 if (allocate == 0 && frame.nregs <= 1)
4372 use_mov = 0;
4373
4374 if (!use_mov)
4375 ix86_emit_save_regs ();
4376 else
4377 allocate += frame.nregs * UNITS_PER_WORD;
4378
4379 if (allocate == 0)
4380 ;
4381 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4382 {
4383 insn = emit_insn (gen_pro_epilogue_adjust_stack
4384 (stack_pointer_rtx, stack_pointer_rtx,
4385 GEN_INT (-allocate)));
4386 RTX_FRAME_RELATED_P (insn) = 1;
4387 }
4388 else
4389 {
4390 /* ??? Is this only valid for Win32? */
4391
4392 rtx arg0, sym;
4393
4394 if (TARGET_64BIT)
4395 abort ();
4396
4397 arg0 = gen_rtx_REG (SImode, 0);
4398 emit_move_insn (arg0, GEN_INT (allocate));
4399
4400 sym = gen_rtx_MEM (FUNCTION_MODE,
4401 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4402 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4403
4404 CALL_INSN_FUNCTION_USAGE (insn)
4405 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4406 CALL_INSN_FUNCTION_USAGE (insn));
4407 }
4408 if (use_mov)
4409 {
4410 if (!frame_pointer_needed || !frame.to_allocate)
4411 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4412 else
4413 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4414 -frame.nregs * UNITS_PER_WORD);
4415 }
4416
4417 #ifdef SUBTARGET_PROLOGUE
4418 SUBTARGET_PROLOGUE;
4419 #endif
4420
4421 pic_reg_used = false;
4422 if (pic_offset_table_rtx
4423 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4424 || current_function_profile))
4425 {
4426 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4427
4428 if (alt_pic_reg_used != INVALID_REGNUM)
4429 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4430
4431 pic_reg_used = true;
4432 }
4433
4434 if (pic_reg_used)
4435 {
4436 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4437
4438 /* Even with accurate pre-reload life analysis, we can wind up
4439 deleting all references to the pic register after reload.
4440 Consider the case where cross-jumping unifies two sides of a branch
4441 controlled by a comparison against the only read from a global.
4442 In that case, allow the set_got to be deleted, though we're
4443 too late to do anything about the ebx save in the prologue.  */
4444 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4445 }
4446
4447 /* Prevent function calls from being scheduled before the call to mcount.
4448 In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
4449 if (current_function_profile)
4450 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4451 }
4452
4453 /* Emit code to restore saved registers using MOV insns. First register
4454 is restored from POINTER + OFFSET. */
4455 static void
4456 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4457 rtx pointer;
4458 int offset;
4459 int maybe_eh_return;
4460 {
4461 int regno;
4462
4463 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4464 if (ix86_save_reg (regno, maybe_eh_return))
4465 {
4466 emit_move_insn (gen_rtx_REG (Pmode, regno),
4467 adjust_address (gen_rtx_MEM (Pmode, pointer),
4468 Pmode, offset));
4469 offset += UNITS_PER_WORD;
4470 }
4471 }
4472
4473 /* Restore function stack, frame, and registers. */
4474
4475 void
4476 ix86_expand_epilogue (style)
4477 int style;
4478 {
4479 int regno;
4480 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4481 struct ix86_frame frame;
4482 HOST_WIDE_INT offset;
4483
4484 ix86_compute_frame_layout (&frame);
4485
4486 /* Calculate start of saved registers relative to ebp. Special care
4487 must be taken for the normal return case of a function using
4488 eh_return: the eax and edx registers are marked as saved, but not
4489 restored along this path. */
4490 offset = frame.nregs;
4491 if (current_function_calls_eh_return && style != 2)
4492 offset -= 2;
4493 offset *= -UNITS_PER_WORD;
4494
4495 /* If we're only restoring one register and sp is not valid, then
4496 use a move instruction to restore the register, since it's
4497 less work than reloading sp and popping the register.
4498
4499 The default code results in a stack adjustment using an add/lea instruction,
4500 while this code results in a LEAVE instruction (or discrete equivalent),
4501 so it is profitable in some other cases as well, especially when there
4502 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4503 and there is exactly one register to pop.  This heuristic may need some
4504 tuning in the future.  */
4505 if ((!sp_valid && frame.nregs <= 1)
4506 || (TARGET_EPILOGUE_USING_MOVE
4507 && use_fast_prologue_epilogue
4508 && (frame.nregs > 1 || frame.to_allocate))
4509 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4510 || (frame_pointer_needed && TARGET_USE_LEAVE
4511 && use_fast_prologue_epilogue && frame.nregs == 1)
4512 || current_function_calls_eh_return)
4513 {
4514 /* Restore registers.  We can use ebp or esp to address the memory
4515 locations.  If both are available, default to ebp, since offsets
4516 are known to be small.  The only exception is esp pointing directly to the
4517 end of the block of saved registers, where we may simplify the addressing
4518 mode.  */
4519
4520 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4521 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4522 frame.to_allocate, style == 2);
4523 else
4524 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4525 offset, style == 2);
4526
4527 /* eh_return epilogues need %ecx added to the stack pointer. */
4528 if (style == 2)
4529 {
4530 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4531
4532 if (frame_pointer_needed)
4533 {
4534 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4535 tmp = plus_constant (tmp, UNITS_PER_WORD);
4536 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4537
4538 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4539 emit_move_insn (hard_frame_pointer_rtx, tmp);
4540
4541 emit_insn (gen_pro_epilogue_adjust_stack
4542 (stack_pointer_rtx, sa, const0_rtx));
4543 }
4544 else
4545 {
4546 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4547 tmp = plus_constant (tmp, (frame.to_allocate
4548 + frame.nregs * UNITS_PER_WORD));
4549 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4550 }
4551 }
4552 else if (!frame_pointer_needed)
4553 emit_insn (gen_pro_epilogue_adjust_stack
4554 (stack_pointer_rtx, stack_pointer_rtx,
4555 GEN_INT (frame.to_allocate
4556 + frame.nregs * UNITS_PER_WORD)));
4557 /* If not an i386, mov & pop is faster than "leave". */
4558 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4559 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4560 else
4561 {
4562 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4563 hard_frame_pointer_rtx,
4564 const0_rtx));
4565 if (TARGET_64BIT)
4566 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4567 else
4568 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4569 }
4570 }
4571 else
4572 {
4573 /* First step is to deallocate the stack frame so that we can
4574 pop the registers. */
4575 if (!sp_valid)
4576 {
4577 if (!frame_pointer_needed)
4578 abort ();
4579 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4580 hard_frame_pointer_rtx,
4581 GEN_INT (offset)));
4582 }
4583 else if (frame.to_allocate)
4584 emit_insn (gen_pro_epilogue_adjust_stack
4585 (stack_pointer_rtx, stack_pointer_rtx,
4586 GEN_INT (frame.to_allocate)));
4587
4588 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4589 if (ix86_save_reg (regno, false))
4590 {
4591 if (TARGET_64BIT)
4592 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4593 else
4594 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4595 }
4596 if (frame_pointer_needed)
4597 {
4598 /* Leave results in shorter dependency chains on CPUs that are
4599 able to grok it fast. */
4600 if (TARGET_USE_LEAVE)
4601 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4602 else if (TARGET_64BIT)
4603 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4604 else
4605 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4606 }
4607 }
4608
4609 /* Sibcall epilogues don't want a return instruction. */
4610 if (style == 0)
4611 return;
4612
4613 if (current_function_pops_args && current_function_args_size)
4614 {
4615 rtx popc = GEN_INT (current_function_pops_args);
4616
4617 /* The i386 can only pop 64K bytes with one instruction.  If asked to pop
4618 more, pop the return address, do an explicit add, and jump indirectly to
4619 the caller.  */
4620
4621 if (current_function_pops_args >= 65536)
4622 {
4623 rtx ecx = gen_rtx_REG (SImode, 2);
4624
4625 /* There is no "pascal" calling convention in the 64-bit ABI.  */
4626 if (TARGET_64BIT)
4627 abort ();
4628
4629 emit_insn (gen_popsi1 (ecx));
4630 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4631 emit_jump_insn (gen_return_indirect_internal (ecx));
4632 }
4633 else
4634 emit_jump_insn (gen_return_pop_internal (popc));
4635 }
4636 else
4637 emit_jump_insn (gen_return_internal ());
4638 }
4639
4640 /* Reset the PIC register number, which the function may have changed.  */
4641
4642 static void
4643 ix86_output_function_epilogue (file, size)
4644 FILE *file ATTRIBUTE_UNUSED;
4645 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4646 {
4647 if (pic_offset_table_rtx)
4648 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4649 }
4650 \f
4651 /* Extract the parts of an RTL expression that is a valid memory address
4652 for an instruction.  Return 0 if the structure of the address is
4653 grossly off.  Return -1 if the address contains an ASHIFT, so it is not
4654 strictly valid, but is still used for computing the length of an lea
4655 instruction.  */
4656
4657 static int
4658 ix86_decompose_address (addr, out)
4659 register rtx addr;
4660 struct ix86_address *out;
4661 {
4662 rtx base = NULL_RTX;
4663 rtx index = NULL_RTX;
4664 rtx disp = NULL_RTX;
4665 HOST_WIDE_INT scale = 1;
4666 rtx scale_rtx = NULL_RTX;
4667 int retval = 1;
4668
4669 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4670 base = addr;
4671 else if (GET_CODE (addr) == PLUS)
4672 {
4673 rtx op0 = XEXP (addr, 0);
4674 rtx op1 = XEXP (addr, 1);
4675 enum rtx_code code0 = GET_CODE (op0);
4676 enum rtx_code code1 = GET_CODE (op1);
4677
4678 if (code0 == REG || code0 == SUBREG)
4679 {
4680 if (code1 == REG || code1 == SUBREG)
4681 index = op0, base = op1; /* index + base */
4682 else
4683 base = op0, disp = op1; /* base + displacement */
4684 }
4685 else if (code0 == MULT)
4686 {
4687 index = XEXP (op0, 0);
4688 scale_rtx = XEXP (op0, 1);
4689 if (code1 == REG || code1 == SUBREG)
4690 base = op1; /* index*scale + base */
4691 else
4692 disp = op1; /* index*scale + disp */
4693 }
4694 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4695 {
4696 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4697 scale_rtx = XEXP (XEXP (op0, 0), 1);
4698 base = XEXP (op0, 1);
4699 disp = op1;
4700 }
4701 else if (code0 == PLUS)
4702 {
4703 index = XEXP (op0, 0); /* index + base + disp */
4704 base = XEXP (op0, 1);
4705 disp = op1;
4706 }
4707 else
4708 return 0;
4709 }
4710 else if (GET_CODE (addr) == MULT)
4711 {
4712 index = XEXP (addr, 0); /* index*scale */
4713 scale_rtx = XEXP (addr, 1);
4714 }
4715 else if (GET_CODE (addr) == ASHIFT)
4716 {
4717 rtx tmp;
4718
4719 /* We're called for lea too, which implements ashift on occasion. */
4720 index = XEXP (addr, 0);
4721 tmp = XEXP (addr, 1);
4722 if (GET_CODE (tmp) != CONST_INT)
4723 return 0;
4724 scale = INTVAL (tmp);
4725 if ((unsigned HOST_WIDE_INT) scale > 3)
4726 return 0;
4727 scale = 1 << scale;
4728 retval = -1;
4729 }
4730 else
4731 disp = addr; /* displacement */
4732
4733 /* Extract the integral value of scale. */
4734 if (scale_rtx)
4735 {
4736 if (GET_CODE (scale_rtx) != CONST_INT)
4737 return 0;
4738 scale = INTVAL (scale_rtx);
4739 }
4740
4741 /* Allow the arg pointer and stack pointer as the index if there is no scaling.  */
4742 if (base && index && scale == 1
4743 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4744 || index == stack_pointer_rtx))
4745 {
4746 rtx tmp = base;
4747 base = index;
4748 index = tmp;
4749 }
4750
4751 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4752 if ((base == hard_frame_pointer_rtx
4753 || base == frame_pointer_rtx
4754 || base == arg_pointer_rtx) && !disp)
4755 disp = const0_rtx;
4756
4757 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4758 Avoid this by transforming to [%esi+0]. */
4759 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4760 && base && !index && !disp
4761 && REG_P (base)
4762 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4763 disp = const0_rtx;
4764
4765 /* Special case: encode reg+reg instead of reg*2. */
4766 if (!base && index && scale && scale == 2)
4767 base = index, scale = 1;
4768
4769 /* Special case: scaling cannot be encoded without base or displacement. */
4770 if (!base && !disp && index && scale != 1)
4771 disp = const0_rtx;
4772
4773 out->base = base;
4774 out->index = index;
4775 out->disp = disp;
4776 out->scale = scale;
4777
4778 return retval;
4779 }
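/* Illustration (hedged; not part of the original sources): the address

     (plus:SI (plus:SI (mult:SI (reg:SI A) (const_int 4)) (reg:SI B))
              (const_int 12))

   decomposes into base = B, index = A, scale = 4, disp = 12 -- the
   operand of a "12(B,A,4)" memory reference -- while a bare (reg:SI B)
   yields base = B with no index, scale 1 and no displacement.  */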
4780 \f
4781 /* Return the cost of the memory address X.
4782 For i386, it is better to use a complex address than let gcc copy
4783 the address into a reg and make a new pseudo.  But not if the address
4784 requires two regs - that would mean more pseudos with longer
4785 lifetimes.  */
4786 int
4787 ix86_address_cost (x)
4788 rtx x;
4789 {
4790 struct ix86_address parts;
4791 int cost = 1;
4792
4793 if (!ix86_decompose_address (x, &parts))
4794 abort ();
4795
4796 if (parts.base && GET_CODE (parts.base) == SUBREG)
4797 parts.base = SUBREG_REG (parts.base);
4798 if (parts.index && GET_CODE (parts.index) == SUBREG)
4799 parts.index = SUBREG_REG (parts.index);
4800
4801 /* More complex memory references are better. */
4802 if (parts.disp && parts.disp != const0_rtx)
4803 cost--;
4804
4805 /* Attempt to minimize number of registers in the address. */
4806 if ((parts.base
4807 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4808 || (parts.index
4809 && (!REG_P (parts.index)
4810 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4811 cost++;
4812
4813 if (parts.base
4814 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4815 && parts.index
4816 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4817 && parts.base != parts.index)
4818 cost++;
4819
4820 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4821 since its predecode logic can't detect the length of such instructions
4822 and decoding degenerates to the vector decoder.  Increase the cost of such
4823 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
4824 to split such addresses or even refuse them entirely.
4825
4826 The following addressing modes are affected:
4827 [base+scale*index]
4828 [scale*index+disp]
4829 [base+index]
4830
4831 The first and last cases may be avoidable by explicitly coding a zero
4832 displacement in the memory address, but I don't have an AMD-K6 machine
4833 handy to check this theory.  */
4834
4835 if (TARGET_K6
4836 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4837 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4838 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4839 cost += 10;
4840
4841 return cost;
4842 }
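/* Worked example (hedged; not part of the original sources): an
   address such as 8(%ebp) ends up with cost 0 -- the base cost of 1
   minus 1 for the nonzero displacement, and no penalty since %ebp is
   a hard register.  An address formed from two distinct unallocated
   pseudos, say (plus (reg 60) (reg 61)), costs 3: the base cost of 1,
   plus 1 because a pseudo appears, plus 1 because two distinct
   pseudos appear; on the K6 the displacement-less [base+index] form
   additionally incurs the penalty of 10 described above.  */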
4843 \f
4844 /* If X is a machine specific address (i.e. a symbol or label being
4845 referenced as a displacement from the GOT implemented using an
4846 UNSPEC), then return the base term. Otherwise return X. */
4847
4848 rtx
4849 ix86_find_base_term (x)
4850 rtx x;
4851 {
4852 rtx term;
4853
4854 if (TARGET_64BIT)
4855 {
4856 if (GET_CODE (x) != CONST)
4857 return x;
4858 term = XEXP (x, 0);
4859 if (GET_CODE (term) == PLUS
4860 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4861 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4862 term = XEXP (term, 0);
4863 if (GET_CODE (term) != UNSPEC
4864 || XINT (term, 1) != UNSPEC_GOTPCREL)
4865 return x;
4866
4867 term = XVECEXP (term, 0, 0);
4868
4869 if (GET_CODE (term) != SYMBOL_REF
4870 && GET_CODE (term) != LABEL_REF)
4871 return x;
4872
4873 return term;
4874 }
4875
4876 if (GET_CODE (x) != PLUS
4877 || XEXP (x, 0) != pic_offset_table_rtx
4878 || GET_CODE (XEXP (x, 1)) != CONST)
4879 return x;
4880
4881 term = XEXP (XEXP (x, 1), 0);
4882
4883 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4884 term = XEXP (term, 0);
4885
4886 if (GET_CODE (term) != UNSPEC
4887 || XINT (term, 1) != UNSPEC_GOTOFF)
4888 return x;
4889
4890 term = XVECEXP (term, 0, 0);
4891
4892 if (GET_CODE (term) != SYMBOL_REF
4893 && GET_CODE (term) != LABEL_REF)
4894 return x;
4895
4896 return term;
4897 }
4898 \f
4899 /* Determine if a given RTX is a valid constant. We already know this
4900 satisfies CONSTANT_P. */
4901
4902 bool
4903 legitimate_constant_p (x)
4904 rtx x;
4905 {
4906 rtx inner;
4907
4908 switch (GET_CODE (x))
4909 {
4910 case SYMBOL_REF:
4911 /* TLS symbols are not constant. */
4912 if (tls_symbolic_operand (x, Pmode))
4913 return false;
4914 break;
4915
4916 case CONST:
4917 inner = XEXP (x, 0);
4918
4919 /* Offsets of TLS symbols are never valid.
4920 Discourage CSE from creating them. */
4921 if (GET_CODE (inner) == PLUS
4922 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4923 return false;
4924
4925 /* Only some unspecs are valid as "constants". */
4926 if (GET_CODE (inner) == UNSPEC)
4927 switch (XINT (inner, 1))
4928 {
4929 case UNSPEC_TPOFF:
4930 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4931 default:
4932 return false;
4933 }
4934 break;
4935
4936 default:
4937 break;
4938 }
4939
4940 /* Otherwise we handle everything else in the move patterns. */
4941 return true;
4942 }
4943
4944 /* Determine if a given RTX is a valid constant address. */
4945
4946 bool
4947 constant_address_p (x)
4948 rtx x;
4949 {
4950 switch (GET_CODE (x))
4951 {
4952 case LABEL_REF:
4953 case CONST_INT:
4954 return true;
4955
4956 case CONST_DOUBLE:
4957 return TARGET_64BIT;
4958
4959 case CONST:
4960 /* For Mach-O, really believe the CONST. */
4961 if (TARGET_MACHO)
4962 return true;
4963 /* Otherwise fall through. */
4964 case SYMBOL_REF:
4965 return !flag_pic && legitimate_constant_p (x);
4966
4967 default:
4968 return false;
4969 }
4970 }
4971
4972 /* Nonzero if the constant value X is a legitimate general operand
4973 when generating PIC code. It is given that flag_pic is on and
4974 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4975
4976 bool
4977 legitimate_pic_operand_p (x)
4978 rtx x;
4979 {
4980 rtx inner;
4981
4982 switch (GET_CODE (x))
4983 {
4984 case CONST:
4985 inner = XEXP (x, 0);
4986
4987 /* Only some unspecs are valid as "constants". */
4988 if (GET_CODE (inner) == UNSPEC)
4989 switch (XINT (inner, 1))
4990 {
4991 case UNSPEC_TPOFF:
4992 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4993 default:
4994 return false;
4995 }
4996 /* FALLTHRU */
4997
4998 case SYMBOL_REF:
4999 case LABEL_REF:
5000 return legitimate_pic_address_disp_p (x);
5001
5002 default:
5003 return true;
5004 }
5005 }
5006
5007 /* Determine if a given CONST RTX is a valid memory displacement
5008 in PIC mode. */
5009
5010 int
5011 legitimate_pic_address_disp_p (disp)
5012 register rtx disp;
5013 {
5014 bool saw_plus;
5015
5016 /* In 64bit mode we can allow direct addresses of symbols and labels
5017 when they are not dynamic symbols. */
5018 if (TARGET_64BIT)
5019 {
5020 rtx x = disp;
5021 if (GET_CODE (disp) == CONST)
5022 x = XEXP (disp, 0);
5023 /* ??? Handle PIC code models */
5024 if (GET_CODE (x) == PLUS
5025 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5026 && ix86_cmodel == CM_SMALL_PIC
5027 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5028 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5029 x = XEXP (x, 0);
5030 if (local_symbolic_operand (x, Pmode))
5031 return 1;
5032 }
5033 if (GET_CODE (disp) != CONST)
5034 return 0;
5035 disp = XEXP (disp, 0);
5036
5037 if (TARGET_64BIT)
5038 {
5039 /* It is unsafe to allow PLUS expressions; this would limit the allowed
5040 distance of GOT references.  We should not need these anyway.  */
5041 if (GET_CODE (disp) != UNSPEC
5042 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5043 return 0;
5044
5045 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5046 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5047 return 0;
5048 return 1;
5049 }
5050
5051 saw_plus = false;
5052 if (GET_CODE (disp) == PLUS)
5053 {
5054 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5055 return 0;
5056 disp = XEXP (disp, 0);
5057 saw_plus = true;
5058 }
5059
5060 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5061 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5062 {
5063 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5064 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5065 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5066 {
5067 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5068 if (strstr (sym_name, "$pb") != 0)
5069 return 1;
5070 }
5071 }
5072
5073 if (GET_CODE (disp) != UNSPEC)
5074 return 0;
5075
5076 switch (XINT (disp, 1))
5077 {
5078 case UNSPEC_GOT:
5079 if (saw_plus)
5080 return false;
5081 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5082 case UNSPEC_GOTOFF:
5083 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5084 case UNSPEC_GOTTPOFF:
5085 case UNSPEC_GOTNTPOFF:
5086 case UNSPEC_INDNTPOFF:
5087 if (saw_plus)
5088 return false;
5089 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5090 case UNSPEC_NTPOFF:
5091 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5092 case UNSPEC_DTPOFF:
5093 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5094 }
5095
5096 return 0;
5097 }
5098
5099 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5100 memory address for an instruction. The MODE argument is the machine mode
5101 for the MEM expression that wants to use this address.
5102
5103 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5104 convert common non-canonical forms to canonical form so that they will
5105 be recognized. */
5106
5107 int
5108 legitimate_address_p (mode, addr, strict)
5109 enum machine_mode mode;
5110 register rtx addr;
5111 int strict;
5112 {
5113 struct ix86_address parts;
5114 rtx base, index, disp;
5115 HOST_WIDE_INT scale;
5116 const char *reason = NULL;
5117 rtx reason_rtx = NULL_RTX;
5118
5119 if (TARGET_DEBUG_ADDR)
5120 {
5121 fprintf (stderr,
5122 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5123 GET_MODE_NAME (mode), strict);
5124 debug_rtx (addr);
5125 }
5126
5127 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5128 {
5129 if (TARGET_DEBUG_ADDR)
5130 fprintf (stderr, "Success.\n");
5131 return TRUE;
5132 }
5133
5134 if (ix86_decompose_address (addr, &parts) <= 0)
5135 {
5136 reason = "decomposition failed";
5137 goto report_error;
5138 }
5139
5140 base = parts.base;
5141 index = parts.index;
5142 disp = parts.disp;
5143 scale = parts.scale;
5144
5145 /* Validate base register.
5146
5147 Don't allow SUBREGs here; they can lead to spill failures when the base
5148 is one word out of a two-word structure, which is represented internally
5149 as a DImode int.  */
5150
5151 if (base)
5152 {
5153 rtx reg;
5154 reason_rtx = base;
5155
5156 if (GET_CODE (base) == SUBREG)
5157 reg = SUBREG_REG (base);
5158 else
5159 reg = base;
5160
5161 if (GET_CODE (reg) != REG)
5162 {
5163 reason = "base is not a register";
5164 goto report_error;
5165 }
5166
5167 if (GET_MODE (base) != Pmode)
5168 {
5169 reason = "base is not in Pmode";
5170 goto report_error;
5171 }
5172
5173 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5174 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5175 {
5176 reason = "base is not valid";
5177 goto report_error;
5178 }
5179 }
5180
5181 /* Validate index register.
5182
5183 Don't allow SUBREGs here; they can lead to spill failures when the index
5184 is one word out of a two-word structure, which is represented internally
5185 as a DImode int.  */
5186
5187 if (index)
5188 {
5189 rtx reg;
5190 reason_rtx = index;
5191
5192 if (GET_CODE (index) == SUBREG)
5193 reg = SUBREG_REG (index);
5194 else
5195 reg = index;
5196
5197 if (GET_CODE (reg) != REG)
5198 {
5199 reason = "index is not a register";
5200 goto report_error;
5201 }
5202
5203 if (GET_MODE (index) != Pmode)
5204 {
5205 reason = "index is not in Pmode";
5206 goto report_error;
5207 }
5208
5209 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5210 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5211 {
5212 reason = "index is not valid";
5213 goto report_error;
5214 }
5215 }
5216
5217 /* Validate scale factor. */
5218 if (scale != 1)
5219 {
5220 reason_rtx = GEN_INT (scale);
5221 if (!index)
5222 {
5223 reason = "scale without index";
5224 goto report_error;
5225 }
5226
5227 if (scale != 2 && scale != 4 && scale != 8)
5228 {
5229 reason = "scale is not a valid multiplier";
5230 goto report_error;
5231 }
5232 }
5233
5234 /* Validate displacement. */
5235 if (disp)
5236 {
5237 reason_rtx = disp;
5238
5239 if (TARGET_64BIT)
5240 {
5241 if (!x86_64_sign_extended_value (disp))
5242 {
5243 reason = "displacement is out of range";
5244 goto report_error;
5245 }
5246 }
5247 else
5248 {
5249 if (GET_CODE (disp) == CONST_DOUBLE)
5250 {
5251 reason = "displacement is a const_double";
5252 goto report_error;
5253 }
5254 }
5255
5256 if (GET_CODE (disp) == CONST
5257 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5258 switch (XINT (XEXP (disp, 0), 1))
5259 {
5260 case UNSPEC_GOT:
5261 case UNSPEC_GOTOFF:
5262 case UNSPEC_GOTPCREL:
5263 if (!flag_pic)
5264 abort ();
5265 goto is_legitimate_pic;
5266
5267 case UNSPEC_GOTTPOFF:
5268 case UNSPEC_GOTNTPOFF:
5269 case UNSPEC_INDNTPOFF:
5270 case UNSPEC_NTPOFF:
5271 case UNSPEC_DTPOFF:
5272 break;
5273
5274 default:
5275 reason = "invalid address unspec";
5276 goto report_error;
5277 }
5278
5279 else if (flag_pic && (SYMBOLIC_CONST (disp)
5280 #if TARGET_MACHO
5281 && !machopic_operand_p (disp)
5282 #endif
5283 ))
5284 {
5285 is_legitimate_pic:
5286 if (TARGET_64BIT && (index || base))
5287 {
5288 reason = "non-constant pic memory reference";
5289 goto report_error;
5290 }
5291 if (! legitimate_pic_address_disp_p (disp))
5292 {
5293 reason = "displacement is an invalid pic construct";
5294 goto report_error;
5295 }
5296
5297 /* This code used to verify that a symbolic pic displacement
5298 includes the pic_offset_table_rtx register.
5299
5300 While this is a good idea, unfortunately these constructs may
5301 be created by the "adds using lea" optimization for incorrect
5302 code like:
5303
5304 int a;
5305 int foo(int i)
5306 {
5307 return *(&a+i);
5308 }
5309
5310 This code is nonsensical, but it results in addressing the
5311 GOT table with a pic_offset_table_rtx base.  We can't
5312 just refuse it easily, since it gets matched by the
5313 "addsi3" pattern, which later gets split to lea in case the
5314 output register differs from the input.  While this
5315 could be handled by a separate addsi pattern for this case
5316 that never results in lea, disabling this test seems to be
5317 the easier and correct fix for the crash.  */
5318 }
5319 else if (!CONSTANT_ADDRESS_P (disp))
5320 {
5321 reason = "displacement is not constant";
5322 goto report_error;
5323 }
5324 }
5325
5326 /* Everything looks valid. */
5327 if (TARGET_DEBUG_ADDR)
5328 fprintf (stderr, "Success.\n");
5329 return TRUE;
5330
5331 report_error:
5332 if (TARGET_DEBUG_ADDR)
5333 {
5334 fprintf (stderr, "Error: %s\n", reason);
5335 debug_rtx (reason_rtx);
5336 }
5337 return FALSE;
5338 }
5339 \f
5340 /* Return a unique alias set for the GOT.  */
5341
5342 static HOST_WIDE_INT
5343 ix86_GOT_alias_set ()
5344 {
5345 static HOST_WIDE_INT set = -1;
5346 if (set == -1)
5347 set = new_alias_set ();
5348 return set;
5349 }
5350
5351 /* Return a legitimate reference for ORIG (an address) using the
5352 register REG. If REG is 0, a new pseudo is generated.
5353
5354 There are two types of references that must be handled:
5355
5356 1. Global data references must load the address from the GOT, via
5357 the PIC reg. An insn is emitted to do this load, and the reg is
5358 returned.
5359
5360 2. Static data references, constant pool addresses, and code labels
5361 compute the address as an offset from the GOT, whose base is in
5362 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5363 differentiate them from global data objects. The returned
5364 address is the PIC reg + an unspec constant.
5365
5366 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5367 reg also appears in the address. */
5368
5369 rtx
5370 legitimize_pic_address (orig, reg)
5371 rtx orig;
5372 rtx reg;
5373 {
5374 rtx addr = orig;
5375 rtx new = orig;
5376 rtx base;
5377
5378 #if TARGET_MACHO
5379 if (reg == 0)
5380 reg = gen_reg_rtx (Pmode);
5381 /* Use the generic Mach-O PIC machinery. */
5382 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5383 #endif
5384
5385 if (local_symbolic_operand (addr, Pmode))
5386 {
5387 /* In 64bit mode we can address such objects directly. */
5388 if (TARGET_64BIT)
5389 new = addr;
5390 else
5391 {
5392 /* This symbol may be referenced via a displacement from the PIC
5393 base address (@GOTOFF). */
5394
5395 if (reload_in_progress)
5396 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5397 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5398 new = gen_rtx_CONST (Pmode, new);
5399 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5400
5401 if (reg != 0)
5402 {
5403 emit_move_insn (reg, new);
5404 new = reg;
5405 }
5406 }
5407 }
5408 else if (GET_CODE (addr) == SYMBOL_REF)
5409 {
5410 if (TARGET_64BIT)
5411 {
5412 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5413 new = gen_rtx_CONST (Pmode, new);
5414 new = gen_rtx_MEM (Pmode, new);
5415 RTX_UNCHANGING_P (new) = 1;
5416 set_mem_alias_set (new, ix86_GOT_alias_set ());
5417
5418 if (reg == 0)
5419 reg = gen_reg_rtx (Pmode);
5420 /* Use gen_movsi directly; otherwise the address is loaded
5421 into a register for CSE.  We don't want to CSE these addresses;
5422 instead we CSE addresses from the GOT table, so skip this.  */
5423 emit_insn (gen_movsi (reg, new));
5424 new = reg;
5425 }
5426 else
5427 {
5428 /* This symbol must be referenced via a load from the
5429 Global Offset Table (@GOT). */
5430
5431 if (reload_in_progress)
5432 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5433 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5434 new = gen_rtx_CONST (Pmode, new);
5435 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5436 new = gen_rtx_MEM (Pmode, new);
5437 RTX_UNCHANGING_P (new) = 1;
5438 set_mem_alias_set (new, ix86_GOT_alias_set ());
5439
5440 if (reg == 0)
5441 reg = gen_reg_rtx (Pmode);
5442 emit_move_insn (reg, new);
5443 new = reg;
5444 }
5445 }
5446 else
5447 {
5448 if (GET_CODE (addr) == CONST)
5449 {
5450 addr = XEXP (addr, 0);
5451
5452 /* We must match stuff we generate before. Assume the only
5453 unspecs that can get here are ours. Not that we could do
5454 anything with them anyway... */
5455 if (GET_CODE (addr) == UNSPEC
5456 || (GET_CODE (addr) == PLUS
5457 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5458 return orig;
5459 if (GET_CODE (addr) != PLUS)
5460 abort ();
5461 }
5462 if (GET_CODE (addr) == PLUS)
5463 {
5464 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5465
5466 /* Check first to see if this is a constant offset from a @GOTOFF
5467 symbol reference. */
5468 if (local_symbolic_operand (op0, Pmode)
5469 && GET_CODE (op1) == CONST_INT)
5470 {
5471 if (!TARGET_64BIT)
5472 {
5473 if (reload_in_progress)
5474 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5475 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5476 UNSPEC_GOTOFF);
5477 new = gen_rtx_PLUS (Pmode, new, op1);
5478 new = gen_rtx_CONST (Pmode, new);
5479 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5480
5481 if (reg != 0)
5482 {
5483 emit_move_insn (reg, new);
5484 new = reg;
5485 }
5486 }
5487 else
5488 {
5489 /* ??? We need to limit offsets here. */
5490 }
5491 }
5492 else
5493 {
5494 base = legitimize_pic_address (XEXP (addr, 0), reg);
5495 new = legitimize_pic_address (XEXP (addr, 1),
5496 base == reg ? NULL_RTX : reg);
5497
5498 if (GET_CODE (new) == CONST_INT)
5499 new = plus_constant (base, INTVAL (new));
5500 else
5501 {
5502 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5503 {
5504 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5505 new = XEXP (new, 1);
5506 }
5507 new = gen_rtx_PLUS (Pmode, base, new);
5508 }
5509 }
5510 }
5511 }
5512 return new;
5513 }
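
/* Illustrative example (added commentary, not generated verbatim by the
   function above): assuming %ebx holds the PIC register, a local symbol
   legitimized via UNSPEC_GOTOFF and a global symbol legitimized via
   UNSPEC_GOT end up being addressed in the assembly output roughly as

	leal	local_sym@GOTOFF(%ebx), %eax
	movl	global_sym@GOT(%ebx), %eax	# %eax = &global_sym

   while in 64-bit mode a global symbol goes through a RIP-relative GOT
   slot:

	movq	global_sym@GOTPCREL(%rip), %rax

   The symbol and register choices are made up for the example; the
   relocation suffixes are the ones printed by output_pic_addr_const
   below.  */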
5514
5515 static void
5516 ix86_encode_section_info (decl, first)
5517 tree decl;
5518 int first ATTRIBUTE_UNUSED;
5519 {
5520 bool local_p = (*targetm.binds_local_p) (decl);
5521 rtx rtl, symbol;
5522
5523 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5524 if (GET_CODE (rtl) != MEM)
5525 return;
5526 symbol = XEXP (rtl, 0);
5527 if (GET_CODE (symbol) != SYMBOL_REF)
5528 return;
5529
5530 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5531 symbol so that we may access it relative to the PIC base (@GOTOFF)
rather than through a GOT entry. */
5532
5533 if (flag_pic)
5534 SYMBOL_REF_FLAG (symbol) = local_p;
5535
5536 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5537 "local dynamic", "initial exec" or "local exec" TLS models
5538 respectively. */
5539
5540 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5541 {
5542 const char *symbol_str;
5543 char *newstr;
5544 size_t len;
5545 enum tls_model kind;
5546
5547 if (!flag_pic)
5548 {
5549 if (local_p)
5550 kind = TLS_MODEL_LOCAL_EXEC;
5551 else
5552 kind = TLS_MODEL_INITIAL_EXEC;
5553 }
5554 /* Local dynamic is inefficient when we're not combining the
5555 parts of the address. */
5556 else if (optimize && local_p)
5557 kind = TLS_MODEL_LOCAL_DYNAMIC;
5558 else
5559 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5560 if (kind < flag_tls_default)
5561 kind = flag_tls_default;
5562
5563 symbol_str = XSTR (symbol, 0);
5564
5565 if (symbol_str[0] == '%')
5566 {
5567 if (symbol_str[1] == tls_model_chars[kind])
5568 return;
5569 symbol_str += 2;
5570 }
5571 len = strlen (symbol_str) + 1;
5572 newstr = alloca (len + 2);
5573
5574 newstr[0] = '%';
5575 newstr[1] = tls_model_chars[kind];
5576 memcpy (newstr + 2, symbol_str, len);
5577
5578 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5579 }
5580 }
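
/* A minimal, self-contained sketch of the symbol-name encoding performed
   above, for illustration only.  It writes into a caller-supplied buffer
   instead of using ggc_alloc_string, and takes the model letter directly
   rather than indexing the real tls_model_chars[] table.  */
static void
example_encode_tls_name (buf, name, model_char)
     char *buf;			/* must hold strlen (name) + 3 bytes */
     const char *name;
     int model_char;		/* one of 'G', 'L', 'i', 'l' */
{
  if (name[0] == '%')
    name += 2;			/* already encoded; drop the old prefix */

  buf[0] = '%';
  buf[1] = model_char;
  strcpy (buf + 2, name);
}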
5581
5582 /* Undo the above when printing symbol names. */
5583
5584 static const char *
5585 ix86_strip_name_encoding (str)
5586 const char *str;
5587 {
5588 if (str[0] == '%')
5589 str += 2;
5590 if (str [0] == '*')
5591 str += 1;
5592 return str;
5593 }
5594 \f
5595 /* Load the thread pointer into a register. */
5596
5597 static rtx
5598 get_thread_pointer ()
5599 {
5600 rtx tp;
5601
5602 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5603 tp = gen_rtx_MEM (Pmode, tp);
5604 RTX_UNCHANGING_P (tp) = 1;
5605 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5606 tp = force_reg (Pmode, tp);
5607
5608 return tp;
5609 }
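
/* Illustrative only: on IA-32 with the GNU TLS ABI, the UNSPEC_TP memory
   reference built above is printed as %gs:0 (see print_operand_address),
   so the emitted code amounts to loading the thread pointer from the
   first word of the thread control block.  A user-level sketch of the
   same load, assuming that ABI: */
static void *
example_read_thread_pointer ()
{
  void *tp;
  __asm__ __volatile__ ("movl %%gs:0, %0" : "=r" (tp));
  return tp;
}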
5610
5611 /* Try machine-dependent ways of modifying an illegitimate address
5612 to be legitimate. If we find one, return the new, valid address.
5613 This function is used in only one place: `memory_address' in explow.c.
5614
5615 OLDX is the address as it was before break_out_memory_refs was called.
5616 In some cases it is useful to look at this to decide what needs to be done.
5617
5618 MODE is passed so that this function can use
5619 GO_IF_LEGITIMATE_ADDRESS.
5620
5621 It is always safe for this function to do nothing. It exists to recognize
5622 opportunities to optimize the output.
5623
5624 For the 80386, we handle X+REG by loading X into a register R and
5625 using R+REG. R will go in a general reg and indexing will be used.
5626 However, if REG is a broken-out memory address or multiplication,
5627 nothing needs to be done because REG can certainly go in a general reg.
5628
5629 When -fpic is used, special handling is needed for symbolic references.
5630 See comments by legitimize_pic_address in i386.c for details. */
5631
5632 rtx
5633 legitimize_address (x, oldx, mode)
5634 register rtx x;
5635 register rtx oldx ATTRIBUTE_UNUSED;
5636 enum machine_mode mode;
5637 {
5638 int changed = 0;
5639 unsigned log;
5640
5641 if (TARGET_DEBUG_ADDR)
5642 {
5643 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5644 GET_MODE_NAME (mode));
5645 debug_rtx (x);
5646 }
5647
5648 log = tls_symbolic_operand (x, mode);
5649 if (log)
5650 {
5651 rtx dest, base, off, pic;
5652
5653 switch (log)
5654 {
5655 case TLS_MODEL_GLOBAL_DYNAMIC:
5656 dest = gen_reg_rtx (Pmode);
5657 emit_insn (gen_tls_global_dynamic (dest, x));
5658 break;
5659
5660 case TLS_MODEL_LOCAL_DYNAMIC:
5661 base = gen_reg_rtx (Pmode);
5662 emit_insn (gen_tls_local_dynamic_base (base));
5663
5664 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5665 off = gen_rtx_CONST (Pmode, off);
5666
5667 return gen_rtx_PLUS (Pmode, base, off);
5668
5669 case TLS_MODEL_INITIAL_EXEC:
5670 if (flag_pic)
5671 {
5672 if (reload_in_progress)
5673 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5674 pic = pic_offset_table_rtx;
5675 }
5676 else if (!TARGET_GNU_TLS)
5677 {
5678 pic = gen_reg_rtx (Pmode);
5679 emit_insn (gen_set_got (pic));
5680 }
5681 else
5682 pic = NULL;
5683
5684 base = get_thread_pointer ();
5685
5686 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5687 !TARGET_GNU_TLS
5688 ? UNSPEC_GOTTPOFF
5689 : flag_pic ? UNSPEC_GOTNTPOFF
5690 : UNSPEC_INDNTPOFF);
5691 off = gen_rtx_CONST (Pmode, off);
5692 if (flag_pic || !TARGET_GNU_TLS)
5693 off = gen_rtx_PLUS (Pmode, pic, off);
5694 off = gen_rtx_MEM (Pmode, off);
5695 RTX_UNCHANGING_P (off) = 1;
5696 set_mem_alias_set (off, ix86_GOT_alias_set ());
5697 dest = gen_reg_rtx (Pmode);
5698
5699 if (TARGET_GNU_TLS)
5700 {
5701 emit_move_insn (dest, off);
5702 return gen_rtx_PLUS (Pmode, base, dest);
5703 }
5704 else
5705 emit_insn (gen_subsi3 (dest, base, off));
5706 break;
5707
5708 case TLS_MODEL_LOCAL_EXEC:
5709 base = get_thread_pointer ();
5710
5711 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5712 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5713 off = gen_rtx_CONST (Pmode, off);
5714
5715 if (TARGET_GNU_TLS)
5716 return gen_rtx_PLUS (Pmode, base, off);
5717 else
5718 {
5719 dest = gen_reg_rtx (Pmode);
5720 emit_insn (gen_subsi3 (dest, base, off));
5721 }
5722 break;
5723
5724 default:
5725 abort ();
5726 }
5727
5728 return dest;
5729 }
5730
5731 if (flag_pic && SYMBOLIC_CONST (x))
5732 return legitimize_pic_address (x, 0);
5733
5734 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5735 if (GET_CODE (x) == ASHIFT
5736 && GET_CODE (XEXP (x, 1)) == CONST_INT
5737 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5738 {
5739 changed = 1;
5740 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5741 GEN_INT (1 << log));
5742 }
5743
5744 if (GET_CODE (x) == PLUS)
5745 {
5746 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5747
5748 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5749 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5750 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5751 {
5752 changed = 1;
5753 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5754 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5755 GEN_INT (1 << log));
5756 }
5757
5758 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5759 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5760 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5761 {
5762 changed = 1;
5763 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5764 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5765 GEN_INT (1 << log));
5766 }
5767
5768 /* Put multiply first if it isn't already. */
5769 if (GET_CODE (XEXP (x, 1)) == MULT)
5770 {
5771 rtx tmp = XEXP (x, 0);
5772 XEXP (x, 0) = XEXP (x, 1);
5773 XEXP (x, 1) = tmp;
5774 changed = 1;
5775 }
5776
5777 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5778 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5779 created by virtual register instantiation, register elimination, and
5780 similar optimizations. */
5781 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5782 {
5783 changed = 1;
5784 x = gen_rtx_PLUS (Pmode,
5785 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5786 XEXP (XEXP (x, 1), 0)),
5787 XEXP (XEXP (x, 1), 1));
5788 }
5789
5790 /* Canonicalize
5791 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5792 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5793 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5794 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5795 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5796 && CONSTANT_P (XEXP (x, 1)))
5797 {
5798 rtx constant;
5799 rtx other = NULL_RTX;
5800
5801 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5802 {
5803 constant = XEXP (x, 1);
5804 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5805 }
5806 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5807 {
5808 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5809 other = XEXP (x, 1);
5810 }
5811 else
5812 constant = 0;
5813
5814 if (constant)
5815 {
5816 changed = 1;
5817 x = gen_rtx_PLUS (Pmode,
5818 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5819 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5820 plus_constant (other, INTVAL (constant)));
5821 }
5822 }
5823
5824 if (changed && legitimate_address_p (mode, x, FALSE))
5825 return x;
5826
5827 if (GET_CODE (XEXP (x, 0)) == MULT)
5828 {
5829 changed = 1;
5830 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5831 }
5832
5833 if (GET_CODE (XEXP (x, 1)) == MULT)
5834 {
5835 changed = 1;
5836 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5837 }
5838
5839 if (changed
5840 && GET_CODE (XEXP (x, 1)) == REG
5841 && GET_CODE (XEXP (x, 0)) == REG)
5842 return x;
5843
5844 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5845 {
5846 changed = 1;
5847 x = legitimize_pic_address (x, 0);
5848 }
5849
5850 if (changed && legitimate_address_p (mode, x, FALSE))
5851 return x;
5852
5853 if (GET_CODE (XEXP (x, 0)) == REG)
5854 {
5855 register rtx temp = gen_reg_rtx (Pmode);
5856 register rtx val = force_operand (XEXP (x, 1), temp);
5857 if (val != temp)
5858 emit_move_insn (temp, val);
5859
5860 XEXP (x, 1) = temp;
5861 return x;
5862 }
5863
5864 else if (GET_CODE (XEXP (x, 1)) == REG)
5865 {
5866 register rtx temp = gen_reg_rtx (Pmode);
5867 register rtx val = force_operand (XEXP (x, 0), temp);
5868 if (val != temp)
5869 emit_move_insn (temp, val);
5870
5871 XEXP (x, 0) = temp;
5872 return x;
5873 }
5874 }
5875
5876 return x;
5877 }
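
/* Illustrative example (made-up register assignments) of the canonical
   form the code above works towards, namely the i386 addressing mode
   base + index*scale + disp.  For the C statement

	v = p[i + 2];		-- p an int * in %ebx, i in %eax --

   the memory address is legitimized into something equivalent to

	(plus (plus (mult (reg i) (const_int 4)) (reg p)) (const_int 8))

   which print_operand_address renders as 8(%ebx,%eax,4) in AT&T
   syntax.  */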
5878 \f
5879 /* Print an integer constant expression in assembler syntax. Addition
5880 and subtraction are the only arithmetic that may appear in these
5881 expressions. FILE is the stdio stream to write to, X is the rtx, and
5882 CODE is the operand print code from the output string. */
5883
5884 static void
5885 output_pic_addr_const (file, x, code)
5886 FILE *file;
5887 rtx x;
5888 int code;
5889 {
5890 char buf[256];
5891
5892 switch (GET_CODE (x))
5893 {
5894 case PC:
5895 if (flag_pic)
5896 putc ('.', file);
5897 else
5898 abort ();
5899 break;
5900
5901 case SYMBOL_REF:
5902 assemble_name (file, XSTR (x, 0));
5903 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5904 fputs ("@PLT", file);
5905 break;
5906
5907 case LABEL_REF:
5908 x = XEXP (x, 0);
5909 /* FALLTHRU */
5910 case CODE_LABEL:
5911 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5912 assemble_name (asm_out_file, buf);
5913 break;
5914
5915 case CONST_INT:
5916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5917 break;
5918
5919 case CONST:
5920 /* This used to output parentheses around the expression,
5921 but that does not work on the 386 (either ATT or BSD assembler). */
5922 output_pic_addr_const (file, XEXP (x, 0), code);
5923 break;
5924
5925 case CONST_DOUBLE:
5926 if (GET_MODE (x) == VOIDmode)
5927 {
5928 /* We can use %d if the number is <32 bits and positive. */
5929 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5930 fprintf (file, "0x%lx%08lx",
5931 (unsigned long) CONST_DOUBLE_HIGH (x),
5932 (unsigned long) CONST_DOUBLE_LOW (x));
5933 else
5934 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5935 }
5936 else
5937 /* We can't handle floating point constants;
5938 PRINT_OPERAND must handle them. */
5939 output_operand_lossage ("floating constant misused");
5940 break;
5941
5942 case PLUS:
5943 /* Some assemblers need integer constants to appear first. */
5944 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5945 {
5946 output_pic_addr_const (file, XEXP (x, 0), code);
5947 putc ('+', file);
5948 output_pic_addr_const (file, XEXP (x, 1), code);
5949 }
5950 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5951 {
5952 output_pic_addr_const (file, XEXP (x, 1), code);
5953 putc ('+', file);
5954 output_pic_addr_const (file, XEXP (x, 0), code);
5955 }
5956 else
5957 abort ();
5958 break;
5959
5960 case MINUS:
5961 if (!TARGET_MACHO)
5962 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5963 output_pic_addr_const (file, XEXP (x, 0), code);
5964 putc ('-', file);
5965 output_pic_addr_const (file, XEXP (x, 1), code);
5966 if (!TARGET_MACHO)
5967 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5968 break;
5969
5970 case UNSPEC:
5971 if (XVECLEN (x, 0) != 1)
5972 abort ();
5973 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5974 switch (XINT (x, 1))
5975 {
5976 case UNSPEC_GOT:
5977 fputs ("@GOT", file);
5978 break;
5979 case UNSPEC_GOTOFF:
5980 fputs ("@GOTOFF", file);
5981 break;
5982 case UNSPEC_GOTPCREL:
5983 fputs ("@GOTPCREL(%rip)", file);
5984 break;
5985 case UNSPEC_GOTTPOFF:
5986 /* FIXME: This might be @TPOFF in Sun ld too. */
5987 fputs ("@GOTTPOFF", file);
5988 break;
5989 case UNSPEC_TPOFF:
5990 fputs ("@TPOFF", file);
5991 break;
5992 case UNSPEC_NTPOFF:
5993 fputs ("@NTPOFF", file);
5994 break;
5995 case UNSPEC_DTPOFF:
5996 fputs ("@DTPOFF", file);
5997 break;
5998 case UNSPEC_GOTNTPOFF:
5999 fputs ("@GOTNTPOFF", file);
6000 break;
6001 case UNSPEC_INDNTPOFF:
6002 fputs ("@INDNTPOFF", file);
6003 break;
6004 default:
6005 output_operand_lossage ("invalid UNSPEC as operand");
6006 break;
6007 }
6008 break;
6009
6010 default:
6011 output_operand_lossage ("invalid expression as operand");
6012 }
6013 }
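
/* Illustrative summary only (this table is not used by the compiler):
   the UNSPEC relocations handled above and the text they turn into in
   the assembly output.  */
static const struct { const char *unspec; const char *suffix; }
example_pic_reloc_suffixes[] =
{
  { "UNSPEC_GOT",	"@GOT" },
  { "UNSPEC_GOTOFF",	"@GOTOFF" },
  { "UNSPEC_GOTPCREL",	"@GOTPCREL(%rip)" },
  { "UNSPEC_GOTTPOFF",	"@GOTTPOFF" },
  { "UNSPEC_TPOFF",	"@TPOFF" },
  { "UNSPEC_NTPOFF",	"@NTPOFF" },
  { "UNSPEC_DTPOFF",	"@DTPOFF" },
  { "UNSPEC_GOTNTPOFF",	"@GOTNTPOFF" },
  { "UNSPEC_INDNTPOFF",	"@INDNTPOFF" },
};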
6014
6015 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6016 We need to handle our special PIC relocations. */
6017
6018 void
6019 i386_dwarf_output_addr_const (file, x)
6020 FILE *file;
6021 rtx x;
6022 {
6023 #ifdef ASM_QUAD
6024 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6025 #else
6026 if (TARGET_64BIT)
6027 abort ();
6028 fprintf (file, "%s", ASM_LONG);
6029 #endif
6030 if (flag_pic)
6031 output_pic_addr_const (file, x, '\0');
6032 else
6033 output_addr_const (file, x);
6034 fputc ('\n', file);
6035 }
6036
6037 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6038 We need to emit DTP-relative relocations. */
6039
6040 void
6041 i386_output_dwarf_dtprel (file, size, x)
6042 FILE *file;
6043 int size;
6044 rtx x;
6045 {
6046 switch (size)
6047 {
6048 case 4:
6049 fputs (ASM_LONG, file);
6050 break;
6051 case 8:
6052 #ifdef ASM_QUAD
6053 fputs (ASM_QUAD, file);
6054 break;
6055 #endif
6056 default:
6057 abort ();
6058 }
6059
6060 output_addr_const (file, x);
6061 fputs ("@DTPOFF", file);
6062 }
6063
6064 /* In the name of slightly smaller debug output, and to cater to
6065 general assembler lossage, recognize PIC+GOTOFF and turn it back
6066 into a direct symbol reference. */
6067
6068 rtx
6069 i386_simplify_dwarf_addr (orig_x)
6070 rtx orig_x;
6071 {
6072 rtx x = orig_x, y;
6073
6074 if (GET_CODE (x) == MEM)
6075 x = XEXP (x, 0);
6076
6077 if (TARGET_64BIT)
6078 {
6079 if (GET_CODE (x) != CONST
6080 || GET_CODE (XEXP (x, 0)) != UNSPEC
6081 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6082 || GET_CODE (orig_x) != MEM)
6083 return orig_x;
6084 return XVECEXP (XEXP (x, 0), 0, 0);
6085 }
6086
6087 if (GET_CODE (x) != PLUS
6088 || GET_CODE (XEXP (x, 1)) != CONST)
6089 return orig_x;
6090
6091 if (GET_CODE (XEXP (x, 0)) == REG
6092 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6093 /* %ebx + GOT/GOTOFF */
6094 y = NULL;
6095 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6096 {
6097 /* %ebx + %reg * scale + GOT/GOTOFF */
6098 y = XEXP (x, 0);
6099 if (GET_CODE (XEXP (y, 0)) == REG
6100 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6101 y = XEXP (y, 1);
6102 else if (GET_CODE (XEXP (y, 1)) == REG
6103 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6104 y = XEXP (y, 0);
6105 else
6106 return orig_x;
6107 if (GET_CODE (y) != REG
6108 && GET_CODE (y) != MULT
6109 && GET_CODE (y) != ASHIFT)
6110 return orig_x;
6111 }
6112 else
6113 return orig_x;
6114
6115 x = XEXP (XEXP (x, 1), 0);
6116 if (GET_CODE (x) == UNSPEC
6117 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6118 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6119 {
6120 if (y)
6121 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6122 return XVECEXP (x, 0, 0);
6123 }
6124
6125 if (GET_CODE (x) == PLUS
6126 && GET_CODE (XEXP (x, 0)) == UNSPEC
6127 && GET_CODE (XEXP (x, 1)) == CONST_INT
6128 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6129 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6130 && GET_CODE (orig_x) != MEM)))
6131 {
6132 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6133 if (y)
6134 return gen_rtx_PLUS (Pmode, y, x);
6135 return x;
6136 }
6137
6138 return orig_x;
6139 }
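
/* Illustrative example of the transformation above: the address

	(plus (reg:SI %ebx)
	      (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   that legitimize_pic_address builds for a local symbol is turned back
   into plain (symbol_ref "x"), which is both smaller in the debug
   output and safer for assemblers that mishandle @GOTOFF there.  */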
6140 \f
6141 static void
6142 put_condition_code (code, mode, reverse, fp, file)
6143 enum rtx_code code;
6144 enum machine_mode mode;
6145 int reverse, fp;
6146 FILE *file;
6147 {
6148 const char *suffix;
6149
6150 if (mode == CCFPmode || mode == CCFPUmode)
6151 {
6152 enum rtx_code second_code, bypass_code;
6153 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6154 if (bypass_code != NIL || second_code != NIL)
6155 abort ();
6156 code = ix86_fp_compare_code_to_integer (code);
6157 mode = CCmode;
6158 }
6159 if (reverse)
6160 code = reverse_condition (code);
6161
6162 switch (code)
6163 {
6164 case EQ:
6165 suffix = "e";
6166 break;
6167 case NE:
6168 suffix = "ne";
6169 break;
6170 case GT:
6171 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6172 abort ();
6173 suffix = "g";
6174 break;
6175 case GTU:
6176 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6177 Those same assemblers have the same but opposite lossage on cmov. */
6178 if (mode != CCmode)
6179 abort ();
6180 suffix = fp ? "nbe" : "a";
6181 break;
6182 case LT:
6183 if (mode == CCNOmode || mode == CCGOCmode)
6184 suffix = "s";
6185 else if (mode == CCmode || mode == CCGCmode)
6186 suffix = "l";
6187 else
6188 abort ();
6189 break;
6190 case LTU:
6191 if (mode != CCmode)
6192 abort ();
6193 suffix = "b";
6194 break;
6195 case GE:
6196 if (mode == CCNOmode || mode == CCGOCmode)
6197 suffix = "ns";
6198 else if (mode == CCmode || mode == CCGCmode)
6199 suffix = "ge";
6200 else
6201 abort ();
6202 break;
6203 case GEU:
6204 /* ??? As above. */
6205 if (mode != CCmode)
6206 abort ();
6207 suffix = fp ? "nb" : "ae";
6208 break;
6209 case LE:
6210 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6211 abort ();
6212 suffix = "le";
6213 break;
6214 case LEU:
6215 if (mode != CCmode)
6216 abort ();
6217 suffix = "be";
6218 break;
6219 case UNORDERED:
6220 suffix = fp ? "u" : "p";
6221 break;
6222 case ORDERED:
6223 suffix = fp ? "nu" : "np";
6224 break;
6225 default:
6226 abort ();
6227 }
6228 fputs (suffix, file);
6229 }
6230
6231 void
6232 print_reg (x, code, file)
6233 rtx x;
6234 int code;
6235 FILE *file;
6236 {
6237 if (REGNO (x) == ARG_POINTER_REGNUM
6238 || REGNO (x) == FRAME_POINTER_REGNUM
6239 || REGNO (x) == FLAGS_REG
6240 || REGNO (x) == FPSR_REG)
6241 abort ();
6242
6243 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6244 putc ('%', file);
6245
6246 if (code == 'w' || MMX_REG_P (x))
6247 code = 2;
6248 else if (code == 'b')
6249 code = 1;
6250 else if (code == 'k')
6251 code = 4;
6252 else if (code == 'q')
6253 code = 8;
6254 else if (code == 'y')
6255 code = 3;
6256 else if (code == 'h')
6257 code = 0;
6258 else
6259 code = GET_MODE_SIZE (GET_MODE (x));
6260
6261 /* Irritatingly, the AMD extended registers use a different naming convention
6262 from the normal registers. */
6263 if (REX_INT_REG_P (x))
6264 {
6265 if (!TARGET_64BIT)
6266 abort ();
6267 switch (code)
6268 {
6269 case 0:
6270 error ("extended registers have no high halves");
6271 break;
6272 case 1:
6273 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6274 break;
6275 case 2:
6276 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6277 break;
6278 case 4:
6279 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6280 break;
6281 case 8:
6282 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6283 break;
6284 default:
6285 error ("unsupported operand size for extended register");
6286 break;
6287 }
6288 return;
6289 }
6290 switch (code)
6291 {
6292 case 3:
6293 if (STACK_TOP_P (x))
6294 {
6295 fputs ("st(0)", file);
6296 break;
6297 }
6298 /* FALLTHRU */
6299 case 8:
6300 case 4:
6301 case 12:
6302 if (! ANY_FP_REG_P (x))
6303 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6304 /* FALLTHRU */
6305 case 16:
6306 case 2:
6307 fputs (hi_reg_name[REGNO (x)], file);
6308 break;
6309 case 1:
6310 fputs (qi_reg_name[REGNO (x)], file);
6311 break;
6312 case 0:
6313 fputs (qi_high_reg_name[REGNO (x)], file);
6314 break;
6315 default:
6316 abort ();
6317 }
6318 }
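
/* Illustrative summary only (not used by the compiler): the
   size-dependent spellings produced above for a classic register (the
   a-register) and for an AMD64 extended register (r8).  */
static const struct { int size; const char *classic; const char *extended; }
example_reg_spellings[] =
{
  { 1, "al",  "r8b" },
  { 2, "ax",  "r8w" },
  { 4, "eax", "r8d" },
  { 8, "rax", "r8"  },
};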
6319
6320 /* Locate some local-dynamic symbol still in use by this function
6321 so that we can print its name in some tls_local_dynamic_base
6322 pattern. */
6323
6324 static const char *
6325 get_some_local_dynamic_name ()
6326 {
6327 rtx insn;
6328
6329 if (cfun->machine->some_ld_name)
6330 return cfun->machine->some_ld_name;
6331
6332 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6333 if (INSN_P (insn)
6334 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6335 return cfun->machine->some_ld_name;
6336
6337 abort ();
6338 }
6339
6340 static int
6341 get_some_local_dynamic_name_1 (px, data)
6342 rtx *px;
6343 void *data ATTRIBUTE_UNUSED;
6344 {
6345 rtx x = *px;
6346
6347 if (GET_CODE (x) == SYMBOL_REF
6348 && local_dynamic_symbolic_operand (x, Pmode))
6349 {
6350 cfun->machine->some_ld_name = XSTR (x, 0);
6351 return 1;
6352 }
6353
6354 return 0;
6355 }
6356
6357 /* Meaning of CODE:
6358 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6359 C -- print opcode suffix for set/cmov insn.
6360 c -- like C, but print reversed condition
6361 F,f -- likewise, but for floating-point.
6362 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6363 nothing
6364 R -- print the prefix for register names.
6365 z -- print the opcode suffix for the size of the current operand.
6366 * -- print a star (in certain assembler syntax)
6367 A -- print an absolute memory reference.
6368 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6369 s -- print a shift double count, followed by the assembler's argument
6370 delimiter.
6371 b -- print the QImode name of the register for the indicated operand.
6372 %b0 would print %al if operands[0] is reg 0.
6373 w -- likewise, print the HImode name of the register.
6374 k -- likewise, print the SImode name of the register.
6375 q -- likewise, print the DImode name of the register.
6376 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6377 y -- print "st(0)" instead of "st" as a register.
6378 D -- print condition for SSE cmp instruction.
6379 P -- if PIC, print an @PLT suffix.
6380 X -- don't print any sort of PIC '@' suffix for a symbol.
6381 & -- print some in-use local-dynamic symbol name.
+ -- print a 'ds' (predict taken) or 'cs' (predict not taken) branch
hint prefix, when branch prediction hints are enabled.
6382 */
6383
6384 void
6385 print_operand (file, x, code)
6386 FILE *file;
6387 rtx x;
6388 int code;
6389 {
6390 if (code)
6391 {
6392 switch (code)
6393 {
6394 case '*':
6395 if (ASSEMBLER_DIALECT == ASM_ATT)
6396 putc ('*', file);
6397 return;
6398
6399 case '&':
6400 assemble_name (file, get_some_local_dynamic_name ());
6401 return;
6402
6403 case 'A':
6404 if (ASSEMBLER_DIALECT == ASM_ATT)
6405 putc ('*', file);
6406 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6407 {
6408 /* Intel syntax. For absolute addresses, registers should not
6409 be surrounded by brackets. */
6410 if (GET_CODE (x) != REG)
6411 {
6412 putc ('[', file);
6413 PRINT_OPERAND (file, x, 0);
6414 putc (']', file);
6415 return;
6416 }
6417 }
6418 else
6419 abort ();
6420
6421 PRINT_OPERAND (file, x, 0);
6422 return;
6423
6424
6425 case 'L':
6426 if (ASSEMBLER_DIALECT == ASM_ATT)
6427 putc ('l', file);
6428 return;
6429
6430 case 'W':
6431 if (ASSEMBLER_DIALECT == ASM_ATT)
6432 putc ('w', file);
6433 return;
6434
6435 case 'B':
6436 if (ASSEMBLER_DIALECT == ASM_ATT)
6437 putc ('b', file);
6438 return;
6439
6440 case 'Q':
6441 if (ASSEMBLER_DIALECT == ASM_ATT)
6442 putc ('l', file);
6443 return;
6444
6445 case 'S':
6446 if (ASSEMBLER_DIALECT == ASM_ATT)
6447 putc ('s', file);
6448 return;
6449
6450 case 'T':
6451 if (ASSEMBLER_DIALECT == ASM_ATT)
6452 putc ('t', file);
6453 return;
6454
6455 case 'z':
6456 /* 387 opcodes don't get size suffixes if the operands are
6457 registers. */
6458 if (STACK_REG_P (x))
6459 return;
6460
6461 /* Likewise if using Intel opcodes. */
6462 if (ASSEMBLER_DIALECT == ASM_INTEL)
6463 return;
6464
6465 /* This is the size of op from size of operand. */
6466 switch (GET_MODE_SIZE (GET_MODE (x)))
6467 {
6468 case 2:
6469 #ifdef HAVE_GAS_FILDS_FISTS
6470 putc ('s', file);
6471 #endif
6472 return;
6473
6474 case 4:
6475 if (GET_MODE (x) == SFmode)
6476 {
6477 putc ('s', file);
6478 return;
6479 }
6480 else
6481 putc ('l', file);
6482 return;
6483
6484 case 12:
6485 case 16:
6486 putc ('t', file);
6487 return;
6488
6489 case 8:
6490 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6491 {
6492 #ifdef GAS_MNEMONICS
6493 putc ('q', file);
6494 #else
6495 putc ('l', file);
6496 putc ('l', file);
6497 #endif
6498 }
6499 else
6500 putc ('l', file);
6501 return;
6502
6503 default:
6504 abort ();
6505 }
6506
6507 case 'b':
6508 case 'w':
6509 case 'k':
6510 case 'q':
6511 case 'h':
6512 case 'y':
6513 case 'X':
6514 case 'P':
6515 break;
6516
6517 case 's':
6518 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6519 {
6520 PRINT_OPERAND (file, x, 0);
6521 putc (',', file);
6522 }
6523 return;
6524
6525 case 'D':
6526 /* A little bit of brain damage here: the SSE compare instructions
6527 use completely different names for the comparisons than the
6528 fp conditional moves do. */
6529 switch (GET_CODE (x))
6530 {
6531 case EQ:
6532 case UNEQ:
6533 fputs ("eq", file);
6534 break;
6535 case LT:
6536 case UNLT:
6537 fputs ("lt", file);
6538 break;
6539 case LE:
6540 case UNLE:
6541 fputs ("le", file);
6542 break;
6543 case UNORDERED:
6544 fputs ("unord", file);
6545 break;
6546 case NE:
6547 case LTGT:
6548 fputs ("neq", file);
6549 break;
6550 case UNGE:
6551 case GE:
6552 fputs ("nlt", file);
6553 break;
6554 case UNGT:
6555 case GT:
6556 fputs ("nle", file);
6557 break;
6558 case ORDERED:
6559 fputs ("ord", file);
6560 break;
6561 default:
6562 abort ();
6563 break;
6564 }
6565 return;
6566 case 'O':
6567 #ifdef CMOV_SUN_AS_SYNTAX
6568 if (ASSEMBLER_DIALECT == ASM_ATT)
6569 {
6570 switch (GET_MODE (x))
6571 {
6572 case HImode: putc ('w', file); break;
6573 case SImode:
6574 case SFmode: putc ('l', file); break;
6575 case DImode:
6576 case DFmode: putc ('q', file); break;
6577 default: abort ();
6578 }
6579 putc ('.', file);
6580 }
6581 #endif
6582 return;
6583 case 'C':
6584 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6585 return;
6586 case 'F':
6587 #ifdef CMOV_SUN_AS_SYNTAX
6588 if (ASSEMBLER_DIALECT == ASM_ATT)
6589 putc ('.', file);
6590 #endif
6591 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6592 return;
6593
6594 /* Like above, but reverse condition */
6595 case 'c':
6596 /* Check to see if argument to %c is really a constant
6597 and not a condition code which needs to be reversed. */
6598 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6599 {
6600 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6601 return;
6602 }
6603 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6604 return;
6605 case 'f':
6606 #ifdef CMOV_SUN_AS_SYNTAX
6607 if (ASSEMBLER_DIALECT == ASM_ATT)
6608 putc ('.', file);
6609 #endif
6610 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6611 return;
6612 case '+':
6613 {
6614 rtx x;
6615
6616 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6617 return;
6618
6619 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6620 if (x)
6621 {
6622 int pred_val = INTVAL (XEXP (x, 0));
6623
6624 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6625 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6626 {
6627 int taken = pred_val > REG_BR_PROB_BASE / 2;
6628 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6629
6630 /* Emit hints only in the case the default branch prediction
6631 heuristics would fail. */
6632 if (taken != cputaken)
6633 {
6634 /* We use 3e (DS) prefix for taken branches and
6635 2e (CS) prefix for not taken branches. */
6636 if (taken)
6637 fputs ("ds ; ", file);
6638 else
6639 fputs ("cs ; ", file);
6640 }
6641 }
6642 }
6643 return;
6644 }
6645 default:
6646 output_operand_lossage ("invalid operand code `%c'", code);
6647 }
6648 }
6649
6650 if (GET_CODE (x) == REG)
6651 {
6652 PRINT_REG (x, code, file);
6653 }
6654
6655 else if (GET_CODE (x) == MEM)
6656 {
6657 /* No `byte ptr' prefix for call instructions. */
6658 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6659 {
6660 const char * size;
6661 switch (GET_MODE_SIZE (GET_MODE (x)))
6662 {
6663 case 1: size = "BYTE"; break;
6664 case 2: size = "WORD"; break;
6665 case 4: size = "DWORD"; break;
6666 case 8: size = "QWORD"; break;
6667 case 12: size = "XWORD"; break;
6668 case 16: size = "XMMWORD"; break;
6669 default:
6670 abort ();
6671 }
6672
6673 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6674 if (code == 'b')
6675 size = "BYTE";
6676 else if (code == 'w')
6677 size = "WORD";
6678 else if (code == 'k')
6679 size = "DWORD";
6680
6681 fputs (size, file);
6682 fputs (" PTR ", file);
6683 }
6684
6685 x = XEXP (x, 0);
6686 if (flag_pic && CONSTANT_ADDRESS_P (x))
6687 output_pic_addr_const (file, x, code);
6688 /* Avoid (%rip) for call operands. */
6689 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6690 && GET_CODE (x) != CONST_INT)
6691 output_addr_const (file, x);
6692 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6693 output_operand_lossage ("invalid constraints for operand");
6694 else
6695 output_address (x);
6696 }
6697
6698 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6699 {
6700 REAL_VALUE_TYPE r;
6701 long l;
6702
6703 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6704 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6705
6706 if (ASSEMBLER_DIALECT == ASM_ATT)
6707 putc ('$', file);
6708 fprintf (file, "0x%lx", l);
6709 }
6710
6711 /* These float cases don't actually occur as immediate operands. */
6712 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6713 {
6714 REAL_VALUE_TYPE r;
6715 char dstr[30];
6716
6717 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6718 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6719 fprintf (file, "%s", dstr);
6720 }
6721
6722 else if (GET_CODE (x) == CONST_DOUBLE
6723 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6724 {
6725 REAL_VALUE_TYPE r;
6726 char dstr[30];
6727
6728 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6729 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6730 fprintf (file, "%s", dstr);
6731 }
6732
6733 else
6734 {
6735 if (code != 'P')
6736 {
6737 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6738 {
6739 if (ASSEMBLER_DIALECT == ASM_ATT)
6740 putc ('$', file);
6741 }
6742 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6743 || GET_CODE (x) == LABEL_REF)
6744 {
6745 if (ASSEMBLER_DIALECT == ASM_ATT)
6746 putc ('$', file);
6747 else
6748 fputs ("OFFSET FLAT:", file);
6749 }
6750 }
6751 if (GET_CODE (x) == CONST_INT)
6752 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6753 else if (flag_pic)
6754 output_pic_addr_const (file, x, code);
6755 else
6756 output_addr_const (file, x);
6757 }
6758 }
6759 \f
6760 /* Print a memory operand whose address is ADDR. */
6761
6762 void
6763 print_operand_address (file, addr)
6764 FILE *file;
6765 register rtx addr;
6766 {
6767 struct ix86_address parts;
6768 rtx base, index, disp;
6769 int scale;
6770
6771 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6772 {
6773 if (ASSEMBLER_DIALECT == ASM_INTEL)
6774 fputs ("DWORD PTR ", file);
6775 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6776 putc ('%', file);
6777 fputs ("gs:0", file);
6778 return;
6779 }
6780
6781 if (! ix86_decompose_address (addr, &parts))
6782 abort ();
6783
6784 base = parts.base;
6785 index = parts.index;
6786 disp = parts.disp;
6787 scale = parts.scale;
6788
6789 if (!base && !index)
6790 {
6791 /* A displacement-only address requires special attention. */
6792
6793 if (GET_CODE (disp) == CONST_INT)
6794 {
6795 if (ASSEMBLER_DIALECT == ASM_INTEL)
6796 {
6797 if (USER_LABEL_PREFIX[0] == 0)
6798 putc ('%', file);
6799 fputs ("ds:", file);
6800 }
6801 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6802 }
6803 else if (flag_pic)
6804 output_pic_addr_const (file, addr, 0);
6805 else
6806 output_addr_const (file, addr);
6807
6808 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6809 if (TARGET_64BIT
6810 && (GET_CODE (addr) == SYMBOL_REF
6811 || GET_CODE (addr) == LABEL_REF
6812 || (GET_CODE (addr) == CONST
6813 && GET_CODE (XEXP (addr, 0)) == PLUS
6814 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6815 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6816 fputs ("(%rip)", file);
6817 }
6818 else
6819 {
6820 if (ASSEMBLER_DIALECT == ASM_ATT)
6821 {
6822 if (disp)
6823 {
6824 if (flag_pic)
6825 output_pic_addr_const (file, disp, 0);
6826 else if (GET_CODE (disp) == LABEL_REF)
6827 output_asm_label (disp);
6828 else
6829 output_addr_const (file, disp);
6830 }
6831
6832 putc ('(', file);
6833 if (base)
6834 PRINT_REG (base, 0, file);
6835 if (index)
6836 {
6837 putc (',', file);
6838 PRINT_REG (index, 0, file);
6839 if (scale != 1)
6840 fprintf (file, ",%d", scale);
6841 }
6842 putc (')', file);
6843 }
6844 else
6845 {
6846 rtx offset = NULL_RTX;
6847
6848 if (disp)
6849 {
6850 /* Pull out the offset of a symbol; print any symbol itself. */
6851 if (GET_CODE (disp) == CONST
6852 && GET_CODE (XEXP (disp, 0)) == PLUS
6853 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6854 {
6855 offset = XEXP (XEXP (disp, 0), 1);
6856 disp = gen_rtx_CONST (VOIDmode,
6857 XEXP (XEXP (disp, 0), 0));
6858 }
6859
6860 if (flag_pic)
6861 output_pic_addr_const (file, disp, 0);
6862 else if (GET_CODE (disp) == LABEL_REF)
6863 output_asm_label (disp);
6864 else if (GET_CODE (disp) == CONST_INT)
6865 offset = disp;
6866 else
6867 output_addr_const (file, disp);
6868 }
6869
6870 putc ('[', file);
6871 if (base)
6872 {
6873 PRINT_REG (base, 0, file);
6874 if (offset)
6875 {
6876 if (INTVAL (offset) >= 0)
6877 putc ('+', file);
6878 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6879 }
6880 }
6881 else if (offset)
6882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6883 else
6884 putc ('0', file);
6885
6886 if (index)
6887 {
6888 putc ('+', file);
6889 PRINT_REG (index, 0, file);
6890 if (scale != 1)
6891 fprintf (file, "*%d", scale);
6892 }
6893 putc (']', file);
6894 }
6895 }
6896 }
6897
6898 bool
6899 output_addr_const_extra (file, x)
6900 FILE *file;
6901 rtx x;
6902 {
6903 rtx op;
6904
6905 if (GET_CODE (x) != UNSPEC)
6906 return false;
6907
6908 op = XVECEXP (x, 0, 0);
6909 switch (XINT (x, 1))
6910 {
6911 case UNSPEC_GOTTPOFF:
6912 output_addr_const (file, op);
6913 /* FIXME: This might be @TPOFF in Sun ld. */
6914 fputs ("@GOTTPOFF", file);
6915 break;
6916 case UNSPEC_TPOFF:
6917 output_addr_const (file, op);
6918 fputs ("@TPOFF", file);
6919 break;
6920 case UNSPEC_NTPOFF:
6921 output_addr_const (file, op);
6922 fputs ("@NTPOFF", file);
6923 break;
6924 case UNSPEC_DTPOFF:
6925 output_addr_const (file, op);
6926 fputs ("@DTPOFF", file);
6927 break;
6928 case UNSPEC_GOTNTPOFF:
6929 output_addr_const (file, op);
6930 fputs ("@GOTNTPOFF", file);
6931 break;
6932 case UNSPEC_INDNTPOFF:
6933 output_addr_const (file, op);
6934 fputs ("@INDNTPOFF", file);
6935 break;
6936
6937 default:
6938 return false;
6939 }
6940
6941 return true;
6942 }
6943 \f
6944 /* Split one or more DImode RTL references into pairs of SImode
6945 references. The RTL can be REG, offsettable MEM, integer constant, or
6946 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6947 split and "num" is its length. lo_half and hi_half are output arrays
6948 that parallel "operands". */
6949
6950 void
6951 split_di (operands, num, lo_half, hi_half)
6952 rtx operands[];
6953 int num;
6954 rtx lo_half[], hi_half[];
6955 {
6956 while (num--)
6957 {
6958 rtx op = operands[num];
6959
6960 /* simplify_subreg refuses to split volatile memory addresses,
6961 but we still have to handle it. */
6962 if (GET_CODE (op) == MEM)
6963 {
6964 lo_half[num] = adjust_address (op, SImode, 0);
6965 hi_half[num] = adjust_address (op, SImode, 4);
6966 }
6967 else
6968 {
6969 lo_half[num] = simplify_gen_subreg (SImode, op,
6970 GET_MODE (op) == VOIDmode
6971 ? DImode : GET_MODE (op), 0);
6972 hi_half[num] = simplify_gen_subreg (SImode, op,
6973 GET_MODE (op) == VOIDmode
6974 ? DImode : GET_MODE (op), 4);
6975 }
6976 }
6977 }
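
/* Illustrative sketch (not a splitter taken from i386.md) of how a
   DImode move could be lowered with split_di, assuming at least one
   side is a register and ignoring the ordering and overlap issues a
   real splitter has to care about.  */
static void
example_split_dimode_move (operands)
     rtx operands[2];		/* operands[0] = dest, operands[1] = src */
{
  rtx lo[2], hi[2];

  split_di (operands, 2, lo, hi);
  emit_move_insn (lo[0], lo[1]);	/* low 32 bits */
  emit_move_insn (hi[0], hi[1]);	/* high 32 bits */
}
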
6978 /* Split one or more TImode RTL references into pairs of DImode
6979 references. The RTL can be REG, offsettable MEM, integer constant, or
6980 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6981 split and "num" is its length. lo_half and hi_half are output arrays
6982 that parallel "operands". */
6983
6984 void
6985 split_ti (operands, num, lo_half, hi_half)
6986 rtx operands[];
6987 int num;
6988 rtx lo_half[], hi_half[];
6989 {
6990 while (num--)
6991 {
6992 rtx op = operands[num];
6993
6994 /* simplify_subreg refuses to split volatile memory addresses, but we
6995 still have to handle it. */
6996 if (GET_CODE (op) == MEM)
6997 {
6998 lo_half[num] = adjust_address (op, DImode, 0);
6999 hi_half[num] = adjust_address (op, DImode, 8);
7000 }
7001 else
7002 {
7003 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7004 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7005 }
7006 }
7007 }
7008 \f
7009 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7010 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7011 is the expression of the binary operation. The output may either be
7012 emitted here, or returned to the caller, like all output_* functions.
7013
7014 There is no guarantee that the operands are the same mode, as they
7015 might be within FLOAT or FLOAT_EXTEND expressions. */
7016
7017 #ifndef SYSV386_COMPAT
7018 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7019 wants to fix the assemblers because that causes incompatibility
7020 with gcc. No-one wants to fix gcc because that causes
7021 incompatibility with assemblers... You can use the option of
7022 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7023 #define SYSV386_COMPAT 1
7024 #endif
7025
7026 const char *
7027 output_387_binary_op (insn, operands)
7028 rtx insn;
7029 rtx *operands;
7030 {
7031 static char buf[30];
7032 const char *p;
7033 const char *ssep;
7034 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7035
7036 #ifdef ENABLE_CHECKING
7037 /* Even if we do not want to check the inputs, this documents input
7038 constraints. Which helps in understanding the following code. */
7039 if (STACK_REG_P (operands[0])
7040 && ((REG_P (operands[1])
7041 && REGNO (operands[0]) == REGNO (operands[1])
7042 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7043 || (REG_P (operands[2])
7044 && REGNO (operands[0]) == REGNO (operands[2])
7045 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7046 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7047 ; /* ok */
7048 else if (!is_sse)
7049 abort ();
7050 #endif
7051
7052 switch (GET_CODE (operands[3]))
7053 {
7054 case PLUS:
7055 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7056 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7057 p = "fiadd";
7058 else
7059 p = "fadd";
7060 ssep = "add";
7061 break;
7062
7063 case MINUS:
7064 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7065 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7066 p = "fisub";
7067 else
7068 p = "fsub";
7069 ssep = "sub";
7070 break;
7071
7072 case MULT:
7073 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7074 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7075 p = "fimul";
7076 else
7077 p = "fmul";
7078 ssep = "mul";
7079 break;
7080
7081 case DIV:
7082 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7083 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7084 p = "fidiv";
7085 else
7086 p = "fdiv";
7087 ssep = "div";
7088 break;
7089
7090 default:
7091 abort ();
7092 }
7093
7094 if (is_sse)
7095 {
7096 strcpy (buf, ssep);
7097 if (GET_MODE (operands[0]) == SFmode)
7098 strcat (buf, "ss\t{%2, %0|%0, %2}");
7099 else
7100 strcat (buf, "sd\t{%2, %0|%0, %2}");
7101 return buf;
7102 }
7103 strcpy (buf, p);
7104
7105 switch (GET_CODE (operands[3]))
7106 {
7107 case MULT:
7108 case PLUS:
7109 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7110 {
7111 rtx temp = operands[2];
7112 operands[2] = operands[1];
7113 operands[1] = temp;
7114 }
7115
7116 /* We know operands[0] == operands[1]. */
7117
7118 if (GET_CODE (operands[2]) == MEM)
7119 {
7120 p = "%z2\t%2";
7121 break;
7122 }
7123
7124 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7125 {
7126 if (STACK_TOP_P (operands[0]))
7127 /* How is it that we are storing to a dead operand[2]?
7128 Well, presumably operands[1] is dead too. We can't
7129 store the result to st(0) as st(0) gets popped on this
7130 instruction. Instead store to operands[2] (which I
7131 think has to be st(1)). st(1) will be popped later.
7132 gcc <= 2.8.1 didn't have this check and generated
7133 assembly code that the Unixware assembler rejected. */
7134 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7135 else
7136 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7137 break;
7138 }
7139
7140 if (STACK_TOP_P (operands[0]))
7141 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7142 else
7143 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7144 break;
7145
7146 case MINUS:
7147 case DIV:
7148 if (GET_CODE (operands[1]) == MEM)
7149 {
7150 p = "r%z1\t%1";
7151 break;
7152 }
7153
7154 if (GET_CODE (operands[2]) == MEM)
7155 {
7156 p = "%z2\t%2";
7157 break;
7158 }
7159
7160 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7161 {
7162 #if SYSV386_COMPAT
7163 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7164 derived assemblers, confusingly reverse the direction of
7165 the operation for fsub{r} and fdiv{r} when the
7166 destination register is not st(0). The Intel assembler
7167 doesn't have this brain damage. Read !SYSV386_COMPAT to
7168 figure out what the hardware really does. */
7169 if (STACK_TOP_P (operands[0]))
7170 p = "{p\t%0, %2|rp\t%2, %0}";
7171 else
7172 p = "{rp\t%2, %0|p\t%0, %2}";
7173 #else
7174 if (STACK_TOP_P (operands[0]))
7175 /* As above for fmul/fadd, we can't store to st(0). */
7176 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7177 else
7178 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7179 #endif
7180 break;
7181 }
7182
7183 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7184 {
7185 #if SYSV386_COMPAT
7186 if (STACK_TOP_P (operands[0]))
7187 p = "{rp\t%0, %1|p\t%1, %0}";
7188 else
7189 p = "{p\t%1, %0|rp\t%0, %1}";
7190 #else
7191 if (STACK_TOP_P (operands[0]))
7192 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7193 else
7194 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7195 #endif
7196 break;
7197 }
7198
7199 if (STACK_TOP_P (operands[0]))
7200 {
7201 if (STACK_TOP_P (operands[1]))
7202 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7203 else
7204 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7205 break;
7206 }
7207 else if (STACK_TOP_P (operands[1]))
7208 {
7209 #if SYSV386_COMPAT
7210 p = "{\t%1, %0|r\t%0, %1}";
7211 #else
7212 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7213 #endif
7214 }
7215 else
7216 {
7217 #if SYSV386_COMPAT
7218 p = "{r\t%2, %0|\t%0, %2}";
7219 #else
7220 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7221 #endif
7222 }
7223 break;
7224
7225 default:
7226 abort ();
7227 }
7228
7229 strcat (buf, p);
7230 return buf;
7231 }
7232
7233 /* Output code to initialize the control word copies used by the
7234 trunc?f?i patterns. NORMAL is set to the current control word, while
7235 ROUND_DOWN is set to a copy with the rounding control field set to
round toward zero (truncation). */
7236 void
7237 emit_i387_cw_initialization (normal, round_down)
7238 rtx normal, round_down;
7239 {
7240 rtx reg = gen_reg_rtx (HImode);
7241
7242 emit_insn (gen_x86_fnstcw_1 (normal));
7243 emit_move_insn (reg, normal);
7244 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7245 && !TARGET_64BIT)
7246 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7247 else
7248 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7249 emit_move_insn (round_down, reg);
7250 }
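
/* Illustrative only: bits 10 and 11 of the 387 control word form the
   rounding-control field, and 0x0c00 (both bits set) selects round
   toward zero, which is what the truncating conversion patterns need.
   A plain C sketch of the manipulation performed above: */
static int
example_truncating_control_word (cw)
     int cw;			/* control word as stored by fnstcw */
{
  return cw | 0x0c00;		/* rounding control = truncate */
}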
7251
7252 /* Output code for INSN to convert a float to a signed int. OPERANDS
7253 are the insn operands. The output may be [HSD]Imode and the input
7254 operand may be [SDX]Fmode. */
7255
7256 const char *
7257 output_fix_trunc (insn, operands)
7258 rtx insn;
7259 rtx *operands;
7260 {
7261 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7262 int dimode_p = GET_MODE (operands[0]) == DImode;
7263
7264 /* Jump through a hoop or two for DImode, since the hardware has no
7265 non-popping instruction. We used to do this a different way, but
7266 that was somewhat fragile and broke with post-reload splitters. */
7267 if (dimode_p && !stack_top_dies)
7268 output_asm_insn ("fld\t%y1", operands);
7269
7270 if (!STACK_TOP_P (operands[1]))
7271 abort ();
7272
7273 if (GET_CODE (operands[0]) != MEM)
7274 abort ();
7275
7276 output_asm_insn ("fldcw\t%3", operands);
7277 if (stack_top_dies || dimode_p)
7278 output_asm_insn ("fistp%z0\t%0", operands);
7279 else
7280 output_asm_insn ("fist%z0\t%0", operands);
7281 output_asm_insn ("fldcw\t%2", operands);
7282
7283 return "";
7284 }
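
/* Illustrative example of the sequence emitted above for an SImode
   result when the top of the 387 stack dies, with %3 the truncating
   control word and %2 the original one (the copies prepared by
   emit_i387_cw_initialization):

	fldcw	%3		# switch to round-toward-zero
	fistpl	%0		# store the truncated integer, pop st(0)
	fldcw	%2		# restore the original rounding mode  */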
7285
7286 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7287 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7288 when fucom should be used. */
7289
7290 const char *
7291 output_fp_compare (insn, operands, eflags_p, unordered_p)
7292 rtx insn;
7293 rtx *operands;
7294 int eflags_p, unordered_p;
7295 {
7296 int stack_top_dies;
7297 rtx cmp_op0 = operands[0];
7298 rtx cmp_op1 = operands[1];
7299 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7300
7301 if (eflags_p == 2)
7302 {
7303 cmp_op0 = cmp_op1;
7304 cmp_op1 = operands[2];
7305 }
7306 if (is_sse)
7307 {
7308 if (GET_MODE (operands[0]) == SFmode)
7309 if (unordered_p)
7310 return "ucomiss\t{%1, %0|%0, %1}";
7311 else
7312 return "comiss\t{%1, %0|%0, %y}";
7313 else
7314 if (unordered_p)
7315 return "ucomisd\t{%1, %0|%0, %1}";
7316 else
7317 return "comisd\t{%1, %0|%0, %y}";
7318 }
7319
7320 if (! STACK_TOP_P (cmp_op0))
7321 abort ();
7322
7323 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7324
7325 if (STACK_REG_P (cmp_op1)
7326 && stack_top_dies
7327 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7328 && REGNO (cmp_op1) != FIRST_STACK_REG)
7329 {
7330 /* If the top of the 387 stack dies, and the other operand
7331 is also a stack register that dies, then this must be a
7332 `fcompp' float compare. */
7333
7334 if (eflags_p == 1)
7335 {
7336 /* There is no double popping fcomi variant. Fortunately,
7337 eflags is immune from the fstp's cc clobbering. */
7338 if (unordered_p)
7339 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7340 else
7341 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7342 return "fstp\t%y0";
7343 }
7344 else
7345 {
7346 if (eflags_p == 2)
7347 {
7348 if (unordered_p)
7349 return "fucompp\n\tfnstsw\t%0";
7350 else
7351 return "fcompp\n\tfnstsw\t%0";
7352 }
7353 else
7354 {
7355 if (unordered_p)
7356 return "fucompp";
7357 else
7358 return "fcompp";
7359 }
7360 }
7361 }
7362 else
7363 {
7364 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7365
7366 static const char * const alt[24] =
7367 {
7368 "fcom%z1\t%y1",
7369 "fcomp%z1\t%y1",
7370 "fucom%z1\t%y1",
7371 "fucomp%z1\t%y1",
7372
7373 "ficom%z1\t%y1",
7374 "ficomp%z1\t%y1",
7375 NULL,
7376 NULL,
7377
7378 "fcomi\t{%y1, %0|%0, %y1}",
7379 "fcomip\t{%y1, %0|%0, %y1}",
7380 "fucomi\t{%y1, %0|%0, %y1}",
7381 "fucomip\t{%y1, %0|%0, %y1}",
7382
7383 NULL,
7384 NULL,
7385 NULL,
7386 NULL,
7387
7388 "fcom%z2\t%y2\n\tfnstsw\t%0",
7389 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7390 "fucom%z2\t%y2\n\tfnstsw\t%0",
7391 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7392
7393 "ficom%z2\t%y2\n\tfnstsw\t%0",
7394 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7395 NULL,
7396 NULL
7397 };
7398
7399 int mask;
7400 const char *ret;
7401
7402 mask = eflags_p << 3;
7403 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7404 mask |= unordered_p << 1;
7405 mask |= stack_top_dies;
7406
7407 if (mask >= 24)
7408 abort ();
7409 ret = alt[mask];
7410 if (ret == NULL)
7411 abort ();
7412
7413 return ret;
7414 }
7415 }
7416
7417 void
7418 ix86_output_addr_vec_elt (file, value)
7419 FILE *file;
7420 int value;
7421 {
7422 const char *directive = ASM_LONG;
7423
7424 if (TARGET_64BIT)
7425 {
7426 #ifdef ASM_QUAD
7427 directive = ASM_QUAD;
7428 #else
7429 abort ();
7430 #endif
7431 }
7432
7433 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7434 }
7435
7436 void
7437 ix86_output_addr_diff_elt (file, value, rel)
7438 FILE *file;
7439 int value, rel;
7440 {
7441 if (TARGET_64BIT)
7442 fprintf (file, "%s%s%d-%s%d\n",
7443 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7444 else if (HAVE_AS_GOTOFF_IN_DATA)
7445 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7446 #if TARGET_MACHO
7447 else if (TARGET_MACHO)
7448 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7449 machopic_function_base_name () + 1);
7450 #endif
7451 else
7452 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7453 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7454 }
7455 \f
7456 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7457 for the target. */
7458
7459 void
7460 ix86_expand_clear (dest)
7461 rtx dest;
7462 {
7463 rtx tmp;
7464
7465 /* We play register width games, which are only valid after reload. */
7466 if (!reload_completed)
7467 abort ();
7468
7469 /* Avoid HImode and its attendant prefix byte. */
7470 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7471 dest = gen_rtx_REG (SImode, REGNO (dest));
7472
7473 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7474
7475 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7476 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7477 {
7478 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7479 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7480 }
7481
7482 emit_insn (tmp);
7483 }
7484
7485 /* X is an unchanging MEM. If it is a constant pool reference, return
7486 the constant pool rtx, else NULL. */
7487
7488 static rtx
7489 maybe_get_pool_constant (x)
7490 rtx x;
7491 {
7492 x = XEXP (x, 0);
7493
7494 if (flag_pic)
7495 {
7496 if (GET_CODE (x) != PLUS)
7497 return NULL_RTX;
7498 if (XEXP (x, 0) != pic_offset_table_rtx)
7499 return NULL_RTX;
7500 x = XEXP (x, 1);
7501 if (GET_CODE (x) != CONST)
7502 return NULL_RTX;
7503 x = XEXP (x, 0);
7504 if (GET_CODE (x) != UNSPEC)
7505 return NULL_RTX;
7506 if (XINT (x, 1) != UNSPEC_GOTOFF)
7507 return NULL_RTX;
7508 x = XVECEXP (x, 0, 0);
7509 }
7510
7511 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7512 return get_pool_constant (x);
7513
7514 return NULL_RTX;
7515 }
7516
7517 void
7518 ix86_expand_move (mode, operands)
7519 enum machine_mode mode;
7520 rtx operands[];
7521 {
7522 int strict = (reload_in_progress || reload_completed);
7523 rtx insn, op0, op1, tmp;
7524
7525 op0 = operands[0];
7526 op1 = operands[1];
7527
7528 /* ??? We have a slight problem. We need to say that tls symbols are
7529 not legitimate constants so that reload does not helpfully reload
7530 these constants from a REG_EQUIV, which we cannot handle. (Recall
7531 that general- and local-dynamic address resolution requires a
7532 function call.)
7533
7534 However, if we say that tls symbols are not legitimate constants,
7535 then emit_move_insn will helpfully drop them into the constant pool.
7536
7537 It is far easier to work around emit_move_insn than reload. Recognize
7538 the MEM that we would have created and extract the symbol_ref. */
7539
7540 if (mode == Pmode
7541 && GET_CODE (op1) == MEM
7542 && RTX_UNCHANGING_P (op1))
7543 {
7544 tmp = maybe_get_pool_constant (op1);
7545 /* Note that we only care about symbolic constants here, which
7546 unlike CONST_INT will always have a proper mode. */
7547 if (tmp && GET_MODE (tmp) == Pmode)
7548 op1 = tmp;
7549 }
7550
7551 if (tls_symbolic_operand (op1, Pmode))
7552 {
7553 op1 = legitimize_address (op1, op1, VOIDmode);
7554 if (GET_CODE (op0) == MEM)
7555 {
7556 tmp = gen_reg_rtx (mode);
7557 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7558 op1 = tmp;
7559 }
7560 }
7561 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7562 {
7563 #if TARGET_MACHO
7564 if (MACHOPIC_PURE)
7565 {
7566 rtx temp = ((reload_in_progress
7567 || ((op0 && GET_CODE (op0) == REG)
7568 && mode == Pmode))
7569 ? op0 : gen_reg_rtx (Pmode));
7570 op1 = machopic_indirect_data_reference (op1, temp);
7571 op1 = machopic_legitimize_pic_address (op1, mode,
7572 temp == op1 ? 0 : temp);
7573 }
7574 else
7575 {
7576 if (MACHOPIC_INDIRECT)
7577 op1 = machopic_indirect_data_reference (op1, 0);
7578 }
7579 if (op0 != op1)
7580 {
7581 insn = gen_rtx_SET (VOIDmode, op0, op1);
7582 emit_insn (insn);
7583 }
7584 return;
7585 #endif /* TARGET_MACHO */
7586 if (GET_CODE (op0) == MEM)
7587 op1 = force_reg (Pmode, op1);
7588 else
7589 {
7590 rtx temp = op0;
7591 if (GET_CODE (temp) != REG)
7592 temp = gen_reg_rtx (Pmode);
7593 temp = legitimize_pic_address (op1, temp);
7594 if (temp == op0)
7595 return;
7596 op1 = temp;
7597 }
7598 }
7599 else
7600 {
7601 if (GET_CODE (op0) == MEM
7602 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7603 || !push_operand (op0, mode))
7604 && GET_CODE (op1) == MEM)
7605 op1 = force_reg (mode, op1);
7606
7607 if (push_operand (op0, mode)
7608 && ! general_no_elim_operand (op1, mode))
7609 op1 = copy_to_mode_reg (mode, op1);
7610
7611 /* Force large constants in 64bit compilation into register
7612 to get them CSEed. */
7613 if (TARGET_64BIT && mode == DImode
7614 && immediate_operand (op1, mode)
7615 && !x86_64_zero_extended_value (op1)
7616 && !register_operand (op0, mode)
7617 && optimize && !reload_completed && !reload_in_progress)
7618 op1 = copy_to_mode_reg (mode, op1);
7619
7620 if (FLOAT_MODE_P (mode))
7621 {
7622 /* If we are loading a floating point constant to a register,
7623 force the value to memory now, since we'll get better code
7624 out the back end. */
7625
7626 if (strict)
7627 ;
7628 else if (GET_CODE (op1) == CONST_DOUBLE
7629 && register_operand (op0, mode))
7630 op1 = validize_mem (force_const_mem (mode, op1));
7631 }
7632 }
7633
7634 insn = gen_rtx_SET (VOIDmode, op0, op1);
7635
7636 emit_insn (insn);
7637 }
7638
7639 void
7640 ix86_expand_vector_move (mode, operands)
7641 enum machine_mode mode;
7642 rtx operands[];
7643 {
7644 /* Force constants other than zero into memory. We do not know how
7645 the instructions used to build constants modify the upper 64 bits
7646 of the register; once we have that information we may be able
7647 to handle some of them more efficiently. */
7648 if ((reload_in_progress | reload_completed) == 0
7649 && register_operand (operands[0], mode)
7650 && CONSTANT_P (operands[1]))
7651 {
7652 rtx addr = gen_reg_rtx (Pmode);
7653 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7654 operands[1] = gen_rtx_MEM (mode, addr);
7655 }
7656
7657 /* Make operand1 a register if it isn't already. */
7658 if ((reload_in_progress | reload_completed) == 0
7659 && !register_operand (operands[0], mode)
7660 && !register_operand (operands[1], mode)
7661 && operands[1] != CONST0_RTX (mode))
7662 {
7663 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7664 emit_move_insn (operands[0], temp);
7665 return;
7666 }
7667
7668 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7669 }
7670
7671 /* Attempt to expand a binary operator. Make the expansion closer to the
7672 actual machine than just general_operand, which would allow 3 separate
7673 memory references (one output, two inputs) in a single insn. */
7674
7675 void
7676 ix86_expand_binary_operator (code, mode, operands)
7677 enum rtx_code code;
7678 enum machine_mode mode;
7679 rtx operands[];
7680 {
7681 int matching_memory;
7682 rtx src1, src2, dst, op, clob;
7683
7684 dst = operands[0];
7685 src1 = operands[1];
7686 src2 = operands[2];
7687
7688 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7689 if (GET_RTX_CLASS (code) == 'c'
7690 && (rtx_equal_p (dst, src2)
7691 || immediate_operand (src1, mode)))
7692 {
7693 rtx temp = src1;
7694 src1 = src2;
7695 src2 = temp;
7696 }
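  /* Putting the operand that has to match the destination into SRC1 suits the
     two-address x86 insn patterns (operand 1 matches operand 0), and an
     immediate may only appear as the second source operand.  */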
7697
7698 /* If the destination is memory, and we do not have matching source
7699 operands, do things in registers. */
7700 matching_memory = 0;
7701 if (GET_CODE (dst) == MEM)
7702 {
7703 if (rtx_equal_p (dst, src1))
7704 matching_memory = 1;
7705 else if (GET_RTX_CLASS (code) == 'c'
7706 && rtx_equal_p (dst, src2))
7707 matching_memory = 2;
7708 else
7709 dst = gen_reg_rtx (mode);
7710 }
7711
7712 /* Both source operands cannot be in memory. */
7713 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7714 {
7715 if (matching_memory != 2)
7716 src2 = force_reg (mode, src2);
7717 else
7718 src1 = force_reg (mode, src1);
7719 }
7720
7721 /* If the operation is not commutative, source 1 cannot be a constant
7722 or non-matching memory. */
7723 if ((CONSTANT_P (src1)
7724 || (!matching_memory && GET_CODE (src1) == MEM))
7725 && GET_RTX_CLASS (code) != 'c')
7726 src1 = force_reg (mode, src1);
7727
7728 /* If optimizing, copy to regs to improve CSE */
7729 if (optimize && ! no_new_pseudos)
7730 {
7731 if (GET_CODE (dst) == MEM)
7732 dst = gen_reg_rtx (mode);
7733 if (GET_CODE (src1) == MEM)
7734 src1 = force_reg (mode, src1);
7735 if (GET_CODE (src2) == MEM)
7736 src2 = force_reg (mode, src2);
7737 }
7738
7739 /* Emit the instruction. */
7740
7741 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7742 if (reload_in_progress)
7743 {
7744 /* Reload doesn't know about the flags register, and doesn't know that
7745 it doesn't want to clobber it. We can only do this with PLUS. */
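	/* Note: an lea leaves the flags untouched, so a PLUS emitted without
	   the flags clobber can still be matched; presumably this is why
	   only PLUS is safe here.  */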
7746 if (code != PLUS)
7747 abort ();
7748 emit_insn (op);
7749 }
7750 else
7751 {
7752 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7753 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7754 }
7755
7756 /* Fix up the destination if needed. */
7757 if (dst != operands[0])
7758 emit_move_insn (operands[0], dst);
7759 }
7760
7761 /* Return TRUE or FALSE depending on whether the binary operator meets the
7762 appropriate constraints. */
7763
7764 int
7765 ix86_binary_operator_ok (code, mode, operands)
7766 enum rtx_code code;
7767 enum machine_mode mode ATTRIBUTE_UNUSED;
7768 rtx operands[3];
7769 {
7770 /* Both source operands cannot be in memory. */
7771 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7772 return 0;
7773 /* If the operation is not commutative, source 1 cannot be a constant. */
7774 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7775 return 0;
7776 /* If the destination is memory, we must have a matching source operand. */
7777 if (GET_CODE (operands[0]) == MEM
7778 && ! (rtx_equal_p (operands[0], operands[1])
7779 || (GET_RTX_CLASS (code) == 'c'
7780 && rtx_equal_p (operands[0], operands[2]))))
7781 return 0;
7782 /* If the operation is not commutative and source 1 is memory, we must
7783 have a matching destination. */
7784 if (GET_CODE (operands[1]) == MEM
7785 && GET_RTX_CLASS (code) != 'c'
7786 && ! rtx_equal_p (operands[0], operands[1]))
7787 return 0;
7788 return 1;
7789 }
7790
7791 /* Attempt to expand a unary operator. Make the expansion closer to the
7792 actual machine than just general_operand, which would allow 2 separate
7793 memory references (one output, one input) in a single insn. */
7794
7795 void
7796 ix86_expand_unary_operator (code, mode, operands)
7797 enum rtx_code code;
7798 enum machine_mode mode;
7799 rtx operands[];
7800 {
7801 int matching_memory;
7802 rtx src, dst, op, clob;
7803
7804 dst = operands[0];
7805 src = operands[1];
7806
7807 /* If the destination is memory, and we do not have matching source
7808 operands, do things in registers. */
7809 matching_memory = 0;
7810 if (GET_CODE (dst) == MEM)
7811 {
7812 if (rtx_equal_p (dst, src))
7813 matching_memory = 1;
7814 else
7815 dst = gen_reg_rtx (mode);
7816 }
7817
7818 /* When source operand is memory, destination must match. */
7819 if (!matching_memory && GET_CODE (src) == MEM)
7820 src = force_reg (mode, src);
7821
7822 /* If optimizing, copy to regs to improve CSE */
7823 if (optimize && ! no_new_pseudos)
7824 {
7825 if (GET_CODE (dst) == MEM)
7826 dst = gen_reg_rtx (mode);
7827 if (GET_CODE (src) == MEM)
7828 src = force_reg (mode, src);
7829 }
7830
7831 /* Emit the instruction. */
7832
7833 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7834 if (reload_in_progress || code == NOT)
7835 {
7836 /* Reload doesn't know about the flags register, and doesn't know that
7837 it doesn't want to clobber it. */
7838 if (code != NOT)
7839 abort ();
7840 emit_insn (op);
7841 }
7842 else
7843 {
7844 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7846 }
7847
7848 /* Fix up the destination if needed. */
7849 if (dst != operands[0])
7850 emit_move_insn (operands[0], dst);
7851 }
7852
7853 /* Return TRUE or FALSE depending on whether the unary operator meets the
7854 appropriate constraints. */
7855
7856 int
7857 ix86_unary_operator_ok (code, mode, operands)
7858 enum rtx_code code ATTRIBUTE_UNUSED;
7859 enum machine_mode mode ATTRIBUTE_UNUSED;
7860 rtx operands[2] ATTRIBUTE_UNUSED;
7861 {
7862 /* If one of operands is memory, source and destination must match. */
7863 if ((GET_CODE (operands[0]) == MEM
7864 || GET_CODE (operands[1]) == MEM)
7865 && ! rtx_equal_p (operands[0], operands[1]))
7866 return FALSE;
7867 return TRUE;
7868 }
7869
7870 /* Return TRUE or FALSE depending on whether the first SET in INSN
7871 has source and destination with matching CC modes and whether the
7872 CC mode is at least as constrained as REQ_MODE. */
7873
7874 int
7875 ix86_match_ccmode (insn, req_mode)
7876 rtx insn;
7877 enum machine_mode req_mode;
7878 {
7879 rtx set;
7880 enum machine_mode set_mode;
7881
7882 set = PATTERN (insn);
7883 if (GET_CODE (set) == PARALLEL)
7884 set = XVECEXP (set, 0, 0);
7885 if (GET_CODE (set) != SET)
7886 abort ();
7887 if (GET_CODE (SET_SRC (set)) != COMPARE)
7888 abort ();
7889
7890 set_mode = GET_MODE (SET_DEST (set));
7891 switch (set_mode)
7892 {
7893 case CCNOmode:
7894 if (req_mode != CCNOmode
7895 && (req_mode != CCmode
7896 || XEXP (SET_SRC (set), 1) != const0_rtx))
7897 return 0;
7898 break;
7899 case CCmode:
7900 if (req_mode == CCGCmode)
7901 return 0;
7902 /* FALLTHRU */
7903 case CCGCmode:
7904 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7905 return 0;
7906 /* FALLTHRU */
7907 case CCGOCmode:
7908 if (req_mode == CCZmode)
7909 return 0;
7910 /* FALLTHRU */
7911 case CCZmode:
7912 break;
7913
7914 default:
7915 abort ();
7916 }
7917
7918 return (GET_MODE (SET_SRC (set)) == set_mode);
7919 }
7920
7921 /* Generate insn patterns to do an integer compare of OPERANDS. */
7922
7923 static rtx
7924 ix86_expand_int_compare (code, op0, op1)
7925 enum rtx_code code;
7926 rtx op0, op1;
7927 {
7928 enum machine_mode cmpmode;
7929 rtx tmp, flags;
7930
7931 cmpmode = SELECT_CC_MODE (code, op0, op1);
7932 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7933
7934 /* This is very simple, but making the interface the same as in the
7935 FP case makes the rest of the code easier. */
7936 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7937 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7938
7939 /* Return the test that should be put into the flags user, i.e.
7940 the bcc, scc, or cmov instruction. */
7941 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7942 }
7943
7944 /* Figure out whether to use ordered or unordered fp comparisons.
7945 Return the appropriate mode to use. */
7946
7947 enum machine_mode
7948 ix86_fp_compare_mode (code)
7949 enum rtx_code code ATTRIBUTE_UNUSED;
7950 {
7951 /* ??? In order to make all comparisons reversible, we do all comparisons
7952 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7953 all forms of trapping and non-trapping comparisons, we can make inequality
7954 comparisons trapping again, since it results in better code when using
7955 FCOM based compares. */
7956 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7957 }
7958
7959 enum machine_mode
7960 ix86_cc_mode (code, op0, op1)
7961 enum rtx_code code;
7962 rtx op0, op1;
7963 {
7964 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7965 return ix86_fp_compare_mode (code);
7966 switch (code)
7967 {
7968 /* Only zero flag is needed. */
7969 case EQ: /* ZF=0 */
7970 case NE: /* ZF!=0 */
7971 return CCZmode;
7972 /* Codes needing carry flag. */
7973 case GEU: /* CF=0 */
7974 case GTU: /* CF=0 & ZF=0 */
7975 case LTU: /* CF=1 */
7976 case LEU: /* CF=1 | ZF=1 */
7977 return CCmode;
7978 /* Codes possibly doable only with sign flag when
7979 comparing against zero. */
7980 case GE: /* SF=OF or SF=0 */
7981 case LT: /* SF<>OF or SF=1 */
7982 if (op1 == const0_rtx)
7983 return CCGOCmode;
7984 else
7985 /* For the other cases the carry flag is not required. */
7986 return CCGCmode;
7987 /* Codes doable only with the sign flag when comparing
7988 against zero, but we lack a jump instruction for them,
7989 so we need to use relational tests against overflow,
7990 which therefore needs to be zero. */
7991 case GT: /* ZF=0 & SF=OF */
7992 case LE: /* ZF=1 | SF<>OF */
7993 if (op1 == const0_rtx)
7994 return CCNOmode;
7995 else
7996 return CCGCmode;
7997 /* The strcmp pattern does (use flags), and combine may ask us for the
7998 proper mode. */
7999 case USE:
8000 return CCmode;
8001 default:
8002 abort ();
8003 }
8004 }
8005
8006 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8007
8008 int
8009 ix86_use_fcomi_compare (code)
8010 enum rtx_code code ATTRIBUTE_UNUSED;
8011 {
8012 enum rtx_code swapped_code = swap_condition (code);
8013 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8014 || (ix86_fp_comparison_cost (swapped_code)
8015 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8016 }
8017
8018 /* Swap, force into registers, or otherwise massage the two operands
8019 to a fp comparison. The operands are updated in place; the new
8020 comparison code is returned. */
8021
8022 static enum rtx_code
8023 ix86_prepare_fp_compare_args (code, pop0, pop1)
8024 enum rtx_code code;
8025 rtx *pop0, *pop1;
8026 {
8027 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8028 rtx op0 = *pop0, op1 = *pop1;
8029 enum machine_mode op_mode = GET_MODE (op0);
8030 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8031
8032 /* All of the unordered compare instructions only work on registers.
8033 The same is true of the XFmode compare instructions. The same is
8034 true of the fcomi compare instructions. */
8035
8036 if (!is_sse
8037 && (fpcmp_mode == CCFPUmode
8038 || op_mode == XFmode
8039 || op_mode == TFmode
8040 || ix86_use_fcomi_compare (code)))
8041 {
8042 op0 = force_reg (op_mode, op0);
8043 op1 = force_reg (op_mode, op1);
8044 }
8045 else
8046 {
8047 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8048 things around if they appear profitable, otherwise force op0
8049 into a register. */
8050
8051 if (standard_80387_constant_p (op0) == 0
8052 || (GET_CODE (op0) == MEM
8053 && ! (standard_80387_constant_p (op1) == 0
8054 || GET_CODE (op1) == MEM)))
8055 {
8056 rtx tmp;
8057 tmp = op0, op0 = op1, op1 = tmp;
8058 code = swap_condition (code);
8059 }
8060
8061 if (GET_CODE (op0) != REG)
8062 op0 = force_reg (op_mode, op0);
8063
8064 if (CONSTANT_P (op1))
8065 {
8066 if (standard_80387_constant_p (op1))
8067 op1 = force_reg (op_mode, op1);
8068 else
8069 op1 = validize_mem (force_const_mem (op_mode, op1));
8070 }
8071 }
8072
8073 /* Try to rearrange the comparison to make it cheaper. */
8074 if (ix86_fp_comparison_cost (code)
8075 > ix86_fp_comparison_cost (swap_condition (code))
8076 && (GET_CODE (op1) == REG || !no_new_pseudos))
8077 {
8078 rtx tmp;
8079 tmp = op0, op0 = op1, op1 = tmp;
8080 code = swap_condition (code);
8081 if (GET_CODE (op0) != REG)
8082 op0 = force_reg (op_mode, op0);
8083 }
8084
8085 *pop0 = op0;
8086 *pop1 = op1;
8087 return code;
8088 }
8089
8090 /* Convert the comparison codes we use to represent an FP comparison into
8091 the integer code that will result in a proper branch. Return UNKNOWN if
8092 no such code is available. */
8093 static enum rtx_code
8094 ix86_fp_compare_code_to_integer (code)
8095 enum rtx_code code;
8096 {
8097 switch (code)
8098 {
8099 case GT:
8100 return GTU;
8101 case GE:
8102 return GEU;
8103 case ORDERED:
8104 case UNORDERED:
8105 return code;
8106 break;
8107 case UNEQ:
8108 return EQ;
8109 break;
8110 case UNLT:
8111 return LTU;
8112 break;
8113 case UNLE:
8114 return LEU;
8115 break;
8116 case LTGT:
8117 return NE;
8118 break;
8119 default:
8120 return UNKNOWN;
8121 }
8122 }
8123
8124 /* Split comparison code CODE into comparisons we can do using branch
8125 instructions. BYPASS_CODE is the comparison code for a branch that will
8126 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8127 is not required, its value is set to NIL.
8128 We never require more than two branches. */
8129 static void
8130 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8131 enum rtx_code code, *bypass_code, *first_code, *second_code;
8132 {
8133 *first_code = code;
8134 *bypass_code = NIL;
8135 *second_code = NIL;
8136
8137 /* The fcomi comparison sets flags as follows:
8138
8139 cmp ZF PF CF
8140 > 0 0 0
8141 < 0 0 1
8142 = 1 0 0
8143 un 1 1 1 */
8144
8145 switch (code)
8146 {
8147 case GT: /* GTU - CF=0 & ZF=0 */
8148 case GE: /* GEU - CF=0 */
8149 case ORDERED: /* PF=0 */
8150 case UNORDERED: /* PF=1 */
8151 case UNEQ: /* EQ - ZF=1 */
8152 case UNLT: /* LTU - CF=1 */
8153 case UNLE: /* LEU - CF=1 | ZF=1 */
8154 case LTGT: /* EQ - ZF=0 */
8155 break;
8156 case LT: /* LTU - CF=1 - fails on unordered */
8157 *first_code = UNLT;
8158 *bypass_code = UNORDERED;
8159 break;
8160 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8161 *first_code = UNLE;
8162 *bypass_code = UNORDERED;
8163 break;
8164 case EQ: /* EQ - ZF=1 - fails on unordered */
8165 *first_code = UNEQ;
8166 *bypass_code = UNORDERED;
8167 break;
8168 case NE: /* NE - ZF=0 - fails on unordered */
8169 *first_code = LTGT;
8170 *second_code = UNORDERED;
8171 break;
8172 case UNGE: /* GEU - CF=0 - fails on unordered */
8173 *first_code = GE;
8174 *second_code = UNORDERED;
8175 break;
8176 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8177 *first_code = GT;
8178 *second_code = UNORDERED;
8179 break;
8180 default:
8181 abort ();
8182 }
8183 if (!TARGET_IEEE_FP)
8184 {
8185 *second_code = NIL;
8186 *bypass_code = NIL;
8187 }
8188 }
8189
8190 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8191 All of the following cost functions use the number of instructions as the metric.
8192 In the future this should be tweaked to compute bytes for optimize_size and
8193 to take into account the performance of the various instructions on the various CPUs. */
8194 static int
8195 ix86_fp_comparison_arithmetics_cost (code)
8196 enum rtx_code code;
8197 {
8198 if (!TARGET_IEEE_FP)
8199 return 4;
8200 /* The cost of code output by ix86_expand_fp_compare. */
8201 switch (code)
8202 {
8203 case UNLE:
8204 case UNLT:
8205 case LTGT:
8206 case GT:
8207 case GE:
8208 case UNORDERED:
8209 case ORDERED:
8210 case UNEQ:
8211 return 4;
8212 break;
8213 case LT:
8214 case NE:
8215 case EQ:
8216 case UNGE:
8217 return 5;
8218 break;
8219 case LE:
8220 case UNGT:
8221 return 6;
8222 break;
8223 default:
8224 abort ();
8225 }
8226 }
8227
8228 /* Return cost of comparison done using fcomi operation.
8229 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8230 static int
8231 ix86_fp_comparison_fcomi_cost (code)
8232 enum rtx_code code;
8233 {
8234 enum rtx_code bypass_code, first_code, second_code;
8235 /* Return an arbitrarily high cost when the instruction is not supported;
8236 this prevents gcc from using it. */
8237 if (!TARGET_CMOVE)
8238 return 1024;
8239 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8240 return (bypass_code != NIL || second_code != NIL) + 2;
8241 }
8242
8243 /* Return cost of comparison done using sahf operation.
8244 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8245 static int
8246 ix86_fp_comparison_sahf_cost (code)
8247 enum rtx_code code;
8248 {
8249 enum rtx_code bypass_code, first_code, second_code;
8250 /* Return an arbitrarily high cost when the instruction is not preferred;
8251 this keeps gcc from using it. */
8252 if (!TARGET_USE_SAHF && !optimize_size)
8253 return 1024;
8254 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8255 return (bypass_code != NIL || second_code != NIL) + 3;
8256 }
8257
8258 /* Compute cost of the comparison done using any method.
8259 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8260 static int
8261 ix86_fp_comparison_cost (code)
8262 enum rtx_code code;
8263 {
8264 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8265 int min;
8266
8267 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8268 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8269
8270 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8271 if (min > sahf_cost)
8272 min = sahf_cost;
8273 if (min > fcomi_cost)
8274 min = fcomi_cost;
8275 return min;
8276 }
8277
8278 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8279
8280 static rtx
8281 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8282 enum rtx_code code;
8283 rtx op0, op1, scratch;
8284 rtx *second_test;
8285 rtx *bypass_test;
8286 {
8287 enum machine_mode fpcmp_mode, intcmp_mode;
8288 rtx tmp, tmp2;
8289 int cost = ix86_fp_comparison_cost (code);
8290 enum rtx_code bypass_code, first_code, second_code;
8291
8292 fpcmp_mode = ix86_fp_compare_mode (code);
8293 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8294
8295 if (second_test)
8296 *second_test = NULL_RTX;
8297 if (bypass_test)
8298 *bypass_test = NULL_RTX;
8299
8300 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8301
8302 /* Do fcomi/sahf based test when profitable. */
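 /* fcomi (available when TARGET_CMOVE, i.e. PPro and later) sets the integer
 flags directly; otherwise the FP status word is stored with fnstsw and
 copied into the flags with sahf. */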
8303 if ((bypass_code == NIL || bypass_test)
8304 && (second_code == NIL || second_test)
8305 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8306 {
8307 if (TARGET_CMOVE)
8308 {
8309 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8310 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8311 tmp);
8312 emit_insn (tmp);
8313 }
8314 else
8315 {
8316 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8317 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8318 if (!scratch)
8319 scratch = gen_reg_rtx (HImode);
8320 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8321 emit_insn (gen_x86_sahf_1 (scratch));
8322 }
8323
8324 /* The FP codes work out to act like unsigned. */
8325 intcmp_mode = fpcmp_mode;
8326 code = first_code;
8327 if (bypass_code != NIL)
8328 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8329 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8330 const0_rtx);
8331 if (second_code != NIL)
8332 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8333 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8334 const0_rtx);
8335 }
8336 else
8337 {
8338 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8339 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8340 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8341 if (!scratch)
8342 scratch = gen_reg_rtx (HImode);
8343 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8344
8345 /* In the unordered case, we have to check C2 for NaN's, which
8346 doesn't happen to work out to anything nice combination-wise.
8347 So do some bit twiddling on the value we've got in AH to come
8348 up with an appropriate set of condition codes. */
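 /* After fnstsw, AH holds the x87 condition bits: C0 in bit 0 (0x01),
 C2 in bit 2 (0x04), and C3 in bit 6 (0x40), so mask 0x45 tests C0|C2|C3
 and 0x44 tests C2|C3. C0/C2/C3 correspond to CF/PF/ZF in the fcomi
 table above. */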
8349
8350 intcmp_mode = CCNOmode;
8351 switch (code)
8352 {
8353 case GT:
8354 case UNGT:
8355 if (code == GT || !TARGET_IEEE_FP)
8356 {
8357 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8358 code = EQ;
8359 }
8360 else
8361 {
8362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8363 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8364 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8365 intcmp_mode = CCmode;
8366 code = GEU;
8367 }
8368 break;
8369 case LT:
8370 case UNLT:
8371 if (code == LT && TARGET_IEEE_FP)
8372 {
8373 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8374 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8375 intcmp_mode = CCmode;
8376 code = EQ;
8377 }
8378 else
8379 {
8380 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8381 code = NE;
8382 }
8383 break;
8384 case GE:
8385 case UNGE:
8386 if (code == GE || !TARGET_IEEE_FP)
8387 {
8388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8389 code = EQ;
8390 }
8391 else
8392 {
8393 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8394 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8395 GEN_INT (0x01)));
8396 code = NE;
8397 }
8398 break;
8399 case LE:
8400 case UNLE:
8401 if (code == LE && TARGET_IEEE_FP)
8402 {
8403 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8404 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8405 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8406 intcmp_mode = CCmode;
8407 code = LTU;
8408 }
8409 else
8410 {
8411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8412 code = NE;
8413 }
8414 break;
8415 case EQ:
8416 case UNEQ:
8417 if (code == EQ && TARGET_IEEE_FP)
8418 {
8419 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8420 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8421 intcmp_mode = CCmode;
8422 code = EQ;
8423 }
8424 else
8425 {
8426 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8427 code = NE;
8428 break;
8429 }
8430 break;
8431 case NE:
8432 case LTGT:
8433 if (code == NE && TARGET_IEEE_FP)
8434 {
8435 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8436 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8437 GEN_INT (0x40)));
8438 code = NE;
8439 }
8440 else
8441 {
8442 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8443 code = EQ;
8444 }
8445 break;
8446
8447 case UNORDERED:
8448 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8449 code = NE;
8450 break;
8451 case ORDERED:
8452 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8453 code = EQ;
8454 break;
8455
8456 default:
8457 abort ();
8458 }
8459 }
8460
8461 /* Return the test that should be put into the flags user, i.e.
8462 the bcc, scc, or cmov instruction. */
8463 return gen_rtx_fmt_ee (code, VOIDmode,
8464 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8465 const0_rtx);
8466 }
8467
8468 rtx
8469 ix86_expand_compare (code, second_test, bypass_test)
8470 enum rtx_code code;
8471 rtx *second_test, *bypass_test;
8472 {
8473 rtx op0, op1, ret;
8474 op0 = ix86_compare_op0;
8475 op1 = ix86_compare_op1;
8476
8477 if (second_test)
8478 *second_test = NULL_RTX;
8479 if (bypass_test)
8480 *bypass_test = NULL_RTX;
8481
8482 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8483 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8484 second_test, bypass_test);
8485 else
8486 ret = ix86_expand_int_compare (code, op0, op1);
8487
8488 return ret;
8489 }
8490
8491 /* Return true if the CODE will result in nontrivial jump sequence. */
8492 bool
8493 ix86_fp_jump_nontrivial_p (code)
8494 enum rtx_code code;
8495 {
8496 enum rtx_code bypass_code, first_code, second_code;
8497 if (!TARGET_CMOVE)
8498 return true;
8499 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8500 return bypass_code != NIL || second_code != NIL;
8501 }
8502
8503 void
8504 ix86_expand_branch (code, label)
8505 enum rtx_code code;
8506 rtx label;
8507 {
8508 rtx tmp;
8509
8510 switch (GET_MODE (ix86_compare_op0))
8511 {
8512 case QImode:
8513 case HImode:
8514 case SImode:
8515 simple:
8516 tmp = ix86_expand_compare (code, NULL, NULL);
8517 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8518 gen_rtx_LABEL_REF (VOIDmode, label),
8519 pc_rtx);
8520 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8521 return;
8522
8523 case SFmode:
8524 case DFmode:
8525 case XFmode:
8526 case TFmode:
8527 {
8528 rtvec vec;
8529 int use_fcomi;
8530 enum rtx_code bypass_code, first_code, second_code;
8531
8532 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8533 &ix86_compare_op1);
8534
8535 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8536
8537 /* Check whether we will use the natural sequence with one jump. If
8538 so, we can expand the jump early. Otherwise delay expansion by
8539 creating a compound insn so as not to confuse the optimizers. */
8540 if (bypass_code == NIL && second_code == NIL
8541 && TARGET_CMOVE)
8542 {
8543 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8544 gen_rtx_LABEL_REF (VOIDmode, label),
8545 pc_rtx, NULL_RTX);
8546 }
8547 else
8548 {
8549 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8550 ix86_compare_op0, ix86_compare_op1);
8551 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8552 gen_rtx_LABEL_REF (VOIDmode, label),
8553 pc_rtx);
8554 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8555
8556 use_fcomi = ix86_use_fcomi_compare (code);
8557 vec = rtvec_alloc (3 + !use_fcomi);
8558 RTVEC_ELT (vec, 0) = tmp;
8559 RTVEC_ELT (vec, 1)
8560 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8561 RTVEC_ELT (vec, 2)
8562 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8563 if (! use_fcomi)
8564 RTVEC_ELT (vec, 3)
8565 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8566
8567 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8568 }
8569 return;
8570 }
8571
8572 case DImode:
8573 if (TARGET_64BIT)
8574 goto simple;
8575 /* Expand DImode branch into multiple compare+branch. */
8576 {
8577 rtx lo[2], hi[2], label2;
8578 enum rtx_code code1, code2, code3;
8579
8580 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8581 {
8582 tmp = ix86_compare_op0;
8583 ix86_compare_op0 = ix86_compare_op1;
8584 ix86_compare_op1 = tmp;
8585 code = swap_condition (code);
8586 }
8587 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8588 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8589
8590 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8591 avoid two branches. This costs one extra insn, so disable when
8592 optimizing for size. */
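 /* The ior of the two xors is zero exactly when both halves are equal,
 so the DImode EQ/NE reduces to a single SImode compare against zero. */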
8593
8594 if ((code == EQ || code == NE)
8595 && (!optimize_size
8596 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8597 {
8598 rtx xor0, xor1;
8599
8600 xor1 = hi[0];
8601 if (hi[1] != const0_rtx)
8602 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8603 NULL_RTX, 0, OPTAB_WIDEN);
8604
8605 xor0 = lo[0];
8606 if (lo[1] != const0_rtx)
8607 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8608 NULL_RTX, 0, OPTAB_WIDEN);
8609
8610 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8611 NULL_RTX, 0, OPTAB_WIDEN);
8612
8613 ix86_compare_op0 = tmp;
8614 ix86_compare_op1 = const0_rtx;
8615 ix86_expand_branch (code, label);
8616 return;
8617 }
8618
8619 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
8620 op1 is a constant, and the low word is zero, then we can just
8621 examine the high word. */
8622
8623 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8624 switch (code)
8625 {
8626 case LT: case LTU: case GE: case GEU:
8627 ix86_compare_op0 = hi[0];
8628 ix86_compare_op1 = hi[1];
8629 ix86_expand_branch (code, label);
8630 return;
8631 default:
8632 break;
8633 }
8634
8635 /* Otherwise, we need two or three jumps. */
8636
8637 label2 = gen_label_rtx ();
8638
8639 code1 = code;
8640 code2 = swap_condition (code);
8641 code3 = unsigned_condition (code);
8642
8643 switch (code)
8644 {
8645 case LT: case GT: case LTU: case GTU:
8646 break;
8647
8648 case LE: code1 = LT; code2 = GT; break;
8649 case GE: code1 = GT; code2 = LT; break;
8650 case LEU: code1 = LTU; code2 = GTU; break;
8651 case GEU: code1 = GTU; code2 = LTU; break;
8652
8653 case EQ: code1 = NIL; code2 = NE; break;
8654 case NE: code2 = NIL; break;
8655
8656 default:
8657 abort ();
8658 }
8659
8660 /*
8661 * a < b =>
8662 * if (hi(a) < hi(b)) goto true;
8663 * if (hi(a) > hi(b)) goto false;
8664 * if (lo(a) < lo(b)) goto true;
8665 * false:
8666 */
8667
8668 ix86_compare_op0 = hi[0];
8669 ix86_compare_op1 = hi[1];
8670
8671 if (code1 != NIL)
8672 ix86_expand_branch (code1, label);
8673 if (code2 != NIL)
8674 ix86_expand_branch (code2, label2);
8675
8676 ix86_compare_op0 = lo[0];
8677 ix86_compare_op1 = lo[1];
8678 ix86_expand_branch (code3, label);
8679
8680 if (code2 != NIL)
8681 emit_label (label2);
8682 return;
8683 }
8684
8685 default:
8686 abort ();
8687 }
8688 }
8689
8690 /* Split branch based on floating point condition. */
8691 void
8692 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8693 enum rtx_code code;
8694 rtx op1, op2, target1, target2, tmp;
8695 {
8696 rtx second, bypass;
8697 rtx label = NULL_RTX;
8698 rtx condition;
8699 int bypass_probability = -1, second_probability = -1, probability = -1;
8700 rtx i;
8701
8702 if (target2 != pc_rtx)
8703 {
8704 rtx tmp = target2;
8705 code = reverse_condition_maybe_unordered (code);
8706 target2 = target1;
8707 target1 = tmp;
8708 }
8709
8710 condition = ix86_expand_fp_compare (code, op1, op2,
8711 tmp, &second, &bypass);
8712
8713 if (split_branch_probability >= 0)
8714 {
8715 /* Distribute the probabilities across the jumps.
8716 Assume that BYPASS and SECOND always test
8717 for UNORDERED. */
8718 probability = split_branch_probability;
8719
8720 /* A value of 1 is low enough that the probability does not need
8721 to be updated. Later we may run some experiments and see
8722 whether unordered values are more frequent in practice. */
8723 if (bypass)
8724 bypass_probability = 1;
8725 if (second)
8726 second_probability = 1;
8727 }
8728 if (bypass != NULL_RTX)
8729 {
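 /* The bypass condition is normally UNORDERED; when it holds, jump past
 the main (and any second) conditional jump to LABEL, which is emitted
 at the end of this function. */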
8730 label = gen_label_rtx ();
8731 i = emit_jump_insn (gen_rtx_SET
8732 (VOIDmode, pc_rtx,
8733 gen_rtx_IF_THEN_ELSE (VOIDmode,
8734 bypass,
8735 gen_rtx_LABEL_REF (VOIDmode,
8736 label),
8737 pc_rtx)));
8738 if (bypass_probability >= 0)
8739 REG_NOTES (i)
8740 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8741 GEN_INT (bypass_probability),
8742 REG_NOTES (i));
8743 }
8744 i = emit_jump_insn (gen_rtx_SET
8745 (VOIDmode, pc_rtx,
8746 gen_rtx_IF_THEN_ELSE (VOIDmode,
8747 condition, target1, target2)));
8748 if (probability >= 0)
8749 REG_NOTES (i)
8750 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8751 GEN_INT (probability),
8752 REG_NOTES (i));
8753 if (second != NULL_RTX)
8754 {
8755 i = emit_jump_insn (gen_rtx_SET
8756 (VOIDmode, pc_rtx,
8757 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8758 target2)));
8759 if (second_probability >= 0)
8760 REG_NOTES (i)
8761 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8762 GEN_INT (second_probability),
8763 REG_NOTES (i));
8764 }
8765 if (label != NULL_RTX)
8766 emit_label (label);
8767 }
8768
8769 int
8770 ix86_expand_setcc (code, dest)
8771 enum rtx_code code;
8772 rtx dest;
8773 {
8774 rtx ret, tmp, tmpreg;
8775 rtx second_test, bypass_test;
8776
8777 if (GET_MODE (ix86_compare_op0) == DImode
8778 && !TARGET_64BIT)
8779 return 0; /* FAIL */
8780
8781 if (GET_MODE (dest) != QImode)
8782 abort ();
8783
8784 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8785 PUT_MODE (ret, QImode);
8786
8787 tmp = dest;
8788 tmpreg = dest;
8789
8790 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8791 if (bypass_test || second_test)
8792 {
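 /* Combine the extra test with the main setcc result: a second test
 catches cases the first one misses, so the two results are IORed;
 a bypass test is reversed and ANDed in, since the main result is only
 valid when the (unordered) bypass condition is false. */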
8793 rtx test = second_test;
8794 int bypass = 0;
8795 rtx tmp2 = gen_reg_rtx (QImode);
8796 if (bypass_test)
8797 {
8798 if (second_test)
8799 abort ();
8800 test = bypass_test;
8801 bypass = 1;
8802 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8803 }
8804 PUT_MODE (test, QImode);
8805 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8806
8807 if (bypass)
8808 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8809 else
8810 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8811 }
8812
8813 return 1; /* DONE */
8814 }
8815
8816 int
8817 ix86_expand_int_movcc (operands)
8818 rtx operands[];
8819 {
8820 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8821 rtx compare_seq, compare_op;
8822 rtx second_test, bypass_test;
8823 enum machine_mode mode = GET_MODE (operands[0]);
8824
8825 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8826 When the comparison is done with an immediate, we can convert it to LTU or
8827 GEU by altering the constant. */
8828
8829 if ((code == LEU || code == GTU)
8830 && GET_CODE (ix86_compare_op1) == CONST_INT
8831 && mode != HImode
8832 && INTVAL (ix86_compare_op1) != -1
8833 /* For x86-64, the immediate field in the instruction is 32-bit
8834 signed, so we can't increment a DImode value above 0x7fffffff. */
8835 && (!TARGET_64BIT
8836 || GET_MODE (ix86_compare_op0) != DImode
8837 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8838 && GET_CODE (operands[2]) == CONST_INT
8839 && GET_CODE (operands[3]) == CONST_INT)
8840 {
8841 if (code == LEU)
8842 code = LTU;
8843 else
8844 code = GEU;
8845 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8846 GET_MODE (ix86_compare_op0));
8847 }
8848
8849 start_sequence ();
8850 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8851 compare_seq = get_insns ();
8852 end_sequence ();
8853
8854 compare_code = GET_CODE (compare_op);
8855
8856 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8857 HImode insns, we'd be swallowed in word prefix ops. */
8858
8859 if (mode != HImode
8860 && (mode != DImode || TARGET_64BIT)
8861 && GET_CODE (operands[2]) == CONST_INT
8862 && GET_CODE (operands[3]) == CONST_INT)
8863 {
8864 rtx out = operands[0];
8865 HOST_WIDE_INT ct = INTVAL (operands[2]);
8866 HOST_WIDE_INT cf = INTVAL (operands[3]);
8867 HOST_WIDE_INT diff;
8868
8869 if ((compare_code == LTU || compare_code == GEU)
8870 && !second_test && !bypass_test)
8871 {
8872 /* Detect overlap between destination and compare sources. */
8873 rtx tmp = out;
8874
8875 /* To simplify rest of code, restrict to the GEU case. */
8876 if (compare_code == LTU)
8877 {
8878 int tmp = ct;
8879 ct = cf;
8880 cf = tmp;
8881 compare_code = reverse_condition (compare_code);
8882 code = reverse_condition (code);
8883 }
8884 diff = ct - cf;
8885
8886 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8887 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8888 tmp = gen_reg_rtx (mode);
8889
8890 emit_insn (compare_seq);
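 /* gen_x86_mov{si,di}cc_0_m1 expands to 'sbb reg,reg', which copies the
 carry flag into every bit of TMP: -1 when the unsigned comparison is
 below (GEU false), 0 when GEU holds. */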
8891 if (mode == DImode)
8892 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8893 else
8894 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8895
8896 if (diff == 1)
8897 {
8898 /*
8899 * cmpl op0,op1
8900 * sbbl dest,dest
8901 * [addl dest, ct]
8902 *
8903 * Size 5 - 8.
8904 */
8905 if (ct)
8906 tmp = expand_simple_binop (mode, PLUS,
8907 tmp, GEN_INT (ct),
8908 tmp, 1, OPTAB_DIRECT);
8909 }
8910 else if (cf == -1)
8911 {
8912 /*
8913 * cmpl op0,op1
8914 * sbbl dest,dest
8915 * orl $ct, dest
8916 *
8917 * Size 8.
8918 */
8919 tmp = expand_simple_binop (mode, IOR,
8920 tmp, GEN_INT (ct),
8921 tmp, 1, OPTAB_DIRECT);
8922 }
8923 else if (diff == -1 && ct)
8924 {
8925 /*
8926 * cmpl op0,op1
8927 * sbbl dest,dest
8928 * notl dest
8929 * [addl dest, cf]
8930 *
8931 * Size 8 - 11.
8932 */
8933 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8934 if (cf)
8935 tmp = expand_simple_binop (mode, PLUS,
8936 tmp, GEN_INT (cf),
8937 tmp, 1, OPTAB_DIRECT);
8938 }
8939 else
8940 {
8941 /*
8942 * cmpl op0,op1
8943 * sbbl dest,dest
8944 * [notl dest]
8945 * andl cf - ct, dest
8946 * [addl dest, ct]
8947 *
8948 * Size 8 - 11.
8949 */
8950
8951 if (cf == 0)
8952 {
8953 cf = ct;
8954 ct = 0;
8955 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8956 }
8957
8958 tmp = expand_simple_binop (mode, AND,
8959 tmp,
8960 gen_int_mode (cf - ct, mode),
8961 tmp, 1, OPTAB_DIRECT);
8962 if (ct)
8963 tmp = expand_simple_binop (mode, PLUS,
8964 tmp, GEN_INT (ct),
8965 tmp, 1, OPTAB_DIRECT);
8966 }
8967
8968 if (tmp != out)
8969 emit_move_insn (out, tmp);
8970
8971 return 1; /* DONE */
8972 }
8973
8974 diff = ct - cf;
8975 if (diff < 0)
8976 {
8977 HOST_WIDE_INT tmp;
8978 tmp = ct, ct = cf, cf = tmp;
8979 diff = -diff;
8980 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8981 {
8982 /* We may be reversing an unordered compare to a normal compare, which
8983 is not valid in general (we may convert a non-trapping condition
8984 to a trapping one); however, on i386 we currently emit all
8985 comparisons unordered. */
8986 compare_code = reverse_condition_maybe_unordered (compare_code);
8987 code = reverse_condition_maybe_unordered (code);
8988 }
8989 else
8990 {
8991 compare_code = reverse_condition (compare_code);
8992 code = reverse_condition (code);
8993 }
8994 }
8995
8996 compare_code = NIL;
8997 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8998 && GET_CODE (ix86_compare_op1) == CONST_INT)
8999 {
9000 if (ix86_compare_op1 == const0_rtx
9001 && (code == LT || code == GE))
9002 compare_code = code;
9003 else if (ix86_compare_op1 == constm1_rtx)
9004 {
9005 if (code == LE)
9006 compare_code = LT;
9007 else if (code == GT)
9008 compare_code = GE;
9009 }
9010 }
9011
9012 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9013 if (compare_code != NIL
9014 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9015 && (cf == -1 || ct == -1))
9016 {
9017 /* If the lea code below could be used, only optimize
9018 if it results in a 2-insn sequence. */
9019
9020 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9021 || diff == 3 || diff == 5 || diff == 9)
9022 || (compare_code == LT && ct == -1)
9023 || (compare_code == GE && cf == -1))
9024 {
9025 /*
9026 * notl op1 (if necessary)
9027 * sarl $31, op1
9028 * orl cf, op1
9029 */
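 /* emit_store_flag with a normalizep of -1 produces -1 when the condition
 holds and 0 otherwise (the notl/sarl sequence above); IORing in CF then
 yields -1 for the true arm and CF for the false arm, once the constants
 have been arranged so that ct == -1. */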
9030 if (ct != -1)
9031 {
9032 cf = ct;
9033 ct = -1;
9034 code = reverse_condition (code);
9035 }
9036
9037 out = emit_store_flag (out, code, ix86_compare_op0,
9038 ix86_compare_op1, VOIDmode, 0, -1);
9039
9040 out = expand_simple_binop (mode, IOR,
9041 out, GEN_INT (cf),
9042 out, 1, OPTAB_DIRECT);
9043 if (out != operands[0])
9044 emit_move_insn (operands[0], out);
9045
9046 return 1; /* DONE */
9047 }
9048 }
9049
9050 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9051 || diff == 3 || diff == 5 || diff == 9)
9052 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9053 {
9054 /*
9055 * xorl dest,dest
9056 * cmpl op1,op2
9057 * setcc dest
9058 * lea cf(dest*(ct-cf)),dest
9059 *
9060 * Size 14.
9061 *
9062 * This also catches the degenerate setcc-only case.
9063 */
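 /* OUT is 0 or 1 after the store-flag, so the final value is
 cf + out * diff. For diff in {1,2,3,4,5,8,9} the multiply and add fit
 a single lea, using (diff & ~1) as the index scale plus OUT itself
 when diff is odd. */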
9064
9065 rtx tmp;
9066 int nops;
9067
9068 out = emit_store_flag (out, code, ix86_compare_op0,
9069 ix86_compare_op1, VOIDmode, 0, 1);
9070
9071 nops = 0;
9072 /* On x86-64 the lea instruction operates on Pmode, so we need
9073 to do the arithmetic in the proper mode to match. */
9074 if (diff == 1)
9075 tmp = out;
9076 else
9077 {
9078 rtx out1;
9079 out1 = out;
9080 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9081 nops++;
9082 if (diff & 1)
9083 {
9084 tmp = gen_rtx_PLUS (mode, tmp, out1);
9085 nops++;
9086 }
9087 }
9088 if (cf != 0)
9089 {
9090 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9091 nops++;
9092 }
9093 if (tmp != out
9094 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9095 {
9096 if (nops == 1)
9097 {
9098 rtx clob;
9099
9100 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9101 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9102
9103 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9104 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9105 emit_insn (tmp);
9106 }
9107 else
9108 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9109 }
9110 if (out != operands[0])
9111 emit_move_insn (operands[0], copy_rtx (out));
9112
9113 return 1; /* DONE */
9114 }
9115
9116 /*
9117 * General case: Jumpful:
9118 * xorl dest,dest cmpl op1, op2
9119 * cmpl op1, op2 movl ct, dest
9120 * setcc dest jcc 1f
9121 * decl dest movl cf, dest
9122 * andl (cf-ct),dest 1:
9123 * addl ct,dest
9124 *
9125 * Size 20. Size 14.
9126 *
9127 * This is reasonably steep, but branch mispredict costs are
9128 * high on modern cpus, so consider failing only if optimizing
9129 * for space.
9130 *
9131 * %%% Parameterize branch_cost on the tuning architecture, then
9132 * use that. The 80386 couldn't care less about mispredicts.
9133 */
9134
9135 if (!optimize_size && !TARGET_CMOVE)
9136 {
9137 if (cf == 0)
9138 {
9139 cf = ct;
9140 ct = 0;
9141 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9142 /* We may be reversing an unordered compare to a normal compare,
9143 which is not valid in general (we may convert a non-trapping
9144 condition to a trapping one); however, on i386 we currently
9145 emit all comparisons unordered. */
9146 code = reverse_condition_maybe_unordered (code);
9147 else
9148 {
9149 code = reverse_condition (code);
9150 if (compare_code != NIL)
9151 compare_code = reverse_condition (compare_code);
9152 }
9153 }
9154
9155 if (compare_code != NIL)
9156 {
9157 /* notl op1 (if needed)
9158 sarl $31, op1
9159 andl (cf-ct), op1
9160 addl ct, op1
9161
9162 For x < 0 (resp. x <= -1) there will be no notl,
9163 so if possible swap the constants to get rid of the
9164 complement.
9165 True/false will be -1/0 while code below (store flag
9166 followed by decrement) is 0/-1, so the constants need
9167 to be exchanged once more. */
9168
9169 if (compare_code == GE || !cf)
9170 {
9171 code = reverse_condition (code);
9172 compare_code = LT;
9173 }
9174 else
9175 {
9176 HOST_WIDE_INT tmp = cf;
9177 cf = ct;
9178 ct = tmp;
9179 }
9180
9181 out = emit_store_flag (out, code, ix86_compare_op0,
9182 ix86_compare_op1, VOIDmode, 0, -1);
9183 }
9184 else
9185 {
9186 out = emit_store_flag (out, code, ix86_compare_op0,
9187 ix86_compare_op1, VOIDmode, 0, 1);
9188
9189 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9190 out, 1, OPTAB_DIRECT);
9191 }
9192
9193 out = expand_simple_binop (mode, AND, out,
9194 gen_int_mode (cf - ct, mode),
9195 out, 1, OPTAB_DIRECT);
9196 if (ct)
9197 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9198 out, 1, OPTAB_DIRECT);
9199 if (out != operands[0])
9200 emit_move_insn (operands[0], out);
9201
9202 return 1; /* DONE */
9203 }
9204 }
9205
9206 if (!TARGET_CMOVE)
9207 {
9208 /* Try a few things more with specific constants and a variable. */
9209
9210 optab op;
9211 rtx var, orig_out, out, tmp;
9212
9213 if (optimize_size)
9214 return 0; /* FAIL */
9215
9216 /* If one of the two operands is an interesting constant, load a
9217 constant with the above and mask it in with a logical operation. */
9218
9219 if (GET_CODE (operands[2]) == CONST_INT)
9220 {
9221 var = operands[3];
9222 if (INTVAL (operands[2]) == 0)
9223 operands[3] = constm1_rtx, op = and_optab;
9224 else if (INTVAL (operands[2]) == -1)
9225 operands[3] = const0_rtx, op = ior_optab;
9226 else
9227 return 0; /* FAIL */
9228 }
9229 else if (GET_CODE (operands[3]) == CONST_INT)
9230 {
9231 var = operands[2];
9232 if (INTVAL (operands[3]) == 0)
9233 operands[2] = constm1_rtx, op = and_optab;
9234 else if (INTVAL (operands[3]) == -1)
9235 operands[2] = const0_rtx, op = ior_optab;
9236 else
9237 return 0; /* FAIL */
9238 }
9239 else
9240 return 0; /* FAIL */
9241
9242 orig_out = operands[0];
9243 tmp = gen_reg_rtx (mode);
9244 operands[0] = tmp;
9245
9246 /* Recurse to get the constant loaded. */
9247 if (ix86_expand_int_movcc (operands) == 0)
9248 return 0; /* FAIL */
9249
9250 /* Mask in the interesting variable. */
9251 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9252 OPTAB_WIDEN);
9253 if (out != orig_out)
9254 emit_move_insn (orig_out, out);
9255
9256 return 1; /* DONE */
9257 }
9258
9259 /*
9260 * For comparison with above,
9261 *
9262 * movl cf,dest
9263 * movl ct,tmp
9264 * cmpl op1,op2
9265 * cmovcc tmp,dest
9266 *
9267 * Size 15.
9268 */
9269
9270 if (! nonimmediate_operand (operands[2], mode))
9271 operands[2] = force_reg (mode, operands[2]);
9272 if (! nonimmediate_operand (operands[3], mode))
9273 operands[3] = force_reg (mode, operands[3]);
9274
9275 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9276 {
9277 rtx tmp = gen_reg_rtx (mode);
9278 emit_move_insn (tmp, operands[3]);
9279 operands[3] = tmp;
9280 }
9281 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9282 {
9283 rtx tmp = gen_reg_rtx (mode);
9284 emit_move_insn (tmp, operands[2]);
9285 operands[2] = tmp;
9286 }
9287 if (! register_operand (operands[2], VOIDmode)
9288 && ! register_operand (operands[3], VOIDmode))
9289 operands[2] = force_reg (mode, operands[2]);
9290
9291 emit_insn (compare_seq);
9292 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9293 gen_rtx_IF_THEN_ELSE (mode,
9294 compare_op, operands[2],
9295 operands[3])));
9296 if (bypass_test)
9297 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9298 gen_rtx_IF_THEN_ELSE (mode,
9299 bypass_test,
9300 operands[3],
9301 operands[0])));
9302 if (second_test)
9303 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9304 gen_rtx_IF_THEN_ELSE (mode,
9305 second_test,
9306 operands[2],
9307 operands[0])));
9308
9309 return 1; /* DONE */
9310 }
9311
9312 int
9313 ix86_expand_fp_movcc (operands)
9314 rtx operands[];
9315 {
9316 enum rtx_code code;
9317 rtx tmp;
9318 rtx compare_op, second_test, bypass_test;
9319
9320 /* For SF/DFmode conditional moves based on comparisons
9321 in the same mode, we may want to use SSE min/max instructions. */
9322 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9323 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9324 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9325 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9326 && (!TARGET_IEEE_FP
9327 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9328 /* We may be called from the post-reload splitter. */
9329 && (!REG_P (operands[0])
9330 || SSE_REG_P (operands[0])
9331 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9332 {
9333 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9334 code = GET_CODE (operands[1]);
9335
9336 /* See if we have a (cross) match between the comparison operands and
9337 the conditional move operands. */
9338 if (rtx_equal_p (operands[2], op1))
9339 {
9340 rtx tmp = op0;
9341 op0 = op1;
9342 op1 = tmp;
9343 code = reverse_condition_maybe_unordered (code);
9344 }
9345 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9346 {
9347 /* Check for min operation. */
9348 if (code == LT)
9349 {
9350 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9351 if (memory_operand (op0, VOIDmode))
9352 op0 = force_reg (GET_MODE (operands[0]), op0);
9353 if (GET_MODE (operands[0]) == SFmode)
9354 emit_insn (gen_minsf3 (operands[0], op0, op1));
9355 else
9356 emit_insn (gen_mindf3 (operands[0], op0, op1));
9357 return 1;
9358 }
9359 /* Check for max operation. */
9360 if (code == GT)
9361 {
9362 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9363 if (memory_operand (op0, VOIDmode))
9364 op0 = force_reg (GET_MODE (operands[0]), op0);
9365 if (GET_MODE (operands[0]) == SFmode)
9366 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9367 else
9368 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9369 return 1;
9370 }
9371 }
9372 /* Massage the condition into an sse_comparison_operator. When we are
9373 in non-IEEE mode, try to canonicalize the destination operand
9374 to be first in the comparison; this helps reload avoid extra
9375 moves. */
9376 if (!sse_comparison_operator (operands[1], VOIDmode)
9377 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9378 {
9379 rtx tmp = ix86_compare_op0;
9380 ix86_compare_op0 = ix86_compare_op1;
9381 ix86_compare_op1 = tmp;
9382 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9383 VOIDmode, ix86_compare_op0,
9384 ix86_compare_op1);
9385 }
9386 /* Similarly, try to make the result the first operand of the conditional
9387 move. We also don't support the NE comparison on SSE, so try to
9388 avoid it. */
9389 if ((rtx_equal_p (operands[0], operands[3])
9390 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9391 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9392 {
9393 rtx tmp = operands[2];
9394 operands[2] = operands[3];
9395 operands[3] = tmp;
9396 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9397 (GET_CODE (operands[1])),
9398 VOIDmode, ix86_compare_op0,
9399 ix86_compare_op1);
9400 }
9401 if (GET_MODE (operands[0]) == SFmode)
9402 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9403 operands[2], operands[3],
9404 ix86_compare_op0, ix86_compare_op1));
9405 else
9406 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9407 operands[2], operands[3],
9408 ix86_compare_op0, ix86_compare_op1));
9409 return 1;
9410 }
9411
9412 /* The floating point conditional move instructions don't directly
9413 support conditions resulting from a signed integer comparison. */
9414
9415 code = GET_CODE (operands[1]);
9416 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9417
9418 /* The floating point conditional move instructions don't directly
9419 support signed integer comparisons. */
9420
9421 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9422 {
9423 if (second_test != NULL || bypass_test != NULL)
9424 abort ();
9425 tmp = gen_reg_rtx (QImode);
9426 ix86_expand_setcc (code, tmp);
9427 code = NE;
9428 ix86_compare_op0 = tmp;
9429 ix86_compare_op1 = const0_rtx;
9430 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9431 }
9432 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9433 {
9434 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9435 emit_move_insn (tmp, operands[3]);
9436 operands[3] = tmp;
9437 }
9438 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9439 {
9440 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9441 emit_move_insn (tmp, operands[2]);
9442 operands[2] = tmp;
9443 }
9444
9445 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9446 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9447 compare_op,
9448 operands[2],
9449 operands[3])));
9450 if (bypass_test)
9451 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9452 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9453 bypass_test,
9454 operands[3],
9455 operands[0])));
9456 if (second_test)
9457 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9458 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9459 second_test,
9460 operands[2],
9461 operands[0])));
9462
9463 return 1;
9464 }
9465
9466 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9467 works for floating point parameters and non-offsettable memories.
9468 For pushes, it returns just stack offsets; the values will be saved
9469 in the right order. Maximally three parts are generated. */
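 /* For example, on ia32 a DFmode value is split into two SImode parts and
 an XFmode/TFmode value into three; on x86-64 a TFmode value is split
 into a DImode part plus an SImode part. */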
9470
9471 static int
9472 ix86_split_to_parts (operand, parts, mode)
9473 rtx operand;
9474 rtx *parts;
9475 enum machine_mode mode;
9476 {
9477 int size;
9478
9479 if (!TARGET_64BIT)
9480 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9481 else
9482 size = (GET_MODE_SIZE (mode) + 4) / 8;
9483
9484 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9485 abort ();
9486 if (size < 2 || size > 3)
9487 abort ();
9488
9489 /* Optimize constant pool references to immediates. This is used by fp
9490 moves, which force all constants to memory to allow combining. */
9491 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9492 {
9493 rtx tmp = maybe_get_pool_constant (operand);
9494 if (tmp)
9495 operand = tmp;
9496 }
9497
9498 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9499 {
9500 /* The only non-offsettable memories we handle are pushes. */
9501 if (! push_operand (operand, VOIDmode))
9502 abort ();
9503
9504 operand = copy_rtx (operand);
9505 PUT_MODE (operand, Pmode);
9506 parts[0] = parts[1] = parts[2] = operand;
9507 }
9508 else if (!TARGET_64BIT)
9509 {
9510 if (mode == DImode)
9511 split_di (&operand, 1, &parts[0], &parts[1]);
9512 else
9513 {
9514 if (REG_P (operand))
9515 {
9516 if (!reload_completed)
9517 abort ();
9518 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9519 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9520 if (size == 3)
9521 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9522 }
9523 else if (offsettable_memref_p (operand))
9524 {
9525 operand = adjust_address (operand, SImode, 0);
9526 parts[0] = operand;
9527 parts[1] = adjust_address (operand, SImode, 4);
9528 if (size == 3)
9529 parts[2] = adjust_address (operand, SImode, 8);
9530 }
9531 else if (GET_CODE (operand) == CONST_DOUBLE)
9532 {
9533 REAL_VALUE_TYPE r;
9534 long l[4];
9535
9536 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9537 switch (mode)
9538 {
9539 case XFmode:
9540 case TFmode:
9541 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9542 parts[2] = gen_int_mode (l[2], SImode);
9543 break;
9544 case DFmode:
9545 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9546 break;
9547 default:
9548 abort ();
9549 }
9550 parts[1] = gen_int_mode (l[1], SImode);
9551 parts[0] = gen_int_mode (l[0], SImode);
9552 }
9553 else
9554 abort ();
9555 }
9556 }
9557 else
9558 {
9559 if (mode == TImode)
9560 split_ti (&operand, 1, &parts[0], &parts[1]);
9561 if (mode == XFmode || mode == TFmode)
9562 {
9563 if (REG_P (operand))
9564 {
9565 if (!reload_completed)
9566 abort ();
9567 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9568 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9569 }
9570 else if (offsettable_memref_p (operand))
9571 {
9572 operand = adjust_address (operand, DImode, 0);
9573 parts[0] = operand;
9574 parts[1] = adjust_address (operand, SImode, 8);
9575 }
9576 else if (GET_CODE (operand) == CONST_DOUBLE)
9577 {
9578 REAL_VALUE_TYPE r;
9579 long l[3];
9580
9581 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9582 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9583 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9584 if (HOST_BITS_PER_WIDE_INT >= 64)
9585 parts[0]
9586 = gen_int_mode
9587 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9588 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9589 DImode);
9590 else
9591 parts[0] = immed_double_const (l[0], l[1], DImode);
9592 parts[1] = gen_int_mode (l[2], SImode);
9593 }
9594 else
9595 abort ();
9596 }
9597 }
9598
9599 return size;
9600 }
9601
9602 /* Emit insns to perform a move or push of DI, DF, and XF values.
9603 Operands 2-4 are filled with the input values in the correct order;
9604 operands 5-7 are filled with the output values. */
9606
9607 void
9608 ix86_split_long_move (operands)
9609 rtx operands[];
9610 {
9611 rtx part[2][3];
9612 int nparts;
9613 int push = 0;
9614 int collisions = 0;
9615 enum machine_mode mode = GET_MODE (operands[0]);
9616
9617 /* The DFmode expanders may ask us to move a double.
9618 For a 64-bit target this is a single move. By hiding that fact
9619 here we simplify the i386.md splitters. */
9620 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9621 {
9622 /* Optimize constant pool reference to immediates. This is used by
9623 fp moves, that force all constants to memory to allow combining. */
9624
9625 if (GET_CODE (operands[1]) == MEM
9626 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9627 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9628 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9629 if (push_operand (operands[0], VOIDmode))
9630 {
9631 operands[0] = copy_rtx (operands[0]);
9632 PUT_MODE (operands[0], Pmode);
9633 }
9634 else
9635 operands[0] = gen_lowpart (DImode, operands[0]);
9636 operands[1] = gen_lowpart (DImode, operands[1]);
9637 emit_move_insn (operands[0], operands[1]);
9638 return;
9639 }
9640
9641 /* The only non-offsettable memory we handle is push. */
9642 if (push_operand (operands[0], VOIDmode))
9643 push = 1;
9644 else if (GET_CODE (operands[0]) == MEM
9645 && ! offsettable_memref_p (operands[0]))
9646 abort ();
9647
9648 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9649 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9650
9651 /* When emitting a push, take care of source operands on the stack. */
9652 if (push && GET_CODE (operands[1]) == MEM
9653 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9654 {
9655 if (nparts == 3)
9656 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9657 XEXP (part[1][2], 0));
9658 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9659 XEXP (part[1][1], 0));
9660 }
9661
9662 /* We need to do the copy in the right order in case an address register
9663 of the source overlaps the destination. */
9664 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9665 {
9666 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9667 collisions++;
9668 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9669 collisions++;
9670 if (nparts == 3
9671 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9672 collisions++;
9673
9674 /* Collision in the middle part can be handled by reordering. */
9675 if (collisions == 1 && nparts == 3
9676 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9677 {
9678 rtx tmp;
9679 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9680 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9681 }
9682
9683 /* If there are more collisions, we can't handle it by reordering.
9684 Do an lea to the last part and use only one colliding move. */
9685 else if (collisions > 1)
9686 {
9687 collisions = 1;
9688 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9689 XEXP (part[1][0], 0)));
9690 part[1][0] = change_address (part[1][0],
9691 TARGET_64BIT ? DImode : SImode,
9692 part[0][nparts - 1]);
9693 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9694 if (nparts == 3)
9695 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9696 }
9697 }
9698
9699 if (push)
9700 {
9701 if (!TARGET_64BIT)
9702 {
9703 if (nparts == 3)
9704 {
9705 /* We use only the first 12 bytes of the TFmode value, but for
9706 pushing we must adjust the stack as if we were pushing a real
9707 16-byte value. */
9708 if (mode == TFmode && !TARGET_64BIT)
9709 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9710 GEN_INT (-4)));
9711 emit_move_insn (part[0][2], part[1][2]);
9712 }
9713 }
9714 else
9715 {
9716 /* In 64-bit mode we don't have a 32-bit push available.  If this
9717 part is a register, that is OK - we just use the larger counterpart.
9718 We also retype memory references - these come from an attempt to
9719 avoid a REX prefix when moving the second half of a TFmode value. */
9720 if (GET_MODE (part[1][1]) == SImode)
9721 {
9722 if (GET_CODE (part[1][1]) == MEM)
9723 part[1][1] = adjust_address (part[1][1], DImode, 0);
9724 else if (REG_P (part[1][1]))
9725 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9726 else
9727 abort ();
9728 if (GET_MODE (part[1][0]) == SImode)
9729 part[1][0] = part[1][1];
9730 }
9731 }
9732 emit_move_insn (part[0][1], part[1][1]);
9733 emit_move_insn (part[0][0], part[1][0]);
9734 return;
9735 }
9736
9737 /* Choose the correct order so we do not overwrite the source before it is copied. */
9738 if ((REG_P (part[0][0])
9739 && REG_P (part[1][1])
9740 && (REGNO (part[0][0]) == REGNO (part[1][1])
9741 || (nparts == 3
9742 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9743 || (collisions > 0
9744 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9745 {
9746 if (nparts == 3)
9747 {
9748 operands[2] = part[0][2];
9749 operands[3] = part[0][1];
9750 operands[4] = part[0][0];
9751 operands[5] = part[1][2];
9752 operands[6] = part[1][1];
9753 operands[7] = part[1][0];
9754 }
9755 else
9756 {
9757 operands[2] = part[0][1];
9758 operands[3] = part[0][0];
9759 operands[5] = part[1][1];
9760 operands[6] = part[1][0];
9761 }
9762 }
9763 else
9764 {
9765 if (nparts == 3)
9766 {
9767 operands[2] = part[0][0];
9768 operands[3] = part[0][1];
9769 operands[4] = part[0][2];
9770 operands[5] = part[1][0];
9771 operands[6] = part[1][1];
9772 operands[7] = part[1][2];
9773 }
9774 else
9775 {
9776 operands[2] = part[0][0];
9777 operands[3] = part[0][1];
9778 operands[5] = part[1][0];
9779 operands[6] = part[1][1];
9780 }
9781 }
9782 emit_move_insn (operands[2], operands[5]);
9783 emit_move_insn (operands[3], operands[6]);
9784 if (nparts == 3)
9785 emit_move_insn (operands[4], operands[7]);
9786
9787 return;
9788 }
9789
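/* Split a DImode shift left by operands[2] into SImode operations.  In C
   terms the constant-count case is roughly the following sketch (not
   compiler code; LOW/HIGH are the 32-bit halves of the value):

     if (count >= 32)
       {
         high = low << (count - 32);
         low = 0;
       }
     else                               /* 1 <= count <= 31 */
       {
         high = (high << count) | (low >> (32 - count));   /* shld */
         low <<= count;
       }

   For a variable count the same effect is obtained with shld/sal, which
   only see the low 5 bits of the count, plus a conditional fixup (cmove
   or branch) for counts of 32 and above.  */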
9790 void
9791 ix86_split_ashldi (operands, scratch)
9792 rtx *operands, scratch;
9793 {
9794 rtx low[2], high[2];
9795 int count;
9796
9797 if (GET_CODE (operands[2]) == CONST_INT)
9798 {
9799 split_di (operands, 2, low, high);
9800 count = INTVAL (operands[2]) & 63;
9801
9802 if (count >= 32)
9803 {
9804 emit_move_insn (high[0], low[1]);
9805 emit_move_insn (low[0], const0_rtx);
9806
9807 if (count > 32)
9808 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9809 }
9810 else
9811 {
9812 if (!rtx_equal_p (operands[0], operands[1]))
9813 emit_move_insn (operands[0], operands[1]);
9814 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9815 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9816 }
9817 }
9818 else
9819 {
9820 if (!rtx_equal_p (operands[0], operands[1]))
9821 emit_move_insn (operands[0], operands[1]);
9822
9823 split_di (operands, 1, low, high);
9824
9825 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9826 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9827
9828 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9829 {
9830 if (! no_new_pseudos)
9831 scratch = force_reg (SImode, const0_rtx);
9832 else
9833 emit_move_insn (scratch, const0_rtx);
9834
9835 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9836 scratch));
9837 }
9838 else
9839 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9840 }
9841 }
9842
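/* Split a DImode arithmetic shift right into SImode operations.  The
   constant-count case corresponds roughly to this C sketch (not compiler
   code; HIGH is treated as signed):

     if (count >= 32)
       {
         low = (int) high >> (count - 32);
         high = (int) high >> 31;        /* replicate the sign bit */
       }
     else                                /* 1 <= count <= 31 */
       {
         low = (low >> count) | ((unsigned) high << (32 - count));  /* shrd */
         high = (int) high >> count;
       }
*/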
9843 void
9844 ix86_split_ashrdi (operands, scratch)
9845 rtx *operands, scratch;
9846 {
9847 rtx low[2], high[2];
9848 int count;
9849
9850 if (GET_CODE (operands[2]) == CONST_INT)
9851 {
9852 split_di (operands, 2, low, high);
9853 count = INTVAL (operands[2]) & 63;
9854
9855 if (count >= 32)
9856 {
9857 emit_move_insn (low[0], high[1]);
9858
9859 if (! reload_completed)
9860 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9861 else
9862 {
9863 emit_move_insn (high[0], low[0]);
9864 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9865 }
9866
9867 if (count > 32)
9868 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9869 }
9870 else
9871 {
9872 if (!rtx_equal_p (operands[0], operands[1]))
9873 emit_move_insn (operands[0], operands[1]);
9874 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9875 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9876 }
9877 }
9878 else
9879 {
9880 if (!rtx_equal_p (operands[0], operands[1]))
9881 emit_move_insn (operands[0], operands[1]);
9882
9883 split_di (operands, 1, low, high);
9884
9885 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9886 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9887
9888 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9889 {
9890 if (! no_new_pseudos)
9891 scratch = gen_reg_rtx (SImode);
9892 emit_move_insn (scratch, high[0]);
9893 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9894 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9895 scratch));
9896 }
9897 else
9898 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9899 }
9900 }
9901
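/* Split a DImode logical shift right into SImode operations.  Analogous to
   the arithmetic case above, except that the high half is filled with
   zeros rather than sign bits: for count >= 32, low = high >> (count - 32)
   and high = 0; otherwise shrd/shr are used (informal sketch only).  */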
9902 void
9903 ix86_split_lshrdi (operands, scratch)
9904 rtx *operands, scratch;
9905 {
9906 rtx low[2], high[2];
9907 int count;
9908
9909 if (GET_CODE (operands[2]) == CONST_INT)
9910 {
9911 split_di (operands, 2, low, high);
9912 count = INTVAL (operands[2]) & 63;
9913
9914 if (count >= 32)
9915 {
9916 emit_move_insn (low[0], high[1]);
9917 emit_move_insn (high[0], const0_rtx);
9918
9919 if (count > 32)
9920 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9921 }
9922 else
9923 {
9924 if (!rtx_equal_p (operands[0], operands[1]))
9925 emit_move_insn (operands[0], operands[1]);
9926 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9927 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9928 }
9929 }
9930 else
9931 {
9932 if (!rtx_equal_p (operands[0], operands[1]))
9933 emit_move_insn (operands[0], operands[1]);
9934
9935 split_di (operands, 1, low, high);
9936
9937 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9938 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9939
9940 /* Heh. By reversing the arguments, we can reuse this pattern. */
9941 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9942 {
9943 if (! no_new_pseudos)
9944 scratch = force_reg (SImode, const0_rtx);
9945 else
9946 emit_move_insn (scratch, const0_rtx);
9947
9948 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9949 scratch));
9950 }
9951 else
9952 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9953 }
9954 }
9955
9956 /* Helper function for the string operations below.  Emit code to test
9957 whether (VARIABLE & VALUE) is zero and, if so, jump to the returned label. */
9958 static rtx
9959 ix86_expand_aligntest (variable, value)
9960 rtx variable;
9961 int value;
9962 {
9963 rtx label = gen_label_rtx ();
9964 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9965 if (GET_MODE (variable) == DImode)
9966 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9967 else
9968 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9969 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9970 1, label);
9971 return label;
9972 }
9973
9974 /* Decrement COUNTREG by VALUE. */
9975 static void
9976 ix86_adjust_counter (countreg, value)
9977 rtx countreg;
9978 HOST_WIDE_INT value;
9979 {
9980 if (GET_MODE (countreg) == DImode)
9981 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9982 else
9983 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9984 }
9985
9986 /* Zero-extend EXP, which may be SImode, into a Pmode register. */
9987 rtx
9988 ix86_zero_extend_to_Pmode (exp)
9989 rtx exp;
9990 {
9991 rtx r;
9992 if (GET_MODE (exp) == VOIDmode)
9993 return force_reg (Pmode, exp);
9994 if (GET_MODE (exp) == Pmode)
9995 return copy_to_mode_reg (Pmode, exp);
9996 r = gen_reg_rtx (Pmode);
9997 emit_insn (gen_zero_extendsidi2 (r, exp));
9998 return r;
9999 }
10000
10001 /* Expand a string move (memcpy) operation.  Use i386 string operations
10002 when profitable.  ix86_expand_clrstr contains similar code. */
10003 int
10004 ix86_expand_movstr (dst, src, count_exp, align_exp)
10005 rtx dst, src, count_exp, align_exp;
10006 {
10007 rtx srcreg, destreg, countreg;
10008 enum machine_mode counter_mode;
10009 HOST_WIDE_INT align = 0;
10010 unsigned HOST_WIDE_INT count = 0;
10011 rtx insns;
10012
10013 start_sequence ();
10014
10015 if (GET_CODE (align_exp) == CONST_INT)
10016 align = INTVAL (align_exp);
10017
10018 /* This simple hack (pretending the operands are already aligned) avoids all of the alignment code and simplifies the code below. */
10019 if (!TARGET_ALIGN_STRINGOPS)
10020 align = 64;
10021
10022 if (GET_CODE (count_exp) == CONST_INT)
10023 count = INTVAL (count_exp);
10024
10025 /* Figure out the proper mode for the counter.  For 32-bit targets it is
10026 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10027 COUNT is the number of bytes to copy when it is known at compile time. */
10028 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10029 || x86_64_zero_extended_value (count_exp))
10030 counter_mode = SImode;
10031 else
10032 counter_mode = DImode;
10033
10034 if (counter_mode != SImode && counter_mode != DImode)
10035 abort ();
10036
10037 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10038 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10039
10040 emit_insn (gen_cld ());
10041
10042 /* When optimizing for size, emit a simple rep ; movsb instruction for
10043 counts not divisible by 4. */
10044
10045 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10046 {
10047 countreg = ix86_zero_extend_to_Pmode (count_exp);
10048 if (TARGET_64BIT)
10049 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10050 destreg, srcreg, countreg));
10051 else
10052 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10053 destreg, srcreg, countreg));
10054 }
10055
10056 /* For constant aligned (or small unaligned) copies use rep movsl
10057 followed by code copying the rest. For PentiumPro ensure 8 byte
10058 alignment to allow rep movsl acceleration. */
10059
10060 else if (count != 0
10061 && (align >= 8
10062 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10063 || optimize_size || count < (unsigned int) 64))
10064 {
10065 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10066 if (count & ~(size - 1))
10067 {
10068 countreg = copy_to_mode_reg (counter_mode,
10069 GEN_INT ((count >> (size == 4 ? 2 : 3))
10070 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10071 countreg = ix86_zero_extend_to_Pmode (countreg);
10072 if (size == 4)
10073 {
10074 if (TARGET_64BIT)
10075 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10076 destreg, srcreg, countreg));
10077 else
10078 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10079 destreg, srcreg, countreg));
10080 }
10081 else
10082 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10083 destreg, srcreg, countreg));
10084 }
10085 if (size == 8 && (count & 0x04))
10086 emit_insn (gen_strmovsi (destreg, srcreg));
10087 if (count & 0x02)
10088 emit_insn (gen_strmovhi (destreg, srcreg));
10089 if (count & 0x01)
10090 emit_insn (gen_strmovqi (destreg, srcreg));
10091 }
10092 /* The generic code based on the glibc implementation:
10093 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10094 allowing accelerated copying there)
10095 - copy the data using rep movsl
10096 - copy the rest. */
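/* Illustrative C-level equivalent of the strategy above (a hypothetical
   helper, not compiler code; the word-sized middle phase is what the
   "rep movs" below implements):

     #include <stddef.h>
     #include <stdint.h>
     #include <string.h>

     static void *sketch_memcpy (char *dst, const char *src, size_t n)
     {
       char *ret = dst;
       /* Align the destination, one byte at a time.  */
       while (n && ((uintptr_t) dst % sizeof (long)))
         { *dst++ = *src++; n--; }
       /* Copy whole words; the generated code uses "rep movs" here.  */
       for (; n >= sizeof (long); n -= sizeof (long))
         {
           memcpy (dst, src, sizeof (long));
           dst += sizeof (long);
           src += sizeof (long);
         }
       /* Copy the remaining tail bytes.  */
       while (n--)
         *dst++ = *src++;
       return ret;
     }
*/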
10097 else
10098 {
10099 rtx countreg2;
10100 rtx label = NULL;
10101 int desired_alignment = (TARGET_PENTIUMPRO
10102 && (count == 0 || count >= (unsigned int) 260)
10103 ? 8 : UNITS_PER_WORD);
10104
10105 /* In case we don't know anything about the alignment, default to the
10106 library version, since it is usually equally fast and results in
10107 shorter code. */
10108 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10109 {
10110 end_sequence ();
10111 return 0;
10112 }
10113
10114 if (TARGET_SINGLE_STRINGOP)
10115 emit_insn (gen_cld ());
10116
10117 countreg2 = gen_reg_rtx (Pmode);
10118 countreg = copy_to_mode_reg (counter_mode, count_exp);
10119
10120 /* We don't use loops to align the destination or to copy parts smaller
10121 than 4 bytes, because gcc is able to optimize such code better (in
10122 case the destination or the count really is aligned, gcc is often
10123 able to predict the branches) and also it is friendlier to
10124 hardware branch prediction.
10125
10126 Using loops is beneficial for the generic case, because we can
10127 handle small counts using the loops. Many CPUs (such as Athlon)
10128 have large REP prefix setup costs.
10129
10130 This is quite costly. Maybe we can revisit this decision later or
10131 add some customizability to this code. */
10132
10133 if (count == 0 && align < desired_alignment)
10134 {
10135 label = gen_label_rtx ();
10136 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10137 LEU, 0, counter_mode, 1, label);
10138 }
10139 if (align <= 1)
10140 {
10141 rtx label = ix86_expand_aligntest (destreg, 1);
10142 emit_insn (gen_strmovqi (destreg, srcreg));
10143 ix86_adjust_counter (countreg, 1);
10144 emit_label (label);
10145 LABEL_NUSES (label) = 1;
10146 }
10147 if (align <= 2)
10148 {
10149 rtx label = ix86_expand_aligntest (destreg, 2);
10150 emit_insn (gen_strmovhi (destreg, srcreg));
10151 ix86_adjust_counter (countreg, 2);
10152 emit_label (label);
10153 LABEL_NUSES (label) = 1;
10154 }
10155 if (align <= 4 && desired_alignment > 4)
10156 {
10157 rtx label = ix86_expand_aligntest (destreg, 4);
10158 emit_insn (gen_strmovsi (destreg, srcreg));
10159 ix86_adjust_counter (countreg, 4);
10160 emit_label (label);
10161 LABEL_NUSES (label) = 1;
10162 }
10163
10164 if (label && desired_alignment > 4 && !TARGET_64BIT)
10165 {
10166 emit_label (label);
10167 LABEL_NUSES (label) = 1;
10168 label = NULL_RTX;
10169 }
10170 if (!TARGET_SINGLE_STRINGOP)
10171 emit_insn (gen_cld ());
10172 if (TARGET_64BIT)
10173 {
10174 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10175 GEN_INT (3)));
10176 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10177 destreg, srcreg, countreg2));
10178 }
10179 else
10180 {
10181 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10182 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10183 destreg, srcreg, countreg2));
10184 }
10185
10186 if (label)
10187 {
10188 emit_label (label);
10189 LABEL_NUSES (label) = 1;
10190 }
10191 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10192 emit_insn (gen_strmovsi (destreg, srcreg));
10193 if ((align <= 4 || count == 0) && TARGET_64BIT)
10194 {
10195 rtx label = ix86_expand_aligntest (countreg, 4);
10196 emit_insn (gen_strmovsi (destreg, srcreg));
10197 emit_label (label);
10198 LABEL_NUSES (label) = 1;
10199 }
10200 if (align > 2 && count != 0 && (count & 2))
10201 emit_insn (gen_strmovhi (destreg, srcreg));
10202 if (align <= 2 || count == 0)
10203 {
10204 rtx label = ix86_expand_aligntest (countreg, 2);
10205 emit_insn (gen_strmovhi (destreg, srcreg));
10206 emit_label (label);
10207 LABEL_NUSES (label) = 1;
10208 }
10209 if (align > 1 && count != 0 && (count & 1))
10210 emit_insn (gen_strmovqi (destreg, srcreg));
10211 if (align <= 1 || count == 0)
10212 {
10213 rtx label = ix86_expand_aligntest (countreg, 1);
10214 emit_insn (gen_strmovqi (destreg, srcreg));
10215 emit_label (label);
10216 LABEL_NUSES (label) = 1;
10217 }
10218 }
10219
10220 insns = get_insns ();
10221 end_sequence ();
10222
10223 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10224 emit_insn (insns);
10225 return 1;
10226 }
10227
10228 /* Expand a string clear operation (bzero).  Use i386 string operations
10229 when profitable.  ix86_expand_movstr contains similar code. */
10230 int
10231 ix86_expand_clrstr (src, count_exp, align_exp)
10232 rtx src, count_exp, align_exp;
10233 {
10234 rtx destreg, zeroreg, countreg;
10235 enum machine_mode counter_mode;
10236 HOST_WIDE_INT align = 0;
10237 unsigned HOST_WIDE_INT count = 0;
10238
10239 if (GET_CODE (align_exp) == CONST_INT)
10240 align = INTVAL (align_exp);
10241
10242 /* This simple hack (pretending the operands are already aligned) avoids all of the alignment code and simplifies the code below. */
10243 if (!TARGET_ALIGN_STRINGOPS)
10244 align = 32;
10245
10246 if (GET_CODE (count_exp) == CONST_INT)
10247 count = INTVAL (count_exp);
10248 /* Figure out the proper mode for the counter.  For 32-bit targets it is
10249 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10250 COUNT is the number of bytes to clear when it is known at compile time. */
10251 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10252 || x86_64_zero_extended_value (count_exp))
10253 counter_mode = SImode;
10254 else
10255 counter_mode = DImode;
10256
10257 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10258
10259 emit_insn (gen_cld ());
10260
10261 /* When optimizing for size, emit a simple rep ; stosb instruction for
10262 counts not divisible by 4. */
10263
10264 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10265 {
10266 countreg = ix86_zero_extend_to_Pmode (count_exp);
10267 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10268 if (TARGET_64BIT)
10269 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10270 destreg, countreg));
10271 else
10272 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10273 destreg, countreg));
10274 }
10275 else if (count != 0
10276 && (align >= 8
10277 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10278 || optimize_size || count < (unsigned int) 64))
10279 {
10280 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10281 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10282 if (count & ~(size - 1))
10283 {
10284 countreg = copy_to_mode_reg (counter_mode,
10285 GEN_INT ((count >> (size == 4 ? 2 : 3))
10286 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10287 countreg = ix86_zero_extend_to_Pmode (countreg);
10288 if (size == 4)
10289 {
10290 if (TARGET_64BIT)
10291 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10292 destreg, countreg));
10293 else
10294 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10295 destreg, countreg));
10296 }
10297 else
10298 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10299 destreg, countreg));
10300 }
10301 if (size == 8 && (count & 0x04))
10302 emit_insn (gen_strsetsi (destreg,
10303 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10304 if (count & 0x02)
10305 emit_insn (gen_strsethi (destreg,
10306 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10307 if (count & 0x01)
10308 emit_insn (gen_strsetqi (destreg,
10309 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10310 }
10311 else
10312 {
10313 rtx countreg2;
10314 rtx label = NULL;
10315 /* Compute desired alignment of the string operation. */
10316 int desired_alignment = (TARGET_PENTIUMPRO
10317 && (count == 0 || count >= (unsigned int) 260)
10318 ? 8 : UNITS_PER_WORD);
10319
10320 /* In case we don't know anything about the alignment, default to the
10321 library version, since it is usually equally fast and results in
10322 shorter code. */
10323 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10324 return 0;
10325
10326 if (TARGET_SINGLE_STRINGOP)
10327 emit_insn (gen_cld ());
10328
10329 countreg2 = gen_reg_rtx (Pmode);
10330 countreg = copy_to_mode_reg (counter_mode, count_exp);
10331 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10332
10333 if (count == 0 && align < desired_alignment)
10334 {
10335 label = gen_label_rtx ();
10336 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10337 LEU, 0, counter_mode, 1, label);
10338 }
10339 if (align <= 1)
10340 {
10341 rtx label = ix86_expand_aligntest (destreg, 1);
10342 emit_insn (gen_strsetqi (destreg,
10343 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10344 ix86_adjust_counter (countreg, 1);
10345 emit_label (label);
10346 LABEL_NUSES (label) = 1;
10347 }
10348 if (align <= 2)
10349 {
10350 rtx label = ix86_expand_aligntest (destreg, 2);
10351 emit_insn (gen_strsethi (destreg,
10352 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10353 ix86_adjust_counter (countreg, 2);
10354 emit_label (label);
10355 LABEL_NUSES (label) = 1;
10356 }
10357 if (align <= 4 && desired_alignment > 4)
10358 {
10359 rtx label = ix86_expand_aligntest (destreg, 4);
10360 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10361 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10362 : zeroreg)));
10363 ix86_adjust_counter (countreg, 4);
10364 emit_label (label);
10365 LABEL_NUSES (label) = 1;
10366 }
10367
10368 if (label && desired_alignment > 4 && !TARGET_64BIT)
10369 {
10370 emit_label (label);
10371 LABEL_NUSES (label) = 1;
10372 label = NULL_RTX;
10373 }
10374
10375 if (!TARGET_SINGLE_STRINGOP)
10376 emit_insn (gen_cld ());
10377 if (TARGET_64BIT)
10378 {
10379 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10380 GEN_INT (3)));
10381 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10382 destreg, countreg2));
10383 }
10384 else
10385 {
10386 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10387 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10388 destreg, countreg2));
10389 }
10390 if (label)
10391 {
10392 emit_label (label);
10393 LABEL_NUSES (label) = 1;
10394 }
10395
10396 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10397 emit_insn (gen_strsetsi (destreg,
10398 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10399 if (TARGET_64BIT && (align <= 4 || count == 0))
10400 {
10401 rtx label = ix86_expand_aligntest (countreg, 4);
10402 emit_insn (gen_strsetsi (destreg,
10403 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10404 emit_label (label);
10405 LABEL_NUSES (label) = 1;
10406 }
10407 if (align > 2 && count != 0 && (count & 2))
10408 emit_insn (gen_strsethi (destreg,
10409 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10410 if (align <= 2 || count == 0)
10411 {
10412 rtx label = ix86_expand_aligntest (countreg, 2);
10413 emit_insn (gen_strsethi (destreg,
10414 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10415 emit_label (label);
10416 LABEL_NUSES (label) = 1;
10417 }
10418 if (align > 1 && count != 0 && (count & 1))
10419 emit_insn (gen_strsetqi (destreg,
10420 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10421 if (align <= 1 || count == 0)
10422 {
10423 rtx label = ix86_expand_aligntest (countreg, 1);
10424 emit_insn (gen_strsetqi (destreg,
10425 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10426 emit_label (label);
10427 LABEL_NUSES (label) = 1;
10428 }
10429 }
10430 return 1;
10431 }
10432 /* Expand strlen. */
10433 int
10434 ix86_expand_strlen (out, src, eoschar, align)
10435 rtx out, src, eoschar, align;
10436 {
10437 rtx addr, scratch1, scratch2, scratch3, scratch4;
10438
10439 /* The generic case of the strlen expander is long.  Avoid expanding it
10440 unless TARGET_INLINE_ALL_STRINGOPS. */
10441
10442 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10443 && !TARGET_INLINE_ALL_STRINGOPS
10444 && !optimize_size
10445 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10446 return 0;
10447
10448 addr = force_reg (Pmode, XEXP (src, 0));
10449 scratch1 = gen_reg_rtx (Pmode);
10450
10451 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10452 && !optimize_size)
10453 {
10454 /* Well it seems that some optimizer does not combine a call like
10455 foo(strlen(bar), strlen(bar));
10456 when the move and the subtraction are done here. It does calculate
10457 the length just once when these instructions are done inside of
10458 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10459 often used and I use one fewer register for the lifetime of
10460 output_strlen_unroll() this is better. */
10461
10462 emit_move_insn (out, addr);
10463
10464 ix86_expand_strlensi_unroll_1 (out, align);
10465
10466 /* strlensi_unroll_1 returns the address of the zero at the end of
10467 the string, like memchr(), so compute the length by subtracting
10468 the start address. */
10469 if (TARGET_64BIT)
10470 emit_insn (gen_subdi3 (out, out, addr));
10471 else
10472 emit_insn (gen_subsi3 (out, out, addr));
10473 }
10474 else
10475 {
10476 scratch2 = gen_reg_rtx (Pmode);
10477 scratch3 = gen_reg_rtx (Pmode);
10478 scratch4 = force_reg (Pmode, constm1_rtx);
10479
10480 emit_move_insn (scratch3, addr);
10481 eoschar = force_reg (QImode, eoschar);
10482
10483 emit_insn (gen_cld ());
10484 if (TARGET_64BIT)
10485 {
10486 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10487 align, scratch4, scratch3));
10488 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10489 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10490 }
10491 else
10492 {
10493 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10494 align, scratch4, scratch3));
10495 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10496 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10497 }
10498 }
10499 return 1;
10500 }
10501
10502 /* Expand the appropriate insns for doing strlen if not just doing
10503 repnz; scasb
10504
10505 out = result, initialized with the start address
10506 align_rtx = alignment of the address.
10507 scratch = scratch register, initialized with the start address when
10508 not aligned, otherwise undefined
10509
10510 This is just the body. It needs the initialisations mentioned above and
10511 some address computing at the end. These things are done in i386.md. */
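/* In C terms the code generated below is roughly the following sketch
   (hypothetical code, not part of the compiler; assumes a 32-bit unsigned
   int and the usual <stdint.h>/<string.h> declarations; the real code
   locates the final zero byte without branching):

     static size_t sketch_strlen (const char *s)
     {
       const char *p = s;
       while ((uintptr_t) p & 3)          /* check 1..3 bytes to align */
         {
           if (*p == 0)
             return p - s;
           p++;
         }
       for (;;)                           /* then scan a word at a time */
         {
           unsigned int w;
           memcpy (&w, p, 4);
           p += 4;
           if ((w - 0x01010101U) & ~w & 0x80808080U)   /* a byte is zero */
             break;
         }
       p -= 4;
       while (*p)                         /* find the zero byte */
         p++;
       return p - s;
     }
*/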
10512
10513 static void
10514 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10515 rtx out, align_rtx;
10516 {
10517 int align;
10518 rtx tmp;
10519 rtx align_2_label = NULL_RTX;
10520 rtx align_3_label = NULL_RTX;
10521 rtx align_4_label = gen_label_rtx ();
10522 rtx end_0_label = gen_label_rtx ();
10523 rtx mem;
10524 rtx tmpreg = gen_reg_rtx (SImode);
10525 rtx scratch = gen_reg_rtx (SImode);
10526
10527 align = 0;
10528 if (GET_CODE (align_rtx) == CONST_INT)
10529 align = INTVAL (align_rtx);
10530
10531 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10532
10533 /* Is there a known alignment and is it less than 4? */
10534 if (align < 4)
10535 {
10536 rtx scratch1 = gen_reg_rtx (Pmode);
10537 emit_move_insn (scratch1, out);
10538 /* Is there a known alignment and is it not 2? */
10539 if (align != 2)
10540 {
10541 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10542 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10543
10544 /* Leave just the 3 lower bits. */
10545 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10546 NULL_RTX, 0, OPTAB_WIDEN);
10547
10548 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10549 Pmode, 1, align_4_label);
10550 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10551 Pmode, 1, align_2_label);
10552 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10553 Pmode, 1, align_3_label);
10554 }
10555 else
10556 {
10557 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10558 check whether it is aligned to a 4-byte boundary. */
10559
10560 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10561 NULL_RTX, 0, OPTAB_WIDEN);
10562
10563 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10564 Pmode, 1, align_4_label);
10565 }
10566
10567 mem = gen_rtx_MEM (QImode, out);
10568
10569 /* Now compare the bytes. */
10570
10571 /* Compare the first few unaligned bytes on a byte-by-byte basis. */
10572 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10573 QImode, 1, end_0_label);
10574
10575 /* Increment the address. */
10576 if (TARGET_64BIT)
10577 emit_insn (gen_adddi3 (out, out, const1_rtx));
10578 else
10579 emit_insn (gen_addsi3 (out, out, const1_rtx));
10580
10581 /* Not needed with an alignment of 2 */
10582 if (align != 2)
10583 {
10584 emit_label (align_2_label);
10585
10586 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10587 end_0_label);
10588
10589 if (TARGET_64BIT)
10590 emit_insn (gen_adddi3 (out, out, const1_rtx));
10591 else
10592 emit_insn (gen_addsi3 (out, out, const1_rtx));
10593
10594 emit_label (align_3_label);
10595 }
10596
10597 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10598 end_0_label);
10599
10600 if (TARGET_64BIT)
10601 emit_insn (gen_adddi3 (out, out, const1_rtx));
10602 else
10603 emit_insn (gen_addsi3 (out, out, const1_rtx));
10604 }
10605
10606 /* Generate a loop to check 4 bytes at a time. It is not a good idea
10607 to align this loop; doing so only bloats the code and does not
10608 improve speed. */
10609 emit_label (align_4_label);
10610
10611 mem = gen_rtx_MEM (SImode, out);
10612 emit_move_insn (scratch, mem);
10613 if (TARGET_64BIT)
10614 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10615 else
10616 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10617
10618 /* This formula yields a nonzero result iff one of the bytes is zero.
10619 This saves three branches inside the loop and many cycles. */
10620
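/* For a 32-bit word W the value computed below is

     (W - 0x01010101) & ~W & 0x80808080

   which is nonzero exactly when some byte of W is 0x00: a zero byte turns
   into 0xff (or 0xfe) after the subtraction, so its top bit is set in the
   difference and survives the "& ~W" mask, while bytes whose top bit was
   already set are removed by "& ~W"; a borrow can only cross a byte
   boundary when a lower byte was itself zero, so there are no false
   positives.  */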
10621 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10622 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10623 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10624 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10625 gen_int_mode (0x80808080, SImode)));
10626 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10627 align_4_label);
10628
10629 if (TARGET_CMOVE)
10630 {
10631 rtx reg = gen_reg_rtx (SImode);
10632 rtx reg2 = gen_reg_rtx (Pmode);
10633 emit_move_insn (reg, tmpreg);
10634 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10635
10636 /* If zero is not in the first two bytes, move two bytes forward. */
10637 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10638 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10639 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10640 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10641 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10642 reg,
10643 tmpreg)));
10644 /* Emit lea manually to avoid clobbering of flags. */
10645 emit_insn (gen_rtx_SET (SImode, reg2,
10646 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10647
10648 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10649 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10650 emit_insn (gen_rtx_SET (VOIDmode, out,
10651 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10652 reg2,
10653 out)));
10654
10655 }
10656 else
10657 {
10658 rtx end_2_label = gen_label_rtx ();
10659 /* Is zero in the first two bytes? */
10660
10661 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10662 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10663 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10664 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10665 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10666 pc_rtx);
10667 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10668 JUMP_LABEL (tmp) = end_2_label;
10669
10670 /* Not in the first two. Move two bytes forward. */
10671 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10672 if (TARGET_64BIT)
10673 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10674 else
10675 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10676
10677 emit_label (end_2_label);
10678
10679 }
10680
10681 /* Avoid branch in fixing the byte. */
10682 tmpreg = gen_lowpart (QImode, tmpreg);
10683 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10684 if (TARGET_64BIT)
10685 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10686 else
10687 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10688
10689 emit_label (end_0_label);
10690 }
10691
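/* Emit an RTL call insn.  RETVAL is the destination of the return value,
   or NULL for a call without a value.  FNADDR is a MEM giving the address
   of the function to call, CALLARG1 becomes the bytes-of-arguments operand
   of the CALL rtx, and POP is the amount added to the stack pointer after
   return (const0_rtx or NULL when nothing is popped).  On 64-bit targets a
   nonnegative CALLARG2 is loaded into AL before the call; the psABI uses
   AL to pass the number of SSE registers used by a varargs call.  */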
10692 void
10693 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10694 rtx retval, fnaddr, callarg1, callarg2, pop;
10695 {
10696 rtx use = NULL, call;
10697
10698 if (pop == const0_rtx)
10699 pop = NULL;
10700 if (TARGET_64BIT && pop)
10701 abort ();
10702
10703 #if TARGET_MACHO
10704 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10705 fnaddr = machopic_indirect_call_target (fnaddr);
10706 #else
10707 /* Static functions and indirect calls don't need the pic register. */
10708 if (! TARGET_64BIT && flag_pic
10709 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10710 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10711 use_reg (&use, pic_offset_table_rtx);
10712
10713 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10714 {
10715 rtx al = gen_rtx_REG (QImode, 0);
10716 emit_move_insn (al, callarg2);
10717 use_reg (&use, al);
10718 }
10719 #endif /* TARGET_MACHO */
10720
10721 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10722 {
10723 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10724 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10725 }
10726
10727 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10728 if (retval)
10729 call = gen_rtx_SET (VOIDmode, retval, call);
10730 if (pop)
10731 {
10732 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10733 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10734 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10735 }
10736
10737 call = emit_call_insn (call);
10738 if (use)
10739 CALL_INSN_FUNCTION_USAGE (call) = use;
10740 }
10741
10742 \f
10743 /* Clear stack slot assignments remembered from previous functions.
10744 This is called from INIT_EXPANDERS once before RTL is emitted for each
10745 function. */
10746
10747 static struct machine_function *
10748 ix86_init_machine_status ()
10749 {
10750 return ggc_alloc_cleared (sizeof (struct machine_function));
10751 }
10752
10753 /* Return a MEM corresponding to a stack slot with mode MODE.
10754 Allocate a new slot if necessary.
10755
10756 The RTL for a function can have several slots available: N is
10757 which slot to use. */
10758
10759 rtx
10760 assign_386_stack_local (mode, n)
10761 enum machine_mode mode;
10762 int n;
10763 {
10764 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10765 abort ();
10766
10767 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10768 ix86_stack_locals[(int) mode][n]
10769 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10770
10771 return ix86_stack_locals[(int) mode][n];
10772 }
10773
10774 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10775
10776 static GTY(()) rtx ix86_tls_symbol;
10777 rtx
10778 ix86_tls_get_addr ()
10779 {
10780
10781 if (!ix86_tls_symbol)
10782 {
10783 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10784 ? "___tls_get_addr"
10785 : "__tls_get_addr"));
10786 }
10787
10788 return ix86_tls_symbol;
10789 }
10790 \f
10791 /* Calculate the length of the memory address in the instruction
10792 encoding. Does not include the one-byte modrm, opcode, or prefix. */
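/* Informal examples of the cases handled below: (%eax) needs no extra
   bytes; a disp8 form such as 8(%eax) needs 1; an absolute address or a
   disp32 form needs 4; an index register adds one SIB byte; and (%esp) or
   (%ebp) alone are counted as 1 because they need the longer encoding.  */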
10793
10794 static int
10795 memory_address_length (addr)
10796 rtx addr;
10797 {
10798 struct ix86_address parts;
10799 rtx base, index, disp;
10800 int len;
10801
10802 if (GET_CODE (addr) == PRE_DEC
10803 || GET_CODE (addr) == POST_INC
10804 || GET_CODE (addr) == PRE_MODIFY
10805 || GET_CODE (addr) == POST_MODIFY)
10806 return 0;
10807
10808 if (! ix86_decompose_address (addr, &parts))
10809 abort ();
10810
10811 base = parts.base;
10812 index = parts.index;
10813 disp = parts.disp;
10814 len = 0;
10815
10816 /* Register Indirect. */
10817 if (base && !index && !disp)
10818 {
10819 /* Special cases: ebp and esp need the two-byte modrm form. */
10820 if (addr == stack_pointer_rtx
10821 || addr == arg_pointer_rtx
10822 || addr == frame_pointer_rtx
10823 || addr == hard_frame_pointer_rtx)
10824 len = 1;
10825 }
10826
10827 /* Direct Addressing. */
10828 else if (disp && !base && !index)
10829 len = 4;
10830
10831 else
10832 {
10833 /* Find the length of the displacement constant. */
10834 if (disp)
10835 {
10836 if (GET_CODE (disp) == CONST_INT
10837 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10838 len = 1;
10839 else
10840 len = 4;
10841 }
10842
10843 /* An index requires the two-byte modrm form. */
10844 if (index)
10845 len += 1;
10846 }
10847
10848 return len;
10849 }
10850
10851 /* Compute the default value for the "length_immediate" attribute.  When
10852 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
10853 int
10854 ix86_attr_length_immediate_default (insn, shortform)
10855 rtx insn;
10856 int shortform;
10857 {
10858 int len = 0;
10859 int i;
10860 extract_insn_cached (insn);
10861 for (i = recog_data.n_operands - 1; i >= 0; --i)
10862 if (CONSTANT_P (recog_data.operand[i]))
10863 {
10864 if (len)
10865 abort ();
10866 if (shortform
10867 && GET_CODE (recog_data.operand[i]) == CONST_INT
10868 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10869 len = 1;
10870 else
10871 {
10872 switch (get_attr_mode (insn))
10873 {
10874 case MODE_QI:
10875 len+=1;
10876 break;
10877 case MODE_HI:
10878 len+=2;
10879 break;
10880 case MODE_SI:
10881 len+=4;
10882 break;
10883 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10884 case MODE_DI:
10885 len+=4;
10886 break;
10887 default:
10888 fatal_insn ("unknown insn mode", insn);
10889 }
10890 }
10891 }
10892 return len;
10893 }
10894 /* Compute default value for "length_address" attribute. */
10895 int
10896 ix86_attr_length_address_default (insn)
10897 rtx insn;
10898 {
10899 int i;
10900 extract_insn_cached (insn);
10901 for (i = recog_data.n_operands - 1; i >= 0; --i)
10902 if (GET_CODE (recog_data.operand[i]) == MEM)
10903 {
10904 return memory_address_length (XEXP (recog_data.operand[i], 0));
10905 break;
10906 }
10907 return 0;
10908 }
10909 \f
10910 /* Return the maximum number of instructions a cpu can issue. */
10911
10912 static int
10913 ix86_issue_rate ()
10914 {
10915 switch (ix86_cpu)
10916 {
10917 case PROCESSOR_PENTIUM:
10918 case PROCESSOR_K6:
10919 return 2;
10920
10921 case PROCESSOR_PENTIUMPRO:
10922 case PROCESSOR_PENTIUM4:
10923 case PROCESSOR_ATHLON:
10924 return 3;
10925
10926 default:
10927 return 1;
10928 }
10929 }
10930
10931 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10932 by DEP_INSN and nothing else set by DEP_INSN. */
10933
10934 static int
10935 ix86_flags_dependant (insn, dep_insn, insn_type)
10936 rtx insn, dep_insn;
10937 enum attr_type insn_type;
10938 {
10939 rtx set, set2;
10940
10941 /* Simplify the test for uninteresting insns. */
10942 if (insn_type != TYPE_SETCC
10943 && insn_type != TYPE_ICMOV
10944 && insn_type != TYPE_FCMOV
10945 && insn_type != TYPE_IBR)
10946 return 0;
10947
10948 if ((set = single_set (dep_insn)) != 0)
10949 {
10950 set = SET_DEST (set);
10951 set2 = NULL_RTX;
10952 }
10953 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10954 && XVECLEN (PATTERN (dep_insn), 0) == 2
10955 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10956 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10957 {
10958 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10959 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10960 }
10961 else
10962 return 0;
10963
10964 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10965 return 0;
10966
10967 /* This test is true if the dependent insn reads the flags but
10968 not any other potentially set register. */
10969 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10970 return 0;
10971
10972 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10973 return 0;
10974
10975 return 1;
10976 }
10977
10978 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10979 address with operands set by DEP_INSN. */
10980
10981 static int
10982 ix86_agi_dependant (insn, dep_insn, insn_type)
10983 rtx insn, dep_insn;
10984 enum attr_type insn_type;
10985 {
10986 rtx addr;
10987
10988 if (insn_type == TYPE_LEA
10989 && TARGET_PENTIUM)
10990 {
10991 addr = PATTERN (insn);
10992 if (GET_CODE (addr) == SET)
10993 ;
10994 else if (GET_CODE (addr) == PARALLEL
10995 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10996 addr = XVECEXP (addr, 0, 0);
10997 else
10998 abort ();
10999 addr = SET_SRC (addr);
11000 }
11001 else
11002 {
11003 int i;
11004 extract_insn_cached (insn);
11005 for (i = recog_data.n_operands - 1; i >= 0; --i)
11006 if (GET_CODE (recog_data.operand[i]) == MEM)
11007 {
11008 addr = XEXP (recog_data.operand[i], 0);
11009 goto found;
11010 }
11011 return 0;
11012 found:;
11013 }
11014
11015 return modified_in_p (addr, dep_insn);
11016 }
11017
11018 static int
11019 ix86_adjust_cost (insn, link, dep_insn, cost)
11020 rtx insn, link, dep_insn;
11021 int cost;
11022 {
11023 enum attr_type insn_type, dep_insn_type;
11024 enum attr_memory memory, dep_memory;
11025 rtx set, set2;
11026 int dep_insn_code_number;
11027
11028 /* Anti and output dependencies have zero cost on all CPUs. */
11029 if (REG_NOTE_KIND (link) != 0)
11030 return 0;
11031
11032 dep_insn_code_number = recog_memoized (dep_insn);
11033
11034 /* If we can't recognize the insns, we can't really do anything. */
11035 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11036 return cost;
11037
11038 insn_type = get_attr_type (insn);
11039 dep_insn_type = get_attr_type (dep_insn);
11040
11041 switch (ix86_cpu)
11042 {
11043 case PROCESSOR_PENTIUM:
11044 /* Address Generation Interlock adds a cycle of latency. */
11045 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11046 cost += 1;
11047
11048 /* ??? Compares pair with jump/setcc. */
11049 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11050 cost = 0;
11051
11052 /* Floating point stores require the value to be ready one cycle earlier. */
11053 if (insn_type == TYPE_FMOV
11054 && get_attr_memory (insn) == MEMORY_STORE
11055 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11056 cost += 1;
11057 break;
11058
11059 case PROCESSOR_PENTIUMPRO:
11060 memory = get_attr_memory (insn);
11061 dep_memory = get_attr_memory (dep_insn);
11062
11063 /* Since we can't represent delayed latencies of load+operation,
11064 increase the cost here for non-imov insns. */
11065 if (dep_insn_type != TYPE_IMOV
11066 && dep_insn_type != TYPE_FMOV
11067 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11068 cost += 1;
11069
11070 /* INT->FP conversion is expensive. */
11071 if (get_attr_fp_int_src (dep_insn))
11072 cost += 5;
11073
11074 /* There is one cycle extra latency between an FP op and a store. */
11075 if (insn_type == TYPE_FMOV
11076 && (set = single_set (dep_insn)) != NULL_RTX
11077 && (set2 = single_set (insn)) != NULL_RTX
11078 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11079 && GET_CODE (SET_DEST (set2)) == MEM)
11080 cost += 1;
11081
11082 /* Show the ability of the reorder buffer to hide the latency of a load
11083 by executing it in parallel with the previous instruction when the
11084 previous instruction is not needed to compute the address. */
11085 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11086 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11087 {
11088 /* Claim that moves take one cycle, as the core can issue one load
11089 at a time and the next load can start a cycle later. */
11090 if (dep_insn_type == TYPE_IMOV
11091 || dep_insn_type == TYPE_FMOV)
11092 cost = 1;
11093 else if (cost > 1)
11094 cost--;
11095 }
11096 break;
11097
11098 case PROCESSOR_K6:
11099 memory = get_attr_memory (insn);
11100 dep_memory = get_attr_memory (dep_insn);
11101 /* The esp dependency is resolved before the instruction is really
11102 finished. */
11103 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11104 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11105 return 1;
11106
11107 /* Since we can't represent delayed latencies of load+operation,
11108 increase the cost here for non-imov insns. */
11109 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11110 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11111
11112 /* INT->FP conversion is expensive. */
11113 if (get_attr_fp_int_src (dep_insn))
11114 cost += 5;
11115
11116 /* Show the ability of the reorder buffer to hide the latency of a load
11117 by executing it in parallel with the previous instruction when the
11118 previous instruction is not needed to compute the address. */
11119 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11120 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11121 {
11122 /* Claim that moves take one cycle, as the core can issue one load
11123 at a time and the next load can start a cycle later. */
11124 if (dep_insn_type == TYPE_IMOV
11125 || dep_insn_type == TYPE_FMOV)
11126 cost = 1;
11127 else if (cost > 2)
11128 cost -= 2;
11129 else
11130 cost = 1;
11131 }
11132 break;
11133
11134 case PROCESSOR_ATHLON:
11135 memory = get_attr_memory (insn);
11136 dep_memory = get_attr_memory (dep_insn);
11137
11138 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11139 {
11140 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11141 cost += 2;
11142 else
11143 cost += 3;
11144 }
11145 /* Show the ability of the reorder buffer to hide the latency of a load
11146 by executing it in parallel with the previous instruction when the
11147 previous instruction is not needed to compute the address. */
11148 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11149 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11150 {
11151 /* Claim that moves take one cycle, as the core can issue one load
11152 at a time and the next load can start a cycle later. */
11153 if (dep_insn_type == TYPE_IMOV
11154 || dep_insn_type == TYPE_FMOV)
11155 cost = 0;
11156 else if (cost >= 3)
11157 cost -= 3;
11158 else
11159 cost = 0;
11160 }
11161
11162 default:
11163 break;
11164 }
11165
11166 return cost;
11167 }
11168
11169 static union
11170 {
11171 struct ppro_sched_data
11172 {
11173 rtx decode[3];
11174 int issued_this_cycle;
11175 } ppro;
11176 } ix86_sched_data;
11177
11178 static enum attr_ppro_uops
11179 ix86_safe_ppro_uops (insn)
11180 rtx insn;
11181 {
11182 if (recog_memoized (insn) >= 0)
11183 return get_attr_ppro_uops (insn);
11184 else
11185 return PPRO_UOPS_MANY;
11186 }
11187
11188 static void
11189 ix86_dump_ppro_packet (dump)
11190 FILE *dump;
11191 {
11192 if (ix86_sched_data.ppro.decode[0])
11193 {
11194 fprintf (dump, "PPRO packet: %d",
11195 INSN_UID (ix86_sched_data.ppro.decode[0]));
11196 if (ix86_sched_data.ppro.decode[1])
11197 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11198 if (ix86_sched_data.ppro.decode[2])
11199 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11200 fputc ('\n', dump);
11201 }
11202 }
11203
11204 /* We're beginning a new block. Initialize data structures as necessary. */
11205
11206 static void
11207 ix86_sched_init (dump, sched_verbose, veclen)
11208 FILE *dump ATTRIBUTE_UNUSED;
11209 int sched_verbose ATTRIBUTE_UNUSED;
11210 int veclen ATTRIBUTE_UNUSED;
11211 {
11212 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11213 }
11214
11215 /* Shift INSN to SLOT, and shift everything else down. */
11216
11217 static void
11218 ix86_reorder_insn (insnp, slot)
11219 rtx *insnp, *slot;
11220 {
11221 if (insnp != slot)
11222 {
11223 rtx insn = *insnp;
11224 do
11225 insnp[0] = insnp[1];
11226 while (++insnp != slot);
11227 *insnp = insn;
11228 }
11229 }
11230
11231 static void
11232 ix86_sched_reorder_ppro (ready, e_ready)
11233 rtx *ready;
11234 rtx *e_ready;
11235 {
11236 rtx decode[3];
11237 enum attr_ppro_uops cur_uops;
11238 int issued_this_cycle;
11239 rtx *insnp;
11240 int i;
11241
11242 /* At this point .ppro.decode contains the state of the three
11243 decoders from last "cycle". That is, those insns that were
11244 actually independent. But here we're scheduling for the
11245 decoder, and we may find things that are decodable in the
11246 same cycle. */
11247
11248 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11249 issued_this_cycle = 0;
11250
11251 insnp = e_ready;
11252 cur_uops = ix86_safe_ppro_uops (*insnp);
11253
11254 /* If the decoders are empty, and we have a complex insn at the
11255 head of the priority queue, let it issue without complaint. */
11256 if (decode[0] == NULL)
11257 {
11258 if (cur_uops == PPRO_UOPS_MANY)
11259 {
11260 decode[0] = *insnp;
11261 goto ppro_done;
11262 }
11263
11264 /* Otherwise, search for a 2-4 uop insn to issue. */
11265 while (cur_uops != PPRO_UOPS_FEW)
11266 {
11267 if (insnp == ready)
11268 break;
11269 cur_uops = ix86_safe_ppro_uops (*--insnp);
11270 }
11271
11272 /* If so, move it to the head of the line. */
11273 if (cur_uops == PPRO_UOPS_FEW)
11274 ix86_reorder_insn (insnp, e_ready);
11275
11276 /* Issue the head of the queue. */
11277 issued_this_cycle = 1;
11278 decode[0] = *e_ready--;
11279 }
11280
11281 /* Look for simple insns to fill in the other two slots. */
11282 for (i = 1; i < 3; ++i)
11283 if (decode[i] == NULL)
11284 {
11285 if (ready > e_ready)
11286 goto ppro_done;
11287
11288 insnp = e_ready;
11289 cur_uops = ix86_safe_ppro_uops (*insnp);
11290 while (cur_uops != PPRO_UOPS_ONE)
11291 {
11292 if (insnp == ready)
11293 break;
11294 cur_uops = ix86_safe_ppro_uops (*--insnp);
11295 }
11296
11297 /* Found one. Move it to the head of the queue and issue it. */
11298 if (cur_uops == PPRO_UOPS_ONE)
11299 {
11300 ix86_reorder_insn (insnp, e_ready);
11301 decode[i] = *e_ready--;
11302 issued_this_cycle++;
11303 continue;
11304 }
11305
11306 /* ??? Didn't find one. Ideally, here we would do a lazy split
11307 of 2-uop insns, issue one and queue the other. */
11308 }
11309
11310 ppro_done:
11311 if (issued_this_cycle == 0)
11312 issued_this_cycle = 1;
11313 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11314 }
11315
11316 /* We are about to begin issuing insns for this clock cycle.
11317 Override the default sort algorithm to better slot instructions. */
11318 static int
11319 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11320 FILE *dump ATTRIBUTE_UNUSED;
11321 int sched_verbose ATTRIBUTE_UNUSED;
11322 rtx *ready;
11323 int *n_readyp;
11324 int clock_var ATTRIBUTE_UNUSED;
11325 {
11326 int n_ready = *n_readyp;
11327 rtx *e_ready = ready + n_ready - 1;
11328
11329 /* Make sure to go ahead and initialize key items in
11330 ix86_sched_data if we are not going to bother trying to
11331 reorder the ready queue. */
11332 if (n_ready < 2)
11333 {
11334 ix86_sched_data.ppro.issued_this_cycle = 1;
11335 goto out;
11336 }
11337
11338 switch (ix86_cpu)
11339 {
11340 default:
11341 break;
11342
11343 case PROCESSOR_PENTIUMPRO:
11344 ix86_sched_reorder_ppro (ready, e_ready);
11345 break;
11346 }
11347
11348 out:
11349 return ix86_issue_rate ();
11350 }
11351
11352 /* We are about to issue INSN. Return the number of insns left on the
11353 ready queue that can be issued this cycle. */
11354
11355 static int
11356 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11357 FILE *dump;
11358 int sched_verbose;
11359 rtx insn;
11360 int can_issue_more;
11361 {
11362 int i;
11363 switch (ix86_cpu)
11364 {
11365 default:
11366 return can_issue_more - 1;
11367
11368 case PROCESSOR_PENTIUMPRO:
11369 {
11370 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11371
11372 if (uops == PPRO_UOPS_MANY)
11373 {
11374 if (sched_verbose)
11375 ix86_dump_ppro_packet (dump);
11376 ix86_sched_data.ppro.decode[0] = insn;
11377 ix86_sched_data.ppro.decode[1] = NULL;
11378 ix86_sched_data.ppro.decode[2] = NULL;
11379 if (sched_verbose)
11380 ix86_dump_ppro_packet (dump);
11381 ix86_sched_data.ppro.decode[0] = NULL;
11382 }
11383 else if (uops == PPRO_UOPS_FEW)
11384 {
11385 if (sched_verbose)
11386 ix86_dump_ppro_packet (dump);
11387 ix86_sched_data.ppro.decode[0] = insn;
11388 ix86_sched_data.ppro.decode[1] = NULL;
11389 ix86_sched_data.ppro.decode[2] = NULL;
11390 }
11391 else
11392 {
11393 for (i = 0; i < 3; ++i)
11394 if (ix86_sched_data.ppro.decode[i] == NULL)
11395 {
11396 ix86_sched_data.ppro.decode[i] = insn;
11397 break;
11398 }
11399 if (i == 3)
11400 abort ();
11401 if (i == 2)
11402 {
11403 if (sched_verbose)
11404 ix86_dump_ppro_packet (dump);
11405 ix86_sched_data.ppro.decode[0] = NULL;
11406 ix86_sched_data.ppro.decode[1] = NULL;
11407 ix86_sched_data.ppro.decode[2] = NULL;
11408 }
11409 }
11410 }
11411 return --ix86_sched_data.ppro.issued_this_cycle;
11412 }
11413 }
11414
11415 static int
11416 ia32_use_dfa_pipeline_interface ()
11417 {
11418 if (ix86_cpu == PROCESSOR_PENTIUM)
11419 return 1;
11420 return 0;
11421 }
11422
11423 /* How many alternative schedules to try. This should be as wide as the
11424 scheduling freedom in the DFA, but no wider. Making this value too
11425 large results in extra work for the scheduler. */
11426
11427 static int
11428 ia32_multipass_dfa_lookahead ()
11429 {
11430 if (ix86_cpu == PROCESSOR_PENTIUM)
11431 return 2;
11432 else
11433 return 0;
11434 }
11435
11436 \f
11437 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11438 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11439 appropriate. */
11440
11441 void
11442 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11443 rtx insns;
11444 rtx dstref, srcref, dstreg, srcreg;
11445 {
11446 rtx insn;
11447
11448 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11449 if (INSN_P (insn))
11450 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11451 dstreg, srcreg);
11452 }
11453
11454 /* Subroutine of above to actually do the updating by recursively walking
11455 the rtx. */
11456
11457 static void
11458 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11459 rtx x;
11460 rtx dstref, srcref, dstreg, srcreg;
11461 {
11462 enum rtx_code code = GET_CODE (x);
11463 const char *format_ptr = GET_RTX_FORMAT (code);
11464 int i, j;
11465
11466 if (code == MEM && XEXP (x, 0) == dstreg)
11467 MEM_COPY_ATTRIBUTES (x, dstref);
11468 else if (code == MEM && XEXP (x, 0) == srcreg)
11469 MEM_COPY_ATTRIBUTES (x, srcref);
11470
11471 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11472 {
11473 if (*format_ptr == 'e')
11474 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11475 dstreg, srcreg);
11476 else if (*format_ptr == 'E')
11477 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11478 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11479 dstreg, srcreg);
11480 }
11481 }
11482 \f
11483 /* Compute the alignment given to a constant that is being placed in memory.
11484 EXP is the constant and ALIGN is the alignment that the object would
11485 ordinarily have.
11486 The value of this function is used instead of that alignment to align
11487 the object. */
11488
11489 int
11490 ix86_constant_alignment (exp, align)
11491 tree exp;
11492 int align;
11493 {
11494 if (TREE_CODE (exp) == REAL_CST)
11495 {
11496 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11497 return 64;
11498 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11499 return 128;
11500 }
11501 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11502 && align < 256)
11503 return 256;
11504
11505 return align;
11506 }
11507
11508 /* Compute the alignment for a static variable.
11509 TYPE is the data type, and ALIGN is the alignment that
11510 the object would ordinarily have. The value of this function is used
11511 instead of that alignment to align the object. */
11512
11513 int
11514 ix86_data_alignment (type, align)
11515 tree type;
11516 int align;
11517 {
11518 if (AGGREGATE_TYPE_P (type)
11519 && TYPE_SIZE (type)
11520 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11521 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11522 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11523 return 256;
11524
11525 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11526 to a 16-byte boundary. */
11527 if (TARGET_64BIT)
11528 {
11529 if (AGGREGATE_TYPE_P (type)
11530 && TYPE_SIZE (type)
11531 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11532 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11533 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11534 return 128;
11535 }
11536
11537 if (TREE_CODE (type) == ARRAY_TYPE)
11538 {
11539 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11540 return 64;
11541 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11542 return 128;
11543 }
11544 else if (TREE_CODE (type) == COMPLEX_TYPE)
11545 {
11546
11547 if (TYPE_MODE (type) == DCmode && align < 64)
11548 return 64;
11549 if (TYPE_MODE (type) == XCmode && align < 128)
11550 return 128;
11551 }
11552 else if ((TREE_CODE (type) == RECORD_TYPE
11553 || TREE_CODE (type) == UNION_TYPE
11554 || TREE_CODE (type) == QUAL_UNION_TYPE)
11555 && TYPE_FIELDS (type))
11556 {
11557 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11558 return 64;
11559 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11560 return 128;
11561 }
11562 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11563 || TREE_CODE (type) == INTEGER_TYPE)
11564 {
11565 if (TYPE_MODE (type) == DFmode && align < 64)
11566 return 64;
11567 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11568 return 128;
11569 }
11570
11571 return align;
11572 }
11573
11574 /* Compute the alignment for a local variable.
11575 TYPE is the data type, and ALIGN is the alignment that
11576 the object would ordinarily have. The value of this function is used
11577 instead of that alignment to align the object. */
11578
11579 int
11580 ix86_local_alignment (type, align)
11581 tree type;
11582 int align;
11583 {
11584 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11585 to a 16-byte boundary. */
11586 if (TARGET_64BIT)
11587 {
11588 if (AGGREGATE_TYPE_P (type)
11589 && TYPE_SIZE (type)
11590 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11591 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11592 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11593 return 128;
11594 }
11595 if (TREE_CODE (type) == ARRAY_TYPE)
11596 {
11597 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11598 return 64;
11599 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11600 return 128;
11601 }
11602 else if (TREE_CODE (type) == COMPLEX_TYPE)
11603 {
11604 if (TYPE_MODE (type) == DCmode && align < 64)
11605 return 64;
11606 if (TYPE_MODE (type) == XCmode && align < 128)
11607 return 128;
11608 }
11609 else if ((TREE_CODE (type) == RECORD_TYPE
11610 || TREE_CODE (type) == UNION_TYPE
11611 || TREE_CODE (type) == QUAL_UNION_TYPE)
11612 && TYPE_FIELDS (type))
11613 {
11614 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11615 return 64;
11616 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11617 return 128;
11618 }
11619 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11620 || TREE_CODE (type) == INTEGER_TYPE)
11621 {
11622
11623 if (TYPE_MODE (type) == DFmode && align < 64)
11624 return 64;
11625 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11626 return 128;
11627 }
11628 return align;
11629 }
11630 \f
11631 /* Emit RTL insns to initialize the variable parts of a trampoline.
11632 FNADDR is an RTX for the address of the function's pure code.
11633 CXT is an RTX for the static chain value for the function. */
11634 void
11635 x86_initialize_trampoline (tramp, fnaddr, cxt)
11636 rtx tramp, fnaddr, cxt;
11637 {
11638 if (!TARGET_64BIT)
11639 {
11640 /* Compute offset from the end of the jmp to the target function. */
11641 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11642 plus_constant (tramp, 10),
11643 NULL_RTX, 1, OPTAB_DIRECT);
11644 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11645 gen_int_mode (0xb9, QImode));
11646 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11647 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11648 gen_int_mode (0xe9, QImode));
11649 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11650 }
11651 else
11652 {
11653 int offset = 0;
11654 /* Try to load the address using the shorter movl instead of movabs.
11655 We may want to support movq for kernel mode, but the kernel does not
11656 use trampolines at the moment. */
11657 if (x86_64_zero_extended_value (fnaddr))
11658 {
11659 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11660 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11661 gen_int_mode (0xbb41, HImode));
11662 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11663 gen_lowpart (SImode, fnaddr));
11664 offset += 6;
11665 }
11666 else
11667 {
11668 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11669 gen_int_mode (0xbb49, HImode));
11670 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11671 fnaddr);
11672 offset += 10;
11673 }
11674 /* Load static chain using movabs to r10. */
11675 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11676 gen_int_mode (0xba49, HImode));
11677 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11678 cxt);
11679 offset += 10;
11680 /* Jump to r11. */
11681 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11682 gen_int_mode (0xff49, HImode));
11683 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11684 gen_int_mode (0xe3, QImode));
11685 offset += 3;
11686 if (offset > TRAMPOLINE_SIZE)
11687 abort ();
11688 }
11689 }
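/* For reference, the sequences emitted above look like this in memory
   (immediates stored little-endian):

     32-bit trampoline, 10 bytes:
       b9 <cxt:4>          movl  $cxt, %ecx
       e9 <disp:4>         jmp   fnaddr        ; disp = fnaddr - (tramp + 10)

     64-bit trampoline, 19 or 23 bytes:
       49 bb <fnaddr:8>    movabs $fnaddr, %r11   (or 41 bb <fnaddr:4>,
                                                    movl, when fnaddr
                                                    zero-extends)
       49 ba <cxt:8>       movabs $cxt, %r10
       49 ff e3            jmp   *%r11  */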
11690 \f
11691 #define def_builtin(MASK, NAME, TYPE, CODE) \
11692 do { \
11693 if ((MASK) & target_flags) \
11694 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11695 NULL, NULL_TREE); \
11696 } while (0)
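/* For example, a call such as

     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
                  IX86_BUILTIN_EMMS);

   expands (ignoring the do { ... } while (0) wrapper) to

     if (MASK_MMX & target_flags)
       builtin_function ("__builtin_ia32_emms", void_ftype_void,
                         IX86_BUILTIN_EMMS, BUILT_IN_MD, NULL, NULL_TREE);

   so each builtin is registered only when the corresponding ISA flag is
   enabled.  */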
11697
11698 struct builtin_description
11699 {
11700 const unsigned int mask;
11701 const enum insn_code icode;
11702 const char *const name;
11703 const enum ix86_builtins code;
11704 const enum rtx_code comparison;
11705 const unsigned int flag;
11706 };
11707
11708 /* Used for builtins that are enabled both by -msse and -msse2. */
11709 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11710
11711 static const struct builtin_description bdesc_comi[] =
11712 {
11713 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11714 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11715 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11716 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11717 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11718 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11719 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11720 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11721 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11722 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11723 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11724 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11725 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11726 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11728 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11729 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11730 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11731 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11732 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11733 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11734 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11735 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11736 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11737 };
11738
11739 static const struct builtin_description bdesc_2arg[] =
11740 {
11741 /* SSE */
11742 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11743 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11744 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11745 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11746 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11747 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11748 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11749 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11750
11751 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11752 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11753 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11754 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11755 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11756 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11757 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11758 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11759 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11760 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11761 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11762 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11763 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11764 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11765 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11766 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11767 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11768 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11769 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11770 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11771 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11772 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11773 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11774 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11775
11776 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11779 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11780
11781 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11782 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11783 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11784 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11785 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11786
11787 /* MMX */
11788 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11789 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11790 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11791 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11792 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11793 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11794
11795 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11796 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11797 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11798 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11799 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11800 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11801 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11802 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11803
11804 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11805 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11806 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11807
11808 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11809 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11810 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11811 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11812
11813 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11814 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11815
11816 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11817 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11818 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11819 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11820 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11821 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11822
11823 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11824 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11825 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11826 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11827
11828 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11829 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11830 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11831 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11832 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11833 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11834
11835 /* Special. */
11836 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11837 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11838 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11839
11840 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11841 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11842
11843 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11844 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11845 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11846 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11847 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11848 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11849
11850 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11851 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11852 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11853 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11854 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11855 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11856
11857 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11858 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11859 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11860 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11861
11862 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11863 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11864
11865 /* SSE2 */
11866 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11869 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11872 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11873 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11874
11875 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11876 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11877 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11878 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11879 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11880 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11881 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11882 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11883 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11884 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11885 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11886 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11887 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11888 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11889 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11890 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11891 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11892 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11893 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11894 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11895 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11896 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11897 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11898 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11899
11900 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11904
11905 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11908 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11909
11910 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11913
11914 /* SSE2 MMX */
11915 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11922 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11923
11924 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11927 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11929 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11930 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11932
11933 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11934 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11937
11938 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11942
11943 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11945
11946 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11947 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11949 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11952
11953 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11956 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11957
11958 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11964
11965 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11967 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11968
11969 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11971
11972 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11977 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11978
11979 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11980 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11981 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11982 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11985
11986 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11988 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11989 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11990
11991 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11992
11993 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11994 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11996 };
11997
11998 static const struct builtin_description bdesc_1arg[] =
11999 {
12000 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12001 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12002
12003 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12004 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12005 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12006
12007 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12008 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12009 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12010 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12011
12012 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12013 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12015
12016 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12017
12018 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12020
12021 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12022 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12026
12027 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12028
12029 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12031
12032 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12033 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12034 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
12035 };
12036
12037 void
12038 ix86_init_builtins ()
12039 {
12040 if (TARGET_MMX)
12041 ix86_init_mmx_sse_builtins ();
12042 }
12043
12044 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12045 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
12046 builtins are defined. */
12047 static void
12048 ix86_init_mmx_sse_builtins ()
12049 {
12050 const struct builtin_description * d;
12051 size_t i;
12052
12053 tree pchar_type_node = build_pointer_type (char_type_node);
12054 tree pfloat_type_node = build_pointer_type (float_type_node);
12055 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12056 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12057 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12058
12059 /* Comparisons. */
12060 tree int_ftype_v4sf_v4sf
12061 = build_function_type_list (integer_type_node,
12062 V4SF_type_node, V4SF_type_node, NULL_TREE);
12063 tree v4si_ftype_v4sf_v4sf
12064 = build_function_type_list (V4SI_type_node,
12065 V4SF_type_node, V4SF_type_node, NULL_TREE);
12066 /* MMX/SSE/integer conversions. */
12067 tree int_ftype_v4sf
12068 = build_function_type_list (integer_type_node,
12069 V4SF_type_node, NULL_TREE);
12070 tree int_ftype_v8qi
12071 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12072 tree v4sf_ftype_v4sf_int
12073 = build_function_type_list (V4SF_type_node,
12074 V4SF_type_node, integer_type_node, NULL_TREE);
12075 tree v4sf_ftype_v4sf_v2si
12076 = build_function_type_list (V4SF_type_node,
12077 V4SF_type_node, V2SI_type_node, NULL_TREE);
12078 tree int_ftype_v4hi_int
12079 = build_function_type_list (integer_type_node,
12080 V4HI_type_node, integer_type_node, NULL_TREE);
12081 tree v4hi_ftype_v4hi_int_int
12082 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12083 integer_type_node, integer_type_node,
12084 NULL_TREE);
12085 /* Miscellaneous. */
12086 tree v8qi_ftype_v4hi_v4hi
12087 = build_function_type_list (V8QI_type_node,
12088 V4HI_type_node, V4HI_type_node, NULL_TREE);
12089 tree v4hi_ftype_v2si_v2si
12090 = build_function_type_list (V4HI_type_node,
12091 V2SI_type_node, V2SI_type_node, NULL_TREE);
12092 tree v4sf_ftype_v4sf_v4sf_int
12093 = build_function_type_list (V4SF_type_node,
12094 V4SF_type_node, V4SF_type_node,
12095 integer_type_node, NULL_TREE);
12096 tree v2si_ftype_v4hi_v4hi
12097 = build_function_type_list (V2SI_type_node,
12098 V4HI_type_node, V4HI_type_node, NULL_TREE);
12099 tree v4hi_ftype_v4hi_int
12100 = build_function_type_list (V4HI_type_node,
12101 V4HI_type_node, integer_type_node, NULL_TREE);
12102 tree v4hi_ftype_v4hi_di
12103 = build_function_type_list (V4HI_type_node,
12104 V4HI_type_node, long_long_unsigned_type_node,
12105 NULL_TREE);
12106 tree v2si_ftype_v2si_di
12107 = build_function_type_list (V2SI_type_node,
12108 V2SI_type_node, long_long_unsigned_type_node,
12109 NULL_TREE);
12110 tree void_ftype_void
12111 = build_function_type (void_type_node, void_list_node);
12112 tree void_ftype_unsigned
12113 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12114 tree unsigned_ftype_void
12115 = build_function_type (unsigned_type_node, void_list_node);
12116 tree di_ftype_void
12117 = build_function_type (long_long_unsigned_type_node, void_list_node);
12118 tree v4sf_ftype_void
12119 = build_function_type (V4SF_type_node, void_list_node);
12120 tree v2si_ftype_v4sf
12121 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12122 /* Loads/stores. */
12123 tree void_ftype_v8qi_v8qi_pchar
12124 = build_function_type_list (void_type_node,
12125 V8QI_type_node, V8QI_type_node,
12126 pchar_type_node, NULL_TREE);
12127 tree v4sf_ftype_pfloat
12128 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12129 /* @@@ the type is bogus */
12130 tree v4sf_ftype_v4sf_pv2si
12131 = build_function_type_list (V4SF_type_node,
12132 V4SF_type_node, pv2di_type_node, NULL_TREE);
12133 tree void_ftype_pv2si_v4sf
12134 = build_function_type_list (void_type_node,
12135 pv2di_type_node, V4SF_type_node, NULL_TREE);
12136 tree void_ftype_pfloat_v4sf
12137 = build_function_type_list (void_type_node,
12138 pfloat_type_node, V4SF_type_node, NULL_TREE);
12139 tree void_ftype_pdi_di
12140 = build_function_type_list (void_type_node,
12141 pdi_type_node, long_long_unsigned_type_node,
12142 NULL_TREE);
12143 tree void_ftype_pv2di_v2di
12144 = build_function_type_list (void_type_node,
12145 pv2di_type_node, V2DI_type_node, NULL_TREE);
12146 /* Normal vector unops. */
12147 tree v4sf_ftype_v4sf
12148 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12149
12150 /* Normal vector binops. */
12151 tree v4sf_ftype_v4sf_v4sf
12152 = build_function_type_list (V4SF_type_node,
12153 V4SF_type_node, V4SF_type_node, NULL_TREE);
12154 tree v8qi_ftype_v8qi_v8qi
12155 = build_function_type_list (V8QI_type_node,
12156 V8QI_type_node, V8QI_type_node, NULL_TREE);
12157 tree v4hi_ftype_v4hi_v4hi
12158 = build_function_type_list (V4HI_type_node,
12159 V4HI_type_node, V4HI_type_node, NULL_TREE);
12160 tree v2si_ftype_v2si_v2si
12161 = build_function_type_list (V2SI_type_node,
12162 V2SI_type_node, V2SI_type_node, NULL_TREE);
12163 tree di_ftype_di_di
12164 = build_function_type_list (long_long_unsigned_type_node,
12165 long_long_unsigned_type_node,
12166 long_long_unsigned_type_node, NULL_TREE);
12167
12168 tree v2si_ftype_v2sf
12169 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12170 tree v2sf_ftype_v2si
12171 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12172 tree v2si_ftype_v2si
12173 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12174 tree v2sf_ftype_v2sf
12175 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12176 tree v2sf_ftype_v2sf_v2sf
12177 = build_function_type_list (V2SF_type_node,
12178 V2SF_type_node, V2SF_type_node, NULL_TREE);
12179 tree v2si_ftype_v2sf_v2sf
12180 = build_function_type_list (V2SI_type_node,
12181 V2SF_type_node, V2SF_type_node, NULL_TREE);
12182 tree pint_type_node = build_pointer_type (integer_type_node);
12183 tree pdouble_type_node = build_pointer_type (double_type_node);
12184 tree int_ftype_v2df_v2df
12185 = build_function_type_list (integer_type_node,
12186 V2DF_type_node, V2DF_type_node, NULL_TREE);
12187
12188 tree ti_ftype_void
12189 = build_function_type (intTI_type_node, void_list_node);
12190 tree ti_ftype_ti_ti
12191 = build_function_type_list (intTI_type_node,
12192 intTI_type_node, intTI_type_node, NULL_TREE);
12193 tree void_ftype_pvoid
12194 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12195 tree v2di_ftype_di
12196 = build_function_type_list (V2DI_type_node,
12197 long_long_unsigned_type_node, NULL_TREE);
12198 tree v4sf_ftype_v4si
12199 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12200 tree v4si_ftype_v4sf
12201 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12202 tree v2df_ftype_v4si
12203 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12204 tree v4si_ftype_v2df
12205 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12206 tree v2si_ftype_v2df
12207 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12208 tree v4sf_ftype_v2df
12209 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12210 tree v2df_ftype_v2si
12211 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12212 tree v2df_ftype_v4sf
12213 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12214 tree int_ftype_v2df
12215 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12216 tree v2df_ftype_v2df_int
12217 = build_function_type_list (V2DF_type_node,
12218 V2DF_type_node, integer_type_node, NULL_TREE);
12219 tree v4sf_ftype_v4sf_v2df
12220 = build_function_type_list (V4SF_type_node,
12221 V4SF_type_node, V2DF_type_node, NULL_TREE);
12222 tree v2df_ftype_v2df_v4sf
12223 = build_function_type_list (V2DF_type_node,
12224 V2DF_type_node, V4SF_type_node, NULL_TREE);
12225 tree v2df_ftype_v2df_v2df_int
12226 = build_function_type_list (V2DF_type_node,
12227 V2DF_type_node, V2DF_type_node,
12228 integer_type_node,
12229 NULL_TREE);
12230 tree v2df_ftype_v2df_pv2si
12231 = build_function_type_list (V2DF_type_node,
12232 V2DF_type_node, pv2si_type_node, NULL_TREE);
12233 tree void_ftype_pv2si_v2df
12234 = build_function_type_list (void_type_node,
12235 pv2si_type_node, V2DF_type_node, NULL_TREE);
12236 tree void_ftype_pdouble_v2df
12237 = build_function_type_list (void_type_node,
12238 pdouble_type_node, V2DF_type_node, NULL_TREE);
12239 tree void_ftype_pint_int
12240 = build_function_type_list (void_type_node,
12241 pint_type_node, integer_type_node, NULL_TREE);
12242 tree void_ftype_v16qi_v16qi_pchar
12243 = build_function_type_list (void_type_node,
12244 V16QI_type_node, V16QI_type_node,
12245 pchar_type_node, NULL_TREE);
12246 tree v2df_ftype_pdouble
12247 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12248 tree v2df_ftype_v2df_v2df
12249 = build_function_type_list (V2DF_type_node,
12250 V2DF_type_node, V2DF_type_node, NULL_TREE);
12251 tree v16qi_ftype_v16qi_v16qi
12252 = build_function_type_list (V16QI_type_node,
12253 V16QI_type_node, V16QI_type_node, NULL_TREE);
12254 tree v8hi_ftype_v8hi_v8hi
12255 = build_function_type_list (V8HI_type_node,
12256 V8HI_type_node, V8HI_type_node, NULL_TREE);
12257 tree v4si_ftype_v4si_v4si
12258 = build_function_type_list (V4SI_type_node,
12259 V4SI_type_node, V4SI_type_node, NULL_TREE);
12260 tree v2di_ftype_v2di_v2di
12261 = build_function_type_list (V2DI_type_node,
12262 V2DI_type_node, V2DI_type_node, NULL_TREE);
12263 tree v2di_ftype_v2df_v2df
12264 = build_function_type_list (V2DI_type_node,
12265 V2DF_type_node, V2DF_type_node, NULL_TREE);
12266 tree v2df_ftype_v2df
12267 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12268 tree v2df_ftype_double
12269 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12270 tree v2df_ftype_double_double
12271 = build_function_type_list (V2DF_type_node,
12272 double_type_node, double_type_node, NULL_TREE);
12273 tree int_ftype_v8hi_int
12274 = build_function_type_list (integer_type_node,
12275 V8HI_type_node, integer_type_node, NULL_TREE);
12276 tree v8hi_ftype_v8hi_int_int
12277 = build_function_type_list (V8HI_type_node,
12278 V8HI_type_node, integer_type_node,
12279 integer_type_node, NULL_TREE);
12280 tree v2di_ftype_v2di_int
12281 = build_function_type_list (V2DI_type_node,
12282 V2DI_type_node, integer_type_node, NULL_TREE);
12283 tree v4si_ftype_v4si_int
12284 = build_function_type_list (V4SI_type_node,
12285 V4SI_type_node, integer_type_node, NULL_TREE);
12286 tree v8hi_ftype_v8hi_int
12287 = build_function_type_list (V8HI_type_node,
12288 V8HI_type_node, integer_type_node, NULL_TREE);
12289 tree v8hi_ftype_v8hi_v2di
12290 = build_function_type_list (V8HI_type_node,
12291 V8HI_type_node, V2DI_type_node, NULL_TREE);
12292 tree v4si_ftype_v4si_v2di
12293 = build_function_type_list (V4SI_type_node,
12294 V4SI_type_node, V2DI_type_node, NULL_TREE);
12295 tree v4si_ftype_v8hi_v8hi
12296 = build_function_type_list (V4SI_type_node,
12297 V8HI_type_node, V8HI_type_node, NULL_TREE);
12298 tree di_ftype_v8qi_v8qi
12299 = build_function_type_list (long_long_unsigned_type_node,
12300 V8QI_type_node, V8QI_type_node, NULL_TREE);
12301 tree v2di_ftype_v16qi_v16qi
12302 = build_function_type_list (V2DI_type_node,
12303 V16QI_type_node, V16QI_type_node, NULL_TREE);
12304 tree int_ftype_v16qi
12305 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12306
12307 /* Add all builtins that are more or less simple operations on two
12308 operands. */
12309 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12310 {
12311 /* Use one of the operands; the target can have a different mode for
12312 mask-generating compares. */
12313 enum machine_mode mode;
12314 tree type;
12315
12316 if (d->name == 0)
12317 continue;
12318 mode = insn_data[d->icode].operand[1].mode;
12319
12320 switch (mode)
12321 {
12322 case V16QImode:
12323 type = v16qi_ftype_v16qi_v16qi;
12324 break;
12325 case V8HImode:
12326 type = v8hi_ftype_v8hi_v8hi;
12327 break;
12328 case V4SImode:
12329 type = v4si_ftype_v4si_v4si;
12330 break;
12331 case V2DImode:
12332 type = v2di_ftype_v2di_v2di;
12333 break;
12334 case V2DFmode:
12335 type = v2df_ftype_v2df_v2df;
12336 break;
12337 case TImode:
12338 type = ti_ftype_ti_ti;
12339 break;
12340 case V4SFmode:
12341 type = v4sf_ftype_v4sf_v4sf;
12342 break;
12343 case V8QImode:
12344 type = v8qi_ftype_v8qi_v8qi;
12345 break;
12346 case V4HImode:
12347 type = v4hi_ftype_v4hi_v4hi;
12348 break;
12349 case V2SImode:
12350 type = v2si_ftype_v2si_v2si;
12351 break;
12352 case DImode:
12353 type = di_ftype_di_di;
12354 break;
12355
12356 default:
12357 abort ();
12358 }
12359
12360 /* Override for comparisons. */
12361 if (d->icode == CODE_FOR_maskcmpv4sf3
12362 || d->icode == CODE_FOR_maskncmpv4sf3
12363 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12364 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12365 type = v4si_ftype_v4sf_v4sf;
12366
12367 if (d->icode == CODE_FOR_maskcmpv2df3
12368 || d->icode == CODE_FOR_maskncmpv2df3
12369 || d->icode == CODE_FOR_vmmaskcmpv2df3
12370 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12371 type = v2di_ftype_v2df_v2df;
12372
12373 def_builtin (d->mask, d->name, type, d->code);
12374 }
12375
12376 /* Add the remaining MMX insns with somewhat more complicated types. */
12377 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12378 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12379 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12380 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12381 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12382 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12383 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12384
12385 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12386 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12387 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12388
12389 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12390 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12391
12392 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12393 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12394
12395 /* comi/ucomi insns. */
12396 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12397 if (d->mask == MASK_SSE2)
12398 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12399 else
12400 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12401
12402 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12403 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12404 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12405
12406 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12407 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12408 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12409 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12410 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12411 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12412
12413 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12414 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12415 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12416 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12417
12418 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12419 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12420
12421 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12422
12423 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12424 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12425 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12426 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12427 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12428 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12429
12430 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12431 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12432 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12433 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12434
12435 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12436 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12437 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12438 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12439
12440 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12441
12442 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12443
12444 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12445 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12446 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12447 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12448 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12449 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12450
12451 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12452
12453 /* Original 3DNow! */
12454 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12455 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12456 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12471 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12472 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12473 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12474
12475 /* 3DNow! extension as used in the Athlon CPU. */
12476 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12477 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12478 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12479 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12480 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12481 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12482
12483 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12484
12485 /* SSE2 */
12486 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12488
12489 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12490 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12491
12492 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12494 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12496 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12498
12499 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12501 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12502 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12503
12504 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12505 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12506 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12507 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12508 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12509
12510 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12511 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12512 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12513 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12514
12515 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12517
12518 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12519
12520 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12522
12523 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12524 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12525 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12526 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12528
12529 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12530
12531 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12533
12534 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12537
12538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12541
12542 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12549
12550 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12553
12554 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12557
12558 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12560 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12561
12562 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12563 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12564
12565 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12568
12569 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12572
12573 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12574 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12575
12576 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12577 }
12578
12579 /* Errors in the source file can cause expand_expr to return const0_rtx
12580 where we expect a vector. To avoid crashing, use one of the vector
12581 clear instructions. */
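/* A minimal usage sketch, as the binop expander below does: each vector-typed
   operand is guarded before it reaches an insn predicate,

     if (VECTOR_MODE_P (mode0))
       op0 = safe_vector_operand (op0, mode0);

   so a const0_rtx produced for an erroneous argument is replaced by a freshly
   cleared vector register instead of crashing the expander.  */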
12582 static rtx
12583 safe_vector_operand (x, mode)
12584 rtx x;
12585 enum machine_mode mode;
12586 {
12587 if (x != const0_rtx)
12588 return x;
12589 x = gen_reg_rtx (mode);
12590
12591 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12592 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12593 : gen_rtx_SUBREG (DImode, x, 0)));
12594 else
12595 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12596 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12597 return x;
12598 }
12599
12600 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12601
12602 static rtx
12603 ix86_expand_binop_builtin (icode, arglist, target)
12604 enum insn_code icode;
12605 tree arglist;
12606 rtx target;
12607 {
12608 rtx pat;
12609 tree arg0 = TREE_VALUE (arglist);
12610 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12611 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12612 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12613 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12614 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12615 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12616
12617 if (VECTOR_MODE_P (mode0))
12618 op0 = safe_vector_operand (op0, mode0);
12619 if (VECTOR_MODE_P (mode1))
12620 op1 = safe_vector_operand (op1, mode1);
12621
12622 if (! target
12623 || GET_MODE (target) != tmode
12624 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12625 target = gen_reg_rtx (tmode);
12626
12627 /* In case the insn wants input operands in modes different from
12628 the result, abort. */
12629 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12630 abort ();
12631
12632 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12633 op0 = copy_to_mode_reg (mode0, op0);
12634 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12635 op1 = copy_to_mode_reg (mode1, op1);
12636
12637 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12638 yet one of the two must not be a memory. This is normally enforced
12639 by expanders, but we didn't bother to create one here. */
12640 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12641 op0 = copy_to_mode_reg (mode0, op0);
12642
12643 pat = GEN_FCN (icode) (target, op0, op1);
12644 if (! pat)
12645 return 0;
12646 emit_insn (pat);
12647 return target;
12648 }
12649
12650 /* In type_for_mode we restrict the ability to create TImode types
12651 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12652 to have a V4SFmode signature. Convert them in-place to TImode. */
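/* The conversion relies on gen_lowpart: the incoming V4SFmode operands are
   reinterpreted as TImode for the logical insn, and the TImode result is
   handed back to the caller reinterpreted as V4SFmode.  */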
12653
12654 static rtx
12655 ix86_expand_timode_binop_builtin (icode, arglist, target)
12656 enum insn_code icode;
12657 tree arglist;
12658 rtx target;
12659 {
12660 rtx pat;
12661 tree arg0 = TREE_VALUE (arglist);
12662 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12663 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12664 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12665
12666 op0 = gen_lowpart (TImode, op0);
12667 op1 = gen_lowpart (TImode, op1);
12668 target = gen_reg_rtx (TImode);
12669
12670 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12671 op0 = copy_to_mode_reg (TImode, op0);
12672 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12673 op1 = copy_to_mode_reg (TImode, op1);
12674
12675 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12676 yet one of the two must not be a memory. This is normally enforced
12677 by expanders, but we didn't bother to create one here. */
12678 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12679 op0 = copy_to_mode_reg (TImode, op0);
12680
12681 pat = GEN_FCN (icode) (target, op0, op1);
12682 if (! pat)
12683 return 0;
12684 emit_insn (pat);
12685
12686 return gen_lowpart (V4SFmode, target);
12687 }
12688
12689 /* Subroutine of ix86_expand_builtin to take care of stores. */
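/* The first argument in ARGLIST is the destination pointer and the second is
   the value to store; the pointer is wrapped in a MEM of the insn's operand 0
   mode, and 0 is always returned since a store produces no value.  */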
12690
12691 static rtx
12692 ix86_expand_store_builtin (icode, arglist)
12693 enum insn_code icode;
12694 tree arglist;
12695 {
12696 rtx pat;
12697 tree arg0 = TREE_VALUE (arglist);
12698 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12699 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12700 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12701 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12702 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12703
12704 if (VECTOR_MODE_P (mode1))
12705 op1 = safe_vector_operand (op1, mode1);
12706
12707 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12708
12709 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12710 op1 = copy_to_mode_reg (mode1, op1);
12711
12712 pat = GEN_FCN (icode) (op0, op1);
12713 if (pat)
12714 emit_insn (pat);
12715 return 0;
12716 }
12717
12718 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12719
12720 static rtx
12721 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12722 enum insn_code icode;
12723 tree arglist;
12724 rtx target;
12725 int do_load;
12726 {
12727 rtx pat;
12728 tree arg0 = TREE_VALUE (arglist);
12729 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12730 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12731 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12732
12733 if (! target
12734 || GET_MODE (target) != tmode
12735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12736 target = gen_reg_rtx (tmode);
12737 if (do_load)
12738 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12739 else
12740 {
12741 if (VECTOR_MODE_P (mode0))
12742 op0 = safe_vector_operand (op0, mode0);
12743
12744 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12745 op0 = copy_to_mode_reg (mode0, op0);
12746 }
12747
12748 pat = GEN_FCN (icode) (target, op0);
12749 if (! pat)
12750 return 0;
12751 emit_insn (pat);
12752 return target;
12753 }
12754
12755 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12756 sqrtss, rsqrtss, rcpss. */
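/* These scalar insns take the source twice: the low element receives the
   computed value while the remaining elements are taken unchanged from the
   other input, which is why OP0 is duplicated into OP1 below.  */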
12757
12758 static rtx
12759 ix86_expand_unop1_builtin (icode, arglist, target)
12760 enum insn_code icode;
12761 tree arglist;
12762 rtx target;
12763 {
12764 rtx pat;
12765 tree arg0 = TREE_VALUE (arglist);
12766 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12767 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12768 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12769
12770 if (! target
12771 || GET_MODE (target) != tmode
12772 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12773 target = gen_reg_rtx (tmode);
12774
12775 if (VECTOR_MODE_P (mode0))
12776 op0 = safe_vector_operand (op0, mode0);
12777
12778 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12779 op0 = copy_to_mode_reg (mode0, op0);
12780
12781 op1 = op0;
12782 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12783 op1 = copy_to_mode_reg (mode0, op1);
12784
12785 pat = GEN_FCN (icode) (target, op0, op1);
12786 if (! pat)
12787 return 0;
12788 emit_insn (pat);
12789 return target;
12790 }
12791
12792 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
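/* D->flag marks comparisons the hardware only provides in the reverse
   direction (for example GT is done as LT with the operands exchanged), so
   OP1 is copied to a fresh register and the two operands are swapped.  */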
12793
12794 static rtx
12795 ix86_expand_sse_compare (d, arglist, target)
12796 const struct builtin_description *d;
12797 tree arglist;
12798 rtx target;
12799 {
12800 rtx pat;
12801 tree arg0 = TREE_VALUE (arglist);
12802 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12803 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12804 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12805 rtx op2;
12806 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12807 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12808 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12809 enum rtx_code comparison = d->comparison;
12810
12811 if (VECTOR_MODE_P (mode0))
12812 op0 = safe_vector_operand (op0, mode0);
12813 if (VECTOR_MODE_P (mode1))
12814 op1 = safe_vector_operand (op1, mode1);
12815
12816 /* Swap operands if we have a comparison that isn't available in
12817 hardware. */
12818 if (d->flag)
12819 {
12820 rtx tmp = gen_reg_rtx (mode1);
12821 emit_move_insn (tmp, op1);
12822 op1 = op0;
12823 op0 = tmp;
12824 }
12825
12826 if (! target
12827 || GET_MODE (target) != tmode
12828 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12829 target = gen_reg_rtx (tmode);
12830
12831 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12832 op0 = copy_to_mode_reg (mode0, op0);
12833 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12834 op1 = copy_to_mode_reg (mode1, op1);
12835
12836 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12837 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12838 if (! pat)
12839 return 0;
12840 emit_insn (pat);
12841 return target;
12842 }
12843
12844 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
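/* The comi/ucomi instructions only set EFLAGS, so the expander materializes
   a 0/1 result: an SImode pseudo is cleared, and its low QImode part is then
   set from the requested condition on the flags register (a setcc); the
   SImode register is returned as the value of the builtin.  */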
12845
12846 static rtx
12847 ix86_expand_sse_comi (d, arglist, target)
12848 const struct builtin_description *d;
12849 tree arglist;
12850 rtx target;
12851 {
12852 rtx pat;
12853 tree arg0 = TREE_VALUE (arglist);
12854 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12855 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12856 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12857 rtx op2;
12858 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12859 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12860 enum rtx_code comparison = d->comparison;
12861
12862 if (VECTOR_MODE_P (mode0))
12863 op0 = safe_vector_operand (op0, mode0);
12864 if (VECTOR_MODE_P (mode1))
12865 op1 = safe_vector_operand (op1, mode1);
12866
12867 /* Swap operands if we have a comparison that isn't available in
12868 hardware. */
12869 if (d->flag)
12870 {
12871 rtx tmp = op1;
12872 op1 = op0;
12873 op0 = tmp;
12874 }
12875
12876 target = gen_reg_rtx (SImode);
12877 emit_move_insn (target, const0_rtx);
12878 target = gen_rtx_SUBREG (QImode, target, 0);
12879
12880 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12881 op0 = copy_to_mode_reg (mode0, op0);
12882 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12883 op1 = copy_to_mode_reg (mode1, op1);
12884
12885 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12886 pat = GEN_FCN (d->icode) (op0, op1, op2);
12887 if (! pat)
12888 return 0;
12889 emit_insn (pat);
12890 emit_insn (gen_rtx_SET (VOIDmode,
12891 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12892 gen_rtx_fmt_ee (comparison, QImode,
12893 gen_rtx_REG (CCmode, FLAGS_REG),
12894 const0_rtx)));
12895
12896 return SUBREG_REG (target);
12897 }
12898
12899 /* Expand an expression EXP that calls a built-in function,
12900 with result going to TARGET if that's convenient
12901 (and in mode MODE if that's convenient).
12902 SUBTARGET may be used as the target for computing one of EXP's operands.
12903 IGNORE is nonzero if the value is to be ignored. */
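/* Builtins that need special handling (immediate operands, stores with no
   result value, argument reordering, ...) are expanded by the explicit cases
   below; anything not matched there is looked up in the bdesc_2arg,
   bdesc_1arg and bdesc_comi tables and routed to the generic expanders
   defined above.  */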
12904
12905 rtx
12906 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12907 tree exp;
12908 rtx target;
12909 rtx subtarget ATTRIBUTE_UNUSED;
12910 enum machine_mode mode ATTRIBUTE_UNUSED;
12911 int ignore ATTRIBUTE_UNUSED;
12912 {
12913 const struct builtin_description *d;
12914 size_t i;
12915 enum insn_code icode;
12916 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12917 tree arglist = TREE_OPERAND (exp, 1);
12918 tree arg0, arg1, arg2;
12919 rtx op0, op1, op2, pat;
12920 enum machine_mode tmode, mode0, mode1, mode2;
12921 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12922
12923 switch (fcode)
12924 {
12925 case IX86_BUILTIN_EMMS:
12926 emit_insn (gen_emms ());
12927 return 0;
12928
12929 case IX86_BUILTIN_SFENCE:
12930 emit_insn (gen_sfence ());
12931 return 0;
12932
12933 case IX86_BUILTIN_PEXTRW:
12934 case IX86_BUILTIN_PEXTRW128:
12935 icode = (fcode == IX86_BUILTIN_PEXTRW
12936 ? CODE_FOR_mmx_pextrw
12937 : CODE_FOR_sse2_pextrw);
12938 arg0 = TREE_VALUE (arglist);
12939 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12940 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12941 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12942 tmode = insn_data[icode].operand[0].mode;
12943 mode0 = insn_data[icode].operand[1].mode;
12944 mode1 = insn_data[icode].operand[2].mode;
12945
12946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12947 op0 = copy_to_mode_reg (mode0, op0);
12948 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12949 {
12950 /* @@@ better error message */
12951 error ("selector must be an immediate");
12952 return gen_reg_rtx (tmode);
12953 }
12954 if (target == 0
12955 || GET_MODE (target) != tmode
12956 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12957 target = gen_reg_rtx (tmode);
12958 pat = GEN_FCN (icode) (target, op0, op1);
12959 if (! pat)
12960 return 0;
12961 emit_insn (pat);
12962 return target;
12963
12964 case IX86_BUILTIN_PINSRW:
12965 case IX86_BUILTIN_PINSRW128:
12966 icode = (fcode == IX86_BUILTIN_PINSRW
12967 ? CODE_FOR_mmx_pinsrw
12968 : CODE_FOR_sse2_pinsrw);
12969 arg0 = TREE_VALUE (arglist);
12970 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12971 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12972 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12973 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12974 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12975 tmode = insn_data[icode].operand[0].mode;
12976 mode0 = insn_data[icode].operand[1].mode;
12977 mode1 = insn_data[icode].operand[2].mode;
12978 mode2 = insn_data[icode].operand[3].mode;
12979
12980 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12981 op0 = copy_to_mode_reg (mode0, op0);
12982 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12983 op1 = copy_to_mode_reg (mode1, op1);
12984 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12985 {
12986 /* @@@ better error message */
12987 error ("selector must be an immediate");
12988 return const0_rtx;
12989 }
12990 if (target == 0
12991 || GET_MODE (target) != tmode
12992 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12993 target = gen_reg_rtx (tmode);
12994 pat = GEN_FCN (icode) (target, op0, op1, op2);
12995 if (! pat)
12996 return 0;
12997 emit_insn (pat);
12998 return target;
12999
13000 case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
13001 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13002 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13003 : CODE_FOR_sse2_maskmovdqu);
13004 /* Note the arg order is different from the operand order. */
13005 arg1 = TREE_VALUE (arglist);
13006 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13007 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13008 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13009 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13010 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13011 mode0 = insn_data[icode].operand[0].mode;
13012 mode1 = insn_data[icode].operand[1].mode;
13013 mode2 = insn_data[icode].operand[2].mode;
13014
13015 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13016 op0 = copy_to_mode_reg (mode0, op0);
13017 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13018 op1 = copy_to_mode_reg (mode1, op1);
13019 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13020 op2 = copy_to_mode_reg (mode2, op2);
13021 pat = GEN_FCN (icode) (op0, op1, op2);
13022 if (! pat)
13023 return 0;
13024 emit_insn (pat);
13025 return 0;
13026
13027 case IX86_BUILTIN_SQRTSS:
13028 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13029 case IX86_BUILTIN_RSQRTSS:
13030 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13031 case IX86_BUILTIN_RCPSS:
13032 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13033
13034 case IX86_BUILTIN_ANDPS:
13035 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
13036 arglist, target);
13037 case IX86_BUILTIN_ANDNPS:
13038 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
13039 arglist, target);
13040 case IX86_BUILTIN_ORPS:
13041 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
13042 arglist, target);
13043 case IX86_BUILTIN_XORPS:
13044 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
13045 arglist, target);
13046
13047 case IX86_BUILTIN_LOADAPS:
13048 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13049
13050 case IX86_BUILTIN_LOADUPS:
13051 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13052
13053 case IX86_BUILTIN_STOREAPS:
13054 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13055 case IX86_BUILTIN_STOREUPS:
13056 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13057
13058 case IX86_BUILTIN_LOADSS:
13059 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13060
13061 case IX86_BUILTIN_STORESS:
13062 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13063
13064 case IX86_BUILTIN_LOADHPS:
13065 case IX86_BUILTIN_LOADLPS:
13066 case IX86_BUILTIN_LOADHPD:
13067 case IX86_BUILTIN_LOADLPD:
13068 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13069 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13070 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13071 : CODE_FOR_sse2_movlpd);
13072 arg0 = TREE_VALUE (arglist);
13073 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13074 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13075 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13076 tmode = insn_data[icode].operand[0].mode;
13077 mode0 = insn_data[icode].operand[1].mode;
13078 mode1 = insn_data[icode].operand[2].mode;
13079
13080 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13081 op0 = copy_to_mode_reg (mode0, op0);
13082 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13083 if (target == 0
13084 || GET_MODE (target) != tmode
13085 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13086 target = gen_reg_rtx (tmode);
13087 pat = GEN_FCN (icode) (target, op0, op1);
13088 if (! pat)
13089 return 0;
13090 emit_insn (pat);
13091 return target;
13092
13093 case IX86_BUILTIN_STOREHPS:
13094 case IX86_BUILTIN_STORELPS:
13095 case IX86_BUILTIN_STOREHPD:
13096 case IX86_BUILTIN_STORELPD:
13097 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13098 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13099 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13100 : CODE_FOR_sse2_movlpd);
13101 arg0 = TREE_VALUE (arglist);
13102 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13103 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13104 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13105 mode0 = insn_data[icode].operand[1].mode;
13106 mode1 = insn_data[icode].operand[2].mode;
13107
13108 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13109 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13110 op1 = copy_to_mode_reg (mode1, op1);
13111
13112 pat = GEN_FCN (icode) (op0, op0, op1);
13113 if (! pat)
13114 return 0;
13115 emit_insn (pat);
13116 return 0;
13117
13118 case IX86_BUILTIN_MOVNTPS:
13119 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13120 case IX86_BUILTIN_MOVNTQ:
13121 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13122
13123 case IX86_BUILTIN_LDMXCSR:
13124 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13125 target = assign_386_stack_local (SImode, 0);
13126 emit_move_insn (target, op0);
13127 emit_insn (gen_ldmxcsr (target));
13128 return 0;
13129
13130 case IX86_BUILTIN_STMXCSR:
13131 target = assign_386_stack_local (SImode, 0);
13132 emit_insn (gen_stmxcsr (target));
13133 return copy_to_mode_reg (SImode, target);
13134
13135 case IX86_BUILTIN_SHUFPS:
13136 case IX86_BUILTIN_SHUFPD:
13137 icode = (fcode == IX86_BUILTIN_SHUFPS
13138 ? CODE_FOR_sse_shufps
13139 : CODE_FOR_sse2_shufpd);
13140 arg0 = TREE_VALUE (arglist);
13141 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13142 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13143 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13144 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13145 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13146 tmode = insn_data[icode].operand[0].mode;
13147 mode0 = insn_data[icode].operand[1].mode;
13148 mode1 = insn_data[icode].operand[2].mode;
13149 mode2 = insn_data[icode].operand[3].mode;
13150
13151 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13152 op0 = copy_to_mode_reg (mode0, op0);
13153 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13154 op1 = copy_to_mode_reg (mode1, op1);
13155 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13156 {
13157 /* @@@ better error message */
13158 error ("mask must be an immediate");
13159 return gen_reg_rtx (tmode);
13160 }
13161 if (target == 0
13162 || GET_MODE (target) != tmode
13163 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13164 target = gen_reg_rtx (tmode);
13165 pat = GEN_FCN (icode) (target, op0, op1, op2);
13166 if (! pat)
13167 return 0;
13168 emit_insn (pat);
13169 return target;
13170
13171 case IX86_BUILTIN_PSHUFW:
13172 case IX86_BUILTIN_PSHUFD:
13173 case IX86_BUILTIN_PSHUFHW:
13174 case IX86_BUILTIN_PSHUFLW:
13175 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13176 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13177 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13178 : CODE_FOR_mmx_pshufw);
13179 arg0 = TREE_VALUE (arglist);
13180 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13181 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13182 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13183 tmode = insn_data[icode].operand[0].mode;
13184 mode1 = insn_data[icode].operand[1].mode;
13185 mode2 = insn_data[icode].operand[2].mode;
13186
13187 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13188 op0 = copy_to_mode_reg (mode1, op0);
13189 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13190 {
13191 /* @@@ better error message */
13192 error ("mask must be an immediate");
13193 return const0_rtx;
13194 }
13195 if (target == 0
13196 || GET_MODE (target) != tmode
13197 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13198 target = gen_reg_rtx (tmode);
13199 pat = GEN_FCN (icode) (target, op0, op1);
13200 if (! pat)
13201 return 0;
13202 emit_insn (pat);
13203 return target;
13204
13205 case IX86_BUILTIN_FEMMS:
13206 emit_insn (gen_femms ());
13207 return NULL_RTX;
13208
13209 case IX86_BUILTIN_PAVGUSB:
13210 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13211
13212 case IX86_BUILTIN_PF2ID:
13213 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13214
13215 case IX86_BUILTIN_PFACC:
13216 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13217
13218 case IX86_BUILTIN_PFADD:
13219 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13220
13221 case IX86_BUILTIN_PFCMPEQ:
13222 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13223
13224 case IX86_BUILTIN_PFCMPGE:
13225 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13226
13227 case IX86_BUILTIN_PFCMPGT:
13228 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13229
13230 case IX86_BUILTIN_PFMAX:
13231 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13232
13233 case IX86_BUILTIN_PFMIN:
13234 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13235
13236 case IX86_BUILTIN_PFMUL:
13237 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13238
13239 case IX86_BUILTIN_PFRCP:
13240 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13241
13242 case IX86_BUILTIN_PFRCPIT1:
13243 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13244
13245 case IX86_BUILTIN_PFRCPIT2:
13246 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13247
13248 case IX86_BUILTIN_PFRSQIT1:
13249 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13250
13251 case IX86_BUILTIN_PFRSQRT:
13252 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13253
13254 case IX86_BUILTIN_PFSUB:
13255 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13256
13257 case IX86_BUILTIN_PFSUBR:
13258 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13259
13260 case IX86_BUILTIN_PI2FD:
13261 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13262
13263 case IX86_BUILTIN_PMULHRW:
13264 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13265
13266 case IX86_BUILTIN_PF2IW:
13267 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13268
13269 case IX86_BUILTIN_PFNACC:
13270 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13271
13272 case IX86_BUILTIN_PFPNACC:
13273 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13274
13275 case IX86_BUILTIN_PI2FW:
13276 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13277
13278 case IX86_BUILTIN_PSWAPDSI:
13279 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13280
13281 case IX86_BUILTIN_PSWAPDSF:
13282 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13283
13284 case IX86_BUILTIN_SSE_ZERO:
13285 target = gen_reg_rtx (V4SFmode);
13286 emit_insn (gen_sse_clrv4sf (target));
13287 return target;
13288
13289 case IX86_BUILTIN_MMX_ZERO:
13290 target = gen_reg_rtx (DImode);
13291 emit_insn (gen_mmx_clrdi (target));
13292 return target;
13293
13294 case IX86_BUILTIN_SQRTSD:
13295 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13296 case IX86_BUILTIN_LOADAPD:
13297 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13298 case IX86_BUILTIN_LOADUPD:
13299 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13300
13301 case IX86_BUILTIN_STOREAPD:
13302 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13303 case IX86_BUILTIN_STOREUPD:
13304 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13305
13306 case IX86_BUILTIN_LOADSD:
13307 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13308
13309 case IX86_BUILTIN_STORESD:
13310 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13311
13312 case IX86_BUILTIN_SETPD1:
13313 target = assign_386_stack_local (DFmode, 0);
13314 arg0 = TREE_VALUE (arglist);
13315 emit_move_insn (adjust_address (target, DFmode, 0),
13316 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13317 op0 = gen_reg_rtx (V2DFmode);
13318 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13319 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13320 return op0;
13321
13322 case IX86_BUILTIN_SETPD:
13323 target = assign_386_stack_local (V2DFmode, 0);
13324 arg0 = TREE_VALUE (arglist);
13325 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13326 emit_move_insn (adjust_address (target, DFmode, 0),
13327 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13328 emit_move_insn (adjust_address (target, DFmode, 8),
13329 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13330 op0 = gen_reg_rtx (V2DFmode);
13331 emit_insn (gen_sse2_movapd (op0, target));
13332 return op0;
13333
13334 case IX86_BUILTIN_LOADRPD:
13335 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13336 gen_reg_rtx (V2DFmode), 1);
13337 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13338 return target;
13339
13340 case IX86_BUILTIN_LOADPD1:
13341 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13342 gen_reg_rtx (V2DFmode), 1);
13343 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13344 return target;
13345
13346 case IX86_BUILTIN_STOREPD1:
13347 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13348 case IX86_BUILTIN_STORERPD:
13349 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13350
13351 case IX86_BUILTIN_MFENCE:
13352 emit_insn (gen_sse2_mfence ());
13353 return 0;
13354 case IX86_BUILTIN_LFENCE:
13355 emit_insn (gen_sse2_lfence ());
13356 return 0;
13357
13358 case IX86_BUILTIN_CLFLUSH:
13359 arg0 = TREE_VALUE (arglist);
13360 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13361 icode = CODE_FOR_sse2_clflush;
13362 mode0 = insn_data[icode].operand[0].mode;
13363 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13364 op0 = copy_to_mode_reg (mode0, op0);
13365
13366 emit_insn (gen_sse2_clflush (op0));
13367 return 0;
13368
13369 case IX86_BUILTIN_MOVNTPD:
13370 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13371 case IX86_BUILTIN_MOVNTDQ:
13372 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13373 case IX86_BUILTIN_MOVNTI:
13374 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13375
13376 default:
13377 break;
13378 }
13379
13380 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13381 if (d->code == fcode)
13382 {
13383 /* Compares are treated specially. */
13384 if (d->icode == CODE_FOR_maskcmpv4sf3
13385 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13386 || d->icode == CODE_FOR_maskncmpv4sf3
13387 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13388 || d->icode == CODE_FOR_maskcmpv2df3
13389 || d->icode == CODE_FOR_vmmaskcmpv2df3
13390 || d->icode == CODE_FOR_maskncmpv2df3
13391 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13392 return ix86_expand_sse_compare (d, arglist, target);
13393
13394 return ix86_expand_binop_builtin (d->icode, arglist, target);
13395 }
13396
13397 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13398 if (d->code == fcode)
13399 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13400
13401 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13402 if (d->code == fcode)
13403 return ix86_expand_sse_comi (d, arglist, target);
13404
13405 /* @@@ Should really do something sensible here. */
13406 return 0;
13407 }
13408
13409 /* Store OPERAND to memory after reload is completed. This means
13410 that we can't easily use assign_stack_local. */
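/* Three strategies are used below: on 64-bit targets with a red zone the
   value is stored below the stack pointer, on 64-bit targets without one it
   is pushed as DImode, and on 32-bit targets it is pushed in SImode pieces;
   ix86_free_from_memory later releases any stack space allocated here.  */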
13411 rtx
13412 ix86_force_to_memory (mode, operand)
13413 enum machine_mode mode;
13414 rtx operand;
13415 {
13416 rtx result;
13417 if (!reload_completed)
13418 abort ();
13419 if (TARGET_64BIT && TARGET_RED_ZONE)
13420 {
13421 result = gen_rtx_MEM (mode,
13422 gen_rtx_PLUS (Pmode,
13423 stack_pointer_rtx,
13424 GEN_INT (-RED_ZONE_SIZE)));
13425 emit_move_insn (result, operand);
13426 }
13427 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13428 {
13429 switch (mode)
13430 {
13431 case HImode:
13432 case SImode:
13433 operand = gen_lowpart (DImode, operand);
13434 /* FALLTHRU */
13435 case DImode:
13436 emit_insn (
13437 gen_rtx_SET (VOIDmode,
13438 gen_rtx_MEM (DImode,
13439 gen_rtx_PRE_DEC (DImode,
13440 stack_pointer_rtx)),
13441 operand));
13442 break;
13443 default:
13444 abort ();
13445 }
13446 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13447 }
13448 else
13449 {
13450 switch (mode)
13451 {
13452 case DImode:
13453 {
13454 rtx operands[2];
13455 split_di (&operand, 1, operands, operands + 1);
13456 emit_insn (
13457 gen_rtx_SET (VOIDmode,
13458 gen_rtx_MEM (SImode,
13459 gen_rtx_PRE_DEC (Pmode,
13460 stack_pointer_rtx)),
13461 operands[1]));
13462 emit_insn (
13463 gen_rtx_SET (VOIDmode,
13464 gen_rtx_MEM (SImode,
13465 gen_rtx_PRE_DEC (Pmode,
13466 stack_pointer_rtx)),
13467 operands[0]));
13468 }
13469 break;
13470 case HImode:
13471 /* It is better to store HImodes as SImodes. */
13472 if (!TARGET_PARTIAL_REG_STALL)
13473 operand = gen_lowpart (SImode, operand);
13474 /* FALLTHRU */
13475 case SImode:
13476 emit_insn (
13477 gen_rtx_SET (VOIDmode,
13478 gen_rtx_MEM (GET_MODE (operand),
13479 gen_rtx_PRE_DEC (SImode,
13480 stack_pointer_rtx)),
13481 operand));
13482 break;
13483 default:
13484 abort ();
13485 }
13486 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13487 }
13488 return result;
13489 }
13490
13491 /* Free the operand from memory. */
13492 void
13493 ix86_free_from_memory (mode)
13494 enum machine_mode mode;
13495 {
13496 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13497 {
13498 int size;
13499
13500 if (mode == DImode || TARGET_64BIT)
13501 size = 8;
13502 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13503 size = 2;
13504 else
13505 size = 4;
13506 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13507 to a pop or add instruction if registers are available. */
13508 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13509 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13510 GEN_INT (size))));
13511 }
13512 }
13513
13514 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13515 QImode must go into class Q_REGS.
13516 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13517 movdf to do mem-to-mem moves through integer regs. */
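/* For example, the x87 can materialize only 0.0 and 1.0 directly (fldz and
   fld1), which is what the standard_80387_constant_p check below encodes;
   other FP constants end up in the constant pool because a non-FP class
   (or NO_REGS) is returned for them.  */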
13518 enum reg_class
13519 ix86_preferred_reload_class (x, class)
13520 rtx x;
13521 enum reg_class class;
13522 {
13523 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13524 {
13525 /* SSE can't load any constant directly yet. */
13526 if (SSE_CLASS_P (class))
13527 return NO_REGS;
13528 /* Floats can load 0 and 1. */
13529 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13530 {
13531 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13532 if (MAYBE_SSE_CLASS_P (class))
13533 return (reg_class_subset_p (class, GENERAL_REGS)
13534 ? GENERAL_REGS : FLOAT_REGS);
13535 else
13536 return class;
13537 }
13538 /* General regs can load everything. */
13539 if (reg_class_subset_p (class, GENERAL_REGS))
13540 return GENERAL_REGS;
13541 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13542 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13543 return NO_REGS;
13544 }
13545 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13546 return NO_REGS;
13547 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13548 return Q_REGS;
13549 return class;
13550 }
13551
13552 /* If we are copying between general and FP registers, we need a memory
13553 location. The same is true for SSE and MMX registers.
13554
13555 The macro can't work reliably when one of the CLASSES is a class containing
13556 registers from multiple units (SSE, MMX, integer). We avoid this by never
13557 combining those units in a single alternative in the machine description.
13558 Ensure that this constraint holds to avoid unexpected surprises.
13559
13560 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13561 enforce these sanity checks. */
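/* For instance, an SImode copy between an SSE or MMX register and a general
   register can use a direct movd, so SImode is exempted in the return
   expression below; wider modes crossing those unit boundaries must go
   through memory.  */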
13562 int
13563 ix86_secondary_memory_needed (class1, class2, mode, strict)
13564 enum reg_class class1, class2;
13565 enum machine_mode mode;
13566 int strict;
13567 {
13568 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13569 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13570 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13571 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13572 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13573 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13574 {
13575 if (strict)
13576 abort ();
13577 else
13578 return 1;
13579 }
13580 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13581 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13582 && (mode) != SImode)
13583 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13584 && (mode) != SImode));
13585 }
13586 /* Return the cost of moving data from a register in class CLASS1 to
13587 one in class CLASS2.
13588
13589 It is not required that the cost always equal 2 when FROM is the same as TO;
13590 on some machines it is expensive to move between registers if they are not
13591 general registers. */
13592 int
13593 ix86_register_move_cost (mode, class1, class2)
13594 enum machine_mode mode;
13595 enum reg_class class1, class2;
13596 {
13597 /* In case we require secondary memory, compute the cost of the store followed
13598 by the load. When copying from a general purpose register we may emit
13599 multiple stores followed by a single load, causing a memory size mismatch
13600 stall. Count this as an arbitrarily high cost of 20. */
13601 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13602 {
13603 int add_cost = 0;
13604 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13605 add_cost = 20;
13606 return (MEMORY_MOVE_COST (mode, class1, 0)
13607 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13608 }
13609 /* Moves between SSE/MMX and integer unit are expensive. */
13610 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13611 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13612 return ix86_cost->mmxsse_to_integer;
13613 if (MAYBE_FLOAT_CLASS_P (class1))
13614 return ix86_cost->fp_move;
13615 if (MAYBE_SSE_CLASS_P (class1))
13616 return ix86_cost->sse_move;
13617 if (MAYBE_MMX_CLASS_P (class1))
13618 return ix86_cost->mmx_move;
13619 return 2;
13620 }
13621
13622 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13623 int
13624 ix86_hard_regno_mode_ok (regno, mode)
13625 int regno;
13626 enum machine_mode mode;
13627 {
13628 /* Flags, and only flags, can hold CCmode values. */
13629 if (CC_REGNO_P (regno))
13630 return GET_MODE_CLASS (mode) == MODE_CC;
13631 if (GET_MODE_CLASS (mode) == MODE_CC
13632 || GET_MODE_CLASS (mode) == MODE_RANDOM
13633 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13634 return 0;
13635 if (FP_REGNO_P (regno))
13636 return VALID_FP_MODE_P (mode);
13637 if (SSE_REGNO_P (regno))
13638 return VALID_SSE_REG_MODE (mode);
13639 if (MMX_REGNO_P (regno))
13640 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13641 /* We handle both integers and floats in the general purpose registers.
13642 In the future we should be able to handle vector modes as well. */
13643 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13644 return 0;
13645 /* Take care with QImode values - they can be in non-QI regs, but then
13646 they do cause partial register stalls. */
13647 if (regno < 4 || mode != QImode || TARGET_64BIT)
13648 return 1;
13649 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13650 }
13651
13652 /* Return the cost of moving data of mode M between a
13653 register and memory. A value of 2 is the default; this cost is
13654 relative to those in `REGISTER_MOVE_COST'.
13655
13656 If moving between registers and memory is more expensive than
13657 between two registers, you should define this macro to express the
13658 relative cost.
13659
13660 Also model the increased cost of moving QImode values in registers
13661 that are not in the Q_REGS class. */
13663 int
13664 ix86_memory_move_cost (mode, class, in)
13665 enum machine_mode mode;
13666 enum reg_class class;
13667 int in;
13668 {
13669 if (FLOAT_CLASS_P (class))
13670 {
13671 int index;
13672 switch (mode)
13673 {
13674 case SFmode:
13675 index = 0;
13676 break;
13677 case DFmode:
13678 index = 1;
13679 break;
13680 case XFmode:
13681 case TFmode:
13682 index = 2;
13683 break;
13684 default:
13685 return 100;
13686 }
13687 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13688 }
13689 if (SSE_CLASS_P (class))
13690 {
13691 int index;
13692 switch (GET_MODE_SIZE (mode))
13693 {
13694 case 4:
13695 index = 0;
13696 break;
13697 case 8:
13698 index = 1;
13699 break;
13700 case 16:
13701 index = 2;
13702 break;
13703 default:
13704 return 100;
13705 }
13706 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13707 }
13708 if (MMX_CLASS_P (class))
13709 {
13710 int index;
13711 switch (GET_MODE_SIZE (mode))
13712 {
13713 case 4:
13714 index = 0;
13715 break;
13716 case 8:
13717 index = 1;
13718 break;
13719 default:
13720 return 100;
13721 }
13722 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13723 }
13724 switch (GET_MODE_SIZE (mode))
13725 {
13726 case 1:
13727 if (in)
13728 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13729 : ix86_cost->movzbl_load);
13730 else
13731 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13732 : ix86_cost->int_store[0] + 4);
13733 break;
13734 case 2:
13735 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13736 default:
13737 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13738 if (mode == TFmode)
13739 mode = XFmode;
13740 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13741 * (int) GET_MODE_SIZE (mode) / 4);
13742 }
13743 }
13744
13745 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13746 static void
13747 ix86_svr3_asm_out_constructor (symbol, priority)
13748 rtx symbol;
13749 int priority ATTRIBUTE_UNUSED;
13750 {
13751 init_section ();
13752 fputs ("\tpushl $", asm_out_file);
13753 assemble_name (asm_out_file, XSTR (symbol, 0));
13754 fputc ('\n', asm_out_file);
13755 }
13756 #endif
13757
13758 #if TARGET_MACHO
13759
13760 static int current_machopic_label_num;
13761
13762 /* Given a symbol name and its associated stub, write out the
13763 definition of the stub. */
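/* For the non-PIC case the emitted stub has roughly the following shape
   (label spellings other than the L<n>$lz lazy pointer come from the
   GEN_*_NAME_FOR_* macros and are only illustrative here):

     <stub>:
             .indirect_symbol <symbol>
             jmp     *L1$lz
     <binder>:
             pushl   $L1$lz
             jmp     dyld_stub_binding_helper
             <lazy symbol pointer section>
     L1$lz:
             .indirect_symbol <symbol>
             .long   <binder>                                              */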
13764
13765 void
13766 machopic_output_stub (file, symb, stub)
13767 FILE *file;
13768 const char *symb, *stub;
13769 {
13770 unsigned int length;
13771 char *binder_name, *symbol_name, lazy_ptr_name[32];
13772 int label = ++current_machopic_label_num;
13773
13774 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13775 symb = (*targetm.strip_name_encoding) (symb);
13776
13777 length = strlen (stub);
13778 binder_name = alloca (length + 32);
13779 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13780
13781 length = strlen (symb);
13782 symbol_name = alloca (length + 32);
13783 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13784
13785 sprintf (lazy_ptr_name, "L%d$lz", label);
13786
13787 if (MACHOPIC_PURE)
13788 machopic_picsymbol_stub_section ();
13789 else
13790 machopic_symbol_stub_section ();
13791
13792 fprintf (file, "%s:\n", stub);
13793 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13794
13795 if (MACHOPIC_PURE)
13796 {
13797 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13798 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13799 fprintf (file, "\tjmp %%edx\n");
13800 }
13801 else
13802 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13803
13804 fprintf (file, "%s:\n", binder_name);
13805
13806 if (MACHOPIC_PURE)
13807 {
13808 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13809 fprintf (file, "\tpushl %%eax\n");
13810 }
13811 else
13812 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13813
13814 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13815
13816 machopic_lazy_symbol_ptr_section ();
13817 fprintf (file, "%s:\n", lazy_ptr_name);
13818 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13819 fprintf (file, "\t.long %s\n", binder_name);
13820 }
13821 #endif /* TARGET_MACHO */
13822
13823 /* Order the registers for the register allocator. */
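/* The resulting order is: call-clobbered general registers, call-saved
   general registers, x87 registers (before SSE when they are used for FP
   math, after SSE otherwise), SSE and REX SSE registers, then MMX; any
   remaining slots are filled with 0.  */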
13824
13825 void
13826 x86_order_regs_for_local_alloc ()
13827 {
13828 int pos = 0;
13829 int i;
13830
13831 /* First allocate the local general purpose registers. */
13832 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13833 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13834 reg_alloc_order [pos++] = i;
13835
13836 /* Global general purpose registers. */
13837 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13838 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13839 reg_alloc_order [pos++] = i;
13840
13841 /* x87 registers come first in case we are doing FP math
13842 using them. */
13843 if (!TARGET_SSE_MATH)
13844 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13845 reg_alloc_order [pos++] = i;
13846
13847 /* SSE registers. */
13848 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13849 reg_alloc_order [pos++] = i;
13850 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13851 reg_alloc_order [pos++] = i;
13852
13853 /* x87 registers. */
13854 if (TARGET_SSE_MATH)
13855 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13856 reg_alloc_order [pos++] = i;
13857
13858 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13859 reg_alloc_order [pos++] = i;
13860
13861 /* Initialize the rest of the array, as we do not allocate some registers
13862 at all. */
13863 while (pos < FIRST_PSEUDO_REGISTER)
13864 reg_alloc_order [pos++] = 0;
13865 }
13866
13867 void
13868 x86_output_mi_thunk (file, delta, function)
13869 FILE *file;
13870 int delta;
13871 tree function;
13872 {
13873 tree parm;
13874 rtx xops[3];
13875
13876 if (ix86_regparm > 0)
13877 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13878 else
13879 parm = NULL_TREE;
13880 for (; parm; parm = TREE_CHAIN (parm))
13881 if (TREE_VALUE (parm) == void_type_node)
13882 break;
13883
13884 xops[0] = GEN_INT (delta);
13885 if (TARGET_64BIT)
13886 {
13887 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13888 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13889 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13890 if (flag_pic)
13891 {
13892 fprintf (file, "\tjmp *");
13893 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13894 fprintf (file, "@GOTPCREL(%%rip)\n");
13895 }
13896 else
13897 {
13898 fprintf (file, "\tjmp ");
13899 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13900 fprintf (file, "\n");
13901 }
13902 }
13903 else
13904 {
13905 if (parm)
13906 xops[1] = gen_rtx_REG (SImode, 0);
13907 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13908 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13909 else
13910 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13911 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13912
13913 if (flag_pic)
13914 {
13915 xops[0] = pic_offset_table_rtx;
13916 xops[1] = gen_label_rtx ();
13917 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13918
13919 if (ix86_regparm > 2)
13920 abort ();
13921 output_asm_insn ("push{l}\t%0", xops);
13922 output_asm_insn ("call\t%P1", xops);
13923 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13924 output_asm_insn ("pop{l}\t%0", xops);
13925 output_asm_insn
13926 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13927 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13928 output_asm_insn
13929 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13930 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13931 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13932 }
13933 else
13934 {
13935 fprintf (file, "\tjmp ");
13936 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13937 fprintf (file, "\n");
13938 }
13939 }
13940 }
13941
13942 int
13943 x86_field_alignment (field, computed)
13944 tree field;
13945 int computed;
13946 {
13947 enum machine_mode mode;
13948 tree type = TREE_TYPE (field);
13949
13950 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13951 return computed;
13952 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13953 ? get_inner_array_type (type) : type);
13954 if (mode == DFmode || mode == DCmode
13955 || GET_MODE_CLASS (mode) == MODE_INT
13956 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13957 return MIN (32, computed);
13958 return computed;
13959 }
13960
13961 /* Implement machine specific optimizations.
13962 At the moment we implement a single transformation: AMD Athlon works faster
13963 when RET is not the destination of a conditional jump and is not directly
13964 preceded by another jump instruction. We avoid the penalty by inserting a NOP
13965 just before the RET instruction in such cases. */
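/* A sketch of the transformation (illustrative only): a sequence such as

       jne  .L1
       ret

   becomes

       jne  .L1
       nop
       ret

   and likewise a NOP is inserted when the RET is itself a jump target.  */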
13966 void
13967 x86_machine_dependent_reorg (first)
13968 rtx first ATTRIBUTE_UNUSED;
13969 {
13970 edge e;
13971
13972 if (!TARGET_ATHLON || !optimize || optimize_size)
13973 return;
13974 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13975 {
13976 basic_block bb = e->src;
13977 rtx ret = bb->end;
13978 rtx prev;
13979 bool insert = false;
13980
13981 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13982 continue;
13983 prev = prev_nonnote_insn (ret);
13984 if (prev && GET_CODE (prev) == CODE_LABEL)
13985 {
13986 edge e;
13987 for (e = bb->pred; e; e = e->pred_next)
13988 if (EDGE_FREQUENCY (e) && e->src->index > 0
13989 && !(e->flags & EDGE_FALLTHRU))
13990 insert = 1;
13991 }
13992 if (!insert)
13993 {
13994 prev = prev_real_insn (ret);
13995 if (prev && GET_CODE (prev) == JUMP_INSN
13996 && any_condjump_p (prev))
13997 insert = 1;
13998 }
13999 if (insert)
14000 emit_insn_before (gen_nop (), ret);
14001 }
14002 }
14003
14004 #include "gt-i386.h"