[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
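
/* A minimal, purely illustrative sketch of how the tables above are
   consumed: the cost macros (e.g. the RTX cost hooks in i386.h) read
   fields of the structure through the ix86_cost pointer selected in
   override_options.  example_shift_cost is a made-up helper and the
   field names are assumed from struct processor_costs.  */
#if 0
static int
example_shift_cost (x)
     rtx x;
{
  /* Constant shift counts are normally cheaper than variable ones.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
    return ix86_cost->shift_const;
  return ix86_cost->shift_var;
}
#endif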
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
399
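/* Each tuning mask above is tested against the bit for the processor being
   tuned for or compiled for.  An illustrative sketch of the pattern (the
   EXAMPLE_ names are made up; the real TARGET_* wrappers live in i386.h):  */
#if 0
#define EXAMPLE_CPUMASK (1 << ix86_cpu)
#define EXAMPLE_USE_LEAVE (x86_use_leave & EXAMPLE_CPUMASK)
/* Architecture masks are checked against ix86_arch instead, as in the
   x86_arch_always_fancy_math_387 test in override_options below.  */
#define EXAMPLE_ALWAYS_FANCY_MATH_387 \
  (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
#endif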
400 /* In case the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
413
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
416
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 {
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
438 };
439
440 /* The "default" register map used in 32bit mode. */
441
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
443 {
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 };
452
453 static int const x86_64_int_parameter_registers[6] =
454 {
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457 };
458
459 static int const x86_64_int_return_registers[4] =
460 {
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
462 };
463
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466 {
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474 };
475
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529 */
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
531 {
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
539 };
540
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
546
547 /* The encoding characters for the four TLS models present in ELF. */
548
549 static char const tls_model_chars[] = " GLil";
550
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
554
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
557 {
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
562 };
563
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
566
567 /* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586 struct ix86_frame
587 {
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601 };
602
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
615
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
618
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
623
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
628
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
631
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse;
634
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
637
638 /* Alignment to use for loops and jumps: */
639
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
642
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
645
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
648
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
651
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
655
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
658
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
662 \f
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
704
705 struct ix86_address
706 {
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709 };
710
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
712
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
716
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
748
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
752
753 /* Register class used for passing a given 64-bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
756 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
757 
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (the upper half contains only padding).
760 */
761 enum x86_64_reg_class
762 {
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
773 };
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
776
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
787 \f
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
795
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
798
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
801
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
804
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
807
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
812
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
821
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
828
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
845
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
850
851 struct gcc_target targetm = TARGET_INITIALIZER;
852 \f
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
858
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
861
862 void
863 override_options ()
864 {
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
868
869 static struct ptt
870 {
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
880 }
881 const processor_target_table[PROCESSOR_max] =
882 {
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
890 };
891
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
894 {
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
898 {
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
906 }
907 const processor_alias_table[] =
908 {
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"i686", PROCESSOR_PENTIUMPRO, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
917 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
918 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
919 PTA_MMX | PTA_PREFETCH_SSE},
920 {"k6", PROCESSOR_K6, PTA_MMX},
921 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
922 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
924 | PTA_3DNOW_A},
925 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
926 | PTA_3DNOW | PTA_3DNOW_A},
927 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 | PTA_3DNOW_A | PTA_SSE},
929 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
933 };
934
935 int const pta_size = ARRAY_SIZE (processor_alias_table);
936
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS;
939 #endif
940
941 if (!ix86_cpu_string && ix86_arch_string)
942 ix86_cpu_string = ix86_arch_string;
943 if (!ix86_cpu_string)
944 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
945 if (!ix86_arch_string)
946 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
947
948 if (ix86_cmodel_string != 0)
949 {
950 if (!strcmp (ix86_cmodel_string, "small"))
951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
952 else if (flag_pic)
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
954 else if (!strcmp (ix86_cmodel_string, "32"))
955 ix86_cmodel = CM_32;
956 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
957 ix86_cmodel = CM_KERNEL;
958 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
959 ix86_cmodel = CM_MEDIUM;
960 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
961 ix86_cmodel = CM_LARGE;
962 else
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
964 }
965 else
966 {
967 ix86_cmodel = CM_32;
968 if (TARGET_64BIT)
969 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
970 }
971 if (ix86_asm_string != 0)
972 {
973 if (!strcmp (ix86_asm_string, "intel"))
974 ix86_asm_dialect = ASM_INTEL;
975 else if (!strcmp (ix86_asm_string, "att"))
976 ix86_asm_dialect = ASM_ATT;
977 else
978 error ("bad value (%s) for -masm= switch", ix86_asm_string);
979 }
980 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
983 if (ix86_cmodel == CM_LARGE)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags & MASK_64BIT) ? 64 : 32);
988
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
991 {
992 ix86_arch = processor_alias_table[i].processor;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu = ix86_arch;
995 if (processor_alias_table[i].flags & PTA_MMX
996 && !(target_flags & MASK_MMX_SET))
997 target_flags |= MASK_MMX;
998 if (processor_alias_table[i].flags & PTA_3DNOW
999 && !(target_flags & MASK_3DNOW_SET))
1000 target_flags |= MASK_3DNOW;
1001 if (processor_alias_table[i].flags & PTA_3DNOW_A
1002 && !(target_flags & MASK_3DNOW_A_SET))
1003 target_flags |= MASK_3DNOW_A;
1004 if (processor_alias_table[i].flags & PTA_SSE
1005 && !(target_flags & MASK_SSE_SET))
1006 target_flags |= MASK_SSE;
1007 if (processor_alias_table[i].flags & PTA_SSE2
1008 && !(target_flags & MASK_SSE2_SET))
1009 target_flags |= MASK_SSE2;
1010 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1011 x86_prefetch_sse = true;
1012 break;
1013 }
1014
1015 if (i == pta_size)
1016 error ("bad value (%s) for -march= switch", ix86_arch_string);
1017
1018 for (i = 0; i < pta_size; i++)
1019 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1020 {
1021 ix86_cpu = processor_alias_table[i].processor;
1022 break;
1023 }
1024 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1025 x86_prefetch_sse = true;
1026 if (i == pta_size)
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1028
1029 if (optimize_size)
1030 ix86_cost = &size_cost;
1031 else
1032 ix86_cost = processor_target_table[ix86_cpu].cost;
1033 target_flags |= processor_target_table[ix86_cpu].target_enable;
1034 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1035
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status = ix86_init_machine_status;
1038
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string)
1041 {
1042 i = atoi (ix86_regparm_string);
1043 if (i < 0 || i > REGPARM_MAX)
1044 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1045 else
1046 ix86_regparm = i;
1047 }
1048 else
1049 if (TARGET_64BIT)
1050 ix86_regparm = REGPARM_MAX;
1051
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string)
1056 {
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops == 0)
1059 {
1060 i = atoi (ix86_align_loops_string);
1061 if (i < 0 || i > MAX_CODE_ALIGN)
1062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1063 else
1064 align_loops = 1 << i;
1065 }
1066 }
1067
1068 if (ix86_align_jumps_string)
1069 {
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps == 0)
1072 {
1073 i = atoi (ix86_align_jumps_string);
1074 if (i < 0 || i > MAX_CODE_ALIGN)
1075 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1076 else
1077 align_jumps = 1 << i;
1078 }
1079 }
1080
1081 if (ix86_align_funcs_string)
1082 {
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions == 0)
1085 {
1086 i = atoi (ix86_align_funcs_string);
1087 if (i < 0 || i > MAX_CODE_ALIGN)
1088 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1089 else
1090 align_functions = 1 << i;
1091 }
1092 }
1093
1094 /* Default align_* from the processor table. */
1095 if (align_loops == 0)
1096 {
1097 align_loops = processor_target_table[ix86_cpu].align_loop;
1098 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1099 }
1100 if (align_jumps == 0)
1101 {
1102 align_jumps = processor_target_table[ix86_cpu].align_jump;
1103 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1104 }
1105 if (align_functions == 0)
1106 {
1107 align_functions = processor_target_table[ix86_cpu].align_func;
1108 }
1109
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary = (optimize_size
1115 ? TARGET_64BIT ? 64 : 32
1116 : 128);
1117 if (ix86_preferred_stack_boundary_string)
1118 {
1119 i = atoi (ix86_preferred_stack_boundary_string);
1120 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1122 TARGET_64BIT ? 3 : 2);
1123 else
1124 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1125 }
1126
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1129 if (ix86_branch_cost_string)
1130 {
1131 i = atoi (ix86_branch_cost_string);
1132 if (i < 0 || i > 5)
1133 error ("-mbranch-cost=%d is not between 0 and 5", i);
1134 else
1135 ix86_branch_cost = i;
1136 }
1137
1138 if (ix86_tls_dialect_string)
1139 {
1140 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1141 ix86_tls_dialect = TLS_DIALECT_GNU;
1142 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1143 ix86_tls_dialect = TLS_DIALECT_SUN;
1144 else
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string);
1147 }
1148
1149 /* Keep nonleaf frame pointers. */
1150 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1151 flag_omit_frame_pointer = 1;
1152
1153 /* If we're doing fast math, we don't care about comparison order
1154 wrt NaNs. This lets us use a shorter comparison sequence. */
1155 if (flag_unsafe_math_optimizations)
1156 target_flags &= ~MASK_IEEE_FP;
1157
1158 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1159 since the insns won't need emulation. */
1160 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1161 target_flags &= ~MASK_NO_FANCY_MATH_387;
1162
1163 if (TARGET_64BIT)
1164 {
1165 if (TARGET_ALIGN_DOUBLE)
1166 error ("-malign-double makes no sense in the 64bit mode");
1167 if (TARGET_RTD)
1168 error ("-mrtd calling convention not supported in the 64bit mode");
1169 /* Enable by default the SSE and MMX builtins. */
1170 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1171 ix86_fpmath = FPMATH_SSE;
1172 }
1173 else
1174 ix86_fpmath = FPMATH_387;
1175
1176 if (ix86_fpmath_string != 0)
1177 {
1178 if (! strcmp (ix86_fpmath_string, "387"))
1179 ix86_fpmath = FPMATH_387;
1180 else if (! strcmp (ix86_fpmath_string, "sse"))
1181 {
1182 if (!TARGET_SSE)
1183 {
1184 warning ("SSE instruction set disabled, using 387 arithmetics");
1185 ix86_fpmath = FPMATH_387;
1186 }
1187 else
1188 ix86_fpmath = FPMATH_SSE;
1189 }
1190 else if (! strcmp (ix86_fpmath_string, "387,sse")
1191 || ! strcmp (ix86_fpmath_string, "sse,387"))
1192 {
1193 if (!TARGET_SSE)
1194 {
1195 warning ("SSE instruction set disabled, using 387 arithmetics");
1196 ix86_fpmath = FPMATH_387;
1197 }
1198 else if (!TARGET_80387)
1199 {
1200 warning ("387 instruction set disabled, using SSE arithmetics");
1201 ix86_fpmath = FPMATH_SSE;
1202 }
1203 else
1204 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1205 }
1206 else
1207 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1208 }
1209
1210 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1211 on by -msse. */
1212 if (TARGET_SSE)
1213 {
1214 target_flags |= MASK_MMX;
1215 x86_prefetch_sse = true;
1216 }
1217
1218 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1219 if (TARGET_3DNOW)
1220 {
1221 target_flags |= MASK_MMX;
1222 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1223 extensions it adds. */
1224 if (x86_3dnow_a & (1 << ix86_arch))
1225 target_flags |= MASK_3DNOW_A;
1226 }
1227 if ((x86_accumulate_outgoing_args & CPUMASK)
1228 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1229 && !optimize_size)
1230 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1231
1232 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1233 {
1234 char *p;
1235 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1236 p = strchr (internal_label_prefix, 'X');
1237 internal_label_prefix_len = p - internal_label_prefix;
1238 *p = '\0';
1239 }
1240 }
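
/* For illustration, the combined effect of the alias table and the two
   lookup loops above (assuming no explicit -mmmx/-msse style flags):

     -march=pentium3   ix86_arch = PROCESSOR_PENTIUMPRO, ix86_cpu defaults
                       to the same value, MASK_MMX and MASK_SSE are enabled
                       and x86_prefetch_sse is set.
     -mcpu=k6-2        only tuning changes: ix86_cpu = PROCESSOR_K6; the
                       instruction set chosen by -march is left alone.  */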
1241 \f
1242 void
1243 optimization_options (level, size)
1244 int level;
1245 int size ATTRIBUTE_UNUSED;
1246 {
1247 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1248 make the problem with not enough registers even worse. */
1249 #ifdef INSN_SCHEDULING
1250 if (level > 1)
1251 flag_schedule_insns = 0;
1252 #endif
1253 if (TARGET_64BIT && optimize >= 1)
1254 flag_omit_frame_pointer = 1;
1255 if (TARGET_64BIT)
1256 {
1257 flag_pcc_struct_return = 0;
1258 flag_asynchronous_unwind_tables = 1;
1259 }
1260 }
1261 \f
1262 /* Table of valid machine attributes. */
1263 const struct attribute_spec ix86_attribute_table[] =
1264 {
1265 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1266 /* Stdcall attribute says callee is responsible for popping arguments
1267 if they are not variable. */
1268 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1269 /* Cdecl attribute says the callee is a normal C declaration */
1270 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1271 /* Regparm attribute specifies how many integer arguments are to be
1272 passed in registers. */
1273 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1274 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1275 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1276 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1278 #endif
1279 { NULL, 0, 0, false, false, false, NULL }
1280 };
1281
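/* Source-level usage of the attributes declared above, as an illustrative
   sketch (the function names are made up):  */
#if 0
/* Callee pops its 8 bytes of arguments, as with -mrtd.  */
int __attribute__ ((stdcall)) example_stdcall (int a, int b);

/* Keep the default C convention even when -mrtd is in effect.  */
int __attribute__ ((cdecl)) example_cdecl (int a, int b);

/* Pass the first two integer arguments in registers.  */
int __attribute__ ((regparm (2))) example_regparm (int a, int b);
#endif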
1282 /* Handle a "cdecl" or "stdcall" attribute;
1283 arguments as in struct attribute_spec.handler. */
1284 static tree
1285 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1286 tree *node;
1287 tree name;
1288 tree args ATTRIBUTE_UNUSED;
1289 int flags ATTRIBUTE_UNUSED;
1290 bool *no_add_attrs;
1291 {
1292 if (TREE_CODE (*node) != FUNCTION_TYPE
1293 && TREE_CODE (*node) != METHOD_TYPE
1294 && TREE_CODE (*node) != FIELD_DECL
1295 && TREE_CODE (*node) != TYPE_DECL)
1296 {
1297 warning ("`%s' attribute only applies to functions",
1298 IDENTIFIER_POINTER (name));
1299 *no_add_attrs = true;
1300 }
1301
1302 if (TARGET_64BIT)
1303 {
1304 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1305 *no_add_attrs = true;
1306 }
1307
1308 return NULL_TREE;
1309 }
1310
1311 /* Handle a "regparm" attribute;
1312 arguments as in struct attribute_spec.handler. */
1313 static tree
1314 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1315 tree *node;
1316 tree name;
1317 tree args;
1318 int flags ATTRIBUTE_UNUSED;
1319 bool *no_add_attrs;
1320 {
1321 if (TREE_CODE (*node) != FUNCTION_TYPE
1322 && TREE_CODE (*node) != METHOD_TYPE
1323 && TREE_CODE (*node) != FIELD_DECL
1324 && TREE_CODE (*node) != TYPE_DECL)
1325 {
1326 warning ("`%s' attribute only applies to functions",
1327 IDENTIFIER_POINTER (name));
1328 *no_add_attrs = true;
1329 }
1330 else
1331 {
1332 tree cst;
1333
1334 cst = TREE_VALUE (args);
1335 if (TREE_CODE (cst) != INTEGER_CST)
1336 {
1337 warning ("`%s' attribute requires an integer constant argument",
1338 IDENTIFIER_POINTER (name));
1339 *no_add_attrs = true;
1340 }
1341 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1342 {
1343 warning ("argument to `%s' attribute larger than %d",
1344 IDENTIFIER_POINTER (name), REGPARM_MAX);
1345 *no_add_attrs = true;
1346 }
1347 }
1348
1349 return NULL_TREE;
1350 }
1351
1352 /* Return 0 if the attributes for two types are incompatible, 1 if they
1353 are compatible, and 2 if they are nearly compatible (which causes a
1354 warning to be generated). */
1355
1356 static int
1357 ix86_comp_type_attributes (type1, type2)
1358 tree type1;
1359 tree type2;
1360 {
1361 /* Check for mismatch of non-default calling convention. */
1362 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1363
1364 if (TREE_CODE (type1) != FUNCTION_TYPE)
1365 return 1;
1366
1367 /* Check for mismatched return types (cdecl vs stdcall). */
1368 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1369 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1370 return 0;
1371 return 1;
1372 }
1373 \f
1374 /* Value is the number of bytes of arguments automatically
1375 popped when returning from a subroutine call.
1376 FUNDECL is the declaration node of the function (as a tree),
1377 FUNTYPE is the data type of the function (as a tree),
1378 or for a library call it is an identifier node for the subroutine name.
1379 SIZE is the number of bytes of arguments passed on the stack.
1380
1381 On the 80386, the RTD insn may be used to pop them if the number
1382 of args is fixed, but if the number is variable then the caller
1383 must pop them all. RTD can't be used for library calls now
1384 because the library is compiled with the Unix compiler.
1385 Use of RTD is a selectable option, since it is incompatible with
1386 standard Unix calling sequences. If the option is not selected,
1387 the caller must always pop the args.
1388
1389 The attribute stdcall is equivalent to RTD on a per module basis. */
1390
1391 int
1392 ix86_return_pops_args (fundecl, funtype, size)
1393 tree fundecl;
1394 tree funtype;
1395 int size;
1396 {
1397 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1398
1399 /* Cdecl functions override -mrtd, and never pop the stack. */
1400 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1401
1402 /* Stdcall functions will pop the stack if not variable args. */
1403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1404 rtd = 1;
1405
1406 if (rtd
1407 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1408 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1409 == void_type_node)))
1410 return size;
1411 }
1412
1413 /* Lose any fake structure return argument if it is passed on the stack. */
1414 if (aggregate_value_p (TREE_TYPE (funtype))
1415 && !TARGET_64BIT)
1416 {
1417 int nregs = ix86_regparm;
1418
1419 if (funtype)
1420 {
1421 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1422
1423 if (attr)
1424 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1425 }
1426
1427 if (!nregs)
1428 return GET_MODE_SIZE (Pmode);
1429 }
1430
1431 return 0;
1432 }
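
/* A worked example of the rules above (illustrative only): for
     int __attribute__ ((stdcall)) f (int, int);
   SIZE is 8 and the argument list is fixed, so ix86_return_pops_args
   returns 8 and the callee pops its arguments (ret $8).  For a cdecl
   function, or for a stdcall function taking `...', it returns 0 and
   the caller pops.  */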
1433 \f
1434 /* Argument support functions. */
1435
1436 /* Return true when register may be used to pass function parameters. */
1437 bool
1438 ix86_function_arg_regno_p (regno)
1439 int regno;
1440 {
1441 int i;
1442 if (!TARGET_64BIT)
1443 return (regno < REGPARM_MAX
1444 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1445 if (SSE_REGNO_P (regno) && TARGET_SSE)
1446 return true;
1447 /* RAX is used as a hidden argument to varargs functions. */
1448 if (!regno)
1449 return true;
1450 for (i = 0; i < REGPARM_MAX; i++)
1451 if (regno == x86_64_int_parameter_registers[i])
1452 return true;
1453 return false;
1454 }
1455
1456 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1457 for a call to a function whose data type is FNTYPE.
1458 For a library call, FNTYPE is 0. */
1459
1460 void
1461 init_cumulative_args (cum, fntype, libname)
1462 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1463 tree fntype; /* tree ptr for function decl */
1464 rtx libname; /* SYMBOL_REF of library name or 0 */
1465 {
1466 static CUMULATIVE_ARGS zero_cum;
1467 tree param, next_param;
1468
1469 if (TARGET_DEBUG_ARG)
1470 {
1471 fprintf (stderr, "\ninit_cumulative_args (");
1472 if (fntype)
1473 fprintf (stderr, "fntype code = %s, ret code = %s",
1474 tree_code_name[(int) TREE_CODE (fntype)],
1475 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1476 else
1477 fprintf (stderr, "no fntype");
1478
1479 if (libname)
1480 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1481 }
1482
1483 *cum = zero_cum;
1484
1485 /* Set up the number of registers to use for passing arguments. */
1486 cum->nregs = ix86_regparm;
1487 cum->sse_nregs = SSE_REGPARM_MAX;
1488 if (fntype && !TARGET_64BIT)
1489 {
1490 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1491
1492 if (attr)
1493 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1494 }
1495 cum->maybe_vaarg = false;
1496
1497 /* Determine if this function has variable arguments. A prototype whose
1498 argument list ends with 'void_type_node' has no variable arguments.
1499 If there are variable arguments, then we won't pass anything in
1500 registers. */
1501
1502 if (cum->nregs)
1503 {
1504 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1505 param != 0; param = next_param)
1506 {
1507 next_param = TREE_CHAIN (param);
1508 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1509 {
1510 if (!TARGET_64BIT)
1511 cum->nregs = 0;
1512 cum->maybe_vaarg = true;
1513 }
1514 }
1515 }
1516 if ((!fntype && !libname)
1517 || (fntype && !TYPE_ARG_TYPES (fntype)))
1518 cum->maybe_vaarg = 1;
1519
1520 if (TARGET_DEBUG_ARG)
1521 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1522
1523 return;
1524 }
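
/* For illustration: given
     int __attribute__ ((regparm (3))) f (int a, int b, int c);
   the regparm lookup above sets cum->nregs to 3, so all three integer
   arguments travel in registers.  A prototype ending in `...' instead
   zeroes cum->nregs (in 32-bit mode) and sets maybe_vaarg, so the
   arguments go on the stack.  */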
1525
1526 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
1527 goal of this code is to classify each 8 bytes of an incoming argument by
1528 register class and assign registers accordingly. */
1529
1530 /* Return the union class of CLASS1 and CLASS2.
1531 See the x86-64 PS ABI for details. */
1532
1533 static enum x86_64_reg_class
1534 merge_classes (class1, class2)
1535 enum x86_64_reg_class class1, class2;
1536 {
1537 /* Rule #1: If both classes are equal, this is the resulting class. */
1538 if (class1 == class2)
1539 return class1;
1540
1541 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1542 the other class. */
1543 if (class1 == X86_64_NO_CLASS)
1544 return class2;
1545 if (class2 == X86_64_NO_CLASS)
1546 return class1;
1547
1548 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1549 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1550 return X86_64_MEMORY_CLASS;
1551
1552 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1553 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1554 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1555 return X86_64_INTEGERSI_CLASS;
1556 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1557 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1558 return X86_64_INTEGER_CLASS;
1559
1560 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1561 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1562 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1563 return X86_64_MEMORY_CLASS;
1564
1565 /* Rule #6: Otherwise class SSE is used. */
1566 return X86_64_SSE_CLASS;
1567 }
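
/* For example (illustration only, not quoted from the ABI): in
   struct { float f; int i; }  both fields share one eightbyte.  The float
   alone would give X86_64_SSESF_CLASS, but merging in the int yields
   X86_64_INTEGER_CLASS by rule #4, so the whole structure is passed in a
   general purpose register.  */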
1568
1569 /* Classify the argument of type TYPE and mode MODE.
1570 CLASSES will be filled with the register class used to pass each word
1571 of the operand. The number of words is returned. In case the parameter
1572 should be passed in memory, 0 is returned. As a special case for zero
1573 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1574
1575 BIT_OFFSET is used internally for handling records and specifies the
1576 offset in bits, modulo 256, to avoid overflow cases.
1577
1578 See the x86-64 PS ABI for details.
1579 */
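
/* A worked example (illustrative only, not part of the ABI text):

     struct s { double d; int a; int b; };

   is 16 bytes, i.e. two eightbytes.  For it classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS (only the double occupies the first
   eightbyte) and classes[1] = X86_64_INTEGER_CLASS (the two ints merge
   there), so the value is passed in one SSE and one integer register.  */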
1580
1581 static int
1582 classify_argument (mode, type, classes, bit_offset)
1583 enum machine_mode mode;
1584 tree type;
1585 enum x86_64_reg_class classes[MAX_CLASSES];
1586 int bit_offset;
1587 {
1588 int bytes =
1589 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1590 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1591
1592 if (type && AGGREGATE_TYPE_P (type))
1593 {
1594 int i;
1595 tree field;
1596 enum x86_64_reg_class subclasses[MAX_CLASSES];
1597
1598 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1599 if (bytes > 16)
1600 return 0;
1601
1602 for (i = 0; i < words; i++)
1603 classes[i] = X86_64_NO_CLASS;
1604
1605 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1606 signal the memory class, so handle this as a special case. */
1607 if (!words)
1608 {
1609 classes[0] = X86_64_NO_CLASS;
1610 return 1;
1611 }
1612
1613 /* Classify each field of record and merge classes. */
1614 if (TREE_CODE (type) == RECORD_TYPE)
1615 {
1616 /* For C++ classes, first merge in the fields of the base classes. */
1617 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1618 {
1619 tree bases = TYPE_BINFO_BASETYPES (type);
1620 int n_bases = TREE_VEC_LENGTH (bases);
1621 int i, j;
1622
1623 for (i = 0; i < n_bases; ++i)
1624 {
1625 tree binfo = TREE_VEC_ELT (bases, i);
1626 int num;
1627 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1628 tree type = BINFO_TYPE (binfo);
1629
1630 num = classify_argument (TYPE_MODE (type),
1631 type, subclasses,
1632 (offset + bit_offset) % 256);
1633 if (!num)
1634 return 0;
1635 for (j = 0; j < num; j++)
1636 {
1637 int pos = (offset + bit_offset) / 8 / 8;
1638 classes[j + pos] =
1639 merge_classes (subclasses[j], classes[j + pos]);
1640 }
1641 }
1642 }
1643 /* And now merge the fields of the structure. */
1644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1645 {
1646 if (TREE_CODE (field) == FIELD_DECL)
1647 {
1648 int num;
1649
1650 /* Bitfields are always classified as integer. Handle them
1651 early, since later code would consider them to be
1652 misaligned integers. */
1653 if (DECL_BIT_FIELD (field))
1654 {
1655 for (i = int_bit_position (field) / 8 / 8;
1656 i < (int_bit_position (field)
1657 + tree_low_cst (DECL_SIZE (field), 0)
1658 + 63) / 8 / 8; i++)
1659 classes[i] =
1660 merge_classes (X86_64_INTEGER_CLASS,
1661 classes[i]);
1662 }
1663 else
1664 {
1665 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1666 TREE_TYPE (field), subclasses,
1667 (int_bit_position (field)
1668 + bit_offset) % 256);
1669 if (!num)
1670 return 0;
1671 for (i = 0; i < num; i++)
1672 {
1673 int pos =
1674 (int_bit_position (field) + bit_offset) / 8 / 8;
1675 classes[i + pos] =
1676 merge_classes (subclasses[i], classes[i + pos]);
1677 }
1678 }
1679 }
1680 }
1681 }
1682 /* Arrays are handled as small records. */
1683 else if (TREE_CODE (type) == ARRAY_TYPE)
1684 {
1685 int num;
1686 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1687 TREE_TYPE (type), subclasses, bit_offset);
1688 if (!num)
1689 return 0;
1690
1691 /* The partial classes are now full classes. */
1692 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1693 subclasses[0] = X86_64_SSE_CLASS;
1694 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1695 subclasses[0] = X86_64_INTEGER_CLASS;
1696
1697 for (i = 0; i < words; i++)
1698 classes[i] = subclasses[i % num];
1699 }
1700 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1701 else if (TREE_CODE (type) == UNION_TYPE
1702 || TREE_CODE (type) == QUAL_UNION_TYPE)
1703 {
1704 /* For C++ classes, first merge in the fields of the base classes. */
1705 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1706 {
1707 tree bases = TYPE_BINFO_BASETYPES (type);
1708 int n_bases = TREE_VEC_LENGTH (bases);
1709 int i, j;
1710
1711 for (i = 0; i < n_bases; ++i)
1712 {
1713 tree binfo = TREE_VEC_ELT (bases, i);
1714 int num;
1715 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1716 tree type = BINFO_TYPE (binfo);
1717
1718 num = classify_argument (TYPE_MODE (type),
1719 type, subclasses,
1720 (offset + bit_offset) % 256);
1721 if (!num)
1722 return 0;
1723 for (j = 0; j < num; j++)
1724 {
1725 int pos = (offset + bit_offset) / 8 / 8;
1726 classes[j + pos] =
1727 merge_classes (subclasses[j], classes[j + pos]);
1728 }
1729 }
1730 }
1731 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1732 {
1733 if (TREE_CODE (field) == FIELD_DECL)
1734 {
1735 int num;
1736 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1737 TREE_TYPE (field), subclasses,
1738 bit_offset);
1739 if (!num)
1740 return 0;
1741 for (i = 0; i < num; i++)
1742 classes[i] = merge_classes (subclasses[i], classes[i]);
1743 }
1744 }
1745 }
1746 else
1747 abort ();
1748
1749 /* Final merger cleanup. */
1750 for (i = 0; i < words; i++)
1751 {
1752 /* If one class is MEMORY, everything should be passed in
1753 memory. */
1754 if (classes[i] == X86_64_MEMORY_CLASS)
1755 return 0;
1756
1757 /* The X86_64_SSEUP_CLASS should always be preceded by
1758 X86_64_SSE_CLASS. */
1759 if (classes[i] == X86_64_SSEUP_CLASS
1760 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1761 classes[i] = X86_64_SSE_CLASS;
1762
1763 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1764 if (classes[i] == X86_64_X87UP_CLASS
1765 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1766 classes[i] = X86_64_SSE_CLASS;
1767 }
1768 return words;
1769 }
1770
1771 /* Compute the alignment needed. All types use their natural boundary,
1772 except that XFmode is checked against 128 bits and XCmode against 256. */
1773 if (mode != VOIDmode && mode != BLKmode)
1774 {
1775 int mode_alignment = GET_MODE_BITSIZE (mode);
1776
1777 if (mode == XFmode)
1778 mode_alignment = 128;
1779 else if (mode == XCmode)
1780 mode_alignment = 256;
1781 /* Misaligned fields are always returned in memory. */
1782 if (bit_offset % mode_alignment)
1783 return 0;
1784 }
1785
1786 /* Classification of atomic types. */
1787 switch (mode)
1788 {
1789 case DImode:
1790 case SImode:
1791 case HImode:
1792 case QImode:
1793 case CSImode:
1794 case CHImode:
1795 case CQImode:
1796 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1797 classes[0] = X86_64_INTEGERSI_CLASS;
1798 else
1799 classes[0] = X86_64_INTEGER_CLASS;
1800 return 1;
1801 case CDImode:
1802 case TImode:
1803 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1804 return 2;
1805 case CTImode:
1806 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1807 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1808 return 4;
1809 case SFmode:
1810 if (!(bit_offset % 64))
1811 classes[0] = X86_64_SSESF_CLASS;
1812 else
1813 classes[0] = X86_64_SSE_CLASS;
1814 return 1;
1815 case DFmode:
1816 classes[0] = X86_64_SSEDF_CLASS;
1817 return 1;
1818 case TFmode:
1819 classes[0] = X86_64_X87_CLASS;
1820 classes[1] = X86_64_X87UP_CLASS;
1821 return 2;
1822 case TCmode:
1823 classes[0] = X86_64_X87_CLASS;
1824 classes[1] = X86_64_X87UP_CLASS;
1825 classes[2] = X86_64_X87_CLASS;
1826 classes[3] = X86_64_X87UP_CLASS;
1827 return 4;
1828 case DCmode:
1829 classes[0] = X86_64_SSEDF_CLASS;
1830 classes[1] = X86_64_SSEDF_CLASS;
1831 return 2;
1832 case SCmode:
1833 classes[0] = X86_64_SSE_CLASS;
1834 return 1;
1835 case V4SFmode:
1836 case V4SImode:
1837 case V16QImode:
1838 case V8HImode:
1839 case V2DFmode:
1840 case V2DImode:
1841 classes[0] = X86_64_SSE_CLASS;
1842 classes[1] = X86_64_SSEUP_CLASS;
1843 return 2;
1844 case V2SFmode:
1845 case V2SImode:
1846 case V4HImode:
1847 case V8QImode:
1848 classes[0] = X86_64_SSE_CLASS;
1849 return 1;
1850 case BLKmode:
1851 case VOIDmode:
1852 return 0;
1853 default:
1854 abort ();
1855 }
1856 }
1857
1858 /* Examine the argument and set the number of registers required in each
1859 class. Return 0 iff the parameter should be passed in memory. */
1860 static int
1861 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1862 enum machine_mode mode;
1863 tree type;
1864 int *int_nregs, *sse_nregs;
1865 int in_return;
1866 {
1867 enum x86_64_reg_class class[MAX_CLASSES];
1868 int n = classify_argument (mode, type, class, 0);
1869
1870 *int_nregs = 0;
1871 *sse_nregs = 0;
1872 if (!n)
1873 return 0;
1874 for (n--; n >= 0; n--)
1875 switch (class[n])
1876 {
1877 case X86_64_INTEGER_CLASS:
1878 case X86_64_INTEGERSI_CLASS:
1879 (*int_nregs)++;
1880 break;
1881 case X86_64_SSE_CLASS:
1882 case X86_64_SSESF_CLASS:
1883 case X86_64_SSEDF_CLASS:
1884 (*sse_nregs)++;
1885 break;
1886 case X86_64_NO_CLASS:
1887 case X86_64_SSEUP_CLASS:
1888 break;
1889 case X86_64_X87_CLASS:
1890 case X86_64_X87UP_CLASS:
1891 if (!in_return)
1892 return 0;
1893 break;
1894 case X86_64_MEMORY_CLASS:
1895 abort ();
1896 }
1897 return 1;
1898 }
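
/* Continuing the illustrative example above: for struct { double d; int a;
   int b; } this sets *int_nregs = 1 and *sse_nregs = 1 and returns 1; for
   anything classify_argument rejects it returns 0 and the argument is
   passed in memory.  */
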
1899 /* Construct a container for the argument (a REG or PARALLEL rtx) as used by
1900 the GCC calling interface. See FUNCTION_ARG for the detailed description. */
1901 static rtx
1902 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1903 enum machine_mode mode;
1904 tree type;
1905 int in_return;
1906 int nintregs, nsseregs;
1907 const int * intreg;
1908 int sse_regno;
1909 {
1910 enum machine_mode tmpmode;
1911 int bytes =
1912 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1913 enum x86_64_reg_class class[MAX_CLASSES];
1914 int n;
1915 int i;
1916 int nexps = 0;
1917 int needed_sseregs, needed_intregs;
1918 rtx exp[MAX_CLASSES];
1919 rtx ret;
1920
1921 n = classify_argument (mode, type, class, 0);
1922 if (TARGET_DEBUG_ARG)
1923 {
1924 if (!n)
1925 fprintf (stderr, "Memory class\n");
1926 else
1927 {
1928 fprintf (stderr, "Classes:");
1929 for (i = 0; i < n; i++)
1930 {
1931 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1932 }
1933 fprintf (stderr, "\n");
1934 }
1935 }
1936 if (!n)
1937 return NULL;
1938 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1939 return NULL;
1940 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1941 return NULL;
1942
1943 /* First construct simple cases. Avoid SCmode, since we want to use
1944 a single register to pass this type. */
1945 if (n == 1 && mode != SCmode)
1946 switch (class[0])
1947 {
1948 case X86_64_INTEGER_CLASS:
1949 case X86_64_INTEGERSI_CLASS:
1950 return gen_rtx_REG (mode, intreg[0]);
1951 case X86_64_SSE_CLASS:
1952 case X86_64_SSESF_CLASS:
1953 case X86_64_SSEDF_CLASS:
1954 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1955 case X86_64_X87_CLASS:
1956 return gen_rtx_REG (mode, FIRST_STACK_REG);
1957 case X86_64_NO_CLASS:
1958 /* Zero sized array, struct or class. */
1959 return NULL;
1960 default:
1961 abort ();
1962 }
1963 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1964 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1965 if (n == 2
1966 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1967 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1968 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1969 && class[1] == X86_64_INTEGER_CLASS
1970 && (mode == CDImode || mode == TImode)
1971 && intreg[0] + 1 == intreg[1])
1972 return gen_rtx_REG (mode, intreg[0]);
1973 if (n == 4
1974 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1975 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1976 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1977
1978 /* Otherwise figure out the entries of the PARALLEL. */
1979 for (i = 0; i < n; i++)
1980 {
1981 switch (class[i])
1982 {
1983 case X86_64_NO_CLASS:
1984 break;
1985 case X86_64_INTEGER_CLASS:
1986 case X86_64_INTEGERSI_CLASS:
1987 /* Merge TImodes on aligned occasions here too. */
1988 if (i * 8 + 8 > bytes)
1989 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1990 else if (class[i] == X86_64_INTEGERSI_CLASS)
1991 tmpmode = SImode;
1992 else
1993 tmpmode = DImode;
1994 /* We've requested a size (such as 24 bits) that no integer mode matches; use DImode. */
1995 if (tmpmode == BLKmode)
1996 tmpmode = DImode;
1997 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1998 gen_rtx_REG (tmpmode, *intreg),
1999 GEN_INT (i*8));
2000 intreg++;
2001 break;
2002 case X86_64_SSESF_CLASS:
2003 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2004 gen_rtx_REG (SFmode,
2005 SSE_REGNO (sse_regno)),
2006 GEN_INT (i*8));
2007 sse_regno++;
2008 break;
2009 case X86_64_SSEDF_CLASS:
2010 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2011 gen_rtx_REG (DFmode,
2012 SSE_REGNO (sse_regno)),
2013 GEN_INT (i*8));
2014 sse_regno++;
2015 break;
2016 case X86_64_SSE_CLASS:
2017 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2018 tmpmode = TImode, i++;
2019 else
2020 tmpmode = DImode;
2021 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2022 gen_rtx_REG (tmpmode,
2023 SSE_REGNO (sse_regno)),
2024 GEN_INT (i*8));
2025 sse_regno++;
2026 break;
2027 default:
2028 abort ();
2029 }
2030 }
2031 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2032 for (i = 0; i < nexps; i++)
2033 XVECEXP (ret, 0, i) = exp [i];
2034 return ret;
2035 }
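
/* For the illustrative structure used above (a double followed by two ints)
   the container built here would look roughly like

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                    (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte in an SSE register and the second in an integer
   register.  The exact registers depend on how many preceding arguments
   already consumed registers; this sketch is a reader aid only.  */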
2036
2037 /* Update the data in CUM to advance over an argument
2038 of mode MODE and data type TYPE.
2039 (TYPE is null for libcalls where that information may not be available.) */
2040
2041 void
2042 function_arg_advance (cum, mode, type, named)
2043 CUMULATIVE_ARGS *cum; /* current arg information */
2044 enum machine_mode mode; /* current arg mode */
2045 tree type; /* type of the argument or 0 if lib support */
2046 int named; /* whether or not the argument was named */
2047 {
2048 int bytes =
2049 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2050 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2051
2052 if (TARGET_DEBUG_ARG)
2053 fprintf (stderr,
2054 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2055 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2056 if (TARGET_64BIT)
2057 {
2058 int int_nregs, sse_nregs;
2059 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2060 cum->words += words;
2061 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2062 {
2063 cum->nregs -= int_nregs;
2064 cum->sse_nregs -= sse_nregs;
2065 cum->regno += int_nregs;
2066 cum->sse_regno += sse_nregs;
2067 }
2068 else
2069 cum->words += words;
2070 }
2071 else
2072 {
2073 if (TARGET_SSE && mode == TImode)
2074 {
2075 cum->sse_words += words;
2076 cum->sse_nregs -= 1;
2077 cum->sse_regno += 1;
2078 if (cum->sse_nregs <= 0)
2079 {
2080 cum->sse_nregs = 0;
2081 cum->sse_regno = 0;
2082 }
2083 }
2084 else
2085 {
2086 cum->words += words;
2087 cum->nregs -= words;
2088 cum->regno += words;
2089
2090 if (cum->nregs <= 0)
2091 {
2092 cum->nregs = 0;
2093 cum->regno = 0;
2094 }
2095 }
2096 }
2097 return;
2098 }
2099
2100 /* Define where to put the arguments to a function.
2101 Value is zero to push the argument on the stack,
2102 or a hard register in which to store the argument.
2103
2104 MODE is the argument's machine mode.
2105 TYPE is the data type of the argument (as a tree).
2106 This is null for libcalls where that information may
2107 not be available.
2108 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2109 the preceding args and about the function being called.
2110 NAMED is nonzero if this argument is a named parameter
2111 (otherwise it is an extra parameter matching an ellipsis). */
2112
2113 rtx
2114 function_arg (cum, mode, type, named)
2115 CUMULATIVE_ARGS *cum; /* current arg information */
2116 enum machine_mode mode; /* current arg mode */
2117 tree type; /* type of the argument or 0 if lib support */
2118 int named; /* != 0 for normal args, == 0 for ... args */
2119 {
2120 rtx ret = NULL_RTX;
2121 int bytes =
2122 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2123 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2124
2125 /* Handle a hidden AL argument containing the number of SSE registers used by
2126 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2127 any AL settings. */
2128 if (mode == VOIDmode)
2129 {
2130 if (TARGET_64BIT)
2131 return GEN_INT (cum->maybe_vaarg
2132 ? (cum->sse_nregs < 0
2133 ? SSE_REGPARM_MAX
2134 : cum->sse_regno)
2135 : -1);
2136 else
2137 return constm1_rtx;
2138 }
2139 if (TARGET_64BIT)
2140 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2141 &x86_64_int_parameter_registers [cum->regno],
2142 cum->sse_regno);
2143 else
2144 switch (mode)
2145 {
2146 /* For now, pass fp/complex values on the stack. */
2147 default:
2148 break;
2149
2150 case BLKmode:
2151 case DImode:
2152 case SImode:
2153 case HImode:
2154 case QImode:
2155 if (words <= cum->nregs)
2156 ret = gen_rtx_REG (mode, cum->regno);
2157 break;
2158 case TImode:
2159 if (cum->sse_nregs)
2160 ret = gen_rtx_REG (mode, cum->sse_regno);
2161 break;
2162 }
2163
2164 if (TARGET_DEBUG_ARG)
2165 {
2166 fprintf (stderr,
2167 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2168 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2169
2170 if (ret)
2171 print_simple_rtl (stderr, ret);
2172 else
2173 fprintf (stderr, ", stack");
2174
2175 fprintf (stderr, " )\n");
2176 }
2177
2178 return ret;
2179 }
2180
2181 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2182 and type. */
2183
2184 int
2185 ix86_function_arg_boundary (mode, type)
2186 enum machine_mode mode;
2187 tree type;
2188 {
2189 int align;
2190 if (!TARGET_64BIT)
2191 return PARM_BOUNDARY;
2192 if (type)
2193 align = TYPE_ALIGN (type);
2194 else
2195 align = GET_MODE_ALIGNMENT (mode);
2196 if (align < PARM_BOUNDARY)
2197 align = PARM_BOUNDARY;
2198 if (align > 128)
2199 align = 128;
2200 return align;
2201 }
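
/* For example, on the 64-bit target a 16 byte SSE vector type with
   TYPE_ALIGN of 128 keeps the full 128 bit boundary, while smaller types
   are rounded up to at least PARM_BOUNDARY by the clamping above.
   (Illustration only.)  */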
2202
2203 /* Return true if REGNO is a possible register number for a function value. */
2204 bool
2205 ix86_function_value_regno_p (regno)
2206 int regno;
2207 {
2208 if (!TARGET_64BIT)
2209 {
2210 return ((regno) == 0
2211 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2212 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2213 }
2214 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2215 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2216 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2217 }
2218
2219 /* Define how to find the value returned by a function.
2220 VALTYPE is the data type of the value (as a tree).
2221 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2222 otherwise, FUNC is 0. */
2223 rtx
2224 ix86_function_value (valtype)
2225 tree valtype;
2226 {
2227 if (TARGET_64BIT)
2228 {
2229 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2230 REGPARM_MAX, SSE_REGPARM_MAX,
2231 x86_64_int_return_registers, 0);
2232 /* For zero sized structures, construct_container returns NULL, but we need
2233 to keep the rest of the compiler happy by returning a meaningful value. */
2234 if (!ret)
2235 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2236 return ret;
2237 }
2238 else
2239 return gen_rtx_REG (TYPE_MODE (valtype),
2240 ix86_value_regno (TYPE_MODE (valtype)));
2241 }
2242
2243 /* Return nonzero iff TYPE is returned in memory. */
2244 int
2245 ix86_return_in_memory (type)
2246 tree type;
2247 {
2248 int needed_intregs, needed_sseregs;
2249 if (TARGET_64BIT)
2250 {
2251 return !examine_argument (TYPE_MODE (type), type, 1,
2252 &needed_intregs, &needed_sseregs);
2253 }
2254 else
2255 {
2256 if (TYPE_MODE (type) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type))
2258 && int_size_in_bytes (type) == 8)
2259 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2260 && TYPE_MODE (type) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type))))
2262 return 1;
2263 return 0;
2264 }
2265 }
2266
2267 /* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2269 rtx
2270 ix86_libcall_value (mode)
2271 enum machine_mode mode;
2272 {
2273 if (TARGET_64BIT)
2274 {
2275 switch (mode)
2276 {
2277 case SFmode:
2278 case SCmode:
2279 case DFmode:
2280 case DCmode:
2281 return gen_rtx_REG (mode, FIRST_SSE_REG);
2282 case TFmode:
2283 case TCmode:
2284 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2285 default:
2286 return gen_rtx_REG (mode, 0);
2287 }
2288 }
2289 else
2290 return gen_rtx_REG (mode, ix86_value_regno (mode));
2291 }
2292
2293 /* Given a mode, return the register to use for a return value. */
2294
2295 static int
2296 ix86_value_regno (mode)
2297 enum machine_mode mode;
2298 {
2299 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2300 return FIRST_FLOAT_REG;
2301 if (mode == TImode || VECTOR_MODE_P (mode))
2302 return FIRST_SSE_REG;
2303 return 0;
2304 }
2305 \f
2306 /* Create the va_list data type. */
2307
2308 tree
2309 ix86_build_va_list ()
2310 {
2311 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2312
2313 /* For i386 we use plain pointer to argument area. */
2314 if (!TARGET_64BIT)
2315 return build_pointer_type (char_type_node);
2316
2317 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2318 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2319
2320 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2321 unsigned_type_node);
2322 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2323 unsigned_type_node);
2324 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2325 ptr_type_node);
2326 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2327 ptr_type_node);
2328
2329 DECL_FIELD_CONTEXT (f_gpr) = record;
2330 DECL_FIELD_CONTEXT (f_fpr) = record;
2331 DECL_FIELD_CONTEXT (f_ovf) = record;
2332 DECL_FIELD_CONTEXT (f_sav) = record;
2333
2334 TREE_CHAIN (record) = type_decl;
2335 TYPE_NAME (record) = type_decl;
2336 TYPE_FIELDS (record) = f_gpr;
2337 TREE_CHAIN (f_gpr) = f_fpr;
2338 TREE_CHAIN (f_fpr) = f_ovf;
2339 TREE_CHAIN (f_ovf) = f_sav;
2340
2341 layout_type (record);
2342
2343 /* The correct type is an array type of one element. */
2344 return build_array_type (record, build_index_type (size_zero_node));
2345 }
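
/* Expressed in C, the record built above corresponds to the familiar x86-64
   va_list element (shown only as a reader aid; the authoritative definition
   is the tree constructed above):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

   and the function returns an array type of one such element.  */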
2346
2347 /* Perform any actions needed for a function that is receiving a
2348 variable number of arguments.
2349
2350 CUM is as above.
2351
2352 MODE and TYPE are the mode and type of the current parameter.
2353
2354 PRETEND_SIZE is a variable that should be set to the amount of stack
2355 that must be pushed by the prolog to pretend that our caller pushed
2356 it.
2357
2358 Normally, this macro will push all remaining incoming registers on the
2359 stack and set PRETEND_SIZE to the length of the registers pushed. */
2360
2361 void
2362 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2363 CUMULATIVE_ARGS *cum;
2364 enum machine_mode mode;
2365 tree type;
2366 int *pretend_size ATTRIBUTE_UNUSED;
2367 int no_rtl;
2368
2369 {
2370 CUMULATIVE_ARGS next_cum;
2371 rtx save_area = NULL_RTX, mem;
2372 rtx label;
2373 rtx label_ref;
2374 rtx tmp_reg;
2375 rtx nsse_reg;
2376 int set;
2377 tree fntype;
2378 int stdarg_p;
2379 int i;
2380
2381 if (!TARGET_64BIT)
2382 return;
2383
2384 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2385 ix86_save_varrargs_registers = 1;
2386
2387 fntype = TREE_TYPE (current_function_decl);
2388 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2389 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2390 != void_type_node));
2391
2392 /* For varargs, we do not want to skip the dummy va_dcl argument.
2393 For stdargs, we do want to skip the last named argument. */
2394 next_cum = *cum;
2395 if (stdarg_p)
2396 function_arg_advance (&next_cum, mode, type, 1);
2397
2398 if (!no_rtl)
2399 save_area = frame_pointer_rtx;
2400
2401 set = get_varargs_alias_set ();
2402
2403 for (i = next_cum.regno; i < ix86_regparm; i++)
2404 {
2405 mem = gen_rtx_MEM (Pmode,
2406 plus_constant (save_area, i * UNITS_PER_WORD));
2407 set_mem_alias_set (mem, set);
2408 emit_move_insn (mem, gen_rtx_REG (Pmode,
2409 x86_64_int_parameter_registers[i]));
2410 }
2411
2412 if (next_cum.sse_nregs)
2413 {
2414 /* Now emit code to save SSE registers. The AX parameter contains the number
2415 of SSE parameter registers used to call this function. We use the
2416 sse_prologue_save insn template that produces a computed jump across
2417 the SSE saves. We need some preparation work to get this working. */
2418
2419 label = gen_label_rtx ();
2420 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2421
2422 /* Compute the address to jump to:
2423 label - 4*eax + named_sse_arguments*4 (each save insn below is 4 bytes). */
2424 tmp_reg = gen_reg_rtx (Pmode);
2425 nsse_reg = gen_reg_rtx (Pmode);
2426 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2427 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2428 gen_rtx_MULT (Pmode, nsse_reg,
2429 GEN_INT (4))));
2430 if (next_cum.sse_regno)
2431 emit_move_insn
2432 (nsse_reg,
2433 gen_rtx_CONST (DImode,
2434 gen_rtx_PLUS (DImode,
2435 label_ref,
2436 GEN_INT (next_cum.sse_regno * 4))));
2437 else
2438 emit_move_insn (nsse_reg, label_ref);
2439 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2440
2441 /* Compute the address of the memory block we save into. We always use a
2442 pointer pointing 127 bytes after the first byte to store - this keeps each
2443 save instruction within 4 bytes, since the displacement fits in a signed byte. */
2444 tmp_reg = gen_reg_rtx (Pmode);
2445 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2446 plus_constant (save_area,
2447 8 * REGPARM_MAX + 127)));
2448 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2449 set_mem_alias_set (mem, set);
2450 set_mem_align (mem, BITS_PER_WORD);
2451
2452 /* And finally do the dirty job! */
2453 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2454 GEN_INT (next_cum.sse_regno), label));
2455 }
2456
2457 }
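
/* Reader's sketch of the register save area set up above (offsets relative
   to the frame pointer based SAVE_AREA; this merely restates the code, it is
   not an additional specification):

     0 .. 8*REGPARM_MAX - 1          slots for the integer argument
                                     registers, 8 bytes each (only those not
                                     consumed by named arguments are stored),
     8*REGPARM_MAX .. 8*REGPARM_MAX + 16*SSE_REGPARM_MAX - 1
                                     slots for the SSE argument registers,
                                     16 bytes each, filled by the
                                     sse_prologue_save sequence.

   ix86_va_start below sets gp_offset and fp_offset to index into this
   block.  */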
2458
2459 /* Implement va_start. */
2460
2461 void
2462 ix86_va_start (valist, nextarg)
2463 tree valist;
2464 rtx nextarg;
2465 {
2466 HOST_WIDE_INT words, n_gpr, n_fpr;
2467 tree f_gpr, f_fpr, f_ovf, f_sav;
2468 tree gpr, fpr, ovf, sav, t;
2469
2470 /* Only 64bit target needs something special. */
2471 if (!TARGET_64BIT)
2472 {
2473 std_expand_builtin_va_start (valist, nextarg);
2474 return;
2475 }
2476
2477 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2478 f_fpr = TREE_CHAIN (f_gpr);
2479 f_ovf = TREE_CHAIN (f_fpr);
2480 f_sav = TREE_CHAIN (f_ovf);
2481
2482 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2483 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2484 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2485 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2486 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2487
2488 /* Count number of gp and fp argument registers used. */
2489 words = current_function_args_info.words;
2490 n_gpr = current_function_args_info.regno;
2491 n_fpr = current_function_args_info.sse_regno;
2492
2493 if (TARGET_DEBUG_ARG)
2494 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2495 (int) words, (int) n_gpr, (int) n_fpr);
2496
2497 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2498 build_int_2 (n_gpr * 8, 0));
2499 TREE_SIDE_EFFECTS (t) = 1;
2500 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2501
2502 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2503 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2504 TREE_SIDE_EFFECTS (t) = 1;
2505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2506
2507 /* Find the overflow area. */
2508 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2509 if (words != 0)
2510 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2511 build_int_2 (words * UNITS_PER_WORD, 0));
2512 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2513 TREE_SIDE_EFFECTS (t) = 1;
2514 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2515
2516 /* Find the register save area.
2517 The function prologue saves it right above the stack frame. */
2518 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2519 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2520 TREE_SIDE_EFFECTS (t) = 1;
2521 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2522 }
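
/* Roughly, the expansion generated above amounts to the following (a sketch
   with descriptive names, not actual code emitted by the compiler):

     ap->gp_offset = <named integer regs used> * 8;
     ap->fp_offset = REGPARM_MAX * 8 + <named SSE regs used> * 16;
     ap->overflow_arg_area = <incoming argument pointer>
                             + <named stack words> * UNITS_PER_WORD;
     ap->reg_save_area = <frame pointer>;  */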
2523
2524 /* Implement va_arg. */
2525 rtx
2526 ix86_va_arg (valist, type)
2527 tree valist, type;
2528 {
2529 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2530 tree f_gpr, f_fpr, f_ovf, f_sav;
2531 tree gpr, fpr, ovf, sav, t;
2532 int size, rsize;
2533 rtx lab_false, lab_over = NULL_RTX;
2534 rtx addr_rtx, r;
2535 rtx container;
2536
2537 /* Only 64bit target needs something special. */
2538 if (!TARGET_64BIT)
2539 {
2540 return std_expand_builtin_va_arg (valist, type);
2541 }
2542
2543 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2544 f_fpr = TREE_CHAIN (f_gpr);
2545 f_ovf = TREE_CHAIN (f_fpr);
2546 f_sav = TREE_CHAIN (f_ovf);
2547
2548 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2549 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2550 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2551 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2552 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2553
2554 size = int_size_in_bytes (type);
2555 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2556
2557 container = construct_container (TYPE_MODE (type), type, 0,
2558 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2559 /*
2560 * Pull the value out of the saved registers ...
2561 */
2562
2563 addr_rtx = gen_reg_rtx (Pmode);
2564
2565 if (container)
2566 {
2567 rtx int_addr_rtx, sse_addr_rtx;
2568 int needed_intregs, needed_sseregs;
2569 int need_temp;
2570
2571 lab_over = gen_label_rtx ();
2572 lab_false = gen_label_rtx ();
2573
2574 examine_argument (TYPE_MODE (type), type, 0,
2575 &needed_intregs, &needed_sseregs);
2576
2577
2578 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2579 || TYPE_ALIGN (type) > 128);
2580
2581 /* In case we are passing a structure, verify that it is a consecutive block
2582 in the register save area. If not, we need to do moves. */
2583 if (!need_temp && !REG_P (container))
2584 {
2585 /* Verify that all registers are strictly consecutive. */
2586 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2587 {
2588 int i;
2589
2590 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2591 {
2592 rtx slot = XVECEXP (container, 0, i);
2593 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2594 || INTVAL (XEXP (slot, 1)) != i * 16)
2595 need_temp = 1;
2596 }
2597 }
2598 else
2599 {
2600 int i;
2601
2602 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2603 {
2604 rtx slot = XVECEXP (container, 0, i);
2605 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2606 || INTVAL (XEXP (slot, 1)) != i * 8)
2607 need_temp = 1;
2608 }
2609 }
2610 }
2611 if (!need_temp)
2612 {
2613 int_addr_rtx = addr_rtx;
2614 sse_addr_rtx = addr_rtx;
2615 }
2616 else
2617 {
2618 int_addr_rtx = gen_reg_rtx (Pmode);
2619 sse_addr_rtx = gen_reg_rtx (Pmode);
2620 }
2621 /* First ensure that we fit completely in registers. */
2622 if (needed_intregs)
2623 {
2624 emit_cmp_and_jump_insns (expand_expr
2625 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2626 GEN_INT ((REGPARM_MAX - needed_intregs +
2627 1) * 8), GE, const1_rtx, SImode,
2628 1, lab_false);
2629 }
2630 if (needed_sseregs)
2631 {
2632 emit_cmp_and_jump_insns (expand_expr
2633 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2634 GEN_INT ((SSE_REGPARM_MAX -
2635 needed_sseregs + 1) * 16 +
2636 REGPARM_MAX * 8), GE, const1_rtx,
2637 SImode, 1, lab_false);
2638 }
2639
2640 /* Compute index to start of area used for integer regs. */
2641 if (needed_intregs)
2642 {
2643 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2644 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2645 if (r != int_addr_rtx)
2646 emit_move_insn (int_addr_rtx, r);
2647 }
2648 if (needed_sseregs)
2649 {
2650 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2651 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2652 if (r != sse_addr_rtx)
2653 emit_move_insn (sse_addr_rtx, r);
2654 }
2655 if (need_temp)
2656 {
2657 int i;
2658 rtx mem;
2659
2660 /* Never use the memory itself, as it has the alias set. */
2661 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2662 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2663 set_mem_alias_set (mem, get_varargs_alias_set ());
2664 set_mem_align (mem, BITS_PER_UNIT);
2665
2666 for (i = 0; i < XVECLEN (container, 0); i++)
2667 {
2668 rtx slot = XVECEXP (container, 0, i);
2669 rtx reg = XEXP (slot, 0);
2670 enum machine_mode mode = GET_MODE (reg);
2671 rtx src_addr;
2672 rtx src_mem;
2673 int src_offset;
2674 rtx dest_mem;
2675
2676 if (SSE_REGNO_P (REGNO (reg)))
2677 {
2678 src_addr = sse_addr_rtx;
2679 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2680 }
2681 else
2682 {
2683 src_addr = int_addr_rtx;
2684 src_offset = REGNO (reg) * 8;
2685 }
2686 src_mem = gen_rtx_MEM (mode, src_addr);
2687 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2688 src_mem = adjust_address (src_mem, mode, src_offset);
2689 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2690 emit_move_insn (dest_mem, src_mem);
2691 }
2692 }
2693
2694 if (needed_intregs)
2695 {
2696 t =
2697 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2698 build_int_2 (needed_intregs * 8, 0));
2699 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2700 TREE_SIDE_EFFECTS (t) = 1;
2701 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2702 }
2703 if (needed_sseregs)
2704 {
2705 t =
2706 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2707 build_int_2 (needed_sseregs * 16, 0));
2708 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2709 TREE_SIDE_EFFECTS (t) = 1;
2710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2711 }
2712
2713 emit_jump_insn (gen_jump (lab_over));
2714 emit_barrier ();
2715 emit_label (lab_false);
2716 }
2717
2718 /* ... otherwise out of the overflow area. */
2719
2720 /* Care for on-stack alignment if needed. */
2721 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2722 t = ovf;
2723 else
2724 {
2725 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2726 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2727 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2728 }
2729 t = save_expr (t);
2730
2731 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2732 if (r != addr_rtx)
2733 emit_move_insn (addr_rtx, r);
2734
2735 t =
2736 build (PLUS_EXPR, TREE_TYPE (t), t,
2737 build_int_2 (rsize * UNITS_PER_WORD, 0));
2738 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2739 TREE_SIDE_EFFECTS (t) = 1;
2740 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2741
2742 if (container)
2743 emit_label (lab_over);
2744
2745 return addr_rtx;
2746 }
2747 \f
2748 /* Return nonzero if OP is general operand representable on x86_64. */
2749
2750 int
2751 x86_64_general_operand (op, mode)
2752 rtx op;
2753 enum machine_mode mode;
2754 {
2755 if (!TARGET_64BIT)
2756 return general_operand (op, mode);
2757 if (nonimmediate_operand (op, mode))
2758 return 1;
2759 return x86_64_sign_extended_value (op);
2760 }
2761
2762 /* Return nonzero if OP is general operand representable on x86_64
2763 as either sign extended or zero extended constant. */
2764
2765 int
2766 x86_64_szext_general_operand (op, mode)
2767 rtx op;
2768 enum machine_mode mode;
2769 {
2770 if (!TARGET_64BIT)
2771 return general_operand (op, mode);
2772 if (nonimmediate_operand (op, mode))
2773 return 1;
2774 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2775 }
2776
2777 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2778
2779 int
2780 x86_64_nonmemory_operand (op, mode)
2781 rtx op;
2782 enum machine_mode mode;
2783 {
2784 if (!TARGET_64BIT)
2785 return nonmemory_operand (op, mode);
2786 if (register_operand (op, mode))
2787 return 1;
2788 return x86_64_sign_extended_value (op);
2789 }
2790
2791 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2792
2793 int
2794 x86_64_movabs_operand (op, mode)
2795 rtx op;
2796 enum machine_mode mode;
2797 {
2798 if (!TARGET_64BIT || !flag_pic)
2799 return nonmemory_operand (op, mode);
2800 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2801 return 1;
2802 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2803 return 1;
2804 return 0;
2805 }
2806
2807 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2808
2809 int
2810 x86_64_szext_nonmemory_operand (op, mode)
2811 rtx op;
2812 enum machine_mode mode;
2813 {
2814 if (!TARGET_64BIT)
2815 return nonmemory_operand (op, mode);
2816 if (register_operand (op, mode))
2817 return 1;
2818 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2819 }
2820
2821 /* Return nonzero if OP is immediate operand representable on x86_64. */
2822
2823 int
2824 x86_64_immediate_operand (op, mode)
2825 rtx op;
2826 enum machine_mode mode;
2827 {
2828 if (!TARGET_64BIT)
2829 return immediate_operand (op, mode);
2830 return x86_64_sign_extended_value (op);
2831 }
2832
2833 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended 32-bit value. */
2834
2835 int
2836 x86_64_zext_immediate_operand (op, mode)
2837 rtx op;
2838 enum machine_mode mode ATTRIBUTE_UNUSED;
2839 {
2840 return x86_64_zero_extended_value (op);
2841 }
2842
2843 /* Return nonzero if OP is (const_int 1), else return zero. */
2844
2845 int
2846 const_int_1_operand (op, mode)
2847 rtx op;
2848 enum machine_mode mode ATTRIBUTE_UNUSED;
2849 {
2850 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2851 }
2852
2853 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2854 for shift & compare patterns, as shifting by 0 does not change flags),
2855 else return zero. */
2856
2857 int
2858 const_int_1_31_operand (op, mode)
2859 rtx op;
2860 enum machine_mode mode ATTRIBUTE_UNUSED;
2861 {
2862 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2863 }
2864
2865 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2866 reference and a constant. */
2867
2868 int
2869 symbolic_operand (op, mode)
2870 register rtx op;
2871 enum machine_mode mode ATTRIBUTE_UNUSED;
2872 {
2873 switch (GET_CODE (op))
2874 {
2875 case SYMBOL_REF:
2876 case LABEL_REF:
2877 return 1;
2878
2879 case CONST:
2880 op = XEXP (op, 0);
2881 if (GET_CODE (op) == SYMBOL_REF
2882 || GET_CODE (op) == LABEL_REF
2883 || (GET_CODE (op) == UNSPEC
2884 && (XINT (op, 1) == UNSPEC_GOT
2885 || XINT (op, 1) == UNSPEC_GOTOFF
2886 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2887 return 1;
2888 if (GET_CODE (op) != PLUS
2889 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2890 return 0;
2891
2892 op = XEXP (op, 0);
2893 if (GET_CODE (op) == SYMBOL_REF
2894 || GET_CODE (op) == LABEL_REF)
2895 return 1;
2896 /* Only @GOTOFF gets offsets. */
2897 if (GET_CODE (op) != UNSPEC
2898 || XINT (op, 1) != UNSPEC_GOTOFF)
2899 return 0;
2900
2901 op = XVECEXP (op, 0, 0);
2902 if (GET_CODE (op) == SYMBOL_REF
2903 || GET_CODE (op) == LABEL_REF)
2904 return 1;
2905 return 0;
2906
2907 default:
2908 return 0;
2909 }
2910 }
2911
2912 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2913
2914 int
2915 pic_symbolic_operand (op, mode)
2916 register rtx op;
2917 enum machine_mode mode ATTRIBUTE_UNUSED;
2918 {
2919 if (GET_CODE (op) != CONST)
2920 return 0;
2921 op = XEXP (op, 0);
2922 if (TARGET_64BIT)
2923 {
2924 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2925 return 1;
2926 }
2927 else
2928 {
2929 if (GET_CODE (op) == UNSPEC)
2930 return 1;
2931 if (GET_CODE (op) != PLUS
2932 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2933 return 0;
2934 op = XEXP (op, 0);
2935 if (GET_CODE (op) == UNSPEC)
2936 return 1;
2937 }
2938 return 0;
2939 }
2940
2941 /* Return true if OP is a symbolic operand that resolves locally. */
2942
2943 static int
2944 local_symbolic_operand (op, mode)
2945 rtx op;
2946 enum machine_mode mode ATTRIBUTE_UNUSED;
2947 {
2948 if (GET_CODE (op) == LABEL_REF)
2949 return 1;
2950
2951 if (GET_CODE (op) == CONST
2952 && GET_CODE (XEXP (op, 0)) == PLUS
2953 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2954 op = XEXP (XEXP (op, 0), 0);
2955
2956 if (GET_CODE (op) != SYMBOL_REF)
2957 return 0;
2958
2959 /* These we've been told are local by varasm and encode_section_info
2960 respectively. */
2961 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2962 return 1;
2963
2964 /* There is, however, a not insubstantial body of code in the rest of
2965 the compiler that assumes it can just stick the results of
2966 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2967 /* ??? This is a hack. Should update the body of the compiler to
2968 always create a DECL and invoke targetm.encode_section_info. */
2969 if (strncmp (XSTR (op, 0), internal_label_prefix,
2970 internal_label_prefix_len) == 0)
2971 return 1;
2972
2973 return 0;
2974 }
2975
2976 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2977
2978 int
2979 tls_symbolic_operand (op, mode)
2980 register rtx op;
2981 enum machine_mode mode ATTRIBUTE_UNUSED;
2982 {
2983 const char *symbol_str;
2984
2985 if (GET_CODE (op) != SYMBOL_REF)
2986 return 0;
2987 symbol_str = XSTR (op, 0);
2988
2989 if (symbol_str[0] != '%')
2990 return 0;
2991 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2992 }
2993
2994 static int
2995 tls_symbolic_operand_1 (op, kind)
2996 rtx op;
2997 enum tls_model kind;
2998 {
2999 const char *symbol_str;
3000
3001 if (GET_CODE (op) != SYMBOL_REF)
3002 return 0;
3003 symbol_str = XSTR (op, 0);
3004
3005 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3006 }
3007
3008 int
3009 global_dynamic_symbolic_operand (op, mode)
3010 register rtx op;
3011 enum machine_mode mode ATTRIBUTE_UNUSED;
3012 {
3013 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3014 }
3015
3016 int
3017 local_dynamic_symbolic_operand (op, mode)
3018 register rtx op;
3019 enum machine_mode mode ATTRIBUTE_UNUSED;
3020 {
3021 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3022 }
3023
3024 int
3025 initial_exec_symbolic_operand (op, mode)
3026 register rtx op;
3027 enum machine_mode mode ATTRIBUTE_UNUSED;
3028 {
3029 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3030 }
3031
3032 int
3033 local_exec_symbolic_operand (op, mode)
3034 register rtx op;
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3036 {
3037 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3038 }
3039
3040 /* Test for a valid operand for a call instruction. Don't allow the
3041 arg pointer register or virtual regs since they may decay into
3042 reg + const, which the patterns can't handle. */
3043
3044 int
3045 call_insn_operand (op, mode)
3046 rtx op;
3047 enum machine_mode mode ATTRIBUTE_UNUSED;
3048 {
3049 /* Disallow indirect through a virtual register. This leads to
3050 compiler aborts when trying to eliminate them. */
3051 if (GET_CODE (op) == REG
3052 && (op == arg_pointer_rtx
3053 || op == frame_pointer_rtx
3054 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3055 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3056 return 0;
3057
3058 /* Disallow `call 1234'. Due to varying assembler lameness this
3059 gets either rejected or translated to `call .+1234'. */
3060 if (GET_CODE (op) == CONST_INT)
3061 return 0;
3062
3063 /* Explicitly allow SYMBOL_REF even if pic. */
3064 if (GET_CODE (op) == SYMBOL_REF)
3065 return 1;
3066
3067 /* Otherwise we can allow any general_operand in the address. */
3068 return general_operand (op, Pmode);
3069 }
3070
3071 int
3072 constant_call_address_operand (op, mode)
3073 rtx op;
3074 enum machine_mode mode ATTRIBUTE_UNUSED;
3075 {
3076 if (GET_CODE (op) == CONST
3077 && GET_CODE (XEXP (op, 0)) == PLUS
3078 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3079 op = XEXP (XEXP (op, 0), 0);
3080 return GET_CODE (op) == SYMBOL_REF;
3081 }
3082
3083 /* Match exactly zero and one. */
3084
3085 int
3086 const0_operand (op, mode)
3087 register rtx op;
3088 enum machine_mode mode;
3089 {
3090 return op == CONST0_RTX (mode);
3091 }
3092
3093 int
3094 const1_operand (op, mode)
3095 register rtx op;
3096 enum machine_mode mode ATTRIBUTE_UNUSED;
3097 {
3098 return op == const1_rtx;
3099 }
3100
3101 /* Match 2, 4, or 8. Used for leal multiplicands. */
3102
3103 int
3104 const248_operand (op, mode)
3105 register rtx op;
3106 enum machine_mode mode ATTRIBUTE_UNUSED;
3107 {
3108 return (GET_CODE (op) == CONST_INT
3109 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3110 }
3111
3112 /* True if this is a constant appropriate for an increment or decrement. */
3113
3114 int
3115 incdec_operand (op, mode)
3116 register rtx op;
3117 enum machine_mode mode ATTRIBUTE_UNUSED;
3118 {
3119 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3120 the flags register, since the carry flag is not set. */
3121 if (TARGET_PENTIUM4 && !optimize_size)
3122 return 0;
3123 return op == const1_rtx || op == constm1_rtx;
3124 }
3125
3126 /* Return nonzero if OP is acceptable as operand of DImode shift
3127 expander. */
3128
3129 int
3130 shiftdi_operand (op, mode)
3131 rtx op;
3132 enum machine_mode mode ATTRIBUTE_UNUSED;
3133 {
3134 if (TARGET_64BIT)
3135 return nonimmediate_operand (op, mode);
3136 else
3137 return register_operand (op, mode);
3138 }
3139
3140 /* Return false if this is the stack pointer, or any other fake
3141 register eliminable to the stack pointer. Otherwise, this is
3142 a register operand.
3143
3144 This is used to prevent esp from being used as an index reg,
3145 which would only happen in pathological cases. */
3146
3147 int
3148 reg_no_sp_operand (op, mode)
3149 register rtx op;
3150 enum machine_mode mode;
3151 {
3152 rtx t = op;
3153 if (GET_CODE (t) == SUBREG)
3154 t = SUBREG_REG (t);
3155 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3156 return 0;
3157
3158 return register_operand (op, mode);
3159 }
3160
3161 int
3162 mmx_reg_operand (op, mode)
3163 register rtx op;
3164 enum machine_mode mode ATTRIBUTE_UNUSED;
3165 {
3166 return MMX_REG_P (op);
3167 }
3168
3169 /* Return false if this is any eliminable register. Otherwise
3170 general_operand. */
3171
3172 int
3173 general_no_elim_operand (op, mode)
3174 register rtx op;
3175 enum machine_mode mode;
3176 {
3177 rtx t = op;
3178 if (GET_CODE (t) == SUBREG)
3179 t = SUBREG_REG (t);
3180 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3181 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3182 || t == virtual_stack_dynamic_rtx)
3183 return 0;
3184 if (REG_P (t)
3185 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3186 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3187 return 0;
3188
3189 return general_operand (op, mode);
3190 }
3191
3192 /* Return false if this is any eliminable register. Otherwise
3193 register_operand or const_int. */
3194
3195 int
3196 nonmemory_no_elim_operand (op, mode)
3197 register rtx op;
3198 enum machine_mode mode;
3199 {
3200 rtx t = op;
3201 if (GET_CODE (t) == SUBREG)
3202 t = SUBREG_REG (t);
3203 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3204 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3205 || t == virtual_stack_dynamic_rtx)
3206 return 0;
3207
3208 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3209 }
3210
3211 /* Return true if op is a Q_REGS class register. */
3212
3213 int
3214 q_regs_operand (op, mode)
3215 register rtx op;
3216 enum machine_mode mode;
3217 {
3218 if (mode != VOIDmode && GET_MODE (op) != mode)
3219 return 0;
3220 if (GET_CODE (op) == SUBREG)
3221 op = SUBREG_REG (op);
3222 return ANY_QI_REG_P (op);
3223 }
3224
3225 /* Return true if op is a NON_Q_REGS class register. */
3226
3227 int
3228 non_q_regs_operand (op, mode)
3229 register rtx op;
3230 enum machine_mode mode;
3231 {
3232 if (mode != VOIDmode && GET_MODE (op) != mode)
3233 return 0;
3234 if (GET_CODE (op) == SUBREG)
3235 op = SUBREG_REG (op);
3236 return NON_QI_REG_P (op);
3237 }
3238
3239 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3240 insns. */
3241 int
3242 sse_comparison_operator (op, mode)
3243 rtx op;
3244 enum machine_mode mode ATTRIBUTE_UNUSED;
3245 {
3246 enum rtx_code code = GET_CODE (op);
3247 switch (code)
3248 {
3249 /* Operations supported directly. */
3250 case EQ:
3251 case LT:
3252 case LE:
3253 case UNORDERED:
3254 case NE:
3255 case UNGE:
3256 case UNGT:
3257 case ORDERED:
3258 return 1;
3259 /* These are equivalent to ones above in non-IEEE comparisons. */
3260 case UNEQ:
3261 case UNLT:
3262 case UNLE:
3263 case LTGT:
3264 case GE:
3265 case GT:
3266 return !TARGET_IEEE_FP;
3267 default:
3268 return 0;
3269 }
3270 }
3271 /* Return 1 if OP is a valid comparison operator in valid mode. */
3272 int
3273 ix86_comparison_operator (op, mode)
3274 register rtx op;
3275 enum machine_mode mode;
3276 {
3277 enum machine_mode inmode;
3278 enum rtx_code code = GET_CODE (op);
3279 if (mode != VOIDmode && GET_MODE (op) != mode)
3280 return 0;
3281 if (GET_RTX_CLASS (code) != '<')
3282 return 0;
3283 inmode = GET_MODE (XEXP (op, 0));
3284
3285 if (inmode == CCFPmode || inmode == CCFPUmode)
3286 {
3287 enum rtx_code second_code, bypass_code;
3288 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3289 return (bypass_code == NIL && second_code == NIL);
3290 }
3291 switch (code)
3292 {
3293 case EQ: case NE:
3294 return 1;
3295 case LT: case GE:
3296 if (inmode == CCmode || inmode == CCGCmode
3297 || inmode == CCGOCmode || inmode == CCNOmode)
3298 return 1;
3299 return 0;
3300 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3301 if (inmode == CCmode)
3302 return 1;
3303 return 0;
3304 case GT: case LE:
3305 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3306 return 1;
3307 return 0;
3308 default:
3309 return 0;
3310 }
3311 }
3312
3313 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3314
3315 int
3316 fcmov_comparison_operator (op, mode)
3317 register rtx op;
3318 enum machine_mode mode;
3319 {
3320 enum machine_mode inmode;
3321 enum rtx_code code = GET_CODE (op);
3322 if (mode != VOIDmode && GET_MODE (op) != mode)
3323 return 0;
3324 if (GET_RTX_CLASS (code) != '<')
3325 return 0;
3326 inmode = GET_MODE (XEXP (op, 0));
3327 if (inmode == CCFPmode || inmode == CCFPUmode)
3328 {
3329 enum rtx_code second_code, bypass_code;
3330 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3331 if (bypass_code != NIL || second_code != NIL)
3332 return 0;
3333 code = ix86_fp_compare_code_to_integer (code);
3334 }
3335 /* The i387 supports just a limited set of condition codes. */
3336 switch (code)
3337 {
3338 case LTU: case GTU: case LEU: case GEU:
3339 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3340 return 1;
3341 return 0;
3342 case ORDERED: case UNORDERED:
3343 case EQ: case NE:
3344 return 1;
3345 default:
3346 return 0;
3347 }
3348 }
3349
3350 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3351
3352 int
3353 promotable_binary_operator (op, mode)
3354 register rtx op;
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3356 {
3357 switch (GET_CODE (op))
3358 {
3359 case MULT:
3360 /* Modern CPUs have the same latency for HImode and SImode multiply,
3361 but the 386 and 486 do HImode multiply faster. */
3362 return ix86_cpu > PROCESSOR_I486;
3363 case PLUS:
3364 case AND:
3365 case IOR:
3366 case XOR:
3367 case ASHIFT:
3368 return 1;
3369 default:
3370 return 0;
3371 }
3372 }
3373
3374 /* Nearly general operand, but accept any const_double, since we wish
3375 to be able to drop them into memory rather than have them get pulled
3376 into registers. */
3377
3378 int
3379 cmp_fp_expander_operand (op, mode)
3380 register rtx op;
3381 enum machine_mode mode;
3382 {
3383 if (mode != VOIDmode && mode != GET_MODE (op))
3384 return 0;
3385 if (GET_CODE (op) == CONST_DOUBLE)
3386 return 1;
3387 return general_operand (op, mode);
3388 }
3389
3390 /* Match an SI or HImode register for a zero_extract. */
3391
3392 int
3393 ext_register_operand (op, mode)
3394 register rtx op;
3395 enum machine_mode mode ATTRIBUTE_UNUSED;
3396 {
3397 int regno;
3398 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3399 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3400 return 0;
3401
3402 if (!register_operand (op, VOIDmode))
3403 return 0;
3404
3405 /* Be careful to accept only registers having upper parts. */
3406 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3407 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3408 }
3409
3410 /* Return 1 if this is a valid binary floating-point operation.
3411 OP is the expression matched, and MODE is its mode. */
3412
3413 int
3414 binary_fp_operator (op, mode)
3415 register rtx op;
3416 enum machine_mode mode;
3417 {
3418 if (mode != VOIDmode && mode != GET_MODE (op))
3419 return 0;
3420
3421 switch (GET_CODE (op))
3422 {
3423 case PLUS:
3424 case MINUS:
3425 case MULT:
3426 case DIV:
3427 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3428
3429 default:
3430 return 0;
3431 }
3432 }
3433
3434 int
3435 mult_operator (op, mode)
3436 register rtx op;
3437 enum machine_mode mode ATTRIBUTE_UNUSED;
3438 {
3439 return GET_CODE (op) == MULT;
3440 }
3441
3442 int
3443 div_operator (op, mode)
3444 register rtx op;
3445 enum machine_mode mode ATTRIBUTE_UNUSED;
3446 {
3447 return GET_CODE (op) == DIV;
3448 }
3449
3450 int
3451 arith_or_logical_operator (op, mode)
3452 rtx op;
3453 enum machine_mode mode;
3454 {
3455 return ((mode == VOIDmode || GET_MODE (op) == mode)
3456 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3457 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3458 }
3459
3460 /* Returns 1 if OP is memory operand with a displacement. */
3461
3462 int
3463 memory_displacement_operand (op, mode)
3464 register rtx op;
3465 enum machine_mode mode;
3466 {
3467 struct ix86_address parts;
3468
3469 if (! memory_operand (op, mode))
3470 return 0;
3471
3472 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3473 abort ();
3474
3475 return parts.disp != NULL_RTX;
3476 }
3477
3478 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3479 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3480
3481 ??? It seems likely that this will only work because cmpsi is an
3482 expander, and no actual insns use this. */
3483
3484 int
3485 cmpsi_operand (op, mode)
3486 rtx op;
3487 enum machine_mode mode;
3488 {
3489 if (nonimmediate_operand (op, mode))
3490 return 1;
3491
3492 if (GET_CODE (op) == AND
3493 && GET_MODE (op) == SImode
3494 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3495 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3496 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3497 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3498 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3499 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3500 return 1;
3501
3502 return 0;
3503 }
3504
3505 /* Returns 1 if OP is a memory operand that cannot be represented by the
3506 modRM array. */
3507
3508 int
3509 long_memory_operand (op, mode)
3510 register rtx op;
3511 enum machine_mode mode;
3512 {
3513 if (! memory_operand (op, mode))
3514 return 0;
3515
3516 return memory_address_length (op) != 0;
3517 }
3518
3519 /* Return nonzero if the rtx is known aligned. */
3520
3521 int
3522 aligned_operand (op, mode)
3523 rtx op;
3524 enum machine_mode mode;
3525 {
3526 struct ix86_address parts;
3527
3528 if (!general_operand (op, mode))
3529 return 0;
3530
3531 /* Registers and immediate operands are always "aligned". */
3532 if (GET_CODE (op) != MEM)
3533 return 1;
3534
3535 /* Don't even try to do any aligned optimizations with volatiles. */
3536 if (MEM_VOLATILE_P (op))
3537 return 0;
3538
3539 op = XEXP (op, 0);
3540
3541 /* Pushes and pops are only valid on the stack pointer. */
3542 if (GET_CODE (op) == PRE_DEC
3543 || GET_CODE (op) == POST_INC)
3544 return 1;
3545
3546 /* Decode the address. */
3547 if (! ix86_decompose_address (op, &parts))
3548 abort ();
3549
3550 if (parts.base && GET_CODE (parts.base) == SUBREG)
3551 parts.base = SUBREG_REG (parts.base);
3552 if (parts.index && GET_CODE (parts.index) == SUBREG)
3553 parts.index = SUBREG_REG (parts.index);
3554
3555 /* Look for some component that isn't known to be aligned. */
3556 if (parts.index)
3557 {
3558 if (parts.scale < 4
3559 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3560 return 0;
3561 }
3562 if (parts.base)
3563 {
3564 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3565 return 0;
3566 }
3567 if (parts.disp)
3568 {
3569 if (GET_CODE (parts.disp) != CONST_INT
3570 || (INTVAL (parts.disp) & 3) != 0)
3571 return 0;
3572 }
3573
3574 /* Didn't find one -- this must be an aligned address. */
3575 return 1;
3576 }
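/* For instance, an address such as (mem:SI (plus:SI (reg:SI %ebp) (const_int 8)))
   passes the checks above when %ebp is known to be at least 32-bit aligned,
   since the displacement 8 is a multiple of 4; the same address with a
   displacement of 6 would fail the final CONST_INT test.  (Illustrative
   example only.)  */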
3577 \f
3578 /* Return true if the constant is something that can be loaded with
3579 a special instruction. Only handle 0.0 and 1.0; others are less
3580 worthwhile. */
3581
3582 int
3583 standard_80387_constant_p (x)
3584 rtx x;
3585 {
3586 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3587 return -1;
3588 /* Note that there are other constants on the 80387, such as pi, that we
3589 should support too. On some machines these are much slower to load as a
3590 standard constant than to load from a double in memory. */
3591 if (x == CONST0_RTX (GET_MODE (x)))
3592 return 1;
3593 if (x == CONST1_RTX (GET_MODE (x)))
3594 return 2;
3595 return 0;
3596 }
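/* Illustrative note on the return values above: callers typically use a
   return value of 1 (the constant 0.0) to emit fldz and a return value of
   2 (the constant 1.0) to emit fld1; anything else falls back to loading
   the constant from memory.  */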
3597
3598 /* Return 1 if X is an FP constant we can load into an SSE register
3599 without using memory. */
3600 int
3601 standard_sse_constant_p (x)
3602 rtx x;
3603 {
3604 if (GET_CODE (x) != CONST_DOUBLE)
3605 return -1;
3606 return (x == CONST0_RTX (GET_MODE (x)));
3607 }
3608
3609 /* Returns 1 if OP contains a symbol reference */
3610
3611 int
3612 symbolic_reference_mentioned_p (op)
3613 rtx op;
3614 {
3615 register const char *fmt;
3616 register int i;
3617
3618 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3619 return 1;
3620
3621 fmt = GET_RTX_FORMAT (GET_CODE (op));
3622 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3623 {
3624 if (fmt[i] == 'E')
3625 {
3626 register int j;
3627
3628 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3629 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3630 return 1;
3631 }
3632
3633 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3634 return 1;
3635 }
3636
3637 return 0;
3638 }
3639
3640 /* Return 1 if it is appropriate to emit `ret' instructions in the
3641 body of a function. Do this only if the epilogue is simple, needing a
3642 couple of insns. Prior to reloading, we can't tell how many registers
3643 must be saved, so return 0 then. Return 0 if there is no frame
3644 marker to de-allocate.
3645
3646 If NON_SAVING_SETJMP is defined and true, then it is not possible
3647 for the epilogue to be simple, so return 0. This is a special case
3648 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3649 until final, but jump_optimize may need to know sooner if a
3650 `return' is OK. */
3651
3652 int
3653 ix86_can_use_return_insn_p ()
3654 {
3655 struct ix86_frame frame;
3656
3657 #ifdef NON_SAVING_SETJMP
3658 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3659 return 0;
3660 #endif
3661
3662 if (! reload_completed || frame_pointer_needed)
3663 return 0;
3664
3665 /* Don't allow more than 32k bytes of popped arguments, since that's
3666 all we can do with one instruction. */
3667 if (current_function_pops_args
3668 && current_function_args_size >= 32768)
3669 return 0;
3670
3671 ix86_compute_frame_layout (&frame);
3672 return frame.to_allocate == 0 && frame.nregs == 0;
3673 }
3674 \f
3675 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3676 int
3677 x86_64_sign_extended_value (value)
3678 rtx value;
3679 {
3680 switch (GET_CODE (value))
3681 {
3682 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3683 to be at least 32, and thus all acceptable constants are
3684 represented as CONST_INT. */
3685 case CONST_INT:
3686 if (HOST_BITS_PER_WIDE_INT == 32)
3687 return 1;
3688 else
3689 {
3690 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3691 return trunc_int_for_mode (val, SImode) == val;
3692 }
3693 break;
3694
3695 /* For certain code models, the symbolic references are known to fit. */
3696 case SYMBOL_REF:
3697 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3698
3699 /* For certain code models, the code is near as well. */
3700 case LABEL_REF:
3701 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3702
3703 /* We also may accept the offsetted memory references in certain special
3704 cases. */
3705 case CONST:
3706 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3707 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3708 return 1;
3709 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3710 {
3711 rtx op1 = XEXP (XEXP (value, 0), 0);
3712 rtx op2 = XEXP (XEXP (value, 0), 1);
3713 HOST_WIDE_INT offset;
3714
3715 if (ix86_cmodel == CM_LARGE)
3716 return 0;
3717 if (GET_CODE (op2) != CONST_INT)
3718 return 0;
3719 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3720 switch (GET_CODE (op1))
3721 {
3722 case SYMBOL_REF:
3723 /* For CM_SMALL assume that the last object is 1MB before
3724 the end of the 31-bit boundary. We may also accept pretty
3725 large negative constants knowing that all objects are
3726 in the positive half of the address space. */
3727 if (ix86_cmodel == CM_SMALL
3728 && offset < 1024*1024*1024
3729 && trunc_int_for_mode (offset, SImode) == offset)
3730 return 1;
3731 /* For CM_KERNEL we know that all objects reside in the
3732 negative half of the 32-bit address space. We may not
3733 accept negative offsets, since they may take the address
3734 just out of that range, but we may accept pretty large positive ones. */
3735 if (ix86_cmodel == CM_KERNEL
3736 && offset > 0
3737 && trunc_int_for_mode (offset, SImode) == offset)
3738 return 1;
3739 break;
3740 case LABEL_REF:
3741 /* These conditions are similar to SYMBOL_REF ones, just the
3742 constraints for code models differ. */
3743 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3744 && offset < 1024*1024*1024
3745 && trunc_int_for_mode (offset, SImode) == offset)
3746 return 1;
3747 if (ix86_cmodel == CM_KERNEL
3748 && offset > 0
3749 && trunc_int_for_mode (offset, SImode) == offset)
3750 return 1;
3751 break;
3752 default:
3753 return 0;
3754 }
3755 }
3756 return 0;
3757 default:
3758 return 0;
3759 }
3760 }
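/* Rough examples of the CONST_INT test above: 0x7fffffff is accepted, since
   it survives a sign-extending truncation to SImode unchanged, while
   0x80000000 is rejected because, as a 64-bit value, it no longer equals
   its 32-bit sign-extended form.  */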
3761
3762 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3763 int
3764 x86_64_zero_extended_value (value)
3765 rtx value;
3766 {
3767 switch (GET_CODE (value))
3768 {
3769 case CONST_DOUBLE:
3770 if (HOST_BITS_PER_WIDE_INT == 32)
3771 return (GET_MODE (value) == VOIDmode
3772 && !CONST_DOUBLE_HIGH (value));
3773 else
3774 return 0;
3775 case CONST_INT:
3776 if (HOST_BITS_PER_WIDE_INT == 32)
3777 return INTVAL (value) >= 0;
3778 else
3779 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3780 break;
3781
3782 /* For certain code models, the symbolic references are known to fit. */
3783 case SYMBOL_REF:
3784 return ix86_cmodel == CM_SMALL;
3785
3786 /* For certain code models, the code is near as well. */
3787 case LABEL_REF:
3788 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3789
3790 /* We also may accept the offsetted memory references in certain special
3791 cases. */
3792 case CONST:
3793 if (GET_CODE (XEXP (value, 0)) == PLUS)
3794 {
3795 rtx op1 = XEXP (XEXP (value, 0), 0);
3796 rtx op2 = XEXP (XEXP (value, 0), 1);
3797
3798 if (ix86_cmodel == CM_LARGE)
3799 return 0;
3800 switch (GET_CODE (op1))
3801 {
3802 case SYMBOL_REF:
3803 return 0;
3804 /* For the small code model we may accept pretty large positive
3805 offsets, since one bit is available for free. Negative
3806 offsets are limited by the size of the NULL pointer area
3807 specified by the ABI. */
3808 if (ix86_cmodel == CM_SMALL
3809 && GET_CODE (op2) == CONST_INT
3810 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3811 && (trunc_int_for_mode (INTVAL (op2), SImode)
3812 == INTVAL (op2)))
3813 return 1;
3814 /* ??? For the kernel, we may accept adjustment of
3815 -0x10000000, since we know that it will just convert
3816 negative address space to positive, but perhaps this
3817 is not worthwhile. */
3818 break;
3819 case LABEL_REF:
3820 /* These conditions are similar to SYMBOL_REF ones, just the
3821 constraints for code models differ. */
3822 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3823 && GET_CODE (op2) == CONST_INT
3824 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3825 && (trunc_int_for_mode (INTVAL (op2), SImode)
3826 == INTVAL (op2)))
3827 return 1;
3828 break;
3829 default:
3830 return 0;
3831 }
3832 }
3833 return 0;
3834 default:
3835 return 0;
3836 }
3837 }
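/* Rough examples of the CONST_INT test above (on a 64-bit host):
   0xffffffff is accepted because it has no bits set above bit 31, while
   -1 is rejected since its upper 32 bits are all ones.  */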
3838
3839 /* Value should be nonzero if functions must have frame pointers.
3840 Zero means the frame pointer need not be set up (and parms may
3841 be accessed via the stack pointer) in functions that seem suitable. */
3842
3843 int
3844 ix86_frame_pointer_required ()
3845 {
3846 /* If we accessed previous frames, then the generated code expects
3847 to be able to access the saved ebp value in our frame. */
3848 if (cfun->machine->accesses_prev_frame)
3849 return 1;
3850
3851 /* Several x86 OSes need a frame pointer for other reasons,
3852 usually pertaining to setjmp. */
3853 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3854 return 1;
3855
3856 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3857 the frame pointer by default. Turn it back on now if we've not
3858 got a leaf function. */
3859 if (TARGET_OMIT_LEAF_FRAME_POINTER
3860 && (!current_function_is_leaf || current_function_profile))
3861 return 1;
3862
3863 return 0;
3864 }
3865
3866 /* Record that the current function accesses previous call frames. */
3867
3868 void
3869 ix86_setup_frame_addresses ()
3870 {
3871 cfun->machine->accesses_prev_frame = 1;
3872 }
3873 \f
3874 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3875 # define USE_HIDDEN_LINKONCE 1
3876 #else
3877 # define USE_HIDDEN_LINKONCE 0
3878 #endif
3879
3880 static int pic_labels_used;
3881
3882 /* Fills in the label name that should be used for a pc thunk for
3883 the given register. */
3884
3885 static void
3886 get_pc_thunk_name (name, regno)
3887 char name[32];
3888 unsigned int regno;
3889 {
3890 if (USE_HIDDEN_LINKONCE)
3891 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3892 else
3893 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3894 }
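/* For example, with hidden linkonce support the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label of the form
   "LPR<regno>" is used (the exact spelling depends on
   ASM_GENERATE_INTERNAL_LABEL).  */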
3895
3896
3897 /* This function generates the pc thunks used by -fpic code; each thunk
3898 loads its register with the return address of the caller and then returns. */
3899
3900 void
3901 ix86_asm_file_end (file)
3902 FILE *file;
3903 {
3904 rtx xops[2];
3905 int regno;
3906
3907 for (regno = 0; regno < 8; ++regno)
3908 {
3909 char name[32];
3910
3911 if (! ((pic_labels_used >> regno) & 1))
3912 continue;
3913
3914 get_pc_thunk_name (name, regno);
3915
3916 if (USE_HIDDEN_LINKONCE)
3917 {
3918 tree decl;
3919
3920 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3921 error_mark_node);
3922 TREE_PUBLIC (decl) = 1;
3923 TREE_STATIC (decl) = 1;
3924 DECL_ONE_ONLY (decl) = 1;
3925
3926 (*targetm.asm_out.unique_section) (decl, 0);
3927 named_section (decl, NULL, 0);
3928
3929 ASM_GLOBALIZE_LABEL (file, name);
3930 fputs ("\t.hidden\t", file);
3931 assemble_name (file, name);
3932 fputc ('\n', file);
3933 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3934 }
3935 else
3936 {
3937 text_section ();
3938 ASM_OUTPUT_LABEL (file, name);
3939 }
3940
3941 xops[0] = gen_rtx_REG (SImode, regno);
3942 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3943 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3944 output_asm_insn ("ret", xops);
3945 }
3946 }
3947
3948 /* Emit code for the SET_GOT patterns. */
3949
3950 const char *
3951 output_set_got (dest)
3952 rtx dest;
3953 {
3954 rtx xops[3];
3955
3956 xops[0] = dest;
3957 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3958
3959 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3960 {
3961 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3962
3963 if (!flag_pic)
3964 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3965 else
3966 output_asm_insn ("call\t%a2", xops);
3967
3968 #if TARGET_MACHO
3969 /* Output the "canonical" label name ("Lxx$pb") here too. This
3970 is what will be referred to by the Mach-O PIC subsystem. */
3971 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3972 #endif
3973 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3974 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3975
3976 if (flag_pic)
3977 output_asm_insn ("pop{l}\t%0", xops);
3978 }
3979 else
3980 {
3981 char name[32];
3982 get_pc_thunk_name (name, REGNO (dest));
3983 pic_labels_used |= 1 << REGNO (dest);
3984
3985 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3986 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3987 output_asm_insn ("call\t%X2", xops);
3988 }
3989
3990 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3991 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3992 else if (!TARGET_MACHO)
3993 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3994
3995 return "";
3996 }
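/* A rough sketch of the output for the 32-bit PIC, deep-branch-prediction
   case (assuming %ebx as the destination):

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   Without deep branch prediction, a call to a local label followed by a
   popl and an add of the label-relative GOT offset is emitted instead.  */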
3997
3998 /* Generate a "push" pattern for input ARG. */
3999
4000 static rtx
4001 gen_push (arg)
4002 rtx arg;
4003 {
4004 return gen_rtx_SET (VOIDmode,
4005 gen_rtx_MEM (Pmode,
4006 gen_rtx_PRE_DEC (Pmode,
4007 stack_pointer_rtx)),
4008 arg);
4009 }
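/* The generated RTL is, for the 32-bit case where Pmode is SImode, roughly

       (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI ...))

   which is later matched as a single push instruction.  */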
4010
4011 /* Return >= 0 if there is an unused call-clobbered register available
4012 for the entire function. */
4013
4014 static unsigned int
4015 ix86_select_alt_pic_regnum ()
4016 {
4017 if (current_function_is_leaf && !current_function_profile)
4018 {
4019 int i;
4020 for (i = 2; i >= 0; --i)
4021 if (!regs_ever_live[i])
4022 return i;
4023 }
4024
4025 return INVALID_REGNUM;
4026 }
4027
4028 /* Return 1 if we need to save REGNO. */
4029 static int
4030 ix86_save_reg (regno, maybe_eh_return)
4031 unsigned int regno;
4032 int maybe_eh_return;
4033 {
4034 if (pic_offset_table_rtx
4035 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4036 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4037 || current_function_profile
4038 || current_function_calls_eh_return))
4039 {
4040 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4041 return 0;
4042 return 1;
4043 }
4044
4045 if (current_function_calls_eh_return && maybe_eh_return)
4046 {
4047 unsigned i;
4048 for (i = 0; ; i++)
4049 {
4050 unsigned test = EH_RETURN_DATA_REGNO (i);
4051 if (test == INVALID_REGNUM)
4052 break;
4053 if (test == regno)
4054 return 1;
4055 }
4056 }
4057
4058 return (regs_ever_live[regno]
4059 && !call_used_regs[regno]
4060 && !fixed_regs[regno]
4061 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4062 }
4063
4064 /* Return number of registers to be saved on the stack. */
4065
4066 static int
4067 ix86_nsaved_regs ()
4068 {
4069 int nregs = 0;
4070 int regno;
4071
4072 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4073 if (ix86_save_reg (regno, true))
4074 nregs++;
4075 return nregs;
4076 }
4077
4078 /* Return the offset between two registers, one to be eliminated, and the other
4079 its replacement, at the start of a routine. */
4080
4081 HOST_WIDE_INT
4082 ix86_initial_elimination_offset (from, to)
4083 int from;
4084 int to;
4085 {
4086 struct ix86_frame frame;
4087 ix86_compute_frame_layout (&frame);
4088
4089 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4090 return frame.hard_frame_pointer_offset;
4091 else if (from == FRAME_POINTER_REGNUM
4092 && to == HARD_FRAME_POINTER_REGNUM)
4093 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4094 else
4095 {
4096 if (to != STACK_POINTER_REGNUM)
4097 abort ();
4098 else if (from == ARG_POINTER_REGNUM)
4099 return frame.stack_pointer_offset;
4100 else if (from != FRAME_POINTER_REGNUM)
4101 abort ();
4102 else
4103 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4104 }
4105 }
4106
4107 /* Fill the ix86_frame structure describing the frame of the function currently being compiled. */
4108
4109 static void
4110 ix86_compute_frame_layout (frame)
4111 struct ix86_frame *frame;
4112 {
4113 HOST_WIDE_INT total_size;
4114 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4115 int offset;
4116 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4117 HOST_WIDE_INT size = get_frame_size ();
4118
4119 frame->nregs = ix86_nsaved_regs ();
4120 total_size = size;
4121
4122 /* Skip return address and saved base pointer. */
4123 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4124
4125 frame->hard_frame_pointer_offset = offset;
4126
4127 /* Do some sanity checking of stack_alignment_needed and
4128 preferred_alignment, since the i386 port is the only one using these
4129 features, and they may break easily. */
4130
4131 if (size && !stack_alignment_needed)
4132 abort ();
4133 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4134 abort ();
4135 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4136 abort ();
4137 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4138 abort ();
4139
4140 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4141 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4142
4143 /* Register save area */
4144 offset += frame->nregs * UNITS_PER_WORD;
4145
4146 /* Va-arg area */
4147 if (ix86_save_varrargs_registers)
4148 {
4149 offset += X86_64_VARARGS_SIZE;
4150 frame->va_arg_size = X86_64_VARARGS_SIZE;
4151 }
4152 else
4153 frame->va_arg_size = 0;
4154
4155 /* Align start of frame for local function. */
4156 frame->padding1 = ((offset + stack_alignment_needed - 1)
4157 & -stack_alignment_needed) - offset;
4158
4159 offset += frame->padding1;
4160
4161 /* Frame pointer points here. */
4162 frame->frame_pointer_offset = offset;
4163
4164 offset += size;
4165
4166 /* Add outgoing arguments area. Can be skipped if we eliminated
4167 all the function calls as dead code. */
4168 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4169 {
4170 offset += current_function_outgoing_args_size;
4171 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4172 }
4173 else
4174 frame->outgoing_arguments_size = 0;
4175
4176 /* Align stack boundary. Only needed if we're calling another function
4177 or using alloca. */
4178 if (!current_function_is_leaf || current_function_calls_alloca)
4179 frame->padding2 = ((offset + preferred_alignment - 1)
4180 & -preferred_alignment) - offset;
4181 else
4182 frame->padding2 = 0;
4183
4184 offset += frame->padding2;
4185
4186 /* We've reached end of stack frame. */
4187 frame->stack_pointer_offset = offset;
4188
4189 /* Size prologue needs to allocate. */
4190 frame->to_allocate =
4191 (size + frame->padding1 + frame->padding2
4192 + frame->outgoing_arguments_size + frame->va_arg_size);
4193
4194 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4195 && current_function_is_leaf)
4196 {
4197 frame->red_zone_size = frame->to_allocate;
4198 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4199 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4200 }
4201 else
4202 frame->red_zone_size = 0;
4203 frame->to_allocate -= frame->red_zone_size;
4204 frame->stack_pointer_offset -= frame->red_zone_size;
4205 #if 0
4206 fprintf (stderr, "nregs: %i\n", frame->nregs);
4207 fprintf (stderr, "size: %i\n", size);
4208 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4209 fprintf (stderr, "padding1: %i\n", frame->padding1);
4210 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4211 fprintf (stderr, "padding2: %i\n", frame->padding2);
4212 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4213 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4214 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4215 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4216 frame->hard_frame_pointer_offset);
4217 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4218 #endif
4219 }
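/* A rough picture of the offsets computed above, from higher to lower
   addresses (offsets are distances from the top of the frame):

       return address
       saved %ebp (if frame_pointer_needed)   <- hard_frame_pointer_offset
       register save area (nregs words)
       va-arg save area
       padding1
       local variables                        <- frame_pointer_offset
       outgoing argument area
       padding2                               <- stack_pointer_offset

   to_allocate covers everything below the pushed registers, minus any
   red zone used for x86-64 leaf functions.  */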
4220
4221 /* Emit code to save registers in the prologue. */
4222
4223 static void
4224 ix86_emit_save_regs ()
4225 {
4226 register int regno;
4227 rtx insn;
4228
4229 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4230 if (ix86_save_reg (regno, true))
4231 {
4232 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4233 RTX_FRAME_RELATED_P (insn) = 1;
4234 }
4235 }
4236
4237 /* Emit code to save registers using MOV insns. The first register
4238 is saved at POINTER + OFFSET. */
4239 static void
4240 ix86_emit_save_regs_using_mov (pointer, offset)
4241 rtx pointer;
4242 HOST_WIDE_INT offset;
4243 {
4244 int regno;
4245 rtx insn;
4246
4247 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4248 if (ix86_save_reg (regno, true))
4249 {
4250 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4251 Pmode, offset),
4252 gen_rtx_REG (Pmode, regno));
4253 RTX_FRAME_RELATED_P (insn) = 1;
4254 offset += UNITS_PER_WORD;
4255 }
4256 }
4257
4258 /* Expand the prologue into a bunch of separate insns. */
4259
4260 void
4261 ix86_expand_prologue ()
4262 {
4263 rtx insn;
4264 bool pic_reg_used;
4265 struct ix86_frame frame;
4266 int use_mov = 0;
4267 HOST_WIDE_INT allocate;
4268
4269 if (!optimize_size)
4270 {
4271 use_fast_prologue_epilogue
4272 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4273 if (TARGET_PROLOGUE_USING_MOVE)
4274 use_mov = use_fast_prologue_epilogue;
4275 }
4276 ix86_compute_frame_layout (&frame);
4277
4278 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4279 slower on all targets. Also sdb doesn't like it. */
4280
4281 if (frame_pointer_needed)
4282 {
4283 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4284 RTX_FRAME_RELATED_P (insn) = 1;
4285
4286 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4287 RTX_FRAME_RELATED_P (insn) = 1;
4288 }
4289
4290 allocate = frame.to_allocate;
4291 /* In case we are dealing with only a single register and an empty frame,
4292 push is equivalent to the mov+add sequence. */
4293 if (allocate == 0 && frame.nregs <= 1)
4294 use_mov = 0;
4295
4296 if (!use_mov)
4297 ix86_emit_save_regs ();
4298 else
4299 allocate += frame.nregs * UNITS_PER_WORD;
4300
4301 if (allocate == 0)
4302 ;
4303 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4304 {
4305 insn = emit_insn (gen_pro_epilogue_adjust_stack
4306 (stack_pointer_rtx, stack_pointer_rtx,
4307 GEN_INT (-allocate)));
4308 RTX_FRAME_RELATED_P (insn) = 1;
4309 }
4310 else
4311 {
4312 /* ??? Is this only valid for Win32? */
4313
4314 rtx arg0, sym;
4315
4316 if (TARGET_64BIT)
4317 abort ();
4318
4319 arg0 = gen_rtx_REG (SImode, 0);
4320 emit_move_insn (arg0, GEN_INT (allocate));
4321
4322 sym = gen_rtx_MEM (FUNCTION_MODE,
4323 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4324 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4325
4326 CALL_INSN_FUNCTION_USAGE (insn)
4327 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4328 CALL_INSN_FUNCTION_USAGE (insn));
4329 }
4330 if (use_mov)
4331 {
4332 if (!frame_pointer_needed || !frame.to_allocate)
4333 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4334 else
4335 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4336 -frame.nregs * UNITS_PER_WORD);
4337 }
4338
4339 #ifdef SUBTARGET_PROLOGUE
4340 SUBTARGET_PROLOGUE;
4341 #endif
4342
4343 pic_reg_used = false;
4344 if (pic_offset_table_rtx
4345 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4346 || current_function_profile))
4347 {
4348 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4349
4350 if (alt_pic_reg_used != INVALID_REGNUM)
4351 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4352
4353 pic_reg_used = true;
4354 }
4355
4356 if (pic_reg_used)
4357 {
4358 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4359
4360 /* Even with accurate pre-reload life analysis, we can wind up
4361 deleting all references to the pic register after reload.
4362 Consider if cross-jumping unifies two sides of a branch
4363 controlled by a comparison vs the only read from a global.
4364 In that case, allow the set_got to be deleted, though we're
4365 too late to do anything about the ebx save in the prologue. */
4366 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4367 }
4368
4369 /* Prevent function calls from being scheduled before the call to mcount.
4370 In the pic_reg_used case, make sure that the got load isn't deleted. */
4371 if (current_function_profile)
4372 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4373 }
4374
4375 /* Emit code to restore saved registers using MOV insns. First register
4376 is restored from POINTER + OFFSET. */
4377 static void
4378 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4379 rtx pointer;
4380 int offset;
4381 int maybe_eh_return;
4382 {
4383 int regno;
4384
4385 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4386 if (ix86_save_reg (regno, maybe_eh_return))
4387 {
4388 emit_move_insn (gen_rtx_REG (Pmode, regno),
4389 adjust_address (gen_rtx_MEM (Pmode, pointer),
4390 Pmode, offset));
4391 offset += UNITS_PER_WORD;
4392 }
4393 }
4394
4395 /* Restore function stack, frame, and registers. */
4396
4397 void
4398 ix86_expand_epilogue (style)
4399 int style;
4400 {
4401 int regno;
4402 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4403 struct ix86_frame frame;
4404 HOST_WIDE_INT offset;
4405
4406 ix86_compute_frame_layout (&frame);
4407
4408 /* Calculate start of saved registers relative to ebp. Special care
4409 must be taken for the normal return case of a function using
4410 eh_return: the eax and edx registers are marked as saved, but not
4411 restored along this path. */
4412 offset = frame.nregs;
4413 if (current_function_calls_eh_return && style != 2)
4414 offset -= 2;
4415 offset *= -UNITS_PER_WORD;
4416
4417 /* If we're only restoring one register and sp is not valid then
4418 use a move instruction to restore the register, since it's
4419 less work than reloading sp and popping the register.
4420
4421 The default code results in a stack adjustment using an add/lea
4422 instruction, while this code results in a LEAVE instruction (or its
4423 discrete equivalent), so it is profitable in some other cases as well,
4424 especially when there are no registers to restore. We also use this code
4425 when TARGET_USE_LEAVE is set and there is exactly one register to pop.
4426 This heuristic may need some tuning in the future. */
4427 if ((!sp_valid && frame.nregs <= 1)
4428 || (TARGET_EPILOGUE_USING_MOVE
4429 && use_fast_prologue_epilogue
4430 && (frame.nregs > 1 || frame.to_allocate))
4431 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4432 || (frame_pointer_needed && TARGET_USE_LEAVE
4433 && use_fast_prologue_epilogue && frame.nregs == 1)
4434 || current_function_calls_eh_return)
4435 {
4436 /* Restore registers. We can use ebp or esp to address the memory
4437 locations. If both are available, default to ebp, since offsets
4438 are known to be small. The only exception is esp pointing directly to
4439 the end of the block of saved registers, where we may simplify the
4440 addressing mode. */
4441
4442 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4443 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4444 frame.to_allocate, style == 2);
4445 else
4446 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4447 offset, style == 2);
4448
4449 /* eh_return epilogues need %ecx added to the stack pointer. */
4450 if (style == 2)
4451 {
4452 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4453
4454 if (frame_pointer_needed)
4455 {
4456 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4457 tmp = plus_constant (tmp, UNITS_PER_WORD);
4458 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4459
4460 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4461 emit_move_insn (hard_frame_pointer_rtx, tmp);
4462
4463 emit_insn (gen_pro_epilogue_adjust_stack
4464 (stack_pointer_rtx, sa, const0_rtx));
4465 }
4466 else
4467 {
4468 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4469 tmp = plus_constant (tmp, (frame.to_allocate
4470 + frame.nregs * UNITS_PER_WORD));
4471 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4472 }
4473 }
4474 else if (!frame_pointer_needed)
4475 emit_insn (gen_pro_epilogue_adjust_stack
4476 (stack_pointer_rtx, stack_pointer_rtx,
4477 GEN_INT (frame.to_allocate
4478 + frame.nregs * UNITS_PER_WORD)));
4479 /* If not an i386, mov & pop is faster than "leave". */
4480 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4481 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4482 else
4483 {
4484 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4485 hard_frame_pointer_rtx,
4486 const0_rtx));
4487 if (TARGET_64BIT)
4488 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4489 else
4490 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4491 }
4492 }
4493 else
4494 {
4495 /* First step is to deallocate the stack frame so that we can
4496 pop the registers. */
4497 if (!sp_valid)
4498 {
4499 if (!frame_pointer_needed)
4500 abort ();
4501 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4502 hard_frame_pointer_rtx,
4503 GEN_INT (offset)));
4504 }
4505 else if (frame.to_allocate)
4506 emit_insn (gen_pro_epilogue_adjust_stack
4507 (stack_pointer_rtx, stack_pointer_rtx,
4508 GEN_INT (frame.to_allocate)));
4509
4510 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4511 if (ix86_save_reg (regno, false))
4512 {
4513 if (TARGET_64BIT)
4514 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4515 else
4516 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4517 }
4518 if (frame_pointer_needed)
4519 {
4520 /* Leave results in shorter dependency chains on CPUs that are
4521 able to grok it fast. */
4522 if (TARGET_USE_LEAVE)
4523 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4524 else if (TARGET_64BIT)
4525 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4526 else
4527 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4528 }
4529 }
4530
4531 /* Sibcall epilogues don't want a return instruction. */
4532 if (style == 0)
4533 return;
4534
4535 if (current_function_pops_args && current_function_args_size)
4536 {
4537 rtx popc = GEN_INT (current_function_pops_args);
4538
4539 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4540 return address, do an explicit add, and jump indirectly to the
4541 caller. */
4542
4543 if (current_function_pops_args >= 65536)
4544 {
4545 rtx ecx = gen_rtx_REG (SImode, 2);
4546
4547 /* There is no "pascal" calling convention in the 64-bit ABI. */
4548 if (TARGET_64BIT)
4549 abort ();
4550
4551 emit_insn (gen_popsi1 (ecx));
4552 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4553 emit_jump_insn (gen_return_indirect_internal (ecx));
4554 }
4555 else
4556 emit_jump_insn (gen_return_pop_internal (popc));
4557 }
4558 else
4559 emit_jump_insn (gen_return_internal ());
4560 }
4561
4562 /* Reset from the function's potential modifications. */
4563
4564 static void
4565 ix86_output_function_epilogue (file, size)
4566 FILE *file ATTRIBUTE_UNUSED;
4567 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4568 {
4569 if (pic_offset_table_rtx)
4570 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4571 }
4572 \f
4573 /* Extract the parts of an RTL expression that is a valid memory address
4574 for an instruction. Return 0 if the structure of the address is
4575 grossly off. Return -1 if the address contains ASHIFT, so it is not
4576 strictly valid but is still used for computing the length of an lea
4577 instruction. */
4578
4579 static int
4580 ix86_decompose_address (addr, out)
4581 register rtx addr;
4582 struct ix86_address *out;
4583 {
4584 rtx base = NULL_RTX;
4585 rtx index = NULL_RTX;
4586 rtx disp = NULL_RTX;
4587 HOST_WIDE_INT scale = 1;
4588 rtx scale_rtx = NULL_RTX;
4589 int retval = 1;
4590
4591 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4592 base = addr;
4593 else if (GET_CODE (addr) == PLUS)
4594 {
4595 rtx op0 = XEXP (addr, 0);
4596 rtx op1 = XEXP (addr, 1);
4597 enum rtx_code code0 = GET_CODE (op0);
4598 enum rtx_code code1 = GET_CODE (op1);
4599
4600 if (code0 == REG || code0 == SUBREG)
4601 {
4602 if (code1 == REG || code1 == SUBREG)
4603 index = op0, base = op1; /* index + base */
4604 else
4605 base = op0, disp = op1; /* base + displacement */
4606 }
4607 else if (code0 == MULT)
4608 {
4609 index = XEXP (op0, 0);
4610 scale_rtx = XEXP (op0, 1);
4611 if (code1 == REG || code1 == SUBREG)
4612 base = op1; /* index*scale + base */
4613 else
4614 disp = op1; /* index*scale + disp */
4615 }
4616 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4617 {
4618 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4619 scale_rtx = XEXP (XEXP (op0, 0), 1);
4620 base = XEXP (op0, 1);
4621 disp = op1;
4622 }
4623 else if (code0 == PLUS)
4624 {
4625 index = XEXP (op0, 0); /* index + base + disp */
4626 base = XEXP (op0, 1);
4627 disp = op1;
4628 }
4629 else
4630 return 0;
4631 }
4632 else if (GET_CODE (addr) == MULT)
4633 {
4634 index = XEXP (addr, 0); /* index*scale */
4635 scale_rtx = XEXP (addr, 1);
4636 }
4637 else if (GET_CODE (addr) == ASHIFT)
4638 {
4639 rtx tmp;
4640
4641 /* We're called for lea too, which implements ashift on occasion. */
4642 index = XEXP (addr, 0);
4643 tmp = XEXP (addr, 1);
4644 if (GET_CODE (tmp) != CONST_INT)
4645 return 0;
4646 scale = INTVAL (tmp);
4647 if ((unsigned HOST_WIDE_INT) scale > 3)
4648 return 0;
4649 scale = 1 << scale;
4650 retval = -1;
4651 }
4652 else
4653 disp = addr; /* displacement */
4654
4655 /* Extract the integral value of scale. */
4656 if (scale_rtx)
4657 {
4658 if (GET_CODE (scale_rtx) != CONST_INT)
4659 return 0;
4660 scale = INTVAL (scale_rtx);
4661 }
4662
4663 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4664 if (base && index && scale == 1
4665 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4666 || index == stack_pointer_rtx))
4667 {
4668 rtx tmp = base;
4669 base = index;
4670 index = tmp;
4671 }
4672
4673 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4674 if ((base == hard_frame_pointer_rtx
4675 || base == frame_pointer_rtx
4676 || base == arg_pointer_rtx) && !disp)
4677 disp = const0_rtx;
4678
4679 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4680 Avoid this by transforming it to [%esi+0]. */
4681 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4682 && base && !index && !disp
4683 && REG_P (base)
4684 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4685 disp = const0_rtx;
4686
4687 /* Special case: encode reg+reg instead of reg*2. */
4688 if (!base && index && scale && scale == 2)
4689 base = index, scale = 1;
4690
4691 /* Special case: scaling cannot be encoded without base or displacement. */
4692 if (!base && !disp && index && scale != 1)
4693 disp = const0_rtx;
4694
4695 out->base = base;
4696 out->index = index;
4697 out->disp = disp;
4698 out->scale = scale;
4699
4700 return retval;
4701 }
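/* A worked example of the decomposition above: for the address

       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))

   the routine fills in index = A, scale = 4, base = B and disp = 12,
   i.e. the operand that would be printed as 12(%B,%A,4) in AT&T syntax.  */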
4702 \f
4703 /* Return the cost of the memory address X.
4704 For i386, it is better to use a complex address than let gcc copy
4705 the address into a reg and make a new pseudo. But not if the address
4706 requires two regs - that would mean more pseudos with longer
4707 lifetimes. */
4708 int
4709 ix86_address_cost (x)
4710 rtx x;
4711 {
4712 struct ix86_address parts;
4713 int cost = 1;
4714
4715 if (!ix86_decompose_address (x, &parts))
4716 abort ();
4717
4718 if (parts.base && GET_CODE (parts.base) == SUBREG)
4719 parts.base = SUBREG_REG (parts.base);
4720 if (parts.index && GET_CODE (parts.index) == SUBREG)
4721 parts.index = SUBREG_REG (parts.index);
4722
4723 /* More complex memory references are better. */
4724 if (parts.disp && parts.disp != const0_rtx)
4725 cost--;
4726
4727 /* Attempt to minimize number of registers in the address. */
4728 if ((parts.base
4729 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4730 || (parts.index
4731 && (!REG_P (parts.index)
4732 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4733 cost++;
4734
4735 if (parts.base
4736 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4737 && parts.index
4738 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4739 && parts.base != parts.index)
4740 cost++;
4741
4742 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4743 since its predecode logic can't detect the length of such instructions
4744 and it degenerates to vector decoding. Increase the cost of such
4745 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4746 to split such addresses or even refuse them at all.
4747
4748 The following addressing modes are affected:
4749 [base+scale*index]
4750 [scale*index+disp]
4751 [base+index]
4752
4753 The first and last cases may be avoidable by explicitly coding a zero
4754 displacement into the memory address, but I don't have an AMD K6 machine
4755 handy to check this theory. */
4756
4757 if (TARGET_K6
4758 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4759 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4760 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4761 cost += 10;
4762
4763 return cost;
4764 }
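/* Rough examples of the resulting costs: an address such as 4(%ebp,%eax,4)
   built entirely from hard registers starts at 1, gets -1 for the nonzero
   displacement and ends up with cost 0; an address held in a single pseudo
   register costs 2 (1 plus the pseudo penalty); K6-affected forms get an
   extra 10.  */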
4765 \f
4766 /* If X is a machine specific address (i.e. a symbol or label being
4767 referenced as a displacement from the GOT implemented using an
4768 UNSPEC), then return the base term. Otherwise return X. */
4769
4770 rtx
4771 ix86_find_base_term (x)
4772 rtx x;
4773 {
4774 rtx term;
4775
4776 if (TARGET_64BIT)
4777 {
4778 if (GET_CODE (x) != CONST)
4779 return x;
4780 term = XEXP (x, 0);
4781 if (GET_CODE (term) == PLUS
4782 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4783 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4784 term = XEXP (term, 0);
4785 if (GET_CODE (term) != UNSPEC
4786 || XINT (term, 1) != UNSPEC_GOTPCREL)
4787 return x;
4788
4789 term = XVECEXP (term, 0, 0);
4790
4791 if (GET_CODE (term) != SYMBOL_REF
4792 && GET_CODE (term) != LABEL_REF)
4793 return x;
4794
4795 return term;
4796 }
4797
4798 if (GET_CODE (x) != PLUS
4799 || XEXP (x, 0) != pic_offset_table_rtx
4800 || GET_CODE (XEXP (x, 1)) != CONST)
4801 return x;
4802
4803 term = XEXP (XEXP (x, 1), 0);
4804
4805 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4806 term = XEXP (term, 0);
4807
4808 if (GET_CODE (term) != UNSPEC
4809 || XINT (term, 1) != UNSPEC_GOTOFF)
4810 return x;
4811
4812 term = XVECEXP (term, 0, 0);
4813
4814 if (GET_CODE (term) != SYMBOL_REF
4815 && GET_CODE (term) != LABEL_REF)
4816 return x;
4817
4818 return term;
4819 }
4820 \f
4821 /* Determine if a given RTX is a valid constant. We already know this
4822 satisfies CONSTANT_P. */
4823
4824 bool
4825 legitimate_constant_p (x)
4826 rtx x;
4827 {
4828 rtx inner;
4829
4830 switch (GET_CODE (x))
4831 {
4832 case SYMBOL_REF:
4833 /* TLS symbols are not constant. */
4834 if (tls_symbolic_operand (x, Pmode))
4835 return false;
4836 break;
4837
4838 case CONST:
4839 inner = XEXP (x, 0);
4840
4841 /* Offsets of TLS symbols are never valid.
4842 Discourage CSE from creating them. */
4843 if (GET_CODE (inner) == PLUS
4844 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4845 return false;
4846
4847 /* Only some unspecs are valid as "constants". */
4848 if (GET_CODE (inner) == UNSPEC)
4849 switch (XINT (inner, 1))
4850 {
4851 case UNSPEC_TPOFF:
4852 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4853 default:
4854 return false;
4855 }
4856 break;
4857
4858 default:
4859 break;
4860 }
4861
4862 /* Otherwise we handle everything else in the move patterns. */
4863 return true;
4864 }
4865
4866 /* Determine if a given RTX is a valid constant address. */
4867
4868 bool
4869 constant_address_p (x)
4870 rtx x;
4871 {
4872 switch (GET_CODE (x))
4873 {
4874 case LABEL_REF:
4875 case CONST_INT:
4876 return true;
4877
4878 case CONST_DOUBLE:
4879 return TARGET_64BIT;
4880
4881 case CONST:
4882 /* For Mach-O, really believe the CONST. */
4883 if (TARGET_MACHO)
4884 return true;
4885 /* Otherwise fall through. */
4886 case SYMBOL_REF:
4887 return !flag_pic && legitimate_constant_p (x);
4888
4889 default:
4890 return false;
4891 }
4892 }
4893
4894 /* Nonzero if the constant value X is a legitimate general operand
4895 when generating PIC code. It is given that flag_pic is on and
4896 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4897
4898 bool
4899 legitimate_pic_operand_p (x)
4900 rtx x;
4901 {
4902 rtx inner;
4903
4904 switch (GET_CODE (x))
4905 {
4906 case CONST:
4907 inner = XEXP (x, 0);
4908
4909 /* Only some unspecs are valid as "constants". */
4910 if (GET_CODE (inner) == UNSPEC)
4911 switch (XINT (inner, 1))
4912 {
4913 case UNSPEC_TPOFF:
4914 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4915 default:
4916 return false;
4917 }
4918 /* FALLTHRU */
4919
4920 case SYMBOL_REF:
4921 case LABEL_REF:
4922 return legitimate_pic_address_disp_p (x);
4923
4924 default:
4925 return true;
4926 }
4927 }
4928
4929 /* Determine if a given CONST RTX is a valid memory displacement
4930 in PIC mode. */
4931
4932 int
4933 legitimate_pic_address_disp_p (disp)
4934 register rtx disp;
4935 {
4936 bool saw_plus;
4937
4938 /* In 64bit mode we can allow direct addresses of symbols and labels
4939 when they are not dynamic symbols. */
4940 if (TARGET_64BIT)
4941 {
4942 rtx x = disp;
4943 if (GET_CODE (disp) == CONST)
4944 x = XEXP (disp, 0);
4945 /* ??? Handle PIC code models */
4946 if (GET_CODE (x) == PLUS
4947 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4948 && ix86_cmodel == CM_SMALL_PIC
4949 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4950 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4951 x = XEXP (x, 0);
4952 if (local_symbolic_operand (x, Pmode))
4953 return 1;
4954 }
4955 if (GET_CODE (disp) != CONST)
4956 return 0;
4957 disp = XEXP (disp, 0);
4958
4959 if (TARGET_64BIT)
4960 {
4961 /* It is unsafe to allow PLUS expressions; this limits the allowed
4962 distance of GOT entries. We should not need these anyway. */
4963 if (GET_CODE (disp) != UNSPEC
4964 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4965 return 0;
4966
4967 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4968 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4969 return 0;
4970 return 1;
4971 }
4972
4973 saw_plus = false;
4974 if (GET_CODE (disp) == PLUS)
4975 {
4976 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4977 return 0;
4978 disp = XEXP (disp, 0);
4979 saw_plus = true;
4980 }
4981
4982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4983 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4984 {
4985 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4986 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4987 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4988 {
4989 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4990 if (strstr (sym_name, "$pb") != 0)
4991 return 1;
4992 }
4993 }
4994
4995 if (GET_CODE (disp) != UNSPEC)
4996 return 0;
4997
4998 switch (XINT (disp, 1))
4999 {
5000 case UNSPEC_GOT:
5001 if (saw_plus)
5002 return false;
5003 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5004 case UNSPEC_GOTOFF:
5005 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5006 case UNSPEC_GOTTPOFF:
5007 if (saw_plus)
5008 return false;
5009 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5010 case UNSPEC_NTPOFF:
5011 /* ??? Could support offset here. */
5012 if (saw_plus)
5013 return false;
5014 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5015 case UNSPEC_DTPOFF:
5016 /* ??? Could support offset here. */
5017 if (saw_plus)
5018 return false;
5019 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5020 }
5021
5022 return 0;
5023 }
5024
5025 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5026 memory address for an instruction. The MODE argument is the machine mode
5027 for the MEM expression that wants to use this address.
5028
5029 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5030 convert common non-canonical forms to canonical form so that they will
5031 be recognized. */
5032
5033 int
5034 legitimate_address_p (mode, addr, strict)
5035 enum machine_mode mode;
5036 register rtx addr;
5037 int strict;
5038 {
5039 struct ix86_address parts;
5040 rtx base, index, disp;
5041 HOST_WIDE_INT scale;
5042 const char *reason = NULL;
5043 rtx reason_rtx = NULL_RTX;
5044
5045 if (TARGET_DEBUG_ADDR)
5046 {
5047 fprintf (stderr,
5048 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5049 GET_MODE_NAME (mode), strict);
5050 debug_rtx (addr);
5051 }
5052
5053 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5054 {
5055 if (TARGET_DEBUG_ADDR)
5056 fprintf (stderr, "Success.\n");
5057 return TRUE;
5058 }
5059
5060 if (ix86_decompose_address (addr, &parts) <= 0)
5061 {
5062 reason = "decomposition failed";
5063 goto report_error;
5064 }
5065
5066 base = parts.base;
5067 index = parts.index;
5068 disp = parts.disp;
5069 scale = parts.scale;
5070
5071 /* Validate base register.
5072
5073 Don't allow SUBREG's here, it can lead to spill failures when the base
5074 is one word out of a two word structure, which is represented internally
5075 as a DImode int. */
5076
5077 if (base)
5078 {
5079 rtx reg;
5080 reason_rtx = base;
5081
5082 if (GET_CODE (base) == SUBREG)
5083 reg = SUBREG_REG (base);
5084 else
5085 reg = base;
5086
5087 if (GET_CODE (reg) != REG)
5088 {
5089 reason = "base is not a register";
5090 goto report_error;
5091 }
5092
5093 if (GET_MODE (base) != Pmode)
5094 {
5095 reason = "base is not in Pmode";
5096 goto report_error;
5097 }
5098
5099 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5100 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5101 {
5102 reason = "base is not valid";
5103 goto report_error;
5104 }
5105 }
5106
5107 /* Validate index register.
5108
5109 Don't allow SUBREG's here, it can lead to spill failures when the index
5110 is one word out of a two word structure, which is represented internally
5111 as a DImode int. */
5112
5113 if (index)
5114 {
5115 rtx reg;
5116 reason_rtx = index;
5117
5118 if (GET_CODE (index) == SUBREG)
5119 reg = SUBREG_REG (index);
5120 else
5121 reg = index;
5122
5123 if (GET_CODE (reg) != REG)
5124 {
5125 reason = "index is not a register";
5126 goto report_error;
5127 }
5128
5129 if (GET_MODE (index) != Pmode)
5130 {
5131 reason = "index is not in Pmode";
5132 goto report_error;
5133 }
5134
5135 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5136 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5137 {
5138 reason = "index is not valid";
5139 goto report_error;
5140 }
5141 }
5142
5143 /* Validate scale factor. */
5144 if (scale != 1)
5145 {
5146 reason_rtx = GEN_INT (scale);
5147 if (!index)
5148 {
5149 reason = "scale without index";
5150 goto report_error;
5151 }
5152
5153 if (scale != 2 && scale != 4 && scale != 8)
5154 {
5155 reason = "scale is not a valid multiplier";
5156 goto report_error;
5157 }
5158 }
5159
5160 /* Validate displacement. */
5161 if (disp)
5162 {
5163 reason_rtx = disp;
5164
5165 if (TARGET_64BIT)
5166 {
5167 if (!x86_64_sign_extended_value (disp))
5168 {
5169 reason = "displacement is out of range";
5170 goto report_error;
5171 }
5172 }
5173 else
5174 {
5175 if (GET_CODE (disp) == CONST_DOUBLE)
5176 {
5177 reason = "displacement is a const_double";
5178 goto report_error;
5179 }
5180 }
5181
5182 if (GET_CODE (disp) == CONST
5183 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5184 switch (XINT (XEXP (disp, 0), 1))
5185 {
5186 case UNSPEC_GOT:
5187 case UNSPEC_GOTOFF:
5188 case UNSPEC_GOTPCREL:
5189 if (!flag_pic)
5190 abort ();
5191 goto is_legitimate_pic;
5192
5193 case UNSPEC_GOTTPOFF:
5194 case UNSPEC_NTPOFF:
5195 case UNSPEC_DTPOFF:
5196 break;
5197
5198 default:
5199 reason = "invalid address unspec";
5200 goto report_error;
5201 }
5202
5203 else if (flag_pic && (SYMBOLIC_CONST (disp)
5204 #if TARGET_MACHO
5205 && !machopic_operand_p (disp)
5206 #endif
5207 ))
5208 {
5209 is_legitimate_pic:
5210 if (TARGET_64BIT && (index || base))
5211 {
5212 reason = "non-constant pic memory reference";
5213 goto report_error;
5214 }
5215 if (! legitimate_pic_address_disp_p (disp))
5216 {
5217 reason = "displacement is an invalid pic construct";
5218 goto report_error;
5219 }
5220
5221 /* This code used to verify that a symbolic pic displacement
5222 includes the pic_offset_table_rtx register.
5223
5224 While this is a good idea, unfortunately these constructs may
5225 be created by the "adds using lea" optimization for incorrect
5226 code like:
5227
5228 int a;
5229 int foo(int i)
5230 {
5231 return *(&a+i);
5232 }
5233
5234 This code is nonsensical, but results in addressing the
5235 GOT table with a pic_offset_table_rtx base. We can't
5236 just refuse it easily, since it gets matched by the
5237 "addsi3" pattern, which later gets split to lea when the
5238 output register differs from the input. While this
5239 could be handled by a separate addsi pattern for this case
5240 that never results in lea, disabling the test seems to be
5241 the easier and correct fix for the crash. */
5242 }
5243 else if (!CONSTANT_ADDRESS_P (disp))
5244 {
5245 reason = "displacement is not constant";
5246 goto report_error;
5247 }
5248 }
5249
5250 /* Everything looks valid. */
5251 if (TARGET_DEBUG_ADDR)
5252 fprintf (stderr, "Success.\n");
5253 return TRUE;
5254
5255 report_error:
5256 if (TARGET_DEBUG_ADDR)
5257 {
5258 fprintf (stderr, "Error: %s\n", reason);
5259 debug_rtx (reason_rtx);
5260 }
5261 return FALSE;
5262 }
5263 \f
5264 /* Return a unique alias set for the GOT. */
5265
5266 static HOST_WIDE_INT
5267 ix86_GOT_alias_set ()
5268 {
5269 static HOST_WIDE_INT set = -1;
5270 if (set == -1)
5271 set = new_alias_set ();
5272 return set;
5273 }
5274
5275 /* Return a legitimate reference for ORIG (an address) using the
5276 register REG. If REG is 0, a new pseudo is generated.
5277
5278 There are two types of references that must be handled:
5279
5280 1. Global data references must load the address from the GOT, via
5281 the PIC reg. An insn is emitted to do this load, and the reg is
5282 returned.
5283
5284 2. Static data references, constant pool addresses, and code labels
5285 compute the address as an offset from the GOT, whose base is in
5286 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5287 differentiate them from global data objects. The returned
5288 address is the PIC reg + an unspec constant.
5289
5290 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5291 reg also appears in the address. */
5292
5293 rtx
5294 legitimize_pic_address (orig, reg)
5295 rtx orig;
5296 rtx reg;
5297 {
5298 rtx addr = orig;
5299 rtx new = orig;
5300 rtx base;
5301
5302 #if TARGET_MACHO
5303 if (reg == 0)
5304 reg = gen_reg_rtx (Pmode);
5305 /* Use the generic Mach-O PIC machinery. */
5306 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5307 #endif
5308
5309 if (local_symbolic_operand (addr, Pmode))
5310 {
5311 /* In 64bit mode we can address such objects directly. */
5312 if (TARGET_64BIT)
5313 new = addr;
5314 else
5315 {
5316 /* This symbol may be referenced via a displacement from the PIC
5317 base address (@GOTOFF). */
5318
5319 if (reload_in_progress)
5320 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5321 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5322 new = gen_rtx_CONST (Pmode, new);
5323 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5324
5325 if (reg != 0)
5326 {
5327 emit_move_insn (reg, new);
5328 new = reg;
5329 }
5330 }
5331 }
5332 else if (GET_CODE (addr) == SYMBOL_REF)
5333 {
5334 if (TARGET_64BIT)
5335 {
5336 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5337 new = gen_rtx_CONST (Pmode, new);
5338 new = gen_rtx_MEM (Pmode, new);
5339 RTX_UNCHANGING_P (new) = 1;
5340 set_mem_alias_set (new, ix86_GOT_alias_set ());
5341
5342 if (reg == 0)
5343 reg = gen_reg_rtx (Pmode);
5344 /* Use gen_movsi directly, otherwise the address is loaded
5345 into a register for CSE. We don't want to CSE these addresses;
5346 instead we CSE addresses from the GOT table, so skip this. */
5347 emit_insn (gen_movsi (reg, new));
5348 new = reg;
5349 }
5350 else
5351 {
5352 /* This symbol must be referenced via a load from the
5353 Global Offset Table (@GOT). */
5354
5355 if (reload_in_progress)
5356 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5357 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5358 new = gen_rtx_CONST (Pmode, new);
5359 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5360 new = gen_rtx_MEM (Pmode, new);
5361 RTX_UNCHANGING_P (new) = 1;
5362 set_mem_alias_set (new, ix86_GOT_alias_set ());
5363
5364 if (reg == 0)
5365 reg = gen_reg_rtx (Pmode);
5366 emit_move_insn (reg, new);
5367 new = reg;
5368 }
5369 }
5370 else
5371 {
5372 if (GET_CODE (addr) == CONST)
5373 {
5374 addr = XEXP (addr, 0);
5375
5376 /* We must match stuff we generate before. Assume the only
5377 unspecs that can get here are ours. Not that we could do
5378 anything with them anyway... */
5379 if (GET_CODE (addr) == UNSPEC
5380 || (GET_CODE (addr) == PLUS
5381 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5382 return orig;
5383 if (GET_CODE (addr) != PLUS)
5384 abort ();
5385 }
5386 if (GET_CODE (addr) == PLUS)
5387 {
5388 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5389
5390 /* Check first to see if this is a constant offset from a @GOTOFF
5391 symbol reference. */
5392 if (local_symbolic_operand (op0, Pmode)
5393 && GET_CODE (op1) == CONST_INT)
5394 {
5395 if (!TARGET_64BIT)
5396 {
5397 if (reload_in_progress)
5398 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5399 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5400 UNSPEC_GOTOFF);
5401 new = gen_rtx_PLUS (Pmode, new, op1);
5402 new = gen_rtx_CONST (Pmode, new);
5403 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5404
5405 if (reg != 0)
5406 {
5407 emit_move_insn (reg, new);
5408 new = reg;
5409 }
5410 }
5411 else
5412 {
5413 /* ??? We need to limit offsets here. */
5414 }
5415 }
5416 else
5417 {
5418 base = legitimize_pic_address (XEXP (addr, 0), reg);
5419 new = legitimize_pic_address (XEXP (addr, 1),
5420 base == reg ? NULL_RTX : reg);
5421
5422 if (GET_CODE (new) == CONST_INT)
5423 new = plus_constant (base, INTVAL (new));
5424 else
5425 {
5426 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5427 {
5428 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5429 new = XEXP (new, 1);
5430 }
5431 new = gen_rtx_PLUS (Pmode, base, new);
5432 }
5433 }
5434 }
5435 }
5436 return new;
5437 }
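/* Sketch of the two common 32-bit cases handled above: a local symbol is
   rewritten as pic_offset_table_rtx + const(unspec [sym] GOTOFF), i.e. the
   address sym@GOTOFF(%ebx), while a global symbol becomes a load from
   mem(pic_offset_table_rtx + const(unspec [sym] GOT)), i.e. sym@GOT(%ebx).  */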
5438
5439 static void
5440 ix86_encode_section_info (decl, first)
5441 tree decl;
5442 int first ATTRIBUTE_UNUSED;
5443 {
5444 bool local_p = (*targetm.binds_local_p) (decl);
5445 rtx rtl, symbol;
5446
5447 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5448 if (GET_CODE (rtl) != MEM)
5449 return;
5450 symbol = XEXP (rtl, 0);
5451 if (GET_CODE (symbol) != SYMBOL_REF)
5452 return;
5453
5454 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5455 symbol so that we may access it directly in the GOT. */
5456
5457 if (flag_pic)
5458 SYMBOL_REF_FLAG (symbol) = local_p;
5459
5460 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5461 "local dynamic", "initial exec" or "local exec" TLS models
5462 respectively. */
5463
5464 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5465 {
5466 const char *symbol_str;
5467 char *newstr;
5468 size_t len;
5469 enum tls_model kind;
5470
5471 if (!flag_pic)
5472 {
5473 if (local_p)
5474 kind = TLS_MODEL_LOCAL_EXEC;
5475 else
5476 kind = TLS_MODEL_INITIAL_EXEC;
5477 }
5478 /* Local dynamic is inefficient when we're not combining the
5479 parts of the address. */
5480 else if (optimize && local_p)
5481 kind = TLS_MODEL_LOCAL_DYNAMIC;
5482 else
5483 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5484 if (kind < flag_tls_default)
5485 kind = flag_tls_default;
5486
5487 symbol_str = XSTR (symbol, 0);
5488
5489 if (symbol_str[0] == '%')
5490 {
5491 if (symbol_str[1] == tls_model_chars[kind])
5492 return;
5493 symbol_str += 2;
5494 }
5495 len = strlen (symbol_str) + 1;
5496 newstr = alloca (len + 2);
5497
5498 newstr[0] = '%';
5499 newstr[1] = tls_model_chars[kind];
5500 memcpy (newstr + 2, symbol_str, len);
5501
5502 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5503 }
5504 }
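/* For instance (following the %[GLil] convention described above, and
   modulo the flag_tls_default adjustment), a thread-local variable "foo"
   that binds locally and is compiled with -fpic and optimization gets its
   SYMBOL_REF name rewritten to "%Lfoo" (local dynamic); without PIC the
   same variable would become "%lfoo" (local exec).  ix86_strip_name_encoding
   below removes the two-character prefix again when the name is printed.  */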
5505
5506 /* Undo the above when printing symbol names. */
5507
5508 static const char *
5509 ix86_strip_name_encoding (str)
5510 const char *str;
5511 {
5512 if (str[0] == '%')
5513 str += 2;
5514 if (str [0] == '*')
5515 str += 1;
5516 return str;
5517 }
5518 \f
5519 /* Load the thread pointer into a register. */
5520
5521 static rtx
5522 get_thread_pointer ()
5523 {
5524 rtx tp;
5525
5526 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5527 tp = gen_rtx_MEM (Pmode, tp);
5528 RTX_UNCHANGING_P (tp) = 1;
5529 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5530 tp = force_reg (Pmode, tp);
5531
5532 return tp;
5533 }
5534
5535 /* Try machine-dependent ways of modifying an illegitimate address
5536 to be legitimate. If we find one, return the new, valid address.
5537 This macro is used in only one place: `memory_address' in explow.c.
5538
5539 OLDX is the address as it was before break_out_memory_refs was called.
5540 In some cases it is useful to look at this to decide what needs to be done.
5541
5542 MODE and WIN are passed so that this macro can use
5543 GO_IF_LEGITIMATE_ADDRESS.
5544
5545 It is always safe for this macro to do nothing. It exists to recognize
5546 opportunities to optimize the output.
5547
5548 For the 80386, we handle X+REG by loading X into a register R and
5549 using R+REG. R will go in a general reg and indexing will be used.
5550 However, if REG is a broken-out memory address or multiplication,
5551 nothing needs to be done because REG can certainly go in a general reg.
5552
5553 When -fpic is used, special handling is needed for symbolic references.
5554 See comments by legitimize_pic_address in i386.c for details. */
5555
5556 rtx
5557 legitimize_address (x, oldx, mode)
5558 register rtx x;
5559 register rtx oldx ATTRIBUTE_UNUSED;
5560 enum machine_mode mode;
5561 {
5562 int changed = 0;
5563 unsigned log;
5564
5565 if (TARGET_DEBUG_ADDR)
5566 {
5567 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5568 GET_MODE_NAME (mode));
5569 debug_rtx (x);
5570 }
5571
5572 log = tls_symbolic_operand (x, mode);
5573 if (log)
5574 {
5575 rtx dest, base, off, pic;
5576
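      /* Expand the access sequence selected by the TLS model encoded in
	 the symbol: the dynamic models go through the tls_global_dynamic
	 and tls_local_dynamic_base patterns, while the exec models form
	 an offset from the thread pointer.  */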
5577 switch (log)
5578 {
5579 case TLS_MODEL_GLOBAL_DYNAMIC:
5580 dest = gen_reg_rtx (Pmode);
5581 emit_insn (gen_tls_global_dynamic (dest, x));
5582 break;
5583
5584 case TLS_MODEL_LOCAL_DYNAMIC:
5585 base = gen_reg_rtx (Pmode);
5586 emit_insn (gen_tls_local_dynamic_base (base));
5587
5588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5589 off = gen_rtx_CONST (Pmode, off);
5590
5591 return gen_rtx_PLUS (Pmode, base, off);
5592
5593 case TLS_MODEL_INITIAL_EXEC:
5594 if (flag_pic)
5595 {
5596 if (reload_in_progress)
5597 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5598 pic = pic_offset_table_rtx;
5599 }
5600 else
5601 {
5602 pic = gen_reg_rtx (Pmode);
5603 emit_insn (gen_set_got (pic));
5604 }
5605
5606 base = get_thread_pointer ();
5607
5608 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5609 off = gen_rtx_CONST (Pmode, off);
5610 off = gen_rtx_PLUS (Pmode, pic, off);
5611 off = gen_rtx_MEM (Pmode, off);
5612 RTX_UNCHANGING_P (off) = 1;
5613 set_mem_alias_set (off, ix86_GOT_alias_set ());
5614
5615 		  /* Damn Sun for specifying a set of dynamic relocations without
5616 considering the two-operand nature of the architecture!
5617 We'd be much better off with a "GOTNTPOFF" relocation that
5618 already contained the negated constant. */
5619 /* ??? Using negl and reg+reg addressing appears to be a lose
5620 size-wise. The negl is two bytes, just like the extra movl
5621 incurred by the two-operand subl, but reg+reg addressing
5622 uses the two-byte modrm form, unlike plain reg. */
5623
5624 dest = gen_reg_rtx (Pmode);
5625 emit_insn (gen_subsi3 (dest, base, off));
5626 break;
5627
5628 case TLS_MODEL_LOCAL_EXEC:
5629 base = get_thread_pointer ();
5630
5631 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5632 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5633 off = gen_rtx_CONST (Pmode, off);
5634
5635 if (TARGET_GNU_TLS)
5636 return gen_rtx_PLUS (Pmode, base, off);
5637 else
5638 {
5639 dest = gen_reg_rtx (Pmode);
5640 emit_insn (gen_subsi3 (dest, base, off));
5641 }
5642 break;
5643
5644 default:
5645 abort ();
5646 }
5647
5648 return dest;
5649 }
5650
5651 if (flag_pic && SYMBOLIC_CONST (x))
5652 return legitimize_pic_address (x, 0);
5653
5654 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5655 if (GET_CODE (x) == ASHIFT
5656 && GET_CODE (XEXP (x, 1)) == CONST_INT
5657       && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
5658 {
5659 changed = 1;
5660 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5661 GEN_INT (1 << log));
5662 }
5663
5664 if (GET_CODE (x) == PLUS)
5665 {
5666 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5667
5668 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5669 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5670 	  && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
5671 {
5672 changed = 1;
5673 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5674 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5675 GEN_INT (1 << log));
5676 }
5677
5678 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5679 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5680 	  && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
5681 {
5682 changed = 1;
5683 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5684 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5685 GEN_INT (1 << log));
5686 }
5687
5688 /* Put multiply first if it isn't already. */
5689 if (GET_CODE (XEXP (x, 1)) == MULT)
5690 {
5691 rtx tmp = XEXP (x, 0);
5692 XEXP (x, 0) = XEXP (x, 1);
5693 XEXP (x, 1) = tmp;
5694 changed = 1;
5695 }
5696
5697 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5698 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5699 created by virtual register instantiation, register elimination, and
5700 similar optimizations. */
5701 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5702 {
5703 changed = 1;
5704 x = gen_rtx_PLUS (Pmode,
5705 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5706 XEXP (XEXP (x, 1), 0)),
5707 XEXP (XEXP (x, 1), 1));
5708 }
5709
5710 /* Canonicalize
5711 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5712 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5713 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5714 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5715 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5716 && CONSTANT_P (XEXP (x, 1)))
5717 {
5718 rtx constant;
5719 rtx other = NULL_RTX;
5720
5721 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5722 {
5723 constant = XEXP (x, 1);
5724 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5725 }
5726 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5727 {
5728 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5729 other = XEXP (x, 1);
5730 }
5731 else
5732 constant = 0;
5733
5734 if (constant)
5735 {
5736 changed = 1;
5737 x = gen_rtx_PLUS (Pmode,
5738 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5739 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5740 plus_constant (other, INTVAL (constant)));
5741 }
5742 }
5743
5744 if (changed && legitimate_address_p (mode, x, FALSE))
5745 return x;
5746
5747 if (GET_CODE (XEXP (x, 0)) == MULT)
5748 {
5749 changed = 1;
5750 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5751 }
5752
5753 if (GET_CODE (XEXP (x, 1)) == MULT)
5754 {
5755 changed = 1;
5756 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5757 }
5758
5759 if (changed
5760 && GET_CODE (XEXP (x, 1)) == REG
5761 && GET_CODE (XEXP (x, 0)) == REG)
5762 return x;
5763
5764 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5765 {
5766 changed = 1;
5767 x = legitimize_pic_address (x, 0);
5768 }
5769
5770 if (changed && legitimate_address_p (mode, x, FALSE))
5771 return x;
5772
5773 if (GET_CODE (XEXP (x, 0)) == REG)
5774 {
5775 register rtx temp = gen_reg_rtx (Pmode);
5776 register rtx val = force_operand (XEXP (x, 1), temp);
5777 if (val != temp)
5778 emit_move_insn (temp, val);
5779
5780 XEXP (x, 1) = temp;
5781 return x;
5782 }
5783
5784 else if (GET_CODE (XEXP (x, 1)) == REG)
5785 {
5786 register rtx temp = gen_reg_rtx (Pmode);
5787 register rtx val = force_operand (XEXP (x, 0), temp);
5788 if (val != temp)
5789 emit_move_insn (temp, val);
5790
5791 XEXP (x, 0) = temp;
5792 return x;
5793 }
5794 }
5795
5796 return x;
5797 }
5798 \f
5799 /* Print an integer constant expression in assembler syntax. Addition
5800 and subtraction are the only arithmetic that may appear in these
5801 expressions. FILE is the stdio stream to write to, X is the rtx, and
5802 CODE is the operand print code from the output string. */
5803
5804 static void
5805 output_pic_addr_const (file, x, code)
5806 FILE *file;
5807 rtx x;
5808 int code;
5809 {
5810 char buf[256];
5811
5812 switch (GET_CODE (x))
5813 {
5814 case PC:
5815 if (flag_pic)
5816 putc ('.', file);
5817 else
5818 abort ();
5819 break;
5820
5821 case SYMBOL_REF:
5822 assemble_name (file, XSTR (x, 0));
5823 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5824 fputs ("@PLT", file);
5825 break;
5826
5827 case LABEL_REF:
5828 x = XEXP (x, 0);
5829 /* FALLTHRU */
5830 case CODE_LABEL:
5831 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5832 assemble_name (asm_out_file, buf);
5833 break;
5834
5835 case CONST_INT:
5836 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5837 break;
5838
5839 case CONST:
5840 /* This used to output parentheses around the expression,
5841 but that does not work on the 386 (either ATT or BSD assembler). */
5842 output_pic_addr_const (file, XEXP (x, 0), code);
5843 break;
5844
5845 case CONST_DOUBLE:
5846 if (GET_MODE (x) == VOIDmode)
5847 {
5848 /* We can use %d if the number is <32 bits and positive. */
5849 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5850 fprintf (file, "0x%lx%08lx",
5851 (unsigned long) CONST_DOUBLE_HIGH (x),
5852 (unsigned long) CONST_DOUBLE_LOW (x));
5853 else
5854 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5855 }
5856 else
5857 /* We can't handle floating point constants;
5858 PRINT_OPERAND must handle them. */
5859 output_operand_lossage ("floating constant misused");
5860 break;
5861
5862 case PLUS:
5863 /* Some assemblers need integer constants to appear first. */
5864 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5865 {
5866 output_pic_addr_const (file, XEXP (x, 0), code);
5867 putc ('+', file);
5868 output_pic_addr_const (file, XEXP (x, 1), code);
5869 }
5870 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5871 {
5872 output_pic_addr_const (file, XEXP (x, 1), code);
5873 putc ('+', file);
5874 output_pic_addr_const (file, XEXP (x, 0), code);
5875 }
5876 else
5877 abort ();
5878 break;
5879
5880 case MINUS:
5881 if (!TARGET_MACHO)
5882 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5883 output_pic_addr_const (file, XEXP (x, 0), code);
5884 putc ('-', file);
5885 output_pic_addr_const (file, XEXP (x, 1), code);
5886 if (!TARGET_MACHO)
5887 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5888 break;
5889
5890 case UNSPEC:
5891 if (XVECLEN (x, 0) != 1)
5892 abort ();
5893 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5894 switch (XINT (x, 1))
5895 {
5896 case UNSPEC_GOT:
5897 fputs ("@GOT", file);
5898 break;
5899 case UNSPEC_GOTOFF:
5900 fputs ("@GOTOFF", file);
5901 break;
5902 case UNSPEC_GOTPCREL:
5903 fputs ("@GOTPCREL(%rip)", file);
5904 break;
5905 case UNSPEC_GOTTPOFF:
5906 fputs ("@GOTTPOFF", file);
5907 break;
5908 case UNSPEC_TPOFF:
5909 fputs ("@TPOFF", file);
5910 break;
5911 case UNSPEC_NTPOFF:
5912 fputs ("@NTPOFF", file);
5913 break;
5914 case UNSPEC_DTPOFF:
5915 fputs ("@DTPOFF", file);
5916 break;
5917 default:
5918 output_operand_lossage ("invalid UNSPEC as operand");
5919 break;
5920 }
5921 break;
5922
5923 default:
5924 output_operand_lossage ("invalid expression as operand");
5925 }
5926 }
5927
5928 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5929 We need to handle our special PIC relocations. */
5930
5931 void
5932 i386_dwarf_output_addr_const (file, x)
5933 FILE *file;
5934 rtx x;
5935 {
5936 #ifdef ASM_QUAD
5937 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5938 #else
5939 if (TARGET_64BIT)
5940 abort ();
5941 fprintf (file, "%s", ASM_LONG);
5942 #endif
5943 if (flag_pic)
5944 output_pic_addr_const (file, x, '\0');
5945 else
5946 output_addr_const (file, x);
5947 fputc ('\n', file);
5948 }
5949
5950 /* In the name of slightly smaller debug output, and to cater to
5951    general assembler lossage, recognize PIC+GOTOFF and turn it back
5952 into a direct symbol reference. */
5953
5954 rtx
5955 i386_simplify_dwarf_addr (orig_x)
5956 rtx orig_x;
5957 {
5958 rtx x = orig_x, y;
5959
5960 if (GET_CODE (x) == MEM)
5961 x = XEXP (x, 0);
5962
5963 if (TARGET_64BIT)
5964 {
5965 if (GET_CODE (x) != CONST
5966 || GET_CODE (XEXP (x, 0)) != UNSPEC
5967 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5968 || GET_CODE (orig_x) != MEM)
5969 return orig_x;
5970 return XVECEXP (XEXP (x, 0), 0, 0);
5971 }
5972
5973 if (GET_CODE (x) != PLUS
5974 || GET_CODE (XEXP (x, 1)) != CONST)
5975 return orig_x;
5976
5977 if (GET_CODE (XEXP (x, 0)) == REG
5978 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5979 /* %ebx + GOT/GOTOFF */
5980 y = NULL;
5981 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5982 {
5983 /* %ebx + %reg * scale + GOT/GOTOFF */
5984 y = XEXP (x, 0);
5985 if (GET_CODE (XEXP (y, 0)) == REG
5986 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5987 y = XEXP (y, 1);
5988 else if (GET_CODE (XEXP (y, 1)) == REG
5989 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5990 y = XEXP (y, 0);
5991 else
5992 return orig_x;
5993 if (GET_CODE (y) != REG
5994 && GET_CODE (y) != MULT
5995 && GET_CODE (y) != ASHIFT)
5996 return orig_x;
5997 }
5998 else
5999 return orig_x;
6000
6001 x = XEXP (XEXP (x, 1), 0);
6002 if (GET_CODE (x) == UNSPEC
6003 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6004 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6005 {
6006 if (y)
6007 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6008 return XVECEXP (x, 0, 0);
6009 }
6010
6011 if (GET_CODE (x) == PLUS
6012 && GET_CODE (XEXP (x, 0)) == UNSPEC
6013 && GET_CODE (XEXP (x, 1)) == CONST_INT
6014 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6015 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6016 && GET_CODE (orig_x) != MEM)))
6017 {
6018 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6019 if (y)
6020 return gen_rtx_PLUS (Pmode, y, x);
6021 return x;
6022 }
6023
6024 return orig_x;
6025 }
6026 \f
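/* Print to FILE the condition suffix ("e", "ne", "g", "b", ...) of a
   setcc/cmov/jcc instruction for comparison CODE in mode MODE.  If
   REVERSE is nonzero, print the suffix for the reversed condition;
   FP nonzero selects the spellings used for fcmov.  */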
6027 static void
6028 put_condition_code (code, mode, reverse, fp, file)
6029 enum rtx_code code;
6030 enum machine_mode mode;
6031 int reverse, fp;
6032 FILE *file;
6033 {
6034 const char *suffix;
6035
6036 if (mode == CCFPmode || mode == CCFPUmode)
6037 {
6038 enum rtx_code second_code, bypass_code;
6039 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6040 if (bypass_code != NIL || second_code != NIL)
6041 abort ();
6042 code = ix86_fp_compare_code_to_integer (code);
6043 mode = CCmode;
6044 }
6045 if (reverse)
6046 code = reverse_condition (code);
6047
6048 switch (code)
6049 {
6050 case EQ:
6051 suffix = "e";
6052 break;
6053 case NE:
6054 suffix = "ne";
6055 break;
6056 case GT:
6057 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6058 abort ();
6059 suffix = "g";
6060 break;
6061 case GTU:
6062       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6063 	 Those same assemblers have the same but opposite lossage on cmov.  */
6064 if (mode != CCmode)
6065 abort ();
6066 suffix = fp ? "nbe" : "a";
6067 break;
6068 case LT:
6069 if (mode == CCNOmode || mode == CCGOCmode)
6070 suffix = "s";
6071 else if (mode == CCmode || mode == CCGCmode)
6072 suffix = "l";
6073 else
6074 abort ();
6075 break;
6076 case LTU:
6077 if (mode != CCmode)
6078 abort ();
6079 suffix = "b";
6080 break;
6081 case GE:
6082 if (mode == CCNOmode || mode == CCGOCmode)
6083 suffix = "ns";
6084 else if (mode == CCmode || mode == CCGCmode)
6085 suffix = "ge";
6086 else
6087 abort ();
6088 break;
6089 case GEU:
6090 /* ??? As above. */
6091 if (mode != CCmode)
6092 abort ();
6093 suffix = fp ? "nb" : "ae";
6094 break;
6095 case LE:
6096 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6097 abort ();
6098 suffix = "le";
6099 break;
6100 case LEU:
6101 if (mode != CCmode)
6102 abort ();
6103 suffix = "be";
6104 break;
6105 case UNORDERED:
6106 suffix = fp ? "u" : "p";
6107 break;
6108 case ORDERED:
6109 suffix = fp ? "nu" : "np";
6110 break;
6111 default:
6112 abort ();
6113 }
6114 fputs (suffix, file);
6115 }
6116
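/* Print the name of register X to FILE.  CODE selects the width or
   naming variant: 'b', 'w', 'k' and 'q' pick the QImode, HImode, SImode
   and DImode names, 'h' the high QImode name, and 'y' prints "st(0)"
   for the top of the FP stack; otherwise the mode of X decides.  */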
6117 void
6118 print_reg (x, code, file)
6119 rtx x;
6120 int code;
6121 FILE *file;
6122 {
6123 if (REGNO (x) == ARG_POINTER_REGNUM
6124 || REGNO (x) == FRAME_POINTER_REGNUM
6125 || REGNO (x) == FLAGS_REG
6126 || REGNO (x) == FPSR_REG)
6127 abort ();
6128
6129 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6130 putc ('%', file);
6131
6132 if (code == 'w' || MMX_REG_P (x))
6133 code = 2;
6134 else if (code == 'b')
6135 code = 1;
6136 else if (code == 'k')
6137 code = 4;
6138 else if (code == 'q')
6139 code = 8;
6140 else if (code == 'y')
6141 code = 3;
6142 else if (code == 'h')
6143 code = 0;
6144 else
6145 code = GET_MODE_SIZE (GET_MODE (x));
6146
6147   /* Irritatingly, AMD extended registers use a different naming convention
6148 from the normal registers. */
6149 if (REX_INT_REG_P (x))
6150 {
6151 if (!TARGET_64BIT)
6152 abort ();
6153 switch (code)
6154 {
6155 case 0:
6156 error ("extended registers have no high halves");
6157 break;
6158 case 1:
6159 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6160 break;
6161 case 2:
6162 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6163 break;
6164 case 4:
6165 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6166 break;
6167 case 8:
6168 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6169 break;
6170 default:
6171 error ("unsupported operand size for extended register");
6172 break;
6173 }
6174 return;
6175 }
6176 switch (code)
6177 {
6178 case 3:
6179 if (STACK_TOP_P (x))
6180 {
6181 fputs ("st(0)", file);
6182 break;
6183 }
6184 /* FALLTHRU */
6185 case 8:
6186 case 4:
6187 case 12:
6188 if (! ANY_FP_REG_P (x))
6189 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6190 /* FALLTHRU */
6191 case 16:
6192 case 2:
6193 fputs (hi_reg_name[REGNO (x)], file);
6194 break;
6195 case 1:
6196 fputs (qi_reg_name[REGNO (x)], file);
6197 break;
6198 case 0:
6199 fputs (qi_high_reg_name[REGNO (x)], file);
6200 break;
6201 default:
6202 abort ();
6203 }
6204 }
6205
6206 /* Locate some local-dynamic symbol still in use by this function
6207 so that we can print its name in some tls_local_dynamic_base
6208 pattern. */
6209
6210 static const char *
6211 get_some_local_dynamic_name ()
6212 {
6213 rtx insn;
6214
6215 if (cfun->machine->some_ld_name)
6216 return cfun->machine->some_ld_name;
6217
6218 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6219 if (INSN_P (insn)
6220 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6221 return cfun->machine->some_ld_name;
6222
6223 abort ();
6224 }
6225
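/* for_each_rtx callback for the above: if *PX is a local-dynamic TLS
   symbol, record its name in cfun->machine->some_ld_name and return
   nonzero to stop the walk.  */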
6226 static int
6227 get_some_local_dynamic_name_1 (px, data)
6228 rtx *px;
6229 void *data ATTRIBUTE_UNUSED;
6230 {
6231 rtx x = *px;
6232
6233 if (GET_CODE (x) == SYMBOL_REF
6234 && local_dynamic_symbolic_operand (x, Pmode))
6235 {
6236 cfun->machine->some_ld_name = XSTR (x, 0);
6237 return 1;
6238 }
6239
6240 return 0;
6241 }
6242
6243 /* Meaning of CODE:
6244 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6245 C -- print opcode suffix for set/cmov insn.
6246 c -- like C, but print reversed condition
6247 F,f -- likewise, but for floating-point.
6248 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6249 nothing
6250 R -- print the prefix for register names.
6251 z -- print the opcode suffix for the size of the current operand.
6252 * -- print a star (in certain assembler syntax)
6253 A -- print an absolute memory reference.
6254 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6255    s -- print a shift double count, followed by the assembler's argument
6256 delimiter.
6257 b -- print the QImode name of the register for the indicated operand.
6258 %b0 would print %al if operands[0] is reg 0.
6259 w -- likewise, print the HImode name of the register.
6260 k -- likewise, print the SImode name of the register.
6261 q -- likewise, print the DImode name of the register.
6262 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6263 y -- print "st(0)" instead of "st" as a register.
6264 D -- print condition for SSE cmp instruction.
6265 P -- if PIC, print an @PLT suffix.
6266 X -- don't print any sort of PIC '@' suffix for a symbol.
6267    & -- print some in-use local-dynamic symbol name.
    + -- print a DS or CS segment-override branch hint prefix, based on
 	 the insn's REG_BR_PROB note (only when branch prediction hints
 	 are enabled).
6268  */
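/* For example, if operands[0] is (reg:SI 0), i.e. the AX register, then
   in AT&T syntax %k0 prints %eax, %w0 prints %ax, %b0 prints %al,
   %h0 prints %ah, and %q0 prints %rax in 64-bit mode.  */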
6269
6270 void
6271 print_operand (file, x, code)
6272 FILE *file;
6273 rtx x;
6274 int code;
6275 {
6276 if (code)
6277 {
6278 switch (code)
6279 {
6280 case '*':
6281 if (ASSEMBLER_DIALECT == ASM_ATT)
6282 putc ('*', file);
6283 return;
6284
6285 case '&':
6286 assemble_name (file, get_some_local_dynamic_name ());
6287 return;
6288
6289 case 'A':
6290 if (ASSEMBLER_DIALECT == ASM_ATT)
6291 putc ('*', file);
6292 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6293 {
6294 /* Intel syntax. For absolute addresses, registers should not
6295 	     be surrounded by brackets.  */
6296 if (GET_CODE (x) != REG)
6297 {
6298 putc ('[', file);
6299 PRINT_OPERAND (file, x, 0);
6300 putc (']', file);
6301 return;
6302 }
6303 }
6304 else
6305 abort ();
6306
6307 PRINT_OPERAND (file, x, 0);
6308 return;
6309
6310
6311 case 'L':
6312 if (ASSEMBLER_DIALECT == ASM_ATT)
6313 putc ('l', file);
6314 return;
6315
6316 case 'W':
6317 if (ASSEMBLER_DIALECT == ASM_ATT)
6318 putc ('w', file);
6319 return;
6320
6321 case 'B':
6322 if (ASSEMBLER_DIALECT == ASM_ATT)
6323 putc ('b', file);
6324 return;
6325
6326 case 'Q':
6327 if (ASSEMBLER_DIALECT == ASM_ATT)
6328 putc ('l', file);
6329 return;
6330
6331 case 'S':
6332 if (ASSEMBLER_DIALECT == ASM_ATT)
6333 putc ('s', file);
6334 return;
6335
6336 case 'T':
6337 if (ASSEMBLER_DIALECT == ASM_ATT)
6338 putc ('t', file);
6339 return;
6340
6341 case 'z':
6342 /* 387 opcodes don't get size suffixes if the operands are
6343 registers. */
6344 if (STACK_REG_P (x))
6345 return;
6346
6347 /* Likewise if using Intel opcodes. */
6348 if (ASSEMBLER_DIALECT == ASM_INTEL)
6349 return;
6350
6351 /* This is the size of op from size of operand. */
6352 switch (GET_MODE_SIZE (GET_MODE (x)))
6353 {
6354 case 2:
6355 #ifdef HAVE_GAS_FILDS_FISTS
6356 putc ('s', file);
6357 #endif
6358 return;
6359
6360 case 4:
6361 if (GET_MODE (x) == SFmode)
6362 {
6363 putc ('s', file);
6364 return;
6365 }
6366 else
6367 putc ('l', file);
6368 return;
6369
6370 case 12:
6371 case 16:
6372 putc ('t', file);
6373 return;
6374
6375 case 8:
6376 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6377 {
6378 #ifdef GAS_MNEMONICS
6379 putc ('q', file);
6380 #else
6381 putc ('l', file);
6382 putc ('l', file);
6383 #endif
6384 }
6385 else
6386 putc ('l', file);
6387 return;
6388
6389 default:
6390 abort ();
6391 }
6392
6393 case 'b':
6394 case 'w':
6395 case 'k':
6396 case 'q':
6397 case 'h':
6398 case 'y':
6399 case 'X':
6400 case 'P':
6401 break;
6402
6403 case 's':
6404 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6405 {
6406 PRINT_OPERAND (file, x, 0);
6407 putc (',', file);
6408 }
6409 return;
6410
6411 case 'D':
6412 	  /* Little bit of braindamage here.  The SSE compare instructions
6413 	     use completely different names for the comparisons than the
6414 	     fp conditional moves do.  */
6415 switch (GET_CODE (x))
6416 {
6417 case EQ:
6418 case UNEQ:
6419 fputs ("eq", file);
6420 break;
6421 case LT:
6422 case UNLT:
6423 fputs ("lt", file);
6424 break;
6425 case LE:
6426 case UNLE:
6427 fputs ("le", file);
6428 break;
6429 case UNORDERED:
6430 fputs ("unord", file);
6431 break;
6432 case NE:
6433 case LTGT:
6434 fputs ("neq", file);
6435 break;
6436 case UNGE:
6437 case GE:
6438 fputs ("nlt", file);
6439 break;
6440 case UNGT:
6441 case GT:
6442 fputs ("nle", file);
6443 break;
6444 case ORDERED:
6445 fputs ("ord", file);
6446 break;
6447 default:
6448 abort ();
6449 break;
6450 }
6451 return;
6452 case 'O':
6453 #ifdef CMOV_SUN_AS_SYNTAX
6454 if (ASSEMBLER_DIALECT == ASM_ATT)
6455 {
6456 switch (GET_MODE (x))
6457 {
6458 case HImode: putc ('w', file); break;
6459 case SImode:
6460 case SFmode: putc ('l', file); break;
6461 case DImode:
6462 case DFmode: putc ('q', file); break;
6463 default: abort ();
6464 }
6465 putc ('.', file);
6466 }
6467 #endif
6468 return;
6469 case 'C':
6470 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6471 return;
6472 case 'F':
6473 #ifdef CMOV_SUN_AS_SYNTAX
6474 if (ASSEMBLER_DIALECT == ASM_ATT)
6475 putc ('.', file);
6476 #endif
6477 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6478 return;
6479
6480 /* Like above, but reverse condition */
6481 case 'c':
6482 /* Check to see if argument to %c is really a constant
6483 and not a condition code which needs to be reversed. */
6484 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6485 {
6486 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6487 return;
6488 }
6489 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6490 return;
6491 case 'f':
6492 #ifdef CMOV_SUN_AS_SYNTAX
6493 if (ASSEMBLER_DIALECT == ASM_ATT)
6494 putc ('.', file);
6495 #endif
6496 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6497 return;
6498 case '+':
6499 {
6500 rtx x;
6501
6502 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6503 return;
6504
6505 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6506 if (x)
6507 {
6508 int pred_val = INTVAL (XEXP (x, 0));
6509
6510 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6511 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6512 {
6513 int taken = pred_val > REG_BR_PROB_BASE / 2;
6514 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6515
6516 		/* Emit hints only when the default branch prediction
6517 		   heuristics would fail.  */
6518 if (taken != cputaken)
6519 {
6520 /* We use 3e (DS) prefix for taken branches and
6521 2e (CS) prefix for not taken branches. */
6522 if (taken)
6523 fputs ("ds ; ", file);
6524 else
6525 fputs ("cs ; ", file);
6526 }
6527 }
6528 }
6529 return;
6530 }
6531 default:
6532 output_operand_lossage ("invalid operand code `%c'", code);
6533 }
6534 }
6535
6536 if (GET_CODE (x) == REG)
6537 {
6538 PRINT_REG (x, code, file);
6539 }
6540
6541 else if (GET_CODE (x) == MEM)
6542 {
6543 /* No `byte ptr' prefix for call instructions. */
6544 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6545 {
6546 const char * size;
6547 switch (GET_MODE_SIZE (GET_MODE (x)))
6548 {
6549 case 1: size = "BYTE"; break;
6550 case 2: size = "WORD"; break;
6551 case 4: size = "DWORD"; break;
6552 case 8: size = "QWORD"; break;
6553 case 12: size = "XWORD"; break;
6554 case 16: size = "XMMWORD"; break;
6555 default:
6556 abort ();
6557 }
6558
6559 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6560 if (code == 'b')
6561 size = "BYTE";
6562 else if (code == 'w')
6563 size = "WORD";
6564 else if (code == 'k')
6565 size = "DWORD";
6566
6567 fputs (size, file);
6568 fputs (" PTR ", file);
6569 }
6570
6571 x = XEXP (x, 0);
6572 if (flag_pic && CONSTANT_ADDRESS_P (x))
6573 output_pic_addr_const (file, x, code);
6574 /* Avoid (%rip) for call operands. */
6575 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6576 && GET_CODE (x) != CONST_INT)
6577 output_addr_const (file, x);
6578 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6579 output_operand_lossage ("invalid constraints for operand");
6580 else
6581 output_address (x);
6582 }
6583
6584 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6585 {
6586 REAL_VALUE_TYPE r;
6587 long l;
6588
6589 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6590 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6591
6592 if (ASSEMBLER_DIALECT == ASM_ATT)
6593 putc ('$', file);
6594 fprintf (file, "0x%lx", l);
6595 }
6596
6597 /* These float cases don't actually occur as immediate operands. */
6598 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6599 {
6600 REAL_VALUE_TYPE r;
6601 char dstr[30];
6602
6603 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6604 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6605 fprintf (file, "%s", dstr);
6606 }
6607
6608 else if (GET_CODE (x) == CONST_DOUBLE
6609 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6610 {
6611 REAL_VALUE_TYPE r;
6612 char dstr[30];
6613
6614 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6615 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6616 fprintf (file, "%s", dstr);
6617 }
6618
6619 else
6620 {
6621 if (code != 'P')
6622 {
6623 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6624 {
6625 if (ASSEMBLER_DIALECT == ASM_ATT)
6626 putc ('$', file);
6627 }
6628 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6629 || GET_CODE (x) == LABEL_REF)
6630 {
6631 if (ASSEMBLER_DIALECT == ASM_ATT)
6632 putc ('$', file);
6633 else
6634 fputs ("OFFSET FLAT:", file);
6635 }
6636 }
6637 if (GET_CODE (x) == CONST_INT)
6638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6639 else if (flag_pic)
6640 output_pic_addr_const (file, x, code);
6641 else
6642 output_addr_const (file, x);
6643 }
6644 }
6645 \f
6646 /* Print a memory operand whose address is ADDR. */
6647
6648 void
6649 print_operand_address (file, addr)
6650 FILE *file;
6651 register rtx addr;
6652 {
6653 struct ix86_address parts;
6654 rtx base, index, disp;
6655 int scale;
6656
6657 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6658 {
6659 if (ASSEMBLER_DIALECT == ASM_INTEL)
6660 fputs ("DWORD PTR ", file);
6661 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6662 putc ('%', file);
6663 fputs ("gs:0", file);
6664 return;
6665 }
6666
6667 if (! ix86_decompose_address (addr, &parts))
6668 abort ();
6669
6670 base = parts.base;
6671 index = parts.index;
6672 disp = parts.disp;
6673 scale = parts.scale;
6674
6675 if (!base && !index)
6676 {
6677       /* A displacement-only address requires special attention.  */
6678
6679 if (GET_CODE (disp) == CONST_INT)
6680 {
6681 if (ASSEMBLER_DIALECT == ASM_INTEL)
6682 {
6683 if (USER_LABEL_PREFIX[0] == 0)
6684 putc ('%', file);
6685 fputs ("ds:", file);
6686 }
6687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6688 }
6689 else if (flag_pic)
6690 output_pic_addr_const (file, addr, 0);
6691 else
6692 output_addr_const (file, addr);
6693
6694 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6695 if (TARGET_64BIT
6696 && (GET_CODE (addr) == SYMBOL_REF
6697 || GET_CODE (addr) == LABEL_REF
6698 || (GET_CODE (addr) == CONST
6699 && GET_CODE (XEXP (addr, 0)) == PLUS
6700 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6701 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6702 fputs ("(%rip)", file);
6703 }
6704 else
6705 {
6706 if (ASSEMBLER_DIALECT == ASM_ATT)
6707 {
6708 if (disp)
6709 {
6710 if (flag_pic)
6711 output_pic_addr_const (file, disp, 0);
6712 else if (GET_CODE (disp) == LABEL_REF)
6713 output_asm_label (disp);
6714 else
6715 output_addr_const (file, disp);
6716 }
6717
6718 putc ('(', file);
6719 if (base)
6720 PRINT_REG (base, 0, file);
6721 if (index)
6722 {
6723 putc (',', file);
6724 PRINT_REG (index, 0, file);
6725 if (scale != 1)
6726 fprintf (file, ",%d", scale);
6727 }
6728 putc (')', file);
6729 }
6730 else
6731 {
6732 rtx offset = NULL_RTX;
6733
6734 if (disp)
6735 {
6736 /* Pull out the offset of a symbol; print any symbol itself. */
6737 if (GET_CODE (disp) == CONST
6738 && GET_CODE (XEXP (disp, 0)) == PLUS
6739 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6740 {
6741 offset = XEXP (XEXP (disp, 0), 1);
6742 disp = gen_rtx_CONST (VOIDmode,
6743 XEXP (XEXP (disp, 0), 0));
6744 }
6745
6746 if (flag_pic)
6747 output_pic_addr_const (file, disp, 0);
6748 else if (GET_CODE (disp) == LABEL_REF)
6749 output_asm_label (disp);
6750 else if (GET_CODE (disp) == CONST_INT)
6751 offset = disp;
6752 else
6753 output_addr_const (file, disp);
6754 }
6755
6756 putc ('[', file);
6757 if (base)
6758 {
6759 PRINT_REG (base, 0, file);
6760 if (offset)
6761 {
6762 if (INTVAL (offset) >= 0)
6763 putc ('+', file);
6764 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6765 }
6766 }
6767 else if (offset)
6768 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6769 else
6770 putc ('0', file);
6771
6772 if (index)
6773 {
6774 putc ('+', file);
6775 PRINT_REG (index, 0, file);
6776 if (scale != 1)
6777 fprintf (file, "*%d", scale);
6778 }
6779 putc (']', file);
6780 }
6781 }
6782 }
6783
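/* Used by output_addr_const (via OUTPUT_ADDR_CONST_EXTRA) for UNSPECs it
   does not know about; print the TLS relocation operators we generate
   (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF) and return true, or return false
   for anything else.  */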
6784 bool
6785 output_addr_const_extra (file, x)
6786 FILE *file;
6787 rtx x;
6788 {
6789 rtx op;
6790
6791 if (GET_CODE (x) != UNSPEC)
6792 return false;
6793
6794 op = XVECEXP (x, 0, 0);
6795 switch (XINT (x, 1))
6796 {
6797 case UNSPEC_GOTTPOFF:
6798 output_addr_const (file, op);
6799 fputs ("@GOTTPOFF", file);
6800 break;
6801 case UNSPEC_TPOFF:
6802 output_addr_const (file, op);
6803 fputs ("@TPOFF", file);
6804 break;
6805 case UNSPEC_NTPOFF:
6806 output_addr_const (file, op);
6807 fputs ("@NTPOFF", file);
6808 break;
6809 case UNSPEC_DTPOFF:
6810 output_addr_const (file, op);
6811 fputs ("@DTPOFF", file);
6812 break;
6813
6814 default:
6815 return false;
6816 }
6817
6818 return true;
6819 }
6820 \f
6821 /* Split one or more DImode RTL references into pairs of SImode
6822 references. The RTL can be REG, offsettable MEM, integer constant, or
6823 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6824 split and "num" is its length. lo_half and hi_half are output arrays
6825 that parallel "operands". */
6826
6827 void
6828 split_di (operands, num, lo_half, hi_half)
6829 rtx operands[];
6830 int num;
6831 rtx lo_half[], hi_half[];
6832 {
6833 while (num--)
6834 {
6835 rtx op = operands[num];
6836
6837       /* simplify_subreg refuses to split volatile memory addresses,
6838          but we still have to handle them.  */
6839 if (GET_CODE (op) == MEM)
6840 {
6841 lo_half[num] = adjust_address (op, SImode, 0);
6842 hi_half[num] = adjust_address (op, SImode, 4);
6843 }
6844 else
6845 {
6846 lo_half[num] = simplify_gen_subreg (SImode, op,
6847 GET_MODE (op) == VOIDmode
6848 ? DImode : GET_MODE (op), 0);
6849 hi_half[num] = simplify_gen_subreg (SImode, op,
6850 GET_MODE (op) == VOIDmode
6851 ? DImode : GET_MODE (op), 4);
6852 }
6853 }
6854 }
6855 /* Split one or more TImode RTL references into pairs of DImode
6856    references.  The RTL can be REG, offsettable MEM, integer constant, or
6857    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6858    split and "num" is its length.  lo_half and hi_half are output arrays
6859    that parallel "operands".  */
6860
6861 void
6862 split_ti (operands, num, lo_half, hi_half)
6863 rtx operands[];
6864 int num;
6865 rtx lo_half[], hi_half[];
6866 {
6867 while (num--)
6868 {
6869 rtx op = operands[num];
6870
6871       /* simplify_subreg refuses to split volatile memory addresses, but we
6872          still have to handle them.  */
6873 if (GET_CODE (op) == MEM)
6874 {
6875 lo_half[num] = adjust_address (op, DImode, 0);
6876 hi_half[num] = adjust_address (op, DImode, 8);
6877 }
6878 else
6879 {
6880 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6881 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6882 }
6883 }
6884 }
6885 \f
6886 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6887 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6888 is the expression of the binary operation. The output may either be
6889 emitted here, or returned to the caller, like all output_* functions.
6890
6891 There is no guarantee that the operands are the same mode, as they
6892 might be within FLOAT or FLOAT_EXTEND expressions. */
6893
6894 #ifndef SYSV386_COMPAT
6895 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6896 wants to fix the assemblers because that causes incompatibility
6897 with gcc. No-one wants to fix gcc because that causes
6898 incompatibility with assemblers... You can use the option of
6899 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6900 #define SYSV386_COMPAT 1
6901 #endif
6902
6903 const char *
6904 output_387_binary_op (insn, operands)
6905 rtx insn;
6906 rtx *operands;
6907 {
6908 static char buf[30];
6909 const char *p;
6910 const char *ssep;
6911 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6912
6913 #ifdef ENABLE_CHECKING
6914   /* Even if we do not want to check the inputs, this documents the input
6915      constraints, which helps in understanding the following code.  */
6916 if (STACK_REG_P (operands[0])
6917 && ((REG_P (operands[1])
6918 && REGNO (operands[0]) == REGNO (operands[1])
6919 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6920 || (REG_P (operands[2])
6921 && REGNO (operands[0]) == REGNO (operands[2])
6922 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6923 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6924 ; /* ok */
6925 else if (!is_sse)
6926 abort ();
6927 #endif
6928
6929 switch (GET_CODE (operands[3]))
6930 {
6931 case PLUS:
6932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6934 p = "fiadd";
6935 else
6936 p = "fadd";
6937 ssep = "add";
6938 break;
6939
6940 case MINUS:
6941 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6942 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6943 p = "fisub";
6944 else
6945 p = "fsub";
6946 ssep = "sub";
6947 break;
6948
6949 case MULT:
6950 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6951 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6952 p = "fimul";
6953 else
6954 p = "fmul";
6955 ssep = "mul";
6956 break;
6957
6958 case DIV:
6959 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6960 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6961 p = "fidiv";
6962 else
6963 p = "fdiv";
6964 ssep = "div";
6965 break;
6966
6967 default:
6968 abort ();
6969 }
6970
6971 if (is_sse)
6972 {
6973 strcpy (buf, ssep);
6974 if (GET_MODE (operands[0]) == SFmode)
6975 strcat (buf, "ss\t{%2, %0|%0, %2}");
6976 else
6977 strcat (buf, "sd\t{%2, %0|%0, %2}");
6978 return buf;
6979 }
6980 strcpy (buf, p);
6981
6982 switch (GET_CODE (operands[3]))
6983 {
6984 case MULT:
6985 case PLUS:
6986 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6987 {
6988 rtx temp = operands[2];
6989 operands[2] = operands[1];
6990 operands[1] = temp;
6991 }
6992
6993       /* We now know operands[0] == operands[1].  */
6994
6995 if (GET_CODE (operands[2]) == MEM)
6996 {
6997 p = "%z2\t%2";
6998 break;
6999 }
7000
7001 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7002 {
7003 if (STACK_TOP_P (operands[0]))
7004 /* How is it that we are storing to a dead operand[2]?
7005 Well, presumably operands[1] is dead too. We can't
7006 store the result to st(0) as st(0) gets popped on this
7007 instruction. Instead store to operands[2] (which I
7008 think has to be st(1)). st(1) will be popped later.
7009 gcc <= 2.8.1 didn't have this check and generated
7010 assembly code that the Unixware assembler rejected. */
7011 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7012 else
7013 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7014 break;
7015 }
7016
7017 if (STACK_TOP_P (operands[0]))
7018 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7019 else
7020 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7021 break;
7022
7023 case MINUS:
7024 case DIV:
7025 if (GET_CODE (operands[1]) == MEM)
7026 {
7027 p = "r%z1\t%1";
7028 break;
7029 }
7030
7031 if (GET_CODE (operands[2]) == MEM)
7032 {
7033 p = "%z2\t%2";
7034 break;
7035 }
7036
7037 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7038 {
7039 #if SYSV386_COMPAT
7040 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7041 derived assemblers, confusingly reverse the direction of
7042 the operation for fsub{r} and fdiv{r} when the
7043 destination register is not st(0). The Intel assembler
7044 doesn't have this brain damage. Read !SYSV386_COMPAT to
7045 figure out what the hardware really does. */
7046 if (STACK_TOP_P (operands[0]))
7047 p = "{p\t%0, %2|rp\t%2, %0}";
7048 else
7049 p = "{rp\t%2, %0|p\t%0, %2}";
7050 #else
7051 if (STACK_TOP_P (operands[0]))
7052 /* As above for fmul/fadd, we can't store to st(0). */
7053 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7054 else
7055 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7056 #endif
7057 break;
7058 }
7059
7060 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7061 {
7062 #if SYSV386_COMPAT
7063 if (STACK_TOP_P (operands[0]))
7064 p = "{rp\t%0, %1|p\t%1, %0}";
7065 else
7066 p = "{p\t%1, %0|rp\t%0, %1}";
7067 #else
7068 if (STACK_TOP_P (operands[0]))
7069 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7070 else
7071 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7072 #endif
7073 break;
7074 }
7075
7076 if (STACK_TOP_P (operands[0]))
7077 {
7078 if (STACK_TOP_P (operands[1]))
7079 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7080 else
7081 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7082 break;
7083 }
7084 else if (STACK_TOP_P (operands[1]))
7085 {
7086 #if SYSV386_COMPAT
7087 p = "{\t%1, %0|r\t%0, %1}";
7088 #else
7089 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7090 #endif
7091 }
7092 else
7093 {
7094 #if SYSV386_COMPAT
7095 p = "{r\t%2, %0|\t%0, %2}";
7096 #else
7097 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7098 #endif
7099 }
7100 break;
7101
7102 default:
7103 abort ();
7104 }
7105
7106 strcat (buf, p);
7107 return buf;
7108 }
7109
7110 /* Output code to initialize control word copies used by
7111    trunc?f?i patterns.  NORMAL is set to the current control word, while
7112    ROUND_DOWN is set to a control word that rounds downwards.  */
7113 void
7114 emit_i387_cw_initialization (normal, round_down)
7115 rtx normal, round_down;
7116 {
7117 rtx reg = gen_reg_rtx (HImode);
7118
7119 emit_insn (gen_x86_fnstcw_1 (normal));
7120 emit_move_insn (reg, normal);
7121 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7122 && !TARGET_64BIT)
7123 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7124 else
7125 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7126 emit_move_insn (round_down, reg);
7127 }
7128
7129 /* Output code for INSN to convert a float to a signed int. OPERANDS
7130 are the insn operands. The output may be [HSD]Imode and the input
7131 operand may be [SDX]Fmode. */
7132
7133 const char *
7134 output_fix_trunc (insn, operands)
7135 rtx insn;
7136 rtx *operands;
7137 {
7138 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7139 int dimode_p = GET_MODE (operands[0]) == DImode;
7140
7141 /* Jump through a hoop or two for DImode, since the hardware has no
7142 non-popping instruction. We used to do this a different way, but
7143 that was somewhat fragile and broke with post-reload splitters. */
7144 if (dimode_p && !stack_top_dies)
7145 output_asm_insn ("fld\t%y1", operands);
7146
7147 if (!STACK_TOP_P (operands[1]))
7148 abort ();
7149
7150 if (GET_CODE (operands[0]) != MEM)
7151 abort ();
7152
7153 output_asm_insn ("fldcw\t%3", operands);
7154 if (stack_top_dies || dimode_p)
7155 output_asm_insn ("fistp%z0\t%0", operands);
7156 else
7157 output_asm_insn ("fist%z0\t%0", operands);
7158 output_asm_insn ("fldcw\t%2", operands);
7159
7160 return "";
7161 }
7162
7163 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7164 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7165 when fucom should be used. */
7166
7167 const char *
7168 output_fp_compare (insn, operands, eflags_p, unordered_p)
7169 rtx insn;
7170 rtx *operands;
7171 int eflags_p, unordered_p;
7172 {
7173 int stack_top_dies;
7174 rtx cmp_op0 = operands[0];
7175 rtx cmp_op1 = operands[1];
7176 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7177
7178 if (eflags_p == 2)
7179 {
7180 cmp_op0 = cmp_op1;
7181 cmp_op1 = operands[2];
7182 }
7183 if (is_sse)
7184 {
7185 if (GET_MODE (operands[0]) == SFmode)
7186 if (unordered_p)
7187 return "ucomiss\t{%1, %0|%0, %1}";
7188 else
7189 	  return "comiss\t{%1, %0|%0, %1}";
7190 else
7191 if (unordered_p)
7192 return "ucomisd\t{%1, %0|%0, %1}";
7193 else
7194 	  return "comisd\t{%1, %0|%0, %1}";
7195 }
7196
7197 if (! STACK_TOP_P (cmp_op0))
7198 abort ();
7199
7200 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7201
7202 if (STACK_REG_P (cmp_op1)
7203 && stack_top_dies
7204 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7205 && REGNO (cmp_op1) != FIRST_STACK_REG)
7206 {
7207       /* If both the top of the 387 stack and the other operand (also a
7208 	 stack register) die, then this must be a `fcompp' float
7209 	 compare.  */
7210
7211 if (eflags_p == 1)
7212 {
7213 /* There is no double popping fcomi variant. Fortunately,
7214 eflags is immune from the fstp's cc clobbering. */
7215 if (unordered_p)
7216 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7217 else
7218 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7219 return "fstp\t%y0";
7220 }
7221 else
7222 {
7223 if (eflags_p == 2)
7224 {
7225 if (unordered_p)
7226 return "fucompp\n\tfnstsw\t%0";
7227 else
7228 return "fcompp\n\tfnstsw\t%0";
7229 }
7230 else
7231 {
7232 if (unordered_p)
7233 return "fucompp";
7234 else
7235 return "fcompp";
7236 }
7237 }
7238 }
7239 else
7240 {
7241 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
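      /* For example, a requested fcomi (eflags_p == 1) against an FP
	 operand, with an ordered compare and a dying top-of-stack, gives
	 mask (1 << 3) | 1 == 9 and selects "fcomip".  */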
7242
7243 static const char * const alt[24] =
7244 {
7245 "fcom%z1\t%y1",
7246 "fcomp%z1\t%y1",
7247 "fucom%z1\t%y1",
7248 "fucomp%z1\t%y1",
7249
7250 "ficom%z1\t%y1",
7251 "ficomp%z1\t%y1",
7252 NULL,
7253 NULL,
7254
7255 "fcomi\t{%y1, %0|%0, %y1}",
7256 "fcomip\t{%y1, %0|%0, %y1}",
7257 "fucomi\t{%y1, %0|%0, %y1}",
7258 "fucomip\t{%y1, %0|%0, %y1}",
7259
7260 NULL,
7261 NULL,
7262 NULL,
7263 NULL,
7264
7265 "fcom%z2\t%y2\n\tfnstsw\t%0",
7266 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7267 "fucom%z2\t%y2\n\tfnstsw\t%0",
7268 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7269
7270 "ficom%z2\t%y2\n\tfnstsw\t%0",
7271 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7272 NULL,
7273 NULL
7274 };
7275
7276 int mask;
7277 const char *ret;
7278
7279 mask = eflags_p << 3;
7280 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7281 mask |= unordered_p << 1;
7282 mask |= stack_top_dies;
7283
7284 if (mask >= 24)
7285 abort ();
7286 ret = alt[mask];
7287 if (ret == NULL)
7288 abort ();
7289
7290 return ret;
7291 }
7292 }
7293
7294 void
7295 ix86_output_addr_vec_elt (file, value)
7296 FILE *file;
7297 int value;
7298 {
7299 const char *directive = ASM_LONG;
7300
7301 if (TARGET_64BIT)
7302 {
7303 #ifdef ASM_QUAD
7304 directive = ASM_QUAD;
7305 #else
7306 abort ();
7307 #endif
7308 }
7309
7310 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7311 }
7312
7313 void
7314 ix86_output_addr_diff_elt (file, value, rel)
7315 FILE *file;
7316 int value, rel;
7317 {
7318 if (TARGET_64BIT)
7319 fprintf (file, "%s%s%d-%s%d\n",
7320 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7321 else if (HAVE_AS_GOTOFF_IN_DATA)
7322 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7323 #if TARGET_MACHO
7324 else if (TARGET_MACHO)
7325 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7326 machopic_function_base_name () + 1);
7327 #endif
7328 else
7329 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7330 ASM_LONG, LPREFIX, value);
7331 }
7332 \f
7333 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7334 for the target. */
7335
7336 void
7337 ix86_expand_clear (dest)
7338 rtx dest;
7339 {
7340 rtx tmp;
7341
7342 /* We play register width games, which are only valid after reload. */
7343 if (!reload_completed)
7344 abort ();
7345
7346 /* Avoid HImode and its attendant prefix byte. */
7347 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7348 dest = gen_rtx_REG (SImode, REGNO (dest));
7349
7350 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7351
7352 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7353 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7354 {
7355 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7356 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7357 }
7358
7359 emit_insn (tmp);
7360 }
7361
7362 /* X is an unchanging MEM. If it is a constant pool reference, return
7363 the constant pool rtx, else NULL. */
7364
7365 static rtx
7366 maybe_get_pool_constant (x)
7367 rtx x;
7368 {
7369 x = XEXP (x, 0);
7370
7371 if (flag_pic)
7372 {
7373 if (GET_CODE (x) != PLUS)
7374 return NULL_RTX;
7375 if (XEXP (x, 0) != pic_offset_table_rtx)
7376 return NULL_RTX;
7377 x = XEXP (x, 1);
7378 if (GET_CODE (x) != CONST)
7379 return NULL_RTX;
7380 x = XEXP (x, 0);
7381 if (GET_CODE (x) != UNSPEC)
7382 return NULL_RTX;
7383 if (XINT (x, 1) != UNSPEC_GOTOFF)
7384 return NULL_RTX;
7385 x = XVECEXP (x, 0, 0);
7386 }
7387
7388 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7389 return get_pool_constant (x);
7390
7391 return NULL_RTX;
7392 }
7393
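/* Expand a scalar move in MODE from operands[1] to operands[0],
   legitimizing PIC and TLS symbolic operands and forcing operands into
   forms the move patterns accept: memory-to-memory moves go through a
   register, and FP constants headed for a register are loaded from the
   constant pool.  */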
7394 void
7395 ix86_expand_move (mode, operands)
7396 enum machine_mode mode;
7397 rtx operands[];
7398 {
7399 int strict = (reload_in_progress || reload_completed);
7400 rtx insn, op0, op1, tmp;
7401
7402 op0 = operands[0];
7403 op1 = operands[1];
7404
7405 /* ??? We have a slight problem. We need to say that tls symbols are
7406 not legitimate constants so that reload does not helpfully reload
7407 these constants from a REG_EQUIV, which we cannot handle. (Recall
7408 that general- and local-dynamic address resolution requires a
7409 function call.)
7410
7411 However, if we say that tls symbols are not legitimate constants,
7412      then emit_move_insn helpfully drops them into the constant pool.
7413
7414 It is far easier to work around emit_move_insn than reload. Recognize
7415 the MEM that we would have created and extract the symbol_ref. */
7416
7417 if (mode == Pmode
7418 && GET_CODE (op1) == MEM
7419 && RTX_UNCHANGING_P (op1))
7420 {
7421 tmp = maybe_get_pool_constant (op1);
7422 /* Note that we only care about symbolic constants here, which
7423 unlike CONST_INT will always have a proper mode. */
7424 if (tmp && GET_MODE (tmp) == Pmode)
7425 op1 = tmp;
7426 }
7427
7428 if (tls_symbolic_operand (op1, Pmode))
7429 {
7430 op1 = legitimize_address (op1, op1, VOIDmode);
7431 if (GET_CODE (op0) == MEM)
7432 {
7433 tmp = gen_reg_rtx (mode);
7434 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7435 op1 = tmp;
7436 }
7437 }
7438 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7439 {
7440 #if TARGET_MACHO
7441 if (MACHOPIC_PURE)
7442 {
7443 rtx temp = ((reload_in_progress
7444 || ((op0 && GET_CODE (op0) == REG)
7445 && mode == Pmode))
7446 ? op0 : gen_reg_rtx (Pmode));
7447 op1 = machopic_indirect_data_reference (op1, temp);
7448 op1 = machopic_legitimize_pic_address (op1, mode,
7449 temp == op1 ? 0 : temp);
7450 }
7451 else
7452 {
7453 if (MACHOPIC_INDIRECT)
7454 op1 = machopic_indirect_data_reference (op1, 0);
7455 }
7456 if (op0 != op1)
7457 {
7458 insn = gen_rtx_SET (VOIDmode, op0, op1);
7459 emit_insn (insn);
7460 }
7461 return;
7462 #endif /* TARGET_MACHO */
7463 if (GET_CODE (op0) == MEM)
7464 op1 = force_reg (Pmode, op1);
7465 else
7466 {
7467 rtx temp = op0;
7468 if (GET_CODE (temp) != REG)
7469 temp = gen_reg_rtx (Pmode);
7470 temp = legitimize_pic_address (op1, temp);
7471 if (temp == op0)
7472 return;
7473 op1 = temp;
7474 }
7475 }
7476 else
7477 {
7478 if (GET_CODE (op0) == MEM
7479 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7480 || !push_operand (op0, mode))
7481 && GET_CODE (op1) == MEM)
7482 op1 = force_reg (mode, op1);
7483
7484 if (push_operand (op0, mode)
7485 && ! general_no_elim_operand (op1, mode))
7486 op1 = copy_to_mode_reg (mode, op1);
7487
7488       /* Force large constants in 64bit compilation into a register
7489 to get them CSEed. */
7490 if (TARGET_64BIT && mode == DImode
7491 && immediate_operand (op1, mode)
7492 && !x86_64_zero_extended_value (op1)
7493 && !register_operand (op0, mode)
7494 && optimize && !reload_completed && !reload_in_progress)
7495 op1 = copy_to_mode_reg (mode, op1);
7496
7497 if (FLOAT_MODE_P (mode))
7498 {
7499 /* If we are loading a floating point constant to a register,
7500 force the value to memory now, since we'll get better code
7501 	     out of the back end.  */
7502
7503 if (strict)
7504 ;
7505 else if (GET_CODE (op1) == CONST_DOUBLE
7506 && register_operand (op0, mode))
7507 op1 = validize_mem (force_const_mem (mode, op1));
7508 }
7509 }
7510
7511 insn = gen_rtx_SET (VOIDmode, op0, op1);
7512
7513 emit_insn (insn);
7514 }
7515
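/* Expand a vector-mode move: constant sources headed for a register are
   forced into the constant pool, and when neither operand is a register
   (and the source is not the zero constant) the source is copied through
   a register first.  */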
7516 void
7517 ix86_expand_vector_move (mode, operands)
7518 enum machine_mode mode;
7519 rtx operands[];
7520 {
7521 /* Force constants other than zero into memory. We do not know how
7522 the instructions used to build constants modify the upper 64 bits
7523      of the register; once we have that information we may be able
7524 to handle some of them more efficiently. */
7525 if ((reload_in_progress | reload_completed) == 0
7526 && register_operand (operands[0], mode)
7527 && CONSTANT_P (operands[1]))
7528 {
7529 rtx addr = gen_reg_rtx (Pmode);
7530 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7531 operands[1] = gen_rtx_MEM (mode, addr);
7532 }
7533
7534 /* Make operand1 a register if it isn't already. */
7535 if ((reload_in_progress | reload_completed) == 0
7536 && !register_operand (operands[0], mode)
7537 && !register_operand (operands[1], mode)
7538 && operands[1] != CONST0_RTX (mode))
7539 {
7540 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7541 emit_move_insn (operands[0], temp);
7542 return;
7543 }
7544
7545 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7546 }
7547
7548 /* Attempt to expand a binary operator. Make the expansion closer to the
7549    actual machine than just general_operand, which would allow 3 separate
7550 memory references (one output, two input) in a single insn. */
7551
7552 void
7553 ix86_expand_binary_operator (code, mode, operands)
7554 enum rtx_code code;
7555 enum machine_mode mode;
7556 rtx operands[];
7557 {
7558 int matching_memory;
7559 rtx src1, src2, dst, op, clob;
7560
7561 dst = operands[0];
7562 src1 = operands[1];
7563 src2 = operands[2];
7564
7565 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7566 if (GET_RTX_CLASS (code) == 'c'
7567 && (rtx_equal_p (dst, src2)
7568 || immediate_operand (src1, mode)))
7569 {
7570 rtx temp = src1;
7571 src1 = src2;
7572 src2 = temp;
7573 }
7574
7575 /* If the destination is memory, and we do not have matching source
7576 operands, do things in registers. */
7577 matching_memory = 0;
7578 if (GET_CODE (dst) == MEM)
7579 {
7580 if (rtx_equal_p (dst, src1))
7581 matching_memory = 1;
7582 else if (GET_RTX_CLASS (code) == 'c'
7583 && rtx_equal_p (dst, src2))
7584 matching_memory = 2;
7585 else
7586 dst = gen_reg_rtx (mode);
7587 }
7588
7589 /* Both source operands cannot be in memory. */
7590 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7591 {
7592 if (matching_memory != 2)
7593 src2 = force_reg (mode, src2);
7594 else
7595 src1 = force_reg (mode, src1);
7596 }
7597
7598 /* If the operation is not commutable, source 1 cannot be a constant
7599 or non-matching memory. */
7600 if ((CONSTANT_P (src1)
7601 || (!matching_memory && GET_CODE (src1) == MEM))
7602 && GET_RTX_CLASS (code) != 'c')
7603 src1 = force_reg (mode, src1);
7604
7605 /* If optimizing, copy to regs to improve CSE */
7606 if (optimize && ! no_new_pseudos)
7607 {
7608 if (GET_CODE (dst) == MEM)
7609 dst = gen_reg_rtx (mode);
7610 if (GET_CODE (src1) == MEM)
7611 src1 = force_reg (mode, src1);
7612 if (GET_CODE (src2) == MEM)
7613 src2 = force_reg (mode, src2);
7614 }
7615
7616 /* Emit the instruction. */
7617
7618 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7619 if (reload_in_progress)
7620 {
7621 /* Reload doesn't know about the flags register, and doesn't know that
7622 it doesn't want to clobber it. We can only do this with PLUS. */
7623 if (code != PLUS)
7624 abort ();
7625 emit_insn (op);
7626 }
7627 else
7628 {
7629 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7630 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7631 }
7632
7633 /* Fix up the destination if needed. */
7634 if (dst != operands[0])
7635 emit_move_insn (operands[0], dst);
7636 }
7637
7638 /* Return TRUE or FALSE depending on whether the binary operator meets the
7639 appropriate constraints. */
7640
7641 int
7642 ix86_binary_operator_ok (code, mode, operands)
7643 enum rtx_code code;
7644 enum machine_mode mode ATTRIBUTE_UNUSED;
7645 rtx operands[3];
7646 {
7647 /* Both source operands cannot be in memory. */
7648 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7649 return 0;
7650 /* If the operation is not commutative, source 1 cannot be a constant. */
7651 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7652 return 0;
7653 /* If the destination is memory, we must have a matching source operand. */
7654 if (GET_CODE (operands[0]) == MEM
7655 && ! (rtx_equal_p (operands[0], operands[1])
7656 || (GET_RTX_CLASS (code) == 'c'
7657 && rtx_equal_p (operands[0], operands[2]))))
7658 return 0;
7659 /* If the operation is not commutative and source 1 is memory, we must
7660 have a matching destination. */
7661 if (GET_CODE (operands[1]) == MEM
7662 && GET_RTX_CLASS (code) != 'c'
7663 && ! rtx_equal_p (operands[0], operands[1]))
7664 return 0;
7665 return 1;
7666 }
7667
7668 /* Attempt to expand a unary operator. Make the expansion closer to the
7669 actual machine than just general_operand, which would allow 2 separate
7670 memory references (one output, one input) in a single insn. */
7671
7672 void
7673 ix86_expand_unary_operator (code, mode, operands)
7674 enum rtx_code code;
7675 enum machine_mode mode;
7676 rtx operands[];
7677 {
7678 int matching_memory;
7679 rtx src, dst, op, clob;
7680
7681 dst = operands[0];
7682 src = operands[1];
7683
7684 /* If the destination is memory, and we do not have matching source
7685 operands, do things in registers. */
7686 matching_memory = 0;
7687 if (GET_CODE (dst) == MEM)
7688 {
7689 if (rtx_equal_p (dst, src))
7690 matching_memory = 1;
7691 else
7692 dst = gen_reg_rtx (mode);
7693 }
7694
7695 /* When source operand is memory, destination must match. */
7696 if (!matching_memory && GET_CODE (src) == MEM)
7697 src = force_reg (mode, src);
7698
7699 /* If optimizing, copy to regs to improve CSE */
7700 if (optimize && ! no_new_pseudos)
7701 {
7702 if (GET_CODE (dst) == MEM)
7703 dst = gen_reg_rtx (mode);
7704 if (GET_CODE (src) == MEM)
7705 src = force_reg (mode, src);
7706 }
7707
7708 /* Emit the instruction. */
7709
7710 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7711 if (reload_in_progress || code == NOT)
7712 {
7713 /* Reload doesn't know about the flags register, and doesn't know that
7714 it doesn't want to clobber it. */
7715 if (code != NOT)
7716 abort ();
7717 emit_insn (op);
7718 }
7719 else
7720 {
7721 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7722 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7723 }
7724
7725 /* Fix up the destination if needed. */
7726 if (dst != operands[0])
7727 emit_move_insn (operands[0], dst);
7728 }
7729
7730 /* Return TRUE or FALSE depending on whether the unary operator meets the
7731 appropriate constraints. */
7732
7733 int
7734 ix86_unary_operator_ok (code, mode, operands)
7735 enum rtx_code code ATTRIBUTE_UNUSED;
7736 enum machine_mode mode ATTRIBUTE_UNUSED;
7737 rtx operands[2] ATTRIBUTE_UNUSED;
7738 {
7739 /* If one of operands is memory, source and destination must match. */
7740 if ((GET_CODE (operands[0]) == MEM
7741 || GET_CODE (operands[1]) == MEM)
7742 && ! rtx_equal_p (operands[0], operands[1]))
7743 return FALSE;
7744 return TRUE;
7745 }
7746
7747 /* Return TRUE or FALSE depending on whether the first SET in INSN
7748 has source and destination with matching CC modes, and whether that
7749 CC mode is at least as constrained as REQ_MODE. */
7750
7751 int
7752 ix86_match_ccmode (insn, req_mode)
7753 rtx insn;
7754 enum machine_mode req_mode;
7755 {
7756 rtx set;
7757 enum machine_mode set_mode;
7758
7759 set = PATTERN (insn);
7760 if (GET_CODE (set) == PARALLEL)
7761 set = XVECEXP (set, 0, 0);
7762 if (GET_CODE (set) != SET)
7763 abort ();
7764 if (GET_CODE (SET_SRC (set)) != COMPARE)
7765 abort ();
7766
7767 set_mode = GET_MODE (SET_DEST (set));
7768 switch (set_mode)
7769 {
7770 case CCNOmode:
7771 if (req_mode != CCNOmode
7772 && (req_mode != CCmode
7773 || XEXP (SET_SRC (set), 1) != const0_rtx))
7774 return 0;
7775 break;
7776 case CCmode:
7777 if (req_mode == CCGCmode)
7778 return 0;
7779 /* FALLTHRU */
7780 case CCGCmode:
7781 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7782 return 0;
7783 /* FALLTHRU */
7784 case CCGOCmode:
7785 if (req_mode == CCZmode)
7786 return 0;
7787 /* FALLTHRU */
7788 case CCZmode:
7789 break;
7790
7791 default:
7792 abort ();
7793 }
7794
7795 return (GET_MODE (SET_SRC (set)) == set_mode);
7796 }
7797
7798 /* Generate insn patterns to do an integer compare of OPERANDS. */
7799
7800 static rtx
7801 ix86_expand_int_compare (code, op0, op1)
7802 enum rtx_code code;
7803 rtx op0, op1;
7804 {
7805 enum machine_mode cmpmode;
7806 rtx tmp, flags;
7807
7808 cmpmode = SELECT_CC_MODE (code, op0, op1);
7809 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7810
7811 /* This is very simple, but making the interface the same as in the
7812 FP case makes the rest of the code easier. */
7813 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7814 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7815
7816 /* Return the test that should be put into the flags user, i.e.
7817 the bcc, scc, or cmov instruction. */
7818 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7819 }
7820
7821 /* Figure out whether to use ordered or unordered fp comparisons.
7822 Return the appropriate mode to use. */
7823
7824 enum machine_mode
7825 ix86_fp_compare_mode (code)
7826 enum rtx_code code ATTRIBUTE_UNUSED;
7827 {
7828 /* ??? In order to make all comparisons reversible, we do all comparisons
7829 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7830 all forms trapping and nontrapping comparisons, we can make inequality
7831 comparisons trapping again, since it results in better code when using
7832 FCOM based compares. */
7833 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7834 }
7835
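/* Return the CC mode that should be used for a comparison of OP0 and OP1
   with code CODE. */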
7836 enum machine_mode
7837 ix86_cc_mode (code, op0, op1)
7838 enum rtx_code code;
7839 rtx op0, op1;
7840 {
7841 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7842 return ix86_fp_compare_mode (code);
7843 switch (code)
7844 {
7845 /* Only zero flag is needed. */
7846 case EQ: /* ZF=0 */
7847 case NE: /* ZF!=0 */
7848 return CCZmode;
7849 /* Codes needing carry flag. */
7850 case GEU: /* CF=0 */
7851 case GTU: /* CF=0 & ZF=0 */
7852 case LTU: /* CF=1 */
7853 case LEU: /* CF=1 | ZF=1 */
7854 return CCmode;
7855 /* Codes possibly doable only with sign flag when
7856 comparing against zero. */
7857 case GE: /* SF=OF or SF=0 */
7858 case LT: /* SF<>OF or SF=1 */
7859 if (op1 == const0_rtx)
7860 return CCGOCmode;
7861 else
7862 /* For other cases Carry flag is not required. */
7863 return CCGCmode;
7864 /* Codes doable only with sign flag when comparing
7865 against zero, but we lack a jump instruction for it,
7866 so we need to use relational tests against overflow,
7867 which thus needs to be zero. */
7868 case GT: /* ZF=0 & SF=OF */
7869 case LE: /* ZF=1 | SF<>OF */
7870 if (op1 == const0_rtx)
7871 return CCNOmode;
7872 else
7873 return CCGCmode;
7874 /* The strcmp pattern does (use flags) and combine may ask us for the proper
7875 mode. */
7876 case USE:
7877 return CCmode;
7878 default:
7879 abort ();
7880 }
7881 }
7882
7883 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7884
7885 int
7886 ix86_use_fcomi_compare (code)
7887 enum rtx_code code ATTRIBUTE_UNUSED;
7888 {
7889 enum rtx_code swapped_code = swap_condition (code);
7890 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7891 || (ix86_fp_comparison_cost (swapped_code)
7892 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7893 }
7894
7895 /* Swap, force into registers, or otherwise massage the two operands
7896 to a fp comparison. The operands are updated in place; the new
7897 comparison code is returned. */
7898
7899 static enum rtx_code
7900 ix86_prepare_fp_compare_args (code, pop0, pop1)
7901 enum rtx_code code;
7902 rtx *pop0, *pop1;
7903 {
7904 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7905 rtx op0 = *pop0, op1 = *pop1;
7906 enum machine_mode op_mode = GET_MODE (op0);
7907 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7908
7909 /* All of the unordered compare instructions only work on registers.
7910 The same is true of the XFmode compare instructions. The same is
7911 true of the fcomi compare instructions. */
7912
7913 if (!is_sse
7914 && (fpcmp_mode == CCFPUmode
7915 || op_mode == XFmode
7916 || op_mode == TFmode
7917 || ix86_use_fcomi_compare (code)))
7918 {
7919 op0 = force_reg (op_mode, op0);
7920 op1 = force_reg (op_mode, op1);
7921 }
7922 else
7923 {
7924 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7925 things around if they appear profitable, otherwise force op0
7926 into a register. */
7927
7928 if (standard_80387_constant_p (op0) == 0
7929 || (GET_CODE (op0) == MEM
7930 && ! (standard_80387_constant_p (op1) == 0
7931 || GET_CODE (op1) == MEM)))
7932 {
7933 rtx tmp;
7934 tmp = op0, op0 = op1, op1 = tmp;
7935 code = swap_condition (code);
7936 }
7937
7938 if (GET_CODE (op0) != REG)
7939 op0 = force_reg (op_mode, op0);
7940
7941 if (CONSTANT_P (op1))
7942 {
7943 if (standard_80387_constant_p (op1))
7944 op1 = force_reg (op_mode, op1);
7945 else
7946 op1 = validize_mem (force_const_mem (op_mode, op1));
7947 }
7948 }
7949
7950 /* Try to rearrange the comparison to make it cheaper. */
7951 if (ix86_fp_comparison_cost (code)
7952 > ix86_fp_comparison_cost (swap_condition (code))
7953 && (GET_CODE (op1) == REG || !no_new_pseudos))
7954 {
7955 rtx tmp;
7956 tmp = op0, op0 = op1, op1 = tmp;
7957 code = swap_condition (code);
7958 if (GET_CODE (op0) != REG)
7959 op0 = force_reg (op_mode, op0);
7960 }
7961
7962 *pop0 = op0;
7963 *pop1 = op1;
7964 return code;
7965 }
7966
7967 /* Convert comparison codes we use to represent FP comparison to integer
7968 code that will result in proper branch. Return UNKNOWN if no such code
7969 is available. */
7970 static enum rtx_code
7971 ix86_fp_compare_code_to_integer (code)
7972 enum rtx_code code;
7973 {
7974 switch (code)
7975 {
7976 case GT:
7977 return GTU;
7978 case GE:
7979 return GEU;
7980 case ORDERED:
7981 case UNORDERED:
7982 return code;
7983 break;
7984 case UNEQ:
7985 return EQ;
7986 break;
7987 case UNLT:
7988 return LTU;
7989 break;
7990 case UNLE:
7991 return LEU;
7992 break;
7993 case LTGT:
7994 return NE;
7995 break;
7996 default:
7997 return UNKNOWN;
7998 }
7999 }
8000
8001 /* Split comparison code CODE into comparisons we can do using branch
8002 instructions. BYPASS_CODE is the comparison code for a branch that will
8003 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8004 is not required, its value is set to NIL.
8005 We never require more than two branches. */
8006 static void
8007 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8008 enum rtx_code code, *bypass_code, *first_code, *second_code;
8009 {
8010 *first_code = code;
8011 *bypass_code = NIL;
8012 *second_code = NIL;
8013
8014 /* The fcomi comparison sets flags as follows:
8015
8016 cmp ZF PF CF
8017 > 0 0 0
8018 < 0 0 1
8019 = 1 0 0
8020 un 1 1 1 */
8021
8022 switch (code)
8023 {
8024 case GT: /* GTU - CF=0 & ZF=0 */
8025 case GE: /* GEU - CF=0 */
8026 case ORDERED: /* PF=0 */
8027 case UNORDERED: /* PF=1 */
8028 case UNEQ: /* EQ - ZF=1 */
8029 case UNLT: /* LTU - CF=1 */
8030 case UNLE: /* LEU - CF=1 | ZF=1 */
8031 case LTGT: /* EQ - ZF=0 */
8032 break;
8033 case LT: /* LTU - CF=1 - fails on unordered */
8034 *first_code = UNLT;
8035 *bypass_code = UNORDERED;
8036 break;
8037 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8038 *first_code = UNLE;
8039 *bypass_code = UNORDERED;
8040 break;
8041 case EQ: /* EQ - ZF=1 - fails on unordered */
8042 *first_code = UNEQ;
8043 *bypass_code = UNORDERED;
8044 break;
8045 case NE: /* NE - ZF=0 - fails on unordered */
8046 *first_code = LTGT;
8047 *second_code = UNORDERED;
8048 break;
8049 case UNGE: /* GEU - CF=0 - fails on unordered */
8050 *first_code = GE;
8051 *second_code = UNORDERED;
8052 break;
8053 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8054 *first_code = GT;
8055 *second_code = UNORDERED;
8056 break;
8057 default:
8058 abort ();
8059 }
8060 if (!TARGET_IEEE_FP)
8061 {
8062 *second_code = NIL;
8063 *bypass_code = NIL;
8064 }
8065 }
8066
8067 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8068 All of the following functions use the number of instructions as the cost metric.
8069 In the future this should be tweaked to compute bytes for optimize_size and
8070 to take into account the performance of various instructions on various CPUs. */
8071 static int
8072 ix86_fp_comparison_arithmetics_cost (code)
8073 enum rtx_code code;
8074 {
8075 if (!TARGET_IEEE_FP)
8076 return 4;
8077 /* The cost of code output by ix86_expand_fp_compare. */
8078 switch (code)
8079 {
8080 case UNLE:
8081 case UNLT:
8082 case LTGT:
8083 case GT:
8084 case GE:
8085 case UNORDERED:
8086 case ORDERED:
8087 case UNEQ:
8088 return 4;
8089 break;
8090 case LT:
8091 case NE:
8092 case EQ:
8093 case UNGE:
8094 return 5;
8095 break;
8096 case LE:
8097 case UNGT:
8098 return 6;
8099 break;
8100 default:
8101 abort ();
8102 }
8103 }
8104
8105 /* Return cost of comparison done using fcomi operation.
8106 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8107 static int
8108 ix86_fp_comparison_fcomi_cost (code)
8109 enum rtx_code code;
8110 {
8111 enum rtx_code bypass_code, first_code, second_code;
8112 /* Return an arbitrarily high cost when the instruction is not supported - this
8113 prevents gcc from using it. */
8114 if (!TARGET_CMOVE)
8115 return 1024;
8116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8117 return (bypass_code != NIL || second_code != NIL) + 2;
8118 }
8119
8120 /* Return cost of comparison done using sahf operation.
8121 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8122 static int
8123 ix86_fp_comparison_sahf_cost (code)
8124 enum rtx_code code;
8125 {
8126 enum rtx_code bypass_code, first_code, second_code;
8127 /* Return an arbitrarily high cost when the instruction is not preferred - this
8128 keeps gcc from using it. */
8129 if (!TARGET_USE_SAHF && !optimize_size)
8130 return 1024;
8131 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8132 return (bypass_code != NIL || second_code != NIL) + 3;
8133 }
8134
8135 /* Compute cost of the comparison done using any method.
8136 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8137 static int
8138 ix86_fp_comparison_cost (code)
8139 enum rtx_code code;
8140 {
8141 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8142 int min;
8143
8144 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8145 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8146
8147 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8148 if (min > sahf_cost)
8149 min = sahf_cost;
8150 if (min > fcomi_cost)
8151 min = fcomi_cost;
8152 return min;
8153 }
8154
8155 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8156
8157 static rtx
8158 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8159 enum rtx_code code;
8160 rtx op0, op1, scratch;
8161 rtx *second_test;
8162 rtx *bypass_test;
8163 {
8164 enum machine_mode fpcmp_mode, intcmp_mode;
8165 rtx tmp, tmp2;
8166 int cost = ix86_fp_comparison_cost (code);
8167 enum rtx_code bypass_code, first_code, second_code;
8168
8169 fpcmp_mode = ix86_fp_compare_mode (code);
8170 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8171
8172 if (second_test)
8173 *second_test = NULL_RTX;
8174 if (bypass_test)
8175 *bypass_test = NULL_RTX;
8176
8177 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8178
8179 /* Do fcomi/sahf based test when profitable. */
8180 if ((bypass_code == NIL || bypass_test)
8181 && (second_code == NIL || second_test)
8182 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8183 {
8184 if (TARGET_CMOVE)
8185 {
8186 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8187 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8188 tmp);
8189 emit_insn (tmp);
8190 }
8191 else
8192 {
8193 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8194 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8195 if (!scratch)
8196 scratch = gen_reg_rtx (HImode);
8197 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8198 emit_insn (gen_x86_sahf_1 (scratch));
8199 }
8200
8201 /* The FP codes work out to act like unsigned. */
8202 intcmp_mode = fpcmp_mode;
8203 code = first_code;
8204 if (bypass_code != NIL)
8205 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8206 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8207 const0_rtx);
8208 if (second_code != NIL)
8209 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8210 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8211 const0_rtx);
8212 }
8213 else
8214 {
8215 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8216 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8217 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8218 if (!scratch)
8219 scratch = gen_reg_rtx (HImode);
8220 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8221
8222 /* In the unordered case, we have to check C2 for NaNs, which
8223 doesn't happen to work out to anything nice combination-wise.
8224 So do some bit twiddling on the value we've got in AH to come
8225 up with an appropriate set of condition codes. */
8226
8227 intcmp_mode = CCNOmode;
8228 switch (code)
8229 {
8230 case GT:
8231 case UNGT:
8232 if (code == GT || !TARGET_IEEE_FP)
8233 {
8234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8235 code = EQ;
8236 }
8237 else
8238 {
8239 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8240 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8241 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8242 intcmp_mode = CCmode;
8243 code = GEU;
8244 }
8245 break;
8246 case LT:
8247 case UNLT:
8248 if (code == LT && TARGET_IEEE_FP)
8249 {
8250 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8251 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8252 intcmp_mode = CCmode;
8253 code = EQ;
8254 }
8255 else
8256 {
8257 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8258 code = NE;
8259 }
8260 break;
8261 case GE:
8262 case UNGE:
8263 if (code == GE || !TARGET_IEEE_FP)
8264 {
8265 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8266 code = EQ;
8267 }
8268 else
8269 {
8270 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8271 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8272 GEN_INT (0x01)));
8273 code = NE;
8274 }
8275 break;
8276 case LE:
8277 case UNLE:
8278 if (code == LE && TARGET_IEEE_FP)
8279 {
8280 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8281 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8282 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8283 intcmp_mode = CCmode;
8284 code = LTU;
8285 }
8286 else
8287 {
8288 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8289 code = NE;
8290 }
8291 break;
8292 case EQ:
8293 case UNEQ:
8294 if (code == EQ && TARGET_IEEE_FP)
8295 {
8296 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8297 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8298 intcmp_mode = CCmode;
8299 code = EQ;
8300 }
8301 else
8302 {
8303 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8304 code = NE;
8305 break;
8306 }
8307 break;
8308 case NE:
8309 case LTGT:
8310 if (code == NE && TARGET_IEEE_FP)
8311 {
8312 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8313 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8314 GEN_INT (0x40)));
8315 code = NE;
8316 }
8317 else
8318 {
8319 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8320 code = EQ;
8321 }
8322 break;
8323
8324 case UNORDERED:
8325 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8326 code = NE;
8327 break;
8328 case ORDERED:
8329 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8330 code = EQ;
8331 break;
8332
8333 default:
8334 abort ();
8335 }
8336 }
8337
8338 /* Return the test that should be put into the flags user, i.e.
8339 the bcc, scc, or cmov instruction. */
8340 return gen_rtx_fmt_ee (code, VOIDmode,
8341 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8342 const0_rtx);
8343 }
8344
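/* Expand a comparison of ix86_compare_op0 and ix86_compare_op1 using CODE,
   dispatching to the integer or floating point expander. SECOND_TEST and
   BYPASS_TEST, when non-null, receive any extra tests needed for IEEE
   floating point comparisons. Return the flags test for the flags user. */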
8345 rtx
8346 ix86_expand_compare (code, second_test, bypass_test)
8347 enum rtx_code code;
8348 rtx *second_test, *bypass_test;
8349 {
8350 rtx op0, op1, ret;
8351 op0 = ix86_compare_op0;
8352 op1 = ix86_compare_op1;
8353
8354 if (second_test)
8355 *second_test = NULL_RTX;
8356 if (bypass_test)
8357 *bypass_test = NULL_RTX;
8358
8359 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8360 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8361 second_test, bypass_test);
8362 else
8363 ret = ix86_expand_int_compare (code, op0, op1);
8364
8365 return ret;
8366 }
8367
8368 /* Return true if the CODE will result in nontrivial jump sequence. */
8369 bool
8370 ix86_fp_jump_nontrivial_p (code)
8371 enum rtx_code code;
8372 {
8373 enum rtx_code bypass_code, first_code, second_code;
8374 if (!TARGET_CMOVE)
8375 return true;
8376 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8377 return bypass_code != NIL || second_code != NIL;
8378 }
8379
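/* Expand a conditional branch to LABEL on comparison CODE of
   ix86_compare_op0 and ix86_compare_op1. DImode comparisons on 32-bit
   targets are split into multiple compare-and-branch sequences. */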
8380 void
8381 ix86_expand_branch (code, label)
8382 enum rtx_code code;
8383 rtx label;
8384 {
8385 rtx tmp;
8386
8387 switch (GET_MODE (ix86_compare_op0))
8388 {
8389 case QImode:
8390 case HImode:
8391 case SImode:
8392 simple:
8393 tmp = ix86_expand_compare (code, NULL, NULL);
8394 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8395 gen_rtx_LABEL_REF (VOIDmode, label),
8396 pc_rtx);
8397 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8398 return;
8399
8400 case SFmode:
8401 case DFmode:
8402 case XFmode:
8403 case TFmode:
8404 {
8405 rtvec vec;
8406 int use_fcomi;
8407 enum rtx_code bypass_code, first_code, second_code;
8408
8409 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8410 &ix86_compare_op1);
8411
8412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8413
8414 /* Check whether we will use the natural sequence with one jump. If
8415 so, we can expand the jump early. Otherwise delay expansion by
8416 creating a compound insn so as not to confuse the optimizers. */
8417 if (bypass_code == NIL && second_code == NIL
8418 && TARGET_CMOVE)
8419 {
8420 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8421 gen_rtx_LABEL_REF (VOIDmode, label),
8422 pc_rtx, NULL_RTX);
8423 }
8424 else
8425 {
8426 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8427 ix86_compare_op0, ix86_compare_op1);
8428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8429 gen_rtx_LABEL_REF (VOIDmode, label),
8430 pc_rtx);
8431 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8432
8433 use_fcomi = ix86_use_fcomi_compare (code);
8434 vec = rtvec_alloc (3 + !use_fcomi);
8435 RTVEC_ELT (vec, 0) = tmp;
8436 RTVEC_ELT (vec, 1)
8437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8438 RTVEC_ELT (vec, 2)
8439 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8440 if (! use_fcomi)
8441 RTVEC_ELT (vec, 3)
8442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8443
8444 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8445 }
8446 return;
8447 }
8448
8449 case DImode:
8450 if (TARGET_64BIT)
8451 goto simple;
8452 /* Expand DImode branch into multiple compare+branch. */
8453 {
8454 rtx lo[2], hi[2], label2;
8455 enum rtx_code code1, code2, code3;
8456
8457 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8458 {
8459 tmp = ix86_compare_op0;
8460 ix86_compare_op0 = ix86_compare_op1;
8461 ix86_compare_op1 = tmp;
8462 code = swap_condition (code);
8463 }
8464 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8465 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8466
8467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8468 avoid two branches. This costs one extra insn, so it is disabled when
8469 optimizing for size. */
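/* For example, a DImode test a == b becomes
   ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, a single SImode test. */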
8470
8471 if ((code == EQ || code == NE)
8472 && (!optimize_size
8473 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8474 {
8475 rtx xor0, xor1;
8476
8477 xor1 = hi[0];
8478 if (hi[1] != const0_rtx)
8479 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8480 NULL_RTX, 0, OPTAB_WIDEN);
8481
8482 xor0 = lo[0];
8483 if (lo[1] != const0_rtx)
8484 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8485 NULL_RTX, 0, OPTAB_WIDEN);
8486
8487 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8488 NULL_RTX, 0, OPTAB_WIDEN);
8489
8490 ix86_compare_op0 = tmp;
8491 ix86_compare_op1 = const0_rtx;
8492 ix86_expand_branch (code, label);
8493 return;
8494 }
8495
8496 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
8497 op1 is a constant, and the low word is zero, then we can just
8498 examine the high word. */
8499
8500 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8501 switch (code)
8502 {
8503 case LT: case LTU: case GE: case GEU:
8504 ix86_compare_op0 = hi[0];
8505 ix86_compare_op1 = hi[1];
8506 ix86_expand_branch (code, label);
8507 return;
8508 default:
8509 break;
8510 }
8511
8512 /* Otherwise, we need two or three jumps. */
8513
8514 label2 = gen_label_rtx ();
8515
8516 code1 = code;
8517 code2 = swap_condition (code);
8518 code3 = unsigned_condition (code);
8519
8520 switch (code)
8521 {
8522 case LT: case GT: case LTU: case GTU:
8523 break;
8524
8525 case LE: code1 = LT; code2 = GT; break;
8526 case GE: code1 = GT; code2 = LT; break;
8527 case LEU: code1 = LTU; code2 = GTU; break;
8528 case GEU: code1 = GTU; code2 = LTU; break;
8529
8530 case EQ: code1 = NIL; code2 = NE; break;
8531 case NE: code2 = NIL; break;
8532
8533 default:
8534 abort ();
8535 }
8536
8537 /*
8538 * a < b =>
8539 * if (hi(a) < hi(b)) goto true;
8540 * if (hi(a) > hi(b)) goto false;
8541 * if (lo(a) < lo(b)) goto true;
8542 * false:
8543 */
8544
8545 ix86_compare_op0 = hi[0];
8546 ix86_compare_op1 = hi[1];
8547
8548 if (code1 != NIL)
8549 ix86_expand_branch (code1, label);
8550 if (code2 != NIL)
8551 ix86_expand_branch (code2, label2);
8552
8553 ix86_compare_op0 = lo[0];
8554 ix86_compare_op1 = lo[1];
8555 ix86_expand_branch (code3, label);
8556
8557 if (code2 != NIL)
8558 emit_label (label2);
8559 return;
8560 }
8561
8562 default:
8563 abort ();
8564 }
8565 }
8566
8567 /* Split branch based on floating point condition. */
8568 void
8569 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8570 enum rtx_code code;
8571 rtx op1, op2, target1, target2, tmp;
8572 {
8573 rtx second, bypass;
8574 rtx label = NULL_RTX;
8575 rtx condition;
8576 int bypass_probability = -1, second_probability = -1, probability = -1;
8577 rtx i;
8578
8579 if (target2 != pc_rtx)
8580 {
8581 rtx tmp = target2;
8582 code = reverse_condition_maybe_unordered (code);
8583 target2 = target1;
8584 target1 = tmp;
8585 }
8586
8587 condition = ix86_expand_fp_compare (code, op1, op2,
8588 tmp, &second, &bypass);
8589
8590 if (split_branch_probability >= 0)
8591 {
8592 /* Distribute the probabilities across the jumps.
8593 Assume that BYPASS and SECOND always test
8594 for UNORDERED. */
8595 probability = split_branch_probability;
8596
8597 /* A value of 1 is low enough that there is no need for the probability
8598 to be updated. Later we may run some experiments and see
8599 if unordered values are more frequent in practice. */
8600 if (bypass)
8601 bypass_probability = 1;
8602 if (second)
8603 second_probability = 1;
8604 }
8605 if (bypass != NULL_RTX)
8606 {
8607 label = gen_label_rtx ();
8608 i = emit_jump_insn (gen_rtx_SET
8609 (VOIDmode, pc_rtx,
8610 gen_rtx_IF_THEN_ELSE (VOIDmode,
8611 bypass,
8612 gen_rtx_LABEL_REF (VOIDmode,
8613 label),
8614 pc_rtx)));
8615 if (bypass_probability >= 0)
8616 REG_NOTES (i)
8617 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8618 GEN_INT (bypass_probability),
8619 REG_NOTES (i));
8620 }
8621 i = emit_jump_insn (gen_rtx_SET
8622 (VOIDmode, pc_rtx,
8623 gen_rtx_IF_THEN_ELSE (VOIDmode,
8624 condition, target1, target2)));
8625 if (probability >= 0)
8626 REG_NOTES (i)
8627 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8628 GEN_INT (probability),
8629 REG_NOTES (i));
8630 if (second != NULL_RTX)
8631 {
8632 i = emit_jump_insn (gen_rtx_SET
8633 (VOIDmode, pc_rtx,
8634 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8635 target2)));
8636 if (second_probability >= 0)
8637 REG_NOTES (i)
8638 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8639 GEN_INT (second_probability),
8640 REG_NOTES (i));
8641 }
8642 if (label != NULL_RTX)
8643 emit_label (label);
8644 }
8645
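/* Expand a setcc: store the QImode result of comparison CODE into DEST.
   Return 1 when the expansion is done, 0 to make the caller FAIL. */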
8646 int
8647 ix86_expand_setcc (code, dest)
8648 enum rtx_code code;
8649 rtx dest;
8650 {
8651 rtx ret, tmp, tmpreg;
8652 rtx second_test, bypass_test;
8653
8654 if (GET_MODE (ix86_compare_op0) == DImode
8655 && !TARGET_64BIT)
8656 return 0; /* FAIL */
8657
8658 if (GET_MODE (dest) != QImode)
8659 abort ();
8660
8661 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8662 PUT_MODE (ret, QImode);
8663
8664 tmp = dest;
8665 tmpreg = dest;
8666
8667 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8668 if (bypass_test || second_test)
8669 {
8670 rtx test = second_test;
8671 int bypass = 0;
8672 rtx tmp2 = gen_reg_rtx (QImode);
8673 if (bypass_test)
8674 {
8675 if (second_test)
8676 abort ();
8677 test = bypass_test;
8678 bypass = 1;
8679 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8680 }
8681 PUT_MODE (test, QImode);
8682 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8683
8684 if (bypass)
8685 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8686 else
8687 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8688 }
8689
8690 return 1; /* DONE */
8691 }
8692
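/* Expand an integer conditional move: operands[0] receives operands[2]
   when the comparison operands[1] holds, operands[3] otherwise.
   Return 1 when the expansion is done, 0 to make the caller FAIL. */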
8693 int
8694 ix86_expand_int_movcc (operands)
8695 rtx operands[];
8696 {
8697 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8698 rtx compare_seq, compare_op;
8699 rtx second_test, bypass_test;
8700 enum machine_mode mode = GET_MODE (operands[0]);
8701
8702 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
8703 When the comparison is done with an immediate, we can convert it to LTU or
8704 GEU by altering the integer. */
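/* For example, an unsigned x <= 5 (LEU) becomes x < 6 (LTU), and
   x > 5 (GTU) becomes x >= 6 (GEU). */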
8705
8706 if ((code == LEU || code == GTU)
8707 && GET_CODE (ix86_compare_op1) == CONST_INT
8708 && mode != HImode
8709 && INTVAL (ix86_compare_op1) != -1
8710 /* For x86-64, the immediate field in the instruction is 32-bit
8711 signed, so we can't increment a DImode value above 0x7fffffff. */
8712 && (!TARGET_64BIT
8713 || GET_MODE (ix86_compare_op0) != DImode
8714 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8715 && GET_CODE (operands[2]) == CONST_INT
8716 && GET_CODE (operands[3]) == CONST_INT)
8717 {
8718 if (code == LEU)
8719 code = LTU;
8720 else
8721 code = GEU;
8722 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8723 GET_MODE (ix86_compare_op0));
8724 }
8725
8726 start_sequence ();
8727 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8728 compare_seq = get_insns ();
8729 end_sequence ();
8730
8731 compare_code = GET_CODE (compare_op);
8732
8733 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8734 HImode insns, we'd be swallowed in word prefix ops. */
8735
8736 if (mode != HImode
8737 && (mode != DImode || TARGET_64BIT)
8738 && GET_CODE (operands[2]) == CONST_INT
8739 && GET_CODE (operands[3]) == CONST_INT)
8740 {
8741 rtx out = operands[0];
8742 HOST_WIDE_INT ct = INTVAL (operands[2]);
8743 HOST_WIDE_INT cf = INTVAL (operands[3]);
8744 HOST_WIDE_INT diff;
8745
8746 if ((compare_code == LTU || compare_code == GEU)
8747 && !second_test && !bypass_test)
8748 {
8749 /* Detect overlap between destination and compare sources. */
8750 rtx tmp = out;
8751
8752 /* To simplify the rest of the code, restrict to the GEU case. */
8753 if (compare_code == LTU)
8754 {
8755 HOST_WIDE_INT tmp = ct;
8756 ct = cf;
8757 cf = tmp;
8758 compare_code = reverse_condition (compare_code);
8759 code = reverse_condition (code);
8760 }
8761 diff = ct - cf;
8762
8763 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8764 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8765 tmp = gen_reg_rtx (mode);
8766
8767 emit_insn (compare_seq);
8768 if (mode == DImode)
8769 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8770 else
8771 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8772
8773 if (diff == 1)
8774 {
8775 /*
8776 * cmpl op0,op1
8777 * sbbl dest,dest
8778 * [addl dest, ct]
8779 *
8780 * Size 5 - 8.
8781 */
8782 if (ct)
8783 tmp = expand_simple_binop (mode, PLUS,
8784 tmp, GEN_INT (ct),
8785 tmp, 1, OPTAB_DIRECT);
8786 }
8787 else if (cf == -1)
8788 {
8789 /*
8790 * cmpl op0,op1
8791 * sbbl dest,dest
8792 * orl $ct, dest
8793 *
8794 * Size 8.
8795 */
8796 tmp = expand_simple_binop (mode, IOR,
8797 tmp, GEN_INT (ct),
8798 tmp, 1, OPTAB_DIRECT);
8799 }
8800 else if (diff == -1 && ct)
8801 {
8802 /*
8803 * cmpl op0,op1
8804 * sbbl dest,dest
8805 * notl dest
8806 * [addl dest, cf]
8807 *
8808 * Size 8 - 11.
8809 */
8810 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8811 if (cf)
8812 tmp = expand_simple_binop (mode, PLUS,
8813 tmp, GEN_INT (cf),
8814 tmp, 1, OPTAB_DIRECT);
8815 }
8816 else
8817 {
8818 /*
8819 * cmpl op0,op1
8820 * sbbl dest,dest
8821 * [notl dest]
8822 * andl cf - ct, dest
8823 * [addl dest, ct]
8824 *
8825 * Size 8 - 11.
8826 */
8827
8828 if (cf == 0)
8829 {
8830 cf = ct;
8831 ct = 0;
8832 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8833 }
8834
8835 tmp = expand_simple_binop (mode, AND,
8836 tmp,
8837 gen_int_mode (cf - ct, mode),
8838 tmp, 1, OPTAB_DIRECT);
8839 if (ct)
8840 tmp = expand_simple_binop (mode, PLUS,
8841 tmp, GEN_INT (ct),
8842 tmp, 1, OPTAB_DIRECT);
8843 }
8844
8845 if (tmp != out)
8846 emit_move_insn (out, tmp);
8847
8848 return 1; /* DONE */
8849 }
8850
8851 diff = ct - cf;
8852 if (diff < 0)
8853 {
8854 HOST_WIDE_INT tmp;
8855 tmp = ct, ct = cf, cf = tmp;
8856 diff = -diff;
8857 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8858 {
8859 /* We may be reversing an unordered compare to a normal compare, which
8860 is not valid in general (we may convert a non-trapping condition
8861 to a trapping one); however, on i386 we currently emit all
8862 comparisons unordered. */
8863 compare_code = reverse_condition_maybe_unordered (compare_code);
8864 code = reverse_condition_maybe_unordered (code);
8865 }
8866 else
8867 {
8868 compare_code = reverse_condition (compare_code);
8869 code = reverse_condition (code);
8870 }
8871 }
8872
8873 compare_code = NIL;
8874 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8875 && GET_CODE (ix86_compare_op1) == CONST_INT)
8876 {
8877 if (ix86_compare_op1 == const0_rtx
8878 && (code == LT || code == GE))
8879 compare_code = code;
8880 else if (ix86_compare_op1 == constm1_rtx)
8881 {
8882 if (code == LE)
8883 compare_code = LT;
8884 else if (code == GT)
8885 compare_code = GE;
8886 }
8887 }
8888
8889 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8890 if (compare_code != NIL
8891 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8892 && (cf == -1 || ct == -1))
8893 {
8894 /* If the lea code below could be used, only optimize
8895 if it results in a 2-insn sequence. */
8896
8897 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8898 || diff == 3 || diff == 5 || diff == 9)
8899 || (compare_code == LT && ct == -1)
8900 || (compare_code == GE && cf == -1))
8901 {
8902 /*
8903 * notl op1 (if necessary)
8904 * sarl $31, op1
8905 * orl cf, op1
8906 */
8907 if (ct != -1)
8908 {
8909 cf = ct;
8910 ct = -1;
8911 code = reverse_condition (code);
8912 }
8913
8914 out = emit_store_flag (out, code, ix86_compare_op0,
8915 ix86_compare_op1, VOIDmode, 0, -1);
8916
8917 out = expand_simple_binop (mode, IOR,
8918 out, GEN_INT (cf),
8919 out, 1, OPTAB_DIRECT);
8920 if (out != operands[0])
8921 emit_move_insn (operands[0], out);
8922
8923 return 1; /* DONE */
8924 }
8925 }
8926
8927 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8928 || diff == 3 || diff == 5 || diff == 9)
8929 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8930 {
8931 /*
8932 * xorl dest,dest
8933 * cmpl op1,op2
8934 * setcc dest
8935 * lea cf(dest*(ct-cf)),dest
8936 *
8937 * Size 14.
8938 *
8939 * This also catches the degenerate setcc-only case.
8940 */
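/* For example, with ct = 5 and cf = 1 the difference is 4, so the
   result is computed as 1 + 4*setcc by the lea. */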
8941
8942 rtx tmp;
8943 int nops;
8944
8945 out = emit_store_flag (out, code, ix86_compare_op0,
8946 ix86_compare_op1, VOIDmode, 0, 1);
8947
8948 nops = 0;
8949 /* On x86_64 the lea instruction operates on Pmode, so we need
8950 to get the arithmetic done in the proper mode to match. */
8951 if (diff == 1)
8952 tmp = out;
8953 else
8954 {
8955 rtx out1;
8956 out1 = out;
8957 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8958 nops++;
8959 if (diff & 1)
8960 {
8961 tmp = gen_rtx_PLUS (mode, tmp, out1);
8962 nops++;
8963 }
8964 }
8965 if (cf != 0)
8966 {
8967 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8968 nops++;
8969 }
8970 if (tmp != out
8971 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8972 {
8973 if (nops == 1)
8974 {
8975 rtx clob;
8976
8977 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8978 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8979
8980 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8981 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8982 emit_insn (tmp);
8983 }
8984 else
8985 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8986 }
8987 if (out != operands[0])
8988 emit_move_insn (operands[0], out);
8989
8990 return 1; /* DONE */
8991 }
8992
8993 /*
8994 * General case: Jumpful:
8995 * xorl dest,dest cmpl op1, op2
8996 * cmpl op1, op2 movl ct, dest
8997 * setcc dest jcc 1f
8998 * decl dest movl cf, dest
8999 * andl (cf-ct),dest 1:
9000 * addl ct,dest
9001 *
9002 * Size 20. Size 14.
9003 *
9004 * This is reasonably steep, but branch mispredict costs are
9005 * high on modern cpus, so consider failing only if optimizing
9006 * for space.
9007 *
9008 * %%% Parameterize branch_cost on the tuning architecture, then
9009 * use that. The 80386 couldn't care less about mispredicts.
9010 */
9011
9012 if (!optimize_size && !TARGET_CMOVE)
9013 {
9014 if (cf == 0)
9015 {
9016 cf = ct;
9017 ct = 0;
9018 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9019 /* We may be reversing an unordered compare to a normal compare,
9020 which is not valid in general (we may convert a non-trapping
9021 condition to a trapping one); however, on i386 we currently
9022 emit all comparisons unordered. */
9023 code = reverse_condition_maybe_unordered (code);
9024 else
9025 {
9026 code = reverse_condition (code);
9027 if (compare_code != NIL)
9028 compare_code = reverse_condition (compare_code);
9029 }
9030 }
9031
9032 if (compare_code != NIL)
9033 {
9034 /* notl op1 (if needed)
9035 sarl $31, op1
9036 andl (cf-ct), op1
9037 addl ct, op1
9038
9039 For x < 0 (resp. x <= -1) there will be no notl,
9040 so if possible swap the constants to get rid of the
9041 complement.
9042 True/false will be -1/0 while code below (store flag
9043 followed by decrement) is 0/-1, so the constants need
9044 to be exchanged once more. */
9045
9046 if (compare_code == GE || !cf)
9047 {
9048 code = reverse_condition (code);
9049 compare_code = LT;
9050 }
9051 else
9052 {
9053 HOST_WIDE_INT tmp = cf;
9054 cf = ct;
9055 ct = tmp;
9056 }
9057
9058 out = emit_store_flag (out, code, ix86_compare_op0,
9059 ix86_compare_op1, VOIDmode, 0, -1);
9060 }
9061 else
9062 {
9063 out = emit_store_flag (out, code, ix86_compare_op0,
9064 ix86_compare_op1, VOIDmode, 0, 1);
9065
9066 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9067 out, 1, OPTAB_DIRECT);
9068 }
9069
9070 out = expand_simple_binop (mode, AND, out,
9071 gen_int_mode (cf - ct, mode),
9072 out, 1, OPTAB_DIRECT);
9073 if (ct)
9074 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9075 out, 1, OPTAB_DIRECT);
9076 if (out != operands[0])
9077 emit_move_insn (operands[0], out);
9078
9079 return 1; /* DONE */
9080 }
9081 }
9082
9083 if (!TARGET_CMOVE)
9084 {
9085 /* Try a few things more with specific constants and a variable. */
9086
9087 optab op;
9088 rtx var, orig_out, out, tmp;
9089
9090 if (optimize_size)
9091 return 0; /* FAIL */
9092
9093 /* If one of the two operands is an interesting constant, load a
9094 constant with the above and mask it in with a logical operation. */
9095
9096 if (GET_CODE (operands[2]) == CONST_INT)
9097 {
9098 var = operands[3];
9099 if (INTVAL (operands[2]) == 0)
9100 operands[3] = constm1_rtx, op = and_optab;
9101 else if (INTVAL (operands[2]) == -1)
9102 operands[3] = const0_rtx, op = ior_optab;
9103 else
9104 return 0; /* FAIL */
9105 }
9106 else if (GET_CODE (operands[3]) == CONST_INT)
9107 {
9108 var = operands[2];
9109 if (INTVAL (operands[3]) == 0)
9110 operands[2] = constm1_rtx, op = and_optab;
9111 else if (INTVAL (operands[3]) == -1)
9112 operands[2] = const0_rtx, op = ior_optab;
9113 else
9114 return 0; /* FAIL */
9115 }
9116 else
9117 return 0; /* FAIL */
9118
9119 orig_out = operands[0];
9120 tmp = gen_reg_rtx (mode);
9121 operands[0] = tmp;
9122
9123 /* Recurse to get the constant loaded. */
9124 if (ix86_expand_int_movcc (operands) == 0)
9125 return 0; /* FAIL */
9126
9127 /* Mask in the interesting variable. */
9128 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9129 OPTAB_WIDEN);
9130 if (out != orig_out)
9131 emit_move_insn (orig_out, out);
9132
9133 return 1; /* DONE */
9134 }
9135
9136 /*
9137 * For comparison with above,
9138 *
9139 * movl cf,dest
9140 * movl ct,tmp
9141 * cmpl op1,op2
9142 * cmovcc tmp,dest
9143 *
9144 * Size 15.
9145 */
9146
9147 if (! nonimmediate_operand (operands[2], mode))
9148 operands[2] = force_reg (mode, operands[2]);
9149 if (! nonimmediate_operand (operands[3], mode))
9150 operands[3] = force_reg (mode, operands[3]);
9151
9152 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9153 {
9154 rtx tmp = gen_reg_rtx (mode);
9155 emit_move_insn (tmp, operands[3]);
9156 operands[3] = tmp;
9157 }
9158 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9159 {
9160 rtx tmp = gen_reg_rtx (mode);
9161 emit_move_insn (tmp, operands[2]);
9162 operands[2] = tmp;
9163 }
9164 if (! register_operand (operands[2], VOIDmode)
9165 && ! register_operand (operands[3], VOIDmode))
9166 operands[2] = force_reg (mode, operands[2]);
9167
9168 emit_insn (compare_seq);
9169 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9170 gen_rtx_IF_THEN_ELSE (mode,
9171 compare_op, operands[2],
9172 operands[3])));
9173 if (bypass_test)
9174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9175 gen_rtx_IF_THEN_ELSE (mode,
9176 bypass_test,
9177 operands[3],
9178 operands[0])));
9179 if (second_test)
9180 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9181 gen_rtx_IF_THEN_ELSE (mode,
9182 second_test,
9183 operands[2],
9184 operands[0])));
9185
9186 return 1; /* DONE */
9187 }
9188
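/* Expand a floating point conditional move: operands[0] receives
   operands[2] when the comparison operands[1] holds, operands[3] otherwise,
   using SSE min/max, SSE conditional moves, or fcmov. Return nonzero when
   the expansion is done. */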
9189 int
9190 ix86_expand_fp_movcc (operands)
9191 rtx operands[];
9192 {
9193 enum rtx_code code;
9194 rtx tmp;
9195 rtx compare_op, second_test, bypass_test;
9196
9197 /* For SF/DFmode conditional moves based on comparisons
9198 in the same mode, we may want to use SSE min/max instructions.
9199 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9200 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9201 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9202 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9203 && (!TARGET_IEEE_FP
9204 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9205 /* We may be called from the post-reload splitter. */
9206 && (!REG_P (operands[0])
9207 || SSE_REG_P (operands[0])
9208 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9209 {
9210 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9211 code = GET_CODE (operands[1]);
9212
9213 /* See if we have a (cross) match between the comparison operands and
9214 the conditional move operands. */
9215 if (rtx_equal_p (operands[2], op1))
9216 {
9217 rtx tmp = op0;
9218 op0 = op1;
9219 op1 = tmp;
9220 code = reverse_condition_maybe_unordered (code);
9221 }
9222 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9223 {
9224 /* Check for min operation. */
9225 if (code == LT)
9226 {
9227 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9228 if (memory_operand (op0, VOIDmode))
9229 op0 = force_reg (GET_MODE (operands[0]), op0);
9230 if (GET_MODE (operands[0]) == SFmode)
9231 emit_insn (gen_minsf3 (operands[0], op0, op1));
9232 else
9233 emit_insn (gen_mindf3 (operands[0], op0, op1));
9234 return 1;
9235 }
9236 /* Check for max operation. */
9237 if (code == GT)
9238 {
9239 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9240 if (memory_operand (op0, VOIDmode))
9241 op0 = force_reg (GET_MODE (operands[0]), op0);
9242 if (GET_MODE (operands[0]) == SFmode)
9243 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9244 else
9245 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9246 return 1;
9247 }
9248 }
9249 /* Arrange for the condition to be an sse_comparison_operator. When we are
9250 in non-IEEE mode, try to canonicalize the destination operand
9251 to be first in the comparison - this helps reload avoid extra
9252 moves. */
9253 if (!sse_comparison_operator (operands[1], VOIDmode)
9254 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9255 {
9256 rtx tmp = ix86_compare_op0;
9257 ix86_compare_op0 = ix86_compare_op1;
9258 ix86_compare_op1 = tmp;
9259 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9260 VOIDmode, ix86_compare_op0,
9261 ix86_compare_op1);
9262 }
9263 /* Similarly, try to arrange for the result to be the first operand of the
9264 conditional move. We also don't support the NE comparison on SSE, so try to
9265 avoid it. */
9266 if ((rtx_equal_p (operands[0], operands[3])
9267 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9268 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9269 {
9270 rtx tmp = operands[2];
9271 operands[2] = operands[3];
9272 operands[3] = tmp;
9273 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9274 (GET_CODE (operands[1])),
9275 VOIDmode, ix86_compare_op0,
9276 ix86_compare_op1);
9277 }
9278 if (GET_MODE (operands[0]) == SFmode)
9279 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9280 operands[2], operands[3],
9281 ix86_compare_op0, ix86_compare_op1));
9282 else
9283 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9284 operands[2], operands[3],
9285 ix86_compare_op0, ix86_compare_op1));
9286 return 1;
9287 }
9288
9289 /* The floating point conditional move instructions don't directly
9290 support conditions resulting from a signed integer comparison. */
9291
9292 code = GET_CODE (operands[1]);
9293 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9294
9295 /* The floating point conditional move instructions don't directly
9296 support signed integer comparisons. */
9297
9298 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9299 {
9300 if (second_test != NULL || bypass_test != NULL)
9301 abort ();
9302 tmp = gen_reg_rtx (QImode);
9303 ix86_expand_setcc (code, tmp);
9304 code = NE;
9305 ix86_compare_op0 = tmp;
9306 ix86_compare_op1 = const0_rtx;
9307 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9308 }
9309 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9310 {
9311 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9312 emit_move_insn (tmp, operands[3]);
9313 operands[3] = tmp;
9314 }
9315 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9316 {
9317 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9318 emit_move_insn (tmp, operands[2]);
9319 operands[2] = tmp;
9320 }
9321
9322 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9323 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9324 compare_op,
9325 operands[2],
9326 operands[3])));
9327 if (bypass_test)
9328 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9329 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9330 bypass_test,
9331 operands[3],
9332 operands[0])));
9333 if (second_test)
9334 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9335 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9336 second_test,
9337 operands[2],
9338 operands[0])));
9339
9340 return 1;
9341 }
9342
9343 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9344 works for floating point parameters and non-offsettable memories.
9345 For pushes, it returns just stack offsets; the values will be saved
9346 in the right order. At most three parts are generated. */
9347
9348 static int
9349 ix86_split_to_parts (operand, parts, mode)
9350 rtx operand;
9351 rtx *parts;
9352 enum machine_mode mode;
9353 {
9354 int size;
9355
9356 if (!TARGET_64BIT)
9357 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9358 else
9359 size = (GET_MODE_SIZE (mode) + 4) / 8;
9360
9361 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9362 abort ();
9363 if (size < 2 || size > 3)
9364 abort ();
9365
9366 /* Optimize constant pool references to immediates. This is used by fp
9367 moves, which force all constants to memory to allow combining. */
9368 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9369 {
9370 rtx tmp = maybe_get_pool_constant (operand);
9371 if (tmp)
9372 operand = tmp;
9373 }
9374
9375 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9376 {
9377 /* The only non-offsettable memories we handle are pushes. */
9378 if (! push_operand (operand, VOIDmode))
9379 abort ();
9380
9381 operand = copy_rtx (operand);
9382 PUT_MODE (operand, Pmode);
9383 parts[0] = parts[1] = parts[2] = operand;
9384 }
9385 else if (!TARGET_64BIT)
9386 {
9387 if (mode == DImode)
9388 split_di (&operand, 1, &parts[0], &parts[1]);
9389 else
9390 {
9391 if (REG_P (operand))
9392 {
9393 if (!reload_completed)
9394 abort ();
9395 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9396 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9397 if (size == 3)
9398 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9399 }
9400 else if (offsettable_memref_p (operand))
9401 {
9402 operand = adjust_address (operand, SImode, 0);
9403 parts[0] = operand;
9404 parts[1] = adjust_address (operand, SImode, 4);
9405 if (size == 3)
9406 parts[2] = adjust_address (operand, SImode, 8);
9407 }
9408 else if (GET_CODE (operand) == CONST_DOUBLE)
9409 {
9410 REAL_VALUE_TYPE r;
9411 long l[4];
9412
9413 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9414 switch (mode)
9415 {
9416 case XFmode:
9417 case TFmode:
9418 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9419 parts[2] = gen_int_mode (l[2], SImode);
9420 break;
9421 case DFmode:
9422 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9423 break;
9424 default:
9425 abort ();
9426 }
9427 parts[1] = gen_int_mode (l[1], SImode);
9428 parts[0] = gen_int_mode (l[0], SImode);
9429 }
9430 else
9431 abort ();
9432 }
9433 }
9434 else
9435 {
9436 if (mode == TImode)
9437 split_ti (&operand, 1, &parts[0], &parts[1]);
9438 if (mode == XFmode || mode == TFmode)
9439 {
9440 if (REG_P (operand))
9441 {
9442 if (!reload_completed)
9443 abort ();
9444 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9445 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9446 }
9447 else if (offsettable_memref_p (operand))
9448 {
9449 operand = adjust_address (operand, DImode, 0);
9450 parts[0] = operand;
9451 parts[1] = adjust_address (operand, SImode, 8);
9452 }
9453 else if (GET_CODE (operand) == CONST_DOUBLE)
9454 {
9455 REAL_VALUE_TYPE r;
9456 long l[3];
9457
9458 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9459 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9460 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
9461 if (HOST_BITS_PER_WIDE_INT >= 64)
9462 parts[0]
9463 = gen_int_mode
9464 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9465 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9466 DImode);
9467 else
9468 parts[0] = immed_double_const (l[0], l[1], DImode);
9469 parts[1] = gen_int_mode (l[2], SImode);
9470 }
9471 else
9472 abort ();
9473 }
9474 }
9475
9476 return size;
9477 }
9478
9479 /* Emit insns to perform a move or push of DI, DF, and XF values.
9480 Return false when normal moves are needed; true when all required
9481 insns have been emitted. Operands 2-4 contain the input values
9482 in the correct order; operands 5-7 contain the output values. */
9483
9484 void
9485 ix86_split_long_move (operands)
9486 rtx operands[];
9487 {
9488 rtx part[2][3];
9489 int nparts;
9490 int push = 0;
9491 int collisions = 0;
9492 enum machine_mode mode = GET_MODE (operands[0]);
9493
9494 /* The DFmode expanders may ask us to move a double.
9495 For a 64-bit target this is a single move. By hiding that fact
9496 here we simplify the i386.md splitters. */
9497 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9498 {
9499 /* Optimize constant pool references to immediates. This is used by
9500 fp moves, which force all constants to memory to allow combining. */
9501
9502 if (GET_CODE (operands[1]) == MEM
9503 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9504 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9505 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9506 if (push_operand (operands[0], VOIDmode))
9507 {
9508 operands[0] = copy_rtx (operands[0]);
9509 PUT_MODE (operands[0], Pmode);
9510 }
9511 else
9512 operands[0] = gen_lowpart (DImode, operands[0]);
9513 operands[1] = gen_lowpart (DImode, operands[1]);
9514 emit_move_insn (operands[0], operands[1]);
9515 return;
9516 }
9517
9518 /* The only non-offsettable memory we handle is push. */
9519 if (push_operand (operands[0], VOIDmode))
9520 push = 1;
9521 else if (GET_CODE (operands[0]) == MEM
9522 && ! offsettable_memref_p (operands[0]))
9523 abort ();
9524
9525 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9526 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9527
9528 /* When emitting a push, take care of source operands on the stack. */
9529 if (push && GET_CODE (operands[1]) == MEM
9530 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9531 {
9532 if (nparts == 3)
9533 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9534 XEXP (part[1][2], 0));
9535 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9536 XEXP (part[1][1], 0));
9537 }
9538
9539 /* We need to do the copy in the right order in case an address register
9540 of the source overlaps the destination. */
9541 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9542 {
9543 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9544 collisions++;
9545 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9546 collisions++;
9547 if (nparts == 3
9548 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9549 collisions++;
9550
9551 /* Collision in the middle part can be handled by reordering. */
9552 if (collisions == 1 && nparts == 3
9553 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9554 {
9555 rtx tmp;
9556 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9557 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9558 }
9559
9560 /* If there are more collisions, we can't handle them by reordering.
9561 Do an lea to the last part and use only one colliding move. */
9562 else if (collisions > 1)
9563 {
9564 collisions = 1;
9565 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9566 XEXP (part[1][0], 0)));
9567 part[1][0] = change_address (part[1][0],
9568 TARGET_64BIT ? DImode : SImode,
9569 part[0][nparts - 1]);
9570 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9571 if (nparts == 3)
9572 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9573 }
9574 }
9575
9576 if (push)
9577 {
9578 if (!TARGET_64BIT)
9579 {
9580 if (nparts == 3)
9581 {
9582 /* We use only the first 12 bytes of a TFmode value, but for pushing we
9583 are required to adjust the stack as if we were pushing a real 16-byte
9584 value. */
9585 if (mode == TFmode && !TARGET_64BIT)
9586 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9587 GEN_INT (-4)));
9588 emit_move_insn (part[0][2], part[1][2]);
9589 }
9590 }
9591 else
9592 {
9593 /* In 64-bit mode we don't have a 32-bit push available. If this is a
9594 register, that is OK - we will just use the larger counterpart. We also
9595 retype memory - this comes from an attempt to avoid the REX prefix on
9596 moving the second half of a TFmode value. */
9597 if (GET_MODE (part[1][1]) == SImode)
9598 {
9599 if (GET_CODE (part[1][1]) == MEM)
9600 part[1][1] = adjust_address (part[1][1], DImode, 0);
9601 else if (REG_P (part[1][1]))
9602 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9603 else
9604 abort ();
9605 if (GET_MODE (part[1][0]) == SImode)
9606 part[1][0] = part[1][1];
9607 }
9608 }
9609 emit_move_insn (part[0][1], part[1][1]);
9610 emit_move_insn (part[0][0], part[1][0]);
9611 return;
9612 }
9613
9614 /* Choose the correct order so as not to overwrite the source before it is copied. */
9615 if ((REG_P (part[0][0])
9616 && REG_P (part[1][1])
9617 && (REGNO (part[0][0]) == REGNO (part[1][1])
9618 || (nparts == 3
9619 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9620 || (collisions > 0
9621 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9622 {
9623 if (nparts == 3)
9624 {
9625 operands[2] = part[0][2];
9626 operands[3] = part[0][1];
9627 operands[4] = part[0][0];
9628 operands[5] = part[1][2];
9629 operands[6] = part[1][1];
9630 operands[7] = part[1][0];
9631 }
9632 else
9633 {
9634 operands[2] = part[0][1];
9635 operands[3] = part[0][0];
9636 operands[5] = part[1][1];
9637 operands[6] = part[1][0];
9638 }
9639 }
9640 else
9641 {
9642 if (nparts == 3)
9643 {
9644 operands[2] = part[0][0];
9645 operands[3] = part[0][1];
9646 operands[4] = part[0][2];
9647 operands[5] = part[1][0];
9648 operands[6] = part[1][1];
9649 operands[7] = part[1][2];
9650 }
9651 else
9652 {
9653 operands[2] = part[0][0];
9654 operands[3] = part[0][1];
9655 operands[5] = part[1][0];
9656 operands[6] = part[1][1];
9657 }
9658 }
9659 emit_move_insn (operands[2], operands[5]);
9660 emit_move_insn (operands[3], operands[6]);
9661 if (nparts == 3)
9662 emit_move_insn (operands[4], operands[7]);
9663
9664 return;
9665 }
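/* Editor's note (illustrative, not part of the original sources): the
   ordering logic above matters for moves such as

       edx:eax = *(long long *) eax;

   where eax both addresses the source and receives one half of the
   destination.  The word that overwrites the address register must be
   moved last, or, when several parts collide, the address is first
   loaded into the last destination part with an lea and the remaining
   moves are rewritten relative to it.  */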
9666
9667 void
9668 ix86_split_ashldi (operands, scratch)
9669 rtx *operands, scratch;
9670 {
9671 rtx low[2], high[2];
9672 int count;
9673
9674 if (GET_CODE (operands[2]) == CONST_INT)
9675 {
9676 split_di (operands, 2, low, high);
9677 count = INTVAL (operands[2]) & 63;
9678
9679 if (count >= 32)
9680 {
9681 emit_move_insn (high[0], low[1]);
9682 emit_move_insn (low[0], const0_rtx);
9683
9684 if (count > 32)
9685 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9686 }
9687 else
9688 {
9689 if (!rtx_equal_p (operands[0], operands[1]))
9690 emit_move_insn (operands[0], operands[1]);
9691 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9692 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9693 }
9694 }
9695 else
9696 {
9697 if (!rtx_equal_p (operands[0], operands[1]))
9698 emit_move_insn (operands[0], operands[1]);
9699
9700 split_di (operands, 1, low, high);
9701
9702 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9703 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9704
9705 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9706 {
9707 if (! no_new_pseudos)
9708 scratch = force_reg (SImode, const0_rtx);
9709 else
9710 emit_move_insn (scratch, const0_rtx);
9711
9712 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9713 scratch));
9714 }
9715 else
9716 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9717 }
9718 }
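/* Editor's sketch (hypothetical helper, not part of GCC): a plain C
   model of the 64-bit left-shift split emitted above, operating on the
   two 32-bit halves the same way the SHLD/SHL pair does.  */
static void
sketch_split_shift_left_di (unsigned int *lo, unsigned int *hi,
			    unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      /* The whole low half moves into the high half; low becomes zero.  */
      *hi = count > 32 ? *lo << (count - 32) : *lo;
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* SHLD */
      *lo <<= count;					/* SHL */
    }
}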
9719
9720 void
9721 ix86_split_ashrdi (operands, scratch)
9722 rtx *operands, scratch;
9723 {
9724 rtx low[2], high[2];
9725 int count;
9726
9727 if (GET_CODE (operands[2]) == CONST_INT)
9728 {
9729 split_di (operands, 2, low, high);
9730 count = INTVAL (operands[2]) & 63;
9731
9732 if (count >= 32)
9733 {
9734 emit_move_insn (low[0], high[1]);
9735
9736 if (! reload_completed)
9737 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9738 else
9739 {
9740 emit_move_insn (high[0], low[0]);
9741 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9742 }
9743
9744 if (count > 32)
9745 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9746 }
9747 else
9748 {
9749 if (!rtx_equal_p (operands[0], operands[1]))
9750 emit_move_insn (operands[0], operands[1]);
9751 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9752 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9753 }
9754 }
9755 else
9756 {
9757 if (!rtx_equal_p (operands[0], operands[1]))
9758 emit_move_insn (operands[0], operands[1]);
9759
9760 split_di (operands, 1, low, high);
9761
9762 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9763 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9764
9765 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9766 {
9767 if (! no_new_pseudos)
9768 scratch = gen_reg_rtx (SImode);
9769 emit_move_insn (scratch, high[0]);
9770 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9771 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9772 scratch));
9773 }
9774 else
9775 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9776 }
9777 }
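/* Editor's sketch (hypothetical helper, not part of GCC): a C model of
   the arithmetic right-shift split above.  The notable difference from
   the logical variant is that the high half is refilled with copies of
   the sign bit (this sketch assumes the usual arithmetic behaviour of
   >> on a negative int).  */
static void
sketch_split_shift_right_arith_di (unsigned int *lo, int *hi,
				   unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      int old_hi = *hi;
      *lo = (unsigned int) (count > 32 ? old_hi >> (count - 32) : old_hi);
      *hi = old_hi >> 31;				/* all sign bits */
    }
  else if (count > 0)
    {
      *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count)); /* SHRD */
      *hi >>= count;						   /* SAR */
    }
}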
9778
9779 void
9780 ix86_split_lshrdi (operands, scratch)
9781 rtx *operands, scratch;
9782 {
9783 rtx low[2], high[2];
9784 int count;
9785
9786 if (GET_CODE (operands[2]) == CONST_INT)
9787 {
9788 split_di (operands, 2, low, high);
9789 count = INTVAL (operands[2]) & 63;
9790
9791 if (count >= 32)
9792 {
9793 emit_move_insn (low[0], high[1]);
9794 emit_move_insn (high[0], const0_rtx);
9795
9796 if (count > 32)
9797 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9798 }
9799 else
9800 {
9801 if (!rtx_equal_p (operands[0], operands[1]))
9802 emit_move_insn (operands[0], operands[1]);
9803 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9804 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9805 }
9806 }
9807 else
9808 {
9809 if (!rtx_equal_p (operands[0], operands[1]))
9810 emit_move_insn (operands[0], operands[1]);
9811
9812 split_di (operands, 1, low, high);
9813
9814 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9815 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9816
9817 /* Heh. By reversing the arguments, we can reuse this pattern. */
9818 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9819 {
9820 if (! no_new_pseudos)
9821 scratch = force_reg (SImode, const0_rtx);
9822 else
9823 emit_move_insn (scratch, const0_rtx);
9824
9825 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9826 scratch));
9827 }
9828 else
9829 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9830 }
9831 }
9832
9833 /* Helper function for the string operations below. Test whether VARIABLE
9834 is aligned to VALUE bytes. If it is, jump to the returned label. */
9835 static rtx
9836 ix86_expand_aligntest (variable, value)
9837 rtx variable;
9838 int value;
9839 {
9840 rtx label = gen_label_rtx ();
9841 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9842 if (GET_MODE (variable) == DImode)
9843 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9844 else
9845 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9846 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9847 1, label);
9848 return label;
9849 }
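/* Editor's note (illustrative, not GCC code): the RTL emitted above
   amounts to this C test; the caller places its fix-up code right after
   the call and the returned label right after that, so the fix-up is
   skipped whenever the tested bits are already clear.  */
static int
sketch_aligntest (unsigned long address, int value)
{
  return (address & value) == 0;	/* aligned: jump over the fix-up */
}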
9850
9851 /* Decrement COUNTREG by VALUE. */
9852 static void
9853 ix86_adjust_counter (countreg, value)
9854 rtx countreg;
9855 HOST_WIDE_INT value;
9856 {
9857 if (GET_MODE (countreg) == DImode)
9858 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9859 else
9860 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9861 }
9862
9863 /* Zero extend possibly SImode EXP to Pmode register. */
9864 rtx
9865 ix86_zero_extend_to_Pmode (exp)
9866 rtx exp;
9867 {
9868 rtx r;
9869 if (GET_MODE (exp) == VOIDmode)
9870 return force_reg (Pmode, exp);
9871 if (GET_MODE (exp) == Pmode)
9872 return copy_to_mode_reg (Pmode, exp);
9873 r = gen_reg_rtx (Pmode);
9874 emit_insn (gen_zero_extendsidi2 (r, exp));
9875 return r;
9876 }
9877
9878 /* Expand string move (memcpy) operation. Use i386 string operations when
9879 profitable. expand_clrstr contains similar code. */
9880 int
9881 ix86_expand_movstr (dst, src, count_exp, align_exp)
9882 rtx dst, src, count_exp, align_exp;
9883 {
9884 rtx srcreg, destreg, countreg;
9885 enum machine_mode counter_mode;
9886 HOST_WIDE_INT align = 0;
9887 unsigned HOST_WIDE_INT count = 0;
9888 rtx insns;
9889
9890 start_sequence ();
9891
9892 if (GET_CODE (align_exp) == CONST_INT)
9893 align = INTVAL (align_exp);
9894
9895 /* This simple hack avoids all inlining code and simplifies code below. */
9896 if (!TARGET_ALIGN_STRINGOPS)
9897 align = 64;
9898
9899 if (GET_CODE (count_exp) == CONST_INT)
9900 count = INTVAL (count_exp);
9901
9902 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9903 for 64 bits use SImode when possible, otherwise DImode.
9904 Set count to the number of bytes copied when known at compile time. */
9905 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9906 || x86_64_zero_extended_value (count_exp))
9907 counter_mode = SImode;
9908 else
9909 counter_mode = DImode;
9910
9911 if (counter_mode != SImode && counter_mode != DImode)
9912 abort ();
9913
9914 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9915 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9916
9917 emit_insn (gen_cld ());
9918
9919 /* When not optimizing, or when optimizing for size, emit a simple
9920 rep ; movsb instruction for counts unknown or not divisible by 4. */
9921
9922 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9923 {
9924 countreg = ix86_zero_extend_to_Pmode (count_exp);
9925 if (TARGET_64BIT)
9926 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9927 destreg, srcreg, countreg));
9928 else
9929 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9930 destreg, srcreg, countreg));
9931 }
9932
9933 /* For constant aligned (or small unaligned) copies use rep movsl
9934 followed by code copying the rest. For PentiumPro ensure 8 byte
9935 alignment to allow rep movsl acceleration. */
9936
9937 else if (count != 0
9938 && (align >= 8
9939 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9940 || optimize_size || count < (unsigned int) 64))
9941 {
9942 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9943 if (count & ~(size - 1))
9944 {
9945 countreg = copy_to_mode_reg (counter_mode,
9946 GEN_INT ((count >> (size == 4 ? 2 : 3))
9947 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9948 countreg = ix86_zero_extend_to_Pmode (countreg);
9949 if (size == 4)
9950 {
9951 if (TARGET_64BIT)
9952 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9953 destreg, srcreg, countreg));
9954 else
9955 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9956 destreg, srcreg, countreg));
9957 }
9958 else
9959 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9960 destreg, srcreg, countreg));
9961 }
9962 if (size == 8 && (count & 0x04))
9963 emit_insn (gen_strmovsi (destreg, srcreg));
9964 if (count & 0x02)
9965 emit_insn (gen_strmovhi (destreg, srcreg));
9966 if (count & 0x01)
9967 emit_insn (gen_strmovqi (destreg, srcreg));
9968 }
9969 /* The generic code based on the glibc implementation:
9970 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9971 allowing accelerated copying there)
9972 - copy the data using rep movsl
9973 - copy the rest. */
9974 else
9975 {
9976 rtx countreg2;
9977 rtx label = NULL;
9978 int desired_alignment = (TARGET_PENTIUMPRO
9979 && (count == 0 || count >= (unsigned int) 260)
9980 ? 8 : UNITS_PER_WORD);
9981
9982 /* In case we don't know anything about the alignment, default to
9983 library version, since it is usually equally fast and results in
9984 shorter code. */
9985 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9986 {
9987 end_sequence ();
9988 return 0;
9989 }
9990
9991 if (TARGET_SINGLE_STRINGOP)
9992 emit_insn (gen_cld ());
9993
9994 countreg2 = gen_reg_rtx (Pmode);
9995 countreg = copy_to_mode_reg (counter_mode, count_exp);
9996
9997 /* We don't use loops to align destination and to copy parts smaller
9998 than 4 bytes, because gcc is able to optimize such code better (in
9999 the case the destination or the count really is aligned, gcc is often
10000 able to predict the branches) and also it is friendlier to the
10001 hardware branch prediction.
10002
10003 Using loops is beneficial for the generic case, because we can
10004 handle small counts using the loops. Many CPUs (such as Athlon)
10005 have large REP prefix setup costs.
10006
10007 This is quite costly. Maybe we can revisit this decision later or
10008 add some customizability to this code. */
10009
10010 if (count == 0 && align < desired_alignment)
10011 {
10012 label = gen_label_rtx ();
10013 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10014 LEU, 0, counter_mode, 1, label);
10015 }
10016 if (align <= 1)
10017 {
10018 rtx label = ix86_expand_aligntest (destreg, 1);
10019 emit_insn (gen_strmovqi (destreg, srcreg));
10020 ix86_adjust_counter (countreg, 1);
10021 emit_label (label);
10022 LABEL_NUSES (label) = 1;
10023 }
10024 if (align <= 2)
10025 {
10026 rtx label = ix86_expand_aligntest (destreg, 2);
10027 emit_insn (gen_strmovhi (destreg, srcreg));
10028 ix86_adjust_counter (countreg, 2);
10029 emit_label (label);
10030 LABEL_NUSES (label) = 1;
10031 }
10032 if (align <= 4 && desired_alignment > 4)
10033 {
10034 rtx label = ix86_expand_aligntest (destreg, 4);
10035 emit_insn (gen_strmovsi (destreg, srcreg));
10036 ix86_adjust_counter (countreg, 4);
10037 emit_label (label);
10038 LABEL_NUSES (label) = 1;
10039 }
10040
10041 if (label && desired_alignment > 4 && !TARGET_64BIT)
10042 {
10043 emit_label (label);
10044 LABEL_NUSES (label) = 1;
10045 label = NULL_RTX;
10046 }
10047 if (!TARGET_SINGLE_STRINGOP)
10048 emit_insn (gen_cld ());
10049 if (TARGET_64BIT)
10050 {
10051 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10052 GEN_INT (3)));
10053 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10054 destreg, srcreg, countreg2));
10055 }
10056 else
10057 {
10058 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10059 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10060 destreg, srcreg, countreg2));
10061 }
10062
10063 if (label)
10064 {
10065 emit_label (label);
10066 LABEL_NUSES (label) = 1;
10067 }
10068 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10069 emit_insn (gen_strmovsi (destreg, srcreg));
10070 if ((align <= 4 || count == 0) && TARGET_64BIT)
10071 {
10072 rtx label = ix86_expand_aligntest (countreg, 4);
10073 emit_insn (gen_strmovsi (destreg, srcreg));
10074 emit_label (label);
10075 LABEL_NUSES (label) = 1;
10076 }
10077 if (align > 2 && count != 0 && (count & 2))
10078 emit_insn (gen_strmovhi (destreg, srcreg));
10079 if (align <= 2 || count == 0)
10080 {
10081 rtx label = ix86_expand_aligntest (countreg, 2);
10082 emit_insn (gen_strmovhi (destreg, srcreg));
10083 emit_label (label);
10084 LABEL_NUSES (label) = 1;
10085 }
10086 if (align > 1 && count != 0 && (count & 1))
10087 emit_insn (gen_strmovqi (destreg, srcreg));
10088 if (align <= 1 || count == 0)
10089 {
10090 rtx label = ix86_expand_aligntest (countreg, 1);
10091 emit_insn (gen_strmovqi (destreg, srcreg));
10092 emit_label (label);
10093 LABEL_NUSES (label) = 1;
10094 }
10095 }
10096
10097 insns = get_insns ();
10098 end_sequence ();
10099
10100 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10101 emit_insn (insns);
10102 return 1;
10103 }
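/* Editor's sketch (illustrative only, not part of GCC): the generic
   inline expansion above corresponds roughly to this C strategy --
   peel head bytes until the destination is word aligned, move the bulk
   with word-sized copies (what rep movsl does), then mop up the tail.
   The helper name is hypothetical and unaligned-source details are
   glossed over.  */
static void
sketch_inline_memcpy (unsigned char *dst, const unsigned char *src,
		      unsigned long count)
{
  /* Head: align DST to a 4-byte boundary one byte at a time.  */
  while (count > 0 && ((unsigned long) dst & 3) != 0)
    {
      *dst++ = *src++;
      count--;
    }
  /* Body: word-sized copies, as rep movsl would perform.  */
  while (count >= 4)
    {
      *(unsigned int *) dst = *(const unsigned int *) src;
      dst += 4;
      src += 4;
      count -= 4;
    }
  /* Tail: the remaining 0..3 bytes.  */
  while (count > 0)
    {
      *dst++ = *src++;
      count--;
    }
}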
10104
10105 /* Expand string clear operation (bzero). Use i386 string operations when
10106 profitable. expand_movstr contains similar code. */
10107 int
10108 ix86_expand_clrstr (src, count_exp, align_exp)
10109 rtx src, count_exp, align_exp;
10110 {
10111 rtx destreg, zeroreg, countreg;
10112 enum machine_mode counter_mode;
10113 HOST_WIDE_INT align = 0;
10114 unsigned HOST_WIDE_INT count = 0;
10115
10116 if (GET_CODE (align_exp) == CONST_INT)
10117 align = INTVAL (align_exp);
10118
10119 /* This simple hack avoids all inlining code and simplifies code below. */
10120 if (!TARGET_ALIGN_STRINGOPS)
10121 align = 32;
10122
10123 if (GET_CODE (count_exp) == CONST_INT)
10124 count = INTVAL (count_exp);
10125 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
10126 for 64 bits use SImode when possible, otherwise DImode.
10127 Set count to the number of bytes cleared when known at compile time. */
10128 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10129 || x86_64_zero_extended_value (count_exp))
10130 counter_mode = SImode;
10131 else
10132 counter_mode = DImode;
10133
10134 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10135
10136 emit_insn (gen_cld ());
10137
10138 /* When not optimizing, or when optimizing for size, emit a simple
10139 rep ; stosb instruction for counts unknown or not divisible by 4. */
10140
10141 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10142 {
10143 countreg = ix86_zero_extend_to_Pmode (count_exp);
10144 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10145 if (TARGET_64BIT)
10146 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10147 destreg, countreg));
10148 else
10149 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10150 destreg, countreg));
10151 }
10152 else if (count != 0
10153 && (align >= 8
10154 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10155 || optimize_size || count < (unsigned int) 64))
10156 {
10157 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10158 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10159 if (count & ~(size - 1))
10160 {
10161 countreg = copy_to_mode_reg (counter_mode,
10162 GEN_INT ((count >> (size == 4 ? 2 : 3))
10163 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10164 countreg = ix86_zero_extend_to_Pmode (countreg);
10165 if (size == 4)
10166 {
10167 if (TARGET_64BIT)
10168 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10169 destreg, countreg));
10170 else
10171 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10172 destreg, countreg));
10173 }
10174 else
10175 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10176 destreg, countreg));
10177 }
10178 if (size == 8 && (count & 0x04))
10179 emit_insn (gen_strsetsi (destreg,
10180 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10181 if (count & 0x02)
10182 emit_insn (gen_strsethi (destreg,
10183 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10184 if (count & 0x01)
10185 emit_insn (gen_strsetqi (destreg,
10186 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10187 }
10188 else
10189 {
10190 rtx countreg2;
10191 rtx label = NULL;
10192 /* Compute desired alignment of the string operation. */
10193 int desired_alignment = (TARGET_PENTIUMPRO
10194 && (count == 0 || count >= (unsigned int) 260)
10195 ? 8 : UNITS_PER_WORD);
10196
10197 /* In case we don't know anything about the alignment, default to
10198 library version, since it is usually equally fast and results in
10199 shorter code. */
10200 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10201 return 0;
10202
10203 if (TARGET_SINGLE_STRINGOP)
10204 emit_insn (gen_cld ());
10205
10206 countreg2 = gen_reg_rtx (Pmode);
10207 countreg = copy_to_mode_reg (counter_mode, count_exp);
10208 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10209
10210 if (count == 0 && align < desired_alignment)
10211 {
10212 label = gen_label_rtx ();
10213 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10214 LEU, 0, counter_mode, 1, label);
10215 }
10216 if (align <= 1)
10217 {
10218 rtx label = ix86_expand_aligntest (destreg, 1);
10219 emit_insn (gen_strsetqi (destreg,
10220 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10221 ix86_adjust_counter (countreg, 1);
10222 emit_label (label);
10223 LABEL_NUSES (label) = 1;
10224 }
10225 if (align <= 2)
10226 {
10227 rtx label = ix86_expand_aligntest (destreg, 2);
10228 emit_insn (gen_strsethi (destreg,
10229 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10230 ix86_adjust_counter (countreg, 2);
10231 emit_label (label);
10232 LABEL_NUSES (label) = 1;
10233 }
10234 if (align <= 4 && desired_alignment > 4)
10235 {
10236 rtx label = ix86_expand_aligntest (destreg, 4);
10237 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10238 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10239 : zeroreg)));
10240 ix86_adjust_counter (countreg, 4);
10241 emit_label (label);
10242 LABEL_NUSES (label) = 1;
10243 }
10244
10245 if (label && desired_alignment > 4 && !TARGET_64BIT)
10246 {
10247 emit_label (label);
10248 LABEL_NUSES (label) = 1;
10249 label = NULL_RTX;
10250 }
10251
10252 if (!TARGET_SINGLE_STRINGOP)
10253 emit_insn (gen_cld ());
10254 if (TARGET_64BIT)
10255 {
10256 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10257 GEN_INT (3)));
10258 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10259 destreg, countreg2));
10260 }
10261 else
10262 {
10263 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10264 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10265 destreg, countreg2));
10266 }
10267 if (label)
10268 {
10269 emit_label (label);
10270 LABEL_NUSES (label) = 1;
10271 }
10272
10273 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10274 emit_insn (gen_strsetsi (destreg,
10275 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10276 if (TARGET_64BIT && (align <= 4 || count == 0))
10277 {
10278 rtx label = ix86_expand_aligntest (countreg, 4);
10279 emit_insn (gen_strsetsi (destreg,
10280 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10281 emit_label (label);
10282 LABEL_NUSES (label) = 1;
10283 }
10284 if (align > 2 && count != 0 && (count & 2))
10285 emit_insn (gen_strsethi (destreg,
10286 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10287 if (align <= 2 || count == 0)
10288 {
10289 rtx label = ix86_expand_aligntest (countreg, 2);
10290 emit_insn (gen_strsethi (destreg,
10291 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10292 emit_label (label);
10293 LABEL_NUSES (label) = 1;
10294 }
10295 if (align > 1 && count != 0 && (count & 1))
10296 emit_insn (gen_strsetqi (destreg,
10297 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10298 if (align <= 1 || count == 0)
10299 {
10300 rtx label = ix86_expand_aligntest (countreg, 1);
10301 emit_insn (gen_strsetqi (destreg,
10302 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10303 emit_label (label);
10304 LABEL_NUSES (label) = 1;
10305 }
10306 }
10307 return 1;
10308 }
10309 /* Expand strlen. */
10310 int
10311 ix86_expand_strlen (out, src, eoschar, align)
10312 rtx out, src, eoschar, align;
10313 {
10314 rtx addr, scratch1, scratch2, scratch3, scratch4;
10315
10316 /* The generic case of the strlen expander is long. Avoid its
10317 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
10318
10319 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10320 && !TARGET_INLINE_ALL_STRINGOPS
10321 && !optimize_size
10322 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10323 return 0;
10324
10325 addr = force_reg (Pmode, XEXP (src, 0));
10326 scratch1 = gen_reg_rtx (Pmode);
10327
10328 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10329 && !optimize_size)
10330 {
10331 /* Well it seems that some optimizer does not combine a call like
10332 foo(strlen(bar), strlen(bar));
10333 when the move and the subtraction are done here. It does calculate
10334 the length just once when these instructions are done inside of
10335 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10336 often used and I use one fewer register for the lifetime of
10337 output_strlen_unroll() this is better. */
10338
10339 emit_move_insn (out, addr);
10340
10341 ix86_expand_strlensi_unroll_1 (out, align);
10342
10343 /* strlensi_unroll_1 returns the address of the zero at the end of
10344 the string, like memchr(), so compute the length by subtracting
10345 the start address. */
10346 if (TARGET_64BIT)
10347 emit_insn (gen_subdi3 (out, out, addr));
10348 else
10349 emit_insn (gen_subsi3 (out, out, addr));
10350 }
10351 else
10352 {
10353 scratch2 = gen_reg_rtx (Pmode);
10354 scratch3 = gen_reg_rtx (Pmode);
10355 scratch4 = force_reg (Pmode, constm1_rtx);
10356
10357 emit_move_insn (scratch3, addr);
10358 eoschar = force_reg (QImode, eoschar);
10359
10360 emit_insn (gen_cld ());
10361 if (TARGET_64BIT)
10362 {
10363 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10364 align, scratch4, scratch3));
10365 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10366 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10367 }
10368 else
10369 {
10370 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10371 align, scratch4, scratch3));
10372 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10373 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10374 }
10375 }
10376 return 1;
10377 }
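/* Editor's sketch (illustrative, not GCC code): the repnz/scasb path
   above computes the length the classic way -- the count register
   starts at -1, is decremented once per byte scanned (including the
   terminator), and the final length is then ~count - 1.  A C model,
   assuming EOSCHAR is the terminating byte:  */
static unsigned long
sketch_scasb_strlen (const char *s, char eoschar)
{
  unsigned long count = (unsigned long) -1;	/* what scratch4 holds */
  do
    count--;
  while (*s++ != eoschar);
  return ~count - 1;		/* the NOT and the add of -1 emitted above */
}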
10378
10379 /* Expand the appropriate insns for doing strlen if not just doing
10380 repnz; scasb
10381
10382 out = result, initialized with the start address
10383 align_rtx = alignment of the address.
10384 scratch = scratch register, initialized with the start address when
10385 not aligned, otherwise undefined
10386
10387 This is just the body. It needs the initialisations mentioned above and
10388 some address computing at the end. These things are done in i386.md. */
10389
10390 static void
10391 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10392 rtx out, align_rtx;
10393 {
10394 int align;
10395 rtx tmp;
10396 rtx align_2_label = NULL_RTX;
10397 rtx align_3_label = NULL_RTX;
10398 rtx align_4_label = gen_label_rtx ();
10399 rtx end_0_label = gen_label_rtx ();
10400 rtx mem;
10401 rtx tmpreg = gen_reg_rtx (SImode);
10402 rtx scratch = gen_reg_rtx (SImode);
10403
10404 align = 0;
10405 if (GET_CODE (align_rtx) == CONST_INT)
10406 align = INTVAL (align_rtx);
10407
10408 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10409
10410 /* Is there a known alignment and is it less than 4? */
10411 if (align < 4)
10412 {
10413 rtx scratch1 = gen_reg_rtx (Pmode);
10414 emit_move_insn (scratch1, out);
10415 /* Is there a known alignment and is it not 2? */
10416 if (align != 2)
10417 {
10418 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10419 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10420
10421 /* Leave just the 3 lower bits. */
10422 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10423 NULL_RTX, 0, OPTAB_WIDEN);
10424
10425 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10426 Pmode, 1, align_4_label);
10427 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10428 Pmode, 1, align_2_label);
10429 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10430 Pmode, 1, align_3_label);
10431 }
10432 else
10433 {
10434 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10435 check whether it is aligned to a 4-byte boundary. */
10436
10437 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10438 NULL_RTX, 0, OPTAB_WIDEN);
10439
10440 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10441 Pmode, 1, align_4_label);
10442 }
10443
10444 mem = gen_rtx_MEM (QImode, out);
10445
10446 /* Now compare the bytes. */
10447
10448 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10449 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10450 QImode, 1, end_0_label);
10451
10452 /* Increment the address. */
10453 if (TARGET_64BIT)
10454 emit_insn (gen_adddi3 (out, out, const1_rtx));
10455 else
10456 emit_insn (gen_addsi3 (out, out, const1_rtx));
10457
10458 /* Not needed with an alignment of 2 */
10459 if (align != 2)
10460 {
10461 emit_label (align_2_label);
10462
10463 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10464 end_0_label);
10465
10466 if (TARGET_64BIT)
10467 emit_insn (gen_adddi3 (out, out, const1_rtx));
10468 else
10469 emit_insn (gen_addsi3 (out, out, const1_rtx));
10470
10471 emit_label (align_3_label);
10472 }
10473
10474 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10475 end_0_label);
10476
10477 if (TARGET_64BIT)
10478 emit_insn (gen_adddi3 (out, out, const1_rtx));
10479 else
10480 emit_insn (gen_addsi3 (out, out, const1_rtx));
10481 }
10482
10483 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10484 align this loop; doing so only enlarges the program and does not help
10485 speed. */
10486 emit_label (align_4_label);
10487
10488 mem = gen_rtx_MEM (SImode, out);
10489 emit_move_insn (scratch, mem);
10490 if (TARGET_64BIT)
10491 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10492 else
10493 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10494
10495 /* This formula yields a nonzero result iff one of the bytes is zero.
10496 This saves three branches inside the loop and many cycles. */
10497
10498 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10499 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10500 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10501 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10502 gen_int_mode (0x80808080, SImode)));
10503 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10504 align_4_label);
10505
10506 if (TARGET_CMOVE)
10507 {
10508 rtx reg = gen_reg_rtx (SImode);
10509 rtx reg2 = gen_reg_rtx (Pmode);
10510 emit_move_insn (reg, tmpreg);
10511 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10512
10513 /* If zero is not in the first two bytes, move two bytes forward. */
10514 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10515 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10516 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10517 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10518 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10519 reg,
10520 tmpreg)));
10521 /* Emit lea manually to avoid clobbering of flags. */
10522 emit_insn (gen_rtx_SET (SImode, reg2,
10523 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10524
10525 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10526 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10527 emit_insn (gen_rtx_SET (VOIDmode, out,
10528 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10529 reg2,
10530 out)));
10531
10532 }
10533 else
10534 {
10535 rtx end_2_label = gen_label_rtx ();
10536 /* Is zero in the first two bytes? */
10537
10538 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10539 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10540 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10541 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10542 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10543 pc_rtx);
10544 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10545 JUMP_LABEL (tmp) = end_2_label;
10546
10547 /* Not in the first two. Move two bytes forward. */
10548 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10549 if (TARGET_64BIT)
10550 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10551 else
10552 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10553
10554 emit_label (end_2_label);
10555
10556 }
10557
10558 /* Avoid branch in fixing the byte. */
10559 tmpreg = gen_lowpart (QImode, tmpreg);
10560 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10561 if (TARGET_64BIT)
10562 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10563 else
10564 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10565
10566 emit_label (end_0_label);
10567 }
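/* Editor's sketch (hypothetical helper, not in GCC): a plain C version
   of the word-at-a-time zero-byte test emitted in the loop above.
   For example, word 0x41004242 gives 0x00800000 (nonzero, zero byte
   present), while any word with no zero byte gives 0.  */
static int
sketch_word_has_zero_byte (unsigned int word)
{
  return ((word - 0x01010101U) & ~word & 0x80808080U) != 0;
}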
10568
10569 void
10570 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10571 rtx retval, fnaddr, callarg1, callarg2, pop;
10572 {
10573 rtx use = NULL, call;
10574
10575 if (pop == const0_rtx)
10576 pop = NULL;
10577 if (TARGET_64BIT && pop)
10578 abort ();
10579
10580 #if TARGET_MACHO
10581 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10582 fnaddr = machopic_indirect_call_target (fnaddr);
10583 #else
10584 /* Static functions and indirect calls don't need the pic register. */
10585 if (! TARGET_64BIT && flag_pic
10586 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10587 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10588 use_reg (&use, pic_offset_table_rtx);
10589
10590 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10591 {
10592 rtx al = gen_rtx_REG (QImode, 0);
10593 emit_move_insn (al, callarg2);
10594 use_reg (&use, al);
10595 }
10596 #endif /* TARGET_MACHO */
10597
10598 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10599 {
10600 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10601 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10602 }
10603
10604 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10605 if (retval)
10606 call = gen_rtx_SET (VOIDmode, retval, call);
10607 if (pop)
10608 {
10609 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10610 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10611 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10612 }
10613
10614 call = emit_call_insn (call);
10615 if (use)
10616 CALL_INSN_FUNCTION_USAGE (call) = use;
10617 }
10618
10619 \f
10620 /* Clear stack slot assignments remembered from previous functions.
10621 This is called from INIT_EXPANDERS once before RTL is emitted for each
10622 function. */
10623
10624 static struct machine_function *
10625 ix86_init_machine_status ()
10626 {
10627 return ggc_alloc_cleared (sizeof (struct machine_function));
10628 }
10629
10630 /* Return a MEM corresponding to a stack slot with mode MODE.
10631 Allocate a new slot if necessary.
10632
10633 The RTL for a function can have several slots available: N is
10634 which slot to use. */
10635
10636 rtx
10637 assign_386_stack_local (mode, n)
10638 enum machine_mode mode;
10639 int n;
10640 {
10641 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10642 abort ();
10643
10644 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10645 ix86_stack_locals[(int) mode][n]
10646 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10647
10648 return ix86_stack_locals[(int) mode][n];
10649 }
10650
10651 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10652
10653 static GTY(()) rtx ix86_tls_symbol;
10654 rtx
10655 ix86_tls_get_addr ()
10656 {
10657
10658 if (!ix86_tls_symbol)
10659 {
10660 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10661 ? "___tls_get_addr"
10662 : "__tls_get_addr"));
10663 }
10664
10665 return ix86_tls_symbol;
10666 }
10667 \f
10668 /* Calculate the length of the memory address in the instruction
10669 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10670
10671 static int
10672 memory_address_length (addr)
10673 rtx addr;
10674 {
10675 struct ix86_address parts;
10676 rtx base, index, disp;
10677 int len;
10678
10679 if (GET_CODE (addr) == PRE_DEC
10680 || GET_CODE (addr) == POST_INC
10681 || GET_CODE (addr) == PRE_MODIFY
10682 || GET_CODE (addr) == POST_MODIFY)
10683 return 0;
10684
10685 if (! ix86_decompose_address (addr, &parts))
10686 abort ();
10687
10688 base = parts.base;
10689 index = parts.index;
10690 disp = parts.disp;
10691 len = 0;
10692
10693 /* Register Indirect. */
10694 if (base && !index && !disp)
10695 {
10696 /* Special cases: ebp and esp need the two-byte modrm form. */
10697 if (addr == stack_pointer_rtx
10698 || addr == arg_pointer_rtx
10699 || addr == frame_pointer_rtx
10700 || addr == hard_frame_pointer_rtx)
10701 len = 1;
10702 }
10703
10704 /* Direct Addressing. */
10705 else if (disp && !base && !index)
10706 len = 4;
10707
10708 else
10709 {
10710 /* Find the length of the displacement constant. */
10711 if (disp)
10712 {
10713 if (GET_CODE (disp) == CONST_INT
10714 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10715 len = 1;
10716 else
10717 len = 4;
10718 }
10719
10720 /* An index requires the two-byte modrm form. */
10721 if (index)
10722 len += 1;
10723 }
10724
10725 return len;
10726 }
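/* Editor's note (illustrative examples, not in the original sources):
   with the rules above,
       (%eax)           -> 0 extra bytes (plain register indirect)
       (%esp), (%ebp)   -> 1 extra byte  (these need the longer form)
       12(%eax)         -> 1 byte  (disp8)
       sym(,%eax,4)     -> 5 bytes (4-byte disp plus the index byte)
       4(%ebx,%ecx,2)   -> 2 bytes (disp8 plus the index byte)  */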
10727
10728 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
10729 is set, expect that the insn has an 8-bit immediate alternative. */
10730 int
10731 ix86_attr_length_immediate_default (insn, shortform)
10732 rtx insn;
10733 int shortform;
10734 {
10735 int len = 0;
10736 int i;
10737 extract_insn_cached (insn);
10738 for (i = recog_data.n_operands - 1; i >= 0; --i)
10739 if (CONSTANT_P (recog_data.operand[i]))
10740 {
10741 if (len)
10742 abort ();
10743 if (shortform
10744 && GET_CODE (recog_data.operand[i]) == CONST_INT
10745 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10746 len = 1;
10747 else
10748 {
10749 switch (get_attr_mode (insn))
10750 {
10751 case MODE_QI:
10752 len+=1;
10753 break;
10754 case MODE_HI:
10755 len+=2;
10756 break;
10757 case MODE_SI:
10758 len+=4;
10759 break;
10760 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10761 case MODE_DI:
10762 len+=4;
10763 break;
10764 default:
10765 fatal_insn ("unknown insn mode", insn);
10766 }
10767 }
10768 }
10769 return len;
10770 }
10771 /* Compute default value for "length_address" attribute. */
10772 int
10773 ix86_attr_length_address_default (insn)
10774 rtx insn;
10775 {
10776 int i;
10777 extract_insn_cached (insn);
10778 for (i = recog_data.n_operands - 1; i >= 0; --i)
10779 if (GET_CODE (recog_data.operand[i]) == MEM)
10780 {
10781 return memory_address_length (XEXP (recog_data.operand[i], 0));
10782 break;
10783 }
10784 return 0;
10785 }
10786 \f
10787 /* Return the maximum number of instructions a cpu can issue. */
10788
10789 static int
10790 ix86_issue_rate ()
10791 {
10792 switch (ix86_cpu)
10793 {
10794 case PROCESSOR_PENTIUM:
10795 case PROCESSOR_K6:
10796 return 2;
10797
10798 case PROCESSOR_PENTIUMPRO:
10799 case PROCESSOR_PENTIUM4:
10800 case PROCESSOR_ATHLON:
10801 return 3;
10802
10803 default:
10804 return 1;
10805 }
10806 }
10807
10808 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10809 by DEP_INSN and reads nothing else set by DEP_INSN. */
10810
10811 static int
10812 ix86_flags_dependant (insn, dep_insn, insn_type)
10813 rtx insn, dep_insn;
10814 enum attr_type insn_type;
10815 {
10816 rtx set, set2;
10817
10818 /* Simplify the test for uninteresting insns. */
10819 if (insn_type != TYPE_SETCC
10820 && insn_type != TYPE_ICMOV
10821 && insn_type != TYPE_FCMOV
10822 && insn_type != TYPE_IBR)
10823 return 0;
10824
10825 if ((set = single_set (dep_insn)) != 0)
10826 {
10827 set = SET_DEST (set);
10828 set2 = NULL_RTX;
10829 }
10830 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10831 && XVECLEN (PATTERN (dep_insn), 0) == 2
10832 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10833 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10834 {
10835 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10836 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10837 }
10838 else
10839 return 0;
10840
10841 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10842 return 0;
10843
10844 /* This test is true if the dependent insn reads the flags but
10845 not any other potentially set register. */
10846 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10847 return 0;
10848
10849 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10850 return 0;
10851
10852 return 1;
10853 }
10854
10855 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10856 address with operands set by DEP_INSN. */
10857
10858 static int
10859 ix86_agi_dependant (insn, dep_insn, insn_type)
10860 rtx insn, dep_insn;
10861 enum attr_type insn_type;
10862 {
10863 rtx addr;
10864
10865 if (insn_type == TYPE_LEA
10866 && TARGET_PENTIUM)
10867 {
10868 addr = PATTERN (insn);
10869 if (GET_CODE (addr) == SET)
10870 ;
10871 else if (GET_CODE (addr) == PARALLEL
10872 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10873 addr = XVECEXP (addr, 0, 0);
10874 else
10875 abort ();
10876 addr = SET_SRC (addr);
10877 }
10878 else
10879 {
10880 int i;
10881 extract_insn_cached (insn);
10882 for (i = recog_data.n_operands - 1; i >= 0; --i)
10883 if (GET_CODE (recog_data.operand[i]) == MEM)
10884 {
10885 addr = XEXP (recog_data.operand[i], 0);
10886 goto found;
10887 }
10888 return 0;
10889 found:;
10890 }
10891
10892 return modified_in_p (addr, dep_insn);
10893 }
10894
10895 static int
10896 ix86_adjust_cost (insn, link, dep_insn, cost)
10897 rtx insn, link, dep_insn;
10898 int cost;
10899 {
10900 enum attr_type insn_type, dep_insn_type;
10901 enum attr_memory memory, dep_memory;
10902 rtx set, set2;
10903 int dep_insn_code_number;
10904
10905 /* Anti and output dependencies have zero cost on all CPUs. */
10906 if (REG_NOTE_KIND (link) != 0)
10907 return 0;
10908
10909 dep_insn_code_number = recog_memoized (dep_insn);
10910
10911 /* If we can't recognize the insns, we can't really do anything. */
10912 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10913 return cost;
10914
10915 insn_type = get_attr_type (insn);
10916 dep_insn_type = get_attr_type (dep_insn);
10917
10918 switch (ix86_cpu)
10919 {
10920 case PROCESSOR_PENTIUM:
10921 /* Address Generation Interlock adds a cycle of latency. */
10922 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10923 cost += 1;
10924
10925 /* ??? Compares pair with jump/setcc. */
10926 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10927 cost = 0;
10928
10929 /* Floating point stores require the value to be ready one cycle earlier. */
10930 if (insn_type == TYPE_FMOV
10931 && get_attr_memory (insn) == MEMORY_STORE
10932 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10933 cost += 1;
10934 break;
10935
10936 case PROCESSOR_PENTIUMPRO:
10937 memory = get_attr_memory (insn);
10938 dep_memory = get_attr_memory (dep_insn);
10939
10940 /* Since we can't represent delayed latencies of load+operation,
10941 increase the cost here for non-imov insns. */
10942 if (dep_insn_type != TYPE_IMOV
10943 && dep_insn_type != TYPE_FMOV
10944 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10945 cost += 1;
10946
10947 /* INT->FP conversion is expensive. */
10948 if (get_attr_fp_int_src (dep_insn))
10949 cost += 5;
10950
10951 /* There is one cycle extra latency between an FP op and a store. */
10952 if (insn_type == TYPE_FMOV
10953 && (set = single_set (dep_insn)) != NULL_RTX
10954 && (set2 = single_set (insn)) != NULL_RTX
10955 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10956 && GET_CODE (SET_DEST (set2)) == MEM)
10957 cost += 1;
10958
10959 /* Show the ability of the reorder buffer to hide the latency of a load
10960 by executing it in parallel with a previous instruction, in case the
10961 previous instruction is not needed to compute the address. */
10962 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10963 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10964 {
10965 /* Claim moves to take one cycle, as the core can issue one load
10966 at a time and the next load can start a cycle later. */
10967 if (dep_insn_type == TYPE_IMOV
10968 || dep_insn_type == TYPE_FMOV)
10969 cost = 1;
10970 else if (cost > 1)
10971 cost--;
10972 }
10973 break;
10974
10975 case PROCESSOR_K6:
10976 memory = get_attr_memory (insn);
10977 dep_memory = get_attr_memory (dep_insn);
10978 /* The esp dependency is resolved before the instruction is really
10979 finished. */
10980 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10981 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10982 return 1;
10983
10984 /* Since we can't represent delayed latencies of load+operation,
10985 increase the cost here for non-imov insns. */
10986 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10987 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10988
10989 /* INT->FP conversion is expensive. */
10990 if (get_attr_fp_int_src (dep_insn))
10991 cost += 5;
10992
10993 /* Show the ability of the reorder buffer to hide the latency of a load
10994 by executing it in parallel with a previous instruction, in case the
10995 previous instruction is not needed to compute the address. */
10996 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10997 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10998 {
10999 /* Claim moves to take one cycle, as the core can issue one load
11000 at a time and the next load can start a cycle later. */
11001 if (dep_insn_type == TYPE_IMOV
11002 || dep_insn_type == TYPE_FMOV)
11003 cost = 1;
11004 else if (cost > 2)
11005 cost -= 2;
11006 else
11007 cost = 1;
11008 }
11009 break;
11010
11011 case PROCESSOR_ATHLON:
11012 memory = get_attr_memory (insn);
11013 dep_memory = get_attr_memory (dep_insn);
11014
11015 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11016 {
11017 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11018 cost += 2;
11019 else
11020 cost += 3;
11021 }
11022 /* Show the ability of the reorder buffer to hide the latency of a load
11023 by executing it in parallel with a previous instruction, in case the
11024 previous instruction is not needed to compute the address. */
11025 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11026 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11027 {
11028 /* Claim moves to take one cycle, as the core can issue one load
11029 at a time and the next load can start a cycle later. */
11030 if (dep_insn_type == TYPE_IMOV
11031 || dep_insn_type == TYPE_FMOV)
11032 cost = 0;
11033 else if (cost >= 3)
11034 cost -= 3;
11035 else
11036 cost = 0;
11037 }
11038
11039 default:
11040 break;
11041 }
11042
11043 return cost;
11044 }
11045
11046 static union
11047 {
11048 struct ppro_sched_data
11049 {
11050 rtx decode[3];
11051 int issued_this_cycle;
11052 } ppro;
11053 } ix86_sched_data;
11054
11055 static enum attr_ppro_uops
11056 ix86_safe_ppro_uops (insn)
11057 rtx insn;
11058 {
11059 if (recog_memoized (insn) >= 0)
11060 return get_attr_ppro_uops (insn);
11061 else
11062 return PPRO_UOPS_MANY;
11063 }
11064
11065 static void
11066 ix86_dump_ppro_packet (dump)
11067 FILE *dump;
11068 {
11069 if (ix86_sched_data.ppro.decode[0])
11070 {
11071 fprintf (dump, "PPRO packet: %d",
11072 INSN_UID (ix86_sched_data.ppro.decode[0]));
11073 if (ix86_sched_data.ppro.decode[1])
11074 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11075 if (ix86_sched_data.ppro.decode[2])
11076 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11077 fputc ('\n', dump);
11078 }
11079 }
11080
11081 /* We're beginning a new block. Initialize data structures as necessary. */
11082
11083 static void
11084 ix86_sched_init (dump, sched_verbose, veclen)
11085 FILE *dump ATTRIBUTE_UNUSED;
11086 int sched_verbose ATTRIBUTE_UNUSED;
11087 int veclen ATTRIBUTE_UNUSED;
11088 {
11089 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11090 }
11091
11092 /* Shift INSN to SLOT, and shift everything else down. */
11093
11094 static void
11095 ix86_reorder_insn (insnp, slot)
11096 rtx *insnp, *slot;
11097 {
11098 if (insnp != slot)
11099 {
11100 rtx insn = *insnp;
11101 do
11102 insnp[0] = insnp[1];
11103 while (++insnp != slot);
11104 *insnp = insn;
11105 }
11106 }
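/* Editor's note (illustrative, not in the original sources): e.g. for a
   ready array {a, b, c, d} with INSNP pointing at b and SLOT at d, the
   rotation above yields {a, c, d, b}, i.e. b moves into the slot and
   the intervening insns slide down one position.  */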
11107
11108 static void
11109 ix86_sched_reorder_ppro (ready, e_ready)
11110 rtx *ready;
11111 rtx *e_ready;
11112 {
11113 rtx decode[3];
11114 enum attr_ppro_uops cur_uops;
11115 int issued_this_cycle;
11116 rtx *insnp;
11117 int i;
11118
11119 /* At this point .ppro.decode contains the state of the three
11120 decoders from last "cycle". That is, those insns that were
11121 actually independent. But here we're scheduling for the
11122 decoder, and we may find things that are decodable in the
11123 same cycle. */
11124
11125 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11126 issued_this_cycle = 0;
11127
11128 insnp = e_ready;
11129 cur_uops = ix86_safe_ppro_uops (*insnp);
11130
11131 /* If the decoders are empty, and we have a complex insn at the
11132 head of the priority queue, let it issue without complaint. */
11133 if (decode[0] == NULL)
11134 {
11135 if (cur_uops == PPRO_UOPS_MANY)
11136 {
11137 decode[0] = *insnp;
11138 goto ppro_done;
11139 }
11140
11141 /* Otherwise, search for a 2-4 uop insn to issue. */
11142 while (cur_uops != PPRO_UOPS_FEW)
11143 {
11144 if (insnp == ready)
11145 break;
11146 cur_uops = ix86_safe_ppro_uops (*--insnp);
11147 }
11148
11149 /* If so, move it to the head of the line. */
11150 if (cur_uops == PPRO_UOPS_FEW)
11151 ix86_reorder_insn (insnp, e_ready);
11152
11153 /* Issue the head of the queue. */
11154 issued_this_cycle = 1;
11155 decode[0] = *e_ready--;
11156 }
11157
11158 /* Look for simple insns to fill in the other two slots. */
11159 for (i = 1; i < 3; ++i)
11160 if (decode[i] == NULL)
11161 {
11162 if (ready > e_ready)
11163 goto ppro_done;
11164
11165 insnp = e_ready;
11166 cur_uops = ix86_safe_ppro_uops (*insnp);
11167 while (cur_uops != PPRO_UOPS_ONE)
11168 {
11169 if (insnp == ready)
11170 break;
11171 cur_uops = ix86_safe_ppro_uops (*--insnp);
11172 }
11173
11174 /* Found one. Move it to the head of the queue and issue it. */
11175 if (cur_uops == PPRO_UOPS_ONE)
11176 {
11177 ix86_reorder_insn (insnp, e_ready);
11178 decode[i] = *e_ready--;
11179 issued_this_cycle++;
11180 continue;
11181 }
11182
11183 /* ??? Didn't find one. Ideally, here we would do a lazy split
11184 of 2-uop insns, issue one and queue the other. */
11185 }
11186
11187 ppro_done:
11188 if (issued_this_cycle == 0)
11189 issued_this_cycle = 1;
11190 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11191 }
11192
11193 /* We are about to begin issuing insns for this clock cycle.
11194 Override the default sort algorithm to better slot instructions. */
11195 static int
11196 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11197 FILE *dump ATTRIBUTE_UNUSED;
11198 int sched_verbose ATTRIBUTE_UNUSED;
11199 rtx *ready;
11200 int *n_readyp;
11201 int clock_var ATTRIBUTE_UNUSED;
11202 {
11203 int n_ready = *n_readyp;
11204 rtx *e_ready = ready + n_ready - 1;
11205
11206 /* Make sure to go ahead and initialize key items in
11207 ix86_sched_data if we are not going to bother trying to
11208 reorder the ready queue. */
11209 if (n_ready < 2)
11210 {
11211 ix86_sched_data.ppro.issued_this_cycle = 1;
11212 goto out;
11213 }
11214
11215 switch (ix86_cpu)
11216 {
11217 default:
11218 break;
11219
11220 case PROCESSOR_PENTIUMPRO:
11221 ix86_sched_reorder_ppro (ready, e_ready);
11222 break;
11223 }
11224
11225 out:
11226 return ix86_issue_rate ();
11227 }
11228
11229 /* We are about to issue INSN. Return the number of insns left on the
11230 ready queue that can be issued this cycle. */
11231
11232 static int
11233 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11234 FILE *dump;
11235 int sched_verbose;
11236 rtx insn;
11237 int can_issue_more;
11238 {
11239 int i;
11240 switch (ix86_cpu)
11241 {
11242 default:
11243 return can_issue_more - 1;
11244
11245 case PROCESSOR_PENTIUMPRO:
11246 {
11247 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11248
11249 if (uops == PPRO_UOPS_MANY)
11250 {
11251 if (sched_verbose)
11252 ix86_dump_ppro_packet (dump);
11253 ix86_sched_data.ppro.decode[0] = insn;
11254 ix86_sched_data.ppro.decode[1] = NULL;
11255 ix86_sched_data.ppro.decode[2] = NULL;
11256 if (sched_verbose)
11257 ix86_dump_ppro_packet (dump);
11258 ix86_sched_data.ppro.decode[0] = NULL;
11259 }
11260 else if (uops == PPRO_UOPS_FEW)
11261 {
11262 if (sched_verbose)
11263 ix86_dump_ppro_packet (dump);
11264 ix86_sched_data.ppro.decode[0] = insn;
11265 ix86_sched_data.ppro.decode[1] = NULL;
11266 ix86_sched_data.ppro.decode[2] = NULL;
11267 }
11268 else
11269 {
11270 for (i = 0; i < 3; ++i)
11271 if (ix86_sched_data.ppro.decode[i] == NULL)
11272 {
11273 ix86_sched_data.ppro.decode[i] = insn;
11274 break;
11275 }
11276 if (i == 3)
11277 abort ();
11278 if (i == 2)
11279 {
11280 if (sched_verbose)
11281 ix86_dump_ppro_packet (dump);
11282 ix86_sched_data.ppro.decode[0] = NULL;
11283 ix86_sched_data.ppro.decode[1] = NULL;
11284 ix86_sched_data.ppro.decode[2] = NULL;
11285 }
11286 }
11287 }
11288 return --ix86_sched_data.ppro.issued_this_cycle;
11289 }
11290 }
11291
11292 static int
11293 ia32_use_dfa_pipeline_interface ()
11294 {
11295 if (ix86_cpu == PROCESSOR_PENTIUM)
11296 return 1;
11297 return 0;
11298 }
11299
11300 /* How many alternative schedules to try. This should be as wide as the
11301 scheduling freedom in the DFA, but no wider. Making this value too
11302 large results in extra work for the scheduler. */
11303
11304 static int
11305 ia32_multipass_dfa_lookahead ()
11306 {
11307 if (ix86_cpu == PROCESSOR_PENTIUM)
11308 return 2;
11309 else
11310 return 0;
11311 }
11312
11313 \f
11314 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11315 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11316 appropriate. */
11317
11318 void
11319 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11320 rtx insns;
11321 rtx dstref, srcref, dstreg, srcreg;
11322 {
11323 rtx insn;
11324
11325 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11326 if (INSN_P (insn))
11327 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11328 dstreg, srcreg);
11329 }
11330
11331 /* Subroutine of above to actually do the updating by recursively walking
11332 the rtx. */
11333
11334 static void
11335 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11336 rtx x;
11337 rtx dstref, srcref, dstreg, srcreg;
11338 {
11339 enum rtx_code code = GET_CODE (x);
11340 const char *format_ptr = GET_RTX_FORMAT (code);
11341 int i, j;
11342
11343 if (code == MEM && XEXP (x, 0) == dstreg)
11344 MEM_COPY_ATTRIBUTES (x, dstref);
11345 else if (code == MEM && XEXP (x, 0) == srcreg)
11346 MEM_COPY_ATTRIBUTES (x, srcref);
11347
11348 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11349 {
11350 if (*format_ptr == 'e')
11351 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11352 dstreg, srcreg);
11353 else if (*format_ptr == 'E')
11354 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11355 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11356 dstreg, srcreg);
11357 }
11358 }
11359 \f
11360 /* Compute the alignment given to a constant that is being placed in memory.
11361 EXP is the constant and ALIGN is the alignment that the object would
11362 ordinarily have.
11363 The value of this function is used instead of that alignment to align
11364 the object. */
11365
11366 int
11367 ix86_constant_alignment (exp, align)
11368 tree exp;
11369 int align;
11370 {
11371 if (TREE_CODE (exp) == REAL_CST)
11372 {
11373 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11374 return 64;
11375 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11376 return 128;
11377 }
11378 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11379 && align < 256)
11380 return 256;
11381
11382 return align;
11383 }
11384
11385 /* Compute the alignment for a static variable.
11386 TYPE is the data type, and ALIGN is the alignment that
11387 the object would ordinarily have. The value of this function is used
11388 instead of that alignment to align the object. */
11389
11390 int
11391 ix86_data_alignment (type, align)
11392 tree type;
11393 int align;
11394 {
11395 if (AGGREGATE_TYPE_P (type)
11396 && TYPE_SIZE (type)
11397 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11398 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11399 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11400 return 256;
11401
11402 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11403 to a 16-byte boundary. */
11404 if (TARGET_64BIT)
11405 {
11406 if (AGGREGATE_TYPE_P (type)
11407 && TYPE_SIZE (type)
11408 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11409 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11410 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11411 return 128;
11412 }
11413
11414 if (TREE_CODE (type) == ARRAY_TYPE)
11415 {
11416 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11417 return 64;
11418 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11419 return 128;
11420 }
11421 else if (TREE_CODE (type) == COMPLEX_TYPE)
11422 {
11423
11424 if (TYPE_MODE (type) == DCmode && align < 64)
11425 return 64;
11426 if (TYPE_MODE (type) == XCmode && align < 128)
11427 return 128;
11428 }
11429 else if ((TREE_CODE (type) == RECORD_TYPE
11430 || TREE_CODE (type) == UNION_TYPE
11431 || TREE_CODE (type) == QUAL_UNION_TYPE)
11432 && TYPE_FIELDS (type))
11433 {
11434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11435 return 64;
11436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11437 return 128;
11438 }
11439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11440 || TREE_CODE (type) == INTEGER_TYPE)
11441 {
11442 if (TYPE_MODE (type) == DFmode && align < 64)
11443 return 64;
11444 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11445 return 128;
11446 }
11447
11448 return align;
11449 }
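/* Editor's note (illustrative, not in the original sources): assuming
   the incoming ALIGN is smaller, the rules above give, for example,
       static char buf[64];   -> 256-bit (32-byte) alignment
       static double v[2];    -> 128-bit on x86-64, 64-bit on ia32  */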
11450
11451 /* Compute the alignment for a local variable.
11452 TYPE is the data type, and ALIGN is the alignment that
11453 the object would ordinarily have. The value of this macro is used
11454 instead of that alignment to align the object. */
11455
11456 int
11457 ix86_local_alignment (type, align)
11458 tree type;
11459 int align;
11460 {
11461 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11462 to a 16-byte boundary. */
11463 if (TARGET_64BIT)
11464 {
11465 if (AGGREGATE_TYPE_P (type)
11466 && TYPE_SIZE (type)
11467 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11468 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11469 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11470 return 128;
11471 }
11472 if (TREE_CODE (type) == ARRAY_TYPE)
11473 {
11474 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11475 return 64;
11476 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11477 return 128;
11478 }
11479 else if (TREE_CODE (type) == COMPLEX_TYPE)
11480 {
11481 if (TYPE_MODE (type) == DCmode && align < 64)
11482 return 64;
11483 if (TYPE_MODE (type) == XCmode && align < 128)
11484 return 128;
11485 }
11486 else if ((TREE_CODE (type) == RECORD_TYPE
11487 || TREE_CODE (type) == UNION_TYPE
11488 || TREE_CODE (type) == QUAL_UNION_TYPE)
11489 && TYPE_FIELDS (type))
11490 {
11491 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11492 return 64;
11493 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11494 return 128;
11495 }
11496 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11497 || TREE_CODE (type) == INTEGER_TYPE)
11498 {
11499
11500 if (TYPE_MODE (type) == DFmode && align < 64)
11501 return 64;
11502 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11503 return 128;
11504 }
11505 return align;
11506 }
11507 \f
11508 /* Emit RTL insns to initialize the variable parts of a trampoline.
11509 FNADDR is an RTX for the address of the function's pure code.
11510 CXT is an RTX for the static chain value for the function. */
11511 void
11512 x86_initialize_trampoline (tramp, fnaddr, cxt)
11513 rtx tramp, fnaddr, cxt;
11514 {
11515 if (!TARGET_64BIT)
11516 {
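      /* The 32-bit trampoline emitted below is
           b9 <cxt>             movl  $cxt, %ecx
           e9 <disp>            jmp   fnaddr
         where <disp> is relative to the byte following the jmp,
         i.e. tramp + 10.  */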
11517 /* Compute offset from the end of the jmp to the target function. */
11518 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11519 plus_constant (tramp, 10),
11520 NULL_RTX, 1, OPTAB_DIRECT);
11521 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11522 gen_int_mode (0xb9, QImode));
11523 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11524 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11525 gen_int_mode (0xe9, QImode));
11526 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11527 }
11528 else
11529 {
11530 int offset = 0;
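      /* The 64-bit trampoline emitted below is (bytes shown in memory
         order; the HImode constants are stored little-endian):
           41 bb <fnaddr:32>    movl    $fnaddr, %r11d
         or
           49 bb <fnaddr:64>    movabs  $fnaddr, %r11
         followed by
           49 ba <cxt:64>       movabs  $cxt, %r10
           49 ff e3             jmp     *%r11  */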
11531 /* Try to load the address using the shorter movl instead of movabs.
11532 We may want to support movq for kernel mode, but the kernel does not
11533 use trampolines at the moment. */
11534 if (x86_64_zero_extended_value (fnaddr))
11535 {
11536 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11537 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11538 gen_int_mode (0xbb41, HImode));
11539 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11540 gen_lowpart (SImode, fnaddr));
11541 offset += 6;
11542 }
11543 else
11544 {
11545 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11546 gen_int_mode (0xbb49, HImode));
11547 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11548 fnaddr);
11549 offset += 10;
11550 }
11551 /* Load static chain using movabs to r10. */
11552 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11553 gen_int_mode (0xba49, HImode));
11554 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11555 cxt);
11556 offset += 10;
11557 /* Jump to %r11. */
11558 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11559 gen_int_mode (0xff49, HImode));
11560 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11561 gen_int_mode (0xe3, QImode));
11562 offset += 3;
11563 if (offset > TRAMPOLINE_SIZE)
11564 abort ();
11565 }
11566 }
11567 \f
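/* Define a machine-specific builtin named NAME, with function type TYPE and
   builtin code CODE, but only when at least one of the target flags in MASK
   is enabled (so e.g. the SSE builtins are not created unless -msse or
   -msse2 is in effect).  */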
11568 #define def_builtin(MASK, NAME, TYPE, CODE) \
11569 do { \
11570 if ((MASK) & target_flags) \
11571 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11572 NULL, NULL_TREE); \
11573 } while (0)
11574
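/* Description of one MMX/SSE builtin: the target_flags mask that enables it,
   the insn pattern used to expand it, its user-visible name (0 for entries
   that are registered by hand with more precise types), its IX86_BUILTIN_*
   code, and, for comparison builtins, the RTL comparison code together with
   a flag that asks the expander to swap the two operands.  */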
11575 struct builtin_description
11576 {
11577 const unsigned int mask;
11578 const enum insn_code icode;
11579 const char *const name;
11580 const enum ix86_builtins code;
11581 const enum rtx_code comparison;
11582 const unsigned int flag;
11583 };
11584
11585 /* Used for builtins that are enabled both by -msse and -msse2. */
11586 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11587
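/* Scalar comi/ucomi comparison builtins.  The *gt/*ge entries reuse LT/LE
   and set the final flag, which makes the expander swap the operands
   (a > b is handled as b < a).  */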
11588 static const struct builtin_description bdesc_comi[] =
11589 {
11590 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11591 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11592 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11593 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11594 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11595 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11596 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11597 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11598 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11599 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11600 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11601 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11602 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11603 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11604 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11605 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11606 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11607 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11608 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11609 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11610 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11611 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11612 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11613 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11614 };
11615
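/* Builtins taking two operands.  Entries with a name are registered
   generically by the loop in ix86_init_mmx_sse_builtins; entries whose name
   is 0 are skipped there and registered by hand with more specific types.  */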
11616 static const struct builtin_description bdesc_2arg[] =
11617 {
11618 /* SSE */
11619 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11620 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11621 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11622 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11623 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11624 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11625 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11626 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11627
11628 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11629 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11630 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11631 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11632 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11633 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11634 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11635 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11636 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11637 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11638 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11639 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11640 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11641 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11642 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11643 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11644 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11645 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11646 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11647 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11648 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11649 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11650 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11651 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11652
11653 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11654 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11655 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11656 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11657
11658 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11659 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11660 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11661 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11662 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11663
11664 /* MMX */
11665 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11666 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11667 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11668 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11669 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11670 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11671
11672 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11673 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11674 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11675 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11676 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11677 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11678 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11679 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11680
11681 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11682 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11683 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11684
11685 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11686 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11687 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11688 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11689
11690 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11691 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11692
11693 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11694 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11695 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11696 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11697 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11698 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11699
11700 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11701 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11702 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11703 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11704
11705 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11706 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11707 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11708 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11709 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11710 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11711
11712 /* Special. */
11713 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11714 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11715 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11716
11717 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11718 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11719
11720 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11721 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11722 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11723 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11724 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11725 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11726
11727 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11728 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11729 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11730 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11731 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11732 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11733
11734 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11735 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11736 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11737 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11738
11739 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11740 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11741
11742 /* SSE2 */
11743 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11744 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11745 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11751
11752 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11753 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11754 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11755 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11756 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11757 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11758 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11759 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11760 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11761 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11762 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11763 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11764 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11765 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11766 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11767 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11768 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11769 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11770 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11771 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11772 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11773 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11774 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11775 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11776
11777 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11778 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11779 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11780 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11781
11782 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11783 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11784 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11785 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11786
11787 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11788 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11789 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11790
11791 /* SSE2 MMX */
11792 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11793 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11794 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11795 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11796 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11797 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11800
11801 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11802 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11803 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11804 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11805 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11807 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11809
11810 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11813 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11814
11815 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11818 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11819
11820 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11821 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11822
11823 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11824 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11825 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11826 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11827 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11828 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11829
11830 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11831 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11832 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11833 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11834
11835 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11836 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11837 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11839 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11841
11842 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11843 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11845
11846 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11847 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11848
11849 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11850 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11851 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11852 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11854 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11855
11856 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11857 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11858 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11859 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11860 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11861 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11862
11863 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11867
11868 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11869
11870 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11872 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11873 };
11874
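/* Builtins taking a single operand.  These all have a null name here and are
   registered by hand in ix86_init_mmx_sse_builtins; the table is scanned
   when the builtins are expanded.  */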
11875 static const struct builtin_description bdesc_1arg[] =
11876 {
11877 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11878 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11879
11880 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11881 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11882 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11883
11884 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11885 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11886 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11887 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11888
11889 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11892
11893 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11894
11895 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11897
11898 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11903
11904 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11905
11906 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11908
11909 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11912 };
11913
11914 void
11915 ix86_init_builtins ()
11916 {
11917 if (TARGET_MMX)
11918 ix86_init_mmx_sse_builtins ();
11919 }
11920
11921 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11922 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
11923 builtins are defined. */
11924 static void
11925 ix86_init_mmx_sse_builtins ()
11926 {
11927 const struct builtin_description * d;
11928 size_t i;
11929
11930 tree pchar_type_node = build_pointer_type (char_type_node);
11931 tree pfloat_type_node = build_pointer_type (float_type_node);
11932 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11933 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11934 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11935
11936 /* Comparisons. */
11937 tree int_ftype_v4sf_v4sf
11938 = build_function_type_list (integer_type_node,
11939 V4SF_type_node, V4SF_type_node, NULL_TREE);
11940 tree v4si_ftype_v4sf_v4sf
11941 = build_function_type_list (V4SI_type_node,
11942 V4SF_type_node, V4SF_type_node, NULL_TREE);
11943 /* MMX/SSE/integer conversions. */
11944 tree int_ftype_v4sf
11945 = build_function_type_list (integer_type_node,
11946 V4SF_type_node, NULL_TREE);
11947 tree int_ftype_v8qi
11948 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11949 tree v4sf_ftype_v4sf_int
11950 = build_function_type_list (V4SF_type_node,
11951 V4SF_type_node, integer_type_node, NULL_TREE);
11952 tree v4sf_ftype_v4sf_v2si
11953 = build_function_type_list (V4SF_type_node,
11954 V4SF_type_node, V2SI_type_node, NULL_TREE);
11955 tree int_ftype_v4hi_int
11956 = build_function_type_list (integer_type_node,
11957 V4HI_type_node, integer_type_node, NULL_TREE);
11958 tree v4hi_ftype_v4hi_int_int
11959 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11960 integer_type_node, integer_type_node,
11961 NULL_TREE);
11962 /* Miscellaneous. */
11963 tree v8qi_ftype_v4hi_v4hi
11964 = build_function_type_list (V8QI_type_node,
11965 V4HI_type_node, V4HI_type_node, NULL_TREE);
11966 tree v4hi_ftype_v2si_v2si
11967 = build_function_type_list (V4HI_type_node,
11968 V2SI_type_node, V2SI_type_node, NULL_TREE);
11969 tree v4sf_ftype_v4sf_v4sf_int
11970 = build_function_type_list (V4SF_type_node,
11971 V4SF_type_node, V4SF_type_node,
11972 integer_type_node, NULL_TREE);
11973 tree v2si_ftype_v4hi_v4hi
11974 = build_function_type_list (V2SI_type_node,
11975 V4HI_type_node, V4HI_type_node, NULL_TREE);
11976 tree v4hi_ftype_v4hi_int
11977 = build_function_type_list (V4HI_type_node,
11978 V4HI_type_node, integer_type_node, NULL_TREE);
11979 tree v4hi_ftype_v4hi_di
11980 = build_function_type_list (V4HI_type_node,
11981 V4HI_type_node, long_long_unsigned_type_node,
11982 NULL_TREE);
11983 tree v2si_ftype_v2si_di
11984 = build_function_type_list (V2SI_type_node,
11985 V2SI_type_node, long_long_unsigned_type_node,
11986 NULL_TREE);
11987 tree void_ftype_void
11988 = build_function_type (void_type_node, void_list_node);
11989 tree void_ftype_unsigned
11990 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
11991 tree unsigned_ftype_void
11992 = build_function_type (unsigned_type_node, void_list_node);
11993 tree di_ftype_void
11994 = build_function_type (long_long_unsigned_type_node, void_list_node);
11995 tree v4sf_ftype_void
11996 = build_function_type (V4SF_type_node, void_list_node);
11997 tree v2si_ftype_v4sf
11998 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
11999 /* Loads/stores. */
12000 tree void_ftype_v8qi_v8qi_pchar
12001 = build_function_type_list (void_type_node,
12002 V8QI_type_node, V8QI_type_node,
12003 pchar_type_node, NULL_TREE);
12004 tree v4sf_ftype_pfloat
12005 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12006 /* @@@ the type is bogus */
12007 tree v4sf_ftype_v4sf_pv2si
12008 = build_function_type_list (V4SF_type_node,
12009 V4SF_type_node, pv2di_type_node, NULL_TREE);
12010 tree void_ftype_pv2si_v4sf
12011 = build_function_type_list (void_type_node,
12012 pv2di_type_node, V4SF_type_node, NULL_TREE);
12013 tree void_ftype_pfloat_v4sf
12014 = build_function_type_list (void_type_node,
12015 pfloat_type_node, V4SF_type_node, NULL_TREE);
12016 tree void_ftype_pdi_di
12017 = build_function_type_list (void_type_node,
12018 pdi_type_node, long_long_unsigned_type_node,
12019 NULL_TREE);
12020 tree void_ftype_pv2di_v2di
12021 = build_function_type_list (void_type_node,
12022 pv2di_type_node, V2DI_type_node, NULL_TREE);
12023 /* Normal vector unops. */
12024 tree v4sf_ftype_v4sf
12025 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12026
12027 /* Normal vector binops. */
12028 tree v4sf_ftype_v4sf_v4sf
12029 = build_function_type_list (V4SF_type_node,
12030 V4SF_type_node, V4SF_type_node, NULL_TREE);
12031 tree v8qi_ftype_v8qi_v8qi
12032 = build_function_type_list (V8QI_type_node,
12033 V8QI_type_node, V8QI_type_node, NULL_TREE);
12034 tree v4hi_ftype_v4hi_v4hi
12035 = build_function_type_list (V4HI_type_node,
12036 V4HI_type_node, V4HI_type_node, NULL_TREE);
12037 tree v2si_ftype_v2si_v2si
12038 = build_function_type_list (V2SI_type_node,
12039 V2SI_type_node, V2SI_type_node, NULL_TREE);
12040 tree di_ftype_di_di
12041 = build_function_type_list (long_long_unsigned_type_node,
12042 long_long_unsigned_type_node,
12043 long_long_unsigned_type_node, NULL_TREE);
12044
12045 tree v2si_ftype_v2sf
12046 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12047 tree v2sf_ftype_v2si
12048 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12049 tree v2si_ftype_v2si
12050 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12051 tree v2sf_ftype_v2sf
12052 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12053 tree v2sf_ftype_v2sf_v2sf
12054 = build_function_type_list (V2SF_type_node,
12055 V2SF_type_node, V2SF_type_node, NULL_TREE);
12056 tree v2si_ftype_v2sf_v2sf
12057 = build_function_type_list (V2SI_type_node,
12058 V2SF_type_node, V2SF_type_node, NULL_TREE);
12059 tree pint_type_node = build_pointer_type (integer_type_node);
12060 tree pdouble_type_node = build_pointer_type (double_type_node);
12061 tree int_ftype_v2df_v2df
12062 = build_function_type_list (integer_type_node,
12063 V2DF_type_node, V2DF_type_node, NULL_TREE);
12064
12065 tree ti_ftype_void
12066 = build_function_type (intTI_type_node, void_list_node);
12067 tree ti_ftype_ti_ti
12068 = build_function_type_list (intTI_type_node,
12069 intTI_type_node, intTI_type_node, NULL_TREE);
12070 tree void_ftype_pvoid
12071 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12072 tree v2di_ftype_di
12073 = build_function_type_list (V2DI_type_node,
12074 long_long_unsigned_type_node, NULL_TREE);
12075 tree v4sf_ftype_v4si
12076 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12077 tree v4si_ftype_v4sf
12078 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12079 tree v2df_ftype_v4si
12080 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12081 tree v4si_ftype_v2df
12082 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12083 tree v2si_ftype_v2df
12084 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12085 tree v4sf_ftype_v2df
12086 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12087 tree v2df_ftype_v2si
12088 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12089 tree v2df_ftype_v4sf
12090 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12091 tree int_ftype_v2df
12092 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12093 tree v2df_ftype_v2df_int
12094 = build_function_type_list (V2DF_type_node,
12095 V2DF_type_node, integer_type_node, NULL_TREE);
12096 tree v4sf_ftype_v4sf_v2df
12097 = build_function_type_list (V4SF_type_node,
12098 V4SF_type_node, V2DF_type_node, NULL_TREE);
12099 tree v2df_ftype_v2df_v4sf
12100 = build_function_type_list (V2DF_type_node,
12101 V2DF_type_node, V4SF_type_node, NULL_TREE);
12102 tree v2df_ftype_v2df_v2df_int
12103 = build_function_type_list (V2DF_type_node,
12104 V2DF_type_node, V2DF_type_node,
12105 integer_type_node,
12106 NULL_TREE);
12107 tree v2df_ftype_v2df_pv2si
12108 = build_function_type_list (V2DF_type_node,
12109 V2DF_type_node, pv2si_type_node, NULL_TREE);
12110 tree void_ftype_pv2si_v2df
12111 = build_function_type_list (void_type_node,
12112 pv2si_type_node, V2DF_type_node, NULL_TREE);
12113 tree void_ftype_pdouble_v2df
12114 = build_function_type_list (void_type_node,
12115 pdouble_type_node, V2DF_type_node, NULL_TREE);
12116 tree void_ftype_pint_int
12117 = build_function_type_list (void_type_node,
12118 pint_type_node, integer_type_node, NULL_TREE);
12119 tree void_ftype_v16qi_v16qi_pchar
12120 = build_function_type_list (void_type_node,
12121 V16QI_type_node, V16QI_type_node,
12122 pchar_type_node, NULL_TREE);
12123 tree v2df_ftype_pdouble
12124 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12125 tree v2df_ftype_v2df_v2df
12126 = build_function_type_list (V2DF_type_node,
12127 V2DF_type_node, V2DF_type_node, NULL_TREE);
12128 tree v16qi_ftype_v16qi_v16qi
12129 = build_function_type_list (V16QI_type_node,
12130 V16QI_type_node, V16QI_type_node, NULL_TREE);
12131 tree v8hi_ftype_v8hi_v8hi
12132 = build_function_type_list (V8HI_type_node,
12133 V8HI_type_node, V8HI_type_node, NULL_TREE);
12134 tree v4si_ftype_v4si_v4si
12135 = build_function_type_list (V4SI_type_node,
12136 V4SI_type_node, V4SI_type_node, NULL_TREE);
12137 tree v2di_ftype_v2di_v2di
12138 = build_function_type_list (V2DI_type_node,
12139 V2DI_type_node, V2DI_type_node, NULL_TREE);
12140 tree v2di_ftype_v2df_v2df
12141 = build_function_type_list (V2DI_type_node,
12142 V2DF_type_node, V2DF_type_node, NULL_TREE);
12143 tree v2df_ftype_v2df
12144 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12145 tree v2df_ftype_double
12146 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12147 tree v2df_ftype_double_double
12148 = build_function_type_list (V2DF_type_node,
12149 double_type_node, double_type_node, NULL_TREE);
12150 tree int_ftype_v8hi_int
12151 = build_function_type_list (integer_type_node,
12152 V8HI_type_node, integer_type_node, NULL_TREE);
12153 tree v8hi_ftype_v8hi_int_int
12154 = build_function_type_list (V8HI_type_node,
12155 V8HI_type_node, integer_type_node,
12156 integer_type_node, NULL_TREE);
12157 tree v2di_ftype_v2di_int
12158 = build_function_type_list (V2DI_type_node,
12159 V2DI_type_node, integer_type_node, NULL_TREE);
12160 tree v4si_ftype_v4si_int
12161 = build_function_type_list (V4SI_type_node,
12162 V4SI_type_node, integer_type_node, NULL_TREE);
12163 tree v8hi_ftype_v8hi_int
12164 = build_function_type_list (V8HI_type_node,
12165 V8HI_type_node, integer_type_node, NULL_TREE);
12166 tree v8hi_ftype_v8hi_v2di
12167 = build_function_type_list (V8HI_type_node,
12168 V8HI_type_node, V2DI_type_node, NULL_TREE);
12169 tree v4si_ftype_v4si_v2di
12170 = build_function_type_list (V4SI_type_node,
12171 V4SI_type_node, V2DI_type_node, NULL_TREE);
12172 tree v4si_ftype_v8hi_v8hi
12173 = build_function_type_list (V4SI_type_node,
12174 V8HI_type_node, V8HI_type_node, NULL_TREE);
12175 tree di_ftype_v8qi_v8qi
12176 = build_function_type_list (long_long_unsigned_type_node,
12177 V8QI_type_node, V8QI_type_node, NULL_TREE);
12178 tree v2di_ftype_v16qi_v16qi
12179 = build_function_type_list (V2DI_type_node,
12180 V16QI_type_node, V16QI_type_node, NULL_TREE);
12181 tree int_ftype_v16qi
12182 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12183
12184 /* Add all builtins that are more or less simple operations on two
12185 operands. */
12186 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12187 {
12188 /* Use one of the operands; the target can have a different mode for
12189 mask-generating compares. */
12190 enum machine_mode mode;
12191 tree type;
12192
12193 if (d->name == 0)
12194 continue;
12195 mode = insn_data[d->icode].operand[1].mode;
12196
12197 switch (mode)
12198 {
12199 case V16QImode:
12200 type = v16qi_ftype_v16qi_v16qi;
12201 break;
12202 case V8HImode:
12203 type = v8hi_ftype_v8hi_v8hi;
12204 break;
12205 case V4SImode:
12206 type = v4si_ftype_v4si_v4si;
12207 break;
12208 case V2DImode:
12209 type = v2di_ftype_v2di_v2di;
12210 break;
12211 case V2DFmode:
12212 type = v2df_ftype_v2df_v2df;
12213 break;
12214 case TImode:
12215 type = ti_ftype_ti_ti;
12216 break;
12217 case V4SFmode:
12218 type = v4sf_ftype_v4sf_v4sf;
12219 break;
12220 case V8QImode:
12221 type = v8qi_ftype_v8qi_v8qi;
12222 break;
12223 case V4HImode:
12224 type = v4hi_ftype_v4hi_v4hi;
12225 break;
12226 case V2SImode:
12227 type = v2si_ftype_v2si_v2si;
12228 break;
12229 case DImode:
12230 type = di_ftype_di_di;
12231 break;
12232
12233 default:
12234 abort ();
12235 }
12236
12237 /* Override for comparisons. */
12238 if (d->icode == CODE_FOR_maskcmpv4sf3
12239 || d->icode == CODE_FOR_maskncmpv4sf3
12240 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12241 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12242 type = v4si_ftype_v4sf_v4sf;
12243
12244 if (d->icode == CODE_FOR_maskcmpv2df3
12245 || d->icode == CODE_FOR_maskncmpv2df3
12246 || d->icode == CODE_FOR_vmmaskcmpv2df3
12247 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12248 type = v2di_ftype_v2df_v2df;
12249
12250 def_builtin (d->mask, d->name, type, d->code);
12251 }
12252
12253 /* Add the remaining MMX insns with somewhat more complicated types. */
12254 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12255 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12256 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12257 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12258 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12259 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12260 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12261
12262 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12263 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12264 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12265
12266 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12267 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12268
12269 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12270 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12271
12272 /* comi/ucomi insns. */
12273 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12274 if (d->mask == MASK_SSE2)
12275 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12276 else
12277 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12278
12279 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12280 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12281 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12282
12283 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12284 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12285 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12286 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12287 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12288 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12289
12290 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12291 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12292 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12293 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12294
12295 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12296 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12297
12298 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12299
12300 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12301 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12302 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12303 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12304 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12305 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12306
12307 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12308 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12309 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12310 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12311
12312 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12313 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12314 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12315 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12316
12317 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12318
12319 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12320
12321 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12322 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12323 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12324 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12325 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12326 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12327
12328 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12329
12330 /* Original 3DNow! */
12331 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12332 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12333 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12334 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12335 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12336 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12337 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12338 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12339 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12340 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12341 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12342 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12343 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12344 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12345 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12346 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12347 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12349 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12350 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12351
12352 /* 3DNow! extension as used in the Athlon CPU. */
12353 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12354 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12355 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12356 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12357 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12358 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12359
12360 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12361
12362 /* SSE2 */
12363 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12364 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12365
12366 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12367 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12368
12369 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12370 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12371 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12372 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12373 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12374 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12375
12376 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12377 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12378 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12379 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12380
12381 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12382 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12383 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12384 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12385 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12386
12387 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12388 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12389 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12390 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12391
12392 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12393 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12394
12395 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12396
12397 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12398 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12399
12400 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12401 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12402 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12403 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12404 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12405
12406 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12407
12408 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12409 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12410
12411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12412 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12413 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12414
12415 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12416 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12417 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12418
12419 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12420 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12421 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12422 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12423 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12424 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12425 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12426
12427 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12428 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12430
12431 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12433 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12434
12435 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12436 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12438
12439 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12440 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12441
12442 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12443 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12445
12446 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12447 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12449
12450 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12451 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12452
12453 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12454 }
12455
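/* Illustrative example (not part of the original source): the def_builtin
   calls above only register the builtins and their signatures with the
   front end.  Assuming the __v8hi/__v4si vector typedefs from the SSE2
   intrinsics header and compilation with SSE2 enabled, a user-level call
   such as

     __v4si
     dot_step (__v8hi a, __v8hi b)
     {
       return __builtin_ia32_pmaddwd128 (a, b);
     }

   is type-checked against the v4si_ftype_v8hi_v8hi signature registered
   for IX86_BUILTIN_PMADDWD128 and later expanded by ix86_expand_builtin
   below.  */
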
12456 /* Errors in the source file can cause expand_expr to return const0_rtx
12457 where we expect a vector. To avoid crashing, use one of the vector
12458 clear instructions. */
12459 static rtx
12460 safe_vector_operand (x, mode)
12461 rtx x;
12462 enum machine_mode mode;
12463 {
12464 if (x != const0_rtx)
12465 return x;
12466 x = gen_reg_rtx (mode);
12467
12468 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12469 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12470 : gen_rtx_SUBREG (DImode, x, 0)));
12471 else
12472 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12473 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12474 return x;
12475 }
12476
12477 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12478
12479 static rtx
12480 ix86_expand_binop_builtin (icode, arglist, target)
12481 enum insn_code icode;
12482 tree arglist;
12483 rtx target;
12484 {
12485 rtx pat;
12486 tree arg0 = TREE_VALUE (arglist);
12487 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12488 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12489 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12490 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12491 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12492 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12493
12494 if (VECTOR_MODE_P (mode0))
12495 op0 = safe_vector_operand (op0, mode0);
12496 if (VECTOR_MODE_P (mode1))
12497 op1 = safe_vector_operand (op1, mode1);
12498
12499 if (! target
12500 || GET_MODE (target) != tmode
12501 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12502 target = gen_reg_rtx (tmode);
12503
12504 /* In case the insn wants input operands in modes different from
12505 the result, abort. */
12506 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12507 abort ();
12508
12509 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12510 op0 = copy_to_mode_reg (mode0, op0);
12511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12512 op1 = copy_to_mode_reg (mode1, op1);
12513
12514 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12515 yet one of the two must not be a memory. This is normally enforced
12516 by expanders, but we didn't bother to create one here. */
12517 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12518 op0 = copy_to_mode_reg (mode0, op0);
12519
12520 pat = GEN_FCN (icode) (target, op0, op1);
12521 if (! pat)
12522 return 0;
12523 emit_insn (pat);
12524 return target;
12525 }
12526
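/* Usage sketch (added for illustration): ix86_expand_builtin dispatches
   most two-operand builtins to this helper, for example

     case IX86_BUILTIN_PFADD:
       return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

   The insn_data entry for the given icode supplies the operand modes and
   predicates checked above.  */
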
12527 /* In type_for_mode we restrict the ability to create TImode types
12528 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12529 to have a V4SFmode signature. Convert them in-place to TImode. */
12530
12531 static rtx
12532 ix86_expand_timode_binop_builtin (icode, arglist, target)
12533 enum insn_code icode;
12534 tree arglist;
12535 rtx target;
12536 {
12537 rtx pat;
12538 tree arg0 = TREE_VALUE (arglist);
12539 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12540 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12541 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12542
12543 op0 = gen_lowpart (TImode, op0);
12544 op1 = gen_lowpart (TImode, op1);
12545 target = gen_reg_rtx (TImode);
12546
12547 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12548 op0 = copy_to_mode_reg (TImode, op0);
12549 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12550 op1 = copy_to_mode_reg (TImode, op1);
12551
12552 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12553 yet one of the two must not be a memory. This is normally enforced
12554 by expanders, but we didn't bother to create one here. */
12555 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12556 op0 = copy_to_mode_reg (TImode, op0);
12557
12558 pat = GEN_FCN (icode) (target, op0, op1);
12559 if (! pat)
12560 return 0;
12561 emit_insn (pat);
12562
12563 return gen_lowpart (V4SFmode, target);
12564 }
12565
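/* Usage sketch (added for illustration): the SSE logical builtins are the
   callers of this helper, for example

     case IX86_BUILTIN_ANDPS:
       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
                                                arglist, target);

   The operands arrive with the V4SFmode signature and are reinterpreted as
   TImode via gen_lowpart before the logical insn is emitted.  */
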
12566 /* Subroutine of ix86_expand_builtin to take care of stores. */
12567
12568 static rtx
12569 ix86_expand_store_builtin (icode, arglist)
12570 enum insn_code icode;
12571 tree arglist;
12572 {
12573 rtx pat;
12574 tree arg0 = TREE_VALUE (arglist);
12575 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12576 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12577 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12578 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12579 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12580
12581 if (VECTOR_MODE_P (mode1))
12582 op1 = safe_vector_operand (op1, mode1);
12583
12584 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12585
12586 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12587 op1 = copy_to_mode_reg (mode1, op1);
12588
12589 pat = GEN_FCN (icode) (op0, op1);
12590 if (pat)
12591 emit_insn (pat);
12592 return 0;
12593 }
12594
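/* Usage sketch (added for illustration): the non-temporal and unaligned
   store builtins are expanded through this helper, for example

     case IX86_BUILTIN_MOVNTPS:
       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);

   The first argument is the destination pointer, which is wrapped in a MEM
   of the insn's operand 0 mode; the second argument is the value stored.  */
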
12595 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12596
12597 static rtx
12598 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12599 enum insn_code icode;
12600 tree arglist;
12601 rtx target;
12602 int do_load;
12603 {
12604 rtx pat;
12605 tree arg0 = TREE_VALUE (arglist);
12606 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12607 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12608 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12609
12610 if (! target
12611 || GET_MODE (target) != tmode
12612 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12613 target = gen_reg_rtx (tmode);
12614 if (do_load)
12615 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12616 else
12617 {
12618 if (VECTOR_MODE_P (mode0))
12619 op0 = safe_vector_operand (op0, mode0);
12620
12621 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12622 op0 = copy_to_mode_reg (mode0, op0);
12623 }
12624
12625 pat = GEN_FCN (icode) (target, op0);
12626 if (! pat)
12627 return 0;
12628 emit_insn (pat);
12629 return target;
12630 }
12631
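/* Usage sketch (added for illustration): DO_LOAD distinguishes plain unary
   builtins from load-style builtins, for example

     case IX86_BUILTIN_PF2ID:
       return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
     case IX86_BUILTIN_LOADAPS:
       return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

   With DO_LOAD nonzero the single argument is treated as a pointer and
   dereferenced through a MEM in the insn's input mode.  */
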
12632 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12633 sqrtss, rsqrtss, rcpss. */
12634
12635 static rtx
12636 ix86_expand_unop1_builtin (icode, arglist, target)
12637 enum insn_code icode;
12638 tree arglist;
12639 rtx target;
12640 {
12641 rtx pat;
12642 tree arg0 = TREE_VALUE (arglist);
12643 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12644 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12645 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12646
12647 if (! target
12648 || GET_MODE (target) != tmode
12649 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12650 target = gen_reg_rtx (tmode);
12651
12652 if (VECTOR_MODE_P (mode0))
12653 op0 = safe_vector_operand (op0, mode0);
12654
12655 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12656 op0 = copy_to_mode_reg (mode0, op0);
12657
12658 op1 = op0;
12659 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12660 op1 = copy_to_mode_reg (mode0, op1);
12661
12662 pat = GEN_FCN (icode) (target, op0, op1);
12663 if (! pat)
12664 return 0;
12665 emit_insn (pat);
12666 return target;
12667 }
12668
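/* Usage sketch (added for illustration): these scalar patterns keep the
   upper elements of the source vector, so the expander passes the single
   argument as both input operands, roughly as in

     case IX86_BUILTIN_SQRTSS:
       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
*/
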
12669 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12670
12671 static rtx
12672 ix86_expand_sse_compare (d, arglist, target)
12673 const struct builtin_description *d;
12674 tree arglist;
12675 rtx target;
12676 {
12677 rtx pat;
12678 tree arg0 = TREE_VALUE (arglist);
12679 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12680 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12681 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12682 rtx op2;
12683 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12684 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12685 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12686 enum rtx_code comparison = d->comparison;
12687
12688 if (VECTOR_MODE_P (mode0))
12689 op0 = safe_vector_operand (op0, mode0);
12690 if (VECTOR_MODE_P (mode1))
12691 op1 = safe_vector_operand (op1, mode1);
12692
12693 /* Swap operands if we have a comparison that isn't available in
12694 hardware. */
12695 if (d->flag)
12696 {
12697 rtx tmp = gen_reg_rtx (mode1);
12698 emit_move_insn (tmp, op1);
12699 op1 = op0;
12700 op0 = tmp;
12701 }
12702
12703 if (! target
12704 || GET_MODE (target) != tmode
12705 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12706 target = gen_reg_rtx (tmode);
12707
12708 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12709 op0 = copy_to_mode_reg (mode0, op0);
12710 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12711 op1 = copy_to_mode_reg (mode1, op1);
12712
12713 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12714 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12715 if (! pat)
12716 return 0;
12717 emit_insn (pat);
12718 return target;
12719 }
12720
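/* Note added for illustration: the compare builtins reach this function
   from the bdesc_2arg table when their icode is one of the (vm)maskcmp or
   (vm)maskncmp patterns.  The SSE cmpps/cmpsd predicates only provide one
   direction of the ordered comparisons, so e.g. a GT compare is produced
   by swapping the operands of an LT compare; D->FLAG marks the table
   entries that need this swap.  */
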
12721 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12722
12723 static rtx
12724 ix86_expand_sse_comi (d, arglist, target)
12725 const struct builtin_description *d;
12726 tree arglist;
12727 rtx target;
12728 {
12729 rtx pat;
12730 tree arg0 = TREE_VALUE (arglist);
12731 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12732 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12733 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12734 rtx op2;
12735 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12736 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12737 enum rtx_code comparison = d->comparison;
12738
12739 if (VECTOR_MODE_P (mode0))
12740 op0 = safe_vector_operand (op0, mode0);
12741 if (VECTOR_MODE_P (mode1))
12742 op1 = safe_vector_operand (op1, mode1);
12743
12744 /* Swap operands if we have a comparison that isn't available in
12745 hardware. */
12746 if (d->flag)
12747 {
12748 rtx tmp = op1;
12749 op1 = op0;
12750 op0 = tmp;
12751 }
12752
12753 target = gen_reg_rtx (SImode);
12754 emit_move_insn (target, const0_rtx);
12755 target = gen_rtx_SUBREG (QImode, target, 0);
12756
12757 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12758 op0 = copy_to_mode_reg (mode0, op0);
12759 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12760 op1 = copy_to_mode_reg (mode1, op1);
12761
12762 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12763 pat = GEN_FCN (d->icode) (op0, op1, op2);
12764 if (! pat)
12765 return 0;
12766 emit_insn (pat);
12767 emit_insn (gen_rtx_SET (VOIDmode,
12768 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12769 gen_rtx_fmt_ee (comparison, QImode,
12770 gen_rtx_REG (CCmode, FLAGS_REG),
12771 const0_rtx)));
12772
12773 return SUBREG_REG (target);
12774 }
12775
12776 /* Expand an expression EXP that calls a built-in function,
12777 with result going to TARGET if that's convenient
12778 (and in mode MODE if that's convenient).
12779 SUBTARGET may be used as the target for computing one of EXP's operands.
12780 IGNORE is nonzero if the value is to be ignored. */
12781
12782 rtx
12783 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12784 tree exp;
12785 rtx target;
12786 rtx subtarget ATTRIBUTE_UNUSED;
12787 enum machine_mode mode ATTRIBUTE_UNUSED;
12788 int ignore ATTRIBUTE_UNUSED;
12789 {
12790 const struct builtin_description *d;
12791 size_t i;
12792 enum insn_code icode;
12793 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12794 tree arglist = TREE_OPERAND (exp, 1);
12795 tree arg0, arg1, arg2;
12796 rtx op0, op1, op2, pat;
12797 enum machine_mode tmode, mode0, mode1, mode2;
12798 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12799
12800 switch (fcode)
12801 {
12802 case IX86_BUILTIN_EMMS:
12803 emit_insn (gen_emms ());
12804 return 0;
12805
12806 case IX86_BUILTIN_SFENCE:
12807 emit_insn (gen_sfence ());
12808 return 0;
12809
12810 case IX86_BUILTIN_PEXTRW:
12811 case IX86_BUILTIN_PEXTRW128:
12812 icode = (fcode == IX86_BUILTIN_PEXTRW
12813 ? CODE_FOR_mmx_pextrw
12814 : CODE_FOR_sse2_pextrw);
12815 arg0 = TREE_VALUE (arglist);
12816 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12817 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12818 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12819 tmode = insn_data[icode].operand[0].mode;
12820 mode0 = insn_data[icode].operand[1].mode;
12821 mode1 = insn_data[icode].operand[2].mode;
12822
12823 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12824 op0 = copy_to_mode_reg (mode0, op0);
12825 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12826 {
12827 /* @@@ better error message */
12828 error ("selector must be an immediate");
12829 return gen_reg_rtx (tmode);
12830 }
12831 if (target == 0
12832 || GET_MODE (target) != tmode
12833 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12834 target = gen_reg_rtx (tmode);
12835 pat = GEN_FCN (icode) (target, op0, op1);
12836 if (! pat)
12837 return 0;
12838 emit_insn (pat);
12839 return target;
12840
12841 case IX86_BUILTIN_PINSRW:
12842 case IX86_BUILTIN_PINSRW128:
12843 icode = (fcode == IX86_BUILTIN_PINSRW
12844 ? CODE_FOR_mmx_pinsrw
12845 : CODE_FOR_sse2_pinsrw);
12846 arg0 = TREE_VALUE (arglist);
12847 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12848 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12849 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12850 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12851 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12852 tmode = insn_data[icode].operand[0].mode;
12853 mode0 = insn_data[icode].operand[1].mode;
12854 mode1 = insn_data[icode].operand[2].mode;
12855 mode2 = insn_data[icode].operand[3].mode;
12856
12857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12858 op0 = copy_to_mode_reg (mode0, op0);
12859 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12860 op1 = copy_to_mode_reg (mode1, op1);
12861 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12862 {
12863 /* @@@ better error message */
12864 error ("selector must be an immediate");
12865 return const0_rtx;
12866 }
12867 if (target == 0
12868 || GET_MODE (target) != tmode
12869 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12870 target = gen_reg_rtx (tmode);
12871 pat = GEN_FCN (icode) (target, op0, op1, op2);
12872 if (! pat)
12873 return 0;
12874 emit_insn (pat);
12875 return target;
12876
12877 case IX86_BUILTIN_MASKMOVQ:
12878 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12879 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12880 : CODE_FOR_sse2_maskmovdqu);
12881 /* Note the arg order is different from the operand order. */
12882 arg1 = TREE_VALUE (arglist);
12883 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12884 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12885 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12886 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12887 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12888 mode0 = insn_data[icode].operand[0].mode;
12889 mode1 = insn_data[icode].operand[1].mode;
12890 mode2 = insn_data[icode].operand[2].mode;
12891
12892 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12893 op0 = copy_to_mode_reg (mode0, op0);
12894 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12895 op1 = copy_to_mode_reg (mode1, op1);
12896 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12897 op2 = copy_to_mode_reg (mode2, op2);
12898 pat = GEN_FCN (icode) (op0, op1, op2);
12899 if (! pat)
12900 return 0;
12901 emit_insn (pat);
12902 return 0;
12903
12904 case IX86_BUILTIN_SQRTSS:
12905 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12906 case IX86_BUILTIN_RSQRTSS:
12907 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12908 case IX86_BUILTIN_RCPSS:
12909 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12910
12911 case IX86_BUILTIN_ANDPS:
12912 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12913 arglist, target);
12914 case IX86_BUILTIN_ANDNPS:
12915 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12916 arglist, target);
12917 case IX86_BUILTIN_ORPS:
12918 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12919 arglist, target);
12920 case IX86_BUILTIN_XORPS:
12921 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12922 arglist, target);
12923
12924 case IX86_BUILTIN_LOADAPS:
12925 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12926
12927 case IX86_BUILTIN_LOADUPS:
12928 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12929
12930 case IX86_BUILTIN_STOREAPS:
12931 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12932 case IX86_BUILTIN_STOREUPS:
12933 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12934
12935 case IX86_BUILTIN_LOADSS:
12936 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12937
12938 case IX86_BUILTIN_STORESS:
12939 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12940
12941 case IX86_BUILTIN_LOADHPS:
12942 case IX86_BUILTIN_LOADLPS:
12943 case IX86_BUILTIN_LOADHPD:
12944 case IX86_BUILTIN_LOADLPD:
12945 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12946 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12947 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12948 : CODE_FOR_sse2_movlpd);
12949 arg0 = TREE_VALUE (arglist);
12950 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12951 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12952 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12953 tmode = insn_data[icode].operand[0].mode;
12954 mode0 = insn_data[icode].operand[1].mode;
12955 mode1 = insn_data[icode].operand[2].mode;
12956
12957 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12958 op0 = copy_to_mode_reg (mode0, op0);
12959 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12960 if (target == 0
12961 || GET_MODE (target) != tmode
12962 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12963 target = gen_reg_rtx (tmode);
12964 pat = GEN_FCN (icode) (target, op0, op1);
12965 if (! pat)
12966 return 0;
12967 emit_insn (pat);
12968 return target;
12969
12970 case IX86_BUILTIN_STOREHPS:
12971 case IX86_BUILTIN_STORELPS:
12972 case IX86_BUILTIN_STOREHPD:
12973 case IX86_BUILTIN_STORELPD:
12974 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12975 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12976 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12977 : CODE_FOR_sse2_movlpd);
12978 arg0 = TREE_VALUE (arglist);
12979 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12980 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12981 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12982 mode0 = insn_data[icode].operand[1].mode;
12983 mode1 = insn_data[icode].operand[2].mode;
12984
12985 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12986 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12987 op1 = copy_to_mode_reg (mode1, op1);
12988
12989 pat = GEN_FCN (icode) (op0, op0, op1);
12990 if (! pat)
12991 return 0;
12992 emit_insn (pat);
12993 return 0;
12994
12995 case IX86_BUILTIN_MOVNTPS:
12996 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12997 case IX86_BUILTIN_MOVNTQ:
12998 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12999
13000 case IX86_BUILTIN_LDMXCSR:
13001 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13002 target = assign_386_stack_local (SImode, 0);
13003 emit_move_insn (target, op0);
13004 emit_insn (gen_ldmxcsr (target));
13005 return 0;
13006
13007 case IX86_BUILTIN_STMXCSR:
13008 target = assign_386_stack_local (SImode, 0);
13009 emit_insn (gen_stmxcsr (target));
13010 return copy_to_mode_reg (SImode, target);
13011
13012 case IX86_BUILTIN_SHUFPS:
13013 case IX86_BUILTIN_SHUFPD:
13014 icode = (fcode == IX86_BUILTIN_SHUFPS
13015 ? CODE_FOR_sse_shufps
13016 : CODE_FOR_sse2_shufpd);
13017 arg0 = TREE_VALUE (arglist);
13018 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13019 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13020 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13021 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13022 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13023 tmode = insn_data[icode].operand[0].mode;
13024 mode0 = insn_data[icode].operand[1].mode;
13025 mode1 = insn_data[icode].operand[2].mode;
13026 mode2 = insn_data[icode].operand[3].mode;
13027
13028 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13029 op0 = copy_to_mode_reg (mode0, op0);
13030 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13031 op1 = copy_to_mode_reg (mode1, op1);
13032 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13033 {
13034 /* @@@ better error message */
13035 error ("mask must be an immediate");
13036 return gen_reg_rtx (tmode);
13037 }
13038 if (target == 0
13039 || GET_MODE (target) != tmode
13040 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13041 target = gen_reg_rtx (tmode);
13042 pat = GEN_FCN (icode) (target, op0, op1, op2);
13043 if (! pat)
13044 return 0;
13045 emit_insn (pat);
13046 return target;
13047
13048 case IX86_BUILTIN_PSHUFW:
13049 case IX86_BUILTIN_PSHUFD:
13050 case IX86_BUILTIN_PSHUFHW:
13051 case IX86_BUILTIN_PSHUFLW:
13052 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13053 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13054 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13055 : CODE_FOR_mmx_pshufw);
13056 arg0 = TREE_VALUE (arglist);
13057 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13058 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13059 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13060 tmode = insn_data[icode].operand[0].mode;
13061 mode1 = insn_data[icode].operand[1].mode;
13062 mode2 = insn_data[icode].operand[2].mode;
13063
13064 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13065 op0 = copy_to_mode_reg (mode1, op0);
13066 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13067 {
13068 /* @@@ better error message */
13069 error ("mask must be an immediate");
13070 return const0_rtx;
13071 }
13072 if (target == 0
13073 || GET_MODE (target) != tmode
13074 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13075 target = gen_reg_rtx (tmode);
13076 pat = GEN_FCN (icode) (target, op0, op1);
13077 if (! pat)
13078 return 0;
13079 emit_insn (pat);
13080 return target;
13081
13082 case IX86_BUILTIN_FEMMS:
13083 emit_insn (gen_femms ());
13084 return NULL_RTX;
13085
13086 case IX86_BUILTIN_PAVGUSB:
13087 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13088
13089 case IX86_BUILTIN_PF2ID:
13090 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13091
13092 case IX86_BUILTIN_PFACC:
13093 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13094
13095 case IX86_BUILTIN_PFADD:
13096 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13097
13098 case IX86_BUILTIN_PFCMPEQ:
13099 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13100
13101 case IX86_BUILTIN_PFCMPGE:
13102 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13103
13104 case IX86_BUILTIN_PFCMPGT:
13105 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13106
13107 case IX86_BUILTIN_PFMAX:
13108 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13109
13110 case IX86_BUILTIN_PFMIN:
13111 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13112
13113 case IX86_BUILTIN_PFMUL:
13114 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13115
13116 case IX86_BUILTIN_PFRCP:
13117 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13118
13119 case IX86_BUILTIN_PFRCPIT1:
13120 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13121
13122 case IX86_BUILTIN_PFRCPIT2:
13123 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13124
13125 case IX86_BUILTIN_PFRSQIT1:
13126 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13127
13128 case IX86_BUILTIN_PFRSQRT:
13129 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13130
13131 case IX86_BUILTIN_PFSUB:
13132 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13133
13134 case IX86_BUILTIN_PFSUBR:
13135 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13136
13137 case IX86_BUILTIN_PI2FD:
13138 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13139
13140 case IX86_BUILTIN_PMULHRW:
13141 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13142
13143 case IX86_BUILTIN_PF2IW:
13144 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13145
13146 case IX86_BUILTIN_PFNACC:
13147 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13148
13149 case IX86_BUILTIN_PFPNACC:
13150 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13151
13152 case IX86_BUILTIN_PI2FW:
13153 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13154
13155 case IX86_BUILTIN_PSWAPDSI:
13156 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13157
13158 case IX86_BUILTIN_PSWAPDSF:
13159 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13160
13161 case IX86_BUILTIN_SSE_ZERO:
13162 target = gen_reg_rtx (V4SFmode);
13163 emit_insn (gen_sse_clrv4sf (target));
13164 return target;
13165
13166 case IX86_BUILTIN_MMX_ZERO:
13167 target = gen_reg_rtx (DImode);
13168 emit_insn (gen_mmx_clrdi (target));
13169 return target;
13170
13171 case IX86_BUILTIN_SQRTSD:
13172 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13173 case IX86_BUILTIN_LOADAPD:
13174 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13175 case IX86_BUILTIN_LOADUPD:
13176 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13177
13178 case IX86_BUILTIN_STOREAPD:
13179 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13180 case IX86_BUILTIN_STOREUPD:
13181 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13182
13183 case IX86_BUILTIN_LOADSD:
13184 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13185
13186 case IX86_BUILTIN_STORESD:
13187 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13188
13189 case IX86_BUILTIN_SETPD1:
13190 target = assign_386_stack_local (DFmode, 0);
13191 arg0 = TREE_VALUE (arglist);
13192 emit_move_insn (adjust_address (target, DFmode, 0),
13193 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13194 op0 = gen_reg_rtx (V2DFmode);
13195 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13196 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13197 return op0;
13198
13199 case IX86_BUILTIN_SETPD:
13200 target = assign_386_stack_local (V2DFmode, 0);
13201 arg0 = TREE_VALUE (arglist);
13202 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13203 emit_move_insn (adjust_address (target, DFmode, 0),
13204 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13205 emit_move_insn (adjust_address (target, DFmode, 8),
13206 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13207 op0 = gen_reg_rtx (V2DFmode);
13208 emit_insn (gen_sse2_movapd (op0, target));
13209 return op0;
13210
13211 case IX86_BUILTIN_LOADRPD:
13212 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13213 gen_reg_rtx (V2DFmode), 1);
13214 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13215 return target;
13216
13217 case IX86_BUILTIN_LOADPD1:
13218 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13219 gen_reg_rtx (V2DFmode), 1);
13220 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13221 return target;
13222
13223 case IX86_BUILTIN_STOREPD1:
13224 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13225 case IX86_BUILTIN_STORERPD:
13226 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13227
13228 case IX86_BUILTIN_MFENCE:
13229 emit_insn (gen_sse2_mfence ());
13230 return 0;
13231 case IX86_BUILTIN_LFENCE:
13232 emit_insn (gen_sse2_lfence ());
13233 return 0;
13234
13235 case IX86_BUILTIN_CLFLUSH:
13236 arg0 = TREE_VALUE (arglist);
13237 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13238 icode = CODE_FOR_sse2_clflush;
13239 mode0 = insn_data[icode].operand[0].mode;
13240 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13241 op0 = copy_to_mode_reg (mode0, op0);
13242
13243 emit_insn (gen_sse2_clflush (op0));
13244 return 0;
13245
13246 case IX86_BUILTIN_MOVNTPD:
13247 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13248 case IX86_BUILTIN_MOVNTDQ:
13249 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13250 case IX86_BUILTIN_MOVNTI:
13251 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13252
13253 default:
13254 break;
13255 }
13256
13257 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13258 if (d->code == fcode)
13259 {
13260 /* Compares are treated specially. */
13261 if (d->icode == CODE_FOR_maskcmpv4sf3
13262 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13263 || d->icode == CODE_FOR_maskncmpv4sf3
13264 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13265 || d->icode == CODE_FOR_maskcmpv2df3
13266 || d->icode == CODE_FOR_vmmaskcmpv2df3
13267 || d->icode == CODE_FOR_maskncmpv2df3
13268 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13269 return ix86_expand_sse_compare (d, arglist, target);
13270
13271 return ix86_expand_binop_builtin (d->icode, arglist, target);
13272 }
13273
13274 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13275 if (d->code == fcode)
13276 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13277
13278 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13279 if (d->code == fcode)
13280 return ix86_expand_sse_comi (d, arglist, target);
13281
13282 /* @@@ Should really do something sensible here. */
13283 return 0;
13284 }
13285
13286 /* Store OPERAND to memory after reload is completed. This means
13287 that we can't easily use assign_stack_local. */
13288 rtx
13289 ix86_force_to_memory (mode, operand)
13290 enum machine_mode mode;
13291 rtx operand;
13292 {
13293 rtx result;
13294 if (!reload_completed)
13295 abort ();
13296 if (TARGET_64BIT && TARGET_RED_ZONE)
13297 {
13298 result = gen_rtx_MEM (mode,
13299 gen_rtx_PLUS (Pmode,
13300 stack_pointer_rtx,
13301 GEN_INT (-RED_ZONE_SIZE)));
13302 emit_move_insn (result, operand);
13303 }
13304 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13305 {
13306 switch (mode)
13307 {
13308 case HImode:
13309 case SImode:
13310 operand = gen_lowpart (DImode, operand);
13311 /* FALLTHRU */
13312 case DImode:
13313 emit_insn (
13314 gen_rtx_SET (VOIDmode,
13315 gen_rtx_MEM (DImode,
13316 gen_rtx_PRE_DEC (DImode,
13317 stack_pointer_rtx)),
13318 operand));
13319 break;
13320 default:
13321 abort ();
13322 }
13323 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13324 }
13325 else
13326 {
13327 switch (mode)
13328 {
13329 case DImode:
13330 {
13331 rtx operands[2];
13332 split_di (&operand, 1, operands, operands + 1);
13333 emit_insn (
13334 gen_rtx_SET (VOIDmode,
13335 gen_rtx_MEM (SImode,
13336 gen_rtx_PRE_DEC (Pmode,
13337 stack_pointer_rtx)),
13338 operands[1]));
13339 emit_insn (
13340 gen_rtx_SET (VOIDmode,
13341 gen_rtx_MEM (SImode,
13342 gen_rtx_PRE_DEC (Pmode,
13343 stack_pointer_rtx)),
13344 operands[0]));
13345 }
13346 break;
13347 case HImode:
13348 /* It is better to store HImodes as SImodes. */
13349 if (!TARGET_PARTIAL_REG_STALL)
13350 operand = gen_lowpart (SImode, operand);
13351 /* FALLTHRU */
13352 case SImode:
13353 emit_insn (
13354 gen_rtx_SET (VOIDmode,
13355 gen_rtx_MEM (GET_MODE (operand),
13356 gen_rtx_PRE_DEC (SImode,
13357 stack_pointer_rtx)),
13358 operand));
13359 break;
13360 default:
13361 abort ();
13362 }
13363 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13364 }
13365 return result;
13366 }
13367
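/* Usage sketch (added for illustration): callers pair this helper with
   ix86_free_from_memory below, roughly

     rtx mem = ix86_force_to_memory (SImode, operands[1]);
     ... emit instructions that read MEM ...
     ix86_free_from_memory (SImode);

   The temporary slot is pushed onto (and later released from) the stack,
   or simply placed in the red zone on 64-bit targets that have one, in
   which case no stack adjustment is needed.  */
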
13368 /* Free the operand from memory. */
13369 void
13370 ix86_free_from_memory (mode)
13371 enum machine_mode mode;
13372 {
13373 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13374 {
13375 int size;
13376
13377 if (mode == DImode || TARGET_64BIT)
13378 size = 8;
13379 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13380 size = 2;
13381 else
13382 size = 4;
13383 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13384 to a pop or add instruction if registers are available. */
13385 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13386 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13387 GEN_INT (size))));
13388 }
13389 }
13390
13391 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13392 QImode must go into class Q_REGS.
13393 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
13394 movdf to do mem-to-mem moves through integer regs. */
13395 enum reg_class
13396 ix86_preferred_reload_class (x, class)
13397 rtx x;
13398 enum reg_class class;
13399 {
13400 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13401 {
13402 /* SSE can't load any constant directly yet. */
13403 if (SSE_CLASS_P (class))
13404 return NO_REGS;
13405 /* Floats can load 0 and 1. */
13406 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13407 {
13408 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13409 if (MAYBE_SSE_CLASS_P (class))
13410 return (reg_class_subset_p (class, GENERAL_REGS)
13411 ? GENERAL_REGS : FLOAT_REGS);
13412 else
13413 return class;
13414 }
13415 /* General regs can load everything. */
13416 if (reg_class_subset_p (class, GENERAL_REGS))
13417 return GENERAL_REGS;
13418 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13419 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13420 return NO_REGS;
13421 }
13422 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13423 return NO_REGS;
13424 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13425 return Q_REGS;
13426 return class;
13427 }
13428
13429 /* If we are copying between general and FP registers, we need a memory
13430 location. The same is true for SSE and MMX registers.
13431
13432 The macro can't work reliably when one of the CLASSES is a class containing
13433 registers from multiple units (SSE, MMX, integer). We avoid this by never
13434 combining those units in a single alternative in the machine description.
13435 Ensure that this constraint holds to avoid unexpected surprises.
13436
13437 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13438 enforce these sanity checks. */
13439 int
13440 ix86_secondary_memory_needed (class1, class2, mode, strict)
13441 enum reg_class class1, class2;
13442 enum machine_mode mode;
13443 int strict;
13444 {
13445 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13446 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13447 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13448 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13449 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13450 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13451 {
13452 if (strict)
13453 abort ();
13454 else
13455 return 1;
13456 }
13457 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13458 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13459 && (mode) != SImode)
13460 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13461 && (mode) != SImode));
13462 }
13463 /* Return the cost of moving data from a register in class CLASS1 to
13464 one in class CLASS2.
13465
13466 It is not required that the cost always equal 2 when FROM is the same as TO;
13467 on some machines it is expensive to move between registers if they are not
13468 general registers. */
13469 int
13470 ix86_register_move_cost (mode, class1, class2)
13471 enum machine_mode mode;
13472 enum reg_class class1, class2;
13473 {
13474 /* In case we require secondary memory, compute the cost of the store followed
13475 by the load. In case of copying from a general purpose register we may emit
13476 multiple stores followed by a single load, causing a memory size mismatch
13477 stall. Count this as an arbitrarily high cost of 20. */
13478 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13479 {
13480 int add_cost = 0;
13481 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13482 add_cost = 20;
13483 return (MEMORY_MOVE_COST (mode, class1, 0)
13484 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13485 }
13486 /* Moves between SSE/MMX and integer unit are expensive. */
13487 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13488 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13489 return ix86_cost->mmxsse_to_integer;
13490 if (MAYBE_FLOAT_CLASS_P (class1))
13491 return ix86_cost->fp_move;
13492 if (MAYBE_SSE_CLASS_P (class1))
13493 return ix86_cost->sse_move;
13494 if (MAYBE_MMX_CLASS_P (class1))
13495 return ix86_cost->mmx_move;
13496 return 2;
13497 }
13498
13499 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13500 int
13501 ix86_hard_regno_mode_ok (regno, mode)
13502 int regno;
13503 enum machine_mode mode;
13504 {
13505 /* Flags and only flags can only hold CCmode values. */
13506 if (CC_REGNO_P (regno))
13507 return GET_MODE_CLASS (mode) == MODE_CC;
13508 if (GET_MODE_CLASS (mode) == MODE_CC
13509 || GET_MODE_CLASS (mode) == MODE_RANDOM
13510 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13511 return 0;
13512 if (FP_REGNO_P (regno))
13513 return VALID_FP_MODE_P (mode);
13514 if (SSE_REGNO_P (regno))
13515 return VALID_SSE_REG_MODE (mode);
13516 if (MMX_REGNO_P (regno))
13517 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13518 /* We handle both integers and floats in the general purpose registers.
13519 In the future we should be able to handle vector modes as well. */
13520 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13521 return 0;
13522 /* Take care with QImode values - they can be in non-QI regs, but then
13523 they do cause partial register stalls. */
13524 if (regno < 4 || mode != QImode || TARGET_64BIT)
13525 return 1;
13526 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13527 }
13528
13529 /* Return the cost of moving data of mode M between a
13530 register and memory. A value of 2 is the default; this cost is
13531 relative to those in `REGISTER_MOVE_COST'.
13532
13533 If moving between registers and memory is more expensive than
13534 between two registers, you should define this macro to express the
13535 relative cost.
13536
13537 Also model the increased cost of moving QImode registers in non-Q_REGS
13538 classes.
13539 */
13540 int
13541 ix86_memory_move_cost (mode, class, in)
13542 enum machine_mode mode;
13543 enum reg_class class;
13544 int in;
13545 {
13546 if (FLOAT_CLASS_P (class))
13547 {
13548 int index;
13549 switch (mode)
13550 {
13551 case SFmode:
13552 index = 0;
13553 break;
13554 case DFmode:
13555 index = 1;
13556 break;
13557 case XFmode:
13558 case TFmode:
13559 index = 2;
13560 break;
13561 default:
13562 return 100;
13563 }
13564 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13565 }
13566 if (SSE_CLASS_P (class))
13567 {
13568 int index;
13569 switch (GET_MODE_SIZE (mode))
13570 {
13571 case 4:
13572 index = 0;
13573 break;
13574 case 8:
13575 index = 1;
13576 break;
13577 case 16:
13578 index = 2;
13579 break;
13580 default:
13581 return 100;
13582 }
13583 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13584 }
13585 if (MMX_CLASS_P (class))
13586 {
13587 int index;
13588 switch (GET_MODE_SIZE (mode))
13589 {
13590 case 4:
13591 index = 0;
13592 break;
13593 case 8:
13594 index = 1;
13595 break;
13596 default:
13597 return 100;
13598 }
13599 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13600 }
13601 switch (GET_MODE_SIZE (mode))
13602 {
13603 case 1:
13604 if (in)
13605 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13606 : ix86_cost->movzbl_load);
13607 else
13608 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13609 : ix86_cost->int_store[0] + 4);
13610 break;
13611 case 2:
13612 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13613 default:
13614 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13615 if (mode == TFmode)
13616 mode = XFmode;
13617 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13618 * (int) GET_MODE_SIZE (mode) / 4);
13619 }
13620 }
13621
13622 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13623 static void
13624 ix86_svr3_asm_out_constructor (symbol, priority)
13625 rtx symbol;
13626 int priority ATTRIBUTE_UNUSED;
13627 {
13628 init_section ();
13629 fputs ("\tpushl $", asm_out_file);
13630 assemble_name (asm_out_file, XSTR (symbol, 0));
13631 fputc ('\n', asm_out_file);
13632 }
13633 #endif
13634
13635 #if TARGET_MACHO
13636
13637 static int current_machopic_label_num;
13638
13639 /* Given a symbol name and its associated stub, write out the
13640 definition of the stub. */
13641
13642 void
13643 machopic_output_stub (file, symb, stub)
13644 FILE *file;
13645 const char *symb, *stub;
13646 {
13647 unsigned int length;
13648 char *binder_name, *symbol_name, lazy_ptr_name[32];
13649 int label = ++current_machopic_label_num;
13650
13651 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13652 symb = (*targetm.strip_name_encoding) (symb);
13653
13654 length = strlen (stub);
13655 binder_name = alloca (length + 32);
13656 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13657
13658 length = strlen (symb);
13659 symbol_name = alloca (length + 32);
13660 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13661
13662 sprintf (lazy_ptr_name, "L%d$lz", label);
13663
13664 if (MACHOPIC_PURE)
13665 machopic_picsymbol_stub_section ();
13666 else
13667 machopic_symbol_stub_section ();
13668
13669 fprintf (file, "%s:\n", stub);
13670 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13671
13672 if (MACHOPIC_PURE)
13673 {
13674 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13675 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13676 fprintf (file, "\tjmp %%edx\n");
13677 }
13678 else
13679 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13680
13681 fprintf (file, "%s:\n", binder_name);
13682
13683 if (MACHOPIC_PURE)
13684 {
13685 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13686 fprintf (file, "\tpushl %%eax\n");
13687 }
13688 else
13689 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13690
13691 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13692
13693 machopic_lazy_symbol_ptr_section ();
13694 fprintf (file, "%s:\n", lazy_ptr_name);
13695 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13696 fprintf (file, "\t.long %s\n", binder_name);
13697 }
13698 #endif /* TARGET_MACHO */
13699
13700 /* Order the registers for the register allocator. */
13701
13702 void
13703 x86_order_regs_for_local_alloc ()
13704 {
13705 int pos = 0;
13706 int i;
13707
13708 /* First allocate the local general purpose registers. */
13709 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13710 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13711 reg_alloc_order [pos++] = i;
13712
13713 /* Global general purpose registers. */
13714 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13715 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13716 reg_alloc_order [pos++] = i;
13717
13718 /* x87 registers come first in case we are doing FP math
13719 using them. */
13720 if (!TARGET_SSE_MATH)
13721 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13722 reg_alloc_order [pos++] = i;
13723
13724 /* SSE registers. */
13725 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13726 reg_alloc_order [pos++] = i;
13727 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13728 reg_alloc_order [pos++] = i;
13729
13730 /* x87 registers. */
13731 if (TARGET_SSE_MATH)
13732 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13733 reg_alloc_order [pos++] = i;
13734
13735 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13736 reg_alloc_order [pos++] = i;
13737
13738 /* Initialize the rest of the array, as we do not allocate some registers
13739 at all. */
13740 while (pos < FIRST_PSEUDO_REGISTER)
13741 reg_alloc_order [pos++] = 0;
13742 }
13743
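/* Note added for illustration: emit the assembly for an MI thunk that adds
   DELTA to the incoming `this' pointer and then tail-jumps to FUNCTION,
   handling both the 64-bit case and the (possibly PIC) 32-bit case.  */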
13744 void
13745 x86_output_mi_thunk (file, delta, function)
13746 FILE *file;
13747 int delta;
13748 tree function;
13749 {
13750 tree parm;
13751 rtx xops[3];
13752
13753 if (ix86_regparm > 0)
13754 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13755 else
13756 parm = NULL_TREE;
13757 for (; parm; parm = TREE_CHAIN (parm))
13758 if (TREE_VALUE (parm) == void_type_node)
13759 break;
13760
13761 xops[0] = GEN_INT (delta);
13762 if (TARGET_64BIT)
13763 {
13764 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13765 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13766 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13767 if (flag_pic)
13768 {
13769 fprintf (file, "\tjmp *");
13770 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13771 fprintf (file, "@GOTPCREL(%%rip)\n");
13772 }
13773 else
13774 {
13775 fprintf (file, "\tjmp ");
13776 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13777 fprintf (file, "\n");
13778 }
13779 }
13780 else
13781 {
13782 if (parm)
13783 xops[1] = gen_rtx_REG (SImode, 0);
13784 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13785 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13786 else
13787 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13788 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13789
13790 if (flag_pic)
13791 {
13792 xops[0] = pic_offset_table_rtx;
13793 xops[1] = gen_label_rtx ();
13794 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13795
13796 if (ix86_regparm > 2)
13797 abort ();
13798 output_asm_insn ("push{l}\t%0", xops);
13799 output_asm_insn ("call\t%P1", xops);
13800 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13801 output_asm_insn ("pop{l}\t%0", xops);
13802 output_asm_insn
13803 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13804 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13805 output_asm_insn
13806 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13807 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13808 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13809 }
13810 else
13811 {
13812 fprintf (file, "\tjmp ");
13813 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13814 fprintf (file, "\n");
13815 }
13816 }
13817 }
13818
13819 int
13820 x86_field_alignment (field, computed)
13821 tree field;
13822 int computed;
13823 {
13824 enum machine_mode mode;
13825 tree type = TREE_TYPE (field);
13826
13827 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13828 return computed;
13829 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13830 ? get_inner_array_type (type) : type);
13831 if (mode == DFmode || mode == DCmode
13832 || GET_MODE_CLASS (mode) == MODE_INT
13833 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13834 return MIN (32, computed);
13835 return computed;
13836 }
13837
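/* Note added for illustration: on 32-bit targets without -malign-double
   this caps the field alignment of DFmode, DCmode and integer fields at 32
   bits, giving the traditional ia32 struct layout; for example, assuming
   that ABI,

     struct s { char c; double d; };

   places D at offset 4 rather than 8.  64-bit targets and -malign-double
   keep the natural alignment, returning COMPUTED unchanged.  */
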
13838 /* Implement machine specific optimizations.
13839 At the moment we implement a single transformation: AMD Athlon works faster
13840 when RET is not the destination of a conditional jump or directly preceded
13841 by another jump instruction. We avoid the penalty by inserting a NOP just
13842 before the RET instructions in such cases. */
13843 void
13844 x86_machine_dependent_reorg (first)
13845 rtx first ATTRIBUTE_UNUSED;
13846 {
13847 edge e;
13848
13849 if (!TARGET_ATHLON || !optimize || optimize_size)
13850 return;
13851 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13852 {
13853 basic_block bb = e->src;
13854 rtx ret = bb->end;
13855 rtx prev;
13856 bool insert = false;
13857
13858 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13859 continue;
13860 prev = prev_nonnote_insn (ret);
13861 if (prev && GET_CODE (prev) == CODE_LABEL)
13862 {
13863 edge e;
13864 for (e = bb->pred; e; e = e->pred_next)
13865 if (EDGE_FREQUENCY (e) && e->src->index > 0
13866 && !(e->flags & EDGE_FALLTHRU))
13867 insert = 1;
13868 }
13869 if (!insert)
13870 {
13871 prev = prev_real_insn (ret);
13872 if (prev && GET_CODE (prev) == JUMP_INSN
13873 && any_condjump_p (prev))
13874 insert = 1;
13875 }
13876 if (insert)
13877 emit_insn_before (gen_nop (), ret);
13878 }
13879 }
13880
13881 #include "gt-i386.h"