1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
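/* Annotation (not part of the original source): each of the masks above is
   tested against the bit for the active CPU or architecture, as code later
   in this file does, e.g.

     x86_accumulate_outgoing_args & CPUMASK
     x86_arch_always_fancy_math_387 & (1 << ix86_arch)

   where CPUMASK expands to the bit for ix86_cpu, so a feature applies to
   every processor whose m_* bit is set.  */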
399
400 /* If the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
413
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
416
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 {
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
438 };
439
440 /* The "default" register map used in 32bit mode. */
441
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
443 {
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 };
452
453 static int const x86_64_int_parameter_registers[6] =
454 {
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457 };
458
459 static int const x86_64_int_return_registers[4] =
460 {
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
462 };
463
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466 {
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474 };
475
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529 */
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
531 {
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
539 };
540
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
546
547 /* The encoding characters for the four TLS models present in ELF. */
548
549 static char const tls_model_chars[] = " GLil";
550
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
554
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
557 {
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
562 };
563
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
566
567 /* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586 struct ix86_frame
587 {
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601 };
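/* Rough illustration (annotation, not part of the original source): for a
   32-bit function that saves two call-saved registers and uses 16 bytes of
   locals, nregs is 2, frame is 16, and to_allocate covers the frame plus any
   padding1/padding2 and va_arg register area, while the *_offset fields give
   the positions of FRAME_POINTER, HARD_FRAME_POINTER and the final stack
   pointer relative to ARG_POINTER, matching the diagram above.  */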
602
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
615
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
618
619 /* Which CPU we are scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
623
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
628
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
631
632 /* True if the SSE prefetch instruction is not a NOOP. */
633 int x86_prefetch_sse;
634
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
637
638 /* Alignment to use for loops and jumps: */
639
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
642
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
645
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
648
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
651
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
655
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
658
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
662 \f
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
704
705 struct ix86_address
706 {
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709 };
710
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
712
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
716
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
748
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
752
753 /* Register class used for passing a given 64-bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
756 uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
757
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (the upper half then contains padding).
760 */
761 enum x86_64_reg_class
762 {
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
773 };
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
776
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
787 \f
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
795
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
798
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
801
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
804
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
807
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
812
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
821
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
828
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
845
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
850
851 struct gcc_target targetm = TARGET_INITIALIZER;
852 \f
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
858
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
861
862 void
863 override_options ()
864 {
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
868
869 static struct ptt
870 {
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
880 }
881 const processor_target_table[PROCESSOR_max] =
882 {
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
890 };
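/* Reading the table above (annotation): each row gives the cost table,
   target flags to enable/disable, the default loop/jump/function alignments
   with their max-skip values, and the branch cost for one PROCESSOR_* value;
   e.g. the k6 row asks for 32-byte loop and jump alignment with a max skip
   of 7 and a branch cost of 1.  */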
891
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
894 {
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
898 {
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
906 }
907 const processor_alias_table[] =
908 {
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
915 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
916 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
917 {"i686", PROCESSOR_PENTIUMPRO, 0},
918 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
919 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
920 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
921 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
922 PTA_MMX | PTA_PREFETCH_SSE},
923 {"k6", PROCESSOR_K6, PTA_MMX},
924 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
925 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
926 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
927 | PTA_3DNOW_A},
928 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
929 | PTA_3DNOW | PTA_3DNOW_A},
930 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
935 | PTA_3DNOW_A | PTA_SSE},
936 };
937
938 int const pta_size = ARRAY_SIZE (processor_alias_table);
939
940 #ifdef SUBTARGET_OVERRIDE_OPTIONS
941 SUBTARGET_OVERRIDE_OPTIONS;
942 #endif
943
944 if (!ix86_cpu_string && ix86_arch_string)
945 ix86_cpu_string = ix86_arch_string;
946 if (!ix86_cpu_string)
947 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
948 if (!ix86_arch_string)
949 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
950
951 if (ix86_cmodel_string != 0)
952 {
953 if (!strcmp (ix86_cmodel_string, "small"))
954 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
955 else if (flag_pic)
956 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
957 else if (!strcmp (ix86_cmodel_string, "32"))
958 ix86_cmodel = CM_32;
959 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
960 ix86_cmodel = CM_KERNEL;
961 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
962 ix86_cmodel = CM_MEDIUM;
963 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
964 ix86_cmodel = CM_LARGE;
965 else
966 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
967 }
968 else
969 {
970 ix86_cmodel = CM_32;
971 if (TARGET_64BIT)
972 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
973 }
974 if (ix86_asm_string != 0)
975 {
976 if (!strcmp (ix86_asm_string, "intel"))
977 ix86_asm_dialect = ASM_INTEL;
978 else if (!strcmp (ix86_asm_string, "att"))
979 ix86_asm_dialect = ASM_ATT;
980 else
981 error ("bad value (%s) for -masm= switch", ix86_asm_string);
982 }
983 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
984 error ("code model `%s' not supported in the %s bit mode",
985 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
986 if (ix86_cmodel == CM_LARGE)
987 sorry ("code model `large' not supported yet");
988 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
989 sorry ("%i-bit mode not compiled in",
990 (target_flags & MASK_64BIT) ? 64 : 32);
991
992 for (i = 0; i < pta_size; i++)
993 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
994 {
995 ix86_arch = processor_alias_table[i].processor;
996 /* Default cpu tuning to the architecture. */
997 ix86_cpu = ix86_arch;
998 if (processor_alias_table[i].flags & PTA_MMX
999 && !(target_flags & MASK_MMX_SET))
1000 target_flags |= MASK_MMX;
1001 if (processor_alias_table[i].flags & PTA_3DNOW
1002 && !(target_flags & MASK_3DNOW_SET))
1003 target_flags |= MASK_3DNOW;
1004 if (processor_alias_table[i].flags & PTA_3DNOW_A
1005 && !(target_flags & MASK_3DNOW_A_SET))
1006 target_flags |= MASK_3DNOW_A;
1007 if (processor_alias_table[i].flags & PTA_SSE
1008 && !(target_flags & MASK_SSE_SET))
1009 target_flags |= MASK_SSE;
1010 if (processor_alias_table[i].flags & PTA_SSE2
1011 && !(target_flags & MASK_SSE2_SET))
1012 target_flags |= MASK_SSE2;
1013 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1014 x86_prefetch_sse = true;
1015 break;
1016 }
1017
1018 if (i == pta_size)
1019 error ("bad value (%s) for -march= switch", ix86_arch_string);
1020
1021 for (i = 0; i < pta_size; i++)
1022 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1023 {
1024 ix86_cpu = processor_alias_table[i].processor;
1025 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1026 x86_prefetch_sse = true;
1027 break;
1028 }
1029 if (i == pta_size)
1030 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1031
1032 if (optimize_size)
1033 ix86_cost = &size_cost;
1034 else
1035 ix86_cost = processor_target_table[ix86_cpu].cost;
1036 target_flags |= processor_target_table[ix86_cpu].target_enable;
1037 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1038
1039 /* Arrange to set up i386_stack_locals for all functions. */
1040 init_machine_status = ix86_init_machine_status;
1041
1042 /* Validate -mregparm= value. */
1043 if (ix86_regparm_string)
1044 {
1045 i = atoi (ix86_regparm_string);
1046 if (i < 0 || i > REGPARM_MAX)
1047 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1048 else
1049 ix86_regparm = i;
1050 }
1051 else
1052 if (TARGET_64BIT)
1053 ix86_regparm = REGPARM_MAX;
1054
1055 /* If the user has provided any of the -malign-* options,
1056 warn and use that value only if -falign-* is not set.
1057 Remove this code in GCC 3.2 or later. */
1058 if (ix86_align_loops_string)
1059 {
1060 warning ("-malign-loops is obsolete, use -falign-loops");
1061 if (align_loops == 0)
1062 {
1063 i = atoi (ix86_align_loops_string);
1064 if (i < 0 || i > MAX_CODE_ALIGN)
1065 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1066 else
1067 align_loops = 1 << i;
1068 }
1069 }
1070
1071 if (ix86_align_jumps_string)
1072 {
1073 warning ("-malign-jumps is obsolete, use -falign-jumps");
1074 if (align_jumps == 0)
1075 {
1076 i = atoi (ix86_align_jumps_string);
1077 if (i < 0 || i > MAX_CODE_ALIGN)
1078 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1079 else
1080 align_jumps = 1 << i;
1081 }
1082 }
1083
1084 if (ix86_align_funcs_string)
1085 {
1086 warning ("-malign-functions is obsolete, use -falign-functions");
1087 if (align_functions == 0)
1088 {
1089 i = atoi (ix86_align_funcs_string);
1090 if (i < 0 || i > MAX_CODE_ALIGN)
1091 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1092 else
1093 align_functions = 1 << i;
1094 }
1095 }
1096
1097 /* Default align_* from the processor table. */
1098 if (align_loops == 0)
1099 {
1100 align_loops = processor_target_table[ix86_cpu].align_loop;
1101 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1102 }
1103 if (align_jumps == 0)
1104 {
1105 align_jumps = processor_target_table[ix86_cpu].align_jump;
1106 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1107 }
1108 if (align_functions == 0)
1109 {
1110 align_functions = processor_target_table[ix86_cpu].align_func;
1111 }
1112
1113 /* Validate -mpreferred-stack-boundary= value, or provide default.
1114 The default of 128 bits is for Pentium III's SSE __m128, but we
1115 don't want additional code to keep the stack aligned when
1116 optimizing for code size. */
1117 ix86_preferred_stack_boundary = (optimize_size
1118 ? TARGET_64BIT ? 64 : 32
1119 : 128);
1120 if (ix86_preferred_stack_boundary_string)
1121 {
1122 i = atoi (ix86_preferred_stack_boundary_string);
1123 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1124 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1125 TARGET_64BIT ? 3 : 2);
1126 else
1127 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1128 }
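/* Example (annotation): -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 128, i.e. the 16-byte boundary needed for the
   SSE __m128 default mentioned above.  */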
1129
1130 /* Validate -mbranch-cost= value, or provide default. */
1131 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1132 if (ix86_branch_cost_string)
1133 {
1134 i = atoi (ix86_branch_cost_string);
1135 if (i < 0 || i > 5)
1136 error ("-mbranch-cost=%d is not between 0 and 5", i);
1137 else
1138 ix86_branch_cost = i;
1139 }
1140
1141 if (ix86_tls_dialect_string)
1142 {
1143 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_GNU;
1145 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1146 ix86_tls_dialect = TLS_DIALECT_SUN;
1147 else
1148 error ("bad value (%s) for -mtls-dialect= switch",
1149 ix86_tls_dialect_string);
1150 }
1151
1152 if (profile_flag)
1153 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1154
1155 /* Keep nonleaf frame pointers. */
1156 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1157 flag_omit_frame_pointer = 1;
1158
1159 /* If we're doing fast math, we don't care about comparison order
1160 wrt NaNs. This lets us use a shorter comparison sequence. */
1161 if (flag_unsafe_math_optimizations)
1162 target_flags &= ~MASK_IEEE_FP;
1163
1164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1165 since the insns won't need emulation. */
1166 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1167 target_flags &= ~MASK_NO_FANCY_MATH_387;
1168
1169 if (TARGET_64BIT)
1170 {
1171 if (TARGET_ALIGN_DOUBLE)
1172 error ("-malign-double makes no sense in the 64bit mode");
1173 if (TARGET_RTD)
1174 error ("-mrtd calling convention not supported in the 64bit mode");
1175 /* Enable by default the SSE and MMX builtins. */
1176 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1177 ix86_fpmath = FPMATH_SSE;
1178 }
1179 else
1180 ix86_fpmath = FPMATH_387;
1181
1182 if (ix86_fpmath_string != 0)
1183 {
1184 if (! strcmp (ix86_fpmath_string, "387"))
1185 ix86_fpmath = FPMATH_387;
1186 else if (! strcmp (ix86_fpmath_string, "sse"))
1187 {
1188 if (!TARGET_SSE)
1189 {
1190 warning ("SSE instruction set disabled, using 387 arithmetics");
1191 ix86_fpmath = FPMATH_387;
1192 }
1193 else
1194 ix86_fpmath = FPMATH_SSE;
1195 }
1196 else if (! strcmp (ix86_fpmath_string, "387,sse")
1197 || ! strcmp (ix86_fpmath_string, "sse,387"))
1198 {
1199 if (!TARGET_SSE)
1200 {
1201 warning ("SSE instruction set disabled, using 387 arithmetics");
1202 ix86_fpmath = FPMATH_387;
1203 }
1204 else if (!TARGET_80387)
1205 {
1206 warning ("387 instruction set disabled, using SSE arithmetics");
1207 ix86_fpmath = FPMATH_SSE;
1208 }
1209 else
1210 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1211 }
1212 else
1213 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1214 }
1215
1216 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1217 on by -msse. */
1218 if (TARGET_SSE)
1219 {
1220 target_flags |= MASK_MMX;
1221 x86_prefetch_sse = true;
1222 }
1223
1224 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1225 if (TARGET_3DNOW)
1226 {
1227 target_flags |= MASK_MMX;
1228 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1229 extensions it adds. */
1230 if (x86_3dnow_a & (1 << ix86_arch))
1231 target_flags |= MASK_3DNOW_A;
1232 }
1233 if ((x86_accumulate_outgoing_args & CPUMASK)
1234 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1235 && !optimize_size)
1236 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1237
1238 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1239 {
1240 char *p;
1241 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1242 p = strchr (internal_label_prefix, 'X');
1243 internal_label_prefix_len = p - internal_label_prefix;
1244 *p = '\0';
1245 }
1246 }
1247 \f
1248 void
1249 optimization_options (level, size)
1250 int level;
1251 int size ATTRIBUTE_UNUSED;
1252 {
1253 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1254 make the problem with not enough registers even worse. */
1255 #ifdef INSN_SCHEDULING
1256 if (level > 1)
1257 flag_schedule_insns = 0;
1258 #endif
1259 if (TARGET_64BIT && optimize >= 1)
1260 flag_omit_frame_pointer = 1;
1261 if (TARGET_64BIT)
1262 {
1263 flag_pcc_struct_return = 0;
1264 flag_asynchronous_unwind_tables = 1;
1265 }
1266 if (profile_flag)
1267 flag_omit_frame_pointer = 0;
1268 }
1269 \f
1270 /* Table of valid machine attributes. */
1271 const struct attribute_spec ix86_attribute_table[] =
1272 {
1273 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1274 /* Stdcall attribute says callee is responsible for popping arguments
1275 if they are not variable. */
1276 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1277 /* Cdecl attribute says the callee is a normal C declaration */
1278 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1279 /* Regparm attribute specifies how many integer arguments are to be
1280 passed in registers. */
1281 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1282 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1283 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1284 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1285 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1286 #endif
1287 { NULL, 0, 0, false, false, false, NULL }
1288 };
1289
1290 /* Handle a "cdecl" or "stdcall" attribute;
1291 arguments as in struct attribute_spec.handler. */
1292 static tree
1293 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1294 tree *node;
1295 tree name;
1296 tree args ATTRIBUTE_UNUSED;
1297 int flags ATTRIBUTE_UNUSED;
1298 bool *no_add_attrs;
1299 {
1300 if (TREE_CODE (*node) != FUNCTION_TYPE
1301 && TREE_CODE (*node) != METHOD_TYPE
1302 && TREE_CODE (*node) != FIELD_DECL
1303 && TREE_CODE (*node) != TYPE_DECL)
1304 {
1305 warning ("`%s' attribute only applies to functions",
1306 IDENTIFIER_POINTER (name));
1307 *no_add_attrs = true;
1308 }
1309
1310 if (TARGET_64BIT)
1311 {
1312 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1313 *no_add_attrs = true;
1314 }
1315
1316 return NULL_TREE;
1317 }
1318
1319 /* Handle a "regparm" attribute;
1320 arguments as in struct attribute_spec.handler. */
1321 static tree
1322 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1323 tree *node;
1324 tree name;
1325 tree args;
1326 int flags ATTRIBUTE_UNUSED;
1327 bool *no_add_attrs;
1328 {
1329 if (TREE_CODE (*node) != FUNCTION_TYPE
1330 && TREE_CODE (*node) != METHOD_TYPE
1331 && TREE_CODE (*node) != FIELD_DECL
1332 && TREE_CODE (*node) != TYPE_DECL)
1333 {
1334 warning ("`%s' attribute only applies to functions",
1335 IDENTIFIER_POINTER (name));
1336 *no_add_attrs = true;
1337 }
1338 else
1339 {
1340 tree cst;
1341
1342 cst = TREE_VALUE (args);
1343 if (TREE_CODE (cst) != INTEGER_CST)
1344 {
1345 warning ("`%s' attribute requires an integer constant argument",
1346 IDENTIFIER_POINTER (name));
1347 *no_add_attrs = true;
1348 }
1349 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1350 {
1351 warning ("argument to `%s' attribute larger than %d",
1352 IDENTIFIER_POINTER (name), REGPARM_MAX);
1353 *no_add_attrs = true;
1354 }
1355 }
1356
1357 return NULL_TREE;
1358 }
1359
1360 /* Return 0 if the attributes for two types are incompatible, 1 if they
1361 are compatible, and 2 if they are nearly compatible (which causes a
1362 warning to be generated). */
1363
1364 static int
1365 ix86_comp_type_attributes (type1, type2)
1366 tree type1;
1367 tree type2;
1368 {
1369 /* Check for mismatch of non-default calling convention. */
1370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1371
1372 if (TREE_CODE (type1) != FUNCTION_TYPE)
1373 return 1;
1374
1375 /* Check for mismatched return types (cdecl vs stdcall). */
1376 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1377 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1378 return 0;
1379 return 1;
1380 }
1381 \f
1382 /* Value is the number of bytes of arguments automatically
1383 popped when returning from a subroutine call.
1384 FUNDECL is the declaration node of the function (as a tree),
1385 FUNTYPE is the data type of the function (as a tree),
1386 or for a library call it is an identifier node for the subroutine name.
1387 SIZE is the number of bytes of arguments passed on the stack.
1388
1389 On the 80386, the RTD insn may be used to pop them if the number
1390 of args is fixed, but if the number is variable then the caller
1391 must pop them all. RTD can't be used for library calls now
1392 because the library is compiled with the Unix compiler.
1393 Use of RTD is a selectable option, since it is incompatible with
1394 standard Unix calling sequences. If the option is not selected,
1395 the caller must always pop the args.
1396
1397 The attribute stdcall is equivalent to RTD on a per module basis. */
1398
1399 int
1400 ix86_return_pops_args (fundecl, funtype, size)
1401 tree fundecl;
1402 tree funtype;
1403 int size;
1404 {
1405 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1406
1407 /* Cdecl functions override -mrtd, and never pop the stack. */
1408 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1409
1410 /* Stdcall functions will pop the stack if not variable args. */
1411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1412 rtd = 1;
1413
1414 if (rtd
1415 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1416 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1417 == void_type_node)))
1418 return size;
1419 }
1420
1421 /* Lose any fake structure return argument if it is passed on the stack. */
1422 if (aggregate_value_p (TREE_TYPE (funtype))
1423 && !TARGET_64BIT)
1424 {
1425 int nregs = ix86_regparm;
1426
1427 if (funtype)
1428 {
1429 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1430
1431 if (attr)
1432 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1433 }
1434
1435 if (!nregs)
1436 return GET_MODE_SIZE (Pmode);
1437 }
1438
1439 return 0;
1440 }
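/* Example (annotation, not part of the original source): for a prototyped
   function declared with __attribute__ ((stdcall)) and taking two ints, the
   callee pops its 8 bytes of arguments, so this returns 8 (SIZE); for a
   cdecl or variadic function that does not return an aggregate, it returns 0
   and the caller pops the arguments.  */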
1441 \f
1442 /* Argument support functions. */
1443
1444 /* Return true when register may be used to pass function parameters. */
1445 bool
1446 ix86_function_arg_regno_p (regno)
1447 int regno;
1448 {
1449 int i;
1450 if (!TARGET_64BIT)
1451 return (regno < REGPARM_MAX
1452 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1453 if (SSE_REGNO_P (regno) && TARGET_SSE)
1454 return true;
1455 /* RAX is used as hidden argument to va_arg functions. */
1456 if (!regno)
1457 return true;
1458 for (i = 0; i < REGPARM_MAX; i++)
1459 if (regno == x86_64_int_parameter_registers[i])
1460 return true;
1461 return false;
1462 }
1463
1464 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1465 for a call to a function whose data type is FNTYPE.
1466 For a library call, FNTYPE is 0. */
1467
1468 void
1469 init_cumulative_args (cum, fntype, libname)
1470 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1471 tree fntype; /* tree ptr for function decl */
1472 rtx libname; /* SYMBOL_REF of library name or 0 */
1473 {
1474 static CUMULATIVE_ARGS zero_cum;
1475 tree param, next_param;
1476
1477 if (TARGET_DEBUG_ARG)
1478 {
1479 fprintf (stderr, "\ninit_cumulative_args (");
1480 if (fntype)
1481 fprintf (stderr, "fntype code = %s, ret code = %s",
1482 tree_code_name[(int) TREE_CODE (fntype)],
1483 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1484 else
1485 fprintf (stderr, "no fntype");
1486
1487 if (libname)
1488 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1489 }
1490
1491 *cum = zero_cum;
1492
1493 /* Set up the number of registers to use for passing arguments. */
1494 cum->nregs = ix86_regparm;
1495 cum->sse_nregs = SSE_REGPARM_MAX;
1496 if (fntype && !TARGET_64BIT)
1497 {
1498 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1499
1500 if (attr)
1501 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1502 }
1503 cum->maybe_vaarg = false;
1504
1505 /* Determine if this function has variable arguments. This is
1506 indicated by the last argument being 'void_type_node' if there
1507 are no variable arguments. If there are variable arguments, then
1508 we won't pass anything in registers. */
1509
1510 if (cum->nregs)
1511 {
1512 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1513 param != 0; param = next_param)
1514 {
1515 next_param = TREE_CHAIN (param);
1516 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1517 {
1518 if (!TARGET_64BIT)
1519 cum->nregs = 0;
1520 cum->maybe_vaarg = true;
1521 }
1522 }
1523 }
1524 if ((!fntype && !libname)
1525 || (fntype && !TYPE_ARG_TYPES (fntype)))
1526 cum->maybe_vaarg = 1;
1527
1528 if (TARGET_DEBUG_ARG)
1529 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1530
1531 return;
1532 }
1533
1534 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1535 of this code is to classify each 8-byte chunk of an incoming argument by register
1536 class and assign registers accordingly. */
1537
1538 /* Return the union class of CLASS1 and CLASS2.
1539 See the x86-64 PS ABI for details. */
1540
1541 static enum x86_64_reg_class
1542 merge_classes (class1, class2)
1543 enum x86_64_reg_class class1, class2;
1544 {
1545 /* Rule #1: If both classes are equal, this is the resulting class. */
1546 if (class1 == class2)
1547 return class1;
1548
1549 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1550 the other class. */
1551 if (class1 == X86_64_NO_CLASS)
1552 return class2;
1553 if (class2 == X86_64_NO_CLASS)
1554 return class1;
1555
1556 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1557 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1558 return X86_64_MEMORY_CLASS;
1559
1560 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1561 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1562 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1563 return X86_64_INTEGERSI_CLASS;
1564 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1565 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1566 return X86_64_INTEGER_CLASS;
1567
1568 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1569 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1570 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1571 return X86_64_MEMORY_CLASS;
1572
1573 /* Rule #6: Otherwise class SSE is used. */
1574 return X86_64_SSE_CLASS;
1575 }
1576
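/* Worked example of the rules above (a sketch, not used by the code):
   merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS by rule #4, while merging X86_64_SSE_CLASS
   with X86_64_X87_CLASS yields X86_64_MEMORY_CLASS by rule #5, forcing
   the whole aggregate onto the stack.  */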
1577 /* Classify the argument of type TYPE and mode MODE.
1578 CLASSES will be filled by the register class used to pass each word
1579 of the operand. The number of words is returned. In case the parameter
1580 should be passed in memory, 0 is returned. As a special case for zero
1581 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1582
1583 BIT_OFFSET is used internally for handling records and specifies the
1584 offset in bits modulo 256 to avoid overflow cases.
1585
1586 See the x86-64 PS ABI for details.
1587 */
1588
1589 static int
1590 classify_argument (mode, type, classes, bit_offset)
1591 enum machine_mode mode;
1592 tree type;
1593 enum x86_64_reg_class classes[MAX_CLASSES];
1594 int bit_offset;
1595 {
1596 int bytes =
1597 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1598 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1599
1600 /* Variable sized entities are always passed/returned in memory. */
1601 if (bytes < 0)
1602 return 0;
1603
1604 if (type && AGGREGATE_TYPE_P (type))
1605 {
1606 int i;
1607 tree field;
1608 enum x86_64_reg_class subclasses[MAX_CLASSES];
1609
1610 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1611 if (bytes > 16)
1612 return 0;
1613
1614 for (i = 0; i < words; i++)
1615 classes[i] = X86_64_NO_CLASS;
1616
1617 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1618 signal the memory class, so handle this as a special case. */
1619 if (!words)
1620 {
1621 classes[0] = X86_64_NO_CLASS;
1622 return 1;
1623 }
1624
1625 /* Classify each field of record and merge classes. */
1626 if (TREE_CODE (type) == RECORD_TYPE)
1627 {
1628 /* For classes first merge in the field of the subclasses. */
1629 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1630 {
1631 tree bases = TYPE_BINFO_BASETYPES (type);
1632 int n_bases = TREE_VEC_LENGTH (bases);
1633 int i;
1634
1635 for (i = 0; i < n_bases; ++i)
1636 {
1637 tree binfo = TREE_VEC_ELT (bases, i);
1638 int num;
1639 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1640 tree type = BINFO_TYPE (binfo);
1641
1642 num = classify_argument (TYPE_MODE (type),
1643 type, subclasses,
1644 (offset + bit_offset) % 256);
1645 if (!num)
1646 return 0;
1647 for (i = 0; i < num; i++)
1648 {
1649 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1650 classes[i + pos] =
1651 merge_classes (subclasses[i], classes[i + pos]);
1652 }
1653 }
1654 }
1655 /* And now merge the fields of structure. */
1656 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1657 {
1658 if (TREE_CODE (field) == FIELD_DECL)
1659 {
1660 int num;
1661
1662 /* Bitfields are always classified as integer. Handle them
1663 early, since later code would consider them to be
1664 misaligned integers. */
1665 if (DECL_BIT_FIELD (field))
1666 {
1667 for (i = int_bit_position (field) / 8 / 8;
1668 i < (int_bit_position (field)
1669 + tree_low_cst (DECL_SIZE (field), 0)
1670 + 63) / 8 / 8; i++)
1671 classes[i] =
1672 merge_classes (X86_64_INTEGER_CLASS,
1673 classes[i]);
1674 }
1675 else
1676 {
1677 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1678 TREE_TYPE (field), subclasses,
1679 (int_bit_position (field)
1680 + bit_offset) % 256);
1681 if (!num)
1682 return 0;
1683 for (i = 0; i < num; i++)
1684 {
1685 int pos =
1686 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1687 classes[i + pos] =
1688 merge_classes (subclasses[i], classes[i + pos]);
1689 }
1690 }
1691 }
1692 }
1693 }
1694 /* Arrays are handled as small records. */
1695 else if (TREE_CODE (type) == ARRAY_TYPE)
1696 {
1697 int num;
1698 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1699 TREE_TYPE (type), subclasses, bit_offset);
1700 if (!num)
1701 return 0;
1702
1703 /* The partial classes are now full classes. */
1704 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1705 subclasses[0] = X86_64_SSE_CLASS;
1706 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1707 subclasses[0] = X86_64_INTEGER_CLASS;
1708
1709 for (i = 0; i < words; i++)
1710 classes[i] = subclasses[i % num];
1711 }
1712 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1713 else if (TREE_CODE (type) == UNION_TYPE
1714 || TREE_CODE (type) == QUAL_UNION_TYPE)
1715 {
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1718 {
1719 tree bases = TYPE_BINFO_BASETYPES (type);
1720 int n_bases = TREE_VEC_LENGTH (bases);
1721 int i;
1722
1723 for (i = 0; i < n_bases; ++i)
1724 {
1725 tree binfo = TREE_VEC_ELT (bases, i);
1726 int num;
1727 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1728 tree type = BINFO_TYPE (binfo);
1729
1730 num = classify_argument (TYPE_MODE (type),
1731 type, subclasses,
1732 (offset + (bit_offset % 64)) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
1737 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1738 classes[i + pos] =
1739 merge_classes (subclasses[i], classes[i + pos]);
1740 }
1741 }
1742 }
1743 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1744 {
1745 if (TREE_CODE (field) == FIELD_DECL)
1746 {
1747 int num;
1748 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1749 TREE_TYPE (field), subclasses,
1750 bit_offset);
1751 if (!num)
1752 return 0;
1753 for (i = 0; i < num; i++)
1754 classes[i] = merge_classes (subclasses[i], classes[i]);
1755 }
1756 }
1757 }
1758 else
1759 abort ();
1760
1761 /* Final merger cleanup. */
1762 for (i = 0; i < words; i++)
1763 {
1764 /* If one class is MEMORY, everything should be passed in
1765 memory. */
1766 if (classes[i] == X86_64_MEMORY_CLASS)
1767 return 0;
1768
1769 /* The X86_64_SSEUP_CLASS should be always preceded by
1770 X86_64_SSE_CLASS. */
1771 if (classes[i] == X86_64_SSEUP_CLASS
1772 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1773 classes[i] = X86_64_SSE_CLASS;
1774
1775 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1776 if (classes[i] == X86_64_X87UP_CLASS
1777 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1778 classes[i] = X86_64_SSE_CLASS;
1779 }
1780 return words;
1781 }
1782
1783 /* Compute the alignment needed. We align all types to their natural
1784 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
1785 if (mode != VOIDmode && mode != BLKmode)
1786 {
1787 int mode_alignment = GET_MODE_BITSIZE (mode);
1788
1789 if (mode == XFmode)
1790 mode_alignment = 128;
1791 else if (mode == XCmode)
1792 mode_alignment = 256;
1793 /* Misaligned fields are always returned in memory. */
1794 if (bit_offset % mode_alignment)
1795 return 0;
1796 }
1797
1798 /* Classification of atomic types. */
1799 switch (mode)
1800 {
1801 case DImode:
1802 case SImode:
1803 case HImode:
1804 case QImode:
1805 case CSImode:
1806 case CHImode:
1807 case CQImode:
1808 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1809 classes[0] = X86_64_INTEGERSI_CLASS;
1810 else
1811 classes[0] = X86_64_INTEGER_CLASS;
1812 return 1;
1813 case CDImode:
1814 case TImode:
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 return 2;
1817 case CTImode:
1818 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1819 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1820 return 4;
1821 case SFmode:
1822 if (!(bit_offset % 64))
1823 classes[0] = X86_64_SSESF_CLASS;
1824 else
1825 classes[0] = X86_64_SSE_CLASS;
1826 return 1;
1827 case DFmode:
1828 classes[0] = X86_64_SSEDF_CLASS;
1829 return 1;
1830 case TFmode:
1831 classes[0] = X86_64_X87_CLASS;
1832 classes[1] = X86_64_X87UP_CLASS;
1833 return 2;
1834 case TCmode:
1835 classes[0] = X86_64_X87_CLASS;
1836 classes[1] = X86_64_X87UP_CLASS;
1837 classes[2] = X86_64_X87_CLASS;
1838 classes[3] = X86_64_X87UP_CLASS;
1839 return 4;
1840 case DCmode:
1841 classes[0] = X86_64_SSEDF_CLASS;
1842 classes[1] = X86_64_SSEDF_CLASS;
1843 return 2;
1844 case SCmode:
1845 classes[0] = X86_64_SSE_CLASS;
1846 return 1;
1847 case V4SFmode:
1848 case V4SImode:
1849 case V16QImode:
1850 case V8HImode:
1851 case V2DFmode:
1852 case V2DImode:
1853 classes[0] = X86_64_SSE_CLASS;
1854 classes[1] = X86_64_SSEUP_CLASS;
1855 return 2;
1856 case V2SFmode:
1857 case V2SImode:
1858 case V4HImode:
1859 case V8QImode:
1860 classes[0] = X86_64_SSE_CLASS;
1861 return 1;
1862 case BLKmode:
1863 case VOIDmode:
1864 return 0;
1865 default:
1866 abort ();
1867 }
1868 }
1869
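/* Illustrative example (hypothetical type, not referenced anywhere in
   this file): for

     struct example { int i; int j; double d; };

   classify_argument sees a 16 byte aggregate.  The first eightbyte
   (fields i and j) ends up as X86_64_INTEGER_CLASS, the second
   (field d) as X86_64_SSEDF_CLASS, and 2 is returned, so the structure
   is passed in one general purpose and one SSE register.  */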
1870 /* Examine the argument and set the number of registers required in each
1871 class. Return 0 iff the parameter should be passed in memory. */
1872 static int
1873 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1874 enum machine_mode mode;
1875 tree type;
1876 int *int_nregs, *sse_nregs;
1877 int in_return;
1878 {
1879 enum x86_64_reg_class class[MAX_CLASSES];
1880 int n = classify_argument (mode, type, class, 0);
1881
1882 *int_nregs = 0;
1883 *sse_nregs = 0;
1884 if (!n)
1885 return 0;
1886 for (n--; n >= 0; n--)
1887 switch (class[n])
1888 {
1889 case X86_64_INTEGER_CLASS:
1890 case X86_64_INTEGERSI_CLASS:
1891 (*int_nregs)++;
1892 break;
1893 case X86_64_SSE_CLASS:
1894 case X86_64_SSESF_CLASS:
1895 case X86_64_SSEDF_CLASS:
1896 (*sse_nregs)++;
1897 break;
1898 case X86_64_NO_CLASS:
1899 case X86_64_SSEUP_CLASS:
1900 break;
1901 case X86_64_X87_CLASS:
1902 case X86_64_X87UP_CLASS:
1903 if (!in_return)
1904 return 0;
1905 break;
1906 case X86_64_MEMORY_CLASS:
1907 abort ();
1908 }
1909 return 1;
1910 }
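/* For the hypothetical struct sketched above, examine_argument reports
   *int_nregs == 1 and *sse_nregs == 1.  A long double argument, being
   classified X86_64_X87_CLASS, makes it return 0 when IN_RETURN is
   false, since the x87 classes are accepted only for return values.  */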
1911 /* Construct container for the argument used by GCC interface. See
1912 FUNCTION_ARG for the detailed description. */
1913 static rtx
1914 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1915 enum machine_mode mode;
1916 tree type;
1917 int in_return;
1918 int nintregs, nsseregs;
1919 const int * intreg;
1920 int sse_regno;
1921 {
1922 enum machine_mode tmpmode;
1923 int bytes =
1924 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1925 enum x86_64_reg_class class[MAX_CLASSES];
1926 int n;
1927 int i;
1928 int nexps = 0;
1929 int needed_sseregs, needed_intregs;
1930 rtx exp[MAX_CLASSES];
1931 rtx ret;
1932
1933 n = classify_argument (mode, type, class, 0);
1934 if (TARGET_DEBUG_ARG)
1935 {
1936 if (!n)
1937 fprintf (stderr, "Memory class\n");
1938 else
1939 {
1940 fprintf (stderr, "Classes:");
1941 for (i = 0; i < n; i++)
1942 {
1943 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1944 }
1945 fprintf (stderr, "\n");
1946 }
1947 }
1948 if (!n)
1949 return NULL;
1950 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1951 return NULL;
1952 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1953 return NULL;
1954
1955 /* First construct simple cases. Avoid SCmode, since we want to use
1956 single register to pass this type. */
1957 if (n == 1 && mode != SCmode)
1958 switch (class[0])
1959 {
1960 case X86_64_INTEGER_CLASS:
1961 case X86_64_INTEGERSI_CLASS:
1962 return gen_rtx_REG (mode, intreg[0]);
1963 case X86_64_SSE_CLASS:
1964 case X86_64_SSESF_CLASS:
1965 case X86_64_SSEDF_CLASS:
1966 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1967 case X86_64_X87_CLASS:
1968 return gen_rtx_REG (mode, FIRST_STACK_REG);
1969 case X86_64_NO_CLASS:
1970 /* Zero sized array, struct or class. */
1971 return NULL;
1972 default:
1973 abort ();
1974 }
1975 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1976 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1977 if (n == 2
1978 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1979 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1980 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1981 && class[1] == X86_64_INTEGER_CLASS
1982 && (mode == CDImode || mode == TImode)
1983 && intreg[0] + 1 == intreg[1])
1984 return gen_rtx_REG (mode, intreg[0]);
1985 if (n == 4
1986 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1987 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1988 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1989
1990 /* Otherwise figure out the entries of the PARALLEL. */
1991 for (i = 0; i < n; i++)
1992 {
1993 switch (class[i])
1994 {
1995 case X86_64_NO_CLASS:
1996 break;
1997 case X86_64_INTEGER_CLASS:
1998 case X86_64_INTEGERSI_CLASS:
1999 /* Merge TImodes on aligned occasions here too. */
2000 if (i * 8 + 8 > bytes)
2001 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2002 else if (class[i] == X86_64_INTEGERSI_CLASS)
2003 tmpmode = SImode;
2004 else
2005 tmpmode = DImode;
2006 /* We've requested a size (such as 24 bits) for which there is no integer mode. Use DImode. */
2007 if (tmpmode == BLKmode)
2008 tmpmode = DImode;
2009 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2010 gen_rtx_REG (tmpmode, *intreg),
2011 GEN_INT (i*8));
2012 intreg++;
2013 break;
2014 case X86_64_SSESF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (SFmode,
2017 SSE_REGNO (sse_regno)),
2018 GEN_INT (i*8));
2019 sse_regno++;
2020 break;
2021 case X86_64_SSEDF_CLASS:
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (DFmode,
2024 SSE_REGNO (sse_regno)),
2025 GEN_INT (i*8));
2026 sse_regno++;
2027 break;
2028 case X86_64_SSE_CLASS:
2029 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2030 tmpmode = TImode, i++;
2031 else
2032 tmpmode = DImode;
2033 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2034 gen_rtx_REG (tmpmode,
2035 SSE_REGNO (sse_regno)),
2036 GEN_INT (i*8));
2037 sse_regno++;
2038 break;
2039 default:
2040 abort ();
2041 }
2042 }
2043 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2044 for (i = 0; i < nexps; i++)
2045 XVECEXP (ret, 0, i) = exp [i];
2046 return ret;
2047 }
2048
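/* Sketch of the result for the hypothetical 16 byte struct above
   (the register numbers shown are illustrative):

     (parallel [(expr_list (reg:DI 5 di) (const_int 0))
                (expr_list (reg:DF 21 xmm0) (const_int 8))])

   i.e. the first eightbyte lives in a general purpose register and the
   second in an SSE register, with the byte offset of each piece
   recorded in the second operand of its expr_list.  */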
2049 /* Update the data in CUM to advance over an argument
2050 of mode MODE and data type TYPE.
2051 (TYPE is null for libcalls where that information may not be available.) */
2052
2053 void
2054 function_arg_advance (cum, mode, type, named)
2055 CUMULATIVE_ARGS *cum; /* current arg information */
2056 enum machine_mode mode; /* current arg mode */
2057 tree type; /* type of the argument or 0 if lib support */
2058 int named; /* whether or not the argument was named */
2059 {
2060 int bytes =
2061 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2062 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2063
2064 if (TARGET_DEBUG_ARG)
2065 fprintf (stderr,
2066 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2067 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2068 if (TARGET_64BIT)
2069 {
2070 int int_nregs, sse_nregs;
2071 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2072 cum->words += words;
2073 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2074 {
2075 cum->nregs -= int_nregs;
2076 cum->sse_nregs -= sse_nregs;
2077 cum->regno += int_nregs;
2078 cum->sse_regno += sse_nregs;
2079 }
2080 else
2081 cum->words += words;
2082 }
2083 else
2084 {
2085 if (TARGET_SSE && mode == TImode)
2086 {
2087 cum->sse_words += words;
2088 cum->sse_nregs -= 1;
2089 cum->sse_regno += 1;
2090 if (cum->sse_nregs <= 0)
2091 {
2092 cum->sse_nregs = 0;
2093 cum->sse_regno = 0;
2094 }
2095 }
2096 else
2097 {
2098 cum->words += words;
2099 cum->nregs -= words;
2100 cum->regno += words;
2101
2102 if (cum->nregs <= 0)
2103 {
2104 cum->nregs = 0;
2105 cum->regno = 0;
2106 }
2107 }
2108 }
2109 return;
2110 }
2111
2112 /* Define where to put the arguments to a function.
2113 Value is zero to push the argument on the stack,
2114 or a hard register in which to store the argument.
2115
2116 MODE is the argument's machine mode.
2117 TYPE is the data type of the argument (as a tree).
2118 This is null for libcalls where that information may
2119 not be available.
2120 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2121 the preceding args and about the function being called.
2122 NAMED is nonzero if this argument is a named parameter
2123 (otherwise it is an extra parameter matching an ellipsis). */
2124
2125 rtx
2126 function_arg (cum, mode, type, named)
2127 CUMULATIVE_ARGS *cum; /* current arg information */
2128 enum machine_mode mode; /* current arg mode */
2129 tree type; /* type of the argument or 0 if lib support */
2130 int named; /* != 0 for normal args, == 0 for ... args */
2131 {
2132 rtx ret = NULL_RTX;
2133 int bytes =
2134 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2135 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2136
2137 /* Handle a hidden AL argument containing the number of registers for varargs
2138 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2139 any AL settings. */
2140 if (mode == VOIDmode)
2141 {
2142 if (TARGET_64BIT)
2143 return GEN_INT (cum->maybe_vaarg
2144 ? (cum->sse_nregs < 0
2145 ? SSE_REGPARM_MAX
2146 : cum->sse_regno)
2147 : -1);
2148 else
2149 return constm1_rtx;
2150 }
2151 if (TARGET_64BIT)
2152 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2153 &x86_64_int_parameter_registers [cum->regno],
2154 cum->sse_regno);
2155 else
2156 switch (mode)
2157 {
2158 /* For now, pass fp/complex values on the stack. */
2159 default:
2160 break;
2161
2162 case BLKmode:
2163 case DImode:
2164 case SImode:
2165 case HImode:
2166 case QImode:
2167 if (words <= cum->nregs)
2168 ret = gen_rtx_REG (mode, cum->regno);
2169 break;
2170 case TImode:
2171 if (cum->sse_nregs)
2172 ret = gen_rtx_REG (mode, cum->sse_regno);
2173 break;
2174 }
2175
2176 if (TARGET_DEBUG_ARG)
2177 {
2178 fprintf (stderr,
2179 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2180 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2181
2182 if (ret)
2183 print_simple_rtl (stderr, ret);
2184 else
2185 fprintf (stderr, ", stack");
2186
2187 fprintf (stderr, " )\n");
2188 }
2189
2190 return ret;
2191 }
2192
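/* Illustrative sketch of the VOIDmode case above (the assembly shown is
   hypothetical): before a varargs call such as printf ("%f", d) on
   x86-64, the caller materializes the number of SSE registers carrying
   arguments, e.g.

     movl $1, %eax        # one SSE register (xmm0) is used
     call printf

   so that the callee's prologue knows how many SSE registers to save.  */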
2193 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2194 and type. */
2195
2196 int
2197 ix86_function_arg_boundary (mode, type)
2198 enum machine_mode mode;
2199 tree type;
2200 {
2201 int align;
2202 if (!TARGET_64BIT)
2203 return PARM_BOUNDARY;
2204 if (type)
2205 align = TYPE_ALIGN (type);
2206 else
2207 align = GET_MODE_ALIGNMENT (mode);
2208 if (align < PARM_BOUNDARY)
2209 align = PARM_BOUNDARY;
2210 if (align > 128)
2211 align = 128;
2212 return align;
2213 }
2214
2215 /* Return true if N is a possible register number of function value. */
2216 bool
2217 ix86_function_value_regno_p (regno)
2218 int regno;
2219 {
2220 if (!TARGET_64BIT)
2221 {
2222 return ((regno) == 0
2223 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2224 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2225 }
2226 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2227 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2228 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2229 }
2230
2231 /* Define how to find the value returned by a function.
2232 VALTYPE is the data type of the value (as a tree).
2233 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2234 otherwise, FUNC is 0. */
2235 rtx
2236 ix86_function_value (valtype)
2237 tree valtype;
2238 {
2239 if (TARGET_64BIT)
2240 {
2241 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2242 REGPARM_MAX, SSE_REGPARM_MAX,
2243 x86_64_int_return_registers, 0);
2244 /* For zero sized structures, construct_container returns NULL, but we need
2245 to keep the rest of the compiler happy by returning a meaningful value. */
2246 if (!ret)
2247 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2248 return ret;
2249 }
2250 else
2251 return gen_rtx_REG (TYPE_MODE (valtype),
2252 ix86_value_regno (TYPE_MODE (valtype)));
2253 }
2254
2255 /* Return false iff type is returned in memory. */
2256 int
2257 ix86_return_in_memory (type)
2258 tree type;
2259 {
2260 int needed_intregs, needed_sseregs;
2261 if (TARGET_64BIT)
2262 {
2263 return !examine_argument (TYPE_MODE (type), type, 1,
2264 &needed_intregs, &needed_sseregs);
2265 }
2266 else
2267 {
2268 if (TYPE_MODE (type) == BLKmode
2269 || (VECTOR_MODE_P (TYPE_MODE (type))
2270 && int_size_in_bytes (type) == 8)
2271 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2272 && TYPE_MODE (type) != TFmode
2273 && !VECTOR_MODE_P (TYPE_MODE (type))))
2274 return 1;
2275 return 0;
2276 }
2277 }
2278
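/* Examples for the !TARGET_64BIT test above (hypothetical types): an
   8 byte vector such as __m64 is forced into memory by the second
   clause, while a 16 byte __m128 vector escapes the third clause and a
   plain double (8 bytes) matches none of them, so both of the latter
   are returned in registers.  */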
2279 /* Define how to find the value returned by a library function
2280 assuming the value has mode MODE. */
2281 rtx
2282 ix86_libcall_value (mode)
2283 enum machine_mode mode;
2284 {
2285 if (TARGET_64BIT)
2286 {
2287 switch (mode)
2288 {
2289 case SFmode:
2290 case SCmode:
2291 case DFmode:
2292 case DCmode:
2293 return gen_rtx_REG (mode, FIRST_SSE_REG);
2294 case TFmode:
2295 case TCmode:
2296 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2297 default:
2298 return gen_rtx_REG (mode, 0);
2299 }
2300 }
2301 else
2302 return gen_rtx_REG (mode, ix86_value_regno (mode));
2303 }
2304
2305 /* Given a mode, return the register to use for a return value. */
2306
2307 static int
2308 ix86_value_regno (mode)
2309 enum machine_mode mode;
2310 {
2311 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2312 return FIRST_FLOAT_REG;
2313 if (mode == TImode || VECTOR_MODE_P (mode))
2314 return FIRST_SSE_REG;
2315 return 0;
2316 }
2317 \f
2318 /* Create the va_list data type. */
2319
2320 tree
2321 ix86_build_va_list ()
2322 {
2323 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2324
2325 /* For i386 we use plain pointer to argument area. */
2326 if (!TARGET_64BIT)
2327 return build_pointer_type (char_type_node);
2328
2329 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2330 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2331
2332 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2333 unsigned_type_node);
2334 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2335 unsigned_type_node);
2336 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2337 ptr_type_node);
2338 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2339 ptr_type_node);
2340
2341 DECL_FIELD_CONTEXT (f_gpr) = record;
2342 DECL_FIELD_CONTEXT (f_fpr) = record;
2343 DECL_FIELD_CONTEXT (f_ovf) = record;
2344 DECL_FIELD_CONTEXT (f_sav) = record;
2345
2346 TREE_CHAIN (record) = type_decl;
2347 TYPE_NAME (record) = type_decl;
2348 TYPE_FIELDS (record) = f_gpr;
2349 TREE_CHAIN (f_gpr) = f_fpr;
2350 TREE_CHAIN (f_fpr) = f_ovf;
2351 TREE_CHAIN (f_ovf) = f_sav;
2352
2353 layout_type (record);
2354
2355 /* The correct type is an array type of one element. */
2356 return build_array_type (record, build_index_type (size_zero_node));
2357 }
2358
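/* The record built above matches the va_list layout the x86-64 psABI
   documents; a user-level sketch (field names follow the build_decl
   calls above, the typedef spelling is illustrative):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];
  */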
2359 /* Perform any actions needed for a function that is receiving a
2360 variable number of arguments.
2361
2362 CUM is as above.
2363
2364 MODE and TYPE are the mode and type of the current parameter.
2365
2366 PRETEND_SIZE is a variable that should be set to the amount of stack
2367 that must be pushed by the prolog to pretend that our caller pushed
2368 it.
2369
2370 Normally, this macro will push all remaining incoming registers on the
2371 stack and set PRETEND_SIZE to the length of the registers pushed. */
2372
2373 void
2374 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2375 CUMULATIVE_ARGS *cum;
2376 enum machine_mode mode;
2377 tree type;
2378 int *pretend_size ATTRIBUTE_UNUSED;
2379 int no_rtl;
2380
2381 {
2382 CUMULATIVE_ARGS next_cum;
2383 rtx save_area = NULL_RTX, mem;
2384 rtx label;
2385 rtx label_ref;
2386 rtx tmp_reg;
2387 rtx nsse_reg;
2388 int set;
2389 tree fntype;
2390 int stdarg_p;
2391 int i;
2392
2393 if (!TARGET_64BIT)
2394 return;
2395
2396 /* Indicate to allocate space on the stack for varargs save area. */
2397 ix86_save_varrargs_registers = 1;
2398
2399 fntype = TREE_TYPE (current_function_decl);
2400 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2401 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2402 != void_type_node));
2403
2404 /* For varargs, we do not want to skip the dummy va_dcl argument.
2405 For stdargs, we do want to skip the last named argument. */
2406 next_cum = *cum;
2407 if (stdarg_p)
2408 function_arg_advance (&next_cum, mode, type, 1);
2409
2410 if (!no_rtl)
2411 save_area = frame_pointer_rtx;
2412
2413 set = get_varargs_alias_set ();
2414
2415 for (i = next_cum.regno; i < ix86_regparm; i++)
2416 {
2417 mem = gen_rtx_MEM (Pmode,
2418 plus_constant (save_area, i * UNITS_PER_WORD));
2419 set_mem_alias_set (mem, set);
2420 emit_move_insn (mem, gen_rtx_REG (Pmode,
2421 x86_64_int_parameter_registers[i]));
2422 }
2423
2424 if (next_cum.sse_nregs)
2425 {
2426 /* Now emit code to save SSE registers. The AX parameter contains the number
2427 of SSE parameter registers used to call this function. We use the
2428 sse_prologue_save insn template, which produces a computed jump across
2429 the SSE saves. We need some preparation work to get this working. */
2430
2431 label = gen_label_rtx ();
2432 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2433
2434 /* Compute address to jump to :
2435 label - 5*eax + nnamed_sse_arguments*5 */
2436 tmp_reg = gen_reg_rtx (Pmode);
2437 nsse_reg = gen_reg_rtx (Pmode);
2438 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2439 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2440 gen_rtx_MULT (Pmode, nsse_reg,
2441 GEN_INT (4))));
2442 if (next_cum.sse_regno)
2443 emit_move_insn
2444 (nsse_reg,
2445 gen_rtx_CONST (DImode,
2446 gen_rtx_PLUS (DImode,
2447 label_ref,
2448 GEN_INT (next_cum.sse_regno * 4))));
2449 else
2450 emit_move_insn (nsse_reg, label_ref);
2451 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2452
2453 /* Compute the address of the memory block we save into. We always use a
2454 pointer pointing 127 bytes after the first byte to store - this is needed
2455 to keep the instruction size limited to 4 bytes. */
2456 tmp_reg = gen_reg_rtx (Pmode);
2457 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2458 plus_constant (save_area,
2459 8 * REGPARM_MAX + 127)));
2460 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2461 set_mem_alias_set (mem, set);
2462 set_mem_align (mem, BITS_PER_WORD);
2463
2464 /* And finally do the dirty job! */
2465 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2466 GEN_INT (next_cum.sse_regno), label));
2467 }
2468
2469 }
2470
2471 /* Implement va_start. */
2472
2473 void
2474 ix86_va_start (valist, nextarg)
2475 tree valist;
2476 rtx nextarg;
2477 {
2478 HOST_WIDE_INT words, n_gpr, n_fpr;
2479 tree f_gpr, f_fpr, f_ovf, f_sav;
2480 tree gpr, fpr, ovf, sav, t;
2481
2482 /* Only the 64-bit target needs something special. */
2483 if (!TARGET_64BIT)
2484 {
2485 std_expand_builtin_va_start (valist, nextarg);
2486 return;
2487 }
2488
2489 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2490 f_fpr = TREE_CHAIN (f_gpr);
2491 f_ovf = TREE_CHAIN (f_fpr);
2492 f_sav = TREE_CHAIN (f_ovf);
2493
2494 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2495 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2496 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2497 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2498 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2499
2500 /* Count number of gp and fp argument registers used. */
2501 words = current_function_args_info.words;
2502 n_gpr = current_function_args_info.regno;
2503 n_fpr = current_function_args_info.sse_regno;
2504
2505 if (TARGET_DEBUG_ARG)
2506 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2507 (int) words, (int) n_gpr, (int) n_fpr);
2508
2509 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2510 build_int_2 (n_gpr * 8, 0));
2511 TREE_SIDE_EFFECTS (t) = 1;
2512 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2513
2514 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2515 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2518
2519 /* Find the overflow area. */
2520 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2521 if (words != 0)
2522 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2523 build_int_2 (words * UNITS_PER_WORD, 0));
2524 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2525 TREE_SIDE_EFFECTS (t) = 1;
2526 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2527
2528 /* Find the register save area.
2529 The prologue of the function saves it right above the stack frame. */
2530 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2531 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2532 TREE_SIDE_EFFECTS (t) = 1;
2533 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2534 }
2535
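/* Worked example of the assignments above (a sketch, assuming the
   current function is declared  int f (int a, ...)  on x86-64): one
   named integer argument gives n_gpr == 1 and n_fpr == 0, so va_start
   sets gp_offset to 8 and fp_offset to 8 * REGPARM_MAX (48 with the
   six integer argument registers), i.e. the first va_arg fetch starts
   at the second slot of the integer part of the register save area.  */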
2536 /* Implement va_arg. */
2537 rtx
2538 ix86_va_arg (valist, type)
2539 tree valist, type;
2540 {
2541 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2542 tree f_gpr, f_fpr, f_ovf, f_sav;
2543 tree gpr, fpr, ovf, sav, t;
2544 int size, rsize;
2545 rtx lab_false, lab_over = NULL_RTX;
2546 rtx addr_rtx, r;
2547 rtx container;
2548
2549 /* Only the 64-bit target needs something special. */
2550 if (!TARGET_64BIT)
2551 {
2552 return std_expand_builtin_va_arg (valist, type);
2553 }
2554
2555 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2556 f_fpr = TREE_CHAIN (f_gpr);
2557 f_ovf = TREE_CHAIN (f_fpr);
2558 f_sav = TREE_CHAIN (f_ovf);
2559
2560 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2561 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2562 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2563 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2564 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2565
2566 size = int_size_in_bytes (type);
2567 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2568
2569 container = construct_container (TYPE_MODE (type), type, 0,
2570 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2571 /*
2572 * Pull the value out of the saved registers ...
2573 */
2574
2575 addr_rtx = gen_reg_rtx (Pmode);
2576
2577 if (container)
2578 {
2579 rtx int_addr_rtx, sse_addr_rtx;
2580 int needed_intregs, needed_sseregs;
2581 int need_temp;
2582
2583 lab_over = gen_label_rtx ();
2584 lab_false = gen_label_rtx ();
2585
2586 examine_argument (TYPE_MODE (type), type, 0,
2587 &needed_intregs, &needed_sseregs);
2588
2589
2590 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2591 || TYPE_ALIGN (type) > 128);
2592
2593 /* When passing a structure, verify that it forms a consecutive block
2594 in the register save area. If not, we need to do moves. */
2595 if (!need_temp && !REG_P (container))
2596 {
2597 /* Verify that all registers are strictly consecutive. */
2598 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2599 {
2600 int i;
2601
2602 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2603 {
2604 rtx slot = XVECEXP (container, 0, i);
2605 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2606 || INTVAL (XEXP (slot, 1)) != i * 16)
2607 need_temp = 1;
2608 }
2609 }
2610 else
2611 {
2612 int i;
2613
2614 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2615 {
2616 rtx slot = XVECEXP (container, 0, i);
2617 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2618 || INTVAL (XEXP (slot, 1)) != i * 8)
2619 need_temp = 1;
2620 }
2621 }
2622 }
2623 if (!need_temp)
2624 {
2625 int_addr_rtx = addr_rtx;
2626 sse_addr_rtx = addr_rtx;
2627 }
2628 else
2629 {
2630 int_addr_rtx = gen_reg_rtx (Pmode);
2631 sse_addr_rtx = gen_reg_rtx (Pmode);
2632 }
2633 /* First ensure that we fit completely in registers. */
2634 if (needed_intregs)
2635 {
2636 emit_cmp_and_jump_insns (expand_expr
2637 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2638 GEN_INT ((REGPARM_MAX - needed_intregs +
2639 1) * 8), GE, const1_rtx, SImode,
2640 1, lab_false);
2641 }
2642 if (needed_sseregs)
2643 {
2644 emit_cmp_and_jump_insns (expand_expr
2645 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2646 GEN_INT ((SSE_REGPARM_MAX -
2647 needed_sseregs + 1) * 16 +
2648 REGPARM_MAX * 8), GE, const1_rtx,
2649 SImode, 1, lab_false);
2650 }
2651
2652 /* Compute index to start of area used for integer regs. */
2653 if (needed_intregs)
2654 {
2655 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2656 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2657 if (r != int_addr_rtx)
2658 emit_move_insn (int_addr_rtx, r);
2659 }
2660 if (needed_sseregs)
2661 {
2662 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2663 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2664 if (r != sse_addr_rtx)
2665 emit_move_insn (sse_addr_rtx, r);
2666 }
2667 if (need_temp)
2668 {
2669 int i;
2670 rtx mem;
2671
2672 /* Never use the memory itself, as it has the alias set. */
2673 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2674 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2675 set_mem_alias_set (mem, get_varargs_alias_set ());
2676 set_mem_align (mem, BITS_PER_UNIT);
2677
2678 for (i = 0; i < XVECLEN (container, 0); i++)
2679 {
2680 rtx slot = XVECEXP (container, 0, i);
2681 rtx reg = XEXP (slot, 0);
2682 enum machine_mode mode = GET_MODE (reg);
2683 rtx src_addr;
2684 rtx src_mem;
2685 int src_offset;
2686 rtx dest_mem;
2687
2688 if (SSE_REGNO_P (REGNO (reg)))
2689 {
2690 src_addr = sse_addr_rtx;
2691 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2692 }
2693 else
2694 {
2695 src_addr = int_addr_rtx;
2696 src_offset = REGNO (reg) * 8;
2697 }
2698 src_mem = gen_rtx_MEM (mode, src_addr);
2699 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2700 src_mem = adjust_address (src_mem, mode, src_offset);
2701 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2702 emit_move_insn (dest_mem, src_mem);
2703 }
2704 }
2705
2706 if (needed_intregs)
2707 {
2708 t =
2709 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2710 build_int_2 (needed_intregs * 8, 0));
2711 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2712 TREE_SIDE_EFFECTS (t) = 1;
2713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2714 }
2715 if (needed_sseregs)
2716 {
2717 t =
2718 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2719 build_int_2 (needed_sseregs * 16, 0));
2720 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2721 TREE_SIDE_EFFECTS (t) = 1;
2722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2723 }
2724
2725 emit_jump_insn (gen_jump (lab_over));
2726 emit_barrier ();
2727 emit_label (lab_false);
2728 }
2729
2730 /* ... otherwise out of the overflow area. */
2731
2732 /* Care for on-stack alignment if needed. */
2733 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2734 t = ovf;
2735 else
2736 {
2737 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2738 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2739 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2740 }
2741 t = save_expr (t);
2742
2743 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2744 if (r != addr_rtx)
2745 emit_move_insn (addr_rtx, r);
2746
2747 t =
2748 build (PLUS_EXPR, TREE_TYPE (t), t,
2749 build_int_2 (rsize * UNITS_PER_WORD, 0));
2750 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2751 TREE_SIDE_EFFECTS (t) = 1;
2752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2753
2754 if (container)
2755 emit_label (lab_over);
2756
2757 return addr_rtx;
2758 }
2759 \f
2760 /* Return nonzero if OP is either a i387 or SSE fp register. */
2761 int
2762 any_fp_register_operand (op, mode)
2763 rtx op;
2764 enum machine_mode mode ATTRIBUTE_UNUSED;
2765 {
2766 return ANY_FP_REG_P (op);
2767 }
2768
2769 /* Return nonzero if OP is an i387 fp register. */
2770 int
2771 fp_register_operand (op, mode)
2772 rtx op;
2773 enum machine_mode mode ATTRIBUTE_UNUSED;
2774 {
2775 return FP_REG_P (op);
2776 }
2777
2778 /* Return nonzero if OP is a non-fp register_operand. */
2779 int
2780 register_and_not_any_fp_reg_operand (op, mode)
2781 rtx op;
2782 enum machine_mode mode;
2783 {
2784 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2785 }
2786
2787 /* Return nonzero if OP is a register operand other than an
2788 i387 fp register. */
2789 int
2790 register_and_not_fp_reg_operand (op, mode)
2791 rtx op;
2792 enum machine_mode mode;
2793 {
2794 return register_operand (op, mode) && !FP_REG_P (op);
2795 }
2796
2797 /* Return nonzero if OP is general operand representable on x86_64. */
2798
2799 int
2800 x86_64_general_operand (op, mode)
2801 rtx op;
2802 enum machine_mode mode;
2803 {
2804 if (!TARGET_64BIT)
2805 return general_operand (op, mode);
2806 if (nonimmediate_operand (op, mode))
2807 return 1;
2808 return x86_64_sign_extended_value (op);
2809 }
2810
2811 /* Return nonzero if OP is general operand representable on x86_64
2812 as either sign extended or zero extended constant. */
2813
2814 int
2815 x86_64_szext_general_operand (op, mode)
2816 rtx op;
2817 enum machine_mode mode;
2818 {
2819 if (!TARGET_64BIT)
2820 return general_operand (op, mode);
2821 if (nonimmediate_operand (op, mode))
2822 return 1;
2823 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2824 }
2825
2826 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2827
2828 int
2829 x86_64_nonmemory_operand (op, mode)
2830 rtx op;
2831 enum machine_mode mode;
2832 {
2833 if (!TARGET_64BIT)
2834 return nonmemory_operand (op, mode);
2835 if (register_operand (op, mode))
2836 return 1;
2837 return x86_64_sign_extended_value (op);
2838 }
2839
2840 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2841
2842 int
2843 x86_64_movabs_operand (op, mode)
2844 rtx op;
2845 enum machine_mode mode;
2846 {
2847 if (!TARGET_64BIT || !flag_pic)
2848 return nonmemory_operand (op, mode);
2849 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2850 return 1;
2851 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2852 return 1;
2853 return 0;
2854 }
2855
2856 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2857
2858 int
2859 x86_64_szext_nonmemory_operand (op, mode)
2860 rtx op;
2861 enum machine_mode mode;
2862 {
2863 if (!TARGET_64BIT)
2864 return nonmemory_operand (op, mode);
2865 if (register_operand (op, mode))
2866 return 1;
2867 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2868 }
2869
2870 /* Return nonzero if OP is immediate operand representable on x86_64. */
2871
2872 int
2873 x86_64_immediate_operand (op, mode)
2874 rtx op;
2875 enum machine_mode mode;
2876 {
2877 if (!TARGET_64BIT)
2878 return immediate_operand (op, mode);
2879 return x86_64_sign_extended_value (op);
2880 }
2881
2882 /* Return nonzero if OP is immediate operand representable on x86_64. */
2883
2884 int
2885 x86_64_zext_immediate_operand (op, mode)
2886 rtx op;
2887 enum machine_mode mode ATTRIBUTE_UNUSED;
2888 {
2889 return x86_64_zero_extended_value (op);
2890 }
2891
2892 /* Return nonzero if OP is (const_int 1), else return zero. */
2893
2894 int
2895 const_int_1_operand (op, mode)
2896 rtx op;
2897 enum machine_mode mode ATTRIBUTE_UNUSED;
2898 {
2899 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2900 }
2901
2902 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2903 for shift & compare patterns, as shifting by 0 does not change flags),
2904 else return zero. */
2905
2906 int
2907 const_int_1_31_operand (op, mode)
2908 rtx op;
2909 enum machine_mode mode ATTRIBUTE_UNUSED;
2910 {
2911 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2912 }
2913
2914 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2915 reference and a constant. */
2916
2917 int
2918 symbolic_operand (op, mode)
2919 register rtx op;
2920 enum machine_mode mode ATTRIBUTE_UNUSED;
2921 {
2922 switch (GET_CODE (op))
2923 {
2924 case SYMBOL_REF:
2925 case LABEL_REF:
2926 return 1;
2927
2928 case CONST:
2929 op = XEXP (op, 0);
2930 if (GET_CODE (op) == SYMBOL_REF
2931 || GET_CODE (op) == LABEL_REF
2932 || (GET_CODE (op) == UNSPEC
2933 && (XINT (op, 1) == UNSPEC_GOT
2934 || XINT (op, 1) == UNSPEC_GOTOFF
2935 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2936 return 1;
2937 if (GET_CODE (op) != PLUS
2938 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2939 return 0;
2940
2941 op = XEXP (op, 0);
2942 if (GET_CODE (op) == SYMBOL_REF
2943 || GET_CODE (op) == LABEL_REF)
2944 return 1;
2945 /* Only @GOTOFF gets offsets. */
2946 if (GET_CODE (op) != UNSPEC
2947 || XINT (op, 1) != UNSPEC_GOTOFF)
2948 return 0;
2949
2950 op = XVECEXP (op, 0, 0);
2951 if (GET_CODE (op) == SYMBOL_REF
2952 || GET_CODE (op) == LABEL_REF)
2953 return 1;
2954 return 0;
2955
2956 default:
2957 return 0;
2958 }
2959 }
2960
2961 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2962
2963 int
2964 pic_symbolic_operand (op, mode)
2965 register rtx op;
2966 enum machine_mode mode ATTRIBUTE_UNUSED;
2967 {
2968 if (GET_CODE (op) != CONST)
2969 return 0;
2970 op = XEXP (op, 0);
2971 if (TARGET_64BIT)
2972 {
2973 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2974 return 1;
2975 }
2976 else
2977 {
2978 if (GET_CODE (op) == UNSPEC)
2979 return 1;
2980 if (GET_CODE (op) != PLUS
2981 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2982 return 0;
2983 op = XEXP (op, 0);
2984 if (GET_CODE (op) == UNSPEC)
2985 return 1;
2986 }
2987 return 0;
2988 }
2989
2990 /* Return true if OP is a symbolic operand that resolves locally. */
2991
2992 static int
2993 local_symbolic_operand (op, mode)
2994 rtx op;
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2996 {
2997 if (GET_CODE (op) == LABEL_REF)
2998 return 1;
2999
3000 if (GET_CODE (op) == CONST
3001 && GET_CODE (XEXP (op, 0)) == PLUS
3002 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3003 op = XEXP (XEXP (op, 0), 0);
3004
3005 if (GET_CODE (op) != SYMBOL_REF)
3006 return 0;
3007
3008 /* These we've been told are local by varasm and encode_section_info
3009 respectively. */
3010 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3011 return 1;
3012
3013 /* There is, however, a not insubstantial body of code in the rest of
3014 the compiler that assumes it can just stick the results of
3015 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3016 /* ??? This is a hack. Should update the body of the compiler to
3017 always create a DECL and invoke targetm.encode_section_info. */
3018 if (strncmp (XSTR (op, 0), internal_label_prefix,
3019 internal_label_prefix_len) == 0)
3020 return 1;
3021
3022 return 0;
3023 }
3024
3025 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3026
3027 int
3028 tls_symbolic_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3031 {
3032 const char *symbol_str;
3033
3034 if (GET_CODE (op) != SYMBOL_REF)
3035 return 0;
3036 symbol_str = XSTR (op, 0);
3037
3038 if (symbol_str[0] != '%')
3039 return 0;
3040 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3041 }
3042
3043 static int
3044 tls_symbolic_operand_1 (op, kind)
3045 rtx op;
3046 enum tls_model kind;
3047 {
3048 const char *symbol_str;
3049
3050 if (GET_CODE (op) != SYMBOL_REF)
3051 return 0;
3052 symbol_str = XSTR (op, 0);
3053
3054 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3055 }
3056
3057 int
3058 global_dynamic_symbolic_operand (op, mode)
3059 register rtx op;
3060 enum machine_mode mode ATTRIBUTE_UNUSED;
3061 {
3062 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3063 }
3064
3065 int
3066 local_dynamic_symbolic_operand (op, mode)
3067 register rtx op;
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3069 {
3070 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3071 }
3072
3073 int
3074 initial_exec_symbolic_operand (op, mode)
3075 register rtx op;
3076 enum machine_mode mode ATTRIBUTE_UNUSED;
3077 {
3078 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3079 }
3080
3081 int
3082 local_exec_symbolic_operand (op, mode)
3083 register rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 {
3086 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3087 }
3088
3089 /* Test for a valid operand for a call instruction. Don't allow the
3090 arg pointer register or virtual regs since they may decay into
3091 reg + const, which the patterns can't handle. */
3092
3093 int
3094 call_insn_operand (op, mode)
3095 rtx op;
3096 enum machine_mode mode ATTRIBUTE_UNUSED;
3097 {
3098 /* Disallow indirect through a virtual register. This leads to
3099 compiler aborts when trying to eliminate them. */
3100 if (GET_CODE (op) == REG
3101 && (op == arg_pointer_rtx
3102 || op == frame_pointer_rtx
3103 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3104 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3105 return 0;
3106
3107 /* Disallow `call 1234'. Due to varying assembler lameness this
3108 gets either rejected or translated to `call .+1234'. */
3109 if (GET_CODE (op) == CONST_INT)
3110 return 0;
3111
3112 /* Explicitly allow SYMBOL_REF even if pic. */
3113 if (GET_CODE (op) == SYMBOL_REF)
3114 return 1;
3115
3116 /* Otherwise we can allow any general_operand in the address. */
3117 return general_operand (op, Pmode);
3118 }
3119
3120 int
3121 constant_call_address_operand (op, mode)
3122 rtx op;
3123 enum machine_mode mode ATTRIBUTE_UNUSED;
3124 {
3125 if (GET_CODE (op) == CONST
3126 && GET_CODE (XEXP (op, 0)) == PLUS
3127 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3128 op = XEXP (XEXP (op, 0), 0);
3129 return GET_CODE (op) == SYMBOL_REF;
3130 }
3131
3132 /* Match exactly zero and one. */
3133
3134 int
3135 const0_operand (op, mode)
3136 register rtx op;
3137 enum machine_mode mode;
3138 {
3139 return op == CONST0_RTX (mode);
3140 }
3141
3142 int
3143 const1_operand (op, mode)
3144 register rtx op;
3145 enum machine_mode mode ATTRIBUTE_UNUSED;
3146 {
3147 return op == const1_rtx;
3148 }
3149
3150 /* Match 2, 4, or 8. Used for leal multiplicands. */
3151
3152 int
3153 const248_operand (op, mode)
3154 register rtx op;
3155 enum machine_mode mode ATTRIBUTE_UNUSED;
3156 {
3157 return (GET_CODE (op) == CONST_INT
3158 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3159 }
3160
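/* Example of why only 2, 4 and 8 qualify above (the assembly is
   illustrative): the i386 SIB addressing mode scales an index register
   by 1, 2, 4 or 8 only, so

     leal (%ebx,%eax,4), %ecx    # ecx = ebx + eax * 4

   is encodable, while other multipliers are not directly expressible
   in the scale field.  */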
3161 /* True if this is a constant appropriate for an increment or decrement. */
3162
3163 int
3164 incdec_operand (op, mode)
3165 register rtx op;
3166 enum machine_mode mode ATTRIBUTE_UNUSED;
3167 {
3168 /* On Pentium 4, the inc and dec operations cause an extra dependency on
3169 the flags register, since the carry flag is not set. */
3170 if (TARGET_PENTIUM4 && !optimize_size)
3171 return 0;
3172 return op == const1_rtx || op == constm1_rtx;
3173 }
3174
3175 /* Return nonzero if OP is acceptable as operand of DImode shift
3176 expander. */
3177
3178 int
3179 shiftdi_operand (op, mode)
3180 rtx op;
3181 enum machine_mode mode ATTRIBUTE_UNUSED;
3182 {
3183 if (TARGET_64BIT)
3184 return nonimmediate_operand (op, mode);
3185 else
3186 return register_operand (op, mode);
3187 }
3188
3189 /* Return false if this is the stack pointer, or any other fake
3190 register eliminable to the stack pointer. Otherwise, this is
3191 a register operand.
3192
3193 This is used to prevent esp from being used as an index reg,
3194 which would only happen in pathological cases. */
3195
3196 int
3197 reg_no_sp_operand (op, mode)
3198 register rtx op;
3199 enum machine_mode mode;
3200 {
3201 rtx t = op;
3202 if (GET_CODE (t) == SUBREG)
3203 t = SUBREG_REG (t);
3204 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3205 return 0;
3206
3207 return register_operand (op, mode);
3208 }
3209
3210 int
3211 mmx_reg_operand (op, mode)
3212 register rtx op;
3213 enum machine_mode mode ATTRIBUTE_UNUSED;
3214 {
3215 return MMX_REG_P (op);
3216 }
3217
3218 /* Return false if this is any eliminable register. Otherwise
3219 general_operand. */
3220
3221 int
3222 general_no_elim_operand (op, mode)
3223 register rtx op;
3224 enum machine_mode mode;
3225 {
3226 rtx t = op;
3227 if (GET_CODE (t) == SUBREG)
3228 t = SUBREG_REG (t);
3229 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3230 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3231 || t == virtual_stack_dynamic_rtx)
3232 return 0;
3233 if (REG_P (t)
3234 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3235 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3236 return 0;
3237
3238 return general_operand (op, mode);
3239 }
3240
3241 /* Return false if this is any eliminable register. Otherwise
3242 register_operand or const_int. */
3243
3244 int
3245 nonmemory_no_elim_operand (op, mode)
3246 register rtx op;
3247 enum machine_mode mode;
3248 {
3249 rtx t = op;
3250 if (GET_CODE (t) == SUBREG)
3251 t = SUBREG_REG (t);
3252 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3253 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3254 || t == virtual_stack_dynamic_rtx)
3255 return 0;
3256
3257 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3258 }
3259
3260 /* Return false if this is any eliminable register or stack register,
3261 otherwise work like register_operand. */
3262
3263 int
3264 index_register_operand (op, mode)
3265 register rtx op;
3266 enum machine_mode mode;
3267 {
3268 rtx t = op;
3269 if (GET_CODE (t) == SUBREG)
3270 t = SUBREG_REG (t);
3271 if (!REG_P (t))
3272 return 0;
3273 if (t == arg_pointer_rtx
3274 || t == frame_pointer_rtx
3275 || t == virtual_incoming_args_rtx
3276 || t == virtual_stack_vars_rtx
3277 || t == virtual_stack_dynamic_rtx
3278 || REGNO (t) == STACK_POINTER_REGNUM)
3279 return 0;
3280
3281 return general_operand (op, mode);
3282 }
3283
3284 /* Return true if op is a Q_REGS class register. */
3285
3286 int
3287 q_regs_operand (op, mode)
3288 register rtx op;
3289 enum machine_mode mode;
3290 {
3291 if (mode != VOIDmode && GET_MODE (op) != mode)
3292 return 0;
3293 if (GET_CODE (op) == SUBREG)
3294 op = SUBREG_REG (op);
3295 return ANY_QI_REG_P (op);
3296 }
3297
3298 /* Return true if op is a NON_Q_REGS class register. */
3299
3300 int
3301 non_q_regs_operand (op, mode)
3302 register rtx op;
3303 enum machine_mode mode;
3304 {
3305 if (mode != VOIDmode && GET_MODE (op) != mode)
3306 return 0;
3307 if (GET_CODE (op) == SUBREG)
3308 op = SUBREG_REG (op);
3309 return NON_QI_REG_P (op);
3310 }
3311
3312 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3313 insns. */
3314 int
3315 sse_comparison_operator (op, mode)
3316 rtx op;
3317 enum machine_mode mode ATTRIBUTE_UNUSED;
3318 {
3319 enum rtx_code code = GET_CODE (op);
3320 switch (code)
3321 {
3322 /* Operations supported directly. */
3323 case EQ:
3324 case LT:
3325 case LE:
3326 case UNORDERED:
3327 case NE:
3328 case UNGE:
3329 case UNGT:
3330 case ORDERED:
3331 return 1;
3332 /* These are equivalent to ones above in non-IEEE comparisons. */
3333 case UNEQ:
3334 case UNLT:
3335 case UNLE:
3336 case LTGT:
3337 case GE:
3338 case GT:
3339 return !TARGET_IEEE_FP;
3340 default:
3341 return 0;
3342 }
3343 }
3344 /* Return 1 if OP is a valid comparison operator in valid mode. */
3345 int
3346 ix86_comparison_operator (op, mode)
3347 register rtx op;
3348 enum machine_mode mode;
3349 {
3350 enum machine_mode inmode;
3351 enum rtx_code code = GET_CODE (op);
3352 if (mode != VOIDmode && GET_MODE (op) != mode)
3353 return 0;
3354 if (GET_RTX_CLASS (code) != '<')
3355 return 0;
3356 inmode = GET_MODE (XEXP (op, 0));
3357
3358 if (inmode == CCFPmode || inmode == CCFPUmode)
3359 {
3360 enum rtx_code second_code, bypass_code;
3361 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3362 return (bypass_code == NIL && second_code == NIL);
3363 }
3364 switch (code)
3365 {
3366 case EQ: case NE:
3367 return 1;
3368 case LT: case GE:
3369 if (inmode == CCmode || inmode == CCGCmode
3370 || inmode == CCGOCmode || inmode == CCNOmode)
3371 return 1;
3372 return 0;
3373 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3374 if (inmode == CCmode)
3375 return 1;
3376 return 0;
3377 case GT: case LE:
3378 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3379 return 1;
3380 return 0;
3381 default:
3382 return 0;
3383 }
3384 }
3385
3386 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3387
3388 int
3389 fcmov_comparison_operator (op, mode)
3390 register rtx op;
3391 enum machine_mode mode;
3392 {
3393 enum machine_mode inmode;
3394 enum rtx_code code = GET_CODE (op);
3395 if (mode != VOIDmode && GET_MODE (op) != mode)
3396 return 0;
3397 if (GET_RTX_CLASS (code) != '<')
3398 return 0;
3399 inmode = GET_MODE (XEXP (op, 0));
3400 if (inmode == CCFPmode || inmode == CCFPUmode)
3401 {
3402 enum rtx_code second_code, bypass_code;
3403 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3404 if (bypass_code != NIL || second_code != NIL)
3405 return 0;
3406 code = ix86_fp_compare_code_to_integer (code);
3407 }
3408 /* The i387 supports only a limited set of condition codes. */
3409 switch (code)
3410 {
3411 case LTU: case GTU: case LEU: case GEU:
3412 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3413 return 1;
3414 return 0;
3415 case ORDERED: case UNORDERED:
3416 case EQ: case NE:
3417 return 1;
3418 default:
3419 return 0;
3420 }
3421 }
3422
3423 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3424
3425 int
3426 promotable_binary_operator (op, mode)
3427 register rtx op;
3428 enum machine_mode mode ATTRIBUTE_UNUSED;
3429 {
3430 switch (GET_CODE (op))
3431 {
3432 case MULT:
3433 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3434 but the 386 and 486 do HImode multiplies faster. */
3435 return ix86_cpu > PROCESSOR_I486;
3436 case PLUS:
3437 case AND:
3438 case IOR:
3439 case XOR:
3440 case ASHIFT:
3441 return 1;
3442 default:
3443 return 0;
3444 }
3445 }
3446
3447 /* Nearly general operand, but accept any const_double, since we wish
3448 to be able to drop them into memory rather than have them get pulled
3449 into registers. */
3450
3451 int
3452 cmp_fp_expander_operand (op, mode)
3453 register rtx op;
3454 enum machine_mode mode;
3455 {
3456 if (mode != VOIDmode && mode != GET_MODE (op))
3457 return 0;
3458 if (GET_CODE (op) == CONST_DOUBLE)
3459 return 1;
3460 return general_operand (op, mode);
3461 }
3462
3463 /* Match an SI or HImode register for a zero_extract. */
3464
3465 int
3466 ext_register_operand (op, mode)
3467 register rtx op;
3468 enum machine_mode mode ATTRIBUTE_UNUSED;
3469 {
3470 int regno;
3471 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3472 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3473 return 0;
3474
3475 if (!register_operand (op, VOIDmode))
3476 return 0;
3477
3478 /* Be careful to accept only registers having upper parts. */
3479 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3480 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3481 }
3482
3483 /* Return 1 if this is a valid binary floating-point operation.
3484 OP is the expression matched, and MODE is its mode. */
3485
3486 int
3487 binary_fp_operator (op, mode)
3488 register rtx op;
3489 enum machine_mode mode;
3490 {
3491 if (mode != VOIDmode && mode != GET_MODE (op))
3492 return 0;
3493
3494 switch (GET_CODE (op))
3495 {
3496 case PLUS:
3497 case MINUS:
3498 case MULT:
3499 case DIV:
3500 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3501
3502 default:
3503 return 0;
3504 }
3505 }
3506
3507 int
3508 mult_operator (op, mode)
3509 register rtx op;
3510 enum machine_mode mode ATTRIBUTE_UNUSED;
3511 {
3512 return GET_CODE (op) == MULT;
3513 }
3514
3515 int
3516 div_operator (op, mode)
3517 register rtx op;
3518 enum machine_mode mode ATTRIBUTE_UNUSED;
3519 {
3520 return GET_CODE (op) == DIV;
3521 }
3522
3523 int
3524 arith_or_logical_operator (op, mode)
3525 rtx op;
3526 enum machine_mode mode;
3527 {
3528 return ((mode == VOIDmode || GET_MODE (op) == mode)
3529 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3530 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3531 }
3532
3533 /* Returns 1 if OP is a memory operand with a displacement. */
3534
3535 int
3536 memory_displacement_operand (op, mode)
3537 register rtx op;
3538 enum machine_mode mode;
3539 {
3540 struct ix86_address parts;
3541
3542 if (! memory_operand (op, mode))
3543 return 0;
3544
3545 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3546 abort ();
3547
3548 return parts.disp != NULL_RTX;
3549 }
3550
3551 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3552 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3553
3554 ??? It seems likely that this will only work because cmpsi is an
3555 expander, and no actual insns use this. */
3556
3557 int
3558 cmpsi_operand (op, mode)
3559 rtx op;
3560 enum machine_mode mode;
3561 {
3562 if (nonimmediate_operand (op, mode))
3563 return 1;
3564
3565 if (GET_CODE (op) == AND
3566 && GET_MODE (op) == SImode
3567 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3568 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3569 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3570 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3571 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3572 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3573 return 1;
3574
3575 return 0;
3576 }
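
/* For illustration (an added note, not from the original sources): besides
   ordinary nonimmediate operands, the special case above accepts RTL of the
   form

	(and:SI (zero_extract:SI (reg:SI 0) (const_int 8) (const_int 8))
		(const_int 255))

   i.e. a test of the high byte of a register (such as %ah) against a
   constant, which is what jump may re-emit for testqi_ext_ccno_0.  */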
3577
3578 /* Returns 1 if OP is a memory operand whose address cannot be represented
3579 by the ModR/M byte alone. */
3580
3581 int
3582 long_memory_operand (op, mode)
3583 register rtx op;
3584 enum machine_mode mode;
3585 {
3586 if (! memory_operand (op, mode))
3587 return 0;
3588
3589 return memory_address_length (op) != 0;
3590 }
3591
3592 /* Return nonzero if the rtx is known aligned. */
3593
3594 int
3595 aligned_operand (op, mode)
3596 rtx op;
3597 enum machine_mode mode;
3598 {
3599 struct ix86_address parts;
3600
3601 if (!general_operand (op, mode))
3602 return 0;
3603
3604 /* Registers and immediate operands are always "aligned". */
3605 if (GET_CODE (op) != MEM)
3606 return 1;
3607
3608 /* Don't even try to do any aligned optimizations with volatiles. */
3609 if (MEM_VOLATILE_P (op))
3610 return 0;
3611
3612 op = XEXP (op, 0);
3613
3614 /* Pushes and pops are only valid on the stack pointer. */
3615 if (GET_CODE (op) == PRE_DEC
3616 || GET_CODE (op) == POST_INC)
3617 return 1;
3618
3619 /* Decode the address. */
3620 if (! ix86_decompose_address (op, &parts))
3621 abort ();
3622
3623 if (parts.base && GET_CODE (parts.base) == SUBREG)
3624 parts.base = SUBREG_REG (parts.base);
3625 if (parts.index && GET_CODE (parts.index) == SUBREG)
3626 parts.index = SUBREG_REG (parts.index);
3627
3628 /* Look for some component that isn't known to be aligned. */
3629 if (parts.index)
3630 {
3631 if (parts.scale < 4
3632 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3633 return 0;
3634 }
3635 if (parts.base)
3636 {
3637 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3638 return 0;
3639 }
3640 if (parts.disp)
3641 {
3642 if (GET_CODE (parts.disp) != CONST_INT
3643 || (INTVAL (parts.disp) & 3) != 0)
3644 return 0;
3645 }
3646
3647 /* Didn't find one -- this must be an aligned address. */
3648 return 1;
3649 }
3650 \f
3651 /* Return true if the constant is something that can be loaded with
3652 a special instruction. Only handle 0.0 and 1.0; others are less
3653 worthwhile. */
3654
3655 int
3656 standard_80387_constant_p (x)
3657 rtx x;
3658 {
3659 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3660 return -1;
3661   /* Note that on the 80387 there are other constants, such as pi, that we could
3662      support too. On some machines these are much slower to load as a standard
3663      constant than to load from a double in memory. */
3664 if (x == CONST0_RTX (GET_MODE (x)))
3665 return 1;
3666 if (x == CONST1_RTX (GET_MODE (x)))
3667 return 2;
3668 return 0;
3669 }
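
/* Worked example (an added note, not from the original sources): for DFmode,
   CONST0_RTX (DFmode) (+0.0) yields 1 and CONST1_RTX (DFmode) (1.0) yields 2;
   any other CONST_DOUBLE yields 0 and a non-FP rtx yields -1.  The distinct
   nonzero values are presumably used by the move patterns to pick between
   the fldz and fld1 instructions.  */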
3670
3671 /* Return 1 if X is an FP constant that we can load into an SSE register
3672 without using memory. */
3673 int
3674 standard_sse_constant_p (x)
3675 rtx x;
3676 {
3677 if (GET_CODE (x) != CONST_DOUBLE)
3678 return -1;
3679 return (x == CONST0_RTX (GET_MODE (x)));
3680 }
3681
3682 /* Returns 1 if OP contains a symbol reference */
3683
3684 int
3685 symbolic_reference_mentioned_p (op)
3686 rtx op;
3687 {
3688 register const char *fmt;
3689 register int i;
3690
3691 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3692 return 1;
3693
3694 fmt = GET_RTX_FORMAT (GET_CODE (op));
3695 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3696 {
3697 if (fmt[i] == 'E')
3698 {
3699 register int j;
3700
3701 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3702 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3703 return 1;
3704 }
3705
3706 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3707 return 1;
3708 }
3709
3710 return 0;
3711 }
3712
3713 /* Return 1 if it is appropriate to emit `ret' instructions in the
3714 body of a function. Do this only if the epilogue is simple, needing a
3715 couple of insns. Prior to reloading, we can't tell how many registers
3716 must be saved, so return 0 then. Return 0 if there is no frame
3717 marker to de-allocate.
3718
3719 If NON_SAVING_SETJMP is defined and true, then it is not possible
3720 for the epilogue to be simple, so return 0. This is a special case
3721 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3722 until final, but jump_optimize may need to know sooner if a
3723 `return' is OK. */
3724
3725 int
3726 ix86_can_use_return_insn_p ()
3727 {
3728 struct ix86_frame frame;
3729
3730 #ifdef NON_SAVING_SETJMP
3731 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3732 return 0;
3733 #endif
3734
3735 if (! reload_completed || frame_pointer_needed)
3736 return 0;
3737
3738   /* Don't allow more than 32K bytes of pop, since that's all we can do
3739      with one instruction. */
3740 if (current_function_pops_args
3741 && current_function_args_size >= 32768)
3742 return 0;
3743
3744 ix86_compute_frame_layout (&frame);
3745 return frame.to_allocate == 0 && frame.nregs == 0;
3746 }
3747 \f
3748 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3749 int
3750 x86_64_sign_extended_value (value)
3751 rtx value;
3752 {
3753 switch (GET_CODE (value))
3754 {
3755       /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3756          to be at least 32, and thus all acceptable constants are
3757          represented as CONST_INTs. */
3758 case CONST_INT:
3759 if (HOST_BITS_PER_WIDE_INT == 32)
3760 return 1;
3761 else
3762 {
3763 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3764 return trunc_int_for_mode (val, SImode) == val;
3765 }
3766 break;
3767
3768 /* For certain code models, the symbolic references are known to fit. */
3769 case SYMBOL_REF:
3770 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3771
3772 /* For certain code models, the code is near as well. */
3773 case LABEL_REF:
3774 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3775
3776       /* We may also accept offsetted memory references in certain special
3777          cases. */
3778 case CONST:
3779 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3780 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3781 return 1;
3782 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3783 {
3784 rtx op1 = XEXP (XEXP (value, 0), 0);
3785 rtx op2 = XEXP (XEXP (value, 0), 1);
3786 HOST_WIDE_INT offset;
3787
3788 if (ix86_cmodel == CM_LARGE)
3789 return 0;
3790 if (GET_CODE (op2) != CONST_INT)
3791 return 0;
3792 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3793 switch (GET_CODE (op1))
3794 {
3795 case SYMBOL_REF:
3796               /* For CM_SMALL assume that the last object is 1MB below the
3797                  end of the 31-bit boundary. We may also accept fairly
3798                  large negative constants, knowing that all objects are
3799                  in the positive half of the address space. */
3800 if (ix86_cmodel == CM_SMALL
3801 && offset < 1024*1024*1024
3802 && trunc_int_for_mode (offset, SImode) == offset)
3803 return 1;
3804               /* For CM_KERNEL we know that all objects reside in the
3805                  negative half of the 32-bit address space. We must not
3806                  accept negative offsets, since they may push the address
3807                  just out of range, but we may accept fairly large positive ones. */
3808 if (ix86_cmodel == CM_KERNEL
3809 && offset > 0
3810 && trunc_int_for_mode (offset, SImode) == offset)
3811 return 1;
3812 break;
3813 case LABEL_REF:
3814 /* These conditions are similar to SYMBOL_REF ones, just the
3815 constraints for code models differ. */
3816 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3817 && offset < 1024*1024*1024
3818 && trunc_int_for_mode (offset, SImode) == offset)
3819 return 1;
3820 if (ix86_cmodel == CM_KERNEL
3821 && offset > 0
3822 && trunc_int_for_mode (offset, SImode) == offset)
3823 return 1;
3824 break;
3825 default:
3826 return 0;
3827 }
3828 }
3829 return 0;
3830 default:
3831 return 0;
3832 }
3833 }
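
/* Worked example (an added note, not from the original sources): on a host
   with 64-bit HOST_WIDE_INT, the CONST_INTs -1 and 0x7fffffff are accepted,
   because truncating them to SImode and sign-extending back reproduces the
   same value, while 0x80000000 (2147483648) is rejected, since it truncates
   to -2147483648.  */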
3834
3835 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3836 int
3837 x86_64_zero_extended_value (value)
3838 rtx value;
3839 {
3840 switch (GET_CODE (value))
3841 {
3842 case CONST_DOUBLE:
3843 if (HOST_BITS_PER_WIDE_INT == 32)
3844 return (GET_MODE (value) == VOIDmode
3845 && !CONST_DOUBLE_HIGH (value));
3846 else
3847 return 0;
3848 case CONST_INT:
3849 if (HOST_BITS_PER_WIDE_INT == 32)
3850 return INTVAL (value) >= 0;
3851 else
3852 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3853 break;
3854
3855 /* For certain code models, the symbolic references are known to fit. */
3856 case SYMBOL_REF:
3857 return ix86_cmodel == CM_SMALL;
3858
3859 /* For certain code models, the code is near as well. */
3860 case LABEL_REF:
3861 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3862
3863       /* We may also accept offsetted memory references in certain special
3864          cases. */
3865 case CONST:
3866 if (GET_CODE (XEXP (value, 0)) == PLUS)
3867 {
3868 rtx op1 = XEXP (XEXP (value, 0), 0);
3869 rtx op2 = XEXP (XEXP (value, 0), 1);
3870
3871 if (ix86_cmodel == CM_LARGE)
3872 return 0;
3873 switch (GET_CODE (op1))
3874 {
3875 case SYMBOL_REF:
3876 return 0;
3877               /* For the small code model we may accept fairly large positive
3878                  offsets, since one bit is available for free. Negative
3879                  offsets are limited by the size of the NULL pointer area
3880                  specified by the ABI. */
3881 if (ix86_cmodel == CM_SMALL
3882 && GET_CODE (op2) == CONST_INT
3883 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3884 && (trunc_int_for_mode (INTVAL (op2), SImode)
3885 == INTVAL (op2)))
3886 return 1;
3887 /* ??? For the kernel, we may accept adjustment of
3888 -0x10000000, since we know that it will just convert
3889 negative address space to positive, but perhaps this
3890 is not worthwhile. */
3891 break;
3892 case LABEL_REF:
3893 /* These conditions are similar to SYMBOL_REF ones, just the
3894 constraints for code models differ. */
3895 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3896 && GET_CODE (op2) == CONST_INT
3897 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3898 && (trunc_int_for_mode (INTVAL (op2), SImode)
3899 == INTVAL (op2)))
3900 return 1;
3901 break;
3902 default:
3903 return 0;
3904 }
3905 }
3906 return 0;
3907 default:
3908 return 0;
3909 }
3910 }
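
/* Worked example (an added note, not from the original sources): on a host
   with 64-bit HOST_WIDE_INT, the CONST_INT 0xffffffff is accepted (all bits
   above bit 31 are clear, so it fits a zero-extended 32-bit immediate even
   though it does not fit a sign-extended one), while -1 is rejected because
   its upper 32 bits are set.  */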
3911
3912 /* Value should be nonzero if functions must have frame pointers.
3913 Zero means the frame pointer need not be set up (and parms may
3914 be accessed via the stack pointer) in functions that seem suitable. */
3915
3916 int
3917 ix86_frame_pointer_required ()
3918 {
3919 /* If we accessed previous frames, then the generated code expects
3920 to be able to access the saved ebp value in our frame. */
3921 if (cfun->machine->accesses_prev_frame)
3922 return 1;
3923
3924   /* Several x86 OSes need a frame pointer for other reasons,
3925 usually pertaining to setjmp. */
3926 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3927 return 1;
3928
3929 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3930 the frame pointer by default. Turn it back on now if we've not
3931 got a leaf function. */
3932 if (TARGET_OMIT_LEAF_FRAME_POINTER
3933 && (!current_function_is_leaf || current_function_profile))
3934 return 1;
3935
3936 return 0;
3937 }
3938
3939 /* Record that the current function accesses previous call frames. */
3940
3941 void
3942 ix86_setup_frame_addresses ()
3943 {
3944 cfun->machine->accesses_prev_frame = 1;
3945 }
3946 \f
3947 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3948 # define USE_HIDDEN_LINKONCE 1
3949 #else
3950 # define USE_HIDDEN_LINKONCE 0
3951 #endif
3952
3953 static int pic_labels_used;
3954
3955 /* Fills in the label name that should be used for a pc thunk for
3956 the given register. */
3957
3958 static void
3959 get_pc_thunk_name (name, regno)
3960 char name[32];
3961 unsigned int regno;
3962 {
3963 if (USE_HIDDEN_LINKONCE)
3964 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3965 else
3966 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3967 }
3968
3969
3970 /* Emit the pc thunks used by -fpic code: for each register that was used,
3971    a tiny function that loads that register with its return address and returns. */
3972
3973 void
3974 ix86_asm_file_end (file)
3975 FILE *file;
3976 {
3977 rtx xops[2];
3978 int regno;
3979
3980 for (regno = 0; regno < 8; ++regno)
3981 {
3982 char name[32];
3983
3984 if (! ((pic_labels_used >> regno) & 1))
3985 continue;
3986
3987 get_pc_thunk_name (name, regno);
3988
3989 if (USE_HIDDEN_LINKONCE)
3990 {
3991 tree decl;
3992
3993 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3994 error_mark_node);
3995 TREE_PUBLIC (decl) = 1;
3996 TREE_STATIC (decl) = 1;
3997 DECL_ONE_ONLY (decl) = 1;
3998
3999 (*targetm.asm_out.unique_section) (decl, 0);
4000 named_section (decl, NULL, 0);
4001
4002 (*targetm.asm_out.globalize_label) (file, name);
4003 fputs ("\t.hidden\t", file);
4004 assemble_name (file, name);
4005 fputc ('\n', file);
4006 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4007 }
4008 else
4009 {
4010 text_section ();
4011 ASM_OUTPUT_LABEL (file, name);
4012 }
4013
4014 xops[0] = gen_rtx_REG (SImode, regno);
4015 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4016 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4017 output_asm_insn ("ret", xops);
4018 }
4019 }
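
/* Illustrative sketch of the output above (an added note; the exact section
   and visibility directives depend on the assembler and on
   USE_HIDDEN_LINKONCE): for %ebx the emitted thunk is essentially

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address -- the address of the insn following
   the caller's call -- into the register and returns.  */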
4020
4021 /* Emit code for the SET_GOT patterns. */
4022
4023 const char *
4024 output_set_got (dest)
4025 rtx dest;
4026 {
4027 rtx xops[3];
4028
4029 xops[0] = dest;
4030 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4031
4032 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4033 {
4034 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4035
4036 if (!flag_pic)
4037 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4038 else
4039 output_asm_insn ("call\t%a2", xops);
4040
4041 #if TARGET_MACHO
4042 /* Output the "canonical" label name ("Lxx$pb") here too. This
4043 is what will be referred to by the Mach-O PIC subsystem. */
4044 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4045 #endif
4046 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4047 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4048
4049 if (flag_pic)
4050 output_asm_insn ("pop{l}\t%0", xops);
4051 }
4052 else
4053 {
4054 char name[32];
4055 get_pc_thunk_name (name, REGNO (dest));
4056 pic_labels_used |= 1 << REGNO (dest);
4057
4058 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4059 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4060 output_asm_insn ("call\t%X2", xops);
4061 }
4062
4063 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4064 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4065 else if (!TARGET_MACHO)
4066 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4067
4068 return "";
4069 }
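
/* Illustrative sketch (an added note, not from the original sources; the
   label name is hypothetical): without deep branch prediction, the templates
   above produce PIC code roughly like

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with TARGET_DEEP_BRANCH_PREDICTION the call goes to the matching
   __i686.get_pc_thunk.* thunk, followed by a plain
   addl $_GLOBAL_OFFSET_TABLE_, %ebx.  */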
4070
4071 /* Generate a "push" pattern for input ARG. */
4072
4073 static rtx
4074 gen_push (arg)
4075 rtx arg;
4076 {
4077 return gen_rtx_SET (VOIDmode,
4078 gen_rtx_MEM (Pmode,
4079 gen_rtx_PRE_DEC (Pmode,
4080 stack_pointer_rtx)),
4081 arg);
4082 }
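
/* For example (an added note, not from the original sources),
   gen_push (hard_frame_pointer_rtx) in 32-bit mode builds

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   which the push insn patterns match and which assembles to `pushl %ebp'.  */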
4083
4084 /* Return >= 0 if there is an unused call-clobbered register available
4085 for the entire function. */
4086
4087 static unsigned int
4088 ix86_select_alt_pic_regnum ()
4089 {
4090 if (current_function_is_leaf && !current_function_profile)
4091 {
4092 int i;
4093 for (i = 2; i >= 0; --i)
4094 if (!regs_ever_live[i])
4095 return i;
4096 }
4097
4098 return INVALID_REGNUM;
4099 }
4100
4101 /* Return 1 if we need to save REGNO. */
4102 static int
4103 ix86_save_reg (regno, maybe_eh_return)
4104 unsigned int regno;
4105 int maybe_eh_return;
4106 {
4107 if (pic_offset_table_rtx
4108 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4109 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4110 || current_function_profile
4111 || current_function_calls_eh_return))
4112 {
4113 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4114 return 0;
4115 return 1;
4116 }
4117
4118 if (current_function_calls_eh_return && maybe_eh_return)
4119 {
4120 unsigned i;
4121 for (i = 0; ; i++)
4122 {
4123 unsigned test = EH_RETURN_DATA_REGNO (i);
4124 if (test == INVALID_REGNUM)
4125 break;
4126 if (test == regno)
4127 return 1;
4128 }
4129 }
4130
4131 return (regs_ever_live[regno]
4132 && !call_used_regs[regno]
4133 && !fixed_regs[regno]
4134 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4135 }
4136
4137 /* Return number of registers to be saved on the stack. */
4138
4139 static int
4140 ix86_nsaved_regs ()
4141 {
4142 int nregs = 0;
4143 int regno;
4144
4145 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4146 if (ix86_save_reg (regno, true))
4147 nregs++;
4148 return nregs;
4149 }
4150
4151 /* Return the offset between two registers, one to be eliminated, and the other
4152 its replacement, at the start of a routine. */
4153
4154 HOST_WIDE_INT
4155 ix86_initial_elimination_offset (from, to)
4156 int from;
4157 int to;
4158 {
4159 struct ix86_frame frame;
4160 ix86_compute_frame_layout (&frame);
4161
4162 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4163 return frame.hard_frame_pointer_offset;
4164 else if (from == FRAME_POINTER_REGNUM
4165 && to == HARD_FRAME_POINTER_REGNUM)
4166 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4167 else
4168 {
4169 if (to != STACK_POINTER_REGNUM)
4170 abort ();
4171 else if (from == ARG_POINTER_REGNUM)
4172 return frame.stack_pointer_offset;
4173 else if (from != FRAME_POINTER_REGNUM)
4174 abort ();
4175 else
4176 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4177 }
4178 }
4179
4180 /* Fill the ix86_frame structure describing the frame of the current function. */
4181
4182 static void
4183 ix86_compute_frame_layout (frame)
4184 struct ix86_frame *frame;
4185 {
4186 HOST_WIDE_INT total_size;
4187 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4188 int offset;
4189 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4190 HOST_WIDE_INT size = get_frame_size ();
4191
4192 frame->nregs = ix86_nsaved_regs ();
4193 total_size = size;
4194
4195 /* Skip return address and saved base pointer. */
4196 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4197
4198 frame->hard_frame_pointer_offset = offset;
4199
4200   /* Do some sanity checking of stack_alignment_needed and
4201      preferred_alignment, since the i386 port is the only one using these
4202      features, and they may break easily. */
4203
4204 if (size && !stack_alignment_needed)
4205 abort ();
4206 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4207 abort ();
4208 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4209 abort ();
4210 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4211 abort ();
4212
4213 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4214 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4215
4216 /* Register save area */
4217 offset += frame->nregs * UNITS_PER_WORD;
4218
4219 /* Va-arg area */
4220 if (ix86_save_varrargs_registers)
4221 {
4222 offset += X86_64_VARARGS_SIZE;
4223 frame->va_arg_size = X86_64_VARARGS_SIZE;
4224 }
4225 else
4226 frame->va_arg_size = 0;
4227
4228 /* Align start of frame for local function. */
4229 frame->padding1 = ((offset + stack_alignment_needed - 1)
4230 & -stack_alignment_needed) - offset;
4231
4232 offset += frame->padding1;
4233
4234 /* Frame pointer points here. */
4235 frame->frame_pointer_offset = offset;
4236
4237 offset += size;
4238
4239 /* Add outgoing arguments area. Can be skipped if we eliminated
4240 all the function calls as dead code. */
4241 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4242 {
4243 offset += current_function_outgoing_args_size;
4244 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4245 }
4246 else
4247 frame->outgoing_arguments_size = 0;
4248
4249 /* Align stack boundary. Only needed if we're calling another function
4250 or using alloca. */
4251 if (!current_function_is_leaf || current_function_calls_alloca)
4252 frame->padding2 = ((offset + preferred_alignment - 1)
4253 & -preferred_alignment) - offset;
4254 else
4255 frame->padding2 = 0;
4256
4257 offset += frame->padding2;
4258
4259 /* We've reached end of stack frame. */
4260 frame->stack_pointer_offset = offset;
4261
4262 /* Size prologue needs to allocate. */
4263 frame->to_allocate =
4264 (size + frame->padding1 + frame->padding2
4265 + frame->outgoing_arguments_size + frame->va_arg_size);
4266
4267 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4268 && current_function_is_leaf)
4269 {
4270 frame->red_zone_size = frame->to_allocate;
4271 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4272 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4273 }
4274 else
4275 frame->red_zone_size = 0;
4276 frame->to_allocate -= frame->red_zone_size;
4277 frame->stack_pointer_offset -= frame->red_zone_size;
4278 #if 0
4279 fprintf (stderr, "nregs: %i\n", frame->nregs);
4280 fprintf (stderr, "size: %i\n", size);
4281 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4282 fprintf (stderr, "padding1: %i\n", frame->padding1);
4283 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4284 fprintf (stderr, "padding2: %i\n", frame->padding2);
4285 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4286 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4287 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4288 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4289 frame->hard_frame_pointer_offset);
4290 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4291 #endif
4292 }
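
/* Worked example of the layout above (an added note with hypothetical
   numbers): 32-bit, frame pointer needed, two call-saved registers, 20 bytes
   of locals, stack_alignment_needed of 4, a leaf function with no varargs
   save area and no alloca.  Then offset starts at 8 (return address plus
   saved %ebp), so hard_frame_pointer_offset = 8; the register save area adds
   8 and padding1 is 0, so frame_pointer_offset = 16; the locals add 20, and
   with no outgoing arguments and padding2 = 0, stack_pointer_offset = 36.
   to_allocate is only 20, since the two saved registers are pushed
   separately by the prologue.  */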
4293
4294 /* Emit code to save registers in the prologue. */
4295
4296 static void
4297 ix86_emit_save_regs ()
4298 {
4299 register int regno;
4300 rtx insn;
4301
4302 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4303 if (ix86_save_reg (regno, true))
4304 {
4305 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4306 RTX_FRAME_RELATED_P (insn) = 1;
4307 }
4308 }
4309
4310 /* Emit code to save registers using MOV insns. The first register
4311    is stored at POINTER + OFFSET. */
4312 static void
4313 ix86_emit_save_regs_using_mov (pointer, offset)
4314 rtx pointer;
4315 HOST_WIDE_INT offset;
4316 {
4317 int regno;
4318 rtx insn;
4319
4320 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4321 if (ix86_save_reg (regno, true))
4322 {
4323 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4324 Pmode, offset),
4325 gen_rtx_REG (Pmode, regno));
4326 RTX_FRAME_RELATED_P (insn) = 1;
4327 offset += UNITS_PER_WORD;
4328 }
4329 }
4330
4331 /* Expand the prologue into a bunch of separate insns. */
4332
4333 void
4334 ix86_expand_prologue ()
4335 {
4336 rtx insn;
4337 bool pic_reg_used;
4338 struct ix86_frame frame;
4339 int use_mov = 0;
4340 HOST_WIDE_INT allocate;
4341
4342 if (!optimize_size)
4343 {
4344 use_fast_prologue_epilogue
4345 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4346 if (TARGET_PROLOGUE_USING_MOVE)
4347 use_mov = use_fast_prologue_epilogue;
4348 }
4349 ix86_compute_frame_layout (&frame);
4350
4351 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4352 slower on all targets. Also sdb doesn't like it. */
4353
4354 if (frame_pointer_needed)
4355 {
4356 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4357 RTX_FRAME_RELATED_P (insn) = 1;
4358
4359 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4360 RTX_FRAME_RELATED_P (insn) = 1;
4361 }
4362
4363 allocate = frame.to_allocate;
4364   /* If we are dealing with only a single register and an empty frame,
4365      a push is equivalent to the mov+add sequence. */
4366 if (allocate == 0 && frame.nregs <= 1)
4367 use_mov = 0;
4368
4369 if (!use_mov)
4370 ix86_emit_save_regs ();
4371 else
4372 allocate += frame.nregs * UNITS_PER_WORD;
4373
4374 if (allocate == 0)
4375 ;
4376 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4377 {
4378 insn = emit_insn (gen_pro_epilogue_adjust_stack
4379 (stack_pointer_rtx, stack_pointer_rtx,
4380 GEN_INT (-allocate)));
4381 RTX_FRAME_RELATED_P (insn) = 1;
4382 }
4383 else
4384 {
4385 /* ??? Is this only valid for Win32? */
4386
4387 rtx arg0, sym;
4388
4389 if (TARGET_64BIT)
4390 abort ();
4391
4392 arg0 = gen_rtx_REG (SImode, 0);
4393 emit_move_insn (arg0, GEN_INT (allocate));
4394
4395 sym = gen_rtx_MEM (FUNCTION_MODE,
4396 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4397 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4398
4399 CALL_INSN_FUNCTION_USAGE (insn)
4400 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4401 CALL_INSN_FUNCTION_USAGE (insn));
4402 }
4403 if (use_mov)
4404 {
4405 if (!frame_pointer_needed || !frame.to_allocate)
4406 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4407 else
4408 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4409 -frame.nregs * UNITS_PER_WORD);
4410 }
4411
4412 #ifdef SUBTARGET_PROLOGUE
4413 SUBTARGET_PROLOGUE;
4414 #endif
4415
4416 pic_reg_used = false;
4417 if (pic_offset_table_rtx
4418 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4419 || current_function_profile))
4420 {
4421 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4422
4423 if (alt_pic_reg_used != INVALID_REGNUM)
4424 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4425
4426 pic_reg_used = true;
4427 }
4428
4429 if (pic_reg_used)
4430 {
4431 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4432
4433       /* Even with accurate pre-reload life analysis, we can wind up
4434          deleting all references to the pic register after reload.
4435          Consider the case where cross-jumping unifies the two sides of a
4436          branch controlled by a comparison against the only read from a global.
4437          In that case, allow the set_got to be deleted, though we're
4438          too late to do anything about the ebx save in the prologue. */
4439 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4440 }
4441
4442   /* Prevent function calls from being scheduled before the call to mcount.
4443 In the pic_reg_used case, make sure that the got load isn't deleted. */
4444 if (current_function_profile)
4445 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4446 }
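
/* Illustrative sketch (an added note with hypothetical parameters): for a
   32-bit function that needs a frame pointer, saves one register with
   pushes, and has 20 bytes of locals, the insns expanded above assemble
   roughly to

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$20, %esp

   plus, for PIC, a set_got insn to load the PIC register.  */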
4447
4448 /* Emit code to restore saved registers using MOV insns. First register
4449 is restored from POINTER + OFFSET. */
4450 static void
4451 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4452 rtx pointer;
4453 int offset;
4454 int maybe_eh_return;
4455 {
4456 int regno;
4457
4458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4459 if (ix86_save_reg (regno, maybe_eh_return))
4460 {
4461 emit_move_insn (gen_rtx_REG (Pmode, regno),
4462 adjust_address (gen_rtx_MEM (Pmode, pointer),
4463 Pmode, offset));
4464 offset += UNITS_PER_WORD;
4465 }
4466 }
4467
4468 /* Restore function stack, frame, and registers. */
4469
4470 void
4471 ix86_expand_epilogue (style)
4472 int style;
4473 {
4474 int regno;
4475 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4476 struct ix86_frame frame;
4477 HOST_WIDE_INT offset;
4478
4479 ix86_compute_frame_layout (&frame);
4480
4481 /* Calculate start of saved registers relative to ebp. Special care
4482 must be taken for the normal return case of a function using
4483 eh_return: the eax and edx registers are marked as saved, but not
4484 restored along this path. */
4485 offset = frame.nregs;
4486 if (current_function_calls_eh_return && style != 2)
4487 offset -= 2;
4488 offset *= -UNITS_PER_WORD;
4489
4490   /* If we're only restoring one register and sp is not valid, then
4491      use a move instruction to restore the register, since it's
4492      less work than reloading sp and popping the register.
4493
4494      The default code results in a stack adjustment using an add/lea instruction,
4495      while this code results in a LEAVE instruction (or discrete equivalent),
4496      so it is profitable in some other cases as well, especially when there
4497      are no registers to restore. We also use this code when TARGET_USE_LEAVE
4498      and there is exactly one register to pop. This heuristic may need some
4499      tuning in the future. */
4500 if ((!sp_valid && frame.nregs <= 1)
4501 || (TARGET_EPILOGUE_USING_MOVE
4502 && use_fast_prologue_epilogue
4503 && (frame.nregs > 1 || frame.to_allocate))
4504 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4505 || (frame_pointer_needed && TARGET_USE_LEAVE
4506 && use_fast_prologue_epilogue && frame.nregs == 1)
4507 || current_function_calls_eh_return)
4508 {
4509       /* Restore registers. We can use ebp or esp to address the memory
4510          locations. If both are available, default to ebp, since offsets
4511          are known to be small. The only exception is when esp points directly
4512          to the end of the block of saved registers, where we may simplify the
4513          addressing mode. */
4514
4515 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4516 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4517 frame.to_allocate, style == 2);
4518 else
4519 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4520 offset, style == 2);
4521
4522 /* eh_return epilogues need %ecx added to the stack pointer. */
4523 if (style == 2)
4524 {
4525 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4526
4527 if (frame_pointer_needed)
4528 {
4529 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4530 tmp = plus_constant (tmp, UNITS_PER_WORD);
4531 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4532
4533 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4534 emit_move_insn (hard_frame_pointer_rtx, tmp);
4535
4536 emit_insn (gen_pro_epilogue_adjust_stack
4537 (stack_pointer_rtx, sa, const0_rtx));
4538 }
4539 else
4540 {
4541 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4542 tmp = plus_constant (tmp, (frame.to_allocate
4543 + frame.nregs * UNITS_PER_WORD));
4544 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4545 }
4546 }
4547 else if (!frame_pointer_needed)
4548 emit_insn (gen_pro_epilogue_adjust_stack
4549 (stack_pointer_rtx, stack_pointer_rtx,
4550 GEN_INT (frame.to_allocate
4551 + frame.nregs * UNITS_PER_WORD)));
4552 /* If not an i386, mov & pop is faster than "leave". */
4553 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4554 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4555 else
4556 {
4557 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4558 hard_frame_pointer_rtx,
4559 const0_rtx));
4560 if (TARGET_64BIT)
4561 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4562 else
4563 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4564 }
4565 }
4566 else
4567 {
4568 /* First step is to deallocate the stack frame so that we can
4569 pop the registers. */
4570 if (!sp_valid)
4571 {
4572 if (!frame_pointer_needed)
4573 abort ();
4574 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4575 hard_frame_pointer_rtx,
4576 GEN_INT (offset)));
4577 }
4578 else if (frame.to_allocate)
4579 emit_insn (gen_pro_epilogue_adjust_stack
4580 (stack_pointer_rtx, stack_pointer_rtx,
4581 GEN_INT (frame.to_allocate)));
4582
4583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4584 if (ix86_save_reg (regno, false))
4585 {
4586 if (TARGET_64BIT)
4587 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4588 else
4589 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4590 }
4591 if (frame_pointer_needed)
4592 {
4593 /* Leave results in shorter dependency chains on CPUs that are
4594 able to grok it fast. */
4595 if (TARGET_USE_LEAVE)
4596 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4597 else if (TARGET_64BIT)
4598 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4599 else
4600 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4601 }
4602 }
4603
4604 /* Sibcall epilogues don't want a return instruction. */
4605 if (style == 0)
4606 return;
4607
4608 if (current_function_pops_args && current_function_args_size)
4609 {
4610 rtx popc = GEN_INT (current_function_pops_args);
4611
4612       /* The i386 can only pop 64K bytes with one instruction. If asked to pop
4613          more, pop the return address, do an explicit add, and jump indirectly
4614          to the caller. */
4615
4616 if (current_function_pops_args >= 65536)
4617 {
4618 rtx ecx = gen_rtx_REG (SImode, 2);
4619
4620           /* There is no "pascal" calling convention in the 64-bit ABI. */
4621 if (TARGET_64BIT)
4622 abort ();
4623
4624 emit_insn (gen_popsi1 (ecx));
4625 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4626 emit_jump_insn (gen_return_indirect_internal (ecx));
4627 }
4628 else
4629 emit_jump_insn (gen_return_pop_internal (popc));
4630 }
4631 else
4632 emit_jump_insn (gen_return_internal ());
4633 }
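
/* Illustrative sketch (an added note with hypothetical parameters): for the
   pop-based path above, with a frame pointer, two saved registers (%ebx and
   %esi), 20 bytes of locals and a leave-friendly target, the expansion
   corresponds roughly to

	addl	$20, %esp
	popl	%ebx
	popl	%esi
	leave
	ret
   */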
4634
4635 /* Reset state (currently just the PIC register number) from the function's potential modifications. */
4636
4637 static void
4638 ix86_output_function_epilogue (file, size)
4639 FILE *file ATTRIBUTE_UNUSED;
4640 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4641 {
4642 if (pic_offset_table_rtx)
4643 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4644 }
4645 \f
4646 /* Extract the parts of an RTL expression that is a valid memory address
4647    for an instruction. Return 0 if the structure of the address is
4648    grossly off. Return -1 if the address contains ASHIFT, so it is not
4649    strictly valid, but is still used for computing the length of an lea
4650    instruction. */
4651
4652 static int
4653 ix86_decompose_address (addr, out)
4654 register rtx addr;
4655 struct ix86_address *out;
4656 {
4657 rtx base = NULL_RTX;
4658 rtx index = NULL_RTX;
4659 rtx disp = NULL_RTX;
4660 HOST_WIDE_INT scale = 1;
4661 rtx scale_rtx = NULL_RTX;
4662 int retval = 1;
4663
4664 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4665 base = addr;
4666 else if (GET_CODE (addr) == PLUS)
4667 {
4668 rtx op0 = XEXP (addr, 0);
4669 rtx op1 = XEXP (addr, 1);
4670 enum rtx_code code0 = GET_CODE (op0);
4671 enum rtx_code code1 = GET_CODE (op1);
4672
4673 if (code0 == REG || code0 == SUBREG)
4674 {
4675 if (code1 == REG || code1 == SUBREG)
4676 index = op0, base = op1; /* index + base */
4677 else
4678 base = op0, disp = op1; /* base + displacement */
4679 }
4680 else if (code0 == MULT)
4681 {
4682 index = XEXP (op0, 0);
4683 scale_rtx = XEXP (op0, 1);
4684 if (code1 == REG || code1 == SUBREG)
4685 base = op1; /* index*scale + base */
4686 else
4687 disp = op1; /* index*scale + disp */
4688 }
4689 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4690 {
4691 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4692 scale_rtx = XEXP (XEXP (op0, 0), 1);
4693 base = XEXP (op0, 1);
4694 disp = op1;
4695 }
4696 else if (code0 == PLUS)
4697 {
4698 index = XEXP (op0, 0); /* index + base + disp */
4699 base = XEXP (op0, 1);
4700 disp = op1;
4701 }
4702 else
4703 return 0;
4704 }
4705 else if (GET_CODE (addr) == MULT)
4706 {
4707 index = XEXP (addr, 0); /* index*scale */
4708 scale_rtx = XEXP (addr, 1);
4709 }
4710 else if (GET_CODE (addr) == ASHIFT)
4711 {
4712 rtx tmp;
4713
4714 /* We're called for lea too, which implements ashift on occasion. */
4715 index = XEXP (addr, 0);
4716 tmp = XEXP (addr, 1);
4717 if (GET_CODE (tmp) != CONST_INT)
4718 return 0;
4719 scale = INTVAL (tmp);
4720 if ((unsigned HOST_WIDE_INT) scale > 3)
4721 return 0;
4722 scale = 1 << scale;
4723 retval = -1;
4724 }
4725 else
4726 disp = addr; /* displacement */
4727
4728 /* Extract the integral value of scale. */
4729 if (scale_rtx)
4730 {
4731 if (GET_CODE (scale_rtx) != CONST_INT)
4732 return 0;
4733 scale = INTVAL (scale_rtx);
4734 }
4735
4736   /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4737 if (base && index && scale == 1
4738 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4739 || index == stack_pointer_rtx))
4740 {
4741 rtx tmp = base;
4742 base = index;
4743 index = tmp;
4744 }
4745
4746 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4747 if ((base == hard_frame_pointer_rtx
4748 || base == frame_pointer_rtx
4749 || base == arg_pointer_rtx) && !disp)
4750 disp = const0_rtx;
4751
4752   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4753 Avoid this by transforming to [%esi+0]. */
4754 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4755 && base && !index && !disp
4756 && REG_P (base)
4757 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4758 disp = const0_rtx;
4759
4760 /* Special case: encode reg+reg instead of reg*2. */
4761 if (!base && index && scale && scale == 2)
4762 base = index, scale = 1;
4763
4764 /* Special case: scaling cannot be encoded without base or displacement. */
4765 if (!base && !disp && index && scale != 1)
4766 disp = const0_rtx;
4767
4768 out->base = base;
4769 out->index = index;
4770 out->disp = disp;
4771 out->scale = scale;
4772
4773 return retval;
4774 }
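
/* Worked example (an added note, not from the original sources): the address

	(plus:SI (plus:SI (mult:SI (reg:SI cx) (const_int 4)) (reg:SI bx))
		 (const_int 12))

   i.e. 12(%ebx,%ecx,4), decomposes into base = %ebx, index = %ecx,
   scale = 4, disp = 12, and the function returns 1.  An lea source such as
   (ashift:SI (reg:SI ax) (const_int 2)) decomposes into index = %eax with
   scale = 4 and no base; a zero displacement is supplied by the special
   cases above, and the return value is -1.  */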
4775 \f
4776 /* Return the cost of the memory address x.
4777    For the i386, it is better to use a complex address than to let gcc copy
4778    the address into a reg and make a new pseudo. But not if the address
4779    requires two regs - that would mean more pseudos with longer
4780    lifetimes. */
4781 int
4782 ix86_address_cost (x)
4783 rtx x;
4784 {
4785 struct ix86_address parts;
4786 int cost = 1;
4787
4788 if (!ix86_decompose_address (x, &parts))
4789 abort ();
4790
4791 if (parts.base && GET_CODE (parts.base) == SUBREG)
4792 parts.base = SUBREG_REG (parts.base);
4793 if (parts.index && GET_CODE (parts.index) == SUBREG)
4794 parts.index = SUBREG_REG (parts.index);
4795
4796 /* More complex memory references are better. */
4797 if (parts.disp && parts.disp != const0_rtx)
4798 cost--;
4799
4800 /* Attempt to minimize number of registers in the address. */
4801 if ((parts.base
4802 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4803 || (parts.index
4804 && (!REG_P (parts.index)
4805 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4806 cost++;
4807
4808 if (parts.base
4809 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4810 && parts.index
4811 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4812 && parts.base != parts.index)
4813 cost++;
4814
4815   /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4816      since its predecode logic can't detect the length of such instructions
4817      and decoding degenerates to the vector decoder. Increase the cost of such
4818      addresses here. The penalty is at least 2 cycles. It may be worthwhile
4819      to split such addresses or even refuse them entirely.
4820
4821      The following addressing modes are affected:
4822       [base+scale*index]
4823       [scale*index+disp]
4824       [base+index]
4825
4826      The first and last cases may be avoidable by explicitly coding a zero
4827      displacement in the memory address, but I don't have an AMD K6 machine
4828      handy to check this theory. */
4829
4830 if (TARGET_K6
4831 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4832 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4833 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4834 cost += 10;
4835
4836 return cost;
4837 }
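
/* Worked example (an added note; assumes hard registers, so the
   pseudo-register penalties above do not apply): a plain (reg:SI bx) address
   keeps the initial cost of 1, while (plus:SI (reg:SI bx) (const_int 8)),
   i.e. 8(%ebx), has a nonzero displacement and costs 0, so the more complex
   form is preferred.  On TARGET_K6 an address like (%ebx,%ecx) -- base plus
   index, scale 1, no displacement -- additionally receives the +10 penalty,
   for a total of 11.  */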
4838 \f
4839 /* If X is a machine specific address (i.e. a symbol or label being
4840 referenced as a displacement from the GOT implemented using an
4841 UNSPEC), then return the base term. Otherwise return X. */
4842
4843 rtx
4844 ix86_find_base_term (x)
4845 rtx x;
4846 {
4847 rtx term;
4848
4849 if (TARGET_64BIT)
4850 {
4851 if (GET_CODE (x) != CONST)
4852 return x;
4853 term = XEXP (x, 0);
4854 if (GET_CODE (term) == PLUS
4855 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4856 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4857 term = XEXP (term, 0);
4858 if (GET_CODE (term) != UNSPEC
4859 || XINT (term, 1) != UNSPEC_GOTPCREL)
4860 return x;
4861
4862 term = XVECEXP (term, 0, 0);
4863
4864 if (GET_CODE (term) != SYMBOL_REF
4865 && GET_CODE (term) != LABEL_REF)
4866 return x;
4867
4868 return term;
4869 }
4870
4871 if (GET_CODE (x) != PLUS
4872 || XEXP (x, 0) != pic_offset_table_rtx
4873 || GET_CODE (XEXP (x, 1)) != CONST)
4874 return x;
4875
4876 term = XEXP (XEXP (x, 1), 0);
4877
4878 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4879 term = XEXP (term, 0);
4880
4881 if (GET_CODE (term) != UNSPEC
4882 || XINT (term, 1) != UNSPEC_GOTOFF)
4883 return x;
4884
4885 term = XVECEXP (term, 0, 0);
4886
4887 if (GET_CODE (term) != SYMBOL_REF
4888 && GET_CODE (term) != LABEL_REF)
4889 return x;
4890
4891 return term;
4892 }
4893 \f
4894 /* Determine if a given RTX is a valid constant. We already know this
4895 satisfies CONSTANT_P. */
4896
4897 bool
4898 legitimate_constant_p (x)
4899 rtx x;
4900 {
4901 rtx inner;
4902
4903 switch (GET_CODE (x))
4904 {
4905 case SYMBOL_REF:
4906 /* TLS symbols are not constant. */
4907 if (tls_symbolic_operand (x, Pmode))
4908 return false;
4909 break;
4910
4911 case CONST:
4912 inner = XEXP (x, 0);
4913
4914 /* Offsets of TLS symbols are never valid.
4915 Discourage CSE from creating them. */
4916 if (GET_CODE (inner) == PLUS
4917 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4918 return false;
4919
4920 /* Only some unspecs are valid as "constants". */
4921 if (GET_CODE (inner) == UNSPEC)
4922 switch (XINT (inner, 1))
4923 {
4924 case UNSPEC_TPOFF:
4925 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4926 default:
4927 return false;
4928 }
4929 break;
4930
4931 default:
4932 break;
4933 }
4934
4935 /* Otherwise we handle everything else in the move patterns. */
4936 return true;
4937 }
4938
4939 /* Determine if a given RTX is a valid constant address. */
4940
4941 bool
4942 constant_address_p (x)
4943 rtx x;
4944 {
4945 switch (GET_CODE (x))
4946 {
4947 case LABEL_REF:
4948 case CONST_INT:
4949 return true;
4950
4951 case CONST_DOUBLE:
4952 return TARGET_64BIT;
4953
4954 case CONST:
4955 /* For Mach-O, really believe the CONST. */
4956 if (TARGET_MACHO)
4957 return true;
4958 /* Otherwise fall through. */
4959 case SYMBOL_REF:
4960 return !flag_pic && legitimate_constant_p (x);
4961
4962 default:
4963 return false;
4964 }
4965 }
4966
4967 /* Nonzero if the constant value X is a legitimate general operand
4968 when generating PIC code. It is given that flag_pic is on and
4969 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4970
4971 bool
4972 legitimate_pic_operand_p (x)
4973 rtx x;
4974 {
4975 rtx inner;
4976
4977 switch (GET_CODE (x))
4978 {
4979 case CONST:
4980 inner = XEXP (x, 0);
4981
4982 /* Only some unspecs are valid as "constants". */
4983 if (GET_CODE (inner) == UNSPEC)
4984 switch (XINT (inner, 1))
4985 {
4986 case UNSPEC_TPOFF:
4987 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4988 default:
4989 return false;
4990 }
4991 /* FALLTHRU */
4992
4993 case SYMBOL_REF:
4994 case LABEL_REF:
4995 return legitimate_pic_address_disp_p (x);
4996
4997 default:
4998 return true;
4999 }
5000 }
5001
5002 /* Determine if a given CONST RTX is a valid memory displacement
5003 in PIC mode. */
5004
5005 int
5006 legitimate_pic_address_disp_p (disp)
5007 register rtx disp;
5008 {
5009 bool saw_plus;
5010
5011 /* In 64bit mode we can allow direct addresses of symbols and labels
5012 when they are not dynamic symbols. */
5013 if (TARGET_64BIT)
5014 {
5015 rtx x = disp;
5016 if (GET_CODE (disp) == CONST)
5017 x = XEXP (disp, 0);
5018 /* ??? Handle PIC code models */
5019 if (GET_CODE (x) == PLUS
5020 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5021 && ix86_cmodel == CM_SMALL_PIC
5022 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5023 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5024 x = XEXP (x, 0);
5025 if (local_symbolic_operand (x, Pmode))
5026 return 1;
5027 }
5028 if (GET_CODE (disp) != CONST)
5029 return 0;
5030 disp = XEXP (disp, 0);
5031
5032 if (TARGET_64BIT)
5033 {
5034       /* It is unsafe to allow PLUS expressions here; that would limit the
5035          allowed distance of GOT references. We should not need these anyway. */
5036 if (GET_CODE (disp) != UNSPEC
5037 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5038 return 0;
5039
5040 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5041 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5042 return 0;
5043 return 1;
5044 }
5045
5046 saw_plus = false;
5047 if (GET_CODE (disp) == PLUS)
5048 {
5049 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5050 return 0;
5051 disp = XEXP (disp, 0);
5052 saw_plus = true;
5053 }
5054
5055 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5056 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5057 {
5058 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5059 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5060 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5061 {
5062 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5063 if (strstr (sym_name, "$pb") != 0)
5064 return 1;
5065 }
5066 }
5067
5068 if (GET_CODE (disp) != UNSPEC)
5069 return 0;
5070
5071 switch (XINT (disp, 1))
5072 {
5073 case UNSPEC_GOT:
5074 if (saw_plus)
5075 return false;
5076 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5077 case UNSPEC_GOTOFF:
5078 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5079 case UNSPEC_GOTTPOFF:
5080 if (saw_plus)
5081 return false;
5082 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5083 case UNSPEC_NTPOFF:
5084 /* ??? Could support offset here. */
5085 if (saw_plus)
5086 return false;
5087 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5088 case UNSPEC_DTPOFF:
5089 /* ??? Could support offset here. */
5090 if (saw_plus)
5091 return false;
5092 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5093 }
5094
5095 return 0;
5096 }
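
/* Illustrative examples for 32-bit PIC (an added note; the symbol names are
   hypothetical): for a local symbol "lvar", both

	(const (unspec [(symbol_ref "lvar")] UNSPEC_GOTOFF))
	(const (plus (unspec [(symbol_ref "lvar")] UNSPEC_GOTOFF) (const_int 4)))

   are accepted, while

	(const (plus (unspec [(symbol_ref "gvar")] UNSPEC_GOT) (const_int 4)))

   is rejected, since a @GOT reference must not carry an offset
   (the saw_plus check above).  */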
5097
5098 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5099 memory address for an instruction. The MODE argument is the machine mode
5100 for the MEM expression that wants to use this address.
5101
5102    It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5103 convert common non-canonical forms to canonical form so that they will
5104 be recognized. */
5105
5106 int
5107 legitimate_address_p (mode, addr, strict)
5108 enum machine_mode mode;
5109 register rtx addr;
5110 int strict;
5111 {
5112 struct ix86_address parts;
5113 rtx base, index, disp;
5114 HOST_WIDE_INT scale;
5115 const char *reason = NULL;
5116 rtx reason_rtx = NULL_RTX;
5117
5118 if (TARGET_DEBUG_ADDR)
5119 {
5120 fprintf (stderr,
5121 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5122 GET_MODE_NAME (mode), strict);
5123 debug_rtx (addr);
5124 }
5125
5126 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5127 {
5128 if (TARGET_DEBUG_ADDR)
5129 fprintf (stderr, "Success.\n");
5130 return TRUE;
5131 }
5132
5133 if (ix86_decompose_address (addr, &parts) <= 0)
5134 {
5135 reason = "decomposition failed";
5136 goto report_error;
5137 }
5138
5139 base = parts.base;
5140 index = parts.index;
5141 disp = parts.disp;
5142 scale = parts.scale;
5143
5144 /* Validate base register.
5145
5146      Don't allow SUBREGs here; they can lead to spill failures when the base
5147      is one word of a two-word structure, which is represented internally
5148      as a DImode int. */
5149
5150 if (base)
5151 {
5152 rtx reg;
5153 reason_rtx = base;
5154
5155 if (GET_CODE (base) == SUBREG)
5156 reg = SUBREG_REG (base);
5157 else
5158 reg = base;
5159
5160 if (GET_CODE (reg) != REG)
5161 {
5162 reason = "base is not a register";
5163 goto report_error;
5164 }
5165
5166 if (GET_MODE (base) != Pmode)
5167 {
5168 reason = "base is not in Pmode";
5169 goto report_error;
5170 }
5171
5172 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5173 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5174 {
5175 reason = "base is not valid";
5176 goto report_error;
5177 }
5178 }
5179
5180 /* Validate index register.
5181
5182      Don't allow SUBREGs here; they can lead to spill failures when the index
5183      is one word of a two-word structure, which is represented internally
5184      as a DImode int. */
5185
5186 if (index)
5187 {
5188 rtx reg;
5189 reason_rtx = index;
5190
5191 if (GET_CODE (index) == SUBREG)
5192 reg = SUBREG_REG (index);
5193 else
5194 reg = index;
5195
5196 if (GET_CODE (reg) != REG)
5197 {
5198 reason = "index is not a register";
5199 goto report_error;
5200 }
5201
5202 if (GET_MODE (index) != Pmode)
5203 {
5204 reason = "index is not in Pmode";
5205 goto report_error;
5206 }
5207
5208 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5209 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5210 {
5211 reason = "index is not valid";
5212 goto report_error;
5213 }
5214 }
5215
5216 /* Validate scale factor. */
5217 if (scale != 1)
5218 {
5219 reason_rtx = GEN_INT (scale);
5220 if (!index)
5221 {
5222 reason = "scale without index";
5223 goto report_error;
5224 }
5225
5226 if (scale != 2 && scale != 4 && scale != 8)
5227 {
5228 reason = "scale is not a valid multiplier";
5229 goto report_error;
5230 }
5231 }
5232
5233 /* Validate displacement. */
5234 if (disp)
5235 {
5236 reason_rtx = disp;
5237
5238 if (TARGET_64BIT)
5239 {
5240 if (!x86_64_sign_extended_value (disp))
5241 {
5242 reason = "displacement is out of range";
5243 goto report_error;
5244 }
5245 }
5246 else
5247 {
5248 if (GET_CODE (disp) == CONST_DOUBLE)
5249 {
5250 reason = "displacement is a const_double";
5251 goto report_error;
5252 }
5253 }
5254
5255 if (GET_CODE (disp) == CONST
5256 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5257 switch (XINT (XEXP (disp, 0), 1))
5258 {
5259 case UNSPEC_GOT:
5260 case UNSPEC_GOTOFF:
5261 case UNSPEC_GOTPCREL:
5262 if (!flag_pic)
5263 abort ();
5264 goto is_legitimate_pic;
5265
5266 case UNSPEC_GOTTPOFF:
5267 case UNSPEC_NTPOFF:
5268 case UNSPEC_DTPOFF:
5269 break;
5270
5271 default:
5272 reason = "invalid address unspec";
5273 goto report_error;
5274 }
5275
5276 else if (flag_pic && (SYMBOLIC_CONST (disp)
5277 #if TARGET_MACHO
5278 && !machopic_operand_p (disp)
5279 #endif
5280 ))
5281 {
5282 is_legitimate_pic:
5283 if (TARGET_64BIT && (index || base))
5284 {
5285 reason = "non-constant pic memory reference";
5286 goto report_error;
5287 }
5288 if (! legitimate_pic_address_disp_p (disp))
5289 {
5290 reason = "displacement is an invalid pic construct";
5291 goto report_error;
5292 }
5293
5294 /* This code used to verify that a symbolic pic displacement
5295 includes the pic_offset_table_rtx register.
5296
5297          While this is a good idea, unfortunately these constructs may
5298          be created by the "adds using lea" optimization for incorrect
5299          code like:
5300
5301 int a;
5302 int foo(int i)
5303 {
5304 return *(&a+i);
5305 }
5306
5307          This code is nonsensical, but results in addressing the
5308          GOT table with a pic_offset_table_rtx base. We can't
5309          just refuse it easily, since it gets matched by the
5310          "addsi3" pattern, which later gets split to an lea when the
5311          output register differs from the input. While this
5312          could be handled by a separate addsi pattern for this case
5313          that never results in an lea, simply disabling this test seems
5314          to be the easier and correct fix for the crash. */
5315 }
5316 else if (!CONSTANT_ADDRESS_P (disp))
5317 {
5318 reason = "displacement is not constant";
5319 goto report_error;
5320 }
5321 }
5322
5323 /* Everything looks valid. */
5324 if (TARGET_DEBUG_ADDR)
5325 fprintf (stderr, "Success.\n");
5326 return TRUE;
5327
5328 report_error:
5329 if (TARGET_DEBUG_ADDR)
5330 {
5331 fprintf (stderr, "Error: %s\n", reason);
5332 debug_rtx (reason_rtx);
5333 }
5334 return FALSE;
5335 }
5336 \f
5337 /* Return a unique alias set for the GOT. */
5338
5339 static HOST_WIDE_INT
5340 ix86_GOT_alias_set ()
5341 {
5342 static HOST_WIDE_INT set = -1;
5343 if (set == -1)
5344 set = new_alias_set ();
5345 return set;
5346 }
5347
5348 /* Return a legitimate reference for ORIG (an address) using the
5349 register REG. If REG is 0, a new pseudo is generated.
5350
5351 There are two types of references that must be handled:
5352
5353 1. Global data references must load the address from the GOT, via
5354 the PIC reg. An insn is emitted to do this load, and the reg is
5355 returned.
5356
5357 2. Static data references, constant pool addresses, and code labels
5358 compute the address as an offset from the GOT, whose base is in
5359 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5360 differentiate them from global data objects. The returned
5361 address is the PIC reg + an unspec constant.
5362
5363 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5364 reg also appears in the address. */
5365
5366 rtx
5367 legitimize_pic_address (orig, reg)
5368 rtx orig;
5369 rtx reg;
5370 {
5371 rtx addr = orig;
5372 rtx new = orig;
5373 rtx base;
5374
5375 #if TARGET_MACHO
5376 if (reg == 0)
5377 reg = gen_reg_rtx (Pmode);
5378 /* Use the generic Mach-O PIC machinery. */
5379 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5380 #endif
5381
5382 if (local_symbolic_operand (addr, Pmode))
5383 {
5384 /* In 64bit mode we can address such objects directly. */
5385 if (TARGET_64BIT)
5386 new = addr;
5387 else
5388 {
5389 /* This symbol may be referenced via a displacement from the PIC
5390 base address (@GOTOFF). */
5391
5392 if (reload_in_progress)
5393 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5394 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5395 new = gen_rtx_CONST (Pmode, new);
5396 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5397
5398 if (reg != 0)
5399 {
5400 emit_move_insn (reg, new);
5401 new = reg;
5402 }
5403 }
5404 }
5405 else if (GET_CODE (addr) == SYMBOL_REF)
5406 {
5407 if (TARGET_64BIT)
5408 {
5409 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5410 new = gen_rtx_CONST (Pmode, new);
5411 new = gen_rtx_MEM (Pmode, new);
5412 RTX_UNCHANGING_P (new) = 1;
5413 set_mem_alias_set (new, ix86_GOT_alias_set ());
5414
5415 if (reg == 0)
5416 reg = gen_reg_rtx (Pmode);
5417           /* Use gen_movsi directly; otherwise the address is loaded
5418              into a register for CSE. We don't want to CSE this address;
5419              instead we CSE addresses loaded from the GOT table, so skip this. */
5420 emit_insn (gen_movsi (reg, new));
5421 new = reg;
5422 }
5423 else
5424 {
5425 /* This symbol must be referenced via a load from the
5426 Global Offset Table (@GOT). */
5427
5428 if (reload_in_progress)
5429 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5430 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5431 new = gen_rtx_CONST (Pmode, new);
5432 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5433 new = gen_rtx_MEM (Pmode, new);
5434 RTX_UNCHANGING_P (new) = 1;
5435 set_mem_alias_set (new, ix86_GOT_alias_set ());
5436
5437 if (reg == 0)
5438 reg = gen_reg_rtx (Pmode);
5439 emit_move_insn (reg, new);
5440 new = reg;
5441 }
5442 }
5443 else
5444 {
5445 if (GET_CODE (addr) == CONST)
5446 {
5447 addr = XEXP (addr, 0);
5448
5449 /* We must match stuff we generate before. Assume the only
5450 unspecs that can get here are ours. Not that we could do
5451 anything with them anyway... */
5452 if (GET_CODE (addr) == UNSPEC
5453 || (GET_CODE (addr) == PLUS
5454 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5455 return orig;
5456 if (GET_CODE (addr) != PLUS)
5457 abort ();
5458 }
5459 if (GET_CODE (addr) == PLUS)
5460 {
5461 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5462
5463 /* Check first to see if this is a constant offset from a @GOTOFF
5464 symbol reference. */
5465 if (local_symbolic_operand (op0, Pmode)
5466 && GET_CODE (op1) == CONST_INT)
5467 {
5468 if (!TARGET_64BIT)
5469 {
5470 if (reload_in_progress)
5471 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5472 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5473 UNSPEC_GOTOFF);
5474 new = gen_rtx_PLUS (Pmode, new, op1);
5475 new = gen_rtx_CONST (Pmode, new);
5476 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5477
5478 if (reg != 0)
5479 {
5480 emit_move_insn (reg, new);
5481 new = reg;
5482 }
5483 }
5484 else
5485 {
5486 /* ??? We need to limit offsets here. */
5487 }
5488 }
5489 else
5490 {
5491 base = legitimize_pic_address (XEXP (addr, 0), reg);
5492 new = legitimize_pic_address (XEXP (addr, 1),
5493 base == reg ? NULL_RTX : reg);
5494
5495 if (GET_CODE (new) == CONST_INT)
5496 new = plus_constant (base, INTVAL (new));
5497 else
5498 {
5499 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5500 {
5501 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5502 new = XEXP (new, 1);
5503 }
5504 new = gen_rtx_PLUS (Pmode, base, new);
5505 }
5506 }
5507 }
5508 }
5509 return new;
5510 }
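/* Illustrative note (added; not part of the original comments): for the
   32-bit PIC cases above, assuming the PIC register is %ebx, a local
   symbol `foo' ends up as

     (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF)))    -> foo@GOTOFF(%ebx)

   i.e. a simple displacement from the PIC base, while a global symbol
   becomes a load from its GOT slot,

     (mem (plus %ebx (const (unspec [foo] UNSPEC_GOT)))) -> foo@GOT(%ebx)

   In 64-bit mode global symbols go through foo@GOTPCREL(%rip) instead;
   see output_pic_addr_const below for how the UNSPECs are printed.  */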
5511
5512 static void
5513 ix86_encode_section_info (decl, first)
5514 tree decl;
5515 int first ATTRIBUTE_UNUSED;
5516 {
5517 bool local_p = (*targetm.binds_local_p) (decl);
5518 rtx rtl, symbol;
5519
5520 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5521 if (GET_CODE (rtl) != MEM)
5522 return;
5523 symbol = XEXP (rtl, 0);
5524 if (GET_CODE (symbol) != SYMBOL_REF)
5525 return;
5526
5527 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5528 symbol so that we may access it directly in the GOT. */
5529
5530 if (flag_pic)
5531 SYMBOL_REF_FLAG (symbol) = local_p;
5532
5533 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5534 "local dynamic", "initial exec" or "local exec" TLS models
5535 respectively. */
5536
5537 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5538 {
5539 const char *symbol_str;
5540 char *newstr;
5541 size_t len;
5542 enum tls_model kind;
5543
5544 if (!flag_pic)
5545 {
5546 if (local_p)
5547 kind = TLS_MODEL_LOCAL_EXEC;
5548 else
5549 kind = TLS_MODEL_INITIAL_EXEC;
5550 }
5551 /* Local dynamic is inefficient when we're not combining the
5552 parts of the address. */
5553 else if (optimize && local_p)
5554 kind = TLS_MODEL_LOCAL_DYNAMIC;
5555 else
5556 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5557 if (kind < flag_tls_default)
5558 kind = flag_tls_default;
5559
5560 symbol_str = XSTR (symbol, 0);
5561
5562 if (symbol_str[0] == '%')
5563 {
5564 if (symbol_str[1] == tls_model_chars[kind])
5565 return;
5566 symbol_str += 2;
5567 }
5568 len = strlen (symbol_str) + 1;
5569 newstr = alloca (len + 2);
5570
5571 newstr[0] = '%';
5572 newstr[1] = tls_model_chars[kind];
5573 memcpy (newstr + 2, symbol_str, len);
5574
5575 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5576 }
5577 }
5578
5579 /* Undo the above when printing symbol names. */
5580
5581 static const char *
5582 ix86_strip_name_encoding (str)
5583 const char *str;
5584 {
5585 if (str[0] == '%')
5586 str += 2;
5587 if (str [0] == '*')
5588 str += 1;
5589 return str;
5590 }
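/* Example (added for clarity): per the %[GLil] comment above, a
   thread-local variable `foo' compiled for the initial-exec model has
   its SYMBOL_REF name rewritten to "%ifoo" by ix86_encode_section_info;
   ix86_strip_name_encoding then drops the two-character prefix again
   before the name reaches the assembler output.  */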
5591 \f
5592 /* Load the thread pointer into a register. */
5593
5594 static rtx
5595 get_thread_pointer ()
5596 {
5597 rtx tp;
5598
5599 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5600 tp = gen_rtx_MEM (Pmode, tp);
5601 RTX_UNCHANGING_P (tp) = 1;
5602 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5603 tp = force_reg (Pmode, tp);
5604
5605 return tp;
5606 }
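/* Note (added): the UNSPEC_TP memory reference built above is printed
   by print_operand_address below as "%gs:0", so forcing it into a
   register amounts to loading the thread pointer from the thread
   segment base, e.g. "movl %gs:0, %eax" in AT&T syntax.  */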
5607
5608 /* Try machine-dependent ways of modifying an illegitimate address
5609 to be legitimate. If we find one, return the new, valid address.
5610 This macro is used in only one place: `memory_address' in explow.c.
5611
5612 OLDX is the address as it was before break_out_memory_refs was called.
5613 In some cases it is useful to look at this to decide what needs to be done.
5614
5615 MODE and WIN are passed so that this macro can use
5616 GO_IF_LEGITIMATE_ADDRESS.
5617
5618 It is always safe for this macro to do nothing. It exists to recognize
5619 opportunities to optimize the output.
5620
5621 For the 80386, we handle X+REG by loading X into a register R and
5622 using R+REG. R will go in a general reg and indexing will be used.
5623 However, if REG is a broken-out memory address or multiplication,
5624 nothing needs to be done because REG can certainly go in a general reg.
5625
5626 When -fpic is used, special handling is needed for symbolic references.
5627 See comments by legitimize_pic_address in i386.c for details. */
5628
5629 rtx
5630 legitimize_address (x, oldx, mode)
5631 register rtx x;
5632 register rtx oldx ATTRIBUTE_UNUSED;
5633 enum machine_mode mode;
5634 {
5635 int changed = 0;
5636 unsigned log;
5637
5638 if (TARGET_DEBUG_ADDR)
5639 {
5640 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5641 GET_MODE_NAME (mode));
5642 debug_rtx (x);
5643 }
5644
5645 log = tls_symbolic_operand (x, mode);
5646 if (log)
5647 {
5648 rtx dest, base, off, pic;
5649
5650 switch (log)
5651 {
5652 case TLS_MODEL_GLOBAL_DYNAMIC:
5653 dest = gen_reg_rtx (Pmode);
5654 emit_insn (gen_tls_global_dynamic (dest, x));
5655 break;
5656
5657 case TLS_MODEL_LOCAL_DYNAMIC:
5658 base = gen_reg_rtx (Pmode);
5659 emit_insn (gen_tls_local_dynamic_base (base));
5660
5661 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5662 off = gen_rtx_CONST (Pmode, off);
5663
5664 return gen_rtx_PLUS (Pmode, base, off);
5665
5666 case TLS_MODEL_INITIAL_EXEC:
5667 if (flag_pic)
5668 {
5669 if (reload_in_progress)
5670 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5671 pic = pic_offset_table_rtx;
5672 }
5673 else
5674 {
5675 pic = gen_reg_rtx (Pmode);
5676 emit_insn (gen_set_got (pic));
5677 }
5678
5679 base = get_thread_pointer ();
5680
5681 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5682 off = gen_rtx_CONST (Pmode, off);
5683 off = gen_rtx_PLUS (Pmode, pic, off);
5684 off = gen_rtx_MEM (Pmode, off);
5685 RTX_UNCHANGING_P (off) = 1;
5686 set_mem_alias_set (off, ix86_GOT_alias_set ());
5687
5688 /* Damn Sun for specifying a set of dynamic relocations without
5689 considering the two-operand nature of the architecture!
5690 We'd be much better off with a "GOTNTPOFF" relocation that
5691 already contained the negated constant. */
5692 /* ??? Using negl and reg+reg addressing appears to be a lose
5693 size-wise. The negl is two bytes, just like the extra movl
5694 incurred by the two-operand subl, but reg+reg addressing
5695 uses the two-byte modrm form, unlike plain reg. */
5696
5697 dest = gen_reg_rtx (Pmode);
5698 emit_insn (gen_subsi3 (dest, base, off));
5699 break;
5700
5701 case TLS_MODEL_LOCAL_EXEC:
5702 base = get_thread_pointer ();
5703
5704 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5705 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5706 off = gen_rtx_CONST (Pmode, off);
5707
5708 if (TARGET_GNU_TLS)
5709 return gen_rtx_PLUS (Pmode, base, off);
5710 else
5711 {
5712 dest = gen_reg_rtx (Pmode);
5713 emit_insn (gen_subsi3 (dest, base, off));
5714 }
5715 break;
5716
5717 default:
5718 abort ();
5719 }
5720
5721 return dest;
5722 }
5723
5724 if (flag_pic && SYMBOLIC_CONST (x))
5725 return legitimize_pic_address (x, 0);
5726
5727 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5728 if (GET_CODE (x) == ASHIFT
5729 && GET_CODE (XEXP (x, 1)) == CONST_INT
5730 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5731 {
5732 changed = 1;
5733 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5734 GEN_INT (1 << log));
5735 }
5736
5737 if (GET_CODE (x) == PLUS)
5738 {
5739 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5740
5741 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5742 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5743 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5744 {
5745 changed = 1;
5746 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5747 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5748 GEN_INT (1 << log));
5749 }
5750
5751 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5752 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5753 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5754 {
5755 changed = 1;
5756 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5757 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5758 GEN_INT (1 << log));
5759 }
5760
5761 /* Put multiply first if it isn't already. */
5762 if (GET_CODE (XEXP (x, 1)) == MULT)
5763 {
5764 rtx tmp = XEXP (x, 0);
5765 XEXP (x, 0) = XEXP (x, 1);
5766 XEXP (x, 1) = tmp;
5767 changed = 1;
5768 }
5769
5770 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5771 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5772 created by virtual register instantiation, register elimination, and
5773 similar optimizations. */
5774 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5775 {
5776 changed = 1;
5777 x = gen_rtx_PLUS (Pmode,
5778 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5779 XEXP (XEXP (x, 1), 0)),
5780 XEXP (XEXP (x, 1), 1));
5781 }
5782
5783 /* Canonicalize
5784 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5785 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5786 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5787 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5788 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5789 && CONSTANT_P (XEXP (x, 1)))
5790 {
5791 rtx constant;
5792 rtx other = NULL_RTX;
5793
5794 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5795 {
5796 constant = XEXP (x, 1);
5797 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5798 }
5799 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5800 {
5801 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5802 other = XEXP (x, 1);
5803 }
5804 else
5805 constant = 0;
5806
5807 if (constant)
5808 {
5809 changed = 1;
5810 x = gen_rtx_PLUS (Pmode,
5811 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5812 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5813 plus_constant (other, INTVAL (constant)));
5814 }
5815 }
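/* Worked example (added): the two canonicalizations above turn an
   address such as
     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   into
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   i.e. the index*scale + base + displacement shape that the i386
   addressing modes accept directly.  */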
5816
5817 if (changed && legitimate_address_p (mode, x, FALSE))
5818 return x;
5819
5820 if (GET_CODE (XEXP (x, 0)) == MULT)
5821 {
5822 changed = 1;
5823 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5824 }
5825
5826 if (GET_CODE (XEXP (x, 1)) == MULT)
5827 {
5828 changed = 1;
5829 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5830 }
5831
5832 if (changed
5833 && GET_CODE (XEXP (x, 1)) == REG
5834 && GET_CODE (XEXP (x, 0)) == REG)
5835 return x;
5836
5837 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5838 {
5839 changed = 1;
5840 x = legitimize_pic_address (x, 0);
5841 }
5842
5843 if (changed && legitimate_address_p (mode, x, FALSE))
5844 return x;
5845
5846 if (GET_CODE (XEXP (x, 0)) == REG)
5847 {
5848 register rtx temp = gen_reg_rtx (Pmode);
5849 register rtx val = force_operand (XEXP (x, 1), temp);
5850 if (val != temp)
5851 emit_move_insn (temp, val);
5852
5853 XEXP (x, 1) = temp;
5854 return x;
5855 }
5856
5857 else if (GET_CODE (XEXP (x, 1)) == REG)
5858 {
5859 register rtx temp = gen_reg_rtx (Pmode);
5860 register rtx val = force_operand (XEXP (x, 0), temp);
5861 if (val != temp)
5862 emit_move_insn (temp, val);
5863
5864 XEXP (x, 0) = temp;
5865 return x;
5866 }
5867 }
5868
5869 return x;
5870 }
5871 \f
5872 /* Print an integer constant expression in assembler syntax. Addition
5873 and subtraction are the only arithmetic that may appear in these
5874 expressions. FILE is the stdio stream to write to, X is the rtx, and
5875 CODE is the operand print code from the output string. */
5876
5877 static void
5878 output_pic_addr_const (file, x, code)
5879 FILE *file;
5880 rtx x;
5881 int code;
5882 {
5883 char buf[256];
5884
5885 switch (GET_CODE (x))
5886 {
5887 case PC:
5888 if (flag_pic)
5889 putc ('.', file);
5890 else
5891 abort ();
5892 break;
5893
5894 case SYMBOL_REF:
5895 assemble_name (file, XSTR (x, 0));
5896 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5897 fputs ("@PLT", file);
5898 break;
5899
5900 case LABEL_REF:
5901 x = XEXP (x, 0);
5902 /* FALLTHRU */
5903 case CODE_LABEL:
5904 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5905 assemble_name (asm_out_file, buf);
5906 break;
5907
5908 case CONST_INT:
5909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5910 break;
5911
5912 case CONST:
5913 /* This used to output parentheses around the expression,
5914 but that does not work on the 386 (either ATT or BSD assembler). */
5915 output_pic_addr_const (file, XEXP (x, 0), code);
5916 break;
5917
5918 case CONST_DOUBLE:
5919 if (GET_MODE (x) == VOIDmode)
5920 {
5921 /* We can use %d if the number is <32 bits and positive. */
5922 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5923 fprintf (file, "0x%lx%08lx",
5924 (unsigned long) CONST_DOUBLE_HIGH (x),
5925 (unsigned long) CONST_DOUBLE_LOW (x));
5926 else
5927 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5928 }
5929 else
5930 /* We can't handle floating point constants;
5931 PRINT_OPERAND must handle them. */
5932 output_operand_lossage ("floating constant misused");
5933 break;
5934
5935 case PLUS:
5936 /* Some assemblers need integer constants to appear first. */
5937 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5938 {
5939 output_pic_addr_const (file, XEXP (x, 0), code);
5940 putc ('+', file);
5941 output_pic_addr_const (file, XEXP (x, 1), code);
5942 }
5943 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5944 {
5945 output_pic_addr_const (file, XEXP (x, 1), code);
5946 putc ('+', file);
5947 output_pic_addr_const (file, XEXP (x, 0), code);
5948 }
5949 else
5950 abort ();
5951 break;
5952
5953 case MINUS:
5954 if (!TARGET_MACHO)
5955 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5956 output_pic_addr_const (file, XEXP (x, 0), code);
5957 putc ('-', file);
5958 output_pic_addr_const (file, XEXP (x, 1), code);
5959 if (!TARGET_MACHO)
5960 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5961 break;
5962
5963 case UNSPEC:
5964 if (XVECLEN (x, 0) != 1)
5965 abort ();
5966 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5967 switch (XINT (x, 1))
5968 {
5969 case UNSPEC_GOT:
5970 fputs ("@GOT", file);
5971 break;
5972 case UNSPEC_GOTOFF:
5973 fputs ("@GOTOFF", file);
5974 break;
5975 case UNSPEC_GOTPCREL:
5976 fputs ("@GOTPCREL(%rip)", file);
5977 break;
5978 case UNSPEC_GOTTPOFF:
5979 fputs ("@GOTTPOFF", file);
5980 break;
5981 case UNSPEC_TPOFF:
5982 fputs ("@TPOFF", file);
5983 break;
5984 case UNSPEC_NTPOFF:
5985 fputs ("@NTPOFF", file);
5986 break;
5987 case UNSPEC_DTPOFF:
5988 fputs ("@DTPOFF", file);
5989 break;
5990 default:
5991 output_operand_lossage ("invalid UNSPEC as operand");
5992 break;
5993 }
5994 break;
5995
5996 default:
5997 output_operand_lossage ("invalid expression as operand");
5998 }
5999 }
6000
6001 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6002 We need to handle our special PIC relocations. */
6003
6004 void
6005 i386_dwarf_output_addr_const (file, x)
6006 FILE *file;
6007 rtx x;
6008 {
6009 #ifdef ASM_QUAD
6010 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6011 #else
6012 if (TARGET_64BIT)
6013 abort ();
6014 fprintf (file, "%s", ASM_LONG);
6015 #endif
6016 if (flag_pic)
6017 output_pic_addr_const (file, x, '\0');
6018 else
6019 output_addr_const (file, x);
6020 fputc ('\n', file);
6021 }
6022
6023 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6024 We need to emit DTP-relative relocations. */
6025
6026 void
6027 i386_output_dwarf_dtprel (file, size, x)
6028 FILE *file;
6029 int size;
6030 rtx x;
6031 {
6032 switch (size)
6033 {
6034 case 4:
6035 fputs (ASM_LONG, file);
6036 break;
6037 case 8:
6038 #ifdef ASM_QUAD
6039 fputs (ASM_QUAD, file);
6040 break;
6041 #endif
6042 default:
6043 abort ();
6044 }
6045
6046 output_addr_const (file, x);
6047 fputs ("@DTPOFF", file);
6048 }
6049
6050 /* In the name of slightly smaller debug output, and to cater to
6051 general assembler lossage, recognize PIC+GOTOFF and turn it back
6052 into a direct symbol reference. */
6053
6054 rtx
6055 i386_simplify_dwarf_addr (orig_x)
6056 rtx orig_x;
6057 {
6058 rtx x = orig_x, y;
6059
6060 if (GET_CODE (x) == MEM)
6061 x = XEXP (x, 0);
6062
6063 if (TARGET_64BIT)
6064 {
6065 if (GET_CODE (x) != CONST
6066 || GET_CODE (XEXP (x, 0)) != UNSPEC
6067 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6068 || GET_CODE (orig_x) != MEM)
6069 return orig_x;
6070 return XVECEXP (XEXP (x, 0), 0, 0);
6071 }
6072
6073 if (GET_CODE (x) != PLUS
6074 || GET_CODE (XEXP (x, 1)) != CONST)
6075 return orig_x;
6076
6077 if (GET_CODE (XEXP (x, 0)) == REG
6078 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6079 /* %ebx + GOT/GOTOFF */
6080 y = NULL;
6081 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6082 {
6083 /* %ebx + %reg * scale + GOT/GOTOFF */
6084 y = XEXP (x, 0);
6085 if (GET_CODE (XEXP (y, 0)) == REG
6086 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6087 y = XEXP (y, 1);
6088 else if (GET_CODE (XEXP (y, 1)) == REG
6089 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6090 y = XEXP (y, 0);
6091 else
6092 return orig_x;
6093 if (GET_CODE (y) != REG
6094 && GET_CODE (y) != MULT
6095 && GET_CODE (y) != ASHIFT)
6096 return orig_x;
6097 }
6098 else
6099 return orig_x;
6100
6101 x = XEXP (XEXP (x, 1), 0);
6102 if (GET_CODE (x) == UNSPEC
6103 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6104 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6105 {
6106 if (y)
6107 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6108 return XVECEXP (x, 0, 0);
6109 }
6110
6111 if (GET_CODE (x) == PLUS
6112 && GET_CODE (XEXP (x, 0)) == UNSPEC
6113 && GET_CODE (XEXP (x, 1)) == CONST_INT
6114 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6115 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6116 && GET_CODE (orig_x) != MEM)))
6117 {
6118 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6119 if (y)
6120 return gen_rtx_PLUS (Pmode, y, x);
6121 return x;
6122 }
6123
6124 return orig_x;
6125 }
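/* Example (added): given the 32-bit PIC address
   (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF))), the code above
   hands back just the SYMBOL_REF `foo', so the debug output can
   reference the symbol directly instead of the PIC expression.  */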
6126 \f
6127 static void
6128 put_condition_code (code, mode, reverse, fp, file)
6129 enum rtx_code code;
6130 enum machine_mode mode;
6131 int reverse, fp;
6132 FILE *file;
6133 {
6134 const char *suffix;
6135
6136 if (mode == CCFPmode || mode == CCFPUmode)
6137 {
6138 enum rtx_code second_code, bypass_code;
6139 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6140 if (bypass_code != NIL || second_code != NIL)
6141 abort ();
6142 code = ix86_fp_compare_code_to_integer (code);
6143 mode = CCmode;
6144 }
6145 if (reverse)
6146 code = reverse_condition (code);
6147
6148 switch (code)
6149 {
6150 case EQ:
6151 suffix = "e";
6152 break;
6153 case NE:
6154 suffix = "ne";
6155 break;
6156 case GT:
6157 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6158 abort ();
6159 suffix = "g";
6160 break;
6161 case GTU:
6162 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6163 Those same assemblers have the same but opposite lossage on cmov.
6164 if (mode != CCmode)
6165 abort ();
6166 suffix = fp ? "nbe" : "a";
6167 break;
6168 case LT:
6169 if (mode == CCNOmode || mode == CCGOCmode)
6170 suffix = "s";
6171 else if (mode == CCmode || mode == CCGCmode)
6172 suffix = "l";
6173 else
6174 abort ();
6175 break;
6176 case LTU:
6177 if (mode != CCmode)
6178 abort ();
6179 suffix = "b";
6180 break;
6181 case GE:
6182 if (mode == CCNOmode || mode == CCGOCmode)
6183 suffix = "ns";
6184 else if (mode == CCmode || mode == CCGCmode)
6185 suffix = "ge";
6186 else
6187 abort ();
6188 break;
6189 case GEU:
6190 /* ??? As above. */
6191 if (mode != CCmode)
6192 abort ();
6193 suffix = fp ? "nb" : "ae";
6194 break;
6195 case LE:
6196 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6197 abort ();
6198 suffix = "le";
6199 break;
6200 case LEU:
6201 if (mode != CCmode)
6202 abort ();
6203 suffix = "be";
6204 break;
6205 case UNORDERED:
6206 suffix = fp ? "u" : "p";
6207 break;
6208 case ORDERED:
6209 suffix = fp ? "nu" : "np";
6210 break;
6211 default:
6212 abort ();
6213 }
6214 fputs (suffix, file);
6215 }
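/* Example (added): a GT comparison in CCGCmode yields the suffix "g",
   so a consumer prints e.g. "setg" or "cmovg"; with REVERSE set the
   condition becomes LE and the suffix "le" is emitted instead.  */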
6216
6217 void
6218 print_reg (x, code, file)
6219 rtx x;
6220 int code;
6221 FILE *file;
6222 {
6223 if (REGNO (x) == ARG_POINTER_REGNUM
6224 || REGNO (x) == FRAME_POINTER_REGNUM
6225 || REGNO (x) == FLAGS_REG
6226 || REGNO (x) == FPSR_REG)
6227 abort ();
6228
6229 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6230 putc ('%', file);
6231
6232 if (code == 'w' || MMX_REG_P (x))
6233 code = 2;
6234 else if (code == 'b')
6235 code = 1;
6236 else if (code == 'k')
6237 code = 4;
6238 else if (code == 'q')
6239 code = 8;
6240 else if (code == 'y')
6241 code = 3;
6242 else if (code == 'h')
6243 code = 0;
6244 else
6245 code = GET_MODE_SIZE (GET_MODE (x));
6246
6247 /* Irritatingly, AMD extended registers use a different naming convention
6248 from the normal registers. */
6249 if (REX_INT_REG_P (x))
6250 {
6251 if (!TARGET_64BIT)
6252 abort ();
6253 switch (code)
6254 {
6255 case 0:
6256 error ("extended registers have no high halves");
6257 break;
6258 case 1:
6259 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6260 break;
6261 case 2:
6262 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6263 break;
6264 case 4:
6265 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6266 break;
6267 case 8:
6268 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6269 break;
6270 default:
6271 error ("unsupported operand size for extended register");
6272 break;
6273 }
6274 return;
6275 }
6276 switch (code)
6277 {
6278 case 3:
6279 if (STACK_TOP_P (x))
6280 {
6281 fputs ("st(0)", file);
6282 break;
6283 }
6284 /* FALLTHRU */
6285 case 8:
6286 case 4:
6287 case 12:
6288 if (! ANY_FP_REG_P (x))
6289 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6290 /* FALLTHRU */
6291 case 16:
6292 case 2:
6293 fputs (hi_reg_name[REGNO (x)], file);
6294 break;
6295 case 1:
6296 fputs (qi_reg_name[REGNO (x)], file);
6297 break;
6298 case 0:
6299 fputs (qi_high_reg_name[REGNO (x)], file);
6300 break;
6301 default:
6302 abort ();
6303 }
6304 }
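/* Examples (added): with code 'k' (SImode) the register %eax prints as
   "eax" and the AMD64 extended register r9 prints as "r9d"; with code
   'w' (HImode) they print as "ax" and "r9w" respectively.  */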
6305
6306 /* Locate some local-dynamic symbol still in use by this function
6307 so that we can print its name in some tls_local_dynamic_base
6308 pattern. */
6309
6310 static const char *
6311 get_some_local_dynamic_name ()
6312 {
6313 rtx insn;
6314
6315 if (cfun->machine->some_ld_name)
6316 return cfun->machine->some_ld_name;
6317
6318 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6319 if (INSN_P (insn)
6320 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6321 return cfun->machine->some_ld_name;
6322
6323 abort ();
6324 }
6325
6326 static int
6327 get_some_local_dynamic_name_1 (px, data)
6328 rtx *px;
6329 void *data ATTRIBUTE_UNUSED;
6330 {
6331 rtx x = *px;
6332
6333 if (GET_CODE (x) == SYMBOL_REF
6334 && local_dynamic_symbolic_operand (x, Pmode))
6335 {
6336 cfun->machine->some_ld_name = XSTR (x, 0);
6337 return 1;
6338 }
6339
6340 return 0;
6341 }
6342
6343 /* Meaning of CODE:
6344 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6345 C -- print opcode suffix for set/cmov insn.
6346 c -- like C, but print reversed condition
6347 F,f -- likewise, but for floating-point.
6348 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6349 nothing
6350 R -- print the prefix for register names.
6351 z -- print the opcode suffix for the size of the current operand.
6352 * -- print a star (in certain assembler syntax)
6353 A -- print an absolute memory reference.
6354 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6355 s -- print a shift double count, followed by the assembler's argument
6356 delimiter.
6357 b -- print the QImode name of the register for the indicated operand.
6358 %b0 would print %al if operands[0] is reg 0.
6359 w -- likewise, print the HImode name of the register.
6360 k -- likewise, print the SImode name of the register.
6361 q -- likewise, print the DImode name of the register.
6362 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6363 y -- print "st(0)" instead of "st" as a register.
6364 D -- print condition for SSE cmp instruction.
6365 P -- if PIC, print an @PLT suffix.
6366 X -- don't print any sort of PIC '@' suffix for a symbol.
6367 & -- print some in-use local-dynamic symbol name.
6368 */
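/* Example (added for clarity): output_fix_trunc below uses the 'z' code
   in its "fistp%z0\t%0" template; with a DImode memory destination this
   expands to "fistpq" (or "fistpll" without GAS_MNEMONICS), while an
   SImode destination yields "fistpl" in AT&T syntax.  */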
6369
6370 void
6371 print_operand (file, x, code)
6372 FILE *file;
6373 rtx x;
6374 int code;
6375 {
6376 if (code)
6377 {
6378 switch (code)
6379 {
6380 case '*':
6381 if (ASSEMBLER_DIALECT == ASM_ATT)
6382 putc ('*', file);
6383 return;
6384
6385 case '&':
6386 assemble_name (file, get_some_local_dynamic_name ());
6387 return;
6388
6389 case 'A':
6390 if (ASSEMBLER_DIALECT == ASM_ATT)
6391 putc ('*', file);
6392 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6393 {
6394 /* Intel syntax. For absolute addresses, registers should not
6395 be surrounded by brackets. */
6396 if (GET_CODE (x) != REG)
6397 {
6398 putc ('[', file);
6399 PRINT_OPERAND (file, x, 0);
6400 putc (']', file);
6401 return;
6402 }
6403 }
6404 else
6405 abort ();
6406
6407 PRINT_OPERAND (file, x, 0);
6408 return;
6409
6410
6411 case 'L':
6412 if (ASSEMBLER_DIALECT == ASM_ATT)
6413 putc ('l', file);
6414 return;
6415
6416 case 'W':
6417 if (ASSEMBLER_DIALECT == ASM_ATT)
6418 putc ('w', file);
6419 return;
6420
6421 case 'B':
6422 if (ASSEMBLER_DIALECT == ASM_ATT)
6423 putc ('b', file);
6424 return;
6425
6426 case 'Q':
6427 if (ASSEMBLER_DIALECT == ASM_ATT)
6428 putc ('l', file);
6429 return;
6430
6431 case 'S':
6432 if (ASSEMBLER_DIALECT == ASM_ATT)
6433 putc ('s', file);
6434 return;
6435
6436 case 'T':
6437 if (ASSEMBLER_DIALECT == ASM_ATT)
6438 putc ('t', file);
6439 return;
6440
6441 case 'z':
6442 /* 387 opcodes don't get size suffixes if the operands are
6443 registers. */
6444 if (STACK_REG_P (x))
6445 return;
6446
6447 /* Likewise if using Intel opcodes. */
6448 if (ASSEMBLER_DIALECT == ASM_INTEL)
6449 return;
6450
6451 /* This is the size of op from size of operand. */
6452 switch (GET_MODE_SIZE (GET_MODE (x)))
6453 {
6454 case 2:
6455 #ifdef HAVE_GAS_FILDS_FISTS
6456 putc ('s', file);
6457 #endif
6458 return;
6459
6460 case 4:
6461 if (GET_MODE (x) == SFmode)
6462 {
6463 putc ('s', file);
6464 return;
6465 }
6466 else
6467 putc ('l', file);
6468 return;
6469
6470 case 12:
6471 case 16:
6472 putc ('t', file);
6473 return;
6474
6475 case 8:
6476 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6477 {
6478 #ifdef GAS_MNEMONICS
6479 putc ('q', file);
6480 #else
6481 putc ('l', file);
6482 putc ('l', file);
6483 #endif
6484 }
6485 else
6486 putc ('l', file);
6487 return;
6488
6489 default:
6490 abort ();
6491 }
6492
6493 case 'b':
6494 case 'w':
6495 case 'k':
6496 case 'q':
6497 case 'h':
6498 case 'y':
6499 case 'X':
6500 case 'P':
6501 break;
6502
6503 case 's':
6504 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6505 {
6506 PRINT_OPERAND (file, x, 0);
6507 putc (',', file);
6508 }
6509 return;
6510
6511 case 'D':
6512 /* Little bit of braindamage here. The SSE compare instructions
6513 use completely different names for the comparisons than the
6514 fp conditional moves do. */
6515 switch (GET_CODE (x))
6516 {
6517 case EQ:
6518 case UNEQ:
6519 fputs ("eq", file);
6520 break;
6521 case LT:
6522 case UNLT:
6523 fputs ("lt", file);
6524 break;
6525 case LE:
6526 case UNLE:
6527 fputs ("le", file);
6528 break;
6529 case UNORDERED:
6530 fputs ("unord", file);
6531 break;
6532 case NE:
6533 case LTGT:
6534 fputs ("neq", file);
6535 break;
6536 case UNGE:
6537 case GE:
6538 fputs ("nlt", file);
6539 break;
6540 case UNGT:
6541 case GT:
6542 fputs ("nle", file);
6543 break;
6544 case ORDERED:
6545 fputs ("ord", file);
6546 break;
6547 default:
6548 abort ();
6549 break;
6550 }
6551 return;
6552 case 'O':
6553 #ifdef CMOV_SUN_AS_SYNTAX
6554 if (ASSEMBLER_DIALECT == ASM_ATT)
6555 {
6556 switch (GET_MODE (x))
6557 {
6558 case HImode: putc ('w', file); break;
6559 case SImode:
6560 case SFmode: putc ('l', file); break;
6561 case DImode:
6562 case DFmode: putc ('q', file); break;
6563 default: abort ();
6564 }
6565 putc ('.', file);
6566 }
6567 #endif
6568 return;
6569 case 'C':
6570 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6571 return;
6572 case 'F':
6573 #ifdef CMOV_SUN_AS_SYNTAX
6574 if (ASSEMBLER_DIALECT == ASM_ATT)
6575 putc ('.', file);
6576 #endif
6577 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6578 return;
6579
6580 /* Like above, but reverse condition */
6581 case 'c':
6582 /* Check to see if argument to %c is really a constant
6583 and not a condition code which needs to be reversed. */
6584 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6585 {
6586 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6587 return;
6588 }
6589 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6590 return;
6591 case 'f':
6592 #ifdef CMOV_SUN_AS_SYNTAX
6593 if (ASSEMBLER_DIALECT == ASM_ATT)
6594 putc ('.', file);
6595 #endif
6596 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6597 return;
6598 case '+':
6599 {
6600 rtx x;
6601
6602 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6603 return;
6604
6605 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6606 if (x)
6607 {
6608 int pred_val = INTVAL (XEXP (x, 0));
6609
6610 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6611 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6612 {
6613 int taken = pred_val > REG_BR_PROB_BASE / 2;
6614 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6615
6616 /* Emit hints only in the case the default branch prediction
6617 heuristics would fail. */
6618 if (taken != cputaken)
6619 {
6620 /* We use 3e (DS) prefix for taken branches and
6621 2e (CS) prefix for not taken branches. */
6622 if (taken)
6623 fputs ("ds ; ", file);
6624 else
6625 fputs ("cs ; ", file);
6626 }
6627 }
6628 }
6629 return;
6630 }
6631 default:
6632 output_operand_lossage ("invalid operand code `%c'", code);
6633 }
6634 }
6635
6636 if (GET_CODE (x) == REG)
6637 {
6638 PRINT_REG (x, code, file);
6639 }
6640
6641 else if (GET_CODE (x) == MEM)
6642 {
6643 /* No `byte ptr' prefix for call instructions. */
6644 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6645 {
6646 const char * size;
6647 switch (GET_MODE_SIZE (GET_MODE (x)))
6648 {
6649 case 1: size = "BYTE"; break;
6650 case 2: size = "WORD"; break;
6651 case 4: size = "DWORD"; break;
6652 case 8: size = "QWORD"; break;
6653 case 12: size = "XWORD"; break;
6654 case 16: size = "XMMWORD"; break;
6655 default:
6656 abort ();
6657 }
6658
6659 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6660 if (code == 'b')
6661 size = "BYTE";
6662 else if (code == 'w')
6663 size = "WORD";
6664 else if (code == 'k')
6665 size = "DWORD";
6666
6667 fputs (size, file);
6668 fputs (" PTR ", file);
6669 }
6670
6671 x = XEXP (x, 0);
6672 if (flag_pic && CONSTANT_ADDRESS_P (x))
6673 output_pic_addr_const (file, x, code);
6674 /* Avoid (%rip) for call operands. */
6675 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6676 && GET_CODE (x) != CONST_INT)
6677 output_addr_const (file, x);
6678 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6679 output_operand_lossage ("invalid constraints for operand");
6680 else
6681 output_address (x);
6682 }
6683
6684 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6685 {
6686 REAL_VALUE_TYPE r;
6687 long l;
6688
6689 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6690 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6691
6692 if (ASSEMBLER_DIALECT == ASM_ATT)
6693 putc ('$', file);
6694 fprintf (file, "0x%lx", l);
6695 }
6696
6697 /* These float cases don't actually occur as immediate operands. */
6698 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6699 {
6700 REAL_VALUE_TYPE r;
6701 char dstr[30];
6702
6703 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6704 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6705 fprintf (file, "%s", dstr);
6706 }
6707
6708 else if (GET_CODE (x) == CONST_DOUBLE
6709 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6710 {
6711 REAL_VALUE_TYPE r;
6712 char dstr[30];
6713
6714 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6715 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6716 fprintf (file, "%s", dstr);
6717 }
6718
6719 else
6720 {
6721 if (code != 'P')
6722 {
6723 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6724 {
6725 if (ASSEMBLER_DIALECT == ASM_ATT)
6726 putc ('$', file);
6727 }
6728 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6729 || GET_CODE (x) == LABEL_REF)
6730 {
6731 if (ASSEMBLER_DIALECT == ASM_ATT)
6732 putc ('$', file);
6733 else
6734 fputs ("OFFSET FLAT:", file);
6735 }
6736 }
6737 if (GET_CODE (x) == CONST_INT)
6738 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6739 else if (flag_pic)
6740 output_pic_addr_const (file, x, code);
6741 else
6742 output_addr_const (file, x);
6743 }
6744 }
6745 \f
6746 /* Print a memory operand whose address is ADDR. */
6747
6748 void
6749 print_operand_address (file, addr)
6750 FILE *file;
6751 register rtx addr;
6752 {
6753 struct ix86_address parts;
6754 rtx base, index, disp;
6755 int scale;
6756
6757 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6758 {
6759 if (ASSEMBLER_DIALECT == ASM_INTEL)
6760 fputs ("DWORD PTR ", file);
6761 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6762 putc ('%', file);
6763 fputs ("gs:0", file);
6764 return;
6765 }
6766
6767 if (! ix86_decompose_address (addr, &parts))
6768 abort ();
6769
6770 base = parts.base;
6771 index = parts.index;
6772 disp = parts.disp;
6773 scale = parts.scale;
6774
6775 if (!base && !index)
6776 {
6777 /* A displacement-only address requires special attention. */
6778
6779 if (GET_CODE (disp) == CONST_INT)
6780 {
6781 if (ASSEMBLER_DIALECT == ASM_INTEL)
6782 {
6783 if (USER_LABEL_PREFIX[0] == 0)
6784 putc ('%', file);
6785 fputs ("ds:", file);
6786 }
6787 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6788 }
6789 else if (flag_pic)
6790 output_pic_addr_const (file, addr, 0);
6791 else
6792 output_addr_const (file, addr);
6793
6794 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6795 if (TARGET_64BIT
6796 && (GET_CODE (addr) == SYMBOL_REF
6797 || GET_CODE (addr) == LABEL_REF
6798 || (GET_CODE (addr) == CONST
6799 && GET_CODE (XEXP (addr, 0)) == PLUS
6800 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6801 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6802 fputs ("(%rip)", file);
6803 }
6804 else
6805 {
6806 if (ASSEMBLER_DIALECT == ASM_ATT)
6807 {
6808 if (disp)
6809 {
6810 if (flag_pic)
6811 output_pic_addr_const (file, disp, 0);
6812 else if (GET_CODE (disp) == LABEL_REF)
6813 output_asm_label (disp);
6814 else
6815 output_addr_const (file, disp);
6816 }
6817
6818 putc ('(', file);
6819 if (base)
6820 PRINT_REG (base, 0, file);
6821 if (index)
6822 {
6823 putc (',', file);
6824 PRINT_REG (index, 0, file);
6825 if (scale != 1)
6826 fprintf (file, ",%d", scale);
6827 }
6828 putc (')', file);
6829 }
6830 else
6831 {
6832 rtx offset = NULL_RTX;
6833
6834 if (disp)
6835 {
6836 /* Pull out the offset of a symbol; print any symbol itself. */
6837 if (GET_CODE (disp) == CONST
6838 && GET_CODE (XEXP (disp, 0)) == PLUS
6839 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6840 {
6841 offset = XEXP (XEXP (disp, 0), 1);
6842 disp = gen_rtx_CONST (VOIDmode,
6843 XEXP (XEXP (disp, 0), 0));
6844 }
6845
6846 if (flag_pic)
6847 output_pic_addr_const (file, disp, 0);
6848 else if (GET_CODE (disp) == LABEL_REF)
6849 output_asm_label (disp);
6850 else if (GET_CODE (disp) == CONST_INT)
6851 offset = disp;
6852 else
6853 output_addr_const (file, disp);
6854 }
6855
6856 putc ('[', file);
6857 if (base)
6858 {
6859 PRINT_REG (base, 0, file);
6860 if (offset)
6861 {
6862 if (INTVAL (offset) >= 0)
6863 putc ('+', file);
6864 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6865 }
6866 }
6867 else if (offset)
6868 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6869 else
6870 putc ('0', file);
6871
6872 if (index)
6873 {
6874 putc ('+', file);
6875 PRINT_REG (index, 0, file);
6876 if (scale != 1)
6877 fprintf (file, "*%d", scale);
6878 }
6879 putc (']', file);
6880 }
6881 }
6882 }
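/* Example (added): for parts.base = %ebx, parts.index = %eax,
   parts.scale = 4 and parts.disp = (const_int 12), the code above
   prints "12(%ebx,%eax,4)" in AT&T syntax and "[ebx+12+eax*4]" in
   Intel syntax.  */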
6883
6884 bool
6885 output_addr_const_extra (file, x)
6886 FILE *file;
6887 rtx x;
6888 {
6889 rtx op;
6890
6891 if (GET_CODE (x) != UNSPEC)
6892 return false;
6893
6894 op = XVECEXP (x, 0, 0);
6895 switch (XINT (x, 1))
6896 {
6897 case UNSPEC_GOTTPOFF:
6898 output_addr_const (file, op);
6899 fputs ("@GOTTPOFF", file);
6900 break;
6901 case UNSPEC_TPOFF:
6902 output_addr_const (file, op);
6903 fputs ("@TPOFF", file);
6904 break;
6905 case UNSPEC_NTPOFF:
6906 output_addr_const (file, op);
6907 fputs ("@NTPOFF", file);
6908 break;
6909 case UNSPEC_DTPOFF:
6910 output_addr_const (file, op);
6911 fputs ("@DTPOFF", file);
6912 break;
6913
6914 default:
6915 return false;
6916 }
6917
6918 return true;
6919 }
6920 \f
6921 /* Split one or more DImode RTL references into pairs of SImode
6922 references. The RTL can be REG, offsettable MEM, integer constant, or
6923 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6924 split and "num" is its length. lo_half and hi_half are output arrays
6925 that parallel "operands". */
6926
6927 void
6928 split_di (operands, num, lo_half, hi_half)
6929 rtx operands[];
6930 int num;
6931 rtx lo_half[], hi_half[];
6932 {
6933 while (num--)
6934 {
6935 rtx op = operands[num];
6936
6937 /* simplify_subreg refuses to split volatile memory addresses,
6938 but we still have to handle them. */
6939 if (GET_CODE (op) == MEM)
6940 {
6941 lo_half[num] = adjust_address (op, SImode, 0);
6942 hi_half[num] = adjust_address (op, SImode, 4);
6943 }
6944 else
6945 {
6946 lo_half[num] = simplify_gen_subreg (SImode, op,
6947 GET_MODE (op) == VOIDmode
6948 ? DImode : GET_MODE (op), 0);
6949 hi_half[num] = simplify_gen_subreg (SImode, op,
6950 GET_MODE (op) == VOIDmode
6951 ? DImode : GET_MODE (op), 4);
6952 }
6953 }
6954 }
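/* Example (added): on this little-endian target, splitting the DImode
   constant 0x100000002 yields lo_half = (const_int 2) and
   hi_half = (const_int 1), while a DImode MEM is split into two SImode
   MEMs at byte offsets 0 and 4.  */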
6955 /* Split one or more TImode RTL references into pairs of DImode
6956 references. The RTL can be REG, offsettable MEM, integer constant, or
6957 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6958 split and "num" is its length. lo_half and hi_half are output arrays
6959 that parallel "operands". */
6960
6961 void
6962 split_ti (operands, num, lo_half, hi_half)
6963 rtx operands[];
6964 int num;
6965 rtx lo_half[], hi_half[];
6966 {
6967 while (num--)
6968 {
6969 rtx op = operands[num];
6970
6971 /* simplify_subreg refuses to split volatile memory addresses, but we
6972 still have to handle them. */
6973 if (GET_CODE (op) == MEM)
6974 {
6975 lo_half[num] = adjust_address (op, DImode, 0);
6976 hi_half[num] = adjust_address (op, DImode, 8);
6977 }
6978 else
6979 {
6980 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6981 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6982 }
6983 }
6984 }
6985 \f
6986 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6987 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6988 is the expression of the binary operation. The output may either be
6989 emitted here, or returned to the caller, like all output_* functions.
6990
6991 There is no guarantee that the operands are the same mode, as they
6992 might be within FLOAT or FLOAT_EXTEND expressions. */
6993
6994 #ifndef SYSV386_COMPAT
6995 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6996 wants to fix the assemblers because that causes incompatibility
6997 with gcc. No-one wants to fix gcc because that causes
6998 incompatibility with assemblers... You can use the option of
6999 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7000 #define SYSV386_COMPAT 1
7001 #endif
7002
7003 const char *
7004 output_387_binary_op (insn, operands)
7005 rtx insn;
7006 rtx *operands;
7007 {
7008 static char buf[30];
7009 const char *p;
7010 const char *ssep;
7011 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7012
7013 #ifdef ENABLE_CHECKING
7014 /* Even if we do not want to check the inputs, this documents the input
7015 constraints, which helps in understanding the following code. */
7016 if (STACK_REG_P (operands[0])
7017 && ((REG_P (operands[1])
7018 && REGNO (operands[0]) == REGNO (operands[1])
7019 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7020 || (REG_P (operands[2])
7021 && REGNO (operands[0]) == REGNO (operands[2])
7022 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7023 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7024 ; /* ok */
7025 else if (!is_sse)
7026 abort ();
7027 #endif
7028
7029 switch (GET_CODE (operands[3]))
7030 {
7031 case PLUS:
7032 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7033 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7034 p = "fiadd";
7035 else
7036 p = "fadd";
7037 ssep = "add";
7038 break;
7039
7040 case MINUS:
7041 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7042 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7043 p = "fisub";
7044 else
7045 p = "fsub";
7046 ssep = "sub";
7047 break;
7048
7049 case MULT:
7050 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7051 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7052 p = "fimul";
7053 else
7054 p = "fmul";
7055 ssep = "mul";
7056 break;
7057
7058 case DIV:
7059 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7060 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7061 p = "fidiv";
7062 else
7063 p = "fdiv";
7064 ssep = "div";
7065 break;
7066
7067 default:
7068 abort ();
7069 }
7070
7071 if (is_sse)
7072 {
7073 strcpy (buf, ssep);
7074 if (GET_MODE (operands[0]) == SFmode)
7075 strcat (buf, "ss\t{%2, %0|%0, %2}");
7076 else
7077 strcat (buf, "sd\t{%2, %0|%0, %2}");
7078 return buf;
7079 }
7080 strcpy (buf, p);
7081
7082 switch (GET_CODE (operands[3]))
7083 {
7084 case MULT:
7085 case PLUS:
7086 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7087 {
7088 rtx temp = operands[2];
7089 operands[2] = operands[1];
7090 operands[1] = temp;
7091 }
7092
7093 /* We now know operands[0] == operands[1]. */
7094
7095 if (GET_CODE (operands[2]) == MEM)
7096 {
7097 p = "%z2\t%2";
7098 break;
7099 }
7100
7101 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7102 {
7103 if (STACK_TOP_P (operands[0]))
7104 /* How is it that we are storing to a dead operand[2]?
7105 Well, presumably operands[1] is dead too. We can't
7106 store the result to st(0) as st(0) gets popped on this
7107 instruction. Instead store to operands[2] (which I
7108 think has to be st(1)). st(1) will be popped later.
7109 gcc <= 2.8.1 didn't have this check and generated
7110 assembly code that the Unixware assembler rejected. */
7111 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7112 else
7113 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7114 break;
7115 }
7116
7117 if (STACK_TOP_P (operands[0]))
7118 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7119 else
7120 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7121 break;
7122
7123 case MINUS:
7124 case DIV:
7125 if (GET_CODE (operands[1]) == MEM)
7126 {
7127 p = "r%z1\t%1";
7128 break;
7129 }
7130
7131 if (GET_CODE (operands[2]) == MEM)
7132 {
7133 p = "%z2\t%2";
7134 break;
7135 }
7136
7137 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7138 {
7139 #if SYSV386_COMPAT
7140 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7141 derived assemblers, confusingly reverse the direction of
7142 the operation for fsub{r} and fdiv{r} when the
7143 destination register is not st(0). The Intel assembler
7144 doesn't have this brain damage. Read !SYSV386_COMPAT to
7145 figure out what the hardware really does. */
7146 if (STACK_TOP_P (operands[0]))
7147 p = "{p\t%0, %2|rp\t%2, %0}";
7148 else
7149 p = "{rp\t%2, %0|p\t%0, %2}";
7150 #else
7151 if (STACK_TOP_P (operands[0]))
7152 /* As above for fmul/fadd, we can't store to st(0). */
7153 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7154 else
7155 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7156 #endif
7157 break;
7158 }
7159
7160 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7161 {
7162 #if SYSV386_COMPAT
7163 if (STACK_TOP_P (operands[0]))
7164 p = "{rp\t%0, %1|p\t%1, %0}";
7165 else
7166 p = "{p\t%1, %0|rp\t%0, %1}";
7167 #else
7168 if (STACK_TOP_P (operands[0]))
7169 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7170 else
7171 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7172 #endif
7173 break;
7174 }
7175
7176 if (STACK_TOP_P (operands[0]))
7177 {
7178 if (STACK_TOP_P (operands[1]))
7179 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7180 else
7181 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7182 break;
7183 }
7184 else if (STACK_TOP_P (operands[1]))
7185 {
7186 #if SYSV386_COMPAT
7187 p = "{\t%1, %0|r\t%0, %1}";
7188 #else
7189 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7190 #endif
7191 }
7192 else
7193 {
7194 #if SYSV386_COMPAT
7195 p = "{r\t%2, %0|\t%0, %2}";
7196 #else
7197 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7198 #endif
7199 }
7200 break;
7201
7202 default:
7203 abort ();
7204 }
7205
7206 strcat (buf, p);
7207 return buf;
7208 }
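/* Example (added): an SFmode addition with SSE operands takes the
   is_sse path above and returns "addss\t{%2, %0|%0, %2}"; the same
   operation on the 387 stack with operands[0] == operands[1] == st(0)
   and a live register operands[2] returns "fadd\t{%y2, %0|%0, %y2}".  */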
7209
7210 /* Output code to initialize control word copies used by trunc?f?i
7211 patterns. NORMAL is set to the current control word, while ROUND_DOWN
7212 is set to a control word that rounds toward zero, as truncation requires. */
7213 void
7214 emit_i387_cw_initialization (normal, round_down)
7215 rtx normal, round_down;
7216 {
7217 rtx reg = gen_reg_rtx (HImode);
7218
7219 emit_insn (gen_x86_fnstcw_1 (normal));
7220 emit_move_insn (reg, normal);
7221 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7222 && !TARGET_64BIT)
7223 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7224 else
7225 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7226 emit_move_insn (round_down, reg);
7227 }
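/* Note (added): 0xc00 sets both rounding-control bits of the x87
   control word, selecting round-toward-zero as the truncating
   conversions require; NORMAL keeps the original control word so
   output_fix_trunc below can restore it after the fist instruction.  */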
7228
7229 /* Output code for INSN to convert a float to a signed int. OPERANDS
7230 are the insn operands. The output may be [HSD]Imode and the input
7231 operand may be [SDX]Fmode. */
7232
7233 const char *
7234 output_fix_trunc (insn, operands)
7235 rtx insn;
7236 rtx *operands;
7237 {
7238 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7239 int dimode_p = GET_MODE (operands[0]) == DImode;
7240
7241 /* Jump through a hoop or two for DImode, since the hardware has no
7242 non-popping instruction. We used to do this a different way, but
7243 that was somewhat fragile and broke with post-reload splitters. */
7244 if (dimode_p && !stack_top_dies)
7245 output_asm_insn ("fld\t%y1", operands);
7246
7247 if (!STACK_TOP_P (operands[1]))
7248 abort ();
7249
7250 if (GET_CODE (operands[0]) != MEM)
7251 abort ();
7252
7253 output_asm_insn ("fldcw\t%3", operands);
7254 if (stack_top_dies || dimode_p)
7255 output_asm_insn ("fistp%z0\t%0", operands);
7256 else
7257 output_asm_insn ("fist%z0\t%0", operands);
7258 output_asm_insn ("fldcw\t%2", operands);
7259
7260 return "";
7261 }
7262
7263 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7264 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7265 when fucom should be used. */
7266
7267 const char *
7268 output_fp_compare (insn, operands, eflags_p, unordered_p)
7269 rtx insn;
7270 rtx *operands;
7271 int eflags_p, unordered_p;
7272 {
7273 int stack_top_dies;
7274 rtx cmp_op0 = operands[0];
7275 rtx cmp_op1 = operands[1];
7276 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7277
7278 if (eflags_p == 2)
7279 {
7280 cmp_op0 = cmp_op1;
7281 cmp_op1 = operands[2];
7282 }
7283 if (is_sse)
7284 {
7285 if (GET_MODE (operands[0]) == SFmode)
7286 if (unordered_p)
7287 return "ucomiss\t{%1, %0|%0, %1}";
7288 else
7289 return "comiss\t{%1, %0|%0, %y}";
7290 else
7291 if (unordered_p)
7292 return "ucomisd\t{%1, %0|%0, %1}";
7293 else
7294 return "comisd\t{%1, %0|%0, %y}";
7295 }
7296
7297 if (! STACK_TOP_P (cmp_op0))
7298 abort ();
7299
7300 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7301
7302 if (STACK_REG_P (cmp_op1)
7303 && stack_top_dies
7304 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7305 && REGNO (cmp_op1) != FIRST_STACK_REG)
7306 {
7307 /* If the top of the 387 stack dies, and the other operand
7308 is also a stack register that dies, then this must be a
7309 `fcompp' float compare. */
7310
7311 if (eflags_p == 1)
7312 {
7313 /* There is no double popping fcomi variant. Fortunately,
7314 eflags is immune from the fstp's cc clobbering. */
7315 if (unordered_p)
7316 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7317 else
7318 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7319 return "fstp\t%y0";
7320 }
7321 else
7322 {
7323 if (eflags_p == 2)
7324 {
7325 if (unordered_p)
7326 return "fucompp\n\tfnstsw\t%0";
7327 else
7328 return "fcompp\n\tfnstsw\t%0";
7329 }
7330 else
7331 {
7332 if (unordered_p)
7333 return "fucompp";
7334 else
7335 return "fcompp";
7336 }
7337 }
7338 }
7339 else
7340 {
7341 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7342
7343 static const char * const alt[24] =
7344 {
7345 "fcom%z1\t%y1",
7346 "fcomp%z1\t%y1",
7347 "fucom%z1\t%y1",
7348 "fucomp%z1\t%y1",
7349
7350 "ficom%z1\t%y1",
7351 "ficomp%z1\t%y1",
7352 NULL,
7353 NULL,
7354
7355 "fcomi\t{%y1, %0|%0, %y1}",
7356 "fcomip\t{%y1, %0|%0, %y1}",
7357 "fucomi\t{%y1, %0|%0, %y1}",
7358 "fucomip\t{%y1, %0|%0, %y1}",
7359
7360 NULL,
7361 NULL,
7362 NULL,
7363 NULL,
7364
7365 "fcom%z2\t%y2\n\tfnstsw\t%0",
7366 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7367 "fucom%z2\t%y2\n\tfnstsw\t%0",
7368 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7369
7370 "ficom%z2\t%y2\n\tfnstsw\t%0",
7371 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7372 NULL,
7373 NULL
7374 };
7375
7376 int mask;
7377 const char *ret;
7378
7379 mask = eflags_p << 3;
7380 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7381 mask |= unordered_p << 1;
7382 mask |= stack_top_dies;
7383
7384 if (mask >= 24)
7385 abort ();
7386 ret = alt[mask];
7387 if (ret == NULL)
7388 abort ();
7389
7390 return ret;
7391 }
7392 }
7393
7394 void
7395 ix86_output_addr_vec_elt (file, value)
7396 FILE *file;
7397 int value;
7398 {
7399 const char *directive = ASM_LONG;
7400
7401 if (TARGET_64BIT)
7402 {
7403 #ifdef ASM_QUAD
7404 directive = ASM_QUAD;
7405 #else
7406 abort ();
7407 #endif
7408 }
7409
7410 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7411 }
7412
7413 void
7414 ix86_output_addr_diff_elt (file, value, rel)
7415 FILE *file;
7416 int value, rel;
7417 {
7418 if (TARGET_64BIT)
7419 fprintf (file, "%s%s%d-%s%d\n",
7420 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7421 else if (HAVE_AS_GOTOFF_IN_DATA)
7422 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7423 #if TARGET_MACHO
7424 else if (TARGET_MACHO)
7425 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7426 machopic_function_base_name () + 1);
7427 #endif
7428 else
7429 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7430 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7431 }
7432 \f
7433 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7434 for the target. */
7435
7436 void
7437 ix86_expand_clear (dest)
7438 rtx dest;
7439 {
7440 rtx tmp;
7441
7442 /* We play register width games, which are only valid after reload. */
7443 if (!reload_completed)
7444 abort ();
7445
7446 /* Avoid HImode and its attendant prefix byte. */
7447 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7448 dest = gen_rtx_REG (SImode, REGNO (dest));
7449
7450 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7451
7452 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7453 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7454 {
7455 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7456 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7457 }
7458
7459 emit_insn (tmp);
7460 }
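/* Example (added): clearing %eax with this function normally emits
   "xorl %eax, %eax" together with a clobber of the flags register; only
   when TARGET_USE_MOV0 is set and we are not optimizing for size does
   it fall back to the plain "movl $0, %eax" form.  */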
7461
7462 /* X is an unchanging MEM. If it is a constant pool reference, return
7463 the constant pool rtx, else NULL. */
7464
7465 static rtx
7466 maybe_get_pool_constant (x)
7467 rtx x;
7468 {
7469 x = XEXP (x, 0);
7470
7471 if (flag_pic)
7472 {
7473 if (GET_CODE (x) != PLUS)
7474 return NULL_RTX;
7475 if (XEXP (x, 0) != pic_offset_table_rtx)
7476 return NULL_RTX;
7477 x = XEXP (x, 1);
7478 if (GET_CODE (x) != CONST)
7479 return NULL_RTX;
7480 x = XEXP (x, 0);
7481 if (GET_CODE (x) != UNSPEC)
7482 return NULL_RTX;
7483 if (XINT (x, 1) != UNSPEC_GOTOFF)
7484 return NULL_RTX;
7485 x = XVECEXP (x, 0, 0);
7486 }
7487
7488 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7489 return get_pool_constant (x);
7490
7491 return NULL_RTX;
7492 }
7493
7494 void
7495 ix86_expand_move (mode, operands)
7496 enum machine_mode mode;
7497 rtx operands[];
7498 {
7499 int strict = (reload_in_progress || reload_completed);
7500 rtx insn, op0, op1, tmp;
7501
7502 op0 = operands[0];
7503 op1 = operands[1];
7504
7505 /* ??? We have a slight problem. We need to say that tls symbols are
7506 not legitimate constants so that reload does not helpfully reload
7507 these constants from a REG_EQUIV, which we cannot handle. (Recall
7508 that general- and local-dynamic address resolution requires a
7509 function call.)
7510
7511 However, if we say that tls symbols are not legitimate constants,
7512 then emit_move_insn will helpfully drop them into the constant pool.
7513
7514 It is far easier to work around emit_move_insn than reload. Recognize
7515 the MEM that we would have created and extract the symbol_ref. */
7516
7517 if (mode == Pmode
7518 && GET_CODE (op1) == MEM
7519 && RTX_UNCHANGING_P (op1))
7520 {
7521 tmp = maybe_get_pool_constant (op1);
7522 /* Note that we only care about symbolic constants here, which
7523 unlike CONST_INT will always have a proper mode. */
7524 if (tmp && GET_MODE (tmp) == Pmode)
7525 op1 = tmp;
7526 }
7527
7528 if (tls_symbolic_operand (op1, Pmode))
7529 {
7530 op1 = legitimize_address (op1, op1, VOIDmode);
7531 if (GET_CODE (op0) == MEM)
7532 {
7533 tmp = gen_reg_rtx (mode);
7534 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7535 op1 = tmp;
7536 }
7537 }
7538 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7539 {
7540 #if TARGET_MACHO
7541 if (MACHOPIC_PURE)
7542 {
7543 rtx temp = ((reload_in_progress
7544 || ((op0 && GET_CODE (op0) == REG)
7545 && mode == Pmode))
7546 ? op0 : gen_reg_rtx (Pmode));
7547 op1 = machopic_indirect_data_reference (op1, temp);
7548 op1 = machopic_legitimize_pic_address (op1, mode,
7549 temp == op1 ? 0 : temp);
7550 }
7551 else
7552 {
7553 if (MACHOPIC_INDIRECT)
7554 op1 = machopic_indirect_data_reference (op1, 0);
7555 }
7556 if (op0 != op1)
7557 {
7558 insn = gen_rtx_SET (VOIDmode, op0, op1);
7559 emit_insn (insn);
7560 }
7561 return;
7562 #endif /* TARGET_MACHO */
7563 if (GET_CODE (op0) == MEM)
7564 op1 = force_reg (Pmode, op1);
7565 else
7566 {
7567 rtx temp = op0;
7568 if (GET_CODE (temp) != REG)
7569 temp = gen_reg_rtx (Pmode);
7570 temp = legitimize_pic_address (op1, temp);
7571 if (temp == op0)
7572 return;
7573 op1 = temp;
7574 }
7575 }
7576 else
7577 {
7578 if (GET_CODE (op0) == MEM
7579 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7580 || !push_operand (op0, mode))
7581 && GET_CODE (op1) == MEM)
7582 op1 = force_reg (mode, op1);
7583
7584 if (push_operand (op0, mode)
7585 && ! general_no_elim_operand (op1, mode))
7586 op1 = copy_to_mode_reg (mode, op1);
7587
7588 /* Force large constants in 64bit compilation into a register
7589 to get them CSEd. */
7590 if (TARGET_64BIT && mode == DImode
7591 && immediate_operand (op1, mode)
7592 && !x86_64_zero_extended_value (op1)
7593 && !register_operand (op0, mode)
7594 && optimize && !reload_completed && !reload_in_progress)
7595 op1 = copy_to_mode_reg (mode, op1);
7596
7597 if (FLOAT_MODE_P (mode))
7598 {
7599 /* If we are loading a floating point constant to a register,
7600 force the value to memory now, since we'll get better code
7601 out of the back end. */
7602
7603 if (strict)
7604 ;
7605 else if (GET_CODE (op1) == CONST_DOUBLE
7606 && register_operand (op0, mode))
7607 op1 = validize_mem (force_const_mem (mode, op1));
7608 }
7609 }
7610
7611 insn = gen_rtx_SET (VOIDmode, op0, op1);
7612
7613 emit_insn (insn);
7614 }
7615
7616 void
7617 ix86_expand_vector_move (mode, operands)
7618 enum machine_mode mode;
7619 rtx operands[];
7620 {
7621 /* Force constants other than zero into memory. We do not know how
7622 the instructions used to build constants modify the upper 64 bits
7623 of the register; once we have that information we may be able
7624 to handle some of them more efficiently. */
7625 if ((reload_in_progress | reload_completed) == 0
7626 && register_operand (operands[0], mode)
7627 && CONSTANT_P (operands[1]))
7628 {
7629 rtx addr = gen_reg_rtx (Pmode);
7630 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7631 operands[1] = gen_rtx_MEM (mode, addr);
7632 }
7633
7634 /* Make operand1 a register if it isn't already. */
7635 if ((reload_in_progress | reload_completed) == 0
7636 && !register_operand (operands[0], mode)
7637 && !register_operand (operands[1], mode)
7638 && operands[1] != CONST0_RTX (mode))
7639 {
7640 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7641 emit_move_insn (operands[0], temp);
7642 return;
7643 }
7644
7645 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7646 }
7647
7648 /* Attempt to expand a binary operator. Make the expansion closer to the
7649 actual machine than just general_operand, which would allow 3 separate
7650 memory references (one output, two input) in a single insn. */
7651
7652 void
7653 ix86_expand_binary_operator (code, mode, operands)
7654 enum rtx_code code;
7655 enum machine_mode mode;
7656 rtx operands[];
7657 {
7658 int matching_memory;
7659 rtx src1, src2, dst, op, clob;
7660
7661 dst = operands[0];
7662 src1 = operands[1];
7663 src2 = operands[2];
7664
7665 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7666 if (GET_RTX_CLASS (code) == 'c'
7667 && (rtx_equal_p (dst, src2)
7668 || immediate_operand (src1, mode)))
7669 {
7670 rtx temp = src1;
7671 src1 = src2;
7672 src2 = temp;
7673 }
7674
7675 /* If the destination is memory, and we do not have matching source
7676 operands, do things in registers. */
7677 matching_memory = 0;
7678 if (GET_CODE (dst) == MEM)
7679 {
7680 if (rtx_equal_p (dst, src1))
7681 matching_memory = 1;
7682 else if (GET_RTX_CLASS (code) == 'c'
7683 && rtx_equal_p (dst, src2))
7684 matching_memory = 2;
7685 else
7686 dst = gen_reg_rtx (mode);
7687 }
7688
7689 /* Both source operands cannot be in memory. */
7690 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7691 {
7692 if (matching_memory != 2)
7693 src2 = force_reg (mode, src2);
7694 else
7695 src1 = force_reg (mode, src1);
7696 }
7697
7698 /* If the operation is not commutative, source 1 cannot be a constant
7699 or non-matching memory. */
7700 if ((CONSTANT_P (src1)
7701 || (!matching_memory && GET_CODE (src1) == MEM))
7702 && GET_RTX_CLASS (code) != 'c')
7703 src1 = force_reg (mode, src1);
7704
7705 /* If optimizing, copy to regs to improve CSE */
7706 if (optimize && ! no_new_pseudos)
7707 {
7708 if (GET_CODE (dst) == MEM)
7709 dst = gen_reg_rtx (mode);
7710 if (GET_CODE (src1) == MEM)
7711 src1 = force_reg (mode, src1);
7712 if (GET_CODE (src2) == MEM)
7713 src2 = force_reg (mode, src2);
7714 }
7715
7716 /* Emit the instruction. */
7717
7718 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7719 if (reload_in_progress)
7720 {
7721 /* Reload doesn't know about the flags register, and doesn't know that
7722 it doesn't want to clobber it. We can only do this with PLUS. */
7723 if (code != PLUS)
7724 abort ();
7725 emit_insn (op);
7726 }
7727 else
7728 {
7729 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7730 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7731 }
7732
7733 /* Fix up the destination if needed. */
7734 if (dst != operands[0])
7735 emit_move_insn (operands[0], dst);
7736 }
7737
7738 /* Return TRUE or FALSE depending on whether the binary operator meets the
7739 appropriate constraints. */
7740
7741 int
7742 ix86_binary_operator_ok (code, mode, operands)
7743 enum rtx_code code;
7744 enum machine_mode mode ATTRIBUTE_UNUSED;
7745 rtx operands[3];
7746 {
7747 /* Both source operands cannot be in memory. */
7748 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7749 return 0;
7750 /* If the operation is not commutative, source 1 cannot be a constant. */
7751 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7752 return 0;
7753 /* If the destination is memory, we must have a matching source operand. */
7754 if (GET_CODE (operands[0]) == MEM
7755 && ! (rtx_equal_p (operands[0], operands[1])
7756 || (GET_RTX_CLASS (code) == 'c'
7757 && rtx_equal_p (operands[0], operands[2]))))
7758 return 0;
7759 /* If the operation is not commutative and source 1 is memory, we must
7760 have a matching destination. */
7761 if (GET_CODE (operands[1]) == MEM
7762 && GET_RTX_CLASS (code) != 'c'
7763 && ! rtx_equal_p (operands[0], operands[1]))
7764 return 0;
7765 return 1;
7766 }
7767
7768 /* Attempt to expand a unary operator. Make the expansion closer to the
7769 actual machine than just general_operand, which would allow 2 separate
7770 memory references (one output, one input) in a single insn. */
7771
7772 void
7773 ix86_expand_unary_operator (code, mode, operands)
7774 enum rtx_code code;
7775 enum machine_mode mode;
7776 rtx operands[];
7777 {
7778 int matching_memory;
7779 rtx src, dst, op, clob;
7780
7781 dst = operands[0];
7782 src = operands[1];
7783
7784 /* If the destination is memory, and we do not have matching source
7785 operands, do things in registers. */
7786 matching_memory = 0;
7787 if (GET_CODE (dst) == MEM)
7788 {
7789 if (rtx_equal_p (dst, src))
7790 matching_memory = 1;
7791 else
7792 dst = gen_reg_rtx (mode);
7793 }
7794
7795 /* When source operand is memory, destination must match. */
7796 if (!matching_memory && GET_CODE (src) == MEM)
7797 src = force_reg (mode, src);
7798
7799 /* If optimizing, copy to regs to improve CSE */
7800 if (optimize && ! no_new_pseudos)
7801 {
7802 if (GET_CODE (dst) == MEM)
7803 dst = gen_reg_rtx (mode);
7804 if (GET_CODE (src) == MEM)
7805 src = force_reg (mode, src);
7806 }
7807
7808 /* Emit the instruction. */
7809
7810 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7811 if (reload_in_progress || code == NOT)
7812 {
7813 /* Reload doesn't know about the flags register, and doesn't know that
7814 it doesn't want to clobber it. */
7815 if (code != NOT)
7816 abort ();
7817 emit_insn (op);
7818 }
7819 else
7820 {
7821 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7822 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7823 }
7824
7825 /* Fix up the destination if needed. */
7826 if (dst != operands[0])
7827 emit_move_insn (operands[0], dst);
7828 }
7829
7830 /* Return TRUE or FALSE depending on whether the unary operator meets the
7831 appropriate constraints. */
7832
7833 int
7834 ix86_unary_operator_ok (code, mode, operands)
7835 enum rtx_code code ATTRIBUTE_UNUSED;
7836 enum machine_mode mode ATTRIBUTE_UNUSED;
7837 rtx operands[2] ATTRIBUTE_UNUSED;
7838 {
7839 /* If one of operands is memory, source and destination must match. */
7840 if ((GET_CODE (operands[0]) == MEM
7841 || GET_CODE (operands[1]) == MEM)
7842 && ! rtx_equal_p (operands[0], operands[1]))
7843 return FALSE;
7844 return TRUE;
7845 }
7846
7847 /* Return TRUE or FALSE depending on whether the first SET in INSN
7848 has source and destination with matching CC modes, and whether the
7849 CC mode is at least as constrained as REQ_MODE. */
7850
7851 int
7852 ix86_match_ccmode (insn, req_mode)
7853 rtx insn;
7854 enum machine_mode req_mode;
7855 {
7856 rtx set;
7857 enum machine_mode set_mode;
7858
7859 set = PATTERN (insn);
7860 if (GET_CODE (set) == PARALLEL)
7861 set = XVECEXP (set, 0, 0);
7862 if (GET_CODE (set) != SET)
7863 abort ();
7864 if (GET_CODE (SET_SRC (set)) != COMPARE)
7865 abort ();
7866
7867 set_mode = GET_MODE (SET_DEST (set));
7868 switch (set_mode)
7869 {
7870 case CCNOmode:
7871 if (req_mode != CCNOmode
7872 && (req_mode != CCmode
7873 || XEXP (SET_SRC (set), 1) != const0_rtx))
7874 return 0;
7875 break;
7876 case CCmode:
7877 if (req_mode == CCGCmode)
7878 return 0;
7879 /* FALLTHRU */
7880 case CCGCmode:
7881 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7882 return 0;
7883 /* FALLTHRU */
7884 case CCGOCmode:
7885 if (req_mode == CCZmode)
7886 return 0;
7887 /* FALLTHRU */
7888 case CCZmode:
7889 break;
7890
7891 default:
7892 abort ();
7893 }
7894
7895 return (GET_MODE (SET_SRC (set)) == set_mode);
7896 }
7897
7898 /* Generate insn patterns to do an integer compare of OPERANDS. */
7899
7900 static rtx
7901 ix86_expand_int_compare (code, op0, op1)
7902 enum rtx_code code;
7903 rtx op0, op1;
7904 {
7905 enum machine_mode cmpmode;
7906 rtx tmp, flags;
7907
7908 cmpmode = SELECT_CC_MODE (code, op0, op1);
7909 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7910
7911 /* This is very simple, but making the interface the same as in the
7912 FP case makes the rest of the code easier. */
7913 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7914 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7915
7916 /* Return the test that should be put into the flags user, i.e.
7917 the bcc, scc, or cmov instruction. */
7918 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7919 }
7920
7921 /* Figure out whether to use ordered or unordered fp comparisons.
7922 Return the appropriate mode to use. */
7923
7924 enum machine_mode
7925 ix86_fp_compare_mode (code)
7926 enum rtx_code code ATTRIBUTE_UNUSED;
7927 {
7928 /* ??? In order to make all comparisons reversible, we do all comparisons
7929 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7930 all forms of trapping and non-trapping comparisons, we can make inequality
7931 comparisons trapping again, since that results in better code when using
7932 FCOM based compares. */
7933 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7934 }
7935
7936 enum machine_mode
7937 ix86_cc_mode (code, op0, op1)
7938 enum rtx_code code;
7939 rtx op0, op1;
7940 {
7941 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7942 return ix86_fp_compare_mode (code);
7943 switch (code)
7944 {
7945 /* Only zero flag is needed. */
7946 case EQ: /* ZF=0 */
7947 case NE: /* ZF!=0 */
7948 return CCZmode;
7949 /* Codes needing carry flag. */
7950 case GEU: /* CF=0 */
7951 case GTU: /* CF=0 & ZF=0 */
7952 case LTU: /* CF=1 */
7953 case LEU: /* CF=1 | ZF=1 */
7954 return CCmode;
7955 /* Codes possibly doable only with sign flag when
7956 comparing against zero. */
7957 case GE: /* SF=OF or SF=0 */
7958 case LT: /* SF<>OF or SF=1 */
7959 if (op1 == const0_rtx)
7960 return CCGOCmode;
7961 else
7962 /* For other cases Carry flag is not required. */
7963 return CCGCmode;
7964 /* Codes doable only with the sign flag when comparing
7965 against zero, but for which we lack a jump instruction,
7966 so we need to use relational tests against overflow,
7967 which thus needs to be zero. */
7968 case GT: /* ZF=0 & SF=OF */
7969 case LE: /* ZF=1 | SF<>OF */
7970 if (op1 == const0_rtx)
7971 return CCNOmode;
7972 else
7973 return CCGCmode;
7974 /* The strcmp pattern does (use flags), and combine may ask us for a
7975 proper mode. */
7976 case USE:
7977 return CCmode;
7978 default:
7979 abort ();
7980 }
7981 }
7982
7983 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7984
7985 int
7986 ix86_use_fcomi_compare (code)
7987 enum rtx_code code ATTRIBUTE_UNUSED;
7988 {
7989 enum rtx_code swapped_code = swap_condition (code);
7990 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7991 || (ix86_fp_comparison_cost (swapped_code)
7992 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7993 }
7994
7995 /* Swap, force into registers, or otherwise massage the two operands
7996 to a fp comparison. The operands are updated in place; the new
7997 comparison code is returned. */
7998
7999 static enum rtx_code
8000 ix86_prepare_fp_compare_args (code, pop0, pop1)
8001 enum rtx_code code;
8002 rtx *pop0, *pop1;
8003 {
8004 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8005 rtx op0 = *pop0, op1 = *pop1;
8006 enum machine_mode op_mode = GET_MODE (op0);
8007 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8008
8009 /* All of the unordered compare instructions only work on registers.
8010 The same is true of the XFmode compare instructions. The same is
8011 true of the fcomi compare instructions. */
8012
8013 if (!is_sse
8014 && (fpcmp_mode == CCFPUmode
8015 || op_mode == XFmode
8016 || op_mode == TFmode
8017 || ix86_use_fcomi_compare (code)))
8018 {
8019 op0 = force_reg (op_mode, op0);
8020 op1 = force_reg (op_mode, op1);
8021 }
8022 else
8023 {
8024 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8025 things around if they appear profitable, otherwise force op0
8026 into a register. */
8027
8028 if (standard_80387_constant_p (op0) == 0
8029 || (GET_CODE (op0) == MEM
8030 && ! (standard_80387_constant_p (op1) == 0
8031 || GET_CODE (op1) == MEM)))
8032 {
8033 rtx tmp;
8034 tmp = op0, op0 = op1, op1 = tmp;
8035 code = swap_condition (code);
8036 }
8037
8038 if (GET_CODE (op0) != REG)
8039 op0 = force_reg (op_mode, op0);
8040
8041 if (CONSTANT_P (op1))
8042 {
8043 if (standard_80387_constant_p (op1))
8044 op1 = force_reg (op_mode, op1);
8045 else
8046 op1 = validize_mem (force_const_mem (op_mode, op1));
8047 }
8048 }
8049
8050 /* Try to rearrange the comparison to make it cheaper. */
8051 if (ix86_fp_comparison_cost (code)
8052 > ix86_fp_comparison_cost (swap_condition (code))
8053 && (GET_CODE (op1) == REG || !no_new_pseudos))
8054 {
8055 rtx tmp;
8056 tmp = op0, op0 = op1, op1 = tmp;
8057 code = swap_condition (code);
8058 if (GET_CODE (op0) != REG)
8059 op0 = force_reg (op_mode, op0);
8060 }
8061
8062 *pop0 = op0;
8063 *pop1 = op1;
8064 return code;
8065 }
8066
8067 /* Convert a comparison code we use to represent an FP comparison into an
8068 integer code that will result in a proper branch. Return UNKNOWN if no
8069 such code is available. */
8070 static enum rtx_code
8071 ix86_fp_compare_code_to_integer (code)
8072 enum rtx_code code;
8073 {
8074 switch (code)
8075 {
8076 case GT:
8077 return GTU;
8078 case GE:
8079 return GEU;
8080 case ORDERED:
8081 case UNORDERED:
8082 return code;
8083 break;
8084 case UNEQ:
8085 return EQ;
8086 break;
8087 case UNLT:
8088 return LTU;
8089 break;
8090 case UNLE:
8091 return LEU;
8092 break;
8093 case LTGT:
8094 return NE;
8095 break;
8096 default:
8097 return UNKNOWN;
8098 }
8099 }
8100
8101 /* Split comparison code CODE into comparisons we can do using branch
8102 instructions. BYPASS_CODE is the comparison code for the branch that
8103 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8104 is not required, its value is set to NIL.
8105 We never require more than two branches. */
8106 static void
8107 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8108 enum rtx_code code, *bypass_code, *first_code, *second_code;
8109 {
8110 *first_code = code;
8111 *bypass_code = NIL;
8112 *second_code = NIL;
8113
8114 /* The fcomi comparison sets flags as follows:
8115
8116 cmp ZF PF CF
8117 > 0 0 0
8118 < 0 0 1
8119 = 1 0 0
8120 un 1 1 1 */
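/* For reference, the unsigned condition codes used below test the flags
   as follows after fcomi/sahf:

     GTU (ja)    CF=0 && ZF=0
     GEU (jae)   CF=0
     LTU (jb)    CF=1
     LEU (jbe)   CF=1 || ZF=1
     EQ  (je)    ZF=1

   An unordered result sets ZF=PF=CF=1, so the ordered-only codes handled
   in the switch below need an extra UNORDERED branch under TARGET_IEEE_FP. */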
8121
8122 switch (code)
8123 {
8124 case GT: /* GTU - CF=0 & ZF=0 */
8125 case GE: /* GEU - CF=0 */
8126 case ORDERED: /* PF=0 */
8127 case UNORDERED: /* PF=1 */
8128 case UNEQ: /* EQ - ZF=1 */
8129 case UNLT: /* LTU - CF=1 */
8130 case UNLE: /* LEU - CF=1 | ZF=1 */
8131 case LTGT: /* EQ - ZF=0 */
8132 break;
8133 case LT: /* LTU - CF=1 - fails on unordered */
8134 *first_code = UNLT;
8135 *bypass_code = UNORDERED;
8136 break;
8137 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8138 *first_code = UNLE;
8139 *bypass_code = UNORDERED;
8140 break;
8141 case EQ: /* EQ - ZF=1 - fails on unordered */
8142 *first_code = UNEQ;
8143 *bypass_code = UNORDERED;
8144 break;
8145 case NE: /* NE - ZF=0 - fails on unordered */
8146 *first_code = LTGT;
8147 *second_code = UNORDERED;
8148 break;
8149 case UNGE: /* GEU - CF=0 - fails on unordered */
8150 *first_code = GE;
8151 *second_code = UNORDERED;
8152 break;
8153 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8154 *first_code = GT;
8155 *second_code = UNORDERED;
8156 break;
8157 default:
8158 abort ();
8159 }
8160 if (!TARGET_IEEE_FP)
8161 {
8162 *second_code = NIL;
8163 *bypass_code = NIL;
8164 }
8165 }
8166
8167 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8168 All of the following functions use the number of instructions as the cost metric.
8169 In the future this should be tweaked to compute bytes for optimize_size and
8170 to take into account the performance of various instructions on various CPUs. */
8171 static int
8172 ix86_fp_comparison_arithmetics_cost (code)
8173 enum rtx_code code;
8174 {
8175 if (!TARGET_IEEE_FP)
8176 return 4;
8177 /* The cost of code output by ix86_expand_fp_compare. */
8178 switch (code)
8179 {
8180 case UNLE:
8181 case UNLT:
8182 case LTGT:
8183 case GT:
8184 case GE:
8185 case UNORDERED:
8186 case ORDERED:
8187 case UNEQ:
8188 return 4;
8189 break;
8190 case LT:
8191 case NE:
8192 case EQ:
8193 case UNGE:
8194 return 5;
8195 break;
8196 case LE:
8197 case UNGT:
8198 return 6;
8199 break;
8200 default:
8201 abort ();
8202 }
8203 }
8204
8205 /* Return cost of comparison done using fcomi operation.
8206 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8207 static int
8208 ix86_fp_comparison_fcomi_cost (code)
8209 enum rtx_code code;
8210 {
8211 enum rtx_code bypass_code, first_code, second_code;
8212 /* Return an arbitrarily high cost when the instruction is not supported - this
8213 prevents gcc from using it. */
8214 if (!TARGET_CMOVE)
8215 return 1024;
8216 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8217 return (bypass_code != NIL || second_code != NIL) + 2;
8218 }
8219
8220 /* Return cost of comparison done using sahf operation.
8221 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8222 static int
8223 ix86_fp_comparison_sahf_cost (code)
8224 enum rtx_code code;
8225 {
8226 enum rtx_code bypass_code, first_code, second_code;
8227 /* Return an arbitrarily high cost when the instruction is not preferred - this
8228 keeps gcc from using it. */
8229 if (!TARGET_USE_SAHF && !optimize_size)
8230 return 1024;
8231 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8232 return (bypass_code != NIL || second_code != NIL) + 3;
8233 }
8234
8235 /* Compute cost of the comparison done using any method.
8236 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8237 static int
8238 ix86_fp_comparison_cost (code)
8239 enum rtx_code code;
8240 {
8241 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8242 int min;
8243
8244 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8245 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8246
8247 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8248 if (min > sahf_cost)
8249 min = sahf_cost;
8250 if (min > fcomi_cost)
8251 min = fcomi_cost;
8252 return min;
8253 }
8254
8255 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8256
8257 static rtx
8258 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8259 enum rtx_code code;
8260 rtx op0, op1, scratch;
8261 rtx *second_test;
8262 rtx *bypass_test;
8263 {
8264 enum machine_mode fpcmp_mode, intcmp_mode;
8265 rtx tmp, tmp2;
8266 int cost = ix86_fp_comparison_cost (code);
8267 enum rtx_code bypass_code, first_code, second_code;
8268
8269 fpcmp_mode = ix86_fp_compare_mode (code);
8270 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8271
8272 if (second_test)
8273 *second_test = NULL_RTX;
8274 if (bypass_test)
8275 *bypass_test = NULL_RTX;
8276
8277 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8278
8279 /* Do fcomi/sahf based test when profitable. */
8280 if ((bypass_code == NIL || bypass_test)
8281 && (second_code == NIL || second_test)
8282 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8283 {
8284 if (TARGET_CMOVE)
8285 {
8286 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8287 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8288 tmp);
8289 emit_insn (tmp);
8290 }
8291 else
8292 {
8293 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8294 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8295 if (!scratch)
8296 scratch = gen_reg_rtx (HImode);
8297 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8298 emit_insn (gen_x86_sahf_1 (scratch));
8299 }
8300
8301 /* The FP codes work out to act like unsigned. */
8302 intcmp_mode = fpcmp_mode;
8303 code = first_code;
8304 if (bypass_code != NIL)
8305 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8306 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8307 const0_rtx);
8308 if (second_code != NIL)
8309 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8310 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8311 const0_rtx);
8312 }
8313 else
8314 {
8315 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8316 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8317 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8318 if (!scratch)
8319 scratch = gen_reg_rtx (HImode);
8320 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8321
8322 /* In the unordered case, we have to check C2 for NaN's, which
8323 doesn't happen to work out to anything nice combination-wise.
8324 So do some bit twiddling on the value we've got in AH to come
8325 up with an appropriate set of condition codes. */
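/* For reference, after fnstsw the AH register holds bits 8-15 of the FPU
   status word, so within AH:

     C0 -> 0x01 (becomes CF after sahf)
     C2 -> 0x04 (set on unordered; becomes PF)
     C3 -> 0x40 (becomes ZF)

   The masks 0x45, 0x44, 0x05, 0x40 and 0x01 used below therefore select
   combinations of C3, C2 and C0. */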
8326
8327 intcmp_mode = CCNOmode;
8328 switch (code)
8329 {
8330 case GT:
8331 case UNGT:
8332 if (code == GT || !TARGET_IEEE_FP)
8333 {
8334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8335 code = EQ;
8336 }
8337 else
8338 {
8339 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8340 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8341 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8342 intcmp_mode = CCmode;
8343 code = GEU;
8344 }
8345 break;
8346 case LT:
8347 case UNLT:
8348 if (code == LT && TARGET_IEEE_FP)
8349 {
8350 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8351 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8352 intcmp_mode = CCmode;
8353 code = EQ;
8354 }
8355 else
8356 {
8357 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8358 code = NE;
8359 }
8360 break;
8361 case GE:
8362 case UNGE:
8363 if (code == GE || !TARGET_IEEE_FP)
8364 {
8365 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8366 code = EQ;
8367 }
8368 else
8369 {
8370 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8371 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8372 GEN_INT (0x01)));
8373 code = NE;
8374 }
8375 break;
8376 case LE:
8377 case UNLE:
8378 if (code == LE && TARGET_IEEE_FP)
8379 {
8380 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8381 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8382 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8383 intcmp_mode = CCmode;
8384 code = LTU;
8385 }
8386 else
8387 {
8388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8389 code = NE;
8390 }
8391 break;
8392 case EQ:
8393 case UNEQ:
8394 if (code == EQ && TARGET_IEEE_FP)
8395 {
8396 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8397 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8398 intcmp_mode = CCmode;
8399 code = EQ;
8400 }
8401 else
8402 {
8403 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8404 code = NE;
8405 break;
8406 }
8407 break;
8408 case NE:
8409 case LTGT:
8410 if (code == NE && TARGET_IEEE_FP)
8411 {
8412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8413 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8414 GEN_INT (0x40)));
8415 code = NE;
8416 }
8417 else
8418 {
8419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8420 code = EQ;
8421 }
8422 break;
8423
8424 case UNORDERED:
8425 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8426 code = NE;
8427 break;
8428 case ORDERED:
8429 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8430 code = EQ;
8431 break;
8432
8433 default:
8434 abort ();
8435 }
8436 }
8437
8438 /* Return the test that should be put into the flags user, i.e.
8439 the bcc, scc, or cmov instruction. */
8440 return gen_rtx_fmt_ee (code, VOIDmode,
8441 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8442 const0_rtx);
8443 }
8444
8445 rtx
8446 ix86_expand_compare (code, second_test, bypass_test)
8447 enum rtx_code code;
8448 rtx *second_test, *bypass_test;
8449 {
8450 rtx op0, op1, ret;
8451 op0 = ix86_compare_op0;
8452 op1 = ix86_compare_op1;
8453
8454 if (second_test)
8455 *second_test = NULL_RTX;
8456 if (bypass_test)
8457 *bypass_test = NULL_RTX;
8458
8459 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8460 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8461 second_test, bypass_test);
8462 else
8463 ret = ix86_expand_int_compare (code, op0, op1);
8464
8465 return ret;
8466 }
8467
8468 /* Return true if the CODE will result in a nontrivial jump sequence. */
8469 bool
8470 ix86_fp_jump_nontrivial_p (code)
8471 enum rtx_code code;
8472 {
8473 enum rtx_code bypass_code, first_code, second_code;
8474 if (!TARGET_CMOVE)
8475 return true;
8476 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8477 return bypass_code != NIL || second_code != NIL;
8478 }
8479
8480 void
8481 ix86_expand_branch (code, label)
8482 enum rtx_code code;
8483 rtx label;
8484 {
8485 rtx tmp;
8486
8487 switch (GET_MODE (ix86_compare_op0))
8488 {
8489 case QImode:
8490 case HImode:
8491 case SImode:
8492 simple:
8493 tmp = ix86_expand_compare (code, NULL, NULL);
8494 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8495 gen_rtx_LABEL_REF (VOIDmode, label),
8496 pc_rtx);
8497 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8498 return;
8499
8500 case SFmode:
8501 case DFmode:
8502 case XFmode:
8503 case TFmode:
8504 {
8505 rtvec vec;
8506 int use_fcomi;
8507 enum rtx_code bypass_code, first_code, second_code;
8508
8509 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8510 &ix86_compare_op1);
8511
8512 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8513
8514 /* Check whether we will use the natural sequence with one jump. If
8515 so, we can expand the jump early. Otherwise delay expansion by
8516 creating a compound insn so as not to confuse the optimizers. */
8517 if (bypass_code == NIL && second_code == NIL
8518 && TARGET_CMOVE)
8519 {
8520 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8521 gen_rtx_LABEL_REF (VOIDmode, label),
8522 pc_rtx, NULL_RTX);
8523 }
8524 else
8525 {
8526 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8527 ix86_compare_op0, ix86_compare_op1);
8528 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8529 gen_rtx_LABEL_REF (VOIDmode, label),
8530 pc_rtx);
8531 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8532
8533 use_fcomi = ix86_use_fcomi_compare (code);
8534 vec = rtvec_alloc (3 + !use_fcomi);
8535 RTVEC_ELT (vec, 0) = tmp;
8536 RTVEC_ELT (vec, 1)
8537 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8538 RTVEC_ELT (vec, 2)
8539 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8540 if (! use_fcomi)
8541 RTVEC_ELT (vec, 3)
8542 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8543
8544 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8545 }
8546 return;
8547 }
8548
8549 case DImode:
8550 if (TARGET_64BIT)
8551 goto simple;
8552 /* Expand DImode branch into multiple compare+branch. */
8553 {
8554 rtx lo[2], hi[2], label2;
8555 enum rtx_code code1, code2, code3;
8556
8557 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8558 {
8559 tmp = ix86_compare_op0;
8560 ix86_compare_op0 = ix86_compare_op1;
8561 ix86_compare_op1 = tmp;
8562 code = swap_condition (code);
8563 }
8564 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8565 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8566
8567 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8568 avoid two branches. This costs one extra insn, so disable when
8569 optimizing for size. */
8570
8571 if ((code == EQ || code == NE)
8572 && (!optimize_size
8573 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8574 {
8575 rtx xor0, xor1;
8576
8577 xor1 = hi[0];
8578 if (hi[1] != const0_rtx)
8579 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8580 NULL_RTX, 0, OPTAB_WIDEN);
8581
8582 xor0 = lo[0];
8583 if (lo[1] != const0_rtx)
8584 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8585 NULL_RTX, 0, OPTAB_WIDEN);
8586
8587 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8588 NULL_RTX, 0, OPTAB_WIDEN);
8589
8590 ix86_compare_op0 = tmp;
8591 ix86_compare_op1 = const0_rtx;
8592 ix86_expand_branch (code, label);
8593 return;
8594 }
8595
8596 /* Otherwise, if we are doing a less-than or greater-than-or-equal
8597 comparison, op1 is a constant, and the low word is zero, then we can
8598 just examine the high word. */
8599
8600 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8601 switch (code)
8602 {
8603 case LT: case LTU: case GE: case GEU:
8604 ix86_compare_op0 = hi[0];
8605 ix86_compare_op1 = hi[1];
8606 ix86_expand_branch (code, label);
8607 return;
8608 default:
8609 break;
8610 }
8611
8612 /* Otherwise, we need two or three jumps. */
8613
8614 label2 = gen_label_rtx ();
8615
8616 code1 = code;
8617 code2 = swap_condition (code);
8618 code3 = unsigned_condition (code);
8619
8620 switch (code)
8621 {
8622 case LT: case GT: case LTU: case GTU:
8623 break;
8624
8625 case LE: code1 = LT; code2 = GT; break;
8626 case GE: code1 = GT; code2 = LT; break;
8627 case LEU: code1 = LTU; code2 = GTU; break;
8628 case GEU: code1 = GTU; code2 = LTU; break;
8629
8630 case EQ: code1 = NIL; code2 = NE; break;
8631 case NE: code2 = NIL; break;
8632
8633 default:
8634 abort ();
8635 }
8636
8637 /*
8638 * a < b =>
8639 * if (hi(a) < hi(b)) goto true;
8640 * if (hi(a) > hi(b)) goto false;
8641 * if (lo(a) < lo(b)) goto true;
8642 * false:
8643 */
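/* Only the high word carries the sign, so when the high words compare
   equal the decision falls to the low words compared as unsigned values;
   that is why code3 is unsigned_condition (code).  E.g. for a signed
   a < b with hi(a) == hi(b), the result is (unsigned) lo(a) < (unsigned) lo(b). */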
8644
8645 ix86_compare_op0 = hi[0];
8646 ix86_compare_op1 = hi[1];
8647
8648 if (code1 != NIL)
8649 ix86_expand_branch (code1, label);
8650 if (code2 != NIL)
8651 ix86_expand_branch (code2, label2);
8652
8653 ix86_compare_op0 = lo[0];
8654 ix86_compare_op1 = lo[1];
8655 ix86_expand_branch (code3, label);
8656
8657 if (code2 != NIL)
8658 emit_label (label2);
8659 return;
8660 }
8661
8662 default:
8663 abort ();
8664 }
8665 }
8666
8667 /* Split branch based on floating point condition. */
8668 void
8669 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8670 enum rtx_code code;
8671 rtx op1, op2, target1, target2, tmp;
8672 {
8673 rtx second, bypass;
8674 rtx label = NULL_RTX;
8675 rtx condition;
8676 int bypass_probability = -1, second_probability = -1, probability = -1;
8677 rtx i;
8678
8679 if (target2 != pc_rtx)
8680 {
8681 rtx tmp = target2;
8682 code = reverse_condition_maybe_unordered (code);
8683 target2 = target1;
8684 target1 = tmp;
8685 }
8686
8687 condition = ix86_expand_fp_compare (code, op1, op2,
8688 tmp, &second, &bypass);
8689
8690 if (split_branch_probability >= 0)
8691 {
8692 /* Distribute the probabilities across the jumps.
8693 Assume that BYPASS and SECOND always test
8694 for UNORDERED. */
8695 probability = split_branch_probability;
8696
8697 /* A value of 1 is low enough that the probability need not
8698 be updated. Later we may run some experiments and see
8699 whether unordered values are more frequent in practice. */
8700 if (bypass)
8701 bypass_probability = 1;
8702 if (second)
8703 second_probability = 1;
8704 }
8705 if (bypass != NULL_RTX)
8706 {
8707 label = gen_label_rtx ();
8708 i = emit_jump_insn (gen_rtx_SET
8709 (VOIDmode, pc_rtx,
8710 gen_rtx_IF_THEN_ELSE (VOIDmode,
8711 bypass,
8712 gen_rtx_LABEL_REF (VOIDmode,
8713 label),
8714 pc_rtx)));
8715 if (bypass_probability >= 0)
8716 REG_NOTES (i)
8717 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8718 GEN_INT (bypass_probability),
8719 REG_NOTES (i));
8720 }
8721 i = emit_jump_insn (gen_rtx_SET
8722 (VOIDmode, pc_rtx,
8723 gen_rtx_IF_THEN_ELSE (VOIDmode,
8724 condition, target1, target2)));
8725 if (probability >= 0)
8726 REG_NOTES (i)
8727 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8728 GEN_INT (probability),
8729 REG_NOTES (i));
8730 if (second != NULL_RTX)
8731 {
8732 i = emit_jump_insn (gen_rtx_SET
8733 (VOIDmode, pc_rtx,
8734 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8735 target2)));
8736 if (second_probability >= 0)
8737 REG_NOTES (i)
8738 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8739 GEN_INT (second_probability),
8740 REG_NOTES (i));
8741 }
8742 if (label != NULL_RTX)
8743 emit_label (label);
8744 }
8745
8746 int
8747 ix86_expand_setcc (code, dest)
8748 enum rtx_code code;
8749 rtx dest;
8750 {
8751 rtx ret, tmp, tmpreg;
8752 rtx second_test, bypass_test;
8753
8754 if (GET_MODE (ix86_compare_op0) == DImode
8755 && !TARGET_64BIT)
8756 return 0; /* FAIL */
8757
8758 if (GET_MODE (dest) != QImode)
8759 abort ();
8760
8761 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8762 PUT_MODE (ret, QImode);
8763
8764 tmp = dest;
8765 tmpreg = dest;
8766
8767 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8768 if (bypass_test || second_test)
8769 {
8770 rtx test = second_test;
8771 int bypass = 0;
8772 rtx tmp2 = gen_reg_rtx (QImode);
8773 if (bypass_test)
8774 {
8775 if (second_test)
8776 abort ();
8777 test = bypass_test;
8778 bypass = 1;
8779 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8780 }
8781 PUT_MODE (test, QImode);
8782 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8783
8784 if (bypass)
8785 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8786 else
8787 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8788 }
8789
8790 return 1; /* DONE */
8791 }
8792
8793 int
8794 ix86_expand_int_movcc (operands)
8795 rtx operands[];
8796 {
8797 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8798 rtx compare_seq, compare_op;
8799 rtx second_test, bypass_test;
8800 enum machine_mode mode = GET_MODE (operands[0]);
8801
8802 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8803 If the comparison is done with an immediate, we can convert it to LTU or
8804 GEU by adjusting the integer. */
8805
8806 if ((code == LEU || code == GTU)
8807 && GET_CODE (ix86_compare_op1) == CONST_INT
8808 && mode != HImode
8809 && INTVAL (ix86_compare_op1) != -1
8810 /* For x86-64, the immediate field in the instruction is 32-bit
8811 signed, so we can't increment a DImode value above 0x7fffffff. */
8812 && (!TARGET_64BIT
8813 || GET_MODE (ix86_compare_op0) != DImode
8814 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8815 && GET_CODE (operands[2]) == CONST_INT
8816 && GET_CODE (operands[3]) == CONST_INT)
8817 {
8818 if (code == LEU)
8819 code = LTU;
8820 else
8821 code = GEU;
8822 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8823 GET_MODE (ix86_compare_op0));
8824 }
8825
8826 start_sequence ();
8827 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8828 compare_seq = get_insns ();
8829 end_sequence ();
8830
8831 compare_code = GET_CODE (compare_op);
8832
8833 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8834 HImode insns, we'd be swallowed in word prefix ops. */
8835
8836 if (mode != HImode
8837 && (mode != DImode || TARGET_64BIT)
8838 && GET_CODE (operands[2]) == CONST_INT
8839 && GET_CODE (operands[3]) == CONST_INT)
8840 {
8841 rtx out = operands[0];
8842 HOST_WIDE_INT ct = INTVAL (operands[2]);
8843 HOST_WIDE_INT cf = INTVAL (operands[3]);
8844 HOST_WIDE_INT diff;
8845
8846 if ((compare_code == LTU || compare_code == GEU)
8847 && !second_test && !bypass_test)
8848 {
8849 /* Detect overlap between destination and compare sources. */
8850 rtx tmp = out;
8851
8852 /* To simplify rest of code, restrict to the GEU case. */
8853 if (compare_code == LTU)
8854 {
8855 int tmp = ct;
8856 ct = cf;
8857 cf = tmp;
8858 compare_code = reverse_condition (compare_code);
8859 code = reverse_condition (code);
8860 }
8861 diff = ct - cf;
8862
8863 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8864 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8865 tmp = gen_reg_rtx (mode);
8866
8867 emit_insn (compare_seq);
8868 if (mode == DImode)
8869 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8870 else
8871 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8872
8873 if (diff == 1)
8874 {
8875 /*
8876 * cmpl op0,op1
8877 * sbbl dest,dest
8878 * [addl dest, ct]
8879 *
8880 * Size 5 - 8.
8881 */
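/*
 * Illustrative C-level sketch (for the GEU case reached here, with the
 * operands compared as unsigned values):
 *
 *   dest = ct - (op0 < op1);
 *
 * sbbl leaves -1 in dest exactly when the compare set the carry flag,
 * and cf == ct - 1 when diff == 1.
 */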
8882 if (ct)
8883 tmp = expand_simple_binop (mode, PLUS,
8884 tmp, GEN_INT (ct),
8885 tmp, 1, OPTAB_DIRECT);
8886 }
8887 else if (cf == -1)
8888 {
8889 /*
8890 * cmpl op0,op1
8891 * sbbl dest,dest
8892 * orl $ct, dest
8893 *
8894 * Size 8.
8895 */
8896 tmp = expand_simple_binop (mode, IOR,
8897 tmp, GEN_INT (ct),
8898 tmp, 1, OPTAB_DIRECT);
8899 }
8900 else if (diff == -1 && ct)
8901 {
8902 /*
8903 * cmpl op0,op1
8904 * sbbl dest,dest
8905 * notl dest
8906 * [addl dest, cf]
8907 *
8908 * Size 8 - 11.
8909 */
8910 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8911 if (cf)
8912 tmp = expand_simple_binop (mode, PLUS,
8913 tmp, GEN_INT (cf),
8914 tmp, 1, OPTAB_DIRECT);
8915 }
8916 else
8917 {
8918 /*
8919 * cmpl op0,op1
8920 * sbbl dest,dest
8921 * [notl dest]
8922 * andl cf - ct, dest
8923 * [addl dest, ct]
8924 *
8925 * Size 8 - 11.
8926 */
8927
8928 if (cf == 0)
8929 {
8930 cf = ct;
8931 ct = 0;
8932 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8933 }
8934
8935 tmp = expand_simple_binop (mode, AND,
8936 tmp,
8937 gen_int_mode (cf - ct, mode),
8938 tmp, 1, OPTAB_DIRECT);
8939 if (ct)
8940 tmp = expand_simple_binop (mode, PLUS,
8941 tmp, GEN_INT (ct),
8942 tmp, 1, OPTAB_DIRECT);
8943 }
8944
8945 if (tmp != out)
8946 emit_move_insn (out, tmp);
8947
8948 return 1; /* DONE */
8949 }
8950
8951 diff = ct - cf;
8952 if (diff < 0)
8953 {
8954 HOST_WIDE_INT tmp;
8955 tmp = ct, ct = cf, cf = tmp;
8956 diff = -diff;
8957 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8958 {
8959 /* We may be reversing an unordered compare to a normal compare, which
8960 is not valid in general (we may convert a non-trapping condition
8961 to a trapping one); however, on i386 we currently emit all
8962 comparisons unordered. */
8963 compare_code = reverse_condition_maybe_unordered (compare_code);
8964 code = reverse_condition_maybe_unordered (code);
8965 }
8966 else
8967 {
8968 compare_code = reverse_condition (compare_code);
8969 code = reverse_condition (code);
8970 }
8971 }
8972
8973 compare_code = NIL;
8974 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8975 && GET_CODE (ix86_compare_op1) == CONST_INT)
8976 {
8977 if (ix86_compare_op1 == const0_rtx
8978 && (code == LT || code == GE))
8979 compare_code = code;
8980 else if (ix86_compare_op1 == constm1_rtx)
8981 {
8982 if (code == LE)
8983 compare_code = LT;
8984 else if (code == GT)
8985 compare_code = GE;
8986 }
8987 }
8988
8989 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8990 if (compare_code != NIL
8991 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8992 && (cf == -1 || ct == -1))
8993 {
8994 /* If the lea code below could be used, optimize
8995 only if it results in a 2-insn sequence. */
8996
8997 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8998 || diff == 3 || diff == 5 || diff == 9)
8999 || (compare_code == LT && ct == -1)
9000 || (compare_code == GE && cf == -1))
9001 {
9002 /*
9003 * notl op1 (if necessary)
9004 * sarl $31, op1
9005 * orl cf, op1
9006 */
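/*
 * Illustrative C-level sketch (assuming 32-bit operands):
 *
 *   dest = (op1 >> 31) | cf;
 *
 * The arithmetic shift turns the sign bit into an all-ones or all-zero
 * mask, giving -1 when the value is negative and cf otherwise.
 */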
9007 if (ct != -1)
9008 {
9009 cf = ct;
9010 ct = -1;
9011 code = reverse_condition (code);
9012 }
9013
9014 out = emit_store_flag (out, code, ix86_compare_op0,
9015 ix86_compare_op1, VOIDmode, 0, -1);
9016
9017 out = expand_simple_binop (mode, IOR,
9018 out, GEN_INT (cf),
9019 out, 1, OPTAB_DIRECT);
9020 if (out != operands[0])
9021 emit_move_insn (operands[0], out);
9022
9023 return 1; /* DONE */
9024 }
9025 }
9026
9027 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9028 || diff == 3 || diff == 5 || diff == 9)
9029 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9030 {
9031 /*
9032 * xorl dest,dest
9033 * cmpl op1,op2
9034 * setcc dest
9035 * lea cf(dest*(ct-cf)),dest
9036 *
9037 * Size 14.
9038 *
9039 * This also catches the degenerate setcc-only case.
9040 */
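/*
 * Illustrative C-level sketch: with cond being the 0/1 setcc result,
 *
 *   dest = cf + cond * (ct - cf);
 *
 * the multiply by 1, 2, 3, 4, 5, 8 or 9 and the addition of cf fold
 * into a single lea where possible.
 */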
9041
9042 rtx tmp;
9043 int nops;
9044
9045 out = emit_store_flag (out, code, ix86_compare_op0,
9046 ix86_compare_op1, VOIDmode, 0, 1);
9047
9048 nops = 0;
9049 /* On x86_64 the lea instruction operates on Pmode, so we need
9050 the arithmetic done in the proper mode to match. */
9051 if (diff == 1)
9052 tmp = out;
9053 else
9054 {
9055 rtx out1;
9056 out1 = out;
9057 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9058 nops++;
9059 if (diff & 1)
9060 {
9061 tmp = gen_rtx_PLUS (mode, tmp, out1);
9062 nops++;
9063 }
9064 }
9065 if (cf != 0)
9066 {
9067 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9068 nops++;
9069 }
9070 if (tmp != out
9071 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9072 {
9073 if (nops == 1)
9074 {
9075 rtx clob;
9076
9077 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9078 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9079
9080 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9081 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9082 emit_insn (tmp);
9083 }
9084 else
9085 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9086 }
9087 if (out != operands[0])
9088 emit_move_insn (operands[0], copy_rtx (out));
9089
9090 return 1; /* DONE */
9091 }
9092
9093 /*
9094 * General case: Jumpful:
9095 * xorl dest,dest cmpl op1, op2
9096 * cmpl op1, op2 movl ct, dest
9097 * setcc dest jcc 1f
9098 * decl dest movl cf, dest
9099 * andl (cf-ct),dest 1:
9100 * addl ct,dest
9101 *
9102 * Size 20. Size 14.
9103 *
9104 * This is reasonably steep, but branch mispredict costs are
9105 * high on modern cpus, so consider failing only if optimizing
9106 * for space.
9107 *
9108 * %%% Parameterize branch_cost on the tuning architecture, then
9109 * use that. The 80386 couldn't care less about mispredicts.
9110 */
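/*
 * Illustrative C-level sketch of the jumpless sequence: with cond being
 * the 0/1 setcc result,
 *
 *   dest = ((cond - 1) & (cf - ct)) + ct;
 *
 * which yields ct when the condition holds and cf otherwise.
 */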
9111
9112 if (!optimize_size && !TARGET_CMOVE)
9113 {
9114 if (cf == 0)
9115 {
9116 cf = ct;
9117 ct = 0;
9118 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9119 /* We may be reversing an unordered compare to a normal compare,
9120 which is not valid in general (we may convert a non-trapping
9121 condition to a trapping one); however, on i386 we currently
9122 emit all comparisons unordered. */
9123 code = reverse_condition_maybe_unordered (code);
9124 else
9125 {
9126 code = reverse_condition (code);
9127 if (compare_code != NIL)
9128 compare_code = reverse_condition (compare_code);
9129 }
9130 }
9131
9132 if (compare_code != NIL)
9133 {
9134 /* notl op1 (if needed)
9135 sarl $31, op1
9136 andl (cf-ct), op1
9137 addl ct, op1
9138
9139 For x < 0 (resp. x <= -1) there will be no notl,
9140 so if possible swap the constants to get rid of the
9141 complement.
9142 True/false will be -1/0 while code below (store flag
9143 followed by decrement) is 0/-1, so the constants need
9144 to be exchanged once more. */
9145
9146 if (compare_code == GE || !cf)
9147 {
9148 code = reverse_condition (code);
9149 compare_code = LT;
9150 }
9151 else
9152 {
9153 HOST_WIDE_INT tmp = cf;
9154 cf = ct;
9155 ct = tmp;
9156 }
9157
9158 out = emit_store_flag (out, code, ix86_compare_op0,
9159 ix86_compare_op1, VOIDmode, 0, -1);
9160 }
9161 else
9162 {
9163 out = emit_store_flag (out, code, ix86_compare_op0,
9164 ix86_compare_op1, VOIDmode, 0, 1);
9165
9166 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9167 out, 1, OPTAB_DIRECT);
9168 }
9169
9170 out = expand_simple_binop (mode, AND, out,
9171 gen_int_mode (cf - ct, mode),
9172 out, 1, OPTAB_DIRECT);
9173 if (ct)
9174 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9175 out, 1, OPTAB_DIRECT);
9176 if (out != operands[0])
9177 emit_move_insn (operands[0], out);
9178
9179 return 1; /* DONE */
9180 }
9181 }
9182
9183 if (!TARGET_CMOVE)
9184 {
9185 /* Try a few more things with specific constants and a variable. */
9186
9187 optab op;
9188 rtx var, orig_out, out, tmp;
9189
9190 if (optimize_size)
9191 return 0; /* FAIL */
9192
9193 /* If one of the two operands is an interesting constant, load a
9194 constant with the above and mask it in with a logical operation. */
9195
9196 if (GET_CODE (operands[2]) == CONST_INT)
9197 {
9198 var = operands[3];
9199 if (INTVAL (operands[2]) == 0)
9200 operands[3] = constm1_rtx, op = and_optab;
9201 else if (INTVAL (operands[2]) == -1)
9202 operands[3] = const0_rtx, op = ior_optab;
9203 else
9204 return 0; /* FAIL */
9205 }
9206 else if (GET_CODE (operands[3]) == CONST_INT)
9207 {
9208 var = operands[2];
9209 if (INTVAL (operands[3]) == 0)
9210 operands[2] = constm1_rtx, op = and_optab;
9211 else if (INTVAL (operands[3]) == -1)
9212 operands[2] = const0_rtx, op = ior_optab;
9213 else
9214 return 0; /* FAIL */
9215 }
9216 else
9217 return 0; /* FAIL */
9218
9219 orig_out = operands[0];
9220 tmp = gen_reg_rtx (mode);
9221 operands[0] = tmp;
9222
9223 /* Recurse to get the constant loaded. */
9224 if (ix86_expand_int_movcc (operands) == 0)
9225 return 0; /* FAIL */
9226
9227 /* Mask in the interesting variable. */
9228 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9229 OPTAB_WIDEN);
9230 if (out != orig_out)
9231 emit_move_insn (orig_out, out);
9232
9233 return 1; /* DONE */
9234 }
9235
9236 /*
9237 * For comparison with above,
9238 *
9239 * movl cf,dest
9240 * movl ct,tmp
9241 * cmpl op1,op2
9242 * cmovcc tmp,dest
9243 *
9244 * Size 15.
9245 */
9246
9247 if (! nonimmediate_operand (operands[2], mode))
9248 operands[2] = force_reg (mode, operands[2]);
9249 if (! nonimmediate_operand (operands[3], mode))
9250 operands[3] = force_reg (mode, operands[3]);
9251
9252 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9253 {
9254 rtx tmp = gen_reg_rtx (mode);
9255 emit_move_insn (tmp, operands[3]);
9256 operands[3] = tmp;
9257 }
9258 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9259 {
9260 rtx tmp = gen_reg_rtx (mode);
9261 emit_move_insn (tmp, operands[2]);
9262 operands[2] = tmp;
9263 }
9264 if (! register_operand (operands[2], VOIDmode)
9265 && ! register_operand (operands[3], VOIDmode))
9266 operands[2] = force_reg (mode, operands[2]);
9267
9268 emit_insn (compare_seq);
9269 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9270 gen_rtx_IF_THEN_ELSE (mode,
9271 compare_op, operands[2],
9272 operands[3])));
9273 if (bypass_test)
9274 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9275 gen_rtx_IF_THEN_ELSE (mode,
9276 bypass_test,
9277 operands[3],
9278 operands[0])));
9279 if (second_test)
9280 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9281 gen_rtx_IF_THEN_ELSE (mode,
9282 second_test,
9283 operands[2],
9284 operands[0])));
9285
9286 return 1; /* DONE */
9287 }
9288
9289 int
9290 ix86_expand_fp_movcc (operands)
9291 rtx operands[];
9292 {
9293 enum rtx_code code;
9294 rtx tmp;
9295 rtx compare_op, second_test, bypass_test;
9296
9297 /* For SF/DFmode conditional moves based on comparisons
9298 in the same mode, we may want to use SSE min/max instructions. */
9299 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9300 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9301 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9302 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9303 && (!TARGET_IEEE_FP
9304 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9305 /* We may be called from the post-reload splitter. */
9306 && (!REG_P (operands[0])
9307 || SSE_REG_P (operands[0])
9308 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9309 {
9310 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9311 code = GET_CODE (operands[1]);
9312
9313 /* See if we have a (cross) match between the comparison operands and
9314 the conditional move operands. */
9315 if (rtx_equal_p (operands[2], op1))
9316 {
9317 rtx tmp = op0;
9318 op0 = op1;
9319 op1 = tmp;
9320 code = reverse_condition_maybe_unordered (code);
9321 }
9322 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9323 {
9324 /* Check for min operation. */
9325 if (code == LT)
9326 {
9327 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9328 if (memory_operand (op0, VOIDmode))
9329 op0 = force_reg (GET_MODE (operands[0]), op0);
9330 if (GET_MODE (operands[0]) == SFmode)
9331 emit_insn (gen_minsf3 (operands[0], op0, op1));
9332 else
9333 emit_insn (gen_mindf3 (operands[0], op0, op1));
9334 return 1;
9335 }
9336 /* Check for max operation. */
9337 if (code == GT)
9338 {
9339 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9340 if (memory_operand (op0, VOIDmode))
9341 op0 = force_reg (GET_MODE (operands[0]), op0);
9342 if (GET_MODE (operands[0]) == SFmode)
9343 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9344 else
9345 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9346 return 1;
9347 }
9348 }
9349 /* Arrange for the condition to be an sse_comparison_operator. If we
9350 are in non-IEEE mode, try to canonicalize the destination operand
9351 to be first in the comparison - this helps reload avoid extra
9352 moves. */
9353 if (!sse_comparison_operator (operands[1], VOIDmode)
9354 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9355 {
9356 rtx tmp = ix86_compare_op0;
9357 ix86_compare_op0 = ix86_compare_op1;
9358 ix86_compare_op1 = tmp;
9359 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9360 VOIDmode, ix86_compare_op0,
9361 ix86_compare_op1);
9362 }
9363 /* Similarly, try to arrange for the result to be the first operand of the
9364 conditional move. We also don't support the NE comparison on SSE, so try
9365 to avoid it. */
9366 if ((rtx_equal_p (operands[0], operands[3])
9367 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9368 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9369 {
9370 rtx tmp = operands[2];
9371 operands[2] = operands[3];
9372 operands[3] = tmp;
9373 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9374 (GET_CODE (operands[1])),
9375 VOIDmode, ix86_compare_op0,
9376 ix86_compare_op1);
9377 }
9378 if (GET_MODE (operands[0]) == SFmode)
9379 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9380 operands[2], operands[3],
9381 ix86_compare_op0, ix86_compare_op1));
9382 else
9383 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9384 operands[2], operands[3],
9385 ix86_compare_op0, ix86_compare_op1));
9386 return 1;
9387 }
9388
9389 /* The floating point conditional move instructions don't directly
9390 support conditions resulting from a signed integer comparison. */
9391
9392 code = GET_CODE (operands[1]);
9393 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9394
9395 /* The floating point conditional move instructions don't directly
9396 support signed integer comparisons. */
9397
9398 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9399 {
9400 if (second_test != NULL || bypass_test != NULL)
9401 abort ();
9402 tmp = gen_reg_rtx (QImode);
9403 ix86_expand_setcc (code, tmp);
9404 code = NE;
9405 ix86_compare_op0 = tmp;
9406 ix86_compare_op1 = const0_rtx;
9407 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9408 }
9409 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9410 {
9411 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9412 emit_move_insn (tmp, operands[3]);
9413 operands[3] = tmp;
9414 }
9415 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9416 {
9417 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9418 emit_move_insn (tmp, operands[2]);
9419 operands[2] = tmp;
9420 }
9421
9422 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9423 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9424 compare_op,
9425 operands[2],
9426 operands[3])));
9427 if (bypass_test)
9428 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9429 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9430 bypass_test,
9431 operands[3],
9432 operands[0])));
9433 if (second_test)
9434 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9435 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9436 second_test,
9437 operands[2],
9438 operands[0])));
9439
9440 return 1;
9441 }
9442
9443 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9444 works for floating point parameters and non-offsettable memories.
9445 For pushes, it returns just stack offsets; the values will be saved
9446 in the right order. At most three parts are generated. */
9447
9448 static int
9449 ix86_split_to_parts (operand, parts, mode)
9450 rtx operand;
9451 rtx *parts;
9452 enum machine_mode mode;
9453 {
9454 int size;
9455
9456 if (!TARGET_64BIT)
9457 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9458 else
9459 size = (GET_MODE_SIZE (mode) + 4) / 8;
9460
9461 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9462 abort ();
9463 if (size < 2 || size > 3)
9464 abort ();
9465
9466 /* Optimize constant pool references into immediates. This is used by fp
9467 moves, which force all constants to memory to allow combining. */
9468 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9469 {
9470 rtx tmp = maybe_get_pool_constant (operand);
9471 if (tmp)
9472 operand = tmp;
9473 }
9474
9475 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9476 {
9477 /* The only non-offsettable memories we handle are pushes. */
9478 if (! push_operand (operand, VOIDmode))
9479 abort ();
9480
9481 operand = copy_rtx (operand);
9482 PUT_MODE (operand, Pmode);
9483 parts[0] = parts[1] = parts[2] = operand;
9484 }
9485 else if (!TARGET_64BIT)
9486 {
9487 if (mode == DImode)
9488 split_di (&operand, 1, &parts[0], &parts[1]);
9489 else
9490 {
9491 if (REG_P (operand))
9492 {
9493 if (!reload_completed)
9494 abort ();
9495 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9496 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9497 if (size == 3)
9498 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9499 }
9500 else if (offsettable_memref_p (operand))
9501 {
9502 operand = adjust_address (operand, SImode, 0);
9503 parts[0] = operand;
9504 parts[1] = adjust_address (operand, SImode, 4);
9505 if (size == 3)
9506 parts[2] = adjust_address (operand, SImode, 8);
9507 }
9508 else if (GET_CODE (operand) == CONST_DOUBLE)
9509 {
9510 REAL_VALUE_TYPE r;
9511 long l[4];
9512
9513 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9514 switch (mode)
9515 {
9516 case XFmode:
9517 case TFmode:
9518 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9519 parts[2] = gen_int_mode (l[2], SImode);
9520 break;
9521 case DFmode:
9522 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9523 break;
9524 default:
9525 abort ();
9526 }
9527 parts[1] = gen_int_mode (l[1], SImode);
9528 parts[0] = gen_int_mode (l[0], SImode);
9529 }
9530 else
9531 abort ();
9532 }
9533 }
9534 else
9535 {
9536 if (mode == TImode)
9537 split_ti (&operand, 1, &parts[0], &parts[1]);
9538 if (mode == XFmode || mode == TFmode)
9539 {
9540 if (REG_P (operand))
9541 {
9542 if (!reload_completed)
9543 abort ();
9544 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9545 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9546 }
9547 else if (offsettable_memref_p (operand))
9548 {
9549 operand = adjust_address (operand, DImode, 0);
9550 parts[0] = operand;
9551 parts[1] = adjust_address (operand, SImode, 8);
9552 }
9553 else if (GET_CODE (operand) == CONST_DOUBLE)
9554 {
9555 REAL_VALUE_TYPE r;
9556 long l[3];
9557
9558 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9559 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9560 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9561 if (HOST_BITS_PER_WIDE_INT >= 64)
9562 parts[0]
9563 = gen_int_mode
9564 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9565 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9566 DImode);
9567 else
9568 parts[0] = immed_double_const (l[0], l[1], DImode);
9569 parts[1] = gen_int_mode (l[2], SImode);
9570 }
9571 else
9572 abort ();
9573 }
9574 }
9575
9576 return size;
9577 }
9578
9579 /* Emit insns to perform a move or push of DI, DF, and XF values.
9580 Return false when normal moves are needed; true when all required
9581 insns have been emitted. Operands 2-4 receive the destination parts
9582 in the correct order; operands 5-7 contain the source values. */
9583
9584 void
9585 ix86_split_long_move (operands)
9586 rtx operands[];
9587 {
9588 rtx part[2][3];
9589 int nparts;
9590 int push = 0;
9591 int collisions = 0;
9592 enum machine_mode mode = GET_MODE (operands[0]);
9593
9594 /* The DFmode expanders may ask us to move a double.
9595 For a 64-bit target this is a single move. By hiding that fact
9596 here we simplify the i386.md splitters. */
9597 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9598 {
9599 /* Optimize constant pool references into immediates. This is used by
9600 fp moves, which force all constants to memory to allow combining. */
9601
9602 if (GET_CODE (operands[1]) == MEM
9603 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9604 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9605 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9606 if (push_operand (operands[0], VOIDmode))
9607 {
9608 operands[0] = copy_rtx (operands[0]);
9609 PUT_MODE (operands[0], Pmode);
9610 }
9611 else
9612 operands[0] = gen_lowpart (DImode, operands[0]);
9613 operands[1] = gen_lowpart (DImode, operands[1]);
9614 emit_move_insn (operands[0], operands[1]);
9615 return;
9616 }
9617
9618 /* The only non-offsettable memory we handle is push. */
9619 if (push_operand (operands[0], VOIDmode))
9620 push = 1;
9621 else if (GET_CODE (operands[0]) == MEM
9622 && ! offsettable_memref_p (operands[0]))
9623 abort ();
9624
9625 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9626 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9627
9628 /* When emitting a push, watch out for source operands on the stack. */
9629 if (push && GET_CODE (operands[1]) == MEM
9630 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9631 {
9632 if (nparts == 3)
9633 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9634 XEXP (part[1][2], 0));
9635 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9636 XEXP (part[1][1], 0));
9637 }
9638
9639 /* We need to do the copy in the right order in case an address register
9640 of the source overlaps the destination. */
9641 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9642 {
9643 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9644 collisions++;
9645 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9646 collisions++;
9647 if (nparts == 3
9648 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9649 collisions++;
9650
9651 /* Collision in the middle part can be handled by reordering. */
9652 if (collisions == 1 && nparts == 3
9653 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9654 {
9655 rtx tmp;
9656 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9657 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9658 }
9659
9660 /* If there are more collisions, we can't handle them by reordering.
9661 Do an lea to the last part and use only one colliding move. */
9662 else if (collisions > 1)
9663 {
9664 collisions = 1;
9665 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9666 XEXP (part[1][0], 0)));
9667 part[1][0] = change_address (part[1][0],
9668 TARGET_64BIT ? DImode : SImode,
9669 part[0][nparts - 1]);
9670 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9671 if (nparts == 3)
9672 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9673 }
9674 }
9675
9676 if (push)
9677 {
9678 if (!TARGET_64BIT)
9679 {
9680 if (nparts == 3)
9681 {
9682 /* We use only the first 12 bytes of the TFmode value, but for pushing
9683 we are required to adjust the stack as if we were pushing a real
9684 16-byte value. */
9685 if (mode == TFmode && !TARGET_64BIT)
9686 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9687 GEN_INT (-4)));
9688 emit_move_insn (part[0][2], part[1][2]);
9689 }
9690 }
9691 else
9692 {
9693 /* In 64bit mode we don't have a 32bit push available. In case this is
9694 a register, that is OK - we will just use the larger counterpart. We
9695 also retype the memory - this comes from an attempt to avoid the REX
9696 prefix on moving the second half of the TFmode value. */
9697 if (GET_MODE (part[1][1]) == SImode)
9698 {
9699 if (GET_CODE (part[1][1]) == MEM)
9700 part[1][1] = adjust_address (part[1][1], DImode, 0);
9701 else if (REG_P (part[1][1]))
9702 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9703 else
9704 abort ();
9705 if (GET_MODE (part[1][0]) == SImode)
9706 part[1][0] = part[1][1];
9707 }
9708 }
9709 emit_move_insn (part[0][1], part[1][1]);
9710 emit_move_insn (part[0][0], part[1][0]);
9711 return;
9712 }
9713
9714 /* Choose the correct order so we do not overwrite the source before it is copied. */
9715 if ((REG_P (part[0][0])
9716 && REG_P (part[1][1])
9717 && (REGNO (part[0][0]) == REGNO (part[1][1])
9718 || (nparts == 3
9719 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9720 || (collisions > 0
9721 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9722 {
9723 if (nparts == 3)
9724 {
9725 operands[2] = part[0][2];
9726 operands[3] = part[0][1];
9727 operands[4] = part[0][0];
9728 operands[5] = part[1][2];
9729 operands[6] = part[1][1];
9730 operands[7] = part[1][0];
9731 }
9732 else
9733 {
9734 operands[2] = part[0][1];
9735 operands[3] = part[0][0];
9736 operands[5] = part[1][1];
9737 operands[6] = part[1][0];
9738 }
9739 }
9740 else
9741 {
9742 if (nparts == 3)
9743 {
9744 operands[2] = part[0][0];
9745 operands[3] = part[0][1];
9746 operands[4] = part[0][2];
9747 operands[5] = part[1][0];
9748 operands[6] = part[1][1];
9749 operands[7] = part[1][2];
9750 }
9751 else
9752 {
9753 operands[2] = part[0][0];
9754 operands[3] = part[0][1];
9755 operands[5] = part[1][0];
9756 operands[6] = part[1][1];
9757 }
9758 }
9759 emit_move_insn (operands[2], operands[5]);
9760 emit_move_insn (operands[3], operands[6]);
9761 if (nparts == 3)
9762 emit_move_insn (operands[4], operands[7]);
9763
9764 return;
9765 }
9766
9767 void
9768 ix86_split_ashldi (operands, scratch)
9769 rtx *operands, scratch;
9770 {
9771 rtx low[2], high[2];
9772 int count;
9773
9774 if (GET_CODE (operands[2]) == CONST_INT)
9775 {
9776 split_di (operands, 2, low, high);
9777 count = INTVAL (operands[2]) & 63;
9778
9779 if (count >= 32)
9780 {
9781 emit_move_insn (high[0], low[1]);
9782 emit_move_insn (low[0], const0_rtx);
9783
9784 if (count > 32)
9785 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9786 }
9787 else
9788 {
9789 if (!rtx_equal_p (operands[0], operands[1]))
9790 emit_move_insn (operands[0], operands[1]);
9791 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9792 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9793 }
9794 }
9795 else
9796 {
9797 if (!rtx_equal_p (operands[0], operands[1]))
9798 emit_move_insn (operands[0], operands[1]);
9799
9800 split_di (operands, 1, low, high);
9801
9802 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9803 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9804
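/* The shld/shl pair above shifts by the count modulo 32.  If the runtime
   count turns out to be 32..63, the value that belongs in the high word
   has in fact been computed in the low word, so the adjustment pattern
   emitted below conditionally moves the low word into the high word and
   clears the low word (with CMOV when available, with a branch otherwise).  */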
9805 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9806 {
9807 if (! no_new_pseudos)
9808 scratch = force_reg (SImode, const0_rtx);
9809 else
9810 emit_move_insn (scratch, const0_rtx);
9811
9812 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9813 scratch));
9814 }
9815 else
9816 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9817 }
9818 }
9819
9820 void
9821 ix86_split_ashrdi (operands, scratch)
9822 rtx *operands, scratch;
9823 {
9824 rtx low[2], high[2];
9825 int count;
9826
9827 if (GET_CODE (operands[2]) == CONST_INT)
9828 {
9829 split_di (operands, 2, low, high);
9830 count = INTVAL (operands[2]) & 63;
9831
9832 if (count >= 32)
9833 {
9834 emit_move_insn (low[0], high[1]);
9835
9836 if (! reload_completed)
9837 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9838 else
9839 {
9840 emit_move_insn (high[0], low[0]);
9841 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9842 }
9843
9844 if (count > 32)
9845 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9846 }
9847 else
9848 {
9849 if (!rtx_equal_p (operands[0], operands[1]))
9850 emit_move_insn (operands[0], operands[1]);
9851 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9852 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9853 }
9854 }
9855 else
9856 {
9857 if (!rtx_equal_p (operands[0], operands[1]))
9858 emit_move_insn (operands[0], operands[1]);
9859
9860 split_di (operands, 1, low, high);
9861
9862 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9863 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9864
9865 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9866 {
9867 if (! no_new_pseudos)
9868 scratch = gen_reg_rtx (SImode);
9869 emit_move_insn (scratch, high[0]);
9870 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9871 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9872 scratch));
9873 }
9874 else
9875 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9876 }
9877 }
9878
9879 void
9880 ix86_split_lshrdi (operands, scratch)
9881 rtx *operands, scratch;
9882 {
9883 rtx low[2], high[2];
9884 int count;
9885
9886 if (GET_CODE (operands[2]) == CONST_INT)
9887 {
9888 split_di (operands, 2, low, high);
9889 count = INTVAL (operands[2]) & 63;
9890
9891 if (count >= 32)
9892 {
9893 emit_move_insn (low[0], high[1]);
9894 emit_move_insn (high[0], const0_rtx);
9895
9896 if (count > 32)
9897 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9898 }
9899 else
9900 {
9901 if (!rtx_equal_p (operands[0], operands[1]))
9902 emit_move_insn (operands[0], operands[1]);
9903 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9904 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9905 }
9906 }
9907 else
9908 {
9909 if (!rtx_equal_p (operands[0], operands[1]))
9910 emit_move_insn (operands[0], operands[1]);
9911
9912 split_di (operands, 1, low, high);
9913
9914 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9915 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9916
9917 /* Heh. By reversing the arguments, we can reuse this pattern. */
9918 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9919 {
9920 if (! no_new_pseudos)
9921 scratch = force_reg (SImode, const0_rtx);
9922 else
9923 emit_move_insn (scratch, const0_rtx);
9924
9925 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9926 scratch));
9927 }
9928 else
9929 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9930 }
9931 }
9932
9933 /* Helper function for the string operations below.  Test whether VARIABLE
9934 has the bits selected by VALUE clear; if so, jump to the returned label. */
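/* A typical use, as in ix86_expand_movstr below, is

       label = ix86_expand_aligntest (destreg, 1);
       ...emit the one-byte fix-up for an odd destination...
       emit_label (label);

   so the fix-up code is skipped when the tested bits are already clear.  */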
9935 static rtx
9936 ix86_expand_aligntest (variable, value)
9937 rtx variable;
9938 int value;
9939 {
9940 rtx label = gen_label_rtx ();
9941 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9942 if (GET_MODE (variable) == DImode)
9943 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9944 else
9945 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9946 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9947 1, label);
9948 return label;
9949 }
9950
9951 /* Decrease COUNTREG by VALUE. */
9952 static void
9953 ix86_adjust_counter (countreg, value)
9954 rtx countreg;
9955 HOST_WIDE_INT value;
9956 {
9957 if (GET_MODE (countreg) == DImode)
9958 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9959 else
9960 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9961 }
9962
9963 /* Zero extend the possibly SImode EXP into a Pmode register. */
9964 rtx
9965 ix86_zero_extend_to_Pmode (exp)
9966 rtx exp;
9967 {
9968 rtx r;
9969 if (GET_MODE (exp) == VOIDmode)
9970 return force_reg (Pmode, exp);
9971 if (GET_MODE (exp) == Pmode)
9972 return copy_to_mode_reg (Pmode, exp);
9973 r = gen_reg_rtx (Pmode);
9974 emit_insn (gen_zero_extendsidi2 (r, exp));
9975 return r;
9976 }
9977
9978 /* Expand string move (memcpy) operation. Use i386 string operations when
9979 profitable. ix86_expand_clrstr contains similar code. */
9980 int
9981 ix86_expand_movstr (dst, src, count_exp, align_exp)
9982 rtx dst, src, count_exp, align_exp;
9983 {
9984 rtx srcreg, destreg, countreg;
9985 enum machine_mode counter_mode;
9986 HOST_WIDE_INT align = 0;
9987 unsigned HOST_WIDE_INT count = 0;
9988 rtx insns;
9989
9990 start_sequence ();
9991
9992 if (GET_CODE (align_exp) == CONST_INT)
9993 align = INTVAL (align_exp);
9994
9995 /* This simple hack avoids all inlining code and simplifies code below. */
9996 if (!TARGET_ALIGN_STRINGOPS)
9997 align = 64;
9998
9999 if (GET_CODE (count_exp) == CONST_INT)
10000 count = INTVAL (count_exp);
10001
10002 /* Figure out the proper mode for the counter. For 32bit it is always
10003 SImode; for 64bit use SImode when possible, otherwise DImode.
10004 COUNT is the number of bytes to copy when known at compile time. */
10005 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10006 || x86_64_zero_extended_value (count_exp))
10007 counter_mode = SImode;
10008 else
10009 counter_mode = DImode;
10010
10011 if (counter_mode != SImode && counter_mode != DImode)
10012 abort ();
10013
10014 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10015 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10016
10017 emit_insn (gen_cld ());
10018
10019 /* When optimizing for size emit simple rep ; movsb instruction for
10020 counts not divisible by 4. */
10021
10022 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10023 {
10024 countreg = ix86_zero_extend_to_Pmode (count_exp);
10025 if (TARGET_64BIT)
10026 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10027 destreg, srcreg, countreg));
10028 else
10029 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10030 destreg, srcreg, countreg));
10031 }
10032
10033 /* For constant aligned (or small unaligned) copies use rep movsl
10034 followed by code copying the rest. For PentiumPro ensure 8 byte
10035 alignment to allow rep movsl acceleration. */
10036
10037 else if (count != 0
10038 && (align >= 8
10039 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10040 || optimize_size || count < (unsigned int) 64))
10041 {
10042 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10043 if (count & ~(size - 1))
10044 {
10045 countreg = copy_to_mode_reg (counter_mode,
10046 GEN_INT ((count >> (size == 4 ? 2 : 3))
10047 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10048 countreg = ix86_zero_extend_to_Pmode (countreg);
10049 if (size == 4)
10050 {
10051 if (TARGET_64BIT)
10052 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10053 destreg, srcreg, countreg));
10054 else
10055 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10056 destreg, srcreg, countreg));
10057 }
10058 else
10059 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10060 destreg, srcreg, countreg));
10061 }
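/* Copy the up to 7 trailing bytes not covered by the rep move above:
   an SImode move when an 8-byte rep was used and bit 2 of the count is
   set, then an HImode and/or a QImode move for the low two count bits.  */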
10062 if (size == 8 && (count & 0x04))
10063 emit_insn (gen_strmovsi (destreg, srcreg));
10064 if (count & 0x02)
10065 emit_insn (gen_strmovhi (destreg, srcreg));
10066 if (count & 0x01)
10067 emit_insn (gen_strmovqi (destreg, srcreg));
10068 }
10069 /* The generic code based on the glibc implementation:
10070 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10071 allowing accelerated copying there)
10072 - copy the data using rep movsl
10073 - copy the rest. */
10074 else
10075 {
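/* Roughly, the sequence emitted below is

       cld
       (test the destination; fix up 1, 2 or 4 leading bytes)
       shr $2, count       ; shr $3 on 64bit
       rep movsl           ; rep movsq on 64bit
       (fix up the trailing bytes)

   with the prologue and epilogue pieces guarded by the alignment and
   count checks that follow.  */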
10076 rtx countreg2;
10077 rtx label = NULL;
10078 int desired_alignment = (TARGET_PENTIUMPRO
10079 && (count == 0 || count >= (unsigned int) 260)
10080 ? 8 : UNITS_PER_WORD);
10081
10082 /* In case we don't know anything about the alignment, default to
10083 the library version, since it is usually equally fast and results in
10084 shorter code. */
10085 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10086 {
10087 end_sequence ();
10088 return 0;
10089 }
10090
10091 if (TARGET_SINGLE_STRINGOP)
10092 emit_insn (gen_cld ());
10093
10094 countreg2 = gen_reg_rtx (Pmode);
10095 countreg = copy_to_mode_reg (counter_mode, count_exp);
10096
10097 /* We don't use loops to align the destination and to copy parts smaller
10098 than 4 bytes, because gcc is able to optimize such code better (in
10099 the case the destination or the count really is aligned, gcc is often
10100 able to predict the branches) and also it is friendlier to the
10101 hardware branch prediction.
10102
10103 Using loops is beneficial for the generic case, because we can
10104 handle small counts using the loops. Many CPUs (such as Athlon)
10105 have large REP prefix setup costs.
10106
10107 This is quite costly. Maybe we can revisit this decision later or
10108 add some customizability to this code. */
10109
10110 if (count == 0 && align < desired_alignment)
10111 {
10112 label = gen_label_rtx ();
10113 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10114 LEU, 0, counter_mode, 1, label);
10115 }
10116 if (align <= 1)
10117 {
10118 rtx label = ix86_expand_aligntest (destreg, 1);
10119 emit_insn (gen_strmovqi (destreg, srcreg));
10120 ix86_adjust_counter (countreg, 1);
10121 emit_label (label);
10122 LABEL_NUSES (label) = 1;
10123 }
10124 if (align <= 2)
10125 {
10126 rtx label = ix86_expand_aligntest (destreg, 2);
10127 emit_insn (gen_strmovhi (destreg, srcreg));
10128 ix86_adjust_counter (countreg, 2);
10129 emit_label (label);
10130 LABEL_NUSES (label) = 1;
10131 }
10132 if (align <= 4 && desired_alignment > 4)
10133 {
10134 rtx label = ix86_expand_aligntest (destreg, 4);
10135 emit_insn (gen_strmovsi (destreg, srcreg));
10136 ix86_adjust_counter (countreg, 4);
10137 emit_label (label);
10138 LABEL_NUSES (label) = 1;
10139 }
10140
10141 if (label && desired_alignment > 4 && !TARGET_64BIT)
10142 {
10143 emit_label (label);
10144 LABEL_NUSES (label) = 1;
10145 label = NULL_RTX;
10146 }
10147 if (!TARGET_SINGLE_STRINGOP)
10148 emit_insn (gen_cld ());
10149 if (TARGET_64BIT)
10150 {
10151 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10152 GEN_INT (3)));
10153 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10154 destreg, srcreg, countreg2));
10155 }
10156 else
10157 {
10158 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10159 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10160 destreg, srcreg, countreg2));
10161 }
10162
10163 if (label)
10164 {
10165 emit_label (label);
10166 LABEL_NUSES (label) = 1;
10167 }
10168 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10169 emit_insn (gen_strmovsi (destreg, srcreg));
10170 if ((align <= 4 || count == 0) && TARGET_64BIT)
10171 {
10172 rtx label = ix86_expand_aligntest (countreg, 4);
10173 emit_insn (gen_strmovsi (destreg, srcreg));
10174 emit_label (label);
10175 LABEL_NUSES (label) = 1;
10176 }
10177 if (align > 2 && count != 0 && (count & 2))
10178 emit_insn (gen_strmovhi (destreg, srcreg));
10179 if (align <= 2 || count == 0)
10180 {
10181 rtx label = ix86_expand_aligntest (countreg, 2);
10182 emit_insn (gen_strmovhi (destreg, srcreg));
10183 emit_label (label);
10184 LABEL_NUSES (label) = 1;
10185 }
10186 if (align > 1 && count != 0 && (count & 1))
10187 emit_insn (gen_strmovqi (destreg, srcreg));
10188 if (align <= 1 || count == 0)
10189 {
10190 rtx label = ix86_expand_aligntest (countreg, 1);
10191 emit_insn (gen_strmovqi (destreg, srcreg));
10192 emit_label (label);
10193 LABEL_NUSES (label) = 1;
10194 }
10195 }
10196
10197 insns = get_insns ();
10198 end_sequence ();
10199
10200 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10201 emit_insn (insns);
10202 return 1;
10203 }
10204
10205 /* Expand string clear operation (bzero). Use i386 string operations when
10206 profitable. expand_movstr contains similar code. */
10207 int
10208 ix86_expand_clrstr (src, count_exp, align_exp)
10209 rtx src, count_exp, align_exp;
10210 {
10211 rtx destreg, zeroreg, countreg;
10212 enum machine_mode counter_mode;
10213 HOST_WIDE_INT align = 0;
10214 unsigned HOST_WIDE_INT count = 0;
10215
10216 if (GET_CODE (align_exp) == CONST_INT)
10217 align = INTVAL (align_exp);
10218
10219 /* This simple hack avoids all inlining code and simplifies code below. */
10220 if (!TARGET_ALIGN_STRINGOPS)
10221 align = 32;
10222
10223 if (GET_CODE (count_exp) == CONST_INT)
10224 count = INTVAL (count_exp);
10225 /* Figure out the proper mode for the counter. For 32bit it is always
10226 SImode; for 64bit use SImode when possible, otherwise DImode.
10227 COUNT is the number of bytes to clear when known at compile time. */
10228 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10229 || x86_64_zero_extended_value (count_exp))
10230 counter_mode = SImode;
10231 else
10232 counter_mode = DImode;
10233
10234 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10235
10236 emit_insn (gen_cld ());
10237
10238 /* When optimizing for size emit a simple rep ; stosb instruction for
10239 counts not divisible by 4. */
10240
10241 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10242 {
10243 countreg = ix86_zero_extend_to_Pmode (count_exp);
10244 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10245 if (TARGET_64BIT)
10246 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10247 destreg, countreg));
10248 else
10249 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10250 destreg, countreg));
10251 }
10252 else if (count != 0
10253 && (align >= 8
10254 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10255 || optimize_size || count < (unsigned int) 64))
10256 {
10257 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10258 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10259 if (count & ~(size - 1))
10260 {
10261 countreg = copy_to_mode_reg (counter_mode,
10262 GEN_INT ((count >> (size == 4 ? 2 : 3))
10263 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10264 countreg = ix86_zero_extend_to_Pmode (countreg);
10265 if (size == 4)
10266 {
10267 if (TARGET_64BIT)
10268 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10269 destreg, countreg));
10270 else
10271 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10272 destreg, countreg));
10273 }
10274 else
10275 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10276 destreg, countreg));
10277 }
10278 if (size == 8 && (count & 0x04))
10279 emit_insn (gen_strsetsi (destreg,
10280 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10281 if (count & 0x02)
10282 emit_insn (gen_strsethi (destreg,
10283 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10284 if (count & 0x01)
10285 emit_insn (gen_strsetqi (destreg,
10286 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10287 }
10288 else
10289 {
10290 rtx countreg2;
10291 rtx label = NULL;
10292 /* Compute desired alignment of the string operation. */
10293 int desired_alignment = (TARGET_PENTIUMPRO
10294 && (count == 0 || count >= (unsigned int) 260)
10295 ? 8 : UNITS_PER_WORD);
10296
10297 /* In case we don't know anything about the alignment, default to
10298 the library version, since it is usually equally fast and results in
10299 shorter code. */
10300 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10301 return 0;
10302
10303 if (TARGET_SINGLE_STRINGOP)
10304 emit_insn (gen_cld ());
10305
10306 countreg2 = gen_reg_rtx (Pmode);
10307 countreg = copy_to_mode_reg (counter_mode, count_exp);
10308 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10309
10310 if (count == 0 && align < desired_alignment)
10311 {
10312 label = gen_label_rtx ();
10313 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10314 LEU, 0, counter_mode, 1, label);
10315 }
10316 if (align <= 1)
10317 {
10318 rtx label = ix86_expand_aligntest (destreg, 1);
10319 emit_insn (gen_strsetqi (destreg,
10320 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10321 ix86_adjust_counter (countreg, 1);
10322 emit_label (label);
10323 LABEL_NUSES (label) = 1;
10324 }
10325 if (align <= 2)
10326 {
10327 rtx label = ix86_expand_aligntest (destreg, 2);
10328 emit_insn (gen_strsethi (destreg,
10329 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10330 ix86_adjust_counter (countreg, 2);
10331 emit_label (label);
10332 LABEL_NUSES (label) = 1;
10333 }
10334 if (align <= 4 && desired_alignment > 4)
10335 {
10336 rtx label = ix86_expand_aligntest (destreg, 4);
10337 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10338 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10339 : zeroreg)));
10340 ix86_adjust_counter (countreg, 4);
10341 emit_label (label);
10342 LABEL_NUSES (label) = 1;
10343 }
10344
10345 if (label && desired_alignment > 4 && !TARGET_64BIT)
10346 {
10347 emit_label (label);
10348 LABEL_NUSES (label) = 1;
10349 label = NULL_RTX;
10350 }
10351
10352 if (!TARGET_SINGLE_STRINGOP)
10353 emit_insn (gen_cld ());
10354 if (TARGET_64BIT)
10355 {
10356 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10357 GEN_INT (3)));
10358 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10359 destreg, countreg2));
10360 }
10361 else
10362 {
10363 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10364 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10365 destreg, countreg2));
10366 }
10367 if (label)
10368 {
10369 emit_label (label);
10370 LABEL_NUSES (label) = 1;
10371 }
10372
10373 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10374 emit_insn (gen_strsetsi (destreg,
10375 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10376 if (TARGET_64BIT && (align <= 4 || count == 0))
10377 {
10378 rtx label = ix86_expand_aligntest (countreg, 4);
10379 emit_insn (gen_strsetsi (destreg,
10380 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10381 emit_label (label);
10382 LABEL_NUSES (label) = 1;
10383 }
10384 if (align > 2 && count != 0 && (count & 2))
10385 emit_insn (gen_strsethi (destreg,
10386 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10387 if (align <= 2 || count == 0)
10388 {
10389 rtx label = ix86_expand_aligntest (countreg, 2);
10390 emit_insn (gen_strsethi (destreg,
10391 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10392 emit_label (label);
10393 LABEL_NUSES (label) = 1;
10394 }
10395 if (align > 1 && count != 0 && (count & 1))
10396 emit_insn (gen_strsetqi (destreg,
10397 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10398 if (align <= 1 || count == 0)
10399 {
10400 rtx label = ix86_expand_aligntest (countreg, 1);
10401 emit_insn (gen_strsetqi (destreg,
10402 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10403 emit_label (label);
10404 LABEL_NUSES (label) = 1;
10405 }
10406 }
10407 return 1;
10408 }
10409 /* Expand strlen. */
10410 int
10411 ix86_expand_strlen (out, src, eoschar, align)
10412 rtx out, src, eoschar, align;
10413 {
10414 rtx addr, scratch1, scratch2, scratch3, scratch4;
10415
10416 /* The generic case of the strlen expander is long. Avoid expanding
10417 it unless TARGET_INLINE_ALL_STRINGOPS. */
10418
10419 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10420 && !TARGET_INLINE_ALL_STRINGOPS
10421 && !optimize_size
10422 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10423 return 0;
10424
10425 addr = force_reg (Pmode, XEXP (src, 0));
10426 scratch1 = gen_reg_rtx (Pmode);
10427
10428 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10429 && !optimize_size)
10430 {
10431 /* Well it seems that some optimizer does not combine a call like
10432 foo(strlen(bar), strlen(bar));
10433 when the move and the subtraction are done here. It does calculate
10434 the length just once when these instructions are done inside of
10435 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10436 often used and I use one fewer register for the lifetime of
10437 output_strlen_unroll() this is better. */
10438
10439 emit_move_insn (out, addr);
10440
10441 ix86_expand_strlensi_unroll_1 (out, align);
10442
10443 /* strlensi_unroll_1 returns the address of the zero at the end of
10444 the string, like memchr(), so compute the length by subtracting
10445 the start address. */
10446 if (TARGET_64BIT)
10447 emit_insn (gen_subdi3 (out, out, addr));
10448 else
10449 emit_insn (gen_subsi3 (out, out, addr));
10450 }
10451 else
10452 {
10453 scratch2 = gen_reg_rtx (Pmode);
10454 scratch3 = gen_reg_rtx (Pmode);
10455 scratch4 = force_reg (Pmode, constm1_rtx);
10456
10457 emit_move_insn (scratch3, addr);
10458 eoschar = force_reg (QImode, eoschar);
10459
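/* This branch is essentially

       cld
       mov $-1, count
       repnz scasb
       not count
       add $-1, count

   repnz scasb decrements the count register once per byte scanned,
   including the terminator, so it ends up as -1 - (len + 1); the
   complement gives len + 1 and the final add of -1 yields len.  */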
10460 emit_insn (gen_cld ());
10461 if (TARGET_64BIT)
10462 {
10463 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10464 align, scratch4, scratch3));
10465 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10466 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10467 }
10468 else
10469 {
10470 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10471 align, scratch4, scratch3));
10472 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10473 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10474 }
10475 }
10476 return 1;
10477 }
10478
10479 /* Expand the appropriate insns for doing strlen if not just doing
10480 repnz; scasb
10481
10482 out = result, initialized with the start address
10483 align_rtx = alignment of the address.
10484 scratch = scratch register, initialized with the start address when
10485 not aligned, otherwise undefined
10486
10487 This is just the body. It needs the initialisations mentioned above and
10488 some address computing at the end. These things are done in i386.md. */
10489
10490 static void
10491 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10492 rtx out, align_rtx;
10493 {
10494 int align;
10495 rtx tmp;
10496 rtx align_2_label = NULL_RTX;
10497 rtx align_3_label = NULL_RTX;
10498 rtx align_4_label = gen_label_rtx ();
10499 rtx end_0_label = gen_label_rtx ();
10500 rtx mem;
10501 rtx tmpreg = gen_reg_rtx (SImode);
10502 rtx scratch = gen_reg_rtx (SImode);
10503
10504 align = 0;
10505 if (GET_CODE (align_rtx) == CONST_INT)
10506 align = INTVAL (align_rtx);
10507
10508 /* Check up to 3 leading bytes for null in order to obtain an aligned pointer. */
10509
10510 /* Is there a known alignment and is it less than 4? */
10511 if (align < 4)
10512 {
10513 rtx scratch1 = gen_reg_rtx (Pmode);
10514 emit_move_insn (scratch1, out);
10515 /* Is there a known alignment and is it not 2? */
10516 if (align != 2)
10517 {
10518 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10519 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10520
10521 /* Leave just the 3 lower bits. */
10522 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10523 NULL_RTX, 0, OPTAB_WIDEN);
10524
10525 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10526 Pmode, 1, align_4_label);
10527 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10528 Pmode, 1, align_2_label);
10529 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10530 Pmode, 1, align_3_label);
10531 }
10532 else
10533 {
10534 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10535 check whether it is aligned to a 4-byte boundary. */
10536
10537 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10538 NULL_RTX, 0, OPTAB_WIDEN);
10539
10540 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10541 Pmode, 1, align_4_label);
10542 }
10543
10544 mem = gen_rtx_MEM (QImode, out);
10545
10546 /* Now compare the bytes. */
10547
10548 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10549 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10550 QImode, 1, end_0_label);
10551
10552 /* Increment the address. */
10553 if (TARGET_64BIT)
10554 emit_insn (gen_adddi3 (out, out, const1_rtx));
10555 else
10556 emit_insn (gen_addsi3 (out, out, const1_rtx));
10557
10558 /* Not needed with an alignment of 2 */
10559 if (align != 2)
10560 {
10561 emit_label (align_2_label);
10562
10563 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10564 end_0_label);
10565
10566 if (TARGET_64BIT)
10567 emit_insn (gen_adddi3 (out, out, const1_rtx));
10568 else
10569 emit_insn (gen_addsi3 (out, out, const1_rtx));
10570
10571 emit_label (align_3_label);
10572 }
10573
10574 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10575 end_0_label);
10576
10577 if (TARGET_64BIT)
10578 emit_insn (gen_adddi3 (out, out, const1_rtx));
10579 else
10580 emit_insn (gen_addsi3 (out, out, const1_rtx));
10581 }
10582
10583 /* Generate a loop to check 4 bytes at a time. It is not a good idea
10584 to align this loop: it only makes the program bigger and does not
10585 make it run faster. */
10586 emit_label (align_4_label);
10587
10588 mem = gen_rtx_MEM (SImode, out);
10589 emit_move_insn (scratch, mem);
10590 if (TARGET_64BIT)
10591 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10592 else
10593 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10594
10595 /* This formula yields a nonzero result iff one of the bytes is zero.
10596 This saves three branches inside the loop and many cycles. */
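/* In C terms the test is  ((x - 0x01010101) & ~x & 0x80808080) != 0.
   The lowest zero byte of x becomes 0xff after the subtraction, so its
   top bit survives the & ~x; if x has no zero byte, no inter-byte borrow
   occurs and every byte either keeps its top bit clear after the
   subtraction or has it cleared by the & ~x, giving a zero result.  */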
10597
10598 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10599 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10600 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10601 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10602 gen_int_mode (0x80808080, SImode)));
10603 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10604 align_4_label);
10605
10606 if (TARGET_CMOVE)
10607 {
10608 rtx reg = gen_reg_rtx (SImode);
10609 rtx reg2 = gen_reg_rtx (Pmode);
10610 emit_move_insn (reg, tmpreg);
10611 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10612
10613 /* If zero is not in the first two bytes, move two bytes forward. */
10614 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10615 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10616 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10617 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10618 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10619 reg,
10620 tmpreg)));
10621 /* Emit lea manually to avoid clobbering of flags. */
10622 emit_insn (gen_rtx_SET (SImode, reg2,
10623 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10624
10625 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10626 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10627 emit_insn (gen_rtx_SET (VOIDmode, out,
10628 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10629 reg2,
10630 out)));
10631
10632 }
10633 else
10634 {
10635 rtx end_2_label = gen_label_rtx ();
10636 /* Is zero in the first two bytes? */
10637
10638 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10639 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10640 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10641 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10642 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10643 pc_rtx);
10644 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10645 JUMP_LABEL (tmp) = end_2_label;
10646
10647 /* Not in the first two. Move two bytes forward. */
10648 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10649 if (TARGET_64BIT)
10650 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10651 else
10652 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10653
10654 emit_label (end_2_label);
10655
10656 }
10657
10658 /* Avoid branch in fixing the byte. */
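/* At this point the low byte of tmpreg has its top bit set iff the zero
   byte is the first of the remaining pair, and OUT points 3 or 4 bytes
   past that zero byte.  Adding tmpreg to itself moves that bit into the
   carry flag, so the subtract-with-borrow of 3 below lands OUT exactly
   on the terminating zero.  */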
10659 tmpreg = gen_lowpart (QImode, tmpreg);
10660 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10661 if (TARGET_64BIT)
10662 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10663 else
10664 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10665
10666 emit_label (end_0_label);
10667 }
10668
10669 void
10670 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10671 rtx retval, fnaddr, callarg1, callarg2, pop;
10672 {
10673 rtx use = NULL, call;
10674
10675 if (pop == const0_rtx)
10676 pop = NULL;
10677 if (TARGET_64BIT && pop)
10678 abort ();
10679
10680 #if TARGET_MACHO
10681 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10682 fnaddr = machopic_indirect_call_target (fnaddr);
10683 #else
10684 /* Static functions and indirect calls don't need the pic register. */
10685 if (! TARGET_64BIT && flag_pic
10686 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10687 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10688 use_reg (&use, pic_offset_table_rtx);
10689
10690 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10691 {
10692 rtx al = gen_rtx_REG (QImode, 0);
10693 emit_move_insn (al, callarg2);
10694 use_reg (&use, al);
10695 }
10696 #endif /* TARGET_MACHO */
10697
10698 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10699 {
10700 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10701 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10702 }
10703
10704 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10705 if (retval)
10706 call = gen_rtx_SET (VOIDmode, retval, call);
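/* For calling conventions where the callee pops its own arguments
   (such as stdcall), POP is the number of bytes removed; wrap the call
   and the stack adjustment into a single PARALLEL so that the stack
   pointer update stays visible to the rest of the compiler.  */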
10707 if (pop)
10708 {
10709 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10710 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10711 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10712 }
10713
10714 call = emit_call_insn (call);
10715 if (use)
10716 CALL_INSN_FUNCTION_USAGE (call) = use;
10717 }
10718
10719 \f
10720 /* Clear stack slot assignments remembered from previous functions.
10721 This is called from INIT_EXPANDERS once before RTL is emitted for each
10722 function. */
10723
10724 static struct machine_function *
10725 ix86_init_machine_status ()
10726 {
10727 return ggc_alloc_cleared (sizeof (struct machine_function));
10728 }
10729
10730 /* Return a MEM corresponding to a stack slot with mode MODE.
10731 Allocate a new slot if necessary.
10732
10733 The RTL for a function can have several slots available: N is
10734 which slot to use. */
10735
10736 rtx
10737 assign_386_stack_local (mode, n)
10738 enum machine_mode mode;
10739 int n;
10740 {
10741 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10742 abort ();
10743
10744 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10745 ix86_stack_locals[(int) mode][n]
10746 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10747
10748 return ix86_stack_locals[(int) mode][n];
10749 }
10750
10751 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10752
10753 static GTY(()) rtx ix86_tls_symbol;
10754 rtx
10755 ix86_tls_get_addr ()
10756 {
10757
10758 if (!ix86_tls_symbol)
10759 {
10760 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10761 ? "___tls_get_addr"
10762 : "__tls_get_addr"));
10763 }
10764
10765 return ix86_tls_symbol;
10766 }
10767 \f
10768 /* Calculate the length of the memory address in the instruction
10769 encoding. Does not include the one-byte modrm, opcode, or prefix. */
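/* For instance, (reg %eax) needs no extra bytes, (plus (reg %eax)
   (const_int 8)) needs a single displacement byte, while an index
   register or a displacement that does not fit in 8 bits costs an
   extra byte and/or four displacement bytes, as counted below.  */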
10770
10771 static int
10772 memory_address_length (addr)
10773 rtx addr;
10774 {
10775 struct ix86_address parts;
10776 rtx base, index, disp;
10777 int len;
10778
10779 if (GET_CODE (addr) == PRE_DEC
10780 || GET_CODE (addr) == POST_INC
10781 || GET_CODE (addr) == PRE_MODIFY
10782 || GET_CODE (addr) == POST_MODIFY)
10783 return 0;
10784
10785 if (! ix86_decompose_address (addr, &parts))
10786 abort ();
10787
10788 base = parts.base;
10789 index = parts.index;
10790 disp = parts.disp;
10791 len = 0;
10792
10793 /* Register Indirect. */
10794 if (base && !index && !disp)
10795 {
10796 /* Special cases: ebp and esp need the two-byte modrm form. */
10797 if (addr == stack_pointer_rtx
10798 || addr == arg_pointer_rtx
10799 || addr == frame_pointer_rtx
10800 || addr == hard_frame_pointer_rtx)
10801 len = 1;
10802 }
10803
10804 /* Direct Addressing. */
10805 else if (disp && !base && !index)
10806 len = 4;
10807
10808 else
10809 {
10810 /* Find the length of the displacement constant. */
10811 if (disp)
10812 {
10813 if (GET_CODE (disp) == CONST_INT
10814 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10815 len = 1;
10816 else
10817 len = 4;
10818 }
10819
10820 /* An index requires the two-byte modrm form. */
10821 if (index)
10822 len += 1;
10823 }
10824
10825 return len;
10826 }
10827
10828 /* Compute the default value for the "length_immediate" attribute. When
10829 SHORTFORM is set, expect that the insn has an 8bit immediate alternative. */
10830 int
10831 ix86_attr_length_immediate_default (insn, shortform)
10832 rtx insn;
10833 int shortform;
10834 {
10835 int len = 0;
10836 int i;
10837 extract_insn_cached (insn);
10838 for (i = recog_data.n_operands - 1; i >= 0; --i)
10839 if (CONSTANT_P (recog_data.operand[i]))
10840 {
10841 if (len)
10842 abort ();
10843 if (shortform
10844 && GET_CODE (recog_data.operand[i]) == CONST_INT
10845 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10846 len = 1;
10847 else
10848 {
10849 switch (get_attr_mode (insn))
10850 {
10851 case MODE_QI:
10852 len+=1;
10853 break;
10854 case MODE_HI:
10855 len+=2;
10856 break;
10857 case MODE_SI:
10858 len+=4;
10859 break;
10860 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10861 case MODE_DI:
10862 len+=4;
10863 break;
10864 default:
10865 fatal_insn ("unknown insn mode", insn);
10866 }
10867 }
10868 }
10869 return len;
10870 }
10871 /* Compute default value for "length_address" attribute. */
10872 int
10873 ix86_attr_length_address_default (insn)
10874 rtx insn;
10875 {
10876 int i;
10877 extract_insn_cached (insn);
10878 for (i = recog_data.n_operands - 1; i >= 0; --i)
10879 if (GET_CODE (recog_data.operand[i]) == MEM)
10880 {
10881 return memory_address_length (XEXP (recog_data.operand[i], 0));
10882 break;
10883 }
10884 return 0;
10885 }
10886 \f
10887 /* Return the maximum number of instructions a cpu can issue. */
10888
10889 static int
10890 ix86_issue_rate ()
10891 {
10892 switch (ix86_cpu)
10893 {
10894 case PROCESSOR_PENTIUM:
10895 case PROCESSOR_K6:
10896 return 2;
10897
10898 case PROCESSOR_PENTIUMPRO:
10899 case PROCESSOR_PENTIUM4:
10900 case PROCESSOR_ATHLON:
10901 return 3;
10902
10903 default:
10904 return 1;
10905 }
10906 }
10907
10908 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10909 by DEP_INSN and nothing else set by DEP_INSN. */
10910
10911 static int
10912 ix86_flags_dependant (insn, dep_insn, insn_type)
10913 rtx insn, dep_insn;
10914 enum attr_type insn_type;
10915 {
10916 rtx set, set2;
10917
10918 /* Simplify the test for uninteresting insns. */
10919 if (insn_type != TYPE_SETCC
10920 && insn_type != TYPE_ICMOV
10921 && insn_type != TYPE_FCMOV
10922 && insn_type != TYPE_IBR)
10923 return 0;
10924
10925 if ((set = single_set (dep_insn)) != 0)
10926 {
10927 set = SET_DEST (set);
10928 set2 = NULL_RTX;
10929 }
10930 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10931 && XVECLEN (PATTERN (dep_insn), 0) == 2
10932 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10933 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10934 {
10935 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10936 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10937 }
10938 else
10939 return 0;
10940
10941 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10942 return 0;
10943
10944 /* This test is true if the dependent insn reads the flags but
10945 not any other potentially set register. */
10946 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10947 return 0;
10948
10949 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10950 return 0;
10951
10952 return 1;
10953 }
10954
10955 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10956 address with operands set by DEP_INSN. */
10957
10958 static int
10959 ix86_agi_dependant (insn, dep_insn, insn_type)
10960 rtx insn, dep_insn;
10961 enum attr_type insn_type;
10962 {
10963 rtx addr;
10964
10965 if (insn_type == TYPE_LEA
10966 && TARGET_PENTIUM)
10967 {
10968 addr = PATTERN (insn);
10969 if (GET_CODE (addr) == SET)
10970 ;
10971 else if (GET_CODE (addr) == PARALLEL
10972 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10973 addr = XVECEXP (addr, 0, 0);
10974 else
10975 abort ();
10976 addr = SET_SRC (addr);
10977 }
10978 else
10979 {
10980 int i;
10981 extract_insn_cached (insn);
10982 for (i = recog_data.n_operands - 1; i >= 0; --i)
10983 if (GET_CODE (recog_data.operand[i]) == MEM)
10984 {
10985 addr = XEXP (recog_data.operand[i], 0);
10986 goto found;
10987 }
10988 return 0;
10989 found:;
10990 }
10991
10992 return modified_in_p (addr, dep_insn);
10993 }
10994
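/* Adjust the scheduling COST of the dependence LINK between DEP_INSN and
   INSN according to CPU-specific pipeline behaviour, and return the new
   cost.  Anti and output dependencies are free; the per-CPU cases below
   model AGIs, compare/branch pairing, load latencies and INT->FP
   conversions.  */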
10995 static int
10996 ix86_adjust_cost (insn, link, dep_insn, cost)
10997 rtx insn, link, dep_insn;
10998 int cost;
10999 {
11000 enum attr_type insn_type, dep_insn_type;
11001 enum attr_memory memory, dep_memory;
11002 rtx set, set2;
11003 int dep_insn_code_number;
11004
11005 /* Anti and output dependencies have zero cost on all CPUs. */
11006 if (REG_NOTE_KIND (link) != 0)
11007 return 0;
11008
11009 dep_insn_code_number = recog_memoized (dep_insn);
11010
11011 /* If we can't recognize the insns, we can't really do anything. */
11012 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11013 return cost;
11014
11015 insn_type = get_attr_type (insn);
11016 dep_insn_type = get_attr_type (dep_insn);
11017
11018 switch (ix86_cpu)
11019 {
11020 case PROCESSOR_PENTIUM:
11021 /* Address Generation Interlock adds a cycle of latency. */
11022 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11023 cost += 1;
11024
11025 /* ??? Compares pair with jump/setcc. */
11026 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11027 cost = 0;
11028
11029 /* Floating point stores require the value to be ready one cycle earlier. */
11030 if (insn_type == TYPE_FMOV
11031 && get_attr_memory (insn) == MEMORY_STORE
11032 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11033 cost += 1;
11034 break;
11035
11036 case PROCESSOR_PENTIUMPRO:
11037 memory = get_attr_memory (insn);
11038 dep_memory = get_attr_memory (dep_insn);
11039
11040 /* Since we can't represent delayed latencies of load+operation,
11041 increase the cost here for non-imov insns. */
11042 if (dep_insn_type != TYPE_IMOV
11043 && dep_insn_type != TYPE_FMOV
11044 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11045 cost += 1;
11046
11047 /* INT->FP conversion is expensive. */
11048 if (get_attr_fp_int_src (dep_insn))
11049 cost += 5;
11050
11051 /* There is one cycle extra latency between an FP op and a store. */
11052 if (insn_type == TYPE_FMOV
11053 && (set = single_set (dep_insn)) != NULL_RTX
11054 && (set2 = single_set (insn)) != NULL_RTX
11055 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11056 && GET_CODE (SET_DEST (set2)) == MEM)
11057 cost += 1;
11058
11059 /* Show the ability of the reorder buffer to hide the latency of a load
11060 by executing it in parallel with the previous instruction when the
11061 previous instruction is not needed to compute the address. */
11062 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11063 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11064 {
11065 /* Claim that moves take one cycle, as the core can issue one load
11066 at a time and the next load can start a cycle later. */
11067 if (dep_insn_type == TYPE_IMOV
11068 || dep_insn_type == TYPE_FMOV)
11069 cost = 1;
11070 else if (cost > 1)
11071 cost--;
11072 }
11073 break;
11074
11075 case PROCESSOR_K6:
11076 memory = get_attr_memory (insn);
11077 dep_memory = get_attr_memory (dep_insn);
11078 /* The esp dependency is resolved before the instruction is really
11079 finished. */
11080 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11081 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11082 return 1;
11083
11084 /* Since we can't represent delayed latencies of load+operation,
11085 increase the cost here for non-imov insns. */
11086 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11087 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11088
11089 /* INT->FP conversion is expensive. */
11090 if (get_attr_fp_int_src (dep_insn))
11091 cost += 5;
11092
11093 /* Show the ability of the reorder buffer to hide the latency of a load
11094 by executing it in parallel with the previous instruction when the
11095 previous instruction is not needed to compute the address. */
11096 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11097 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11098 {
11099 /* Claim that moves take one cycle, as the core can issue one load
11100 at a time and the next load can start a cycle later. */
11101 if (dep_insn_type == TYPE_IMOV
11102 || dep_insn_type == TYPE_FMOV)
11103 cost = 1;
11104 else if (cost > 2)
11105 cost -= 2;
11106 else
11107 cost = 1;
11108 }
11109 break;
11110
11111 case PROCESSOR_ATHLON:
11112 memory = get_attr_memory (insn);
11113 dep_memory = get_attr_memory (dep_insn);
11114
11115 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11116 {
11117 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11118 cost += 2;
11119 else
11120 cost += 3;
11121 }
11122 /* Show the ability of the reorder buffer to hide the latency of a load
11123 by executing it in parallel with the previous instruction when the
11124 previous instruction is not needed to compute the address. */
11125 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11126 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11127 {
11128 /* Claim that moves take one cycle, as the core can issue one load
11129 at a time and the next load can start a cycle later. */
11130 if (dep_insn_type == TYPE_IMOV
11131 || dep_insn_type == TYPE_FMOV)
11132 cost = 0;
11133 else if (cost >= 3)
11134 cost -= 3;
11135 else
11136 cost = 0;
11137 }
11138
11139 default:
11140 break;
11141 }
11142
11143 return cost;
11144 }
11145
11146 static union
11147 {
11148 struct ppro_sched_data
11149 {
11150 rtx decode[3];
11151 int issued_this_cycle;
11152 } ppro;
11153 } ix86_sched_data;
11154
11155 static enum attr_ppro_uops
11156 ix86_safe_ppro_uops (insn)
11157 rtx insn;
11158 {
11159 if (recog_memoized (insn) >= 0)
11160 return get_attr_ppro_uops (insn);
11161 else
11162 return PPRO_UOPS_MANY;
11163 }
11164
11165 static void
11166 ix86_dump_ppro_packet (dump)
11167 FILE *dump;
11168 {
11169 if (ix86_sched_data.ppro.decode[0])
11170 {
11171 fprintf (dump, "PPRO packet: %d",
11172 INSN_UID (ix86_sched_data.ppro.decode[0]));
11173 if (ix86_sched_data.ppro.decode[1])
11174 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11175 if (ix86_sched_data.ppro.decode[2])
11176 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11177 fputc ('\n', dump);
11178 }
11179 }
11180
11181 /* We're beginning a new block. Initialize data structures as necessary. */
11182
11183 static void
11184 ix86_sched_init (dump, sched_verbose, veclen)
11185 FILE *dump ATTRIBUTE_UNUSED;
11186 int sched_verbose ATTRIBUTE_UNUSED;
11187 int veclen ATTRIBUTE_UNUSED;
11188 {
11189 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11190 }
11191
11192 /* Shift INSN to SLOT, and shift everything else down. */
11193
11194 static void
11195 ix86_reorder_insn (insnp, slot)
11196 rtx *insnp, *slot;
11197 {
11198 if (insnp != slot)
11199 {
11200 rtx insn = *insnp;
11201 do
11202 insnp[0] = insnp[1];
11203 while (++insnp != slot);
11204 *insnp = insn;
11205 }
11206 }
11207
11208 static void
11209 ix86_sched_reorder_ppro (ready, e_ready)
11210 rtx *ready;
11211 rtx *e_ready;
11212 {
11213 rtx decode[3];
11214 enum attr_ppro_uops cur_uops;
11215 int issued_this_cycle;
11216 rtx *insnp;
11217 int i;
11218
11219 /* At this point .ppro.decode contains the state of the three
11220 decoders from last "cycle". That is, those insns that were
11221 actually independent. But here we're scheduling for the
11222 decoder, and we may find things that are decodable in the
11223 same cycle. */
11224
11225 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11226 issued_this_cycle = 0;
11227
11228 insnp = e_ready;
11229 cur_uops = ix86_safe_ppro_uops (*insnp);
11230
11231 /* If the decoders are empty, and we've a complex insn at the
11232 head of the priority queue, let it issue without complaint. */
11233 if (decode[0] == NULL)
11234 {
11235 if (cur_uops == PPRO_UOPS_MANY)
11236 {
11237 decode[0] = *insnp;
11238 goto ppro_done;
11239 }
11240
11241 /* Otherwise, search for a 2-4 uop insn to issue. */
11242 while (cur_uops != PPRO_UOPS_FEW)
11243 {
11244 if (insnp == ready)
11245 break;
11246 cur_uops = ix86_safe_ppro_uops (*--insnp);
11247 }
11248
11249 /* If so, move it to the head of the line. */
11250 if (cur_uops == PPRO_UOPS_FEW)
11251 ix86_reorder_insn (insnp, e_ready);
11252
11253 /* Issue the head of the queue. */
11254 issued_this_cycle = 1;
11255 decode[0] = *e_ready--;
11256 }
11257
11258 /* Look for simple insns to fill in the other two slots. */
11259 for (i = 1; i < 3; ++i)
11260 if (decode[i] == NULL)
11261 {
11262 if (ready > e_ready)
11263 goto ppro_done;
11264
11265 insnp = e_ready;
11266 cur_uops = ix86_safe_ppro_uops (*insnp);
11267 while (cur_uops != PPRO_UOPS_ONE)
11268 {
11269 if (insnp == ready)
11270 break;
11271 cur_uops = ix86_safe_ppro_uops (*--insnp);
11272 }
11273
11274 /* Found one. Move it to the head of the queue and issue it. */
11275 if (cur_uops == PPRO_UOPS_ONE)
11276 {
11277 ix86_reorder_insn (insnp, e_ready);
11278 decode[i] = *e_ready--;
11279 issued_this_cycle++;
11280 continue;
11281 }
11282
11283 /* ??? Didn't find one. Ideally, here we would do a lazy split
11284 of 2-uop insns, issue one and queue the other. */
11285 }
11286
11287 ppro_done:
11288 if (issued_this_cycle == 0)
11289 issued_this_cycle = 1;
11290 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11291 }
11292
11293 /* We are about to begin issuing insns for this clock cycle.
11294 Override the default sort algorithm to better slot instructions. */
11295 static int
11296 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11297 FILE *dump ATTRIBUTE_UNUSED;
11298 int sched_verbose ATTRIBUTE_UNUSED;
11299 rtx *ready;
11300 int *n_readyp;
11301 int clock_var ATTRIBUTE_UNUSED;
11302 {
11303 int n_ready = *n_readyp;
11304 rtx *e_ready = ready + n_ready - 1;
11305
11306 /* Make sure to go ahead and initialize key items in
11307 ix86_sched_data if we are not going to bother trying to
11308 reorder the ready queue. */
11309 if (n_ready < 2)
11310 {
11311 ix86_sched_data.ppro.issued_this_cycle = 1;
11312 goto out;
11313 }
11314
11315 switch (ix86_cpu)
11316 {
11317 default:
11318 break;
11319
11320 case PROCESSOR_PENTIUMPRO:
11321 ix86_sched_reorder_ppro (ready, e_ready);
11322 break;
11323 }
11324
11325 out:
11326 return ix86_issue_rate ();
11327 }
11328
11329 /* We are about to issue INSN. Return the number of insns left on the
11330 ready queue that can be issued this cycle. */
11331
11332 static int
11333 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11334 FILE *dump;
11335 int sched_verbose;
11336 rtx insn;
11337 int can_issue_more;
11338 {
11339 int i;
11340 switch (ix86_cpu)
11341 {
11342 default:
11343 return can_issue_more - 1;
11344
11345 case PROCESSOR_PENTIUMPRO:
11346 {
11347 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11348
11349 if (uops == PPRO_UOPS_MANY)
11350 {
11351 if (sched_verbose)
11352 ix86_dump_ppro_packet (dump);
11353 ix86_sched_data.ppro.decode[0] = insn;
11354 ix86_sched_data.ppro.decode[1] = NULL;
11355 ix86_sched_data.ppro.decode[2] = NULL;
11356 if (sched_verbose)
11357 ix86_dump_ppro_packet (dump);
11358 ix86_sched_data.ppro.decode[0] = NULL;
11359 }
11360 else if (uops == PPRO_UOPS_FEW)
11361 {
11362 if (sched_verbose)
11363 ix86_dump_ppro_packet (dump);
11364 ix86_sched_data.ppro.decode[0] = insn;
11365 ix86_sched_data.ppro.decode[1] = NULL;
11366 ix86_sched_data.ppro.decode[2] = NULL;
11367 }
11368 else
11369 {
11370 for (i = 0; i < 3; ++i)
11371 if (ix86_sched_data.ppro.decode[i] == NULL)
11372 {
11373 ix86_sched_data.ppro.decode[i] = insn;
11374 break;
11375 }
11376 if (i == 3)
11377 abort ();
11378 if (i == 2)
11379 {
11380 if (sched_verbose)
11381 ix86_dump_ppro_packet (dump);
11382 ix86_sched_data.ppro.decode[0] = NULL;
11383 ix86_sched_data.ppro.decode[1] = NULL;
11384 ix86_sched_data.ppro.decode[2] = NULL;
11385 }
11386 }
11387 }
11388 return --ix86_sched_data.ppro.issued_this_cycle;
11389 }
11390 }
11391
11392 static int
11393 ia32_use_dfa_pipeline_interface ()
11394 {
11395 if (ix86_cpu == PROCESSOR_PENTIUM)
11396 return 1;
11397 return 0;
11398 }
11399
11400 /* How many alternative schedules to try. This should be as wide as the
11401 scheduling freedom in the DFA, but no wider. Making this value too
11402 large results in extra work for the scheduler. */
11403
11404 static int
11405 ia32_multipass_dfa_lookahead ()
11406 {
11407 if (ix86_cpu == PROCESSOR_PENTIUM)
11408 return 2;
11409 else
11410 return 0;
11411 }
11412
11413 \f
11414 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11415 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11416 appropriate. */
11417
11418 void
11419 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11420 rtx insns;
11421 rtx dstref, srcref, dstreg, srcreg;
11422 {
11423 rtx insn;
11424
11425 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11426 if (INSN_P (insn))
11427 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11428 dstreg, srcreg);
11429 }
11430
11431 /* Subroutine of above to actually do the updating by recursively walking
11432 the rtx. */
11433
11434 static void
11435 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11436 rtx x;
11437 rtx dstref, srcref, dstreg, srcreg;
11438 {
11439 enum rtx_code code = GET_CODE (x);
11440 const char *format_ptr = GET_RTX_FORMAT (code);
11441 int i, j;
11442
11443 if (code == MEM && XEXP (x, 0) == dstreg)
11444 MEM_COPY_ATTRIBUTES (x, dstref);
11445 else if (code == MEM && XEXP (x, 0) == srcreg)
11446 MEM_COPY_ATTRIBUTES (x, srcref);
11447
11448 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11449 {
11450 if (*format_ptr == 'e')
11451 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11452 dstreg, srcreg);
11453 else if (*format_ptr == 'E')
11454 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11455 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11456 dstreg, srcreg);
11457 }
11458 }
11459 \f
11460 /* Compute the alignment given to a constant that is being placed in memory.
11461 EXP is the constant and ALIGN is the alignment that the object would
11462 ordinarily have.
11463 The value of this function is used instead of that alignment to align
11464 the object. */
11465
11466 int
11467 ix86_constant_alignment (exp, align)
11468 tree exp;
11469 int align;
11470 {
11471 if (TREE_CODE (exp) == REAL_CST)
11472 {
11473 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11474 return 64;
11475 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11476 return 128;
11477 }
11478 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11479 && align < 256)
11480 return 256;
11481
11482 return align;
11483 }
11484
11485 /* Compute the alignment for a static variable.
11486 TYPE is the data type, and ALIGN is the alignment that
11487 the object would ordinarily have. The value of this function is used
11488 instead of that alignment to align the object. */
11489
11490 int
11491 ix86_data_alignment (type, align)
11492 tree type;
11493 int align;
11494 {
11495 if (AGGREGATE_TYPE_P (type)
11496 && TYPE_SIZE (type)
11497 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11498 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11499 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11500 return 256;
11501
11502 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11503 to a 16-byte boundary. */
11504 if (TARGET_64BIT)
11505 {
11506 if (AGGREGATE_TYPE_P (type)
11507 && TYPE_SIZE (type)
11508 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11509 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11510 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11511 return 128;
11512 }
11513
11514 if (TREE_CODE (type) == ARRAY_TYPE)
11515 {
11516 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11517 return 64;
11518 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11519 return 128;
11520 }
11521 else if (TREE_CODE (type) == COMPLEX_TYPE)
11522 {
11523
11524 if (TYPE_MODE (type) == DCmode && align < 64)
11525 return 64;
11526 if (TYPE_MODE (type) == XCmode && align < 128)
11527 return 128;
11528 }
11529 else if ((TREE_CODE (type) == RECORD_TYPE
11530 || TREE_CODE (type) == UNION_TYPE
11531 || TREE_CODE (type) == QUAL_UNION_TYPE)
11532 && TYPE_FIELDS (type))
11533 {
11534 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11535 return 64;
11536 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11537 return 128;
11538 }
11539 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11540 || TREE_CODE (type) == INTEGER_TYPE)
11541 {
11542 if (TYPE_MODE (type) == DFmode && align < 64)
11543 return 64;
11544 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11545 return 128;
11546 }
11547
11548 return align;
11549 }
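/* A hedged illustration, not part of the original source: under the rules
   above a file-scope array such as

	static int table[64];

   whose size, 2048 bits, is at least 256 bits, is aligned to a 256-bit
   (32-byte) boundary; a smaller array of doubles still gets at least
   64-bit alignment for its DFmode elements.  */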
11550
11551 /* Compute the alignment for a local variable.
11552 TYPE is the data type, and ALIGN is the alignment that
11553 the object would ordinarily have. The value of this function is used
11554 instead of that alignment to align the object. */
11555
11556 int
11557 ix86_local_alignment (type, align)
11558 tree type;
11559 int align;
11560 {
11561 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
11562 to a 16-byte boundary. */
11563 if (TARGET_64BIT)
11564 {
11565 if (AGGREGATE_TYPE_P (type)
11566 && TYPE_SIZE (type)
11567 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11568 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11569 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11570 return 128;
11571 }
11572 if (TREE_CODE (type) == ARRAY_TYPE)
11573 {
11574 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11575 return 64;
11576 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11577 return 128;
11578 }
11579 else if (TREE_CODE (type) == COMPLEX_TYPE)
11580 {
11581 if (TYPE_MODE (type) == DCmode && align < 64)
11582 return 64;
11583 if (TYPE_MODE (type) == XCmode && align < 128)
11584 return 128;
11585 }
11586 else if ((TREE_CODE (type) == RECORD_TYPE
11587 || TREE_CODE (type) == UNION_TYPE
11588 || TREE_CODE (type) == QUAL_UNION_TYPE)
11589 && TYPE_FIELDS (type))
11590 {
11591 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11592 return 64;
11593 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11594 return 128;
11595 }
11596 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11597 || TREE_CODE (type) == INTEGER_TYPE)
11598 {
11599
11600 if (TYPE_MODE (type) == DFmode && align < 64)
11601 return 64;
11602 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11603 return 128;
11604 }
11605 return align;
11606 }
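/* A hedged illustration, not part of the original source: on x86-64 a
   local aggregate such as

	char buf[32];

   passes the size test above and gets a 16-byte-aligned stack slot; on
   ia32 the remaining rules largely match those of ix86_data_alignment.  */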
11607 \f
11608 /* Emit RTL insns to initialize the variable parts of a trampoline.
11609 FNADDR is an RTX for the address of the function's pure code.
11610 CXT is an RTX for the static chain value for the function. */
11611 void
11612 x86_initialize_trampoline (tramp, fnaddr, cxt)
11613 rtx tramp, fnaddr, cxt;
11614 {
11615 if (!TARGET_64BIT)
11616 {
11617 /* Compute offset from the end of the jmp to the target function. */
11618 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11619 plus_constant (tramp, 10),
11620 NULL_RTX, 1, OPTAB_DIRECT);
11621 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11622 gen_int_mode (0xb9, QImode));
11623 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11624 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11625 gen_int_mode (0xe9, QImode));
11626 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11627 }
11628 else
11629 {
11630 int offset = 0;
11631 /* Try to load the address using the shorter movl instead of movabs.
11632 We may want to support movq for kernel mode, but the kernel does not use
11633 trampolines at the moment. */
11634 if (x86_64_zero_extended_value (fnaddr))
11635 {
11636 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11637 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11638 gen_int_mode (0xbb41, HImode));
11639 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11640 gen_lowpart (SImode, fnaddr));
11641 offset += 6;
11642 }
11643 else
11644 {
11645 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11646 gen_int_mode (0xbb49, HImode));
11647 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11648 fnaddr);
11649 offset += 10;
11650 }
11651 /* Load static chain using movabs to r10. */
11652 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11653 gen_int_mode (0xba49, HImode));
11654 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11655 cxt);
11656 offset += 10;
11657 /* Jump to r11. */
11658 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11659 gen_int_mode (0xff49, HImode));
11660 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11661 gen_int_mode (0xe3, QImode));
11662 offset += 3;
11663 if (offset > TRAMPOLINE_SIZE)
11664 abort ();
11665 }
11666 }
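/* For illustration only (not part of the original source), the bytes the
   64-bit path above writes into the trampoline are, in the general case:

	49 bb <8-byte FNADDR>	movabs $FNADDR, %r11
	49 ba <8-byte CXT>	movabs $CXT, %r10
	49 ff e3		jmp *%r11

   and when FNADDR fits in 32 bits the first instruction is shortened to
   41 bb <4-byte FNADDR> (movl $FNADDR, %r11d).  The 32-bit path emits
   b9 <CXT> (movl $CXT, %ecx) followed by e9 <DISP> (jmp rel32), with DISP
   computed above relative to the end of the 10-byte sequence.  */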
11667 \f
11668 #define def_builtin(MASK, NAME, TYPE, CODE) \
11669 do { \
11670 if ((MASK) & target_flags) \
11671 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11672 NULL, NULL_TREE); \
11673 } while (0)
11674
11675 struct builtin_description
11676 {
11677 const unsigned int mask;
11678 const enum insn_code icode;
11679 const char *const name;
11680 const enum ix86_builtins code;
11681 const enum rtx_code comparison;
11682 const unsigned int flag;
11683 };
11684
11685 /* Used for builtins that are enabled both by -msse and -msse2. */
11686 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11687
11688 static const struct builtin_description bdesc_comi[] =
11689 {
11690 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11691 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11692 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11693 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11694 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11695 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11696 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11697 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11698 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11699 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11700 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11701 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11702 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11703 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11704 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11705 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11706 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11707 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11708 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11709 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11710 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11711 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11712 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11713 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11714 };
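/* A hedged note, not part of the original source: in the table above the
   "greater" comparisons are encoded as LT/LE with the trailing flag set
   to 1; judging from the pairing, the flag tells the expander to swap the
   two operands, so for example

	__builtin_ia32_comigt (a, b)

   is carried out as the LT comparison with b and a exchanged.  */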
11715
11716 static const struct builtin_description bdesc_2arg[] =
11717 {
11718 /* SSE */
11719 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11720 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11721 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11722 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11723 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11724 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11725 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11726 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11727
11728 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11729 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11730 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11731 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11732 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11733 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11734 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11735 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11736 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11737 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11738 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11739 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11740 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11741 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11742 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11743 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11744 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11745 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11746 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11747 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11748 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11749 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11750 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11751 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11752
11753 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11754 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11755 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11756 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11757
11758 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11759 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11760 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11761 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11762 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11763
11764 /* MMX */
11765 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11766 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11767 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11768 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11769 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11770 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11771
11772 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11773 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11774 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11775 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11776 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11777 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11778 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11779 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11780
11781 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11782 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11783 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11784
11785 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11786 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11787 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11788 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11789
11790 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11791 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11792
11793 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11794 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11795 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11796 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11797 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11798 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11799
11800 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11801 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11802 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11803 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11804
11805 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11806 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11807 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11808 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11809 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11810 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11811
11812 /* Special. */
11813 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11814 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11815 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11816
11817 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11818 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11819
11820 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11821 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11822 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11823 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11824 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11825 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11826
11827 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11828 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11829 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11830 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11831 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11832 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11833
11834 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11835 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11836 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11837 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11838
11839 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11840 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11841
11842 /* SSE2 */
11843 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11845 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11846 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11847 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11848 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11849 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11850 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11851
11852 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11853 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11854 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11855 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11856 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11857 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11858 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11859 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11860 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11861 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11862 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11863 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11864 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11865 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11866 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11867 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11868 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11869 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11870 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11871 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11872 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11873 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11874 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11875 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11876
11877 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11878 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11879 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11880 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11881
11882 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11883 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11884 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11885 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11886
11887 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11889 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11890
11891 /* SSE2 MMX */
11892 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11894 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11900
11901 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11908 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11909
11910 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11914
11915 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11919
11920 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11922
11923 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11927 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11929
11930 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11933 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11934
11935 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11937 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11938 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11941
11942 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11945
11946 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11947 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11948
11949 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11955
11956 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11962
11963 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11967
11968 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11969
11970 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11973 };
11974
11975 static const struct builtin_description bdesc_1arg[] =
11976 {
11977 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11978 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11979
11980 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11981 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11982 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11983
11984 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11985 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11986 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11987 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11988
11989 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11991 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11992
11993 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11994
11995 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11997
11998 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11999 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12000 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12002 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12003
12004 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12005
12006 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12008
12009 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12010 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12011 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
12012 };
12013
12014 void
12015 ix86_init_builtins ()
12016 {
12017 if (TARGET_MMX)
12018 ix86_init_mmx_sse_builtins ();
12019 }
12020
12021 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12022 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
12023 builtins are defined. */
12024 static void
12025 ix86_init_mmx_sse_builtins ()
12026 {
12027 const struct builtin_description * d;
12028 size_t i;
12029
12030 tree pchar_type_node = build_pointer_type (char_type_node);
12031 tree pfloat_type_node = build_pointer_type (float_type_node);
12032 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12033 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12034 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12035
12036 /* Comparisons. */
12037 tree int_ftype_v4sf_v4sf
12038 = build_function_type_list (integer_type_node,
12039 V4SF_type_node, V4SF_type_node, NULL_TREE);
12040 tree v4si_ftype_v4sf_v4sf
12041 = build_function_type_list (V4SI_type_node,
12042 V4SF_type_node, V4SF_type_node, NULL_TREE);
12043 /* MMX/SSE/integer conversions. */
12044 tree int_ftype_v4sf
12045 = build_function_type_list (integer_type_node,
12046 V4SF_type_node, NULL_TREE);
12047 tree int_ftype_v8qi
12048 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12049 tree v4sf_ftype_v4sf_int
12050 = build_function_type_list (V4SF_type_node,
12051 V4SF_type_node, integer_type_node, NULL_TREE);
12052 tree v4sf_ftype_v4sf_v2si
12053 = build_function_type_list (V4SF_type_node,
12054 V4SF_type_node, V2SI_type_node, NULL_TREE);
12055 tree int_ftype_v4hi_int
12056 = build_function_type_list (integer_type_node,
12057 V4HI_type_node, integer_type_node, NULL_TREE);
12058 tree v4hi_ftype_v4hi_int_int
12059 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12060 integer_type_node, integer_type_node,
12061 NULL_TREE);
12062 /* Miscellaneous. */
12063 tree v8qi_ftype_v4hi_v4hi
12064 = build_function_type_list (V8QI_type_node,
12065 V4HI_type_node, V4HI_type_node, NULL_TREE);
12066 tree v4hi_ftype_v2si_v2si
12067 = build_function_type_list (V4HI_type_node,
12068 V2SI_type_node, V2SI_type_node, NULL_TREE);
12069 tree v4sf_ftype_v4sf_v4sf_int
12070 = build_function_type_list (V4SF_type_node,
12071 V4SF_type_node, V4SF_type_node,
12072 integer_type_node, NULL_TREE);
12073 tree v2si_ftype_v4hi_v4hi
12074 = build_function_type_list (V2SI_type_node,
12075 V4HI_type_node, V4HI_type_node, NULL_TREE);
12076 tree v4hi_ftype_v4hi_int
12077 = build_function_type_list (V4HI_type_node,
12078 V4HI_type_node, integer_type_node, NULL_TREE);
12079 tree v4hi_ftype_v4hi_di
12080 = build_function_type_list (V4HI_type_node,
12081 V4HI_type_node, long_long_unsigned_type_node,
12082 NULL_TREE);
12083 tree v2si_ftype_v2si_di
12084 = build_function_type_list (V2SI_type_node,
12085 V2SI_type_node, long_long_unsigned_type_node,
12086 NULL_TREE);
12087 tree void_ftype_void
12088 = build_function_type (void_type_node, void_list_node);
12089 tree void_ftype_unsigned
12090 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12091 tree unsigned_ftype_void
12092 = build_function_type (unsigned_type_node, void_list_node);
12093 tree di_ftype_void
12094 = build_function_type (long_long_unsigned_type_node, void_list_node);
12095 tree v4sf_ftype_void
12096 = build_function_type (V4SF_type_node, void_list_node);
12097 tree v2si_ftype_v4sf
12098 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12099 /* Loads/stores. */
12100 tree void_ftype_v8qi_v8qi_pchar
12101 = build_function_type_list (void_type_node,
12102 V8QI_type_node, V8QI_type_node,
12103 pchar_type_node, NULL_TREE);
12104 tree v4sf_ftype_pfloat
12105 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12106 /* @@@ the type is bogus */
12107 tree v4sf_ftype_v4sf_pv2si
12108 = build_function_type_list (V4SF_type_node,
12109 V4SF_type_node, pv2di_type_node, NULL_TREE);
12110 tree void_ftype_pv2si_v4sf
12111 = build_function_type_list (void_type_node,
12112 pv2di_type_node, V4SF_type_node, NULL_TREE);
12113 tree void_ftype_pfloat_v4sf
12114 = build_function_type_list (void_type_node,
12115 pfloat_type_node, V4SF_type_node, NULL_TREE);
12116 tree void_ftype_pdi_di
12117 = build_function_type_list (void_type_node,
12118 pdi_type_node, long_long_unsigned_type_node,
12119 NULL_TREE);
12120 tree void_ftype_pv2di_v2di
12121 = build_function_type_list (void_type_node,
12122 pv2di_type_node, V2DI_type_node, NULL_TREE);
12123 /* Normal vector unops. */
12124 tree v4sf_ftype_v4sf
12125 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12126
12127 /* Normal vector binops. */
12128 tree v4sf_ftype_v4sf_v4sf
12129 = build_function_type_list (V4SF_type_node,
12130 V4SF_type_node, V4SF_type_node, NULL_TREE);
12131 tree v8qi_ftype_v8qi_v8qi
12132 = build_function_type_list (V8QI_type_node,
12133 V8QI_type_node, V8QI_type_node, NULL_TREE);
12134 tree v4hi_ftype_v4hi_v4hi
12135 = build_function_type_list (V4HI_type_node,
12136 V4HI_type_node, V4HI_type_node, NULL_TREE);
12137 tree v2si_ftype_v2si_v2si
12138 = build_function_type_list (V2SI_type_node,
12139 V2SI_type_node, V2SI_type_node, NULL_TREE);
12140 tree di_ftype_di_di
12141 = build_function_type_list (long_long_unsigned_type_node,
12142 long_long_unsigned_type_node,
12143 long_long_unsigned_type_node, NULL_TREE);
12144
12145 tree v2si_ftype_v2sf
12146 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12147 tree v2sf_ftype_v2si
12148 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12149 tree v2si_ftype_v2si
12150 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12151 tree v2sf_ftype_v2sf
12152 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12153 tree v2sf_ftype_v2sf_v2sf
12154 = build_function_type_list (V2SF_type_node,
12155 V2SF_type_node, V2SF_type_node, NULL_TREE);
12156 tree v2si_ftype_v2sf_v2sf
12157 = build_function_type_list (V2SI_type_node,
12158 V2SF_type_node, V2SF_type_node, NULL_TREE);
12159 tree pint_type_node = build_pointer_type (integer_type_node);
12160 tree pdouble_type_node = build_pointer_type (double_type_node);
12161 tree int_ftype_v2df_v2df
12162 = build_function_type_list (integer_type_node,
12163 V2DF_type_node, V2DF_type_node, NULL_TREE);
12164
12165 tree ti_ftype_void
12166 = build_function_type (intTI_type_node, void_list_node);
12167 tree ti_ftype_ti_ti
12168 = build_function_type_list (intTI_type_node,
12169 intTI_type_node, intTI_type_node, NULL_TREE);
12170 tree void_ftype_pvoid
12171 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12172 tree v2di_ftype_di
12173 = build_function_type_list (V2DI_type_node,
12174 long_long_unsigned_type_node, NULL_TREE);
12175 tree v4sf_ftype_v4si
12176 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12177 tree v4si_ftype_v4sf
12178 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12179 tree v2df_ftype_v4si
12180 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12181 tree v4si_ftype_v2df
12182 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12183 tree v2si_ftype_v2df
12184 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12185 tree v4sf_ftype_v2df
12186 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12187 tree v2df_ftype_v2si
12188 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12189 tree v2df_ftype_v4sf
12190 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12191 tree int_ftype_v2df
12192 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12193 tree v2df_ftype_v2df_int
12194 = build_function_type_list (V2DF_type_node,
12195 V2DF_type_node, integer_type_node, NULL_TREE);
12196 tree v4sf_ftype_v4sf_v2df
12197 = build_function_type_list (V4SF_type_node,
12198 V4SF_type_node, V2DF_type_node, NULL_TREE);
12199 tree v2df_ftype_v2df_v4sf
12200 = build_function_type_list (V2DF_type_node,
12201 V2DF_type_node, V4SF_type_node, NULL_TREE);
12202 tree v2df_ftype_v2df_v2df_int
12203 = build_function_type_list (V2DF_type_node,
12204 V2DF_type_node, V2DF_type_node,
12205 integer_type_node,
12206 NULL_TREE);
12207 tree v2df_ftype_v2df_pv2si
12208 = build_function_type_list (V2DF_type_node,
12209 V2DF_type_node, pv2si_type_node, NULL_TREE);
12210 tree void_ftype_pv2si_v2df
12211 = build_function_type_list (void_type_node,
12212 pv2si_type_node, V2DF_type_node, NULL_TREE);
12213 tree void_ftype_pdouble_v2df
12214 = build_function_type_list (void_type_node,
12215 pdouble_type_node, V2DF_type_node, NULL_TREE);
12216 tree void_ftype_pint_int
12217 = build_function_type_list (void_type_node,
12218 pint_type_node, integer_type_node, NULL_TREE);
12219 tree void_ftype_v16qi_v16qi_pchar
12220 = build_function_type_list (void_type_node,
12221 V16QI_type_node, V16QI_type_node,
12222 pchar_type_node, NULL_TREE);
12223 tree v2df_ftype_pdouble
12224 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12225 tree v2df_ftype_v2df_v2df
12226 = build_function_type_list (V2DF_type_node,
12227 V2DF_type_node, V2DF_type_node, NULL_TREE);
12228 tree v16qi_ftype_v16qi_v16qi
12229 = build_function_type_list (V16QI_type_node,
12230 V16QI_type_node, V16QI_type_node, NULL_TREE);
12231 tree v8hi_ftype_v8hi_v8hi
12232 = build_function_type_list (V8HI_type_node,
12233 V8HI_type_node, V8HI_type_node, NULL_TREE);
12234 tree v4si_ftype_v4si_v4si
12235 = build_function_type_list (V4SI_type_node,
12236 V4SI_type_node, V4SI_type_node, NULL_TREE);
12237 tree v2di_ftype_v2di_v2di
12238 = build_function_type_list (V2DI_type_node,
12239 V2DI_type_node, V2DI_type_node, NULL_TREE);
12240 tree v2di_ftype_v2df_v2df
12241 = build_function_type_list (V2DI_type_node,
12242 V2DF_type_node, V2DF_type_node, NULL_TREE);
12243 tree v2df_ftype_v2df
12244 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12245 tree v2df_ftype_double
12246 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12247 tree v2df_ftype_double_double
12248 = build_function_type_list (V2DF_type_node,
12249 double_type_node, double_type_node, NULL_TREE);
12250 tree int_ftype_v8hi_int
12251 = build_function_type_list (integer_type_node,
12252 V8HI_type_node, integer_type_node, NULL_TREE);
12253 tree v8hi_ftype_v8hi_int_int
12254 = build_function_type_list (V8HI_type_node,
12255 V8HI_type_node, integer_type_node,
12256 integer_type_node, NULL_TREE);
12257 tree v2di_ftype_v2di_int
12258 = build_function_type_list (V2DI_type_node,
12259 V2DI_type_node, integer_type_node, NULL_TREE);
12260 tree v4si_ftype_v4si_int
12261 = build_function_type_list (V4SI_type_node,
12262 V4SI_type_node, integer_type_node, NULL_TREE);
12263 tree v8hi_ftype_v8hi_int
12264 = build_function_type_list (V8HI_type_node,
12265 V8HI_type_node, integer_type_node, NULL_TREE);
12266 tree v8hi_ftype_v8hi_v2di
12267 = build_function_type_list (V8HI_type_node,
12268 V8HI_type_node, V2DI_type_node, NULL_TREE);
12269 tree v4si_ftype_v4si_v2di
12270 = build_function_type_list (V4SI_type_node,
12271 V4SI_type_node, V2DI_type_node, NULL_TREE);
12272 tree v4si_ftype_v8hi_v8hi
12273 = build_function_type_list (V4SI_type_node,
12274 V8HI_type_node, V8HI_type_node, NULL_TREE);
12275 tree di_ftype_v8qi_v8qi
12276 = build_function_type_list (long_long_unsigned_type_node,
12277 V8QI_type_node, V8QI_type_node, NULL_TREE);
12278 tree v2di_ftype_v16qi_v16qi
12279 = build_function_type_list (V2DI_type_node,
12280 V16QI_type_node, V16QI_type_node, NULL_TREE);
12281 tree int_ftype_v16qi
12282 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12283
12284 /* Add all builtins that are more or less simple operations on two
12285 operands. */
12286 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12287 {
12288 /* Use one of the operands; the target can have a different mode for
12289 mask-generating compares. */
12290 enum machine_mode mode;
12291 tree type;
12292
12293 if (d->name == 0)
12294 continue;
12295 mode = insn_data[d->icode].operand[1].mode;
12296
12297 switch (mode)
12298 {
12299 case V16QImode:
12300 type = v16qi_ftype_v16qi_v16qi;
12301 break;
12302 case V8HImode:
12303 type = v8hi_ftype_v8hi_v8hi;
12304 break;
12305 case V4SImode:
12306 type = v4si_ftype_v4si_v4si;
12307 break;
12308 case V2DImode:
12309 type = v2di_ftype_v2di_v2di;
12310 break;
12311 case V2DFmode:
12312 type = v2df_ftype_v2df_v2df;
12313 break;
12314 case TImode:
12315 type = ti_ftype_ti_ti;
12316 break;
12317 case V4SFmode:
12318 type = v4sf_ftype_v4sf_v4sf;
12319 break;
12320 case V8QImode:
12321 type = v8qi_ftype_v8qi_v8qi;
12322 break;
12323 case V4HImode:
12324 type = v4hi_ftype_v4hi_v4hi;
12325 break;
12326 case V2SImode:
12327 type = v2si_ftype_v2si_v2si;
12328 break;
12329 case DImode:
12330 type = di_ftype_di_di;
12331 break;
12332
12333 default:
12334 abort ();
12335 }
12336
12337 /* Override for comparisons. */
12338 if (d->icode == CODE_FOR_maskcmpv4sf3
12339 || d->icode == CODE_FOR_maskncmpv4sf3
12340 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12341 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12342 type = v4si_ftype_v4sf_v4sf;
12343
12344 if (d->icode == CODE_FOR_maskcmpv2df3
12345 || d->icode == CODE_FOR_maskncmpv2df3
12346 || d->icode == CODE_FOR_vmmaskcmpv2df3
12347 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12348 type = v2di_ftype_v2df_v2df;
12349
12350 def_builtin (d->mask, d->name, type, d->code);
12351 }
12352
12353 /* Add the remaining MMX insns with somewhat more complicated types. */
12354 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12355 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12356 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12357 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12358 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12359 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12360 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12361
12362 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12363 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12364 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12365
12366 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12367 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12368
12369 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12370 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12371
12372 /* comi/ucomi insns. */
12373 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12374 if (d->mask == MASK_SSE2)
12375 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12376 else
12377 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12378
12379 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12380 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12381 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12382
12383 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12384 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12385 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12386 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12387 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12388 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12389
12390 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12391 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12392 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12393 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12394
12395 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12396 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12397
12398 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12399
12400 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12401 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12402 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12403 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12404 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12405 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12406
12407 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12408 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12409 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12410 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12411
12412 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12413 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12414 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12415 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12416
12417 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12418
12419 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12420
12421 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12422 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12423 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12424 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12425 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12426 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12427
12428 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12429
12430 /* Original 3DNow! */
12431 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12432 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12433 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12434 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12435 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12436 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12437 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12438 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12439 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12440 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12441 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12442 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12443 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12444 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12445 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12446 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12447 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12448 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12449 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12450 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12451
12452 /* 3DNow! extension as used in the Athlon CPU. */
12453 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12454 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12455 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12456 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12457 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12458 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12459
12460 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12461
12462 /* SSE2 */
12463 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12464 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12465
12466 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12467 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12468
12469 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12470 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12471 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12472 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12473 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12474 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12475
12476 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12477 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12478 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12479 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12480
12481 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12483 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12484 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12485 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12486
12487 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12488 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12490 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12491
12492 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12494
12495 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12496
12497 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12498 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12499
12500 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12501 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12502 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12503 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12504 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12505
12506 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12507
12508 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12509 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12510
12511 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12512 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12513 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12514
12515 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12517 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12518
12519 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12520 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12522 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12523 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12524 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12525 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12526
12527 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12528 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12530
12531 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12534
12535 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12538
12539 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12541
12542 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12545
12546 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12549
12550 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12552
12553 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12554 }
12555
12556 /* Errors in the source file can cause expand_expr to return const0_rtx
12557 where we expect a vector. To avoid crashing, use one of the vector
12558 clear instructions. */
12559 static rtx
12560 safe_vector_operand (x, mode)
12561 rtx x;
12562 enum machine_mode mode;
12563 {
12564 if (x != const0_rtx)
12565 return x;
12566 x = gen_reg_rtx (mode);
12567
12568 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12569 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12570 : gen_rtx_SUBREG (DImode, x, 0)));
12571 else
12572 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12573 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12574 return x;
12575 }
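/* A note on the clear patterns used above: the MMX case emits the mmx_clrdi
   pattern and the SSE case the sse_clrv4sf pattern, which typically assemble
   to a pxor/xorps of the register with itself, so the dummy operand is a
   properly zeroed vector rather than const0_rtx.  */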
12576
12577 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12578
12579 static rtx
12580 ix86_expand_binop_builtin (icode, arglist, target)
12581 enum insn_code icode;
12582 tree arglist;
12583 rtx target;
12584 {
12585 rtx pat;
12586 tree arg0 = TREE_VALUE (arglist);
12587 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12588 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12589 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12590 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12591 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12592 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12593
12594 if (VECTOR_MODE_P (mode0))
12595 op0 = safe_vector_operand (op0, mode0);
12596 if (VECTOR_MODE_P (mode1))
12597 op1 = safe_vector_operand (op1, mode1);
12598
12599 if (! target
12600 || GET_MODE (target) != tmode
12601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12602 target = gen_reg_rtx (tmode);
12603
12604 /* In case the insn wants input operands in modes different from
12605 the result, abort. */
12606 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12607 abort ();
12608
12609 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12610 op0 = copy_to_mode_reg (mode0, op0);
12611 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12612 op1 = copy_to_mode_reg (mode1, op1);
12613
12614 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12615 yet one of the two must not be a memory. This is normally enforced
12616 by expanders, but we didn't bother to create one here. */
12617 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12618 op0 = copy_to_mode_reg (mode0, op0);
12619
12620 pat = GEN_FCN (icode) (target, op0, op1);
12621 if (! pat)
12622 return 0;
12623 emit_insn (pat);
12624 return target;
12625 }
12626
12627 /* In type_for_mode we restrict the ability to create TImode types
12628 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12629 to have a V4SFmode signature. Convert them in-place to TImode. */
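/* Concretely, IX86_BUILTIN_ANDPS (declared with a V4SFmode signature for the
   reason above) is dispatched from ix86_expand_builtin below with
   CODE_FOR_sse_andti3, so both operands are viewed as TImode here and the
   result is converted back to V4SFmode at the end of this function.  */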
12630
12631 static rtx
12632 ix86_expand_timode_binop_builtin (icode, arglist, target)
12633 enum insn_code icode;
12634 tree arglist;
12635 rtx target;
12636 {
12637 rtx pat;
12638 tree arg0 = TREE_VALUE (arglist);
12639 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12640 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12641 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12642
12643 op0 = gen_lowpart (TImode, op0);
12644 op1 = gen_lowpart (TImode, op1);
12645 target = gen_reg_rtx (TImode);
12646
12647 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12648 op0 = copy_to_mode_reg (TImode, op0);
12649 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12650 op1 = copy_to_mode_reg (TImode, op1);
12651
12652 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12653 yet one of the two must not be a memory. This is normally enforced
12654 by expanders, but we didn't bother to create one here. */
12655 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12656 op0 = copy_to_mode_reg (TImode, op0);
12657
12658 pat = GEN_FCN (icode) (target, op0, op1);
12659 if (! pat)
12660 return 0;
12661 emit_insn (pat);
12662
12663 return gen_lowpart (V4SFmode, target);
12664 }
12665
12666 /* Subroutine of ix86_expand_builtin to take care of stores. */
12667
12668 static rtx
12669 ix86_expand_store_builtin (icode, arglist)
12670 enum insn_code icode;
12671 tree arglist;
12672 {
12673 rtx pat;
12674 tree arg0 = TREE_VALUE (arglist);
12675 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12676 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12677 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12678 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12679 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12680
12681 if (VECTOR_MODE_P (mode1))
12682 op1 = safe_vector_operand (op1, mode1);
12683
12684 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12685
12686 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12687 op1 = copy_to_mode_reg (mode1, op1);
12688
12689 pat = GEN_FCN (icode) (op0, op1);
12690 if (pat)
12691 emit_insn (pat);
12692 return 0;
12693 }
12694
12695 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12696
12697 static rtx
12698 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12699 enum insn_code icode;
12700 tree arglist;
12701 rtx target;
12702 int do_load;
12703 {
12704 rtx pat;
12705 tree arg0 = TREE_VALUE (arglist);
12706 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12707 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12708 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12709
12710 if (! target
12711 || GET_MODE (target) != tmode
12712 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12713 target = gen_reg_rtx (tmode);
12714 if (do_load)
12715 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12716 else
12717 {
12718 if (VECTOR_MODE_P (mode0))
12719 op0 = safe_vector_operand (op0, mode0);
12720
12721 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12722 op0 = copy_to_mode_reg (mode0, op0);
12723 }
12724
12725 pat = GEN_FCN (icode) (target, op0);
12726 if (! pat)
12727 return 0;
12728 emit_insn (pat);
12729 return target;
12730 }
12731
12732 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12733 sqrtss, rsqrtss, rcpss. */
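/* These insns take the source twice: one input operand supplies the element
   that is actually computed and the other supplies the elements that pass
   through unchanged.  Using the same register for both (op1 = op0 below)
   gives the usual scalar semantics where only element 0 is replaced.  */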
12734
12735 static rtx
12736 ix86_expand_unop1_builtin (icode, arglist, target)
12737 enum insn_code icode;
12738 tree arglist;
12739 rtx target;
12740 {
12741 rtx pat;
12742 tree arg0 = TREE_VALUE (arglist);
12743 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12744 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12745 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12746
12747 if (! target
12748 || GET_MODE (target) != tmode
12749 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12750 target = gen_reg_rtx (tmode);
12751
12752 if (VECTOR_MODE_P (mode0))
12753 op0 = safe_vector_operand (op0, mode0);
12754
12755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12756 op0 = copy_to_mode_reg (mode0, op0);
12757
12758 op1 = op0;
12759 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12760 op1 = copy_to_mode_reg (mode0, op1);
12761
12762 pat = GEN_FCN (icode) (target, op0, op1);
12763 if (! pat)
12764 return 0;
12765 emit_insn (pat);
12766 return target;
12767 }
12768
12769 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12770
12771 static rtx
12772 ix86_expand_sse_compare (d, arglist, target)
12773 const struct builtin_description *d;
12774 tree arglist;
12775 rtx target;
12776 {
12777 rtx pat;
12778 tree arg0 = TREE_VALUE (arglist);
12779 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12780 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12781 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12782 rtx op2;
12783 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12784 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12785 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12786 enum rtx_code comparison = d->comparison;
12787
12788 if (VECTOR_MODE_P (mode0))
12789 op0 = safe_vector_operand (op0, mode0);
12790 if (VECTOR_MODE_P (mode1))
12791 op1 = safe_vector_operand (op1, mode1);
12792
12793 /* Swap operands if we have a comparison that isn't available in
12794 hardware. */
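/* For example, SSE cmpps encodes only EQ/LT/LE/UNORD and their negations, so
   a GT or GE builtin is expanded as the swapped LT or LE; d->flag marks such
   entries in the builtin tables.  */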
12795 if (d->flag)
12796 {
12797 rtx tmp = gen_reg_rtx (mode1);
12798 emit_move_insn (tmp, op1);
12799 op1 = op0;
12800 op0 = tmp;
12801 }
12802
12803 if (! target
12804 || GET_MODE (target) != tmode
12805 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12806 target = gen_reg_rtx (tmode);
12807
12808 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12809 op0 = copy_to_mode_reg (mode0, op0);
12810 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12811 op1 = copy_to_mode_reg (mode1, op1);
12812
12813 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12814 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12815 if (! pat)
12816 return 0;
12817 emit_insn (pat);
12818 return target;
12819 }
12820
12821 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12822
12823 static rtx
12824 ix86_expand_sse_comi (d, arglist, target)
12825 const struct builtin_description *d;
12826 tree arglist;
12827 rtx target;
12828 {
12829 rtx pat;
12830 tree arg0 = TREE_VALUE (arglist);
12831 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12832 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12833 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12834 rtx op2;
12835 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12836 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12837 enum rtx_code comparison = d->comparison;
12838
12839 if (VECTOR_MODE_P (mode0))
12840 op0 = safe_vector_operand (op0, mode0);
12841 if (VECTOR_MODE_P (mode1))
12842 op1 = safe_vector_operand (op1, mode1);
12843
12844 /* Swap operands if we have a comparison that isn't available in
12845 hardware. */
12846 if (d->flag)
12847 {
12848 rtx tmp = op1;
12849 op1 = op0;
12850 op0 = tmp;
12851 }
12852
12853 target = gen_reg_rtx (SImode);
12854 emit_move_insn (target, const0_rtx);
12855 target = gen_rtx_SUBREG (QImode, target, 0);
12856
12857 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12858 op0 = copy_to_mode_reg (mode0, op0);
12859 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12860 op1 = copy_to_mode_reg (mode1, op1);
12861
12862 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12863 pat = GEN_FCN (d->icode) (op0, op1, op2);
12864 if (! pat)
12865 return 0;
12866 emit_insn (pat);
12867 emit_insn (gen_rtx_SET (VOIDmode,
12868 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12869 gen_rtx_fmt_ee (comparison, QImode,
12870 gen_rtx_REG (CCmode, FLAGS_REG),
12871 const0_rtx)));
12872
12873 return SUBREG_REG (target);
12874 }
12875
12876 /* Expand an expression EXP that calls a built-in function,
12877 with result going to TARGET if that's convenient
12878 (and in mode MODE if that's convenient).
12879 SUBTARGET may be used as the target for computing one of EXP's operands.
12880 IGNORE is nonzero if the value is to be ignored. */
12881
12882 rtx
12883 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12884 tree exp;
12885 rtx target;
12886 rtx subtarget ATTRIBUTE_UNUSED;
12887 enum machine_mode mode ATTRIBUTE_UNUSED;
12888 int ignore ATTRIBUTE_UNUSED;
12889 {
12890 const struct builtin_description *d;
12891 size_t i;
12892 enum insn_code icode;
12893 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12894 tree arglist = TREE_OPERAND (exp, 1);
12895 tree arg0, arg1, arg2;
12896 rtx op0, op1, op2, pat;
12897 enum machine_mode tmode, mode0, mode1, mode2;
12898 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12899
12900 switch (fcode)
12901 {
12902 case IX86_BUILTIN_EMMS:
12903 emit_insn (gen_emms ());
12904 return 0;
12905
12906 case IX86_BUILTIN_SFENCE:
12907 emit_insn (gen_sfence ());
12908 return 0;
12909
12910 case IX86_BUILTIN_PEXTRW:
12911 case IX86_BUILTIN_PEXTRW128:
12912 icode = (fcode == IX86_BUILTIN_PEXTRW
12913 ? CODE_FOR_mmx_pextrw
12914 : CODE_FOR_sse2_pextrw);
12915 arg0 = TREE_VALUE (arglist);
12916 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12917 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12918 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12919 tmode = insn_data[icode].operand[0].mode;
12920 mode0 = insn_data[icode].operand[1].mode;
12921 mode1 = insn_data[icode].operand[2].mode;
12922
12923 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12924 op0 = copy_to_mode_reg (mode0, op0);
12925 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12926 {
12927 /* @@@ better error message */
12928 error ("selector must be an immediate");
12929 return gen_reg_rtx (tmode);
12930 }
12931 if (target == 0
12932 || GET_MODE (target) != tmode
12933 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12934 target = gen_reg_rtx (tmode);
12935 pat = GEN_FCN (icode) (target, op0, op1);
12936 if (! pat)
12937 return 0;
12938 emit_insn (pat);
12939 return target;
12940
12941 case IX86_BUILTIN_PINSRW:
12942 case IX86_BUILTIN_PINSRW128:
12943 icode = (fcode == IX86_BUILTIN_PINSRW
12944 ? CODE_FOR_mmx_pinsrw
12945 : CODE_FOR_sse2_pinsrw);
12946 arg0 = TREE_VALUE (arglist);
12947 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12948 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12949 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12950 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12951 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12952 tmode = insn_data[icode].operand[0].mode;
12953 mode0 = insn_data[icode].operand[1].mode;
12954 mode1 = insn_data[icode].operand[2].mode;
12955 mode2 = insn_data[icode].operand[3].mode;
12956
12957 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12958 op0 = copy_to_mode_reg (mode0, op0);
12959 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12960 op1 = copy_to_mode_reg (mode1, op1);
12961 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12962 {
12963 /* @@@ better error message */
12964 error ("selector must be an immediate");
12965 return const0_rtx;
12966 }
12967 if (target == 0
12968 || GET_MODE (target) != tmode
12969 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12970 target = gen_reg_rtx (tmode);
12971 pat = GEN_FCN (icode) (target, op0, op1, op2);
12972 if (! pat)
12973 return 0;
12974 emit_insn (pat);
12975 return target;
12976
12977 case IX86_BUILTIN_MASKMOVQ:
12978 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12979 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12980 : CODE_FOR_sse2_maskmovdqu);
12981 /* Note the arg order is different from the operand order. */
12982 arg1 = TREE_VALUE (arglist);
12983 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12984 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12985 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12986 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12987 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12988 mode0 = insn_data[icode].operand[0].mode;
12989 mode1 = insn_data[icode].operand[1].mode;
12990 mode2 = insn_data[icode].operand[2].mode;
12991
12992 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12993 op0 = copy_to_mode_reg (mode0, op0);
12994 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12995 op1 = copy_to_mode_reg (mode1, op1);
12996 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12997 op2 = copy_to_mode_reg (mode2, op2);
12998 pat = GEN_FCN (icode) (op0, op1, op2);
12999 if (! pat)
13000 return 0;
13001 emit_insn (pat);
13002 return 0;
13003
13004 case IX86_BUILTIN_SQRTSS:
13005 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13006 case IX86_BUILTIN_RSQRTSS:
13007 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13008 case IX86_BUILTIN_RCPSS:
13009 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13010
13011 case IX86_BUILTIN_ANDPS:
13012 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
13013 arglist, target);
13014 case IX86_BUILTIN_ANDNPS:
13015 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
13016 arglist, target);
13017 case IX86_BUILTIN_ORPS:
13018 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
13019 arglist, target);
13020 case IX86_BUILTIN_XORPS:
13021 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
13022 arglist, target);
13023
13024 case IX86_BUILTIN_LOADAPS:
13025 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13026
13027 case IX86_BUILTIN_LOADUPS:
13028 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13029
13030 case IX86_BUILTIN_STOREAPS:
13031 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13032 case IX86_BUILTIN_STOREUPS:
13033 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13034
13035 case IX86_BUILTIN_LOADSS:
13036 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13037
13038 case IX86_BUILTIN_STORESS:
13039 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13040
13041 case IX86_BUILTIN_LOADHPS:
13042 case IX86_BUILTIN_LOADLPS:
13043 case IX86_BUILTIN_LOADHPD:
13044 case IX86_BUILTIN_LOADLPD:
13045 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13046 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13047 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13048 : CODE_FOR_sse2_movlpd);
13049 arg0 = TREE_VALUE (arglist);
13050 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13051 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13052 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13053 tmode = insn_data[icode].operand[0].mode;
13054 mode0 = insn_data[icode].operand[1].mode;
13055 mode1 = insn_data[icode].operand[2].mode;
13056
13057 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13058 op0 = copy_to_mode_reg (mode0, op0);
13059 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13060 if (target == 0
13061 || GET_MODE (target) != tmode
13062 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13063 target = gen_reg_rtx (tmode);
13064 pat = GEN_FCN (icode) (target, op0, op1);
13065 if (! pat)
13066 return 0;
13067 emit_insn (pat);
13068 return target;
13069
13070 case IX86_BUILTIN_STOREHPS:
13071 case IX86_BUILTIN_STORELPS:
13072 case IX86_BUILTIN_STOREHPD:
13073 case IX86_BUILTIN_STORELPD:
13074 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13075 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13076 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13077 : CODE_FOR_sse2_movlpd);
13078 arg0 = TREE_VALUE (arglist);
13079 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13080 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13081 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13082 mode0 = insn_data[icode].operand[1].mode;
13083 mode1 = insn_data[icode].operand[2].mode;
13084
13085 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13086 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13087 op1 = copy_to_mode_reg (mode1, op1);
13088
13089 pat = GEN_FCN (icode) (op0, op0, op1);
13090 if (! pat)
13091 return 0;
13092 emit_insn (pat);
13093 return 0;
13094
13095 case IX86_BUILTIN_MOVNTPS:
13096 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13097 case IX86_BUILTIN_MOVNTQ:
13098 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13099
13100 case IX86_BUILTIN_LDMXCSR:
13101 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13102 target = assign_386_stack_local (SImode, 0);
13103 emit_move_insn (target, op0);
13104 emit_insn (gen_ldmxcsr (target));
13105 return 0;
13106
13107 case IX86_BUILTIN_STMXCSR:
13108 target = assign_386_stack_local (SImode, 0);
13109 emit_insn (gen_stmxcsr (target));
13110 return copy_to_mode_reg (SImode, target);
13111
13112 case IX86_BUILTIN_SHUFPS:
13113 case IX86_BUILTIN_SHUFPD:
13114 icode = (fcode == IX86_BUILTIN_SHUFPS
13115 ? CODE_FOR_sse_shufps
13116 : CODE_FOR_sse2_shufpd);
13117 arg0 = TREE_VALUE (arglist);
13118 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13119 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13120 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13121 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13122 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13123 tmode = insn_data[icode].operand[0].mode;
13124 mode0 = insn_data[icode].operand[1].mode;
13125 mode1 = insn_data[icode].operand[2].mode;
13126 mode2 = insn_data[icode].operand[3].mode;
13127
13128 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13129 op0 = copy_to_mode_reg (mode0, op0);
13130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13131 op1 = copy_to_mode_reg (mode1, op1);
13132 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13133 {
13134 /* @@@ better error message */
13135 error ("mask must be an immediate");
13136 return gen_reg_rtx (tmode);
13137 }
13138 if (target == 0
13139 || GET_MODE (target) != tmode
13140 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13141 target = gen_reg_rtx (tmode);
13142 pat = GEN_FCN (icode) (target, op0, op1, op2);
13143 if (! pat)
13144 return 0;
13145 emit_insn (pat);
13146 return target;
13147
13148 case IX86_BUILTIN_PSHUFW:
13149 case IX86_BUILTIN_PSHUFD:
13150 case IX86_BUILTIN_PSHUFHW:
13151 case IX86_BUILTIN_PSHUFLW:
13152 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13153 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13154 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13155 : CODE_FOR_mmx_pshufw);
13156 arg0 = TREE_VALUE (arglist);
13157 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13158 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13159 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13160 tmode = insn_data[icode].operand[0].mode;
13161 mode1 = insn_data[icode].operand[1].mode;
13162 mode2 = insn_data[icode].operand[2].mode;
13163
13164 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13165 op0 = copy_to_mode_reg (mode1, op0);
13166 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13167 {
13168 /* @@@ better error message */
13169 error ("mask must be an immediate");
13170 return const0_rtx;
13171 }
13172 if (target == 0
13173 || GET_MODE (target) != tmode
13174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13175 target = gen_reg_rtx (tmode);
13176 pat = GEN_FCN (icode) (target, op0, op1);
13177 if (! pat)
13178 return 0;
13179 emit_insn (pat);
13180 return target;
13181
13182 case IX86_BUILTIN_FEMMS:
13183 emit_insn (gen_femms ());
13184 return NULL_RTX;
13185
13186 case IX86_BUILTIN_PAVGUSB:
13187 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13188
13189 case IX86_BUILTIN_PF2ID:
13190 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13191
13192 case IX86_BUILTIN_PFACC:
13193 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13194
13195 case IX86_BUILTIN_PFADD:
13196 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13197
13198 case IX86_BUILTIN_PFCMPEQ:
13199 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13200
13201 case IX86_BUILTIN_PFCMPGE:
13202 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13203
13204 case IX86_BUILTIN_PFCMPGT:
13205 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13206
13207 case IX86_BUILTIN_PFMAX:
13208 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13209
13210 case IX86_BUILTIN_PFMIN:
13211 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13212
13213 case IX86_BUILTIN_PFMUL:
13214 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13215
13216 case IX86_BUILTIN_PFRCP:
13217 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13218
13219 case IX86_BUILTIN_PFRCPIT1:
13220 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13221
13222 case IX86_BUILTIN_PFRCPIT2:
13223 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13224
13225 case IX86_BUILTIN_PFRSQIT1:
13226 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13227
13228 case IX86_BUILTIN_PFRSQRT:
13229 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13230
13231 case IX86_BUILTIN_PFSUB:
13232 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13233
13234 case IX86_BUILTIN_PFSUBR:
13235 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13236
13237 case IX86_BUILTIN_PI2FD:
13238 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13239
13240 case IX86_BUILTIN_PMULHRW:
13241 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13242
13243 case IX86_BUILTIN_PF2IW:
13244 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13245
13246 case IX86_BUILTIN_PFNACC:
13247 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13248
13249 case IX86_BUILTIN_PFPNACC:
13250 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13251
13252 case IX86_BUILTIN_PI2FW:
13253 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13254
13255 case IX86_BUILTIN_PSWAPDSI:
13256 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13257
13258 case IX86_BUILTIN_PSWAPDSF:
13259 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13260
13261 case IX86_BUILTIN_SSE_ZERO:
13262 target = gen_reg_rtx (V4SFmode);
13263 emit_insn (gen_sse_clrv4sf (target));
13264 return target;
13265
13266 case IX86_BUILTIN_MMX_ZERO:
13267 target = gen_reg_rtx (DImode);
13268 emit_insn (gen_mmx_clrdi (target));
13269 return target;
13270
13271 case IX86_BUILTIN_SQRTSD:
13272 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13273 case IX86_BUILTIN_LOADAPD:
13274 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13275 case IX86_BUILTIN_LOADUPD:
13276 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13277
13278 case IX86_BUILTIN_STOREAPD:
13279 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13280 case IX86_BUILTIN_STOREUPD:
13281 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13282
13283 case IX86_BUILTIN_LOADSD:
13284 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13285
13286 case IX86_BUILTIN_STORESD:
13287 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13288
13289 case IX86_BUILTIN_SETPD1:
13290 target = assign_386_stack_local (DFmode, 0);
13291 arg0 = TREE_VALUE (arglist);
13292 emit_move_insn (adjust_address (target, DFmode, 0),
13293 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13294 op0 = gen_reg_rtx (V2DFmode);
13295 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13296 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
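      /* The shufpd just emitted uses selector 0, which copies element 0 of
	 op0 into both halves of the result, giving the "broadcast one
	 double" semantics of __builtin_ia32_setpd1.  */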
13297 return op0;
13298
13299 case IX86_BUILTIN_SETPD:
13300 target = assign_386_stack_local (V2DFmode, 0);
13301 arg0 = TREE_VALUE (arglist);
13302 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13303 emit_move_insn (adjust_address (target, DFmode, 0),
13304 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13305 emit_move_insn (adjust_address (target, DFmode, 8),
13306 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13307 op0 = gen_reg_rtx (V2DFmode);
13308 emit_insn (gen_sse2_movapd (op0, target));
13309 return op0;
13310
13311 case IX86_BUILTIN_LOADRPD:
13312 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13313 gen_reg_rtx (V2DFmode), 1);
13314 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13315 return target;
13316
13317 case IX86_BUILTIN_LOADPD1:
13318 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13319 gen_reg_rtx (V2DFmode), 1);
13320 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13321 return target;
13322
13323 case IX86_BUILTIN_STOREPD1:
13324 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13325 case IX86_BUILTIN_STORERPD:
13326 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13327
13328 case IX86_BUILTIN_MFENCE:
13329 emit_insn (gen_sse2_mfence ());
13330 return 0;
13331 case IX86_BUILTIN_LFENCE:
13332 emit_insn (gen_sse2_lfence ());
13333 return 0;
13334
13335 case IX86_BUILTIN_CLFLUSH:
13336 arg0 = TREE_VALUE (arglist);
13337 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13338 icode = CODE_FOR_sse2_clflush;
13339 mode0 = insn_data[icode].operand[0].mode;
13340 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13341 op0 = copy_to_mode_reg (mode0, op0);
13342
13343 emit_insn (gen_sse2_clflush (op0));
13344 return 0;
13345
13346 case IX86_BUILTIN_MOVNTPD:
13347 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13348 case IX86_BUILTIN_MOVNTDQ:
13349 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13350 case IX86_BUILTIN_MOVNTI:
13351 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13352
13353 default:
13354 break;
13355 }
13356
13357 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13358 if (d->code == fcode)
13359 {
13360 /* Compares are treated specially. */
13361 if (d->icode == CODE_FOR_maskcmpv4sf3
13362 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13363 || d->icode == CODE_FOR_maskncmpv4sf3
13364 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13365 || d->icode == CODE_FOR_maskcmpv2df3
13366 || d->icode == CODE_FOR_vmmaskcmpv2df3
13367 || d->icode == CODE_FOR_maskncmpv2df3
13368 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13369 return ix86_expand_sse_compare (d, arglist, target);
13370
13371 return ix86_expand_binop_builtin (d->icode, arglist, target);
13372 }
13373
13374 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13375 if (d->code == fcode)
13376 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13377
13378 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13379 if (d->code == fcode)
13380 return ix86_expand_sse_comi (d, arglist, target);
13381
13382 /* @@@ Should really do something sensible here. */
13383 return 0;
13384 }
13385
13386 /* Store OPERAND to the memory after reload is completed. This means
13387 that we can't easily use assign_stack_local. */
13388 rtx
13389 ix86_force_to_memory (mode, operand)
13390 enum machine_mode mode;
13391 rtx operand;
13392 {
13393 rtx result;
13394 if (!reload_completed)
13395 abort ();
13396 if (TARGET_64BIT && TARGET_RED_ZONE)
13397 {
13398 result = gen_rtx_MEM (mode,
13399 gen_rtx_PLUS (Pmode,
13400 stack_pointer_rtx,
13401 GEN_INT (-RED_ZONE_SIZE)));
13402 emit_move_insn (result, operand);
13403 }
13404 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13405 {
13406 switch (mode)
13407 {
13408 case HImode:
13409 case SImode:
13410 operand = gen_lowpart (DImode, operand);
13411 /* FALLTHRU */
13412 case DImode:
13413 emit_insn (
13414 gen_rtx_SET (VOIDmode,
13415 gen_rtx_MEM (DImode,
13416 gen_rtx_PRE_DEC (DImode,
13417 stack_pointer_rtx)),
13418 operand));
13419 break;
13420 default:
13421 abort ();
13422 }
13423 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13424 }
13425 else
13426 {
13427 switch (mode)
13428 {
13429 case DImode:
13430 {
13431 rtx operands[2];
13432 split_di (&operand, 1, operands, operands + 1);
13433 emit_insn (
13434 gen_rtx_SET (VOIDmode,
13435 gen_rtx_MEM (SImode,
13436 gen_rtx_PRE_DEC (Pmode,
13437 stack_pointer_rtx)),
13438 operands[1]));
13439 emit_insn (
13440 gen_rtx_SET (VOIDmode,
13441 gen_rtx_MEM (SImode,
13442 gen_rtx_PRE_DEC (Pmode,
13443 stack_pointer_rtx)),
13444 operands[0]));
13445 }
13446 break;
13447 case HImode:
13448 /* It is better to store HImode values as SImode. */
13449 if (!TARGET_PARTIAL_REG_STALL)
13450 operand = gen_lowpart (SImode, operand);
13451 /* FALLTHRU */
13452 case SImode:
13453 emit_insn (
13454 gen_rtx_SET (VOIDmode,
13455 gen_rtx_MEM (GET_MODE (operand),
13456 gen_rtx_PRE_DEC (SImode,
13457 stack_pointer_rtx)),
13458 operand));
13459 break;
13460 default:
13461 abort ();
13462 }
13463 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13464 }
13465 return result;
13466 }
13467
13468 /* Free operand from the memory. */
13469 void
13470 ix86_free_from_memory (mode)
13471 enum machine_mode mode;
13472 {
13473 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13474 {
13475 int size;
13476
13477 if (mode == DImode || TARGET_64BIT)
13478 size = 8;
13479 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13480 size = 2;
13481 else
13482 size = 4;
13483 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13484 to a pop or an add instruction if registers are available. */
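      /* That is, we emit the equivalent of "lea N(%esp), %esp" (with N = size
	 as computed above) rather than "add $N, %esp", presumably so the
	 condition flags are left untouched.  */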
13485 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13486 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13487 GEN_INT (size))));
13488 }
13489 }
13490
13491 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13492 QImode must go into class Q_REGS.
13493 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13494 movdf to do mem-to-mem moves through integer regs. */
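/* Illustration of the rules below: reloading (const_double 1.0) into
   FLOAT_REGS is allowed because standard_80387_constant_p accepts it (fld1
   exists), whereas an arbitrary constant such as 3.5 yields NO_REGS for an
   SSE or float class and so ends up in the constant pool.  */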
13495 enum reg_class
13496 ix86_preferred_reload_class (x, class)
13497 rtx x;
13498 enum reg_class class;
13499 {
13500 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13501 {
13502 /* SSE can't load any constant directly yet. */
13503 if (SSE_CLASS_P (class))
13504 return NO_REGS;
13505 /* Floats can load 0 and 1. */
13506 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13507 {
13508 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13509 if (MAYBE_SSE_CLASS_P (class))
13510 return (reg_class_subset_p (class, GENERAL_REGS)
13511 ? GENERAL_REGS : FLOAT_REGS);
13512 else
13513 return class;
13514 }
13515 /* General regs can load everything. */
13516 if (reg_class_subset_p (class, GENERAL_REGS))
13517 return GENERAL_REGS;
13518 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13519 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13520 return NO_REGS;
13521 }
13522 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13523 return NO_REGS;
13524 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13525 return Q_REGS;
13526 return class;
13527 }
13528
13529 /* If we are copying between general and FP registers, we need a memory
13530 location. The same is true for SSE and MMX registers.
13531
13532 The macro can't work reliably when one of the CLASSES is a class containing
13533 registers from multiple units (SSE, MMX, integer). We avoid this by never
13534 combining those units in a single alternative in the machine description.
13535 Ensure that this constraint holds to avoid unexpected surprises.
13536
13537 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13538 enforce these sanity checks. */
13539 int
13540 ix86_secondary_memory_needed (class1, class2, mode, strict)
13541 enum reg_class class1, class2;
13542 enum machine_mode mode;
13543 int strict;
13544 {
13545 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13546 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13547 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13548 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13549 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13550 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13551 {
13552 if (strict)
13553 abort ();
13554 else
13555 return 1;
13556 }
13557 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13558 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13559 && (mode) != SImode)
13560 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13561 && (mode) != SImode));
13562 }
13563 /* Return the cost of moving data from a register in class CLASS1 to
13564 one in class CLASS2.
13565
13566 It is not required that the cost always equal 2 when FROM is the same as TO;
13567 on some machines it is expensive to move between registers if they are not
13568 general registers. */
13569 int
13570 ix86_register_move_cost (mode, class1, class2)
13571 enum machine_mode mode;
13572 enum reg_class class1, class2;
13573 {
13574 /* In case we require secondary memory, compute the cost of the store followed
13575 by the load. When copying from a general purpose register we may emit
13576 multiple stores followed by a single load, causing a memory size mismatch
13577 stall. Count this as an arbitrarily high cost of 20. */
13578 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13579 {
13580 int add_cost = 0;
13581 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13582 add_cost = 20;
13583 return (MEMORY_MOVE_COST (mode, class1, 0)
13584 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13585 }
13586 /* Moves between SSE/MMX and integer unit are expensive. */
13587 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13588 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13589 return ix86_cost->mmxsse_to_integer;
13590 if (MAYBE_FLOAT_CLASS_P (class1))
13591 return ix86_cost->fp_move;
13592 if (MAYBE_SSE_CLASS_P (class1))
13593 return ix86_cost->sse_move;
13594 if (MAYBE_MMX_CLASS_P (class1))
13595 return ix86_cost->mmx_move;
13596 return 2;
13597 }
13598
13599 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13600 int
13601 ix86_hard_regno_mode_ok (regno, mode)
13602 int regno;
13603 enum machine_mode mode;
13604 {
13605 /* The flags registers can hold CCmode values only, and no other register can hold them. */
13606 if (CC_REGNO_P (regno))
13607 return GET_MODE_CLASS (mode) == MODE_CC;
13608 if (GET_MODE_CLASS (mode) == MODE_CC
13609 || GET_MODE_CLASS (mode) == MODE_RANDOM
13610 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13611 return 0;
13612 if (FP_REGNO_P (regno))
13613 return VALID_FP_MODE_P (mode);
13614 if (SSE_REGNO_P (regno))
13615 return VALID_SSE_REG_MODE (mode);
13616 if (MMX_REGNO_P (regno))
13617 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13618 /* We handle both integers and floats in the general purpose registers.
13619 In the future we should be able to handle vector modes as well. */
13620 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13621 return 0;
13622 /* Take care with QImode values - they can be in non-QI regs, but then
13623 they do cause partial register stalls. */
13624 if (regno < 4 || mode != QImode || TARGET_64BIT)
13625 return 1;
13626 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13627 }
13628
13629 /* Return the cost of moving data of mode M between a
13630 register and memory. A value of 2 is the default; this cost is
13631 relative to those in `REGISTER_MOVE_COST'.
13632
13633 If moving between registers and memory is more expensive than
13634 between two registers, you should define this macro to express the
13635 relative cost.
13636
13637 Also model the increased cost of moving QImode registers in non
13638 Q_REGS classes.
13639 */
13640 int
13641 ix86_memory_move_cost (mode, class, in)
13642 enum machine_mode mode;
13643 enum reg_class class;
13644 int in;
13645 {
13646 if (FLOAT_CLASS_P (class))
13647 {
13648 int index;
13649 switch (mode)
13650 {
13651 case SFmode:
13652 index = 0;
13653 break;
13654 case DFmode:
13655 index = 1;
13656 break;
13657 case XFmode:
13658 case TFmode:
13659 index = 2;
13660 break;
13661 default:
13662 return 100;
13663 }
13664 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13665 }
13666 if (SSE_CLASS_P (class))
13667 {
13668 int index;
13669 switch (GET_MODE_SIZE (mode))
13670 {
13671 case 4:
13672 index = 0;
13673 break;
13674 case 8:
13675 index = 1;
13676 break;
13677 case 16:
13678 index = 2;
13679 break;
13680 default:
13681 return 100;
13682 }
13683 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13684 }
13685 if (MMX_CLASS_P (class))
13686 {
13687 int index;
13688 switch (GET_MODE_SIZE (mode))
13689 {
13690 case 4:
13691 index = 0;
13692 break;
13693 case 8:
13694 index = 1;
13695 break;
13696 default:
13697 return 100;
13698 }
13699 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13700 }
13701 switch (GET_MODE_SIZE (mode))
13702 {
13703 case 1:
13704 if (in)
13705 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13706 : ix86_cost->movzbl_load);
13707 else
13708 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13709 : ix86_cost->int_store[0] + 4);
13710 break;
13711 case 2:
13712 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13713 default:
13714 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
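      /* E.g. DFmode needs 8 / 4 = 2 SImode moves; XFmode (normally 12 bytes
	 on ia32) needs 3.  */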
13715 if (mode == TFmode)
13716 mode = XFmode;
13717 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13718 * (int) GET_MODE_SIZE (mode) / 4);
13719 }
13720 }
13721
13722 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13723 static void
13724 ix86_svr3_asm_out_constructor (symbol, priority)
13725 rtx symbol;
13726 int priority ATTRIBUTE_UNUSED;
13727 {
13728 init_section ();
13729 fputs ("\tpushl $", asm_out_file);
13730 assemble_name (asm_out_file, XSTR (symbol, 0));
13731 fputc ('\n', asm_out_file);
13732 }
13733 #endif
13734
13735 #if TARGET_MACHO
13736
13737 static int current_machopic_label_num;
13738
13739 /* Given a symbol name and its associated stub, write out the
13740 definition of the stub. */
13741
13742 void
13743 machopic_output_stub (file, symb, stub)
13744 FILE *file;
13745 const char *symb, *stub;
13746 {
13747 unsigned int length;
13748 char *binder_name, *symbol_name, lazy_ptr_name[32];
13749 int label = ++current_machopic_label_num;
13750
13751 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13752 symb = (*targetm.strip_name_encoding) (symb);
13753
13754 length = strlen (stub);
13755 binder_name = alloca (length + 32);
13756 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13757
13758 length = strlen (symb);
13759 symbol_name = alloca (length + 32);
13760 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13761
13762 sprintf (lazy_ptr_name, "L%d$lz", label);
13763
13764 if (MACHOPIC_PURE)
13765 machopic_picsymbol_stub_section ();
13766 else
13767 machopic_symbol_stub_section ();
13768
13769 fprintf (file, "%s:\n", stub);
13770 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13771
13772 if (MACHOPIC_PURE)
13773 {
13774 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13775 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13776 fprintf (file, "\tjmp %%edx\n");
13777 }
13778 else
13779 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13780
13781 fprintf (file, "%s:\n", binder_name);
13782
13783 if (MACHOPIC_PURE)
13784 {
13785 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13786 fprintf (file, "\tpushl %%eax\n");
13787 }
13788 else
13789 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13790
13791 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13792
13793 machopic_lazy_symbol_ptr_section ();
13794 fprintf (file, "%s:\n", lazy_ptr_name);
13795 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13796 fprintf (file, "\t.long %s\n", binder_name);
13797 }
13798 #endif /* TARGET_MACHO */
13799
13800 /* Order the registers for register allocator. */
13801
13802 void
13803 x86_order_regs_for_local_alloc ()
13804 {
13805 int pos = 0;
13806 int i;
13807
13808 /* First allocate the local general purpose registers. */
13809 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13810 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13811 reg_alloc_order [pos++] = i;
13812
13813 /* Global general purpose registers. */
13814 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13815 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13816 reg_alloc_order [pos++] = i;
13817
13818 /* x87 registers come first in case we are doing FP math
13819 using them. */
13820 if (!TARGET_SSE_MATH)
13821 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13822 reg_alloc_order [pos++] = i;
13823
13824 /* SSE registers. */
13825 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13826 reg_alloc_order [pos++] = i;
13827 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13828 reg_alloc_order [pos++] = i;
13829
13830 /* x87 registers. */
13831 if (TARGET_SSE_MATH)
13832 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13833 reg_alloc_order [pos++] = i;
13834
13835 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13836 reg_alloc_order [pos++] = i;
13837
13838 /* Initialize the rest of the array, as we do not allocate some registers
13839 at all. */
13840 while (pos < FIRST_PSEUDO_REGISTER)
13841 reg_alloc_order [pos++] = 0;
13842 }
13843
13844 void
13845 x86_output_mi_thunk (file, delta, function)
13846 FILE *file;
13847 int delta;
13848 tree function;
13849 {
13850 tree parm;
13851 rtx xops[3];
13852
13853 if (ix86_regparm > 0)
13854 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13855 else
13856 parm = NULL_TREE;
13857 for (; parm; parm = TREE_CHAIN (parm))
13858 if (TREE_VALUE (parm) == void_type_node)
13859 break;
13860
13861 xops[0] = GEN_INT (delta);
13862 if (TARGET_64BIT)
13863 {
13864 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13865 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13866 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13867 if (flag_pic)
13868 {
13869 fprintf (file, "\tjmp *");
13870 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13871 fprintf (file, "@GOTPCREL(%%rip)\n");
13872 }
13873 else
13874 {
13875 fprintf (file, "\tjmp ");
13876 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13877 fprintf (file, "\n");
13878 }
13879 }
13880 else
13881 {
13882 if (parm)
13883 xops[1] = gen_rtx_REG (SImode, 0);
13884 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13885 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13886 else
13887 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13888 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13889
13890 if (flag_pic)
13891 {
13892 xops[0] = pic_offset_table_rtx;
13893 xops[1] = gen_label_rtx ();
13894 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13895
13896 if (ix86_regparm > 2)
13897 abort ();
13898 output_asm_insn ("push{l}\t%0", xops);
13899 output_asm_insn ("call\t%P1", xops);
13900 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13901 output_asm_insn ("pop{l}\t%0", xops);
13902 output_asm_insn
13903 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13904 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13905 output_asm_insn
13906 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13907 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13908 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13909 }
13910 else
13911 {
13912 fprintf (file, "\tjmp ");
13913 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13914 fprintf (file, "\n");
13915 }
13916 }
13917 }
13918
13919 int
13920 x86_field_alignment (field, computed)
13921 tree field;
13922 int computed;
13923 {
13924 enum machine_mode mode;
13925 tree type = TREE_TYPE (field);
13926
13927 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13928 return computed;
13929 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13930 ? get_inner_array_type (type) : type);
13931 if (mode == DFmode || mode == DCmode
13932 || GET_MODE_CLASS (mode) == MODE_INT
13933 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13934 return MIN (32, computed);
13935 return computed;
13936 }
13937
13938 /* Implement machine specific optimizations.
13939 At the moment we implement a single transformation: AMD Athlon works faster
13940 when RET is not the destination of a conditional jump or directly preceded
13941 by another jump instruction. We avoid the penalty by inserting a NOP just
13942 before the RET instruction in such cases. */
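/* Sketch of the transformation (assembly illustrative only): a sequence like
   "jcc .L1; ret", or a RET whose block is entered by a jump, becomes
   "jcc .L1; nop; ret".  */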
13943 void
13944 x86_machine_dependent_reorg (first)
13945 rtx first ATTRIBUTE_UNUSED;
13946 {
13947 edge e;
13948
13949 if (!TARGET_ATHLON || !optimize || optimize_size)
13950 return;
13951 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13952 {
13953 basic_block bb = e->src;
13954 rtx ret = bb->end;
13955 rtx prev;
13956 bool insert = false;
13957
13958 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13959 continue;
13960 prev = prev_nonnote_insn (ret);
13961 if (prev && GET_CODE (prev) == CODE_LABEL)
13962 {
13963 edge e;
13964 for (e = bb->pred; e; e = e->pred_next)
13965 if (EDGE_FREQUENCY (e) && e->src->index > 0
13966 && !(e->flags & EDGE_FALLTHRU))
13967 insert = 1;
13968 }
13969 if (!insert)
13970 {
13971 prev = prev_real_insn (ret);
13972 if (prev && GET_CODE (prev) == JUMP_INSN
13973 && any_condjump_p (prev))
13974 insert = 1;
13975 }
13976 if (insert)
13977 emit_insn_before (gen_nop (), ret);
13978 }
13979 }
13980
13981 #include "gt-i386.h"