gcc.git: gcc/config/i386/i386.c (blob at commit "re PR middle-end/7245 (ICE in find_reloads)")
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
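/* Note: override_options below re-points ix86_cost at size_cost when
   optimizing for size, or at the cost table matching the -mcpu selection
   via processor_target_table.  */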
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
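/* Each of these masks is consulted through macros in i386.h, which test the
   bit for the active CPU (or architecture); as a rough sketch, a query such
   as TARGET_USE_LEAVE amounts to (x86_use_leave & (1 << ix86_cpu)), so the
   feature is used only when tuning for a processor whose bit is set.  */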
399
400 /* If the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
410
411 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
412 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
413 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
414 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
415
416 /* Array of the smallest class containing reg number REGNO, indexed by
417 REGNO. Used by REGNO_REG_CLASS in i386.h. */
418
419 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 {
421 /* ax, dx, cx, bx */
422 AREG, DREG, CREG, BREG,
423 /* si, di, bp, sp */
424 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
425 /* FP registers */
426 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
427 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* arg pointer */
429 NON_Q_REGS,
430 /* flags, fpsr, dirflag, frame */
431 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
435 MMX_REGS, MMX_REGS,
436 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
437 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
438 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
439 SSE_REGS, SSE_REGS,
440 };
441
442 /* The "default" register map used in 32bit mode. */
443
444 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
445 {
446 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
447 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
448 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
449 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
450 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
452 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 };
454
455 static int const x86_64_int_parameter_registers[6] =
456 {
457 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
458 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459 };
460
461 static int const x86_64_int_return_registers[4] =
462 {
463 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 };
465
466 /* The "default" register map used in 64bit mode. */
467 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
468 {
469 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
470 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
471 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
472 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
473 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
474 8,9,10,11,12,13,14,15, /* extended integer registers */
475 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 };
477
478 /* Define the register numbers to be used in Dwarf debugging information.
479 The SVR4 reference port C compiler uses the following register numbers
480 in its Dwarf output code:
481 0 for %eax (gcc regno = 0)
482 1 for %ecx (gcc regno = 2)
483 2 for %edx (gcc regno = 1)
484 3 for %ebx (gcc regno = 3)
485 4 for %esp (gcc regno = 7)
486 5 for %ebp (gcc regno = 6)
487 6 for %esi (gcc regno = 4)
488 7 for %edi (gcc regno = 5)
489 The following three DWARF register numbers are never generated by
490 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
491 believes these numbers have these meanings.
492 8 for %eip (no gcc equivalent)
493 9 for %eflags (gcc regno = 17)
494 10 for %trapno (no gcc equivalent)
495 It is not at all clear how we should number the FP stack registers
496 for the x86 architecture. If the version of SDB on x86/svr4 were
497 a bit less brain dead with respect to floating-point then we would
498 have a precedent to follow with respect to DWARF register numbers
499 for x86 FP registers, but the SDB on x86/svr4 is so completely
500 broken with respect to FP registers that it is hardly worth thinking
501 of it as something to strive for compatibility with.
502 The version of x86/svr4 SDB I have at the moment does (partially)
503 seem to believe that DWARF register number 11 is associated with
504 the x86 register %st(0), but that's about all. Higher DWARF
505 register numbers don't seem to be associated with anything in
506 particular, and even for DWARF regno 11, SDB only seems to under-
507 stand that it should say that a variable lives in %st(0) (when
508 asked via an `=' command) if we said it was in DWARF regno 11,
509 but SDB still prints garbage when asked for the value of the
510 variable in question (via a `/' command).
511 (Also note that the labels SDB prints for various FP stack regs
512 when doing an `x' command are all wrong.)
513 Note that these problems generally don't affect the native SVR4
514 C compiler because it doesn't allow the use of -O with -g and
515 because when it is *not* optimizing, it allocates a memory
516 location for each floating-point variable, and the memory
517 location is what gets described in the DWARF AT_location
518 attribute for the variable in question.
519 Regardless of the severe mental illness of the x86/svr4 SDB, we
520 do something sensible here and we use the following DWARF
521 register numbers. Note that these are all stack-top-relative
522 numbers.
523 11 for %st(0) (gcc regno = 8)
524 12 for %st(1) (gcc regno = 9)
525 13 for %st(2) (gcc regno = 10)
526 14 for %st(3) (gcc regno = 11)
527 15 for %st(4) (gcc regno = 12)
528 16 for %st(5) (gcc regno = 13)
529 17 for %st(6) (gcc regno = 14)
530 18 for %st(7) (gcc regno = 15)
531 */
532 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
533 {
534 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
535 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
536 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
537 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
538 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
539 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
540 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 };
542
543 /* Test and compare insns in i386.md store the information needed to
544 generate branch and scc insns here. */
545
546 rtx ix86_compare_op0 = NULL_RTX;
547 rtx ix86_compare_op1 = NULL_RTX;
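/* Illustrative sketch of the protocol (the exact .md patterns differ): the
   compare expanders simply record their operands,
       ix86_compare_op0 = operands[0];
       ix86_compare_op1 = operands[1];
   and the conditional-branch and scc expanders later call
   ix86_expand_branch () or ix86_expand_setcc (), which read these variables
   back to emit the actual compare-and-jump sequence.  */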
548
549 /* The encoding characters for the four TLS models present in ELF. */
550
551 static char const tls_model_chars[] = " GLil";
552
553 #define MAX_386_STACK_LOCALS 3
554 /* Size of the register save area. */
555 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
556
557 /* Define the structure for the machine field in struct function. */
558 struct machine_function GTY(())
559 {
560 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
561 const char *some_ld_name;
562 int save_varrargs_registers;
563 int accesses_prev_frame;
564 };
565
566 #define ix86_stack_locals (cfun->machine->stack_locals)
567 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
568
569 /* Structure describing stack frame layout.
570 Stack grows downward:
571
572 [arguments]
573 <- ARG_POINTER
574 saved pc
575
576 saved frame pointer if frame_pointer_needed
577 <- HARD_FRAME_POINTER
578 [saved regs]
579
580 [padding1] \
581 )
582 [va_arg registers] (
583 > to_allocate <- FRAME_POINTER
584 [frame] (
585 )
586 [padding2] /
587 */
588 struct ix86_frame
589 {
590 int nregs;
591 int padding1;
592 int va_arg_size;
593 HOST_WIDE_INT frame;
594 int padding2;
595 int outgoing_arguments_size;
596 int red_zone_size;
597
598 HOST_WIDE_INT to_allocate;
599 /* The offsets relative to ARG_POINTER. */
600 HOST_WIDE_INT frame_pointer_offset;
601 HOST_WIDE_INT hard_frame_pointer_offset;
602 HOST_WIDE_INT stack_pointer_offset;
603 };
604
605 /* Used to enable/disable debugging features. */
606 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
607 /* Code model option as passed by user. */
608 const char *ix86_cmodel_string;
609 /* Parsed value. */
610 enum cmodel ix86_cmodel;
611 /* Asm dialect. */
612 const char *ix86_asm_string;
613 enum asm_dialect ix86_asm_dialect = ASM_ATT;
614 /* TLS dialect. */
615 const char *ix86_tls_dialect_string;
616 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
617
618 /* Which unit we are generating floating point math for. */
619 enum fpmath_unit ix86_fpmath;
620
621 /* Which CPU we are scheduling for. */
622 enum processor_type ix86_cpu;
623 /* Which instruction set architecture to use. */
624 enum processor_type ix86_arch;
625
626 /* Strings to hold which cpu and instruction set architecture to use. */
627 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
628 const char *ix86_arch_string; /* for -march=<xxx> */
629 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
630
631 /* # of registers to use to pass arguments. */
632 const char *ix86_regparm_string;
633
634 /* True if the SSE prefetch instruction is not a NOP. */
635 int x86_prefetch_sse;
636
637 /* ix86_regparm_string as a number */
638 int ix86_regparm;
639
640 /* Alignment to use for loops and jumps: */
641
642 /* Power of two alignment for loops. */
643 const char *ix86_align_loops_string;
644
645 /* Power of two alignment for non-loop jumps. */
646 const char *ix86_align_jumps_string;
647
648 /* Power of two alignment for stack boundary in bytes. */
649 const char *ix86_preferred_stack_boundary_string;
650
651 /* Preferred alignment for stack boundary in bits. */
652 int ix86_preferred_stack_boundary;
653
654 /* Values 1-5: see jump.c */
655 int ix86_branch_cost;
656 const char *ix86_branch_cost_string;
657
658 /* Power of two alignment for functions. */
659 const char *ix86_align_funcs_string;
660
661 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
662 static char internal_label_prefix[16];
663 static int internal_label_prefix_len;
664 \f
665 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
666 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
667 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
668 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
669 int, int, FILE *));
670 static const char *get_some_local_dynamic_name PARAMS ((void));
671 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
672 static rtx maybe_get_pool_constant PARAMS ((rtx));
673 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
674 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
675 rtx *, rtx *));
676 static rtx get_thread_pointer PARAMS ((void));
677 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
678 static rtx gen_push PARAMS ((rtx));
679 static int memory_address_length PARAMS ((rtx addr));
680 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
681 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
682 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
683 static void ix86_dump_ppro_packet PARAMS ((FILE *));
684 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
685 static struct machine_function * ix86_init_machine_status PARAMS ((void));
686 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
687 static int ix86_nsaved_regs PARAMS ((void));
688 static void ix86_emit_save_regs PARAMS ((void));
689 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
690 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
691 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
692 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
693 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
694 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
695 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
696 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
697 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
698 static int ix86_issue_rate PARAMS ((void));
699 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
700 static void ix86_sched_init PARAMS ((FILE *, int, int));
701 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
702 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
703 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
704 static int ia32_multipass_dfa_lookahead PARAMS ((void));
705 static void ix86_init_mmx_sse_builtins PARAMS ((void));
706
707 struct ix86_address
708 {
709 rtx base, index, disp;
710 HOST_WIDE_INT scale;
711 };
712
713 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
714
715 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
716 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 ATTRIBUTE_UNUSED;
718
719 struct builtin_description;
720 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
723 tree, rtx));
724 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
726 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
727 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
728 tree, rtx));
729 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
730 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
731 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
732 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
733 enum rtx_code *,
734 enum rtx_code *,
735 enum rtx_code *));
736 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
737 rtx *, rtx *));
738 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
740 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
741 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
742 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
743 static int ix86_save_reg PARAMS ((unsigned int, int));
744 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
745 static int ix86_comp_type_attributes PARAMS ((tree, tree));
746 const struct attribute_spec ix86_attribute_table[];
747 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
748 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
749
750 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
751 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
752 #endif
753
754 /* Register class used for passing a given 64bit part of the argument.
755 These represent classes as documented by the PS ABI, with the exception
756 of the SSESF and SSEDF classes, which are basically the SSE class, except
757 that gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
758 
759 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
760 whenever possible (the upper half is then just padding).
761 */
762 enum x86_64_reg_class
763 {
764 X86_64_NO_CLASS,
765 X86_64_INTEGER_CLASS,
766 X86_64_INTEGERSI_CLASS,
767 X86_64_SSE_CLASS,
768 X86_64_SSESF_CLASS,
769 X86_64_SSEDF_CLASS,
770 X86_64_SSEUP_CLASS,
771 X86_64_X87_CLASS,
772 X86_64_X87UP_CLASS,
773 X86_64_MEMORY_CLASS
774 };
775 static const char * const x86_64_reg_class_name[] =
776 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777
778 #define MAX_CLASSES 4
779 static int classify_argument PARAMS ((enum machine_mode, tree,
780 enum x86_64_reg_class [MAX_CLASSES],
781 int));
782 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 int *));
784 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 const int *, int));
786 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
787 enum x86_64_reg_class));
788 \f
789 /* Initialize the GCC target structure. */
790 #undef TARGET_ATTRIBUTE_TABLE
791 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
792 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
793 # undef TARGET_MERGE_DECL_ATTRIBUTES
794 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
795 #endif
796
797 #undef TARGET_COMP_TYPE_ATTRIBUTES
798 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799
800 #undef TARGET_INIT_BUILTINS
801 #define TARGET_INIT_BUILTINS ix86_init_builtins
802
803 #undef TARGET_EXPAND_BUILTIN
804 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805
806 #undef TARGET_ASM_FUNCTION_EPILOGUE
807 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808
809 #undef TARGET_ASM_OPEN_PAREN
810 #define TARGET_ASM_OPEN_PAREN ""
811 #undef TARGET_ASM_CLOSE_PAREN
812 #define TARGET_ASM_CLOSE_PAREN ""
813
814 #undef TARGET_ASM_ALIGNED_HI_OP
815 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
816 #undef TARGET_ASM_ALIGNED_SI_OP
817 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #ifdef ASM_QUAD
819 #undef TARGET_ASM_ALIGNED_DI_OP
820 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
821 #endif
822
823 #undef TARGET_ASM_UNALIGNED_HI_OP
824 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
825 #undef TARGET_ASM_UNALIGNED_SI_OP
826 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
827 #undef TARGET_ASM_UNALIGNED_DI_OP
828 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829
830 #undef TARGET_SCHED_ADJUST_COST
831 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
832 #undef TARGET_SCHED_ISSUE_RATE
833 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
834 #undef TARGET_SCHED_VARIABLE_ISSUE
835 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
836 #undef TARGET_SCHED_INIT
837 #define TARGET_SCHED_INIT ix86_sched_init
838 #undef TARGET_SCHED_REORDER
839 #define TARGET_SCHED_REORDER ix86_sched_reorder
840 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
841 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
842 ia32_use_dfa_pipeline_interface
843 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
844 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
845 ia32_multipass_dfa_lookahead
846
847 #ifdef HAVE_AS_TLS
848 #undef TARGET_HAVE_TLS
849 #define TARGET_HAVE_TLS true
850 #endif
851
852 struct gcc_target targetm = TARGET_INITIALIZER;
853 \f
854 /* Sometimes certain combinations of command options do not make
855 sense on a particular target machine. You can define a macro
856 `OVERRIDE_OPTIONS' to take account of this. This macro, if
857 defined, is executed once just after all the command options have
858 been parsed.
859
860 Don't use this macro to turn on various extra optimizations for
861 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
862
863 void
864 override_options ()
865 {
866 int i;
867 /* Comes from final.c -- no real reason to change it. */
868 #define MAX_CODE_ALIGN 16
869
870 static struct ptt
871 {
872 const struct processor_costs *cost; /* Processor costs */
873 const int target_enable; /* Target flags to enable. */
874 const int target_disable; /* Target flags to disable. */
875 const int align_loop; /* Default alignments. */
876 const int align_loop_max_skip;
877 const int align_jump;
878 const int align_jump_max_skip;
879 const int align_func;
880 const int branch_cost;
881 }
882 const processor_target_table[PROCESSOR_max] =
883 {
884 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
885 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
886 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
887 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
888 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
889 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
890 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
891 };
892
893 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
894 static struct pta
895 {
896 const char *const name; /* processor name or nickname. */
897 const enum processor_type processor;
898 const enum pta_flags
899 {
900 PTA_SSE = 1,
901 PTA_SSE2 = 2,
902 PTA_MMX = 4,
903 PTA_PREFETCH_SSE = 8,
904 PTA_3DNOW = 16,
905 PTA_3DNOW_A = 64
906 } flags;
907 }
908 const processor_alias_table[] =
909 {
910 {"i386", PROCESSOR_I386, 0},
911 {"i486", PROCESSOR_I486, 0},
912 {"i586", PROCESSOR_PENTIUM, 0},
913 {"pentium", PROCESSOR_PENTIUM, 0},
914 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
915 {"i686", PROCESSOR_PENTIUMPRO, 0},
916 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
917 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
918 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
920 PTA_MMX | PTA_PREFETCH_SSE},
921 {"k6", PROCESSOR_K6, PTA_MMX},
922 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
924 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
925 | PTA_3DNOW_A},
926 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
927 | PTA_3DNOW | PTA_3DNOW_A},
928 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
929 | PTA_3DNOW_A | PTA_SSE},
930 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
934 };
935
936 int const pta_size = ARRAY_SIZE (processor_alias_table);
937
938 #ifdef SUBTARGET_OVERRIDE_OPTIONS
939 SUBTARGET_OVERRIDE_OPTIONS;
940 #endif
941
942 if (!ix86_cpu_string && ix86_arch_string)
943 ix86_cpu_string = ix86_arch_string;
944 if (!ix86_cpu_string)
945 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
946 if (!ix86_arch_string)
947 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
948
949 if (ix86_cmodel_string != 0)
950 {
951 if (!strcmp (ix86_cmodel_string, "small"))
952 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953 else if (flag_pic)
954 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
955 else if (!strcmp (ix86_cmodel_string, "32"))
956 ix86_cmodel = CM_32;
957 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
958 ix86_cmodel = CM_KERNEL;
959 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
960 ix86_cmodel = CM_MEDIUM;
961 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
962 ix86_cmodel = CM_LARGE;
963 else
964 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
965 }
966 else
967 {
968 ix86_cmodel = CM_32;
969 if (TARGET_64BIT)
970 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 }
972 if (ix86_asm_string != 0)
973 {
974 if (!strcmp (ix86_asm_string, "intel"))
975 ix86_asm_dialect = ASM_INTEL;
976 else if (!strcmp (ix86_asm_string, "att"))
977 ix86_asm_dialect = ASM_ATT;
978 else
979 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 }
981 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
982 error ("code model `%s' not supported in the %s bit mode",
983 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
984 if (ix86_cmodel == CM_LARGE)
985 sorry ("code model `large' not supported yet");
986 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
987 sorry ("%i-bit mode not compiled in",
988 (target_flags & MASK_64BIT) ? 64 : 32);
989
990 for (i = 0; i < pta_size; i++)
991 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 {
993 ix86_arch = processor_alias_table[i].processor;
994 /* Default cpu tuning to the architecture. */
995 ix86_cpu = ix86_arch;
996 if (processor_alias_table[i].flags & PTA_MMX
997 && !(target_flags & MASK_MMX_SET))
998 target_flags |= MASK_MMX;
999 if (processor_alias_table[i].flags & PTA_3DNOW
1000 && !(target_flags & MASK_3DNOW_SET))
1001 target_flags |= MASK_3DNOW;
1002 if (processor_alias_table[i].flags & PTA_3DNOW_A
1003 && !(target_flags & MASK_3DNOW_A_SET))
1004 target_flags |= MASK_3DNOW_A;
1005 if (processor_alias_table[i].flags & PTA_SSE
1006 && !(target_flags & MASK_SSE_SET))
1007 target_flags |= MASK_SSE;
1008 if (processor_alias_table[i].flags & PTA_SSE2
1009 && !(target_flags & MASK_SSE2_SET))
1010 target_flags |= MASK_SSE2;
1011 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1012 x86_prefetch_sse = true;
1013 break;
1014 }
1015
1016 if (i == pta_size)
1017 error ("bad value (%s) for -march= switch", ix86_arch_string);
1018
1019 for (i = 0; i < pta_size; i++)
1020 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 {
1022 ix86_cpu = processor_alias_table[i].processor;
1023 break;
1024 }
1025 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1026 x86_prefetch_sse = true;
1027 if (i == pta_size)
1028 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1029
1030 if (optimize_size)
1031 ix86_cost = &size_cost;
1032 else
1033 ix86_cost = processor_target_table[ix86_cpu].cost;
1034 target_flags |= processor_target_table[ix86_cpu].target_enable;
1035 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036
1037 /* Arrange to set up i386_stack_locals for all functions. */
1038 init_machine_status = ix86_init_machine_status;
1039
1040 /* Validate -mregparm= value. */
1041 if (ix86_regparm_string)
1042 {
1043 i = atoi (ix86_regparm_string);
1044 if (i < 0 || i > REGPARM_MAX)
1045 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1046 else
1047 ix86_regparm = i;
1048 }
1049 else
1050 if (TARGET_64BIT)
1051 ix86_regparm = REGPARM_MAX;
1052
1053 /* If the user has provided any of the -malign-* options,
1054 warn and use that value only if -falign-* is not set.
1055 Remove this code in GCC 3.2 or later. */
1056 if (ix86_align_loops_string)
1057 {
1058 warning ("-malign-loops is obsolete, use -falign-loops");
1059 if (align_loops == 0)
1060 {
1061 i = atoi (ix86_align_loops_string);
1062 if (i < 0 || i > MAX_CODE_ALIGN)
1063 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 else
1065 align_loops = 1 << i;
1066 }
1067 }
1068
1069 if (ix86_align_jumps_string)
1070 {
1071 warning ("-malign-jumps is obsolete, use -falign-jumps");
1072 if (align_jumps == 0)
1073 {
1074 i = atoi (ix86_align_jumps_string);
1075 if (i < 0 || i > MAX_CODE_ALIGN)
1076 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1077 else
1078 align_jumps = 1 << i;
1079 }
1080 }
1081
1082 if (ix86_align_funcs_string)
1083 {
1084 warning ("-malign-functions is obsolete, use -falign-functions");
1085 if (align_functions == 0)
1086 {
1087 i = atoi (ix86_align_funcs_string);
1088 if (i < 0 || i > MAX_CODE_ALIGN)
1089 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1090 else
1091 align_functions = 1 << i;
1092 }
1093 }
1094
1095 /* Default align_* from the processor table. */
1096 if (align_loops == 0)
1097 {
1098 align_loops = processor_target_table[ix86_cpu].align_loop;
1099 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 }
1101 if (align_jumps == 0)
1102 {
1103 align_jumps = processor_target_table[ix86_cpu].align_jump;
1104 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 }
1106 if (align_functions == 0)
1107 {
1108 align_functions = processor_target_table[ix86_cpu].align_func;
1109 }
1110
1111 /* Validate -mpreferred-stack-boundary= value, or provide default.
1112 The default of 128 bits is for Pentium III's SSE __m128, but we
1113 don't want additional code to keep the stack aligned when
1114 optimizing for code size. */
1115 ix86_preferred_stack_boundary = (optimize_size
1116 ? TARGET_64BIT ? 64 : 32
1117 : 128);
1118 if (ix86_preferred_stack_boundary_string)
1119 {
1120 i = atoi (ix86_preferred_stack_boundary_string);
1121 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1122 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1123 TARGET_64BIT ? 3 : 2);
1124 else
1125 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1126 }
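/* For example, -mpreferred-stack-boundary=4 requests 2**4 = 16 byte
   alignment, i.e. ix86_preferred_stack_boundary == 128 bits.  */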
1127
1128 /* Validate -mbranch-cost= value, or provide default. */
1129 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1130 if (ix86_branch_cost_string)
1131 {
1132 i = atoi (ix86_branch_cost_string);
1133 if (i < 0 || i > 5)
1134 error ("-mbranch-cost=%d is not between 0 and 5", i);
1135 else
1136 ix86_branch_cost = i;
1137 }
1138
1139 if (ix86_tls_dialect_string)
1140 {
1141 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1142 ix86_tls_dialect = TLS_DIALECT_GNU;
1143 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_SUN;
1145 else
1146 error ("bad value (%s) for -mtls-dialect= switch",
1147 ix86_tls_dialect_string);
1148 }
1149
1150 /* Keep nonleaf frame pointers. */
1151 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1152 flag_omit_frame_pointer = 1;
1153
1154 /* If we're doing fast math, we don't care about comparison order
1155 wrt NaNs. This lets us use a shorter comparison sequence. */
1156 if (flag_unsafe_math_optimizations)
1157 target_flags &= ~MASK_IEEE_FP;
1158
1159 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1160 since the insns won't need emulation. */
1161 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1162 target_flags &= ~MASK_NO_FANCY_MATH_387;
1163
1164 if (TARGET_64BIT)
1165 {
1166 if (TARGET_ALIGN_DOUBLE)
1167 error ("-malign-double makes no sense in the 64bit mode");
1168 if (TARGET_RTD)
1169 error ("-mrtd calling convention not supported in the 64bit mode");
1170 /* Enable by default the SSE and MMX builtins. */
1171 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1172 ix86_fpmath = FPMATH_SSE;
1173 }
1174 else
1175 ix86_fpmath = FPMATH_387;
1176
1177 if (ix86_fpmath_string != 0)
1178 {
1179 if (! strcmp (ix86_fpmath_string, "387"))
1180 ix86_fpmath = FPMATH_387;
1181 else if (! strcmp (ix86_fpmath_string, "sse"))
1182 {
1183 if (!TARGET_SSE)
1184 {
1185 warning ("SSE instruction set disabled, using 387 arithmetics");
1186 ix86_fpmath = FPMATH_387;
1187 }
1188 else
1189 ix86_fpmath = FPMATH_SSE;
1190 }
1191 else if (! strcmp (ix86_fpmath_string, "387,sse")
1192 || ! strcmp (ix86_fpmath_string, "sse,387"))
1193 {
1194 if (!TARGET_SSE)
1195 {
1196 warning ("SSE instruction set disabled, using 387 arithmetics");
1197 ix86_fpmath = FPMATH_387;
1198 }
1199 else if (!TARGET_80387)
1200 {
1201 warning ("387 instruction set disabled, using SSE arithmetics");
1202 ix86_fpmath = FPMATH_SSE;
1203 }
1204 else
1205 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1206 }
1207 else
1208 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1209 }
1210
1211 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1212 on by -msse. */
1213 if (TARGET_SSE)
1214 {
1215 target_flags |= MASK_MMX;
1216 x86_prefetch_sse = true;
1217 }
1218
1219 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1220 if (TARGET_3DNOW)
1221 {
1222 target_flags |= MASK_MMX;
1223 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1224 extensions it adds. */
1225 if (x86_3dnow_a & (1 << ix86_arch))
1226 target_flags |= MASK_3DNOW_A;
1227 }
1228 if ((x86_accumulate_outgoing_args & CPUMASK)
1229 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1230 && !optimize_size)
1231 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1232
1233 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1234 {
1235 char *p;
1236 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1237 p = strchr (internal_label_prefix, 'X');
1238 internal_label_prefix_len = p - internal_label_prefix;
1239 *p = '\0';
1240 }
1241 }
1242 \f
1243 void
1244 optimization_options (level, size)
1245 int level;
1246 int size ATTRIBUTE_UNUSED;
1247 {
1248 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1249 make the problem with not enough registers even worse. */
1250 #ifdef INSN_SCHEDULING
1251 if (level > 1)
1252 flag_schedule_insns = 0;
1253 #endif
1254 if (TARGET_64BIT && optimize >= 1)
1255 flag_omit_frame_pointer = 1;
1256 if (TARGET_64BIT)
1257 {
1258 flag_pcc_struct_return = 0;
1259 flag_asynchronous_unwind_tables = 1;
1260 }
1261 }
1262 \f
1263 /* Table of valid machine attributes. */
1264 const struct attribute_spec ix86_attribute_table[] =
1265 {
1266 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1267 /* Stdcall attribute says callee is responsible for popping arguments
1268 if they are not variable. */
1269 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1270 /* Cdecl attribute says the callee is a normal C declaration */
1271 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1272 /* Regparm attribute specifies how many integer arguments are to be
1273 passed in registers. */
1274 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1275 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1276 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1278 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1279 #endif
1280 { NULL, 0, 0, false, false, false, NULL }
1281 };
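/* Usage example (user code, illustrative): the table above is what makes
   declarations such as
       int __attribute__((regparm (3))) f (int a, int b, int c);
       void __attribute__((stdcall)) g (int x);
   pass the first three integer arguments in registers, respectively make
   the callee pop its own arguments.  */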
1282
1283 /* Handle a "cdecl" or "stdcall" attribute;
1284 arguments as in struct attribute_spec.handler. */
1285 static tree
1286 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1287 tree *node;
1288 tree name;
1289 tree args ATTRIBUTE_UNUSED;
1290 int flags ATTRIBUTE_UNUSED;
1291 bool *no_add_attrs;
1292 {
1293 if (TREE_CODE (*node) != FUNCTION_TYPE
1294 && TREE_CODE (*node) != METHOD_TYPE
1295 && TREE_CODE (*node) != FIELD_DECL
1296 && TREE_CODE (*node) != TYPE_DECL)
1297 {
1298 warning ("`%s' attribute only applies to functions",
1299 IDENTIFIER_POINTER (name));
1300 *no_add_attrs = true;
1301 }
1302
1303 if (TARGET_64BIT)
1304 {
1305 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1306 *no_add_attrs = true;
1307 }
1308
1309 return NULL_TREE;
1310 }
1311
1312 /* Handle a "regparm" attribute;
1313 arguments as in struct attribute_spec.handler. */
1314 static tree
1315 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1316 tree *node;
1317 tree name;
1318 tree args;
1319 int flags ATTRIBUTE_UNUSED;
1320 bool *no_add_attrs;
1321 {
1322 if (TREE_CODE (*node) != FUNCTION_TYPE
1323 && TREE_CODE (*node) != METHOD_TYPE
1324 && TREE_CODE (*node) != FIELD_DECL
1325 && TREE_CODE (*node) != TYPE_DECL)
1326 {
1327 warning ("`%s' attribute only applies to functions",
1328 IDENTIFIER_POINTER (name));
1329 *no_add_attrs = true;
1330 }
1331 else
1332 {
1333 tree cst;
1334
1335 cst = TREE_VALUE (args);
1336 if (TREE_CODE (cst) != INTEGER_CST)
1337 {
1338 warning ("`%s' attribute requires an integer constant argument",
1339 IDENTIFIER_POINTER (name));
1340 *no_add_attrs = true;
1341 }
1342 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1343 {
1344 warning ("argument to `%s' attribute larger than %d",
1345 IDENTIFIER_POINTER (name), REGPARM_MAX);
1346 *no_add_attrs = true;
1347 }
1348 }
1349
1350 return NULL_TREE;
1351 }
1352
1353 /* Return 0 if the attributes for two types are incompatible, 1 if they
1354 are compatible, and 2 if they are nearly compatible (which causes a
1355 warning to be generated). */
1356
1357 static int
1358 ix86_comp_type_attributes (type1, type2)
1359 tree type1;
1360 tree type2;
1361 {
1362 /* Check for mismatch of non-default calling convention. */
1363 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1364
1365 if (TREE_CODE (type1) != FUNCTION_TYPE)
1366 return 1;
1367
1368 /* Check for mismatched return types (cdecl vs stdcall). */
1369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1371 return 0;
1372 return 1;
1373 }
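/* For example, a function type carrying the "stdcall" attribute and a plain
   (cdecl) function type compare as incompatible here (return value 0), so
   mixing pointers to the two types draws a warning.  */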
1374 \f
1375 /* Value is the number of bytes of arguments automatically
1376 popped when returning from a subroutine call.
1377 FUNDECL is the declaration node of the function (as a tree),
1378 FUNTYPE is the data type of the function (as a tree),
1379 or for a library call it is an identifier node for the subroutine name.
1380 SIZE is the number of bytes of arguments passed on the stack.
1381
1382 On the 80386, the RTD insn may be used to pop them if the number
1383 of args is fixed, but if the number is variable then the caller
1384 must pop them all. RTD can't be used for library calls now
1385 because the library is compiled with the Unix compiler.
1386 Use of RTD is a selectable option, since it is incompatible with
1387 standard Unix calling sequences. If the option is not selected,
1388 the caller must always pop the args.
1389
1390 The attribute stdcall is equivalent to RTD on a per module basis. */
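/* Worked example (illustrative): for
       void __attribute__((stdcall)) f (int a, int b);
   a call site passes 8 bytes on the stack and this function returns 8, so
   the callee pops its own arguments (e.g. with `ret $8'); for a plain cdecl
   function it returns 0 and the caller pops.  */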
1391
1392 int
1393 ix86_return_pops_args (fundecl, funtype, size)
1394 tree fundecl;
1395 tree funtype;
1396 int size;
1397 {
1398 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1399
1400 /* Cdecl functions override -mrtd, and never pop the stack. */
1401 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1402
1403 /* Stdcall functions will pop the stack if not variable args. */
1404 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1405 rtd = 1;
1406
1407 if (rtd
1408 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1409 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1410 == void_type_node)))
1411 return size;
1412 }
1413
1414 /* Lose any fake structure return argument if it is passed on the stack. */
1415 if (aggregate_value_p (TREE_TYPE (funtype))
1416 && !TARGET_64BIT)
1417 {
1418 int nregs = ix86_regparm;
1419
1420 if (funtype)
1421 {
1422 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1423
1424 if (attr)
1425 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1426 }
1427
1428 if (!nregs)
1429 return GET_MODE_SIZE (Pmode);
1430 }
1431
1432 return 0;
1433 }
1434 \f
1435 /* Argument support functions. */
1436
1437 /* Return true when register may be used to pass function parameters. */
1438 bool
1439 ix86_function_arg_regno_p (regno)
1440 int regno;
1441 {
1442 int i;
1443 if (!TARGET_64BIT)
1444 return (regno < REGPARM_MAX
1445 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1446 if (SSE_REGNO_P (regno) && TARGET_SSE)
1447 return true;
1448 /* RAX is used as a hidden argument to va_arg functions. */
1449 if (!regno)
1450 return true;
1451 for (i = 0; i < REGPARM_MAX; i++)
1452 if (regno == x86_64_int_parameter_registers[i])
1453 return true;
1454 return false;
1455 }
1456
1457 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1458 for a call to a function whose data type is FNTYPE.
1459 For a library call, FNTYPE is 0. */
1460
1461 void
1462 init_cumulative_args (cum, fntype, libname)
1463 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1464 tree fntype; /* tree ptr for function decl */
1465 rtx libname; /* SYMBOL_REF of library name or 0 */
1466 {
1467 static CUMULATIVE_ARGS zero_cum;
1468 tree param, next_param;
1469
1470 if (TARGET_DEBUG_ARG)
1471 {
1472 fprintf (stderr, "\ninit_cumulative_args (");
1473 if (fntype)
1474 fprintf (stderr, "fntype code = %s, ret code = %s",
1475 tree_code_name[(int) TREE_CODE (fntype)],
1476 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1477 else
1478 fprintf (stderr, "no fntype");
1479
1480 if (libname)
1481 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1482 }
1483
1484 *cum = zero_cum;
1485
1486 /* Set up the number of registers to use for passing arguments. */
1487 cum->nregs = ix86_regparm;
1488 cum->sse_nregs = SSE_REGPARM_MAX;
1489 if (fntype && !TARGET_64BIT)
1490 {
1491 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1492
1493 if (attr)
1494 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1495 }
1496 cum->maybe_vaarg = false;
1497
1498 /* Determine if this function has variable arguments. This is
1499 indicated by the last argument being 'void_type_node' if there
1500 are no variable arguments. If there are variable arguments, then
1501 we won't pass anything in registers.  */
1502
1503 if (cum->nregs)
1504 {
1505 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1506 param != 0; param = next_param)
1507 {
1508 next_param = TREE_CHAIN (param);
1509 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1510 {
1511 if (!TARGET_64BIT)
1512 cum->nregs = 0;
1513 cum->maybe_vaarg = true;
1514 }
1515 }
1516 }
1517 if ((!fntype && !libname)
1518 || (fntype && !TYPE_ARG_TYPES (fntype)))
1519 cum->maybe_vaarg = 1;
1520
1521 if (TARGET_DEBUG_ARG)
1522 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1523
1524 return;
1525 }
1526
1527 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1528 of this code is to classify each 8 bytes of the incoming argument by register
1529 class and assign registers accordingly. */
1530
1531 /* Return the union class of CLASS1 and CLASS2.
1532 See the x86-64 PS ABI for details. */
1533
1534 static enum x86_64_reg_class
1535 merge_classes (class1, class2)
1536 enum x86_64_reg_class class1, class2;
1537 {
1538 /* Rule #1: If both classes are equal, this is the resulting class. */
1539 if (class1 == class2)
1540 return class1;
1541
1542 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1543 the other class. */
1544 if (class1 == X86_64_NO_CLASS)
1545 return class2;
1546 if (class2 == X86_64_NO_CLASS)
1547 return class1;
1548
1549 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1550 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1551 return X86_64_MEMORY_CLASS;
1552
1553 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1554 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1555 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1556 return X86_64_INTEGERSI_CLASS;
1557 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1558 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1559 return X86_64_INTEGER_CLASS;
1560
1561 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1562 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1563 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1564 return X86_64_MEMORY_CLASS;
1565
1566 /* Rule #6: Otherwise class SSE is used. */
1567 return X86_64_SSE_CLASS;
1568 }
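/* A few consequences of the rules above, spelled out for illustration
   (X86_64_ prefixes omitted): merge_classes (NO_CLASS, SSE) == SSE,
   merge_classes (INTEGERSI, SSESF) == INTEGERSI, merge_classes (INTEGER, SSE)
   == INTEGER, merging anything with MEMORY yields MEMORY, and
   merge_classes (X87, SSE) also falls through to MEMORY.  */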
1569
1570 /* Classify the argument of type TYPE and mode MODE.
1571 CLASSES will be filled by the register class used to pass each word
1572 of the operand. The number of words is returned. In case the parameter
1573 should be passed in memory, 0 is returned. As a special case for zero
1574 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1575
1576 BIT_OFFSET is used internally for handling records and specifies the
1577 offset in bits, modulo 256, to avoid overflow cases.
1578
1579 See the x86-64 PS ABI for details.
1580 */
1581
1582 static int
1583 classify_argument (mode, type, classes, bit_offset)
1584 enum machine_mode mode;
1585 tree type;
1586 enum x86_64_reg_class classes[MAX_CLASSES];
1587 int bit_offset;
1588 {
1589 int bytes =
1590 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1591 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1592
1593 if (type && AGGREGATE_TYPE_P (type))
1594 {
1595 int i;
1596 tree field;
1597 enum x86_64_reg_class subclasses[MAX_CLASSES];
1598
1599 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1600 if (bytes > 16)
1601 return 0;
1602
1603 for (i = 0; i < words; i++)
1604 classes[i] = X86_64_NO_CLASS;
1605
1606 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1607 signal the memory class, so handle this as a special case. */
1608 if (!words)
1609 {
1610 classes[0] = X86_64_NO_CLASS;
1611 return 1;
1612 }
1613
1614 /* Classify each field of record and merge classes. */
1615 if (TREE_CODE (type) == RECORD_TYPE)
1616 {
1617 /* For classes, first merge in the fields of the base classes. */
1618 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1619 {
1620 tree bases = TYPE_BINFO_BASETYPES (type);
1621 int n_bases = TREE_VEC_LENGTH (bases);
1622 int i;
1623
1624 for (i = 0; i < n_bases; ++i)
1625 {
1626 tree binfo = TREE_VEC_ELT (bases, i);
1627 int num;
1628 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1629 tree type = BINFO_TYPE (binfo);
1630
1631 num = classify_argument (TYPE_MODE (type),
1632 type, subclasses,
1633 (offset + bit_offset) % 256);
1634 if (!num)
1635 return 0;
1636 for (i = 0; i < num; i++)
1637 {
1638 int pos = (offset + bit_offset) / 8 / 8;
1639 classes[i + pos] =
1640 merge_classes (subclasses[i], classes[i + pos]);
1641 }
1642 }
1643 }
1644 /* And now merge the fields of the structure. */
1645 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1646 {
1647 if (TREE_CODE (field) == FIELD_DECL)
1648 {
1649 int num;
1650
1651 /* Bitfields are always classified as integer. Handle them
1652 early, since later code would consider them to be
1653 misaligned integers. */
1654 if (DECL_BIT_FIELD (field))
1655 {
1656 for (i = int_bit_position (field) / 8 / 8;
1657 i < (int_bit_position (field)
1658 + tree_low_cst (DECL_SIZE (field), 0)
1659 + 63) / 8 / 8; i++)
1660 classes[i] =
1661 merge_classes (X86_64_INTEGER_CLASS,
1662 classes[i]);
1663 }
1664 else
1665 {
1666 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1667 TREE_TYPE (field), subclasses,
1668 (int_bit_position (field)
1669 + bit_offset) % 256);
1670 if (!num)
1671 return 0;
1672 for (i = 0; i < num; i++)
1673 {
1674 int pos =
1675 (int_bit_position (field) + bit_offset) / 8 / 8;
1676 classes[i + pos] =
1677 merge_classes (subclasses[i], classes[i + pos]);
1678 }
1679 }
1680 }
1681 }
1682 }
1683 /* Arrays are handled as small records. */
1684 else if (TREE_CODE (type) == ARRAY_TYPE)
1685 {
1686 int num;
1687 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1688 TREE_TYPE (type), subclasses, bit_offset);
1689 if (!num)
1690 return 0;
1691
1692 /* The partial classes are now full classes. */
1693 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1694 subclasses[0] = X86_64_SSE_CLASS;
1695 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1696 subclasses[0] = X86_64_INTEGER_CLASS;
1697
1698 for (i = 0; i < words; i++)
1699 classes[i] = subclasses[i % num];
1700 }
1701 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1702 else if (TREE_CODE (type) == UNION_TYPE
1703 || TREE_CODE (type) == QUAL_UNION_TYPE)
1704 {
1705 /* For classes, first merge in the fields of the base classes. */
1706 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1707 {
1708 tree bases = TYPE_BINFO_BASETYPES (type);
1709 int n_bases = TREE_VEC_LENGTH (bases);
1710 int i;
1711
1712 for (i = 0; i < n_bases; ++i)
1713 {
1714 tree binfo = TREE_VEC_ELT (bases, i);
1715 int num;
1716 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1717 tree type = BINFO_TYPE (binfo);
1718
1719 num = classify_argument (TYPE_MODE (type),
1720 type, subclasses,
1721 (offset + bit_offset) % 256);
1722 if (!num)
1723 return 0;
1724 for (i = 0; i < num; i++)
1725 {
1726 int pos = (offset + bit_offset) / 8 / 8;
1727 classes[i + pos] =
1728 merge_classes (subclasses[i], classes[i + pos]);
1729 }
1730 }
1731 }
1732 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1733 {
1734 if (TREE_CODE (field) == FIELD_DECL)
1735 {
1736 int num;
1737 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1738 TREE_TYPE (field), subclasses,
1739 bit_offset);
1740 if (!num)
1741 return 0;
1742 for (i = 0; i < num; i++)
1743 classes[i] = merge_classes (subclasses[i], classes[i]);
1744 }
1745 }
1746 }
1747 else
1748 abort ();
1749
1750 /* Final merger cleanup. */
1751 for (i = 0; i < words; i++)
1752 {
1753 /* If one class is MEMORY, everything should be passed in
1754 memory. */
1755 if (classes[i] == X86_64_MEMORY_CLASS)
1756 return 0;
1757
1758 /* The X86_64_SSEUP_CLASS should always be preceded by
1759 X86_64_SSE_CLASS. */
1760 if (classes[i] == X86_64_SSEUP_CLASS
1761 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1762 classes[i] = X86_64_SSE_CLASS;
1763
1764 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1765 if (classes[i] == X86_64_X87UP_CLASS
1766 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1767 classes[i] = X86_64_SSE_CLASS;
1768 }
1769 return words;
1770 }
1771
1772 /* Compute the alignment needed. We align all types to their natural boundaries,
1773 with the exception of XFmode, which is aligned to 64 bits. */
1774 if (mode != VOIDmode && mode != BLKmode)
1775 {
1776 int mode_alignment = GET_MODE_BITSIZE (mode);
1777
1778 if (mode == XFmode)
1779 mode_alignment = 128;
1780 else if (mode == XCmode)
1781 mode_alignment = 256;
1782 /* Misaligned fields are always returned in memory. */
1783 if (bit_offset % mode_alignment)
1784 return 0;
1785 }
1786
1787 /* Classification of atomic types. */
1788 switch (mode)
1789 {
1790 case DImode:
1791 case SImode:
1792 case HImode:
1793 case QImode:
1794 case CSImode:
1795 case CHImode:
1796 case CQImode:
1797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1798 classes[0] = X86_64_INTEGERSI_CLASS;
1799 else
1800 classes[0] = X86_64_INTEGER_CLASS;
1801 return 1;
1802 case CDImode:
1803 case TImode:
1804 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1805 return 2;
1806 case CTImode:
1807 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1808 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1809 return 4;
1810 case SFmode:
1811 if (!(bit_offset % 64))
1812 classes[0] = X86_64_SSESF_CLASS;
1813 else
1814 classes[0] = X86_64_SSE_CLASS;
1815 return 1;
1816 case DFmode:
1817 classes[0] = X86_64_SSEDF_CLASS;
1818 return 1;
1819 case TFmode:
1820 classes[0] = X86_64_X87_CLASS;
1821 classes[1] = X86_64_X87UP_CLASS;
1822 return 2;
1823 case TCmode:
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1826 classes[2] = X86_64_X87_CLASS;
1827 classes[3] = X86_64_X87UP_CLASS;
1828 return 4;
1829 case DCmode:
1830 classes[0] = X86_64_SSEDF_CLASS;
1831 classes[1] = X86_64_SSEDF_CLASS;
1832 return 2;
1833 case SCmode:
1834 classes[0] = X86_64_SSE_CLASS;
1835 return 1;
1836 case V4SFmode:
1837 case V4SImode:
1838 case V16QImode:
1839 case V8HImode:
1840 case V2DFmode:
1841 case V2DImode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 classes[1] = X86_64_SSEUP_CLASS;
1844 return 2;
1845 case V2SFmode:
1846 case V2SImode:
1847 case V4HImode:
1848 case V8QImode:
1849 classes[0] = X86_64_SSE_CLASS;
1850 return 1;
1851 case BLKmode:
1852 case VOIDmode:
1853 return 0;
1854 default:
1855 abort ();
1856 }
1857 }
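/* For illustration, some results of the classification above when called
   with bit_offset 0: DImode yields one word of INTEGER, DFmode one word of
   SSEDF, TImode two words of INTEGER, and a 24 byte aggregate yields 0,
   i.e. it must be passed in memory.  */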
1858
1859 /* Examine the argument and set the number of registers required in each
1860 class. Return 0 iff the parameter should be passed in memory. */
1861 static int
1862 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1863 enum machine_mode mode;
1864 tree type;
1865 int *int_nregs, *sse_nregs;
1866 int in_return;
1867 {
1868 enum x86_64_reg_class class[MAX_CLASSES];
1869 int n = classify_argument (mode, type, class, 0);
1870
1871 *int_nregs = 0;
1872 *sse_nregs = 0;
1873 if (!n)
1874 return 0;
1875 for (n--; n >= 0; n--)
1876 switch (class[n])
1877 {
1878 case X86_64_INTEGER_CLASS:
1879 case X86_64_INTEGERSI_CLASS:
1880 (*int_nregs)++;
1881 break;
1882 case X86_64_SSE_CLASS:
1883 case X86_64_SSESF_CLASS:
1884 case X86_64_SSEDF_CLASS:
1885 (*sse_nregs)++;
1886 break;
1887 case X86_64_NO_CLASS:
1888 case X86_64_SSEUP_CLASS:
1889 break;
1890 case X86_64_X87_CLASS:
1891 case X86_64_X87UP_CLASS:
1892 if (!in_return)
1893 return 0;
1894 break;
1895 case X86_64_MEMORY_CLASS:
1896 abort ();
1897 }
1898 return 1;
1899 }
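/* Illustration: for the hypothetical struct { long l; double d; } used as an
   example above, examine_argument sets *int_nregs to 1 and *sse_nregs to 1
   and returns 1; for anything classified MEMORY it returns 0 with both
   counts left at zero.  */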
1900 /* Construct a container for the argument as used by the GCC interface. See
1901 FUNCTION_ARG for the detailed description. */
1902 static rtx
1903 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1904 enum machine_mode mode;
1905 tree type;
1906 int in_return;
1907 int nintregs, nsseregs;
1908 const int * intreg;
1909 int sse_regno;
1910 {
1911 enum machine_mode tmpmode;
1912 int bytes =
1913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1914 enum x86_64_reg_class class[MAX_CLASSES];
1915 int n;
1916 int i;
1917 int nexps = 0;
1918 int needed_sseregs, needed_intregs;
1919 rtx exp[MAX_CLASSES];
1920 rtx ret;
1921
1922 n = classify_argument (mode, type, class, 0);
1923 if (TARGET_DEBUG_ARG)
1924 {
1925 if (!n)
1926 fprintf (stderr, "Memory class\n");
1927 else
1928 {
1929 fprintf (stderr, "Classes:");
1930 for (i = 0; i < n; i++)
1931 {
1932 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1933 }
1934 fprintf (stderr, "\n");
1935 }
1936 }
1937 if (!n)
1938 return NULL;
1939 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1940 return NULL;
1941 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1942 return NULL;
1943
1944 /* First construct the simple cases. Avoid SCmode, since we want to use
1945 a single register to pass this type. */
1946 if (n == 1 && mode != SCmode)
1947 switch (class[0])
1948 {
1949 case X86_64_INTEGER_CLASS:
1950 case X86_64_INTEGERSI_CLASS:
1951 return gen_rtx_REG (mode, intreg[0]);
1952 case X86_64_SSE_CLASS:
1953 case X86_64_SSESF_CLASS:
1954 case X86_64_SSEDF_CLASS:
1955 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1956 case X86_64_X87_CLASS:
1957 return gen_rtx_REG (mode, FIRST_STACK_REG);
1958 case X86_64_NO_CLASS:
1959 /* Zero sized array, struct or class. */
1960 return NULL;
1961 default:
1962 abort ();
1963 }
1964 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1965 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1966 if (n == 2
1967 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1968 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1969 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1970 && class[1] == X86_64_INTEGER_CLASS
1971 && (mode == CDImode || mode == TImode)
1972 && intreg[0] + 1 == intreg[1])
1973 return gen_rtx_REG (mode, intreg[0]);
1974 if (n == 4
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1976 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1977 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1978
1979 /* Otherwise figure out the entries of the PARALLEL. */
1980 for (i = 0; i < n; i++)
1981 {
1982 switch (class[i])
1983 {
1984 case X86_64_NO_CLASS:
1985 break;
1986 case X86_64_INTEGER_CLASS:
1987 case X86_64_INTEGERSI_CLASS:
1988 /* Merge TImodes on aligned occasions here too. */
1989 if (i * 8 + 8 > bytes)
1990 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1991 else if (class[i] == X86_64_INTEGERSI_CLASS)
1992 tmpmode = SImode;
1993 else
1994 tmpmode = DImode;
1995 /* We've requested a size for which no suitable integer mode exists. Use DImode. */
1996 if (tmpmode == BLKmode)
1997 tmpmode = DImode;
1998 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1999 gen_rtx_REG (tmpmode, *intreg),
2000 GEN_INT (i*8));
2001 intreg++;
2002 break;
2003 case X86_64_SSESF_CLASS:
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (SFmode,
2006 SSE_REGNO (sse_regno)),
2007 GEN_INT (i*8));
2008 sse_regno++;
2009 break;
2010 case X86_64_SSEDF_CLASS:
2011 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2012 gen_rtx_REG (DFmode,
2013 SSE_REGNO (sse_regno)),
2014 GEN_INT (i*8));
2015 sse_regno++;
2016 break;
2017 case X86_64_SSE_CLASS:
2018 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2019 tmpmode = TImode, i++;
2020 else
2021 tmpmode = DImode;
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (tmpmode,
2024 SSE_REGNO (sse_regno)),
2025 GEN_INT (i*8));
2026 sse_regno++;
2027 break;
2028 default:
2029 abort ();
2030 }
2031 }
2032 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2033 for (i = 0; i < nexps; i++)
2034 XVECEXP (ret, 0, i) = exp [i];
2035 return ret;
2036 }
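/* The PARALLEL built above lists the registers together with the byte
   offsets they cover.  For the struct { long l; double d; } example it looks
   roughly like this (the concrete register numbers depend on which argument
   registers are still available):

       (parallel [(expr_list (reg:DI <integer reg>) (const_int 0))
                  (expr_list (reg:DF <sse reg>) (const_int 8))])  */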
2037
2038 /* Update the data in CUM to advance over an argument
2039 of mode MODE and data type TYPE.
2040 (TYPE is null for libcalls where that information may not be available.) */
2041
2042 void
2043 function_arg_advance (cum, mode, type, named)
2044 CUMULATIVE_ARGS *cum; /* current arg information */
2045 enum machine_mode mode; /* current arg mode */
2046 tree type; /* type of the argument or 0 if lib support */
2047 int named; /* whether or not the argument was named */
2048 {
2049 int bytes =
2050 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2051 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2052
2053 if (TARGET_DEBUG_ARG)
2054 fprintf (stderr,
2055 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2056 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2057 if (TARGET_64BIT)
2058 {
2059 int int_nregs, sse_nregs;
2060 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2061 cum->words += words;
2062 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2063 {
2064 cum->nregs -= int_nregs;
2065 cum->sse_nregs -= sse_nregs;
2066 cum->regno += int_nregs;
2067 cum->sse_regno += sse_nregs;
2068 }
2069 else
2070 cum->words += words;
2071 }
2072 else
2073 {
2074 if (TARGET_SSE && mode == TImode)
2075 {
2076 cum->sse_words += words;
2077 cum->sse_nregs -= 1;
2078 cum->sse_regno += 1;
2079 if (cum->sse_nregs <= 0)
2080 {
2081 cum->sse_nregs = 0;
2082 cum->sse_regno = 0;
2083 }
2084 }
2085 else
2086 {
2087 cum->words += words;
2088 cum->nregs -= words;
2089 cum->regno += words;
2090
2091 if (cum->nregs <= 0)
2092 {
2093 cum->nregs = 0;
2094 cum->regno = 0;
2095 }
2096 }
2097 }
2098 return;
2099 }
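/* In other words, on x86-64 an argument that fits entirely in registers
   consumes its int_nregs integer and sse_nregs SSE registers and does not
   advance cum->words, while an argument that does not fit leaves the
   register counts untouched and only advances cum->words by its size in
   words.  */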
2100
2101 /* Define where to put the arguments to a function.
2102 Value is zero to push the argument on the stack,
2103 or a hard register in which to store the argument.
2104
2105 MODE is the argument's machine mode.
2106 TYPE is the data type of the argument (as a tree).
2107 This is null for libcalls where that information may
2108 not be available.
2109 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2110 the preceding args and about the function being called.
2111 NAMED is nonzero if this argument is a named parameter
2112 (otherwise it is an extra parameter matching an ellipsis). */
2113
2114 rtx
2115 function_arg (cum, mode, type, named)
2116 CUMULATIVE_ARGS *cum; /* current arg information */
2117 enum machine_mode mode; /* current arg mode */
2118 tree type; /* type of the argument or 0 if lib support */
2119 int named; /* != 0 for normal args, == 0 for ... args */
2120 {
2121 rtx ret = NULL_RTX;
2122 int bytes =
2123 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2124 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2125
2126 /* Handle a hidden AL argument containing the number of SSE registers for
2127 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2128 avoid any AL settings. */
2129 if (mode == VOIDmode)
2130 {
2131 if (TARGET_64BIT)
2132 return GEN_INT (cum->maybe_vaarg
2133 ? (cum->sse_nregs < 0
2134 ? SSE_REGPARM_MAX
2135 : cum->sse_regno)
2136 : -1);
2137 else
2138 return constm1_rtx;
2139 }
2140 if (TARGET_64BIT)
2141 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2142 &x86_64_int_parameter_registers [cum->regno],
2143 cum->sse_regno);
2144 else
2145 switch (mode)
2146 {
2147 /* For now, pass fp/complex values on the stack. */
2148 default:
2149 break;
2150
2151 case BLKmode:
2152 case DImode:
2153 case SImode:
2154 case HImode:
2155 case QImode:
2156 if (words <= cum->nregs)
2157 ret = gen_rtx_REG (mode, cum->regno);
2158 break;
2159 case TImode:
2160 if (cum->sse_nregs)
2161 ret = gen_rtx_REG (mode, cum->sse_regno);
2162 break;
2163 }
2164
2165 if (TARGET_DEBUG_ARG)
2166 {
2167 fprintf (stderr,
2168 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2169 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2170
2171 if (ret)
2172 print_simple_rtl (stderr, ret);
2173 else
2174 fprintf (stderr, ", stack");
2175
2176 fprintf (stderr, " )\n");
2177 }
2178
2179 return ret;
2180 }
2181
2182 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2183 and type. */
2184
2185 int
2186 ix86_function_arg_boundary (mode, type)
2187 enum machine_mode mode;
2188 tree type;
2189 {
2190 int align;
2191 if (!TARGET_64BIT)
2192 return PARM_BOUNDARY;
2193 if (type)
2194 align = TYPE_ALIGN (type);
2195 else
2196 align = GET_MODE_ALIGNMENT (mode);
2197 if (align < PARM_BOUNDARY)
2198 align = PARM_BOUNDARY;
2199 if (align > 128)
2200 align = 128;
2201 return align;
2202 }
2203
2204 /* Return true if N is a possible register number of function value. */
2205 bool
2206 ix86_function_value_regno_p (regno)
2207 int regno;
2208 {
2209 if (!TARGET_64BIT)
2210 {
2211 return ((regno) == 0
2212 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2213 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2214 }
2215 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2216 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2217 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2218 }
2219
2220 /* Define how to find the value returned by a function.
2221 VALTYPE is the data type of the value (as a tree).
2222 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2223 otherwise, FUNC is 0. */
2224 rtx
2225 ix86_function_value (valtype)
2226 tree valtype;
2227 {
2228 if (TARGET_64BIT)
2229 {
2230 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2231 REGPARM_MAX, SSE_REGPARM_MAX,
2232 x86_64_int_return_registers, 0);
2233 /* For zero sized structures, construct_container returns NULL, but we need
2234 to keep the rest of the compiler happy by returning a meaningful value. */
2235 if (!ret)
2236 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2237 return ret;
2238 }
2239 else
2240 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2241 }
2242
2243 /* Return nonzero iff TYPE is returned in memory. */
2244 int
2245 ix86_return_in_memory (type)
2246 tree type;
2247 {
2248 int needed_intregs, needed_sseregs;
2249 if (TARGET_64BIT)
2250 {
2251 return !examine_argument (TYPE_MODE (type), type, 1,
2252 &needed_intregs, &needed_sseregs);
2253 }
2254 else
2255 {
2256 if (TYPE_MODE (type) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type))
2258 && int_size_in_bytes (type) == 8)
2259 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2260 && TYPE_MODE (type) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type))))
2262 return 1;
2263 return 0;
2264 }
2265 }
2266
2267 /* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2269 rtx
2270 ix86_libcall_value (mode)
2271 enum machine_mode mode;
2272 {
2273 if (TARGET_64BIT)
2274 {
2275 switch (mode)
2276 {
2277 case SFmode:
2278 case SCmode:
2279 case DFmode:
2280 case DCmode:
2281 return gen_rtx_REG (mode, FIRST_SSE_REG);
2282 case TFmode:
2283 case TCmode:
2284 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2285 default:
2286 return gen_rtx_REG (mode, 0);
2287 }
2288 }
2289 else
2290 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2291 }
2292 \f
2293 /* Create the va_list data type. */
2294
2295 tree
2296 ix86_build_va_list ()
2297 {
2298 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2299
2300 /* For i386 we use a plain pointer to the argument area. */
2301 if (!TARGET_64BIT)
2302 return build_pointer_type (char_type_node);
2303
2304 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2305 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2306
2307 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2308 unsigned_type_node);
2309 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2310 unsigned_type_node);
2311 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2312 ptr_type_node);
2313 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2314 ptr_type_node);
2315
2316 DECL_FIELD_CONTEXT (f_gpr) = record;
2317 DECL_FIELD_CONTEXT (f_fpr) = record;
2318 DECL_FIELD_CONTEXT (f_ovf) = record;
2319 DECL_FIELD_CONTEXT (f_sav) = record;
2320
2321 TREE_CHAIN (record) = type_decl;
2322 TYPE_NAME (record) = type_decl;
2323 TYPE_FIELDS (record) = f_gpr;
2324 TREE_CHAIN (f_gpr) = f_fpr;
2325 TREE_CHAIN (f_fpr) = f_ovf;
2326 TREE_CHAIN (f_ovf) = f_sav;
2327
2328 layout_type (record);
2329
2330 /* The correct type is an array type of one element. */
2331 return build_array_type (record, build_index_type (size_zero_node));
2332 }
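/* At the source level the record built above corresponds roughly to the
   following C declarations (using the field names created above):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;

       typedef __va_list_tag va_list[1];  */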
2333
2334 /* Perform any actions needed for a function that is receiving a
2335 variable number of arguments.
2336
2337 CUM is as above.
2338
2339 MODE and TYPE are the mode and type of the current parameter.
2340
2341 PRETEND_SIZE is a variable that should be set to the amount of stack
2342 that must be pushed by the prolog to pretend that our caller pushed
2343 it.
2344
2345 Normally, this macro will push all remaining incoming registers on the
2346 stack and set PRETEND_SIZE to the length of the registers pushed. */
2347
2348 void
2349 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2350 CUMULATIVE_ARGS *cum;
2351 enum machine_mode mode;
2352 tree type;
2353 int *pretend_size ATTRIBUTE_UNUSED;
2354 int no_rtl;
2355
2356 {
2357 CUMULATIVE_ARGS next_cum;
2358 rtx save_area = NULL_RTX, mem;
2359 rtx label;
2360 rtx label_ref;
2361 rtx tmp_reg;
2362 rtx nsse_reg;
2363 int set;
2364 tree fntype;
2365 int stdarg_p;
2366 int i;
2367
2368 if (!TARGET_64BIT)
2369 return;
2370
2371 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2372 ix86_save_varrargs_registers = 1;
2373
2374 fntype = TREE_TYPE (current_function_decl);
2375 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2376 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2377 != void_type_node));
2378
2379 /* For varargs, we do not want to skip the dummy va_dcl argument.
2380 For stdargs, we do want to skip the last named argument. */
2381 next_cum = *cum;
2382 if (stdarg_p)
2383 function_arg_advance (&next_cum, mode, type, 1);
2384
2385 if (!no_rtl)
2386 save_area = frame_pointer_rtx;
2387
2388 set = get_varargs_alias_set ();
2389
2390 for (i = next_cum.regno; i < ix86_regparm; i++)
2391 {
2392 mem = gen_rtx_MEM (Pmode,
2393 plus_constant (save_area, i * UNITS_PER_WORD));
2394 set_mem_alias_set (mem, set);
2395 emit_move_insn (mem, gen_rtx_REG (Pmode,
2396 x86_64_int_parameter_registers[i]));
2397 }
2398
2399 if (next_cum.sse_nregs)
2400 {
2401 /* Now emit code to save SSE registers. The AX parameter contains the
2402 number of SSE parameter registers used to call this function. We use
2403 the sse_prologue_save insn template, which produces a computed jump across
2404 the SSE saves. We need some preparation work to get this working. */
2405
2406 label = gen_label_rtx ();
2407 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2408
2409 /* Compute address to jump to :
2410 label - 5*eax + nnamed_sse_arguments*5 */
2411 tmp_reg = gen_reg_rtx (Pmode);
2412 nsse_reg = gen_reg_rtx (Pmode);
2413 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2414 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2415 gen_rtx_MULT (Pmode, nsse_reg,
2416 GEN_INT (4))));
2417 if (next_cum.sse_regno)
2418 emit_move_insn
2419 (nsse_reg,
2420 gen_rtx_CONST (DImode,
2421 gen_rtx_PLUS (DImode,
2422 label_ref,
2423 GEN_INT (next_cum.sse_regno * 4))));
2424 else
2425 emit_move_insn (nsse_reg, label_ref);
2426 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2427
2428 /* Compute the address of the memory block we save into. We always use a
2429 pointer pointing 127 bytes after the first byte to store - this is needed
2430 to keep the instruction size limited to 4 bytes. */
2431 tmp_reg = gen_reg_rtx (Pmode);
2432 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2433 plus_constant (save_area,
2434 8 * REGPARM_MAX + 127)));
2435 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2436 set_mem_alias_set (mem, set);
2437 set_mem_align (mem, BITS_PER_WORD);
2438
2439 /* And finally do the dirty job! */
2440 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2441 GEN_INT (next_cum.sse_regno), label));
2442 }
2443
2444 }
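/* The register save area filled in above therefore has the following layout,
   assuming REGPARM_MAX of 6: the integer argument registers at offsets 0, 8,
   ..., 40, followed by the SSE argument registers in 16 byte slots starting
   at offset 8 * REGPARM_MAX == 48.  ix86_va_start below relies on exactly
   this layout.  */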
2445
2446 /* Implement va_start. */
2447
2448 void
2449 ix86_va_start (stdarg_p, valist, nextarg)
2450 int stdarg_p;
2451 tree valist;
2452 rtx nextarg;
2453 {
2454 HOST_WIDE_INT words, n_gpr, n_fpr;
2455 tree f_gpr, f_fpr, f_ovf, f_sav;
2456 tree gpr, fpr, ovf, sav, t;
2457
2458 /* Only the 64-bit target needs something special. */
2459 if (!TARGET_64BIT)
2460 {
2461 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2462 return;
2463 }
2464
2465 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2466 f_fpr = TREE_CHAIN (f_gpr);
2467 f_ovf = TREE_CHAIN (f_fpr);
2468 f_sav = TREE_CHAIN (f_ovf);
2469
2470 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2471 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2472 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2473 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2474 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2475
2476 /* Count number of gp and fp argument registers used. */
2477 words = current_function_args_info.words;
2478 n_gpr = current_function_args_info.regno;
2479 n_fpr = current_function_args_info.sse_regno;
2480
2481 if (TARGET_DEBUG_ARG)
2482 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2483 (int) words, (int) n_gpr, (int) n_fpr);
2484
2485 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2486 build_int_2 (n_gpr * 8, 0));
2487 TREE_SIDE_EFFECTS (t) = 1;
2488 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2489
2490 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2491 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2492 TREE_SIDE_EFFECTS (t) = 1;
2493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2494
2495 /* Find the overflow area. */
2496 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2497 if (words != 0)
2498 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2499 build_int_2 (words * UNITS_PER_WORD, 0));
2500 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2501 TREE_SIDE_EFFECTS (t) = 1;
2502 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2503
2504 /* Find the register save area.
2505 The prologue of the function saves it right above the stack frame. */
2506 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2507 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2508 TREE_SIDE_EFFECTS (t) = 1;
2509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2510 }
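/* For example, in a function such as f (int n, ...) that uses one integer
   register and no SSE registers for its named arguments, the code above sets
   gp_offset to 8, fp_offset to 8 * REGPARM_MAX (48 with the usual value of
   6), points overflow_arg_area at the first stack-passed argument, and
   points reg_save_area at the block the prologue saved.  */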
2511
2512 /* Implement va_arg. */
2513 rtx
2514 ix86_va_arg (valist, type)
2515 tree valist, type;
2516 {
2517 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2518 tree f_gpr, f_fpr, f_ovf, f_sav;
2519 tree gpr, fpr, ovf, sav, t;
2520 int size, rsize;
2521 rtx lab_false, lab_over = NULL_RTX;
2522 rtx addr_rtx, r;
2523 rtx container;
2524
2525 /* Only the 64-bit target needs something special. */
2526 if (!TARGET_64BIT)
2527 {
2528 return std_expand_builtin_va_arg (valist, type);
2529 }
2530
2531 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2532 f_fpr = TREE_CHAIN (f_gpr);
2533 f_ovf = TREE_CHAIN (f_fpr);
2534 f_sav = TREE_CHAIN (f_ovf);
2535
2536 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2537 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2538 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2539 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2540 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2541
2542 size = int_size_in_bytes (type);
2543 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2544
2545 container = construct_container (TYPE_MODE (type), type, 0,
2546 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2547 /*
2548 * Pull the value out of the saved registers ...
2549 */
2550
2551 addr_rtx = gen_reg_rtx (Pmode);
2552
2553 if (container)
2554 {
2555 rtx int_addr_rtx, sse_addr_rtx;
2556 int needed_intregs, needed_sseregs;
2557 int need_temp;
2558
2559 lab_over = gen_label_rtx ();
2560 lab_false = gen_label_rtx ();
2561
2562 examine_argument (TYPE_MODE (type), type, 0,
2563 &needed_intregs, &needed_sseregs);
2564
2565
2566 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2567 || TYPE_ALIGN (type) > 128);
2568
2569 /* In case we are passing a structure, verify that it is a consecutive block
2570 in the register save area. If not, we need to do moves. */
2571 if (!need_temp && !REG_P (container))
2572 {
2573 /* Verify that all registers are strictly consecutive. */
2574 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2575 {
2576 int i;
2577
2578 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2579 {
2580 rtx slot = XVECEXP (container, 0, i);
2581 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2582 || INTVAL (XEXP (slot, 1)) != i * 16)
2583 need_temp = 1;
2584 }
2585 }
2586 else
2587 {
2588 int i;
2589
2590 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2591 {
2592 rtx slot = XVECEXP (container, 0, i);
2593 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2594 || INTVAL (XEXP (slot, 1)) != i * 8)
2595 need_temp = 1;
2596 }
2597 }
2598 }
2599 if (!need_temp)
2600 {
2601 int_addr_rtx = addr_rtx;
2602 sse_addr_rtx = addr_rtx;
2603 }
2604 else
2605 {
2606 int_addr_rtx = gen_reg_rtx (Pmode);
2607 sse_addr_rtx = gen_reg_rtx (Pmode);
2608 }
2609 /* First ensure that we fit completely in registers. */
2610 if (needed_intregs)
2611 {
2612 emit_cmp_and_jump_insns (expand_expr
2613 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2614 GEN_INT ((REGPARM_MAX - needed_intregs +
2615 1) * 8), GE, const1_rtx, SImode,
2616 1, lab_false);
2617 }
2618 if (needed_sseregs)
2619 {
2620 emit_cmp_and_jump_insns (expand_expr
2621 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2622 GEN_INT ((SSE_REGPARM_MAX -
2623 needed_sseregs + 1) * 16 +
2624 REGPARM_MAX * 8), GE, const1_rtx,
2625 SImode, 1, lab_false);
2626 }
2627
2628 /* Compute index to start of area used for integer regs. */
2629 if (needed_intregs)
2630 {
2631 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2632 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2633 if (r != int_addr_rtx)
2634 emit_move_insn (int_addr_rtx, r);
2635 }
2636 if (needed_sseregs)
2637 {
2638 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2639 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2640 if (r != sse_addr_rtx)
2641 emit_move_insn (sse_addr_rtx, r);
2642 }
2643 if (need_temp)
2644 {
2645 int i;
2646 rtx mem;
2647
2648 /* Never use the memory itself, as it has the alias set. */
2649 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2650 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2651 set_mem_alias_set (mem, get_varargs_alias_set ());
2652 set_mem_align (mem, BITS_PER_UNIT);
2653
2654 for (i = 0; i < XVECLEN (container, 0); i++)
2655 {
2656 rtx slot = XVECEXP (container, 0, i);
2657 rtx reg = XEXP (slot, 0);
2658 enum machine_mode mode = GET_MODE (reg);
2659 rtx src_addr;
2660 rtx src_mem;
2661 int src_offset;
2662 rtx dest_mem;
2663
2664 if (SSE_REGNO_P (REGNO (reg)))
2665 {
2666 src_addr = sse_addr_rtx;
2667 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2668 }
2669 else
2670 {
2671 src_addr = int_addr_rtx;
2672 src_offset = REGNO (reg) * 8;
2673 }
2674 src_mem = gen_rtx_MEM (mode, src_addr);
2675 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2676 src_mem = adjust_address (src_mem, mode, src_offset);
2677 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2678 emit_move_insn (dest_mem, src_mem);
2679 }
2680 }
2681
2682 if (needed_intregs)
2683 {
2684 t =
2685 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2686 build_int_2 (needed_intregs * 8, 0));
2687 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2688 TREE_SIDE_EFFECTS (t) = 1;
2689 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2690 }
2691 if (needed_sseregs)
2692 {
2693 t =
2694 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2695 build_int_2 (needed_sseregs * 16, 0));
2696 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2697 TREE_SIDE_EFFECTS (t) = 1;
2698 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2699 }
2700
2701 emit_jump_insn (gen_jump (lab_over));
2702 emit_barrier ();
2703 emit_label (lab_false);
2704 }
2705
2706 /* ... otherwise out of the overflow area. */
2707
2708 /* Care for on-stack alignment if needed. */
2709 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2710 t = ovf;
2711 else
2712 {
2713 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2714 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2715 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2716 }
2717 t = save_expr (t);
2718
2719 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2720 if (r != addr_rtx)
2721 emit_move_insn (addr_rtx, r);
2722
2723 t =
2724 build (PLUS_EXPR, TREE_TYPE (t), t,
2725 build_int_2 (rsize * UNITS_PER_WORD, 0));
2726 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2727 TREE_SIDE_EFFECTS (t) = 1;
2728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2729
2730 if (container)
2731 emit_label (lab_over);
2732
2733 return addr_rtx;
2734 }
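/* The RTL emitted above implements, for an argument needing NEEDED integer
   registers and assuming REGPARM_MAX of 6, roughly the following simplified
   sketch (the SSE path is analogous with 16 byte slots):

       if (gp_offset < (6 - NEEDED + 1) * 8)
         {
           addr = reg_save_area + gp_offset;
           gp_offset += NEEDED * 8;
         }
       else
         {
           addr = overflow_arg_area, aligned as required;
           overflow_arg_area = addr + size rounded up to a multiple of 8;
         }  */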
2735 \f
2736 /* Return nonzero if OP is general operand representable on x86_64. */
2737
2738 int
2739 x86_64_general_operand (op, mode)
2740 rtx op;
2741 enum machine_mode mode;
2742 {
2743 if (!TARGET_64BIT)
2744 return general_operand (op, mode);
2745 if (nonimmediate_operand (op, mode))
2746 return 1;
2747 return x86_64_sign_extended_value (op);
2748 }
2749
2750 /* Return nonzero if OP is general operand representable on x86_64
2751 as either sign extended or zero extended constant. */
2752
2753 int
2754 x86_64_szext_general_operand (op, mode)
2755 rtx op;
2756 enum machine_mode mode;
2757 {
2758 if (!TARGET_64BIT)
2759 return general_operand (op, mode);
2760 if (nonimmediate_operand (op, mode))
2761 return 1;
2762 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2763 }
2764
2765 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2766
2767 int
2768 x86_64_nonmemory_operand (op, mode)
2769 rtx op;
2770 enum machine_mode mode;
2771 {
2772 if (!TARGET_64BIT)
2773 return nonmemory_operand (op, mode);
2774 if (register_operand (op, mode))
2775 return 1;
2776 return x86_64_sign_extended_value (op);
2777 }
2778
2779 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2780
2781 int
2782 x86_64_movabs_operand (op, mode)
2783 rtx op;
2784 enum machine_mode mode;
2785 {
2786 if (!TARGET_64BIT || !flag_pic)
2787 return nonmemory_operand (op, mode);
2788 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2789 return 1;
2790 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2791 return 1;
2792 return 0;
2793 }
2794
2795 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2796
2797 int
2798 x86_64_szext_nonmemory_operand (op, mode)
2799 rtx op;
2800 enum machine_mode mode;
2801 {
2802 if (!TARGET_64BIT)
2803 return nonmemory_operand (op, mode);
2804 if (register_operand (op, mode))
2805 return 1;
2806 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2807 }
2808
2809 /* Return nonzero if OP is immediate operand representable on x86_64. */
2810
2811 int
2812 x86_64_immediate_operand (op, mode)
2813 rtx op;
2814 enum machine_mode mode;
2815 {
2816 if (!TARGET_64BIT)
2817 return immediate_operand (op, mode);
2818 return x86_64_sign_extended_value (op);
2819 }
2820
2821 /* Return nonzero if OP is immediate operand representable on x86_64. */
2822
2823 int
2824 x86_64_zext_immediate_operand (op, mode)
2825 rtx op;
2826 enum machine_mode mode ATTRIBUTE_UNUSED;
2827 {
2828 return x86_64_zero_extended_value (op);
2829 }
2830
2831 /* Return nonzero if OP is (const_int 1), else return zero. */
2832
2833 int
2834 const_int_1_operand (op, mode)
2835 rtx op;
2836 enum machine_mode mode ATTRIBUTE_UNUSED;
2837 {
2838 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2839 }
2840
2841 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2842 for shift & compare patterns, as shifting by 0 does not change flags),
2843 else return zero. */
2844
2845 int
2846 const_int_1_31_operand (op, mode)
2847 rtx op;
2848 enum machine_mode mode ATTRIBUTE_UNUSED;
2849 {
2850 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2851 }
2852
2853 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2854 reference and a constant. */
2855
2856 int
2857 symbolic_operand (op, mode)
2858 register rtx op;
2859 enum machine_mode mode ATTRIBUTE_UNUSED;
2860 {
2861 switch (GET_CODE (op))
2862 {
2863 case SYMBOL_REF:
2864 case LABEL_REF:
2865 return 1;
2866
2867 case CONST:
2868 op = XEXP (op, 0);
2869 if (GET_CODE (op) == SYMBOL_REF
2870 || GET_CODE (op) == LABEL_REF
2871 || (GET_CODE (op) == UNSPEC
2872 && (XINT (op, 1) == UNSPEC_GOT
2873 || XINT (op, 1) == UNSPEC_GOTOFF
2874 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2875 return 1;
2876 if (GET_CODE (op) != PLUS
2877 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2878 return 0;
2879
2880 op = XEXP (op, 0);
2881 if (GET_CODE (op) == SYMBOL_REF
2882 || GET_CODE (op) == LABEL_REF)
2883 return 1;
2884 /* Only @GOTOFF gets offsets. */
2885 if (GET_CODE (op) != UNSPEC
2886 || XINT (op, 1) != UNSPEC_GOTOFF)
2887 return 0;
2888
2889 op = XVECEXP (op, 0, 0);
2890 if (GET_CODE (op) == SYMBOL_REF
2891 || GET_CODE (op) == LABEL_REF)
2892 return 1;
2893 return 0;
2894
2895 default:
2896 return 0;
2897 }
2898 }
2899
2900 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2901
2902 int
2903 pic_symbolic_operand (op, mode)
2904 register rtx op;
2905 enum machine_mode mode ATTRIBUTE_UNUSED;
2906 {
2907 if (GET_CODE (op) != CONST)
2908 return 0;
2909 op = XEXP (op, 0);
2910 if (TARGET_64BIT)
2911 {
2912 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2913 return 1;
2914 }
2915 else
2916 {
2917 if (GET_CODE (op) == UNSPEC)
2918 return 1;
2919 if (GET_CODE (op) != PLUS
2920 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2921 return 0;
2922 op = XEXP (op, 0);
2923 if (GET_CODE (op) == UNSPEC)
2924 return 1;
2925 }
2926 return 0;
2927 }
2928
2929 /* Return true if OP is a symbolic operand that resolves locally. */
2930
2931 static int
2932 local_symbolic_operand (op, mode)
2933 rtx op;
2934 enum machine_mode mode ATTRIBUTE_UNUSED;
2935 {
2936 if (GET_CODE (op) == LABEL_REF)
2937 return 1;
2938
2939 if (GET_CODE (op) == CONST
2940 && GET_CODE (XEXP (op, 0)) == PLUS
2941 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2942 op = XEXP (XEXP (op, 0), 0);
2943
2944 if (GET_CODE (op) != SYMBOL_REF)
2945 return 0;
2946
2947 /* These we've been told are local by varasm and encode_section_info
2948 respectively. */
2949 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2950 return 1;
2951
2952 /* There is, however, a not insubstantial body of code in the rest of
2953 the compiler that assumes it can just stick the results of
2954 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2955 /* ??? This is a hack. Should update the body of the compiler to
2956 always create a DECL and invoke targetm.encode_section_info. */
2957 if (strncmp (XSTR (op, 0), internal_label_prefix,
2958 internal_label_prefix_len) == 0)
2959 return 1;
2960
2961 return 0;
2962 }
2963
2964 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2965
2966 int
2967 tls_symbolic_operand (op, mode)
2968 register rtx op;
2969 enum machine_mode mode ATTRIBUTE_UNUSED;
2970 {
2971 const char *symbol_str;
2972
2973 if (GET_CODE (op) != SYMBOL_REF)
2974 return 0;
2975 symbol_str = XSTR (op, 0);
2976
2977 if (symbol_str[0] != '%')
2978 return 0;
2979 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2980 }
2981
2982 static int
2983 tls_symbolic_operand_1 (op, kind)
2984 rtx op;
2985 enum tls_model kind;
2986 {
2987 const char *symbol_str;
2988
2989 if (GET_CODE (op) != SYMBOL_REF)
2990 return 0;
2991 symbol_str = XSTR (op, 0);
2992
2993 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
2994 }
2995
2996 int
2997 global_dynamic_symbolic_operand (op, mode)
2998 register rtx op;
2999 enum machine_mode mode ATTRIBUTE_UNUSED;
3000 {
3001 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3002 }
3003
3004 int
3005 local_dynamic_symbolic_operand (op, mode)
3006 register rtx op;
3007 enum machine_mode mode ATTRIBUTE_UNUSED;
3008 {
3009 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3010 }
3011
3012 int
3013 initial_exec_symbolic_operand (op, mode)
3014 register rtx op;
3015 enum machine_mode mode ATTRIBUTE_UNUSED;
3016 {
3017 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3018 }
3019
3020 int
3021 local_exec_symbolic_operand (op, mode)
3022 register rtx op;
3023 enum machine_mode mode ATTRIBUTE_UNUSED;
3024 {
3025 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3026 }
3027
3028 /* Test for a valid operand for a call instruction. Don't allow the
3029 arg pointer register or virtual regs since they may decay into
3030 reg + const, which the patterns can't handle. */
3031
3032 int
3033 call_insn_operand (op, mode)
3034 rtx op;
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3036 {
3037 /* Disallow indirect through a virtual register. This leads to
3038 compiler aborts when trying to eliminate them. */
3039 if (GET_CODE (op) == REG
3040 && (op == arg_pointer_rtx
3041 || op == frame_pointer_rtx
3042 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3043 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3044 return 0;
3045
3046 /* Disallow `call 1234'. Due to varying assembler lameness this
3047 gets either rejected or translated to `call .+1234'. */
3048 if (GET_CODE (op) == CONST_INT)
3049 return 0;
3050
3051 /* Explicitly allow SYMBOL_REF even if pic. */
3052 if (GET_CODE (op) == SYMBOL_REF)
3053 return 1;
3054
3055 /* Otherwise we can allow any general_operand in the address. */
3056 return general_operand (op, Pmode);
3057 }
3058
3059 int
3060 constant_call_address_operand (op, mode)
3061 rtx op;
3062 enum machine_mode mode ATTRIBUTE_UNUSED;
3063 {
3064 if (GET_CODE (op) == CONST
3065 && GET_CODE (XEXP (op, 0)) == PLUS
3066 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3067 op = XEXP (XEXP (op, 0), 0);
3068 return GET_CODE (op) == SYMBOL_REF;
3069 }
3070
3071 /* Match exactly zero and one. */
3072
3073 int
3074 const0_operand (op, mode)
3075 register rtx op;
3076 enum machine_mode mode;
3077 {
3078 return op == CONST0_RTX (mode);
3079 }
3080
3081 int
3082 const1_operand (op, mode)
3083 register rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 {
3086 return op == const1_rtx;
3087 }
3088
3089 /* Match 2, 4, or 8. Used for leal multiplicands. */
3090
3091 int
3092 const248_operand (op, mode)
3093 register rtx op;
3094 enum machine_mode mode ATTRIBUTE_UNUSED;
3095 {
3096 return (GET_CODE (op) == CONST_INT
3097 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3098 }
3099
3100 /* True if this is a constant appropriate for an increment or decrement. */
3101
3102 int
3103 incdec_operand (op, mode)
3104 register rtx op;
3105 enum machine_mode mode ATTRIBUTE_UNUSED;
3106 {
3107 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3108 flags register, since the carry flag is not set. */
3109 if (TARGET_PENTIUM4 && !optimize_size)
3110 return 0;
3111 return op == const1_rtx || op == constm1_rtx;
3112 }
3113
3114 /* Return nonzero if OP is acceptable as operand of DImode shift
3115 expander. */
3116
3117 int
3118 shiftdi_operand (op, mode)
3119 rtx op;
3120 enum machine_mode mode ATTRIBUTE_UNUSED;
3121 {
3122 if (TARGET_64BIT)
3123 return nonimmediate_operand (op, mode);
3124 else
3125 return register_operand (op, mode);
3126 }
3127
3128 /* Return false if this is the stack pointer, or any other fake
3129 register eliminable to the stack pointer. Otherwise, this is
3130 a register operand.
3131
3132 This is used to prevent esp from being used as an index reg,
3133 which would only happen in pathological cases. */
3134
3135 int
3136 reg_no_sp_operand (op, mode)
3137 register rtx op;
3138 enum machine_mode mode;
3139 {
3140 rtx t = op;
3141 if (GET_CODE (t) == SUBREG)
3142 t = SUBREG_REG (t);
3143 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3144 return 0;
3145
3146 return register_operand (op, mode);
3147 }
3148
3149 int
3150 mmx_reg_operand (op, mode)
3151 register rtx op;
3152 enum machine_mode mode ATTRIBUTE_UNUSED;
3153 {
3154 return MMX_REG_P (op);
3155 }
3156
3157 /* Return false if this is any eliminable register. Otherwise
3158 general_operand. */
3159
3160 int
3161 general_no_elim_operand (op, mode)
3162 register rtx op;
3163 enum machine_mode mode;
3164 {
3165 rtx t = op;
3166 if (GET_CODE (t) == SUBREG)
3167 t = SUBREG_REG (t);
3168 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3169 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3170 || t == virtual_stack_dynamic_rtx)
3171 return 0;
3172 if (REG_P (t)
3173 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3174 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3175 return 0;
3176
3177 return general_operand (op, mode);
3178 }
3179
3180 /* Return false if this is any eliminable register. Otherwise
3181 register_operand or const_int. */
3182
3183 int
3184 nonmemory_no_elim_operand (op, mode)
3185 register rtx op;
3186 enum machine_mode mode;
3187 {
3188 rtx t = op;
3189 if (GET_CODE (t) == SUBREG)
3190 t = SUBREG_REG (t);
3191 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3192 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3193 || t == virtual_stack_dynamic_rtx)
3194 return 0;
3195
3196 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3197 }
3198
3199 /* Return true if op is a Q_REGS class register. */
3200
3201 int
3202 q_regs_operand (op, mode)
3203 register rtx op;
3204 enum machine_mode mode;
3205 {
3206 if (mode != VOIDmode && GET_MODE (op) != mode)
3207 return 0;
3208 if (GET_CODE (op) == SUBREG)
3209 op = SUBREG_REG (op);
3210 return ANY_QI_REG_P (op);
3211 }
3212
3213 /* Return true if op is a NON_Q_REGS class register. */
3214
3215 int
3216 non_q_regs_operand (op, mode)
3217 register rtx op;
3218 enum machine_mode mode;
3219 {
3220 if (mode != VOIDmode && GET_MODE (op) != mode)
3221 return 0;
3222 if (GET_CODE (op) == SUBREG)
3223 op = SUBREG_REG (op);
3224 return NON_QI_REG_P (op);
3225 }
3226
3227 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3228 insns. */
3229 int
3230 sse_comparison_operator (op, mode)
3231 rtx op;
3232 enum machine_mode mode ATTRIBUTE_UNUSED;
3233 {
3234 enum rtx_code code = GET_CODE (op);
3235 switch (code)
3236 {
3237 /* Operations supported directly. */
3238 case EQ:
3239 case LT:
3240 case LE:
3241 case UNORDERED:
3242 case NE:
3243 case UNGE:
3244 case UNGT:
3245 case ORDERED:
3246 return 1;
3247 /* These are equivalent to ones above in non-IEEE comparisons. */
3248 case UNEQ:
3249 case UNLT:
3250 case UNLE:
3251 case LTGT:
3252 case GE:
3253 case GT:
3254 return !TARGET_IEEE_FP;
3255 default:
3256 return 0;
3257 }
3258 }
3259 /* Return 1 if OP is a valid comparison operator in valid mode. */
3260 int
3261 ix86_comparison_operator (op, mode)
3262 register rtx op;
3263 enum machine_mode mode;
3264 {
3265 enum machine_mode inmode;
3266 enum rtx_code code = GET_CODE (op);
3267 if (mode != VOIDmode && GET_MODE (op) != mode)
3268 return 0;
3269 if (GET_RTX_CLASS (code) != '<')
3270 return 0;
3271 inmode = GET_MODE (XEXP (op, 0));
3272
3273 if (inmode == CCFPmode || inmode == CCFPUmode)
3274 {
3275 enum rtx_code second_code, bypass_code;
3276 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3277 return (bypass_code == NIL && second_code == NIL);
3278 }
3279 switch (code)
3280 {
3281 case EQ: case NE:
3282 return 1;
3283 case LT: case GE:
3284 if (inmode == CCmode || inmode == CCGCmode
3285 || inmode == CCGOCmode || inmode == CCNOmode)
3286 return 1;
3287 return 0;
3288 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3289 if (inmode == CCmode)
3290 return 1;
3291 return 0;
3292 case GT: case LE:
3293 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3294 return 1;
3295 return 0;
3296 default:
3297 return 0;
3298 }
3299 }
3300
3301 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3302
3303 int
3304 fcmov_comparison_operator (op, mode)
3305 register rtx op;
3306 enum machine_mode mode;
3307 {
3308 enum machine_mode inmode;
3309 enum rtx_code code = GET_CODE (op);
3310 if (mode != VOIDmode && GET_MODE (op) != mode)
3311 return 0;
3312 if (GET_RTX_CLASS (code) != '<')
3313 return 0;
3314 inmode = GET_MODE (XEXP (op, 0));
3315 if (inmode == CCFPmode || inmode == CCFPUmode)
3316 {
3317 enum rtx_code second_code, bypass_code;
3318 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3319 if (bypass_code != NIL || second_code != NIL)
3320 return 0;
3321 code = ix86_fp_compare_code_to_integer (code);
3322 }
3323 /* The i387 supports just a limited set of condition codes. */
3324 switch (code)
3325 {
3326 case LTU: case GTU: case LEU: case GEU:
3327 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3328 return 1;
3329 return 0;
3330 case ORDERED: case UNORDERED:
3331 case EQ: case NE:
3332 return 1;
3333 default:
3334 return 0;
3335 }
3336 }
3337
3338 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3339
3340 int
3341 promotable_binary_operator (op, mode)
3342 register rtx op;
3343 enum machine_mode mode ATTRIBUTE_UNUSED;
3344 {
3345 switch (GET_CODE (op))
3346 {
3347 case MULT:
3348 /* Modern CPUs have the same latency for HImode and SImode multiply,
3349 but the 386 and 486 do HImode multiply faster. */
3350 return ix86_cpu > PROCESSOR_I486;
3351 case PLUS:
3352 case AND:
3353 case IOR:
3354 case XOR:
3355 case ASHIFT:
3356 return 1;
3357 default:
3358 return 0;
3359 }
3360 }
3361
3362 /* Nearly general operand, but accept any const_double, since we wish
3363 to be able to drop them into memory rather than have them get pulled
3364 into registers. */
3365
3366 int
3367 cmp_fp_expander_operand (op, mode)
3368 register rtx op;
3369 enum machine_mode mode;
3370 {
3371 if (mode != VOIDmode && mode != GET_MODE (op))
3372 return 0;
3373 if (GET_CODE (op) == CONST_DOUBLE)
3374 return 1;
3375 return general_operand (op, mode);
3376 }
3377
3378 /* Match an SI or HImode register for a zero_extract. */
3379
3380 int
3381 ext_register_operand (op, mode)
3382 register rtx op;
3383 enum machine_mode mode ATTRIBUTE_UNUSED;
3384 {
3385 int regno;
3386 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3387 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3388 return 0;
3389
3390 if (!register_operand (op, VOIDmode))
3391 return 0;
3392
3393 /* Be careful to accept only registers having upper parts. */
3394 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3395 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3396 }
3397
3398 /* Return 1 if this is a valid binary floating-point operation.
3399 OP is the expression matched, and MODE is its mode. */
3400
3401 int
3402 binary_fp_operator (op, mode)
3403 register rtx op;
3404 enum machine_mode mode;
3405 {
3406 if (mode != VOIDmode && mode != GET_MODE (op))
3407 return 0;
3408
3409 switch (GET_CODE (op))
3410 {
3411 case PLUS:
3412 case MINUS:
3413 case MULT:
3414 case DIV:
3415 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3416
3417 default:
3418 return 0;
3419 }
3420 }
3421
3422 int
3423 mult_operator (op, mode)
3424 register rtx op;
3425 enum machine_mode mode ATTRIBUTE_UNUSED;
3426 {
3427 return GET_CODE (op) == MULT;
3428 }
3429
3430 int
3431 div_operator (op, mode)
3432 register rtx op;
3433 enum machine_mode mode ATTRIBUTE_UNUSED;
3434 {
3435 return GET_CODE (op) == DIV;
3436 }
3437
3438 int
3439 arith_or_logical_operator (op, mode)
3440 rtx op;
3441 enum machine_mode mode;
3442 {
3443 return ((mode == VOIDmode || GET_MODE (op) == mode)
3444 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3445 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3446 }
3447
3448 /* Returns 1 if OP is memory operand with a displacement. */
3449
3450 int
3451 memory_displacement_operand (op, mode)
3452 register rtx op;
3453 enum machine_mode mode;
3454 {
3455 struct ix86_address parts;
3456
3457 if (! memory_operand (op, mode))
3458 return 0;
3459
3460 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3461 abort ();
3462
3463 return parts.disp != NULL_RTX;
3464 }
3465
3466 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3467 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3468
3469 ??? It seems likely that this will only work because cmpsi is an
3470 expander, and no actual insns use this. */
3471
3472 int
3473 cmpsi_operand (op, mode)
3474 rtx op;
3475 enum machine_mode mode;
3476 {
3477 if (nonimmediate_operand (op, mode))
3478 return 1;
3479
3480 if (GET_CODE (op) == AND
3481 && GET_MODE (op) == SImode
3482 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3483 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3484 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3485 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3486 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3487 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3488 return 1;
3489
3490 return 0;
3491 }
3492
3493 /* Returns 1 if OP is a memory operand that cannot be represented by the
3494 modRM array. */
3495
3496 int
3497 long_memory_operand (op, mode)
3498 register rtx op;
3499 enum machine_mode mode;
3500 {
3501 if (! memory_operand (op, mode))
3502 return 0;
3503
3504 return memory_address_length (op) != 0;
3505 }
3506
3507 /* Return nonzero if the rtx is known aligned. */
3508
3509 int
3510 aligned_operand (op, mode)
3511 rtx op;
3512 enum machine_mode mode;
3513 {
3514 struct ix86_address parts;
3515
3516 if (!general_operand (op, mode))
3517 return 0;
3518
3519 /* Registers and immediate operands are always "aligned". */
3520 if (GET_CODE (op) != MEM)
3521 return 1;
3522
3523 /* Don't even try to do any aligned optimizations with volatiles. */
3524 if (MEM_VOLATILE_P (op))
3525 return 0;
3526
3527 op = XEXP (op, 0);
3528
3529 /* Pushes and pops are only valid on the stack pointer. */
3530 if (GET_CODE (op) == PRE_DEC
3531 || GET_CODE (op) == POST_INC)
3532 return 1;
3533
3534 /* Decode the address. */
3535 if (! ix86_decompose_address (op, &parts))
3536 abort ();
3537
3538 if (parts.base && GET_CODE (parts.base) == SUBREG)
3539 parts.base = SUBREG_REG (parts.base);
3540 if (parts.index && GET_CODE (parts.index) == SUBREG)
3541 parts.index = SUBREG_REG (parts.index);
3542
3543 /* Look for some component that isn't known to be aligned. */
3544 if (parts.index)
3545 {
3546 if (parts.scale < 4
3547 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3548 return 0;
3549 }
3550 if (parts.base)
3551 {
3552 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3553 return 0;
3554 }
3555 if (parts.disp)
3556 {
3557 if (GET_CODE (parts.disp) != CONST_INT
3558 || (INTVAL (parts.disp) & 3) != 0)
3559 return 0;
3560 }
3561
3562 /* Didn't find one -- this must be an aligned address. */
3563 return 1;
3564 }
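/* For illustration: (mem:SI (plus:SI (reg:SI %ebp) (const_int 8))) passes
   the test above as long as %ebp is known to be at least 32-bit aligned,
   whereas a displacement such as (const_int 6) makes the operand fail,
   since the low two bits are nonzero.  */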
3565 \f
3566 /* Return true if the constant is something that can be loaded with
3567 a special instruction. Only handle 0.0 and 1.0; others are less
3568 worthwhile. */
3569
3570 int
3571 standard_80387_constant_p (x)
3572 rtx x;
3573 {
3574 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3575 return -1;
3576 /* Note that the 80387 has other constants, such as pi, that we should
3577 support too. On some machines these are much slower to load as a
3578 standard constant than to load from doubles in memory. */
3579 if (x == CONST0_RTX (GET_MODE (x)))
3580 return 1;
3581 if (x == CONST1_RTX (GET_MODE (x)))
3582 return 2;
3583 return 0;
3584 }
3585
3586 /* Return 1 if X is an FP constant we can load into an SSE register without
3587 using memory. */
3588 int
3589 standard_sse_constant_p (x)
3590 rtx x;
3591 {
3592 if (GET_CODE (x) != CONST_DOUBLE)
3593 return -1;
3594 return (x == CONST0_RTX (GET_MODE (x)));
3595 }
3596
3597 /* Returns 1 if OP contains a symbol reference */
3598
3599 int
3600 symbolic_reference_mentioned_p (op)
3601 rtx op;
3602 {
3603 register const char *fmt;
3604 register int i;
3605
3606 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3607 return 1;
3608
3609 fmt = GET_RTX_FORMAT (GET_CODE (op));
3610 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3611 {
3612 if (fmt[i] == 'E')
3613 {
3614 register int j;
3615
3616 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3617 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3618 return 1;
3619 }
3620
3621 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3622 return 1;
3623 }
3624
3625 return 0;
3626 }
3627
3628 /* Return 1 if it is appropriate to emit `ret' instructions in the
3629 body of a function. Do this only if the epilogue is simple, needing a
3630 couple of insns. Prior to reloading, we can't tell how many registers
3631 must be saved, so return 0 then. Return 0 if there is no frame
3632 marker to de-allocate.
3633
3634 If NON_SAVING_SETJMP is defined and true, then it is not possible
3635 for the epilogue to be simple, so return 0. This is a special case
3636 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3637 until final, but jump_optimize may need to know sooner if a
3638 `return' is OK. */
3639
3640 int
3641 ix86_can_use_return_insn_p ()
3642 {
3643 struct ix86_frame frame;
3644
3645 #ifdef NON_SAVING_SETJMP
3646 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3647 return 0;
3648 #endif
3649
3650 if (! reload_completed || frame_pointer_needed)
3651 return 0;
3652
3653 /* Don't allow more than 32K bytes of popped arguments, since that's all
3654 we can do with one instruction. */
3655 if (current_function_pops_args
3656 && current_function_args_size >= 32768)
3657 return 0;
3658
3659 ix86_compute_frame_layout (&frame);
3660 return frame.to_allocate == 0 && frame.nregs == 0;
3661 }
3662 \f
3663 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3664 int
3665 x86_64_sign_extended_value (value)
3666 rtx value;
3667 {
3668 switch (GET_CODE (value))
3669 {
3670 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3671 to be at least 32 and thus all acceptable constants are
3672 represented as CONST_INT. */
3673 case CONST_INT:
3674 if (HOST_BITS_PER_WIDE_INT == 32)
3675 return 1;
3676 else
3677 {
3678 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3679 return trunc_int_for_mode (val, SImode) == val;
3680 }
3681 break;
3682
3683 /* For certain code models, the symbolic references are known to fit. */
3684 case SYMBOL_REF:
3685 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3686
3687 /* For certain code models, the code is near as well. */
3688 case LABEL_REF:
3689 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3690
3691 /* We may also accept offsetted memory references in certain special
3692 cases. */
3693 case CONST:
3694 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3695 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3696 return 1;
3697 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3698 {
3699 rtx op1 = XEXP (XEXP (value, 0), 0);
3700 rtx op2 = XEXP (XEXP (value, 0), 1);
3701 HOST_WIDE_INT offset;
3702
3703 if (ix86_cmodel == CM_LARGE)
3704 return 0;
3705 if (GET_CODE (op2) != CONST_INT)
3706 return 0;
3707 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3708 switch (GET_CODE (op1))
3709 {
3710 case SYMBOL_REF:
3711 /* For CM_SMALL assume that the latest object is 1MB before the
3712 end of the 31-bit boundary. We may also accept pretty
3713 large negative constants, knowing that all objects are
3714 in the positive half of the address space. */
3715 if (ix86_cmodel == CM_SMALL
3716 && offset < 1024*1024*1024
3717 && trunc_int_for_mode (offset, SImode) == offset)
3718 return 1;
3719 /* For CM_KERNEL we know that all objects reside in the
3720 negative half of the 32-bit address space. We may not
3721 accept negative offsets, since they may push the address just
3722 out of range, but we may accept pretty large positive ones. */
3723 if (ix86_cmodel == CM_KERNEL
3724 && offset > 0
3725 && trunc_int_for_mode (offset, SImode) == offset)
3726 return 1;
3727 break;
3728 case LABEL_REF:
3729 /* These conditions are similar to SYMBOL_REF ones, just the
3730 constraints for code models differ. */
3731 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3732 && offset < 1024*1024*1024
3733 && trunc_int_for_mode (offset, SImode) == offset)
3734 return 1;
3735 if (ix86_cmodel == CM_KERNEL
3736 && offset > 0
3737 && trunc_int_for_mode (offset, SImode) == offset)
3738 return 1;
3739 break;
3740 default:
3741 return 0;
3742 }
3743 }
3744 return 0;
3745 default:
3746 return 0;
3747 }
3748 }
3749
3750 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3751 int
3752 x86_64_zero_extended_value (value)
3753 rtx value;
3754 {
3755 switch (GET_CODE (value))
3756 {
3757 case CONST_DOUBLE:
3758 if (HOST_BITS_PER_WIDE_INT == 32)
3759 return (GET_MODE (value) == VOIDmode
3760 && !CONST_DOUBLE_HIGH (value));
3761 else
3762 return 0;
3763 case CONST_INT:
3764 if (HOST_BITS_PER_WIDE_INT == 32)
3765 return INTVAL (value) >= 0;
3766 else
3767 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3768 break;
3769
3770 /* For certain code models, the symbolic references are known to fit. */
3771 case SYMBOL_REF:
3772 return ix86_cmodel == CM_SMALL;
3773
3774 /* For certain code models, the code is near as well. */
3775 case LABEL_REF:
3776 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3777
3778 /* We may also accept offsetted memory references in certain special
3779 cases. */
3780 case CONST:
3781 if (GET_CODE (XEXP (value, 0)) == PLUS)
3782 {
3783 rtx op1 = XEXP (XEXP (value, 0), 0);
3784 rtx op2 = XEXP (XEXP (value, 0), 1);
3785
3786 if (ix86_cmodel == CM_LARGE)
3787 return 0;
3788 switch (GET_CODE (op1))
3789 {
3790 case SYMBOL_REF:
3791 return 0;
3792 /* For small code model we may accept pretty large positive
3793 offsets, since one bit is available for free. Negative
3794 offsets are limited by the size of NULL pointer area
3795 specified by the ABI. */
3796 if (ix86_cmodel == CM_SMALL
3797 && GET_CODE (op2) == CONST_INT
3798 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3799 && (trunc_int_for_mode (INTVAL (op2), SImode)
3800 == INTVAL (op2)))
3801 return 1;
3802 /* ??? For the kernel, we may accept adjustment of
3803 -0x10000000, since we know that it will just convert
3804 negative address space to positive, but perhaps this
3805 is not worthwhile. */
3806 break;
3807 case LABEL_REF:
3808 /* These conditions are similar to SYMBOL_REF ones, just the
3809 constraints for code models differ. */
3810 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3811 && GET_CODE (op2) == CONST_INT
3812 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3813 && (trunc_int_for_mode (INTVAL (op2), SImode)
3814 == INTVAL (op2)))
3815 return 1;
3816 break;
3817 default:
3818 return 0;
3819 }
3820 }
3821 return 0;
3822 default:
3823 return 0;
3824 }
3825 }
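/* A worked example for the two predicates above (editorial illustration):
   a value fits the sign-extended field iff it lies in [-2^31, 2^31 - 1],
   and the zero-extended field iff it lies in [0, 2^32 - 1].  So -1 fits
   sign-extended but not zero-extended, 0x80000000 (2147483648) fits
   zero-extended but not sign-extended, and 0x7fffffff fits both.  */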
3826
3827 /* Value should be nonzero if functions must have frame pointers.
3828 Zero means the frame pointer need not be set up (and parms may
3829 be accessed via the stack pointer) in functions that seem suitable. */
3830
3831 int
3832 ix86_frame_pointer_required ()
3833 {
3834 /* If we accessed previous frames, then the generated code expects
3835 to be able to access the saved ebp value in our frame. */
3836 if (cfun->machine->accesses_prev_frame)
3837 return 1;
3838
3839 /* Several x86 OSes need a frame pointer for other reasons,
3840 usually pertaining to setjmp. */
3841 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3842 return 1;
3843
3844 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3845 the frame pointer by default. Turn it back on now if we've not
3846 got a leaf function. */
3847 if (TARGET_OMIT_LEAF_FRAME_POINTER
3848 && (!current_function_is_leaf || current_function_profile))
3849 return 1;
3850
3851 return 0;
3852 }
3853
3854 /* Record that the current function accesses previous call frames. */
3855
3856 void
3857 ix86_setup_frame_addresses ()
3858 {
3859 cfun->machine->accesses_prev_frame = 1;
3860 }
3861 \f
3862 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3863 # define USE_HIDDEN_LINKONCE 1
3864 #else
3865 # define USE_HIDDEN_LINKONCE 0
3866 #endif
3867
3868 static int pic_labels_used;
3869
3870 /* Fills in the label name that should be used for a pc thunk for
3871 the given register. */
3872
3873 static void
3874 get_pc_thunk_name (name, regno)
3875 char name[32];
3876 unsigned int regno;
3877 {
3878 if (USE_HIDDEN_LINKONCE)
3879 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3880 else
3881 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3882 }
3883
3884
3885 /* This function generates the -fpic pc thunks: each one loads its register
3886 with the return address of the caller and then returns. */
3887
3888 void
3889 ix86_asm_file_end (file)
3890 FILE *file;
3891 {
3892 rtx xops[2];
3893 int regno;
3894
3895 for (regno = 0; regno < 8; ++regno)
3896 {
3897 char name[32];
3898
3899 if (! ((pic_labels_used >> regno) & 1))
3900 continue;
3901
3902 get_pc_thunk_name (name, regno);
3903
3904 if (USE_HIDDEN_LINKONCE)
3905 {
3906 tree decl;
3907
3908 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3909 error_mark_node);
3910 TREE_PUBLIC (decl) = 1;
3911 TREE_STATIC (decl) = 1;
3912 DECL_ONE_ONLY (decl) = 1;
3913
3914 (*targetm.asm_out.unique_section) (decl, 0);
3915 named_section (decl, NULL, 0);
3916
3917 ASM_GLOBALIZE_LABEL (file, name);
3918 fputs ("\t.hidden\t", file);
3919 assemble_name (file, name);
3920 fputc ('\n', file);
3921 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3922 }
3923 else
3924 {
3925 text_section ();
3926 ASM_OUTPUT_LABEL (file, name);
3927 }
3928
3929 xops[0] = gen_rtx_REG (SImode, regno);
3930 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3931 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3932 output_asm_insn ("ret", xops);
3933 }
3934 }
3935
3936 /* Emit code for the SET_GOT patterns. */
3937
3938 const char *
3939 output_set_got (dest)
3940 rtx dest;
3941 {
3942 rtx xops[3];
3943
3944 xops[0] = dest;
3945 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3946
3947 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3948 {
3949 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3950
3951 if (!flag_pic)
3952 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3953 else
3954 output_asm_insn ("call\t%a2", xops);
3955
3956 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3957 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3958
3959 if (flag_pic)
3960 output_asm_insn ("pop{l}\t%0", xops);
3961 }
3962 else
3963 {
3964 char name[32];
3965 get_pc_thunk_name (name, REGNO (dest));
3966 pic_labels_used |= 1 << REGNO (dest);
3967
3968 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3969 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3970 output_asm_insn ("call\t%X2", xops);
3971 }
3972
3973 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3974 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3975 else
3976 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3977
3978 return "";
3979 }
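/* For illustration (a rough sketch, not generated verbatim), the two PIC
   sequences emitted above look like:

     without deep branch prediction:	with deep branch prediction:
	call	1f				call	__i686.get_pc_thunk.bx
     1:	popl	%ebx				addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   with %ebx standing in for whichever register DEST happens to be.  */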
3980
3981 /* Generate an "push" pattern for input ARG. */
3982
3983 static rtx
3984 gen_push (arg)
3985 rtx arg;
3986 {
3987 return gen_rtx_SET (VOIDmode,
3988 gen_rtx_MEM (Pmode,
3989 gen_rtx_PRE_DEC (Pmode,
3990 stack_pointer_rtx)),
3991 arg);
3992 }
3993
3994 /* Return >= 0 if there is an unused call-clobbered register available
3995 for the entire function. */
3996
3997 static unsigned int
3998 ix86_select_alt_pic_regnum ()
3999 {
4000 if (current_function_is_leaf && !current_function_profile)
4001 {
4002 int i;
4003 for (i = 2; i >= 0; --i)
4004 if (!regs_ever_live[i])
4005 return i;
4006 }
4007
4008 return INVALID_REGNUM;
4009 }
4010
4011 /* Return 1 if we need to save REGNO. */
4012 static int
4013 ix86_save_reg (regno, maybe_eh_return)
4014 unsigned int regno;
4015 int maybe_eh_return;
4016 {
4017 if (pic_offset_table_rtx
4018 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4019 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4020 || current_function_profile
4021 || current_function_calls_eh_return))
4022 {
4023 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4024 return 0;
4025 return 1;
4026 }
4027
4028 if (current_function_calls_eh_return && maybe_eh_return)
4029 {
4030 unsigned i;
4031 for (i = 0; ; i++)
4032 {
4033 unsigned test = EH_RETURN_DATA_REGNO (i);
4034 if (test == INVALID_REGNUM)
4035 break;
4036 if (test == regno)
4037 return 1;
4038 }
4039 }
4040
4041 return (regs_ever_live[regno]
4042 && !call_used_regs[regno]
4043 && !fixed_regs[regno]
4044 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4045 }
4046
4047 /* Return number of registers to be saved on the stack. */
4048
4049 static int
4050 ix86_nsaved_regs ()
4051 {
4052 int nregs = 0;
4053 int regno;
4054
4055 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4056 if (ix86_save_reg (regno, true))
4057 nregs++;
4058 return nregs;
4059 }
4060
4061 /* Return the offset between two registers, one to be eliminated, and the other
4062 its replacement, at the start of a routine. */
4063
4064 HOST_WIDE_INT
4065 ix86_initial_elimination_offset (from, to)
4066 int from;
4067 int to;
4068 {
4069 struct ix86_frame frame;
4070 ix86_compute_frame_layout (&frame);
4071
4072 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4073 return frame.hard_frame_pointer_offset;
4074 else if (from == FRAME_POINTER_REGNUM
4075 && to == HARD_FRAME_POINTER_REGNUM)
4076 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4077 else
4078 {
4079 if (to != STACK_POINTER_REGNUM)
4080 abort ();
4081 else if (from == ARG_POINTER_REGNUM)
4082 return frame.stack_pointer_offset;
4083 else if (from != FRAME_POINTER_REGNUM)
4084 abort ();
4085 else
4086 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4087 }
4088 }
4089
4090 /* Fill structure ix86_frame about frame of currently computed function. */
4091
4092 static void
4093 ix86_compute_frame_layout (frame)
4094 struct ix86_frame *frame;
4095 {
4096 HOST_WIDE_INT total_size;
4097 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4098 int offset;
4099 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4100 HOST_WIDE_INT size = get_frame_size ();
4101
4102 frame->nregs = ix86_nsaved_regs ();
4103 total_size = size;
4104
4105 /* Skip return address and saved base pointer. */
4106 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4107
4108 frame->hard_frame_pointer_offset = offset;
4109
4110 /* Do some sanity checking of stack_alignment_needed and
4111 preferred_alignment, since the i386 port is the only one using these
4112 features, and they may break easily. */
4113
4114 if (size && !stack_alignment_needed)
4115 abort ();
4116 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4117 abort ();
4118 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4119 abort ();
4120 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4121 abort ();
4122
4123 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4124 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4125
4126 /* Register save area */
4127 offset += frame->nregs * UNITS_PER_WORD;
4128
4129 /* Va-arg area */
4130 if (ix86_save_varrargs_registers)
4131 {
4132 offset += X86_64_VARARGS_SIZE;
4133 frame->va_arg_size = X86_64_VARARGS_SIZE;
4134 }
4135 else
4136 frame->va_arg_size = 0;
4137
4138 /* Align start of frame for local function. */
4139 frame->padding1 = ((offset + stack_alignment_needed - 1)
4140 & -stack_alignment_needed) - offset;
4141
4142 offset += frame->padding1;
4143
4144 /* Frame pointer points here. */
4145 frame->frame_pointer_offset = offset;
4146
4147 offset += size;
4148
4149 /* Add outgoing arguments area. Can be skipped if we eliminated
4150 all the function calls as dead code. */
4151 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4152 {
4153 offset += current_function_outgoing_args_size;
4154 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4155 }
4156 else
4157 frame->outgoing_arguments_size = 0;
4158
4159 /* Align stack boundary. Only needed if we're calling another function
4160 or using alloca. */
4161 if (!current_function_is_leaf || current_function_calls_alloca)
4162 frame->padding2 = ((offset + preferred_alignment - 1)
4163 & -preferred_alignment) - offset;
4164 else
4165 frame->padding2 = 0;
4166
4167 offset += frame->padding2;
4168
4169 /* We've reached end of stack frame. */
4170 frame->stack_pointer_offset = offset;
4171
4172 /* Size prologue needs to allocate. */
4173 frame->to_allocate =
4174 (size + frame->padding1 + frame->padding2
4175 + frame->outgoing_arguments_size + frame->va_arg_size);
4176
4177 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4178 && current_function_is_leaf)
4179 {
4180 frame->red_zone_size = frame->to_allocate;
4181 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4182 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4183 }
4184 else
4185 frame->red_zone_size = 0;
4186 frame->to_allocate -= frame->red_zone_size;
4187 frame->stack_pointer_offset -= frame->red_zone_size;
4188 #if 0
4189 fprintf (stderr, "nregs: %i\n", frame->nregs);
4190 fprintf (stderr, "size: %i\n", size);
4191 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4192 fprintf (stderr, "padding1: %i\n", frame->padding1);
4193 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4194 fprintf (stderr, "padding2: %i\n", frame->padding2);
4195 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4196 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4197 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4198 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4199 frame->hard_frame_pointer_offset);
4200 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4201 #endif
4202 }
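/* A rough sketch of the frame layout computed above (editorial illustration,
   top of the diagram being the highest address):

	return address
	saved %ebp (when frame_pointer_needed)	<- hard_frame_pointer_offset
	register save area (nregs words)
	va_arg register save area
	padding1				<- frame_pointer_offset
	local variables (get_frame_size ())
	outgoing arguments
	padding2				<- stack_pointer_offset

   to_allocate covers everything from the va_arg area downwards, minus
   whatever can be placed in the x86-64 red zone.  */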
4203
4204 /* Emit code to save registers in the prologue. */
4205
4206 static void
4207 ix86_emit_save_regs ()
4208 {
4209 register int regno;
4210 rtx insn;
4211
4212 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4213 if (ix86_save_reg (regno, true))
4214 {
4215 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4216 RTX_FRAME_RELATED_P (insn) = 1;
4217 }
4218 }
4219
4220 /* Emit code to save registers using MOV insns. The first register
4221 is saved at POINTER + OFFSET. */
4222 static void
4223 ix86_emit_save_regs_using_mov (pointer, offset)
4224 rtx pointer;
4225 HOST_WIDE_INT offset;
4226 {
4227 int regno;
4228 rtx insn;
4229
4230 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4231 if (ix86_save_reg (regno, true))
4232 {
4233 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4234 Pmode, offset),
4235 gen_rtx_REG (Pmode, regno));
4236 RTX_FRAME_RELATED_P (insn) = 1;
4237 offset += UNITS_PER_WORD;
4238 }
4239 }
4240
4241 /* Expand the prologue into a bunch of separate insns. */
4242
4243 void
4244 ix86_expand_prologue ()
4245 {
4246 rtx insn;
4247 bool pic_reg_used;
4248 struct ix86_frame frame;
4249 int use_mov = 0;
4250 HOST_WIDE_INT allocate;
4251
4252 if (!optimize_size)
4253 {
4254 use_fast_prologue_epilogue
4255 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4256 if (TARGET_PROLOGUE_USING_MOVE)
4257 use_mov = use_fast_prologue_epilogue;
4258 }
4259 ix86_compute_frame_layout (&frame);
4260
4261 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4262 slower on all targets. Also sdb doesn't like it. */
4263
4264 if (frame_pointer_needed)
4265 {
4266 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4267 RTX_FRAME_RELATED_P (insn) = 1;
4268
4269 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4270 RTX_FRAME_RELATED_P (insn) = 1;
4271 }
4272
4273 allocate = frame.to_allocate;
4274 /* In case we are dealing with only a single register and an empty frame,
4275 a push is equivalent to the mov+add sequence. */
4276 if (allocate == 0 && frame.nregs <= 1)
4277 use_mov = 0;
4278
4279 if (!use_mov)
4280 ix86_emit_save_regs ();
4281 else
4282 allocate += frame.nregs * UNITS_PER_WORD;
4283
4284 if (allocate == 0)
4285 ;
4286 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4287 {
4288 insn = emit_insn (gen_pro_epilogue_adjust_stack
4289 (stack_pointer_rtx, stack_pointer_rtx,
4290 GEN_INT (-allocate)));
4291 RTX_FRAME_RELATED_P (insn) = 1;
4292 }
4293 else
4294 {
4295 /* ??? Is this only valid for Win32? */
4296
4297 rtx arg0, sym;
4298
4299 if (TARGET_64BIT)
4300 abort ();
4301
4302 arg0 = gen_rtx_REG (SImode, 0);
4303 emit_move_insn (arg0, GEN_INT (allocate));
4304
4305 sym = gen_rtx_MEM (FUNCTION_MODE,
4306 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4307 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4308
4309 CALL_INSN_FUNCTION_USAGE (insn)
4310 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4311 CALL_INSN_FUNCTION_USAGE (insn));
4312 }
4313 if (use_mov)
4314 {
4315 if (!frame_pointer_needed || !frame.to_allocate)
4316 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4317 else
4318 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4319 -frame.nregs * UNITS_PER_WORD);
4320 }
4321
4322 #ifdef SUBTARGET_PROLOGUE
4323 SUBTARGET_PROLOGUE;
4324 #endif
4325
4326 pic_reg_used = false;
4327 if (pic_offset_table_rtx
4328 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4329 || current_function_profile))
4330 {
4331 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4332
4333 if (alt_pic_reg_used != INVALID_REGNUM)
4334 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4335
4336 pic_reg_used = true;
4337 }
4338
4339 if (pic_reg_used)
4340 {
4341 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4342
4343 /* Even with accurate pre-reload life analysis, we can wind up
4344 deleting all references to the pic register after reload.
4345 Consider if cross-jumping unifies two sides of a branch
4346 controlled by a comparison vs the only read from a global.
4347 In that case, allow the set_got to be deleted, though we're
4348 too late to do anything about the ebx save in the prologue. */
4349 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4350 }
4351
4352 /* Prevent function calls from being scheduled before the call to mcount.
4353 In the pic_reg_used case, make sure that the got load isn't deleted. */
4354 if (current_function_profile)
4355 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4356 }
4357
4358 /* Emit code to restore saved registers using MOV insns. First register
4359 is restored from POINTER + OFFSET. */
4360 static void
4361 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4362 rtx pointer;
4363 int offset;
4364 int maybe_eh_return;
4365 {
4366 int regno;
4367
4368 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4369 if (ix86_save_reg (regno, maybe_eh_return))
4370 {
4371 emit_move_insn (gen_rtx_REG (Pmode, regno),
4372 adjust_address (gen_rtx_MEM (Pmode, pointer),
4373 Pmode, offset));
4374 offset += UNITS_PER_WORD;
4375 }
4376 }
4377
4378 /* Restore function stack, frame, and registers. */
4379
4380 void
4381 ix86_expand_epilogue (style)
4382 int style;
4383 {
4384 int regno;
4385 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4386 struct ix86_frame frame;
4387 HOST_WIDE_INT offset;
4388
4389 ix86_compute_frame_layout (&frame);
4390
4391 /* Calculate start of saved registers relative to ebp. Special care
4392 must be taken for the normal return case of a function using
4393 eh_return: the eax and edx registers are marked as saved, but not
4394 restored along this path. */
4395 offset = frame.nregs;
4396 if (current_function_calls_eh_return && style != 2)
4397 offset -= 2;
4398 offset *= -UNITS_PER_WORD;
4399
4400 /* If we're only restoring one register and sp is not valid then
4401 use a move instruction to restore the register, since it's
4402 less work than reloading sp and popping the register.
4403
4404 The default code results in a stack adjustment using an add/lea
4405 instruction, while this code results in a LEAVE instruction (or discrete
4406 equivalent), so it is profitable in some other cases as well, especially
4407 when there are no registers to restore. We also use this code when
4408 TARGET_USE_LEAVE and there is exactly one register to pop. This heuristic
4409 may need some tuning in the future. */
4410 if ((!sp_valid && frame.nregs <= 1)
4411 || (TARGET_EPILOGUE_USING_MOVE
4412 && use_fast_prologue_epilogue
4413 && (frame.nregs > 1 || frame.to_allocate))
4414 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4415 || (frame_pointer_needed && TARGET_USE_LEAVE
4416 && use_fast_prologue_epilogue && frame.nregs == 1)
4417 || current_function_calls_eh_return)
4418 {
4419 /* Restore registers. We can use ebp or esp to address the memory
4420 locations. If both are available, default to ebp, since offsets
4421 are known to be small. The only exception is esp pointing directly to
4422 the end of the block of saved registers, where we may simplify the
4423 addressing mode. */
4424
4425 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4426 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4427 frame.to_allocate, style == 2);
4428 else
4429 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4430 offset, style == 2);
4431
4432 /* eh_return epilogues need %ecx added to the stack pointer. */
4433 if (style == 2)
4434 {
4435 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4436
4437 if (frame_pointer_needed)
4438 {
4439 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4440 tmp = plus_constant (tmp, UNITS_PER_WORD);
4441 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4442
4443 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4444 emit_move_insn (hard_frame_pointer_rtx, tmp);
4445
4446 emit_insn (gen_pro_epilogue_adjust_stack
4447 (stack_pointer_rtx, sa, const0_rtx));
4448 }
4449 else
4450 {
4451 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4452 tmp = plus_constant (tmp, (frame.to_allocate
4453 + frame.nregs * UNITS_PER_WORD));
4454 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4455 }
4456 }
4457 else if (!frame_pointer_needed)
4458 emit_insn (gen_pro_epilogue_adjust_stack
4459 (stack_pointer_rtx, stack_pointer_rtx,
4460 GEN_INT (frame.to_allocate
4461 + frame.nregs * UNITS_PER_WORD)));
4462 /* If not an i386, mov & pop is faster than "leave". */
4463 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4464 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4465 else
4466 {
4467 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4468 hard_frame_pointer_rtx,
4469 const0_rtx));
4470 if (TARGET_64BIT)
4471 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4472 else
4473 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4474 }
4475 }
4476 else
4477 {
4478 /* First step is to deallocate the stack frame so that we can
4479 pop the registers. */
4480 if (!sp_valid)
4481 {
4482 if (!frame_pointer_needed)
4483 abort ();
4484 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4485 hard_frame_pointer_rtx,
4486 GEN_INT (offset)));
4487 }
4488 else if (frame.to_allocate)
4489 emit_insn (gen_pro_epilogue_adjust_stack
4490 (stack_pointer_rtx, stack_pointer_rtx,
4491 GEN_INT (frame.to_allocate)));
4492
4493 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4494 if (ix86_save_reg (regno, false))
4495 {
4496 if (TARGET_64BIT)
4497 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4498 else
4499 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4500 }
4501 if (frame_pointer_needed)
4502 {
4503 /* Leave results in shorter dependency chains on CPUs that are
4504 able to grok it fast. */
4505 if (TARGET_USE_LEAVE)
4506 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4507 else if (TARGET_64BIT)
4508 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4509 else
4510 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4511 }
4512 }
4513
4514 /* Sibcall epilogues don't want a return instruction. */
4515 if (style == 0)
4516 return;
4517
4518 if (current_function_pops_args && current_function_args_size)
4519 {
4520 rtx popc = GEN_INT (current_function_pops_args);
4521
4522 /* i386 can only pop 64K bytes. If asked to pop more, pop
4523 return address, do explicit add, and jump indirectly to the
4524 caller. */
4525
4526 if (current_function_pops_args >= 65536)
4527 {
4528 rtx ecx = gen_rtx_REG (SImode, 2);
4529
4530 /* There is no "pascal" calling convention in the 64bit ABI. */
4531 if (TARGET_64BIT)
4532 abort ();
4533
4534 emit_insn (gen_popsi1 (ecx));
4535 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4536 emit_jump_insn (gen_return_indirect_internal (ecx));
4537 }
4538 else
4539 emit_jump_insn (gen_return_pop_internal (popc));
4540 }
4541 else
4542 emit_jump_insn (gen_return_internal ());
4543 }
4544
4545 /* Reset from the function's potential modifications. */
4546
4547 static void
4548 ix86_output_function_epilogue (file, size)
4549 FILE *file ATTRIBUTE_UNUSED;
4550 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4551 {
4552 if (pic_offset_table_rtx)
4553 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4554 }
4555 \f
4556 /* Extract the parts of an RTL expression that is a valid memory address
4557 for an instruction. Return 0 if the structure of the address is
4558 grossly off. Return -1 if the address contains ASHIFT, so it is not
4559 strictly valid, but is still used for computing the length of an lea instruction.
4560 */
4561
4562 static int
4563 ix86_decompose_address (addr, out)
4564 register rtx addr;
4565 struct ix86_address *out;
4566 {
4567 rtx base = NULL_RTX;
4568 rtx index = NULL_RTX;
4569 rtx disp = NULL_RTX;
4570 HOST_WIDE_INT scale = 1;
4571 rtx scale_rtx = NULL_RTX;
4572 int retval = 1;
4573
4574 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4575 base = addr;
4576 else if (GET_CODE (addr) == PLUS)
4577 {
4578 rtx op0 = XEXP (addr, 0);
4579 rtx op1 = XEXP (addr, 1);
4580 enum rtx_code code0 = GET_CODE (op0);
4581 enum rtx_code code1 = GET_CODE (op1);
4582
4583 if (code0 == REG || code0 == SUBREG)
4584 {
4585 if (code1 == REG || code1 == SUBREG)
4586 index = op0, base = op1; /* index + base */
4587 else
4588 base = op0, disp = op1; /* base + displacement */
4589 }
4590 else if (code0 == MULT)
4591 {
4592 index = XEXP (op0, 0);
4593 scale_rtx = XEXP (op0, 1);
4594 if (code1 == REG || code1 == SUBREG)
4595 base = op1; /* index*scale + base */
4596 else
4597 disp = op1; /* index*scale + disp */
4598 }
4599 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4600 {
4601 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4602 scale_rtx = XEXP (XEXP (op0, 0), 1);
4603 base = XEXP (op0, 1);
4604 disp = op1;
4605 }
4606 else if (code0 == PLUS)
4607 {
4608 index = XEXP (op0, 0); /* index + base + disp */
4609 base = XEXP (op0, 1);
4610 disp = op1;
4611 }
4612 else
4613 return 0;
4614 }
4615 else if (GET_CODE (addr) == MULT)
4616 {
4617 index = XEXP (addr, 0); /* index*scale */
4618 scale_rtx = XEXP (addr, 1);
4619 }
4620 else if (GET_CODE (addr) == ASHIFT)
4621 {
4622 rtx tmp;
4623
4624 /* We're called for lea too, which implements ashift on occasion. */
4625 index = XEXP (addr, 0);
4626 tmp = XEXP (addr, 1);
4627 if (GET_CODE (tmp) != CONST_INT)
4628 return 0;
4629 scale = INTVAL (tmp);
4630 if ((unsigned HOST_WIDE_INT) scale > 3)
4631 return 0;
4632 scale = 1 << scale;
4633 retval = -1;
4634 }
4635 else
4636 disp = addr; /* displacement */
4637
4638 /* Extract the integral value of scale. */
4639 if (scale_rtx)
4640 {
4641 if (GET_CODE (scale_rtx) != CONST_INT)
4642 return 0;
4643 scale = INTVAL (scale_rtx);
4644 }
4645
4646 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4647 if (base && index && scale == 1
4648 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4649 || index == stack_pointer_rtx))
4650 {
4651 rtx tmp = base;
4652 base = index;
4653 index = tmp;
4654 }
4655
4656 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4657 if ((base == hard_frame_pointer_rtx
4658 || base == frame_pointer_rtx
4659 || base == arg_pointer_rtx) && !disp)
4660 disp = const0_rtx;
4661
4662 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
4663 Avoid this by transforming to [%esi+0]. */
4664 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4665 && base && !index && !disp
4666 && REG_P (base)
4667 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4668 disp = const0_rtx;
4669
4670 /* Special case: encode reg+reg instead of reg*2. */
4671 if (!base && index && scale && scale == 2)
4672 base = index, scale = 1;
4673
4674 /* Special case: scaling cannot be encoded without base or displacement. */
4675 if (!base && !disp && index && scale != 1)
4676 disp = const0_rtx;
4677
4678 out->base = base;
4679 out->index = index;
4680 out->disp = disp;
4681 out->scale = scale;
4682
4683 return retval;
4684 }
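/* A worked example for the decomposition above (editorial illustration):
   the address

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
		       (reg:SI %ebx))
	      (const_int 12))

   i.e. 12(%ebx,%eax,4), decomposes into base = %ebx, index = %eax,
   scale = 4, disp = 12.  A top-level ASHIFT (as lea sometimes uses) is
   also accepted, but then the function returns -1 instead of 1.  */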
4685 \f
4686 /* Return cost of the memory address x.
4687 For i386, it is better to use a complex address than let gcc copy
4688 the address into a reg and make a new pseudo. But not if the address
4689 requires two regs - that would mean more pseudos with longer
4690 lifetimes. */
4691 int
4692 ix86_address_cost (x)
4693 rtx x;
4694 {
4695 struct ix86_address parts;
4696 int cost = 1;
4697
4698 if (!ix86_decompose_address (x, &parts))
4699 abort ();
4700
4701 if (parts.base && GET_CODE (parts.base) == SUBREG)
4702 parts.base = SUBREG_REG (parts.base);
4703 if (parts.index && GET_CODE (parts.index) == SUBREG)
4704 parts.index = SUBREG_REG (parts.index);
4705
4706 /* More complex memory references are better. */
4707 if (parts.disp && parts.disp != const0_rtx)
4708 cost--;
4709
4710 /* Attempt to minimize number of registers in the address. */
4711 if ((parts.base
4712 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4713 || (parts.index
4714 && (!REG_P (parts.index)
4715 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4716 cost++;
4717
4718 if (parts.base
4719 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4720 && parts.index
4721 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4722 && parts.base != parts.index)
4723 cost++;
4724
4725 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4726 since its predecode logic can't detect the length of instructions
4727 and it degenerates to vector decoding. Increase the cost of such
4728 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4729 to split such addresses or even refuse them altogether.
4730
4731 The following addressing modes are affected:
4732 [base+scale*index]
4733 [scale*index+disp]
4734 [base+index]
4735
4736 The first and last cases may be avoidable by explicitly coding the zero
4737 in the memory address, but I don't have an AMD-K6 machine handy to check
4738 this theory. */
4739
4740 if (TARGET_K6
4741 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4742 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4743 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4744 cost += 10;
4745
4746 return cost;
4747 }
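/* An illustrative example of the cost computed above (editorial note): a
   bare (reg) address keeps the initial cost of 1, while 4(%ebx) has a
   nonzero displacement and so costs 0, making the more complex form
   preferable, as the comment before the function suggests.  */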
4748 \f
4749 /* If X is a machine specific address (i.e. a symbol or label being
4750 referenced as a displacement from the GOT implemented using an
4751 UNSPEC), then return the base term. Otherwise return X. */
4752
4753 rtx
4754 ix86_find_base_term (x)
4755 rtx x;
4756 {
4757 rtx term;
4758
4759 if (TARGET_64BIT)
4760 {
4761 if (GET_CODE (x) != CONST)
4762 return x;
4763 term = XEXP (x, 0);
4764 if (GET_CODE (term) == PLUS
4765 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4766 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4767 term = XEXP (term, 0);
4768 if (GET_CODE (term) != UNSPEC
4769 || XINT (term, 1) != UNSPEC_GOTPCREL)
4770 return x;
4771
4772 term = XVECEXP (term, 0, 0);
4773
4774 if (GET_CODE (term) != SYMBOL_REF
4775 && GET_CODE (term) != LABEL_REF)
4776 return x;
4777
4778 return term;
4779 }
4780
4781 if (GET_CODE (x) != PLUS
4782 || XEXP (x, 0) != pic_offset_table_rtx
4783 || GET_CODE (XEXP (x, 1)) != CONST)
4784 return x;
4785
4786 term = XEXP (XEXP (x, 1), 0);
4787
4788 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4789 term = XEXP (term, 0);
4790
4791 if (GET_CODE (term) != UNSPEC
4792 || XINT (term, 1) != UNSPEC_GOTOFF)
4793 return x;
4794
4795 term = XVECEXP (term, 0, 0);
4796
4797 if (GET_CODE (term) != SYMBOL_REF
4798 && GET_CODE (term) != LABEL_REF)
4799 return x;
4800
4801 return term;
4802 }
4803 \f
4804 /* Determine if a given RTX is a valid constant. We already know this
4805 satisfies CONSTANT_P. */
4806
4807 bool
4808 legitimate_constant_p (x)
4809 rtx x;
4810 {
4811 rtx inner;
4812
4813 switch (GET_CODE (x))
4814 {
4815 case SYMBOL_REF:
4816 /* TLS symbols are not constant. */
4817 if (tls_symbolic_operand (x, Pmode))
4818 return false;
4819 break;
4820
4821 case CONST:
4822 inner = XEXP (x, 0);
4823
4824 /* Offsets of TLS symbols are never valid.
4825 Discourage CSE from creating them. */
4826 if (GET_CODE (inner) == PLUS
4827 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4828 return false;
4829
4830 /* Only some unspecs are valid as "constants". */
4831 if (GET_CODE (inner) == UNSPEC)
4832 switch (XINT (inner, 1))
4833 {
4834 case UNSPEC_TPOFF:
4835 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4836 case UNSPEC_TP:
4837 return true;
4838 default:
4839 return false;
4840 }
4841 break;
4842
4843 default:
4844 break;
4845 }
4846
4847 /* Otherwise we handle everything else in the move patterns. */
4848 return true;
4849 }
4850
4851 /* Determine if a given RTX is a valid constant address. */
4852
4853 bool
4854 constant_address_p (x)
4855 rtx x;
4856 {
4857 switch (GET_CODE (x))
4858 {
4859 case LABEL_REF:
4860 case CONST_INT:
4861 return true;
4862
4863 case CONST_DOUBLE:
4864 return TARGET_64BIT;
4865
4866 case CONST:
4867 case SYMBOL_REF:
4868 return !flag_pic && legitimate_constant_p (x);
4869
4870 default:
4871 return false;
4872 }
4873 }
4874
4875 /* Nonzero if the constant value X is a legitimate general operand
4876 when generating PIC code. It is given that flag_pic is on and
4877 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4878
4879 bool
4880 legitimate_pic_operand_p (x)
4881 rtx x;
4882 {
4883 rtx inner;
4884
4885 switch (GET_CODE (x))
4886 {
4887 case CONST:
4888 inner = XEXP (x, 0);
4889
4890 /* Only some unspecs are valid as "constants". */
4891 if (GET_CODE (inner) == UNSPEC)
4892 switch (XINT (inner, 1))
4893 {
4894 case UNSPEC_TPOFF:
4895 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4896 case UNSPEC_TP:
4897 return true;
4898 default:
4899 return false;
4900 }
4901 /* FALLTHRU */
4902
4903 case SYMBOL_REF:
4904 case LABEL_REF:
4905 return legitimate_pic_address_disp_p (x);
4906
4907 default:
4908 return true;
4909 }
4910 }
4911
4912 /* Determine if a given CONST RTX is a valid memory displacement
4913 in PIC mode. */
4914
4915 int
4916 legitimate_pic_address_disp_p (disp)
4917 register rtx disp;
4918 {
4919 bool saw_plus;
4920
4921 /* In 64bit mode we can allow direct addresses of symbols and labels
4922 when they are not dynamic symbols. */
4923 if (TARGET_64BIT)
4924 {
4925 rtx x = disp;
4926 if (GET_CODE (disp) == CONST)
4927 x = XEXP (disp, 0);
4928 /* ??? Handle PIC code models */
4929 if (GET_CODE (x) == PLUS
4930 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4931 && ix86_cmodel == CM_SMALL_PIC
4932 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4933 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4934 x = XEXP (x, 0);
4935 if (local_symbolic_operand (x, Pmode))
4936 return 1;
4937 }
4938 if (GET_CODE (disp) != CONST)
4939 return 0;
4940 disp = XEXP (disp, 0);
4941
4942 if (TARGET_64BIT)
4943 {
4944 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
4945 of GOT tables. We should not need these anyway. */
4946 if (GET_CODE (disp) != UNSPEC
4947 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4948 return 0;
4949
4950 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4951 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4952 return 0;
4953 return 1;
4954 }
4955
4956 saw_plus = false;
4957 if (GET_CODE (disp) == PLUS)
4958 {
4959 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4960 return 0;
4961 disp = XEXP (disp, 0);
4962 saw_plus = true;
4963 }
4964
4965 if (GET_CODE (disp) != UNSPEC)
4966 return 0;
4967
4968 switch (XINT (disp, 1))
4969 {
4970 case UNSPEC_GOT:
4971 if (saw_plus)
4972 return false;
4973 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4974 case UNSPEC_GOTOFF:
4975 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4976 case UNSPEC_GOTTPOFF:
4977 if (saw_plus)
4978 return false;
4979 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4980 case UNSPEC_NTPOFF:
4981 /* ??? Could support offset here. */
4982 if (saw_plus)
4983 return false;
4984 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4985 case UNSPEC_DTPOFF:
4986 /* ??? Could support offset here. */
4987 if (saw_plus)
4988 return false;
4989 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4990 }
4991
4992 return 0;
4993 }
4994
4995 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4996 memory address for an instruction. The MODE argument is the machine mode
4997 for the MEM expression that wants to use this address.
4998
4999 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5000 convert common non-canonical forms to canonical form so that they will
5001 be recognized. */
5002
5003 int
5004 legitimate_address_p (mode, addr, strict)
5005 enum machine_mode mode;
5006 register rtx addr;
5007 int strict;
5008 {
5009 struct ix86_address parts;
5010 rtx base, index, disp;
5011 HOST_WIDE_INT scale;
5012 const char *reason = NULL;
5013 rtx reason_rtx = NULL_RTX;
5014
5015 if (TARGET_DEBUG_ADDR)
5016 {
5017 fprintf (stderr,
5018 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5019 GET_MODE_NAME (mode), strict);
5020 debug_rtx (addr);
5021 }
5022
5023 if (ix86_decompose_address (addr, &parts) <= 0)
5024 {
5025 reason = "decomposition failed";
5026 goto report_error;
5027 }
5028
5029 base = parts.base;
5030 index = parts.index;
5031 disp = parts.disp;
5032 scale = parts.scale;
5033
5034 /* Validate base register.
5035
5036 Don't allow SUBREG's here, it can lead to spill failures when the base
5037 is one word out of a two word structure, which is represented internally
5038 as a DImode int. */
5039
5040 if (base)
5041 {
5042 rtx reg;
5043 reason_rtx = base;
5044
5045 if (GET_CODE (base) == SUBREG)
5046 reg = SUBREG_REG (base);
5047 else
5048 reg = base;
5049
5050 if (GET_CODE (reg) != REG)
5051 {
5052 reason = "base is not a register";
5053 goto report_error;
5054 }
5055
5056 if (GET_MODE (base) != Pmode)
5057 {
5058 reason = "base is not in Pmode";
5059 goto report_error;
5060 }
5061
5062 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5063 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5064 {
5065 reason = "base is not valid";
5066 goto report_error;
5067 }
5068 }
5069
5070 /* Validate index register.
5071
5072 Don't allow SUBREG's here, it can lead to spill failures when the index
5073 is one word out of a two word structure, which is represented internally
5074 as a DImode int. */
5075
5076 if (index)
5077 {
5078 rtx reg;
5079 reason_rtx = index;
5080
5081 if (GET_CODE (index) == SUBREG)
5082 reg = SUBREG_REG (index);
5083 else
5084 reg = index;
5085
5086 if (GET_CODE (reg) != REG)
5087 {
5088 reason = "index is not a register";
5089 goto report_error;
5090 }
5091
5092 if (GET_MODE (index) != Pmode)
5093 {
5094 reason = "index is not in Pmode";
5095 goto report_error;
5096 }
5097
5098 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5099 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5100 {
5101 reason = "index is not valid";
5102 goto report_error;
5103 }
5104 }
5105
5106 /* Validate scale factor. */
5107 if (scale != 1)
5108 {
5109 reason_rtx = GEN_INT (scale);
5110 if (!index)
5111 {
5112 reason = "scale without index";
5113 goto report_error;
5114 }
5115
5116 if (scale != 2 && scale != 4 && scale != 8)
5117 {
5118 reason = "scale is not a valid multiplier";
5119 goto report_error;
5120 }
5121 }
5122
5123 /* Validate displacement. */
5124 if (disp)
5125 {
5126 reason_rtx = disp;
5127
5128 if (TARGET_64BIT)
5129 {
5130 if (!x86_64_sign_extended_value (disp))
5131 {
5132 reason = "displacement is out of range";
5133 goto report_error;
5134 }
5135 }
5136 else
5137 {
5138 if (GET_CODE (disp) == CONST_DOUBLE)
5139 {
5140 reason = "displacement is a const_double";
5141 goto report_error;
5142 }
5143 }
5144
5145 if (GET_CODE (disp) == CONST
5146 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5147 switch (XINT (XEXP (disp, 0), 1))
5148 {
5149 case UNSPEC_GOT:
5150 case UNSPEC_GOTOFF:
5151 case UNSPEC_GOTPCREL:
5152 if (!flag_pic)
5153 abort ();
5154 goto is_legitimate_pic;
5155
5156 case UNSPEC_GOTTPOFF:
5157 case UNSPEC_NTPOFF:
5158 case UNSPEC_DTPOFF:
5159 break;
5160
5161 default:
5162 reason = "invalid address unspec";
5163 goto report_error;
5164 }
5165
5166 else if (flag_pic && SYMBOLIC_CONST (disp))
5167 {
5168 is_legitimate_pic:
5169 if (TARGET_64BIT && (index || base))
5170 {
5171 reason = "non-constant pic memory reference";
5172 goto report_error;
5173 }
5174 if (! legitimate_pic_address_disp_p (disp))
5175 {
5176 reason = "displacement is an invalid pic construct";
5177 goto report_error;
5178 }
5179
5180 /* This code used to verify that a symbolic pic displacement
5181 includes the pic_offset_table_rtx register.
5182
5183 While this is a good idea, unfortunately these constructs may
5184 be created by the "adds using lea" optimization for incorrect
5185 code like:
5186
5187 int a;
5188 int foo(int i)
5189 {
5190 return *(&a+i);
5191 }
5192
5193 This code is nonsensical, but results in addressing the
5194 GOT table with a pic_offset_table_rtx base. We can't
5195 just refuse it easily, since it gets matched by the
5196 "addsi3" pattern, which later gets split to lea in the
5197 case where the output register differs from the input. While this
5198 could be handled by a separate addsi pattern for this case
5199 that never results in lea, disabling this test seems to be
5200 the easier and correct fix for the crash. */
5201 }
5202 else if (!CONSTANT_ADDRESS_P (disp))
5203 {
5204 reason = "displacement is not constant";
5205 goto report_error;
5206 }
5207 }
5208
5209 /* Everything looks valid. */
5210 if (TARGET_DEBUG_ADDR)
5211 fprintf (stderr, "Success.\n");
5212 return TRUE;
5213
5214 report_error:
5215 if (TARGET_DEBUG_ADDR)
5216 {
5217 fprintf (stderr, "Error: %s\n", reason);
5218 debug_rtx (reason_rtx);
5219 }
5220 return FALSE;
5221 }
5222 \f
5223 /* Return a unique alias set for the GOT. */
5224
5225 static HOST_WIDE_INT
5226 ix86_GOT_alias_set ()
5227 {
5228 static HOST_WIDE_INT set = -1;
5229 if (set == -1)
5230 set = new_alias_set ();
5231 return set;
5232 }
5233
5234 /* Return a legitimate reference for ORIG (an address) using the
5235 register REG. If REG is 0, a new pseudo is generated.
5236
5237 There are two types of references that must be handled:
5238
5239 1. Global data references must load the address from the GOT, via
5240 the PIC reg. An insn is emitted to do this load, and the reg is
5241 returned.
5242
5243 2. Static data references, constant pool addresses, and code labels
5244 compute the address as an offset from the GOT, whose base is in
5245 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5246 differentiate them from global data objects. The returned
5247 address is the PIC reg + an unspec constant.
5248
5249 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5250 reg also appears in the address. */
5251
5252 rtx
5253 legitimize_pic_address (orig, reg)
5254 rtx orig;
5255 rtx reg;
5256 {
5257 rtx addr = orig;
5258 rtx new = orig;
5259 rtx base;
5260
5261 if (local_symbolic_operand (addr, Pmode))
5262 {
5263 /* In 64bit mode we can address such objects directly. */
5264 if (TARGET_64BIT)
5265 new = addr;
5266 else
5267 {
5268 /* This symbol may be referenced via a displacement from the PIC
5269 base address (@GOTOFF). */
5270
5271 if (reload_in_progress)
5272 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5273 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5274 new = gen_rtx_CONST (Pmode, new);
5275 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5276
5277 if (reg != 0)
5278 {
5279 emit_move_insn (reg, new);
5280 new = reg;
5281 }
5282 }
5283 }
5284 else if (GET_CODE (addr) == SYMBOL_REF)
5285 {
5286 if (TARGET_64BIT)
5287 {
5288 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5289 new = gen_rtx_CONST (Pmode, new);
5290 new = gen_rtx_MEM (Pmode, new);
5291 RTX_UNCHANGING_P (new) = 1;
5292 set_mem_alias_set (new, ix86_GOT_alias_set ());
5293
5294 if (reg == 0)
5295 reg = gen_reg_rtx (Pmode);
5296 /* Use gen_movsi directly, otherwise the address is loaded
5297 into a register for CSE. We don't want to CSE these addresses;
5298 instead we CSE addresses from the GOT table, so skip this. */
5299 emit_insn (gen_movsi (reg, new));
5300 new = reg;
5301 }
5302 else
5303 {
5304 /* This symbol must be referenced via a load from the
5305 Global Offset Table (@GOT). */
5306
5307 if (reload_in_progress)
5308 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5309 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5310 new = gen_rtx_CONST (Pmode, new);
5311 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5312 new = gen_rtx_MEM (Pmode, new);
5313 RTX_UNCHANGING_P (new) = 1;
5314 set_mem_alias_set (new, ix86_GOT_alias_set ());
5315
5316 if (reg == 0)
5317 reg = gen_reg_rtx (Pmode);
5318 emit_move_insn (reg, new);
5319 new = reg;
5320 }
5321 }
5322 else
5323 {
5324 if (GET_CODE (addr) == CONST)
5325 {
5326 addr = XEXP (addr, 0);
5327
5328 /* We must match stuff we generate before. Assume the only
5329 unspecs that can get here are ours. Not that we could do
5330 anything with them anyway... */
5331 if (GET_CODE (addr) == UNSPEC
5332 || (GET_CODE (addr) == PLUS
5333 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5334 return orig;
5335 if (GET_CODE (addr) != PLUS)
5336 abort ();
5337 }
5338 if (GET_CODE (addr) == PLUS)
5339 {
5340 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5341
5342 /* Check first to see if this is a constant offset from a @GOTOFF
5343 symbol reference. */
5344 if (local_symbolic_operand (op0, Pmode)
5345 && GET_CODE (op1) == CONST_INT)
5346 {
5347 if (!TARGET_64BIT)
5348 {
5349 if (reload_in_progress)
5350 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5351 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5352 UNSPEC_GOTOFF);
5353 new = gen_rtx_PLUS (Pmode, new, op1);
5354 new = gen_rtx_CONST (Pmode, new);
5355 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5356
5357 if (reg != 0)
5358 {
5359 emit_move_insn (reg, new);
5360 new = reg;
5361 }
5362 }
5363 else
5364 {
5365 /* ??? We need to limit offsets here. */
5366 }
5367 }
5368 else
5369 {
5370 base = legitimize_pic_address (XEXP (addr, 0), reg);
5371 new = legitimize_pic_address (XEXP (addr, 1),
5372 base == reg ? NULL_RTX : reg);
5373
5374 if (GET_CODE (new) == CONST_INT)
5375 new = plus_constant (base, INTVAL (new));
5376 else
5377 {
5378 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5379 {
5380 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5381 new = XEXP (new, 1);
5382 }
5383 new = gen_rtx_PLUS (Pmode, base, new);
5384 }
5385 }
5386 }
5387 }
5388 return new;
5389 }
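/* For illustration (editorial sketch), the two common ia32 results of the
   function above: a local symbol "foo" becomes

     (plus:SI (reg:SI %ebx)
	      (const:SI (unspec:SI [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   while a global symbol is loaded through the GOT:

     (mem:SI (plus:SI (reg:SI %ebx)
		      (const:SI (unspec:SI [(symbol_ref "foo")] UNSPEC_GOT))))

   with %ebx standing for pic_offset_table_rtx.  */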
5390
5391 static void
5392 ix86_encode_section_info (decl, first)
5393 tree decl;
5394 int first ATTRIBUTE_UNUSED;
5395 {
5396 bool local_p = (*targetm.binds_local_p) (decl);
5397 rtx rtl, symbol;
5398
5399 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5400 if (GET_CODE (rtl) != MEM)
5401 return;
5402 symbol = XEXP (rtl, 0);
5403 if (GET_CODE (symbol) != SYMBOL_REF)
5404 return;
5405
5406 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5407 symbol so that we may access it directly in the GOT. */
5408
5409 if (flag_pic)
5410 SYMBOL_REF_FLAG (symbol) = local_p;
5411
5412 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5413 "local dynamic", "initial exec" or "local exec" TLS models
5414 respectively. */
5415
5416 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5417 {
5418 const char *symbol_str;
5419 char *newstr;
5420 size_t len;
5421 enum tls_model kind;
5422
5423 if (!flag_pic)
5424 {
5425 if (local_p)
5426 kind = TLS_MODEL_LOCAL_EXEC;
5427 else
5428 kind = TLS_MODEL_INITIAL_EXEC;
5429 }
5430 /* Local dynamic is inefficient when we're not combining the
5431 parts of the address. */
5432 else if (optimize && local_p)
5433 kind = TLS_MODEL_LOCAL_DYNAMIC;
5434 else
5435 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5436 if (kind < flag_tls_default)
5437 kind = flag_tls_default;
5438
5439 symbol_str = XSTR (symbol, 0);
5440
5441 if (symbol_str[0] == '%')
5442 {
5443 if (symbol_str[1] == tls_model_chars[kind])
5444 return;
5445 symbol_str += 2;
5446 }
5447 len = strlen (symbol_str) + 1;
5448 newstr = alloca (len + 2);
5449
5450 newstr[0] = '%';
5451 newstr[1] = tls_model_chars[kind];
5452 memcpy (newstr + 2, symbol_str, len);
5453
5454 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5455 }
5456 }
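/* An example of the encoding above (editorial illustration): with the
   %[GLil] scheme, a locally bound thread-local variable "foo" compiled
   without -fpic (local exec) is renamed "%lfoo", while the same variable
   compiled with -fpic and optimization (local dynamic) becomes "%Lfoo",
   subject to being bumped by flag_tls_default.  */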
5457
5458 /* Undo the above when printing symbol names. */
5459
5460 static const char *
5461 ix86_strip_name_encoding (str)
5462 const char *str;
5463 {
5464 if (str[0] == '%')
5465 str += 2;
5466 if (str [0] == '*')
5467 str += 1;
5468 return str;
5469 }
5470 \f
5471 /* Load the thread pointer into a register. */
5472
5473 static rtx
5474 get_thread_pointer ()
5475 {
5476 rtx tp;
5477
5478 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5479 tp = gen_rtx_CONST (Pmode, tp);
5480 tp = force_reg (Pmode, tp);
5481
5482 return tp;
5483 }
5484
5485 /* Try machine-dependent ways of modifying an illegitimate address
5486 to be legitimate. If we find one, return the new, valid address.
5487 This macro is used in only one place: `memory_address' in explow.c.
5488
5489 OLDX is the address as it was before break_out_memory_refs was called.
5490 In some cases it is useful to look at this to decide what needs to be done.
5491
5492 MODE and WIN are passed so that this macro can use
5493 GO_IF_LEGITIMATE_ADDRESS.
5494
5495 It is always safe for this macro to do nothing. It exists to recognize
5496 opportunities to optimize the output.
5497
5498 For the 80386, we handle X+REG by loading X into a register R and
5499 using R+REG. R will go in a general reg and indexing will be used.
5500 However, if REG is a broken-out memory address or multiplication,
5501 nothing needs to be done because REG can certainly go in a general reg.
5502
5503 When -fpic is used, special handling is needed for symbolic references.
5504 See comments by legitimize_pic_address in i386.c for details. */
5505
5506 rtx
5507 legitimize_address (x, oldx, mode)
5508 register rtx x;
5509 register rtx oldx ATTRIBUTE_UNUSED;
5510 enum machine_mode mode;
5511 {
5512 int changed = 0;
5513 unsigned log;
5514
5515 if (TARGET_DEBUG_ADDR)
5516 {
5517 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5518 GET_MODE_NAME (mode));
5519 debug_rtx (x);
5520 }
5521
5522 log = tls_symbolic_operand (x, mode);
5523 if (log)
5524 {
5525 rtx dest, base, off, pic;
5526
5527 switch (log)
5528 {
5529 case TLS_MODEL_GLOBAL_DYNAMIC:
5530 dest = gen_reg_rtx (Pmode);
5531 emit_insn (gen_tls_global_dynamic (dest, x));
5532 break;
5533
5534 case TLS_MODEL_LOCAL_DYNAMIC:
5535 base = gen_reg_rtx (Pmode);
5536 emit_insn (gen_tls_local_dynamic_base (base));
5537
5538 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5539 off = gen_rtx_CONST (Pmode, off);
5540
5541 return gen_rtx_PLUS (Pmode, base, off);
5542
5543 case TLS_MODEL_INITIAL_EXEC:
5544 if (flag_pic)
5545 {
5546 if (reload_in_progress)
5547 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5548 pic = pic_offset_table_rtx;
5549 }
5550 else
5551 {
5552 pic = gen_reg_rtx (Pmode);
5553 emit_insn (gen_set_got (pic));
5554 }
5555
5556 base = get_thread_pointer ();
5557
5558 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5559 off = gen_rtx_CONST (Pmode, off);
5560 off = gen_rtx_PLUS (Pmode, pic, off);
5561 off = gen_rtx_MEM (Pmode, off);
5562 RTX_UNCHANGING_P (off) = 1;
5563 set_mem_alias_set (off, ix86_GOT_alias_set ());
5564
5565 	  /* Damn Sun for specifying a set of dynamic relocations without
5566 considering the two-operand nature of the architecture!
5567 We'd be much better off with a "GOTNTPOFF" relocation that
5568 already contained the negated constant. */
5569 /* ??? Using negl and reg+reg addressing appears to be a lose
5570 size-wise. The negl is two bytes, just like the extra movl
5571 incurred by the two-operand subl, but reg+reg addressing
5572 uses the two-byte modrm form, unlike plain reg. */
5573
5574 dest = gen_reg_rtx (Pmode);
5575 emit_insn (gen_subsi3 (dest, base, off));
5576 break;
5577
5578 case TLS_MODEL_LOCAL_EXEC:
5579 base = get_thread_pointer ();
5580
5581 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5582 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5583 off = gen_rtx_CONST (Pmode, off);
5584
5585 if (TARGET_GNU_TLS)
5586 return gen_rtx_PLUS (Pmode, base, off);
5587 else
5588 {
5589 dest = gen_reg_rtx (Pmode);
5590 emit_insn (gen_subsi3 (dest, base, off));
5591 }
5592 break;
5593
5594 default:
5595 abort ();
5596 }
5597
5598 return dest;
5599 }
5600
5601 if (flag_pic && SYMBOLIC_CONST (x))
5602 return legitimize_pic_address (x, 0);
5603
5604 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5605 if (GET_CODE (x) == ASHIFT
5606 && GET_CODE (XEXP (x, 1)) == CONST_INT
5607 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5608 {
5609 changed = 1;
5610 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5611 GEN_INT (1 << log));
5612 }
5613
5614 if (GET_CODE (x) == PLUS)
5615 {
5616 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5617
5618 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5619 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5620 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5621 {
5622 changed = 1;
5623 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5624 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5625 GEN_INT (1 << log));
5626 }
5627
5628 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5629 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5630 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5631 {
5632 changed = 1;
5633 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5634 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5635 GEN_INT (1 << log));
5636 }
5637
5638 /* Put multiply first if it isn't already. */
5639 if (GET_CODE (XEXP (x, 1)) == MULT)
5640 {
5641 rtx tmp = XEXP (x, 0);
5642 XEXP (x, 0) = XEXP (x, 1);
5643 XEXP (x, 1) = tmp;
5644 changed = 1;
5645 }
5646
5647 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5648 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5649 created by virtual register instantiation, register elimination, and
5650 similar optimizations. */
5651 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5652 {
5653 changed = 1;
5654 x = gen_rtx_PLUS (Pmode,
5655 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5656 XEXP (XEXP (x, 1), 0)),
5657 XEXP (XEXP (x, 1), 1));
5658 }
5659
5660 /* Canonicalize
5661 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5662 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5663 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5664 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5665 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5666 && CONSTANT_P (XEXP (x, 1)))
5667 {
5668 rtx constant;
5669 rtx other = NULL_RTX;
5670
5671 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5672 {
5673 constant = XEXP (x, 1);
5674 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5675 }
5676 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5677 {
5678 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5679 other = XEXP (x, 1);
5680 }
5681 else
5682 constant = 0;
5683
5684 if (constant)
5685 {
5686 changed = 1;
5687 x = gen_rtx_PLUS (Pmode,
5688 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5689 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5690 plus_constant (other, INTVAL (constant)));
5691 }
5692 }
5693
5694 if (changed && legitimate_address_p (mode, x, FALSE))
5695 return x;
5696
5697 if (GET_CODE (XEXP (x, 0)) == MULT)
5698 {
5699 changed = 1;
5700 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5701 }
5702
5703 if (GET_CODE (XEXP (x, 1)) == MULT)
5704 {
5705 changed = 1;
5706 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5707 }
5708
5709 if (changed
5710 && GET_CODE (XEXP (x, 1)) == REG
5711 && GET_CODE (XEXP (x, 0)) == REG)
5712 return x;
5713
5714 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5715 {
5716 changed = 1;
5717 x = legitimize_pic_address (x, 0);
5718 }
5719
5720 if (changed && legitimate_address_p (mode, x, FALSE))
5721 return x;
5722
5723 if (GET_CODE (XEXP (x, 0)) == REG)
5724 {
5725 register rtx temp = gen_reg_rtx (Pmode);
5726 register rtx val = force_operand (XEXP (x, 1), temp);
5727 if (val != temp)
5728 emit_move_insn (temp, val);
5729
5730 XEXP (x, 1) = temp;
5731 return x;
5732 }
5733
5734 else if (GET_CODE (XEXP (x, 1)) == REG)
5735 {
5736 register rtx temp = gen_reg_rtx (Pmode);
5737 register rtx val = force_operand (XEXP (x, 0), temp);
5738 if (val != temp)
5739 emit_move_insn (temp, val);
5740
5741 XEXP (x, 0) = temp;
5742 return x;
5743 }
5744 }
5745
5746 return x;
5747 }
5748 \f
5749 /* Print an integer constant expression in assembler syntax. Addition
5750 and subtraction are the only arithmetic that may appear in these
5751 expressions. FILE is the stdio stream to write to, X is the rtx, and
5752 CODE is the operand print code from the output string. */
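	/* For instance, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) comes
	   out as "foo@GOTOFF", and a SYMBOL_REF printed under code 'P' may get an
	   "@PLT" suffix; "foo" is only an illustrative name.  */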
5753
5754 static void
5755 output_pic_addr_const (file, x, code)
5756 FILE *file;
5757 rtx x;
5758 int code;
5759 {
5760 char buf[256];
5761
5762 switch (GET_CODE (x))
5763 {
5764 case PC:
5765 if (flag_pic)
5766 putc ('.', file);
5767 else
5768 abort ();
5769 break;
5770
5771 case SYMBOL_REF:
5772 assemble_name (file, XSTR (x, 0));
5773 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5774 fputs ("@PLT", file);
5775 break;
5776
5777 case LABEL_REF:
5778 x = XEXP (x, 0);
5779 /* FALLTHRU */
5780 case CODE_LABEL:
5781 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5782 assemble_name (asm_out_file, buf);
5783 break;
5784
5785 case CONST_INT:
5786 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5787 break;
5788
5789 case CONST:
5790 /* This used to output parentheses around the expression,
5791 but that does not work on the 386 (either ATT or BSD assembler). */
5792 output_pic_addr_const (file, XEXP (x, 0), code);
5793 break;
5794
5795 case CONST_DOUBLE:
5796 if (GET_MODE (x) == VOIDmode)
5797 {
5798 /* We can use %d if the number is <32 bits and positive. */
5799 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5800 fprintf (file, "0x%lx%08lx",
5801 (unsigned long) CONST_DOUBLE_HIGH (x),
5802 (unsigned long) CONST_DOUBLE_LOW (x));
5803 else
5804 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5805 }
5806 else
5807 /* We can't handle floating point constants;
5808 PRINT_OPERAND must handle them. */
5809 output_operand_lossage ("floating constant misused");
5810 break;
5811
5812 case PLUS:
5813 /* Some assemblers need integer constants to appear first. */
5814 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5815 {
5816 output_pic_addr_const (file, XEXP (x, 0), code);
5817 putc ('+', file);
5818 output_pic_addr_const (file, XEXP (x, 1), code);
5819 }
5820 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5821 {
5822 output_pic_addr_const (file, XEXP (x, 1), code);
5823 putc ('+', file);
5824 output_pic_addr_const (file, XEXP (x, 0), code);
5825 }
5826 else
5827 abort ();
5828 break;
5829
5830 case MINUS:
5831 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5832 output_pic_addr_const (file, XEXP (x, 0), code);
5833 putc ('-', file);
5834 output_pic_addr_const (file, XEXP (x, 1), code);
5835 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5836 break;
5837
5838 case UNSPEC:
5839 if (XVECLEN (x, 0) != 1)
5840 abort ();
5841 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5842 switch (XINT (x, 1))
5843 {
5844 case UNSPEC_GOT:
5845 fputs ("@GOT", file);
5846 break;
5847 case UNSPEC_GOTOFF:
5848 fputs ("@GOTOFF", file);
5849 break;
5850 case UNSPEC_GOTPCREL:
5851 fputs ("@GOTPCREL(%rip)", file);
5852 break;
5853 case UNSPEC_GOTTPOFF:
5854 fputs ("@GOTTPOFF", file);
5855 break;
5856 case UNSPEC_TPOFF:
5857 fputs ("@TPOFF", file);
5858 break;
5859 case UNSPEC_NTPOFF:
5860 fputs ("@NTPOFF", file);
5861 break;
5862 case UNSPEC_DTPOFF:
5863 fputs ("@DTPOFF", file);
5864 break;
5865 default:
5866 output_operand_lossage ("invalid UNSPEC as operand");
5867 break;
5868 }
5869 break;
5870
5871 default:
5872 output_operand_lossage ("invalid expression as operand");
5873 }
5874 }
5875
5876 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5877 We need to handle our special PIC relocations. */
5878
5879 void
5880 i386_dwarf_output_addr_const (file, x)
5881 FILE *file;
5882 rtx x;
5883 {
5884 #ifdef ASM_QUAD
5885 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5886 #else
5887 if (TARGET_64BIT)
5888 abort ();
5889 fprintf (file, "%s", ASM_LONG);
5890 #endif
5891 if (flag_pic)
5892 output_pic_addr_const (file, x, '\0');
5893 else
5894 output_addr_const (file, x);
5895 fputc ('\n', file);
5896 }
5897
5898 /* In the name of slightly smaller debug output, and to cater to
5899 	   general assembler lossage, recognize PIC+GOTOFF and turn it back
5900 into a direct symbol reference. */
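	/* E.g. (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
	   is collapsed back to (symbol_ref "foo"); "foo" is just an illustrative
	   name here.  */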
5901
5902 rtx
5903 i386_simplify_dwarf_addr (orig_x)
5904 rtx orig_x;
5905 {
5906 rtx x = orig_x, y;
5907
5908 if (GET_CODE (x) == MEM)
5909 x = XEXP (x, 0);
5910
5911 if (TARGET_64BIT)
5912 {
5913 if (GET_CODE (x) != CONST
5914 || GET_CODE (XEXP (x, 0)) != UNSPEC
5915 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5916 || GET_CODE (orig_x) != MEM)
5917 return orig_x;
5918 return XVECEXP (XEXP (x, 0), 0, 0);
5919 }
5920
5921 if (GET_CODE (x) != PLUS
5922 || GET_CODE (XEXP (x, 1)) != CONST)
5923 return orig_x;
5924
5925 if (GET_CODE (XEXP (x, 0)) == REG
5926 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5927 /* %ebx + GOT/GOTOFF */
5928 y = NULL;
5929 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5930 {
5931 /* %ebx + %reg * scale + GOT/GOTOFF */
5932 y = XEXP (x, 0);
5933 if (GET_CODE (XEXP (y, 0)) == REG
5934 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5935 y = XEXP (y, 1);
5936 else if (GET_CODE (XEXP (y, 1)) == REG
5937 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5938 y = XEXP (y, 0);
5939 else
5940 return orig_x;
5941 if (GET_CODE (y) != REG
5942 && GET_CODE (y) != MULT
5943 && GET_CODE (y) != ASHIFT)
5944 return orig_x;
5945 }
5946 else
5947 return orig_x;
5948
5949 x = XEXP (XEXP (x, 1), 0);
5950 if (GET_CODE (x) == UNSPEC
5951 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5952 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5953 {
5954 if (y)
5955 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5956 return XVECEXP (x, 0, 0);
5957 }
5958
5959 if (GET_CODE (x) == PLUS
5960 && GET_CODE (XEXP (x, 0)) == UNSPEC
5961 && GET_CODE (XEXP (x, 1)) == CONST_INT
5962 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5963 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5964 && GET_CODE (orig_x) != MEM)))
5965 {
5966 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5967 if (y)
5968 return gen_rtx_PLUS (Pmode, y, x);
5969 return x;
5970 }
5971
5972 return orig_x;
5973 }
5974 \f
5975 static void
5976 put_condition_code (code, mode, reverse, fp, file)
5977 enum rtx_code code;
5978 enum machine_mode mode;
5979 int reverse, fp;
5980 FILE *file;
5981 {
5982 const char *suffix;
5983
5984 if (mode == CCFPmode || mode == CCFPUmode)
5985 {
5986 enum rtx_code second_code, bypass_code;
5987 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5988 if (bypass_code != NIL || second_code != NIL)
5989 abort ();
5990 code = ix86_fp_compare_code_to_integer (code);
5991 mode = CCmode;
5992 }
5993 if (reverse)
5994 code = reverse_condition (code);
5995
5996 switch (code)
5997 {
5998 case EQ:
5999 suffix = "e";
6000 break;
6001 case NE:
6002 suffix = "ne";
6003 break;
6004 case GT:
6005 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6006 abort ();
6007 suffix = "g";
6008 break;
6009 case GTU:
6010 	      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6011 	         Those same assemblers have the same but opposite lossage on cmov.  */
6012 if (mode != CCmode)
6013 abort ();
6014 suffix = fp ? "nbe" : "a";
6015 break;
6016 case LT:
6017 if (mode == CCNOmode || mode == CCGOCmode)
6018 suffix = "s";
6019 else if (mode == CCmode || mode == CCGCmode)
6020 suffix = "l";
6021 else
6022 abort ();
6023 break;
6024 case LTU:
6025 if (mode != CCmode)
6026 abort ();
6027 suffix = "b";
6028 break;
6029 case GE:
6030 if (mode == CCNOmode || mode == CCGOCmode)
6031 suffix = "ns";
6032 else if (mode == CCmode || mode == CCGCmode)
6033 suffix = "ge";
6034 else
6035 abort ();
6036 break;
6037 case GEU:
6038 /* ??? As above. */
6039 if (mode != CCmode)
6040 abort ();
6041 suffix = fp ? "nb" : "ae";
6042 break;
6043 case LE:
6044 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6045 abort ();
6046 suffix = "le";
6047 break;
6048 case LEU:
6049 if (mode != CCmode)
6050 abort ();
6051 suffix = "be";
6052 break;
6053 case UNORDERED:
6054 suffix = fp ? "u" : "p";
6055 break;
6056 case ORDERED:
6057 suffix = fp ? "nu" : "np";
6058 break;
6059 default:
6060 abort ();
6061 }
6062 fputs (suffix, file);
6063 }
6064
6065 void
6066 print_reg (x, code, file)
6067 rtx x;
6068 int code;
6069 FILE *file;
6070 {
6071 if (REGNO (x) == ARG_POINTER_REGNUM
6072 || REGNO (x) == FRAME_POINTER_REGNUM
6073 || REGNO (x) == FLAGS_REG
6074 || REGNO (x) == FPSR_REG)
6075 abort ();
6076
6077 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6078 putc ('%', file);
6079
6080 if (code == 'w' || MMX_REG_P (x))
6081 code = 2;
6082 else if (code == 'b')
6083 code = 1;
6084 else if (code == 'k')
6085 code = 4;
6086 else if (code == 'q')
6087 code = 8;
6088 else if (code == 'y')
6089 code = 3;
6090 else if (code == 'h')
6091 code = 0;
6092 else
6093 code = GET_MODE_SIZE (GET_MODE (x));
6094
6095 	  /* Irritatingly, AMD extended registers use a different naming convention
6096 from the normal registers. */
6097 if (REX_INT_REG_P (x))
6098 {
6099 if (!TARGET_64BIT)
6100 abort ();
6101 switch (code)
6102 {
6103 case 0:
6104 error ("extended registers have no high halves");
6105 break;
6106 case 1:
6107 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6108 break;
6109 case 2:
6110 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6111 break;
6112 case 4:
6113 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6114 break;
6115 case 8:
6116 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6117 break;
6118 default:
6119 error ("unsupported operand size for extended register");
6120 break;
6121 }
6122 return;
6123 }
6124 switch (code)
6125 {
6126 case 3:
6127 if (STACK_TOP_P (x))
6128 {
6129 fputs ("st(0)", file);
6130 break;
6131 }
6132 /* FALLTHRU */
6133 case 8:
6134 case 4:
6135 case 12:
6136 if (! ANY_FP_REG_P (x))
6137 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6138 /* FALLTHRU */
6139 case 16:
6140 case 2:
6141 fputs (hi_reg_name[REGNO (x)], file);
6142 break;
6143 case 1:
6144 fputs (qi_reg_name[REGNO (x)], file);
6145 break;
6146 case 0:
6147 fputs (qi_high_reg_name[REGNO (x)], file);
6148 break;
6149 default:
6150 abort ();
6151 }
6152 }
6153
6154 /* Locate some local-dynamic symbol still in use by this function
6155 so that we can print its name in some tls_local_dynamic_base
6156 pattern. */
6157
6158 static const char *
6159 get_some_local_dynamic_name ()
6160 {
6161 rtx insn;
6162
6163 if (cfun->machine->some_ld_name)
6164 return cfun->machine->some_ld_name;
6165
6166 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6167 if (INSN_P (insn)
6168 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6169 return cfun->machine->some_ld_name;
6170
6171 abort ();
6172 }
6173
6174 static int
6175 get_some_local_dynamic_name_1 (px, data)
6176 rtx *px;
6177 void *data ATTRIBUTE_UNUSED;
6178 {
6179 rtx x = *px;
6180
6181 if (GET_CODE (x) == SYMBOL_REF
6182 && local_dynamic_symbolic_operand (x, Pmode))
6183 {
6184 cfun->machine->some_ld_name = XSTR (x, 0);
6185 return 1;
6186 }
6187
6188 return 0;
6189 }
6190
6191 /* Meaning of CODE:
6192 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6193 C -- print opcode suffix for set/cmov insn.
6194 c -- like C, but print reversed condition
6195 F,f -- likewise, but for floating-point.
6196 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6197 nothing
6198 R -- print the prefix for register names.
6199 z -- print the opcode suffix for the size of the current operand.
6200 * -- print a star (in certain assembler syntax)
6201 A -- print an absolute memory reference.
6202 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6203 	   s -- print a shift double count, followed by the assembler's argument
6204 delimiter.
6205 b -- print the QImode name of the register for the indicated operand.
6206 %b0 would print %al if operands[0] is reg 0.
6207 w -- likewise, print the HImode name of the register.
6208 k -- likewise, print the SImode name of the register.
6209 q -- likewise, print the DImode name of the register.
6210 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6211 y -- print "st(0)" instead of "st" as a register.
6212 D -- print condition for SSE cmp instruction.
6213 P -- if PIC, print an @PLT suffix.
6214 X -- don't print any sort of PIC '@' suffix for a symbol.
6215 & -- print some in-use local-dynamic symbol name.
6216 */
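	/* For example, with an SFmode memory operand the 'z' case below emits an
	   's' suffix, so a template such as "fcom%z1\t%y1" (used later in this
	   file) comes out as an "fcoms" applied to that operand.  */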
6217
6218 void
6219 print_operand (file, x, code)
6220 FILE *file;
6221 rtx x;
6222 int code;
6223 {
6224 if (code)
6225 {
6226 switch (code)
6227 {
6228 case '*':
6229 if (ASSEMBLER_DIALECT == ASM_ATT)
6230 putc ('*', file);
6231 return;
6232
6233 case '&':
6234 assemble_name (file, get_some_local_dynamic_name ());
6235 return;
6236
6237 case 'A':
6238 if (ASSEMBLER_DIALECT == ASM_ATT)
6239 putc ('*', file);
6240 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6241 {
6242 /* Intel syntax. For absolute addresses, registers should not
6243 	         be surrounded by brackets.  */
6244 if (GET_CODE (x) != REG)
6245 {
6246 putc ('[', file);
6247 PRINT_OPERAND (file, x, 0);
6248 putc (']', file);
6249 return;
6250 }
6251 }
6252 else
6253 abort ();
6254
6255 PRINT_OPERAND (file, x, 0);
6256 return;
6257
6258
6259 case 'L':
6260 if (ASSEMBLER_DIALECT == ASM_ATT)
6261 putc ('l', file);
6262 return;
6263
6264 case 'W':
6265 if (ASSEMBLER_DIALECT == ASM_ATT)
6266 putc ('w', file);
6267 return;
6268
6269 case 'B':
6270 if (ASSEMBLER_DIALECT == ASM_ATT)
6271 putc ('b', file);
6272 return;
6273
6274 case 'Q':
6275 if (ASSEMBLER_DIALECT == ASM_ATT)
6276 putc ('l', file);
6277 return;
6278
6279 case 'S':
6280 if (ASSEMBLER_DIALECT == ASM_ATT)
6281 putc ('s', file);
6282 return;
6283
6284 case 'T':
6285 if (ASSEMBLER_DIALECT == ASM_ATT)
6286 putc ('t', file);
6287 return;
6288
6289 case 'z':
6290 /* 387 opcodes don't get size suffixes if the operands are
6291 registers. */
6292 if (STACK_REG_P (x))
6293 return;
6294
6295 /* Likewise if using Intel opcodes. */
6296 if (ASSEMBLER_DIALECT == ASM_INTEL)
6297 return;
6298
6299 /* This is the size of op from size of operand. */
6300 switch (GET_MODE_SIZE (GET_MODE (x)))
6301 {
6302 case 2:
6303 #ifdef HAVE_GAS_FILDS_FISTS
6304 putc ('s', file);
6305 #endif
6306 return;
6307
6308 case 4:
6309 if (GET_MODE (x) == SFmode)
6310 {
6311 putc ('s', file);
6312 return;
6313 }
6314 else
6315 putc ('l', file);
6316 return;
6317
6318 case 12:
6319 case 16:
6320 putc ('t', file);
6321 return;
6322
6323 case 8:
6324 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6325 {
6326 #ifdef GAS_MNEMONICS
6327 putc ('q', file);
6328 #else
6329 putc ('l', file);
6330 putc ('l', file);
6331 #endif
6332 }
6333 else
6334 putc ('l', file);
6335 return;
6336
6337 default:
6338 abort ();
6339 }
6340
6341 case 'b':
6342 case 'w':
6343 case 'k':
6344 case 'q':
6345 case 'h':
6346 case 'y':
6347 case 'X':
6348 case 'P':
6349 break;
6350
6351 case 's':
6352 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6353 {
6354 PRINT_OPERAND (file, x, 0);
6355 putc (',', file);
6356 }
6357 return;
6358
6359 case 'D':
6360 	    /* Little bit of braindamage here.  The SSE compare instructions
6361 	       use completely different names for the comparisons than the
6362 	       fp conditional moves do.  */
6363 switch (GET_CODE (x))
6364 {
6365 case EQ:
6366 case UNEQ:
6367 fputs ("eq", file);
6368 break;
6369 case LT:
6370 case UNLT:
6371 fputs ("lt", file);
6372 break;
6373 case LE:
6374 case UNLE:
6375 fputs ("le", file);
6376 break;
6377 case UNORDERED:
6378 fputs ("unord", file);
6379 break;
6380 case NE:
6381 case LTGT:
6382 fputs ("neq", file);
6383 break;
6384 case UNGE:
6385 case GE:
6386 fputs ("nlt", file);
6387 break;
6388 case UNGT:
6389 case GT:
6390 fputs ("nle", file);
6391 break;
6392 case ORDERED:
6393 fputs ("ord", file);
6394 break;
6395 default:
6396 abort ();
6397 break;
6398 }
6399 return;
6400 case 'O':
6401 #ifdef CMOV_SUN_AS_SYNTAX
6402 if (ASSEMBLER_DIALECT == ASM_ATT)
6403 {
6404 switch (GET_MODE (x))
6405 {
6406 case HImode: putc ('w', file); break;
6407 case SImode:
6408 case SFmode: putc ('l', file); break;
6409 case DImode:
6410 case DFmode: putc ('q', file); break;
6411 default: abort ();
6412 }
6413 putc ('.', file);
6414 }
6415 #endif
6416 return;
6417 case 'C':
6418 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6419 return;
6420 case 'F':
6421 #ifdef CMOV_SUN_AS_SYNTAX
6422 if (ASSEMBLER_DIALECT == ASM_ATT)
6423 putc ('.', file);
6424 #endif
6425 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6426 return;
6427
6428 /* Like above, but reverse condition */
6429 case 'c':
6430 /* Check to see if argument to %c is really a constant
6431 and not a condition code which needs to be reversed. */
6432 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6433 {
6434 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6435 return;
6436 }
6437 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6438 return;
6439 case 'f':
6440 #ifdef CMOV_SUN_AS_SYNTAX
6441 if (ASSEMBLER_DIALECT == ASM_ATT)
6442 putc ('.', file);
6443 #endif
6444 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6445 return;
6446 case '+':
6447 {
6448 rtx x;
6449
6450 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6451 return;
6452
6453 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6454 if (x)
6455 {
6456 int pred_val = INTVAL (XEXP (x, 0));
6457
6458 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6459 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6460 {
6461 int taken = pred_val > REG_BR_PROB_BASE / 2;
6462 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6463
6464 	            /* Emit hints only when the default branch prediction
6465 	               heuristics would fail.  */
6466 if (taken != cputaken)
6467 {
6468 /* We use 3e (DS) prefix for taken branches and
6469 2e (CS) prefix for not taken branches. */
6470 if (taken)
6471 fputs ("ds ; ", file);
6472 else
6473 fputs ("cs ; ", file);
6474 }
6475 }
6476 }
6477 return;
6478 }
6479 default:
6480 output_operand_lossage ("invalid operand code `%c'", code);
6481 }
6482 }
6483
6484 if (GET_CODE (x) == REG)
6485 {
6486 PRINT_REG (x, code, file);
6487 }
6488
6489 else if (GET_CODE (x) == MEM)
6490 {
6491 /* No `byte ptr' prefix for call instructions. */
6492 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6493 {
6494 const char * size;
6495 switch (GET_MODE_SIZE (GET_MODE (x)))
6496 {
6497 case 1: size = "BYTE"; break;
6498 case 2: size = "WORD"; break;
6499 case 4: size = "DWORD"; break;
6500 case 8: size = "QWORD"; break;
6501 case 12: size = "XWORD"; break;
6502 case 16: size = "XMMWORD"; break;
6503 default:
6504 abort ();
6505 }
6506
6507 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6508 if (code == 'b')
6509 size = "BYTE";
6510 else if (code == 'w')
6511 size = "WORD";
6512 else if (code == 'k')
6513 size = "DWORD";
6514
6515 fputs (size, file);
6516 fputs (" PTR ", file);
6517 }
6518
6519 x = XEXP (x, 0);
6520 if (flag_pic && CONSTANT_ADDRESS_P (x))
6521 output_pic_addr_const (file, x, code);
6522 /* Avoid (%rip) for call operands. */
6523 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6524 && GET_CODE (x) != CONST_INT)
6525 output_addr_const (file, x);
6526 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6527 output_operand_lossage ("invalid constraints for operand");
6528 else
6529 output_address (x);
6530 }
6531
6532 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6533 {
6534 REAL_VALUE_TYPE r;
6535 long l;
6536
6537 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6538 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6539
6540 if (ASSEMBLER_DIALECT == ASM_ATT)
6541 putc ('$', file);
6542 fprintf (file, "0x%lx", l);
6543 }
6544
6545 /* These float cases don't actually occur as immediate operands. */
6546 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6547 {
6548 REAL_VALUE_TYPE r;
6549 char dstr[30];
6550
6551 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6552 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6553 fprintf (file, "%s", dstr);
6554 }
6555
6556 else if (GET_CODE (x) == CONST_DOUBLE
6557 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6558 {
6559 REAL_VALUE_TYPE r;
6560 char dstr[30];
6561
6562 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6563 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6564 fprintf (file, "%s", dstr);
6565 }
6566
6567 else if (GET_CODE (x) == CONST
6568 && GET_CODE (XEXP (x, 0)) == UNSPEC
6569 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6570 {
6571 if (ASSEMBLER_DIALECT == ASM_INTEL)
6572 fputs ("DWORD PTR ", file);
6573 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6574 putc ('%', file);
6575 fputs ("gs:0", file);
6576 }
6577
6578 else
6579 {
6580 if (code != 'P')
6581 {
6582 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6583 {
6584 if (ASSEMBLER_DIALECT == ASM_ATT)
6585 putc ('$', file);
6586 }
6587 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6588 || GET_CODE (x) == LABEL_REF)
6589 {
6590 if (ASSEMBLER_DIALECT == ASM_ATT)
6591 putc ('$', file);
6592 else
6593 fputs ("OFFSET FLAT:", file);
6594 }
6595 }
6596 if (GET_CODE (x) == CONST_INT)
6597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6598 else if (flag_pic)
6599 output_pic_addr_const (file, x, code);
6600 else
6601 output_addr_const (file, x);
6602 }
6603 }
6604 \f
6605 /* Print a memory operand whose address is ADDR. */
6606
6607 void
6608 print_operand_address (file, addr)
6609 FILE *file;
6610 register rtx addr;
6611 {
6612 struct ix86_address parts;
6613 rtx base, index, disp;
6614 int scale;
6615
6616 if (! ix86_decompose_address (addr, &parts))
6617 abort ();
6618
6619 base = parts.base;
6620 index = parts.index;
6621 disp = parts.disp;
6622 scale = parts.scale;
6623
6624 if (!base && !index)
6625 {
6626 	      /* A displacement-only address requires special attention.  */
6627
6628 if (GET_CODE (disp) == CONST_INT)
6629 {
6630 if (ASSEMBLER_DIALECT == ASM_INTEL)
6631 {
6632 if (USER_LABEL_PREFIX[0] == 0)
6633 putc ('%', file);
6634 fputs ("ds:", file);
6635 }
6636 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6637 }
6638 else if (flag_pic)
6639 output_pic_addr_const (file, addr, 0);
6640 else
6641 output_addr_const (file, addr);
6642
6643 	      /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
6644 if (TARGET_64BIT
6645 && (GET_CODE (addr) == SYMBOL_REF
6646 || GET_CODE (addr) == LABEL_REF
6647 || (GET_CODE (addr) == CONST
6648 && GET_CODE (XEXP (addr, 0)) == PLUS
6649 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6650 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6651 fputs ("(%rip)", file);
6652 }
6653 else
6654 {
6655 if (ASSEMBLER_DIALECT == ASM_ATT)
6656 {
6657 if (disp)
6658 {
6659 if (flag_pic)
6660 output_pic_addr_const (file, disp, 0);
6661 else if (GET_CODE (disp) == LABEL_REF)
6662 output_asm_label (disp);
6663 else
6664 output_addr_const (file, disp);
6665 }
6666
6667 putc ('(', file);
6668 if (base)
6669 PRINT_REG (base, 0, file);
6670 if (index)
6671 {
6672 putc (',', file);
6673 PRINT_REG (index, 0, file);
6674 if (scale != 1)
6675 fprintf (file, ",%d", scale);
6676 }
6677 putc (')', file);
6678 }
6679 else
6680 {
6681 rtx offset = NULL_RTX;
6682
6683 if (disp)
6684 {
6685 /* Pull out the offset of a symbol; print any symbol itself. */
6686 if (GET_CODE (disp) == CONST
6687 && GET_CODE (XEXP (disp, 0)) == PLUS
6688 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6689 {
6690 offset = XEXP (XEXP (disp, 0), 1);
6691 disp = gen_rtx_CONST (VOIDmode,
6692 XEXP (XEXP (disp, 0), 0));
6693 }
6694
6695 if (flag_pic)
6696 output_pic_addr_const (file, disp, 0);
6697 else if (GET_CODE (disp) == LABEL_REF)
6698 output_asm_label (disp);
6699 else if (GET_CODE (disp) == CONST_INT)
6700 offset = disp;
6701 else
6702 output_addr_const (file, disp);
6703 }
6704
6705 putc ('[', file);
6706 if (base)
6707 {
6708 PRINT_REG (base, 0, file);
6709 if (offset)
6710 {
6711 if (INTVAL (offset) >= 0)
6712 putc ('+', file);
6713 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6714 }
6715 }
6716 else if (offset)
6717 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6718 else
6719 putc ('0', file);
6720
6721 if (index)
6722 {
6723 putc ('+', file);
6724 PRINT_REG (index, 0, file);
6725 if (scale != 1)
6726 fprintf (file, "*%d", scale);
6727 }
6728 putc (']', file);
6729 }
6730 }
6731 }
6732
6733 bool
6734 output_addr_const_extra (file, x)
6735 FILE *file;
6736 rtx x;
6737 {
6738 rtx op;
6739
6740 if (GET_CODE (x) != UNSPEC)
6741 return false;
6742
6743 op = XVECEXP (x, 0, 0);
6744 switch (XINT (x, 1))
6745 {
6746 case UNSPEC_GOTTPOFF:
6747 output_addr_const (file, op);
6748 fputs ("@GOTTPOFF", file);
6749 break;
6750 case UNSPEC_TPOFF:
6751 output_addr_const (file, op);
6752 fputs ("@TPOFF", file);
6753 break;
6754 case UNSPEC_NTPOFF:
6755 output_addr_const (file, op);
6756 fputs ("@NTPOFF", file);
6757 break;
6758 case UNSPEC_DTPOFF:
6759 output_addr_const (file, op);
6760 fputs ("@DTPOFF", file);
6761 break;
6762
6763 default:
6764 return false;
6765 }
6766
6767 return true;
6768 }
6769 \f
6770 /* Split one or more DImode RTL references into pairs of SImode
6771 references. The RTL can be REG, offsettable MEM, integer constant, or
6772 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6773 split and "num" is its length. lo_half and hi_half are output arrays
6774 that parallel "operands". */
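	/* For example, a DImode MEM is split into SImode MEMs at byte offsets 0
	   and 4, and (const_int -1) splits into a lo_half and a hi_half of
	   (const_int -1) each.  */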
6775
6776 void
6777 split_di (operands, num, lo_half, hi_half)
6778 rtx operands[];
6779 int num;
6780 rtx lo_half[], hi_half[];
6781 {
6782 while (num--)
6783 {
6784 rtx op = operands[num];
6785
6786 	      /* simplify_subreg refuses to split volatile memory addresses,
6787 	         but we still have to handle them.  */
6788 if (GET_CODE (op) == MEM)
6789 {
6790 lo_half[num] = adjust_address (op, SImode, 0);
6791 hi_half[num] = adjust_address (op, SImode, 4);
6792 }
6793 else
6794 {
6795 lo_half[num] = simplify_gen_subreg (SImode, op,
6796 GET_MODE (op) == VOIDmode
6797 ? DImode : GET_MODE (op), 0);
6798 hi_half[num] = simplify_gen_subreg (SImode, op,
6799 GET_MODE (op) == VOIDmode
6800 ? DImode : GET_MODE (op), 4);
6801 }
6802 }
6803 }
6804 /* Split one or more TImode RTL references into pairs of SImode
6805 references. The RTL can be REG, offsettable MEM, integer constant, or
6806 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6807 split and "num" is its length. lo_half and hi_half are output arrays
6808 that parallel "operands". */
6809
6810 void
6811 split_ti (operands, num, lo_half, hi_half)
6812 rtx operands[];
6813 int num;
6814 rtx lo_half[], hi_half[];
6815 {
6816 while (num--)
6817 {
6818 rtx op = operands[num];
6819
6820 	      /* simplify_subreg refuses to split volatile memory addresses, but we
6821 	         still have to handle them.  */
6822 if (GET_CODE (op) == MEM)
6823 {
6824 lo_half[num] = adjust_address (op, DImode, 0);
6825 hi_half[num] = adjust_address (op, DImode, 8);
6826 }
6827 else
6828 {
6829 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6830 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6831 }
6832 }
6833 }
6834 \f
6835 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6836 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6837 is the expression of the binary operation. The output may either be
6838 emitted here, or returned to the caller, like all output_* functions.
6839
6840 There is no guarantee that the operands are the same mode, as they
6841 might be within FLOAT or FLOAT_EXTEND expressions. */
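/* As one concrete case: for (set (reg:DF st0) (plus:DF (reg:DF st0) (mem:DF ...)))
   the code below produces "fadd%z2\t%2", i.e. an "faddl" applied to the memory
   operand; register-register cases instead pick one of the st(0)/st(r)
   templates further down.  */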
6842
6843 #ifndef SYSV386_COMPAT
6844 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6845 wants to fix the assemblers because that causes incompatibility
6846 with gcc. No-one wants to fix gcc because that causes
6847 incompatibility with assemblers... You can use the option of
6848 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6849 #define SYSV386_COMPAT 1
6850 #endif
6851
6852 const char *
6853 output_387_binary_op (insn, operands)
6854 rtx insn;
6855 rtx *operands;
6856 {
6857 static char buf[30];
6858 const char *p;
6859 const char *ssep;
6860 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6861
6862 #ifdef ENABLE_CHECKING
6863 	  /* Even if we do not want to check the inputs, this documents the input
6864 	     constraints, which helps in understanding the following code.  */
6865 if (STACK_REG_P (operands[0])
6866 && ((REG_P (operands[1])
6867 && REGNO (operands[0]) == REGNO (operands[1])
6868 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6869 || (REG_P (operands[2])
6870 && REGNO (operands[0]) == REGNO (operands[2])
6871 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6872 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6873 ; /* ok */
6874 else if (!is_sse)
6875 abort ();
6876 #endif
6877
6878 switch (GET_CODE (operands[3]))
6879 {
6880 case PLUS:
6881 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6882 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6883 p = "fiadd";
6884 else
6885 p = "fadd";
6886 ssep = "add";
6887 break;
6888
6889 case MINUS:
6890 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6891 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6892 p = "fisub";
6893 else
6894 p = "fsub";
6895 ssep = "sub";
6896 break;
6897
6898 case MULT:
6899 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6900 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6901 p = "fimul";
6902 else
6903 p = "fmul";
6904 ssep = "mul";
6905 break;
6906
6907 case DIV:
6908 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6909 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6910 p = "fidiv";
6911 else
6912 p = "fdiv";
6913 ssep = "div";
6914 break;
6915
6916 default:
6917 abort ();
6918 }
6919
6920 if (is_sse)
6921 {
6922 strcpy (buf, ssep);
6923 if (GET_MODE (operands[0]) == SFmode)
6924 strcat (buf, "ss\t{%2, %0|%0, %2}");
6925 else
6926 strcat (buf, "sd\t{%2, %0|%0, %2}");
6927 return buf;
6928 }
6929 strcpy (buf, p);
6930
6931 switch (GET_CODE (operands[3]))
6932 {
6933 case MULT:
6934 case PLUS:
6935 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6936 {
6937 rtx temp = operands[2];
6938 operands[2] = operands[1];
6939 operands[1] = temp;
6940 }
6941
6942 	      /* We now know that operands[0] == operands[1].  */
6943
6944 if (GET_CODE (operands[2]) == MEM)
6945 {
6946 p = "%z2\t%2";
6947 break;
6948 }
6949
6950 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6951 {
6952 if (STACK_TOP_P (operands[0]))
6953 /* How is it that we are storing to a dead operand[2]?
6954 Well, presumably operands[1] is dead too. We can't
6955 store the result to st(0) as st(0) gets popped on this
6956 instruction. Instead store to operands[2] (which I
6957 think has to be st(1)). st(1) will be popped later.
6958 gcc <= 2.8.1 didn't have this check and generated
6959 assembly code that the Unixware assembler rejected. */
6960 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6961 else
6962 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6963 break;
6964 }
6965
6966 if (STACK_TOP_P (operands[0]))
6967 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6968 else
6969 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6970 break;
6971
6972 case MINUS:
6973 case DIV:
6974 if (GET_CODE (operands[1]) == MEM)
6975 {
6976 p = "r%z1\t%1";
6977 break;
6978 }
6979
6980 if (GET_CODE (operands[2]) == MEM)
6981 {
6982 p = "%z2\t%2";
6983 break;
6984 }
6985
6986 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6987 {
6988 #if SYSV386_COMPAT
6989 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6990 derived assemblers, confusingly reverse the direction of
6991 the operation for fsub{r} and fdiv{r} when the
6992 destination register is not st(0). The Intel assembler
6993 doesn't have this brain damage. Read !SYSV386_COMPAT to
6994 figure out what the hardware really does. */
6995 if (STACK_TOP_P (operands[0]))
6996 p = "{p\t%0, %2|rp\t%2, %0}";
6997 else
6998 p = "{rp\t%2, %0|p\t%0, %2}";
6999 #else
7000 if (STACK_TOP_P (operands[0]))
7001 /* As above for fmul/fadd, we can't store to st(0). */
7002 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7003 else
7004 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7005 #endif
7006 break;
7007 }
7008
7009 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7010 {
7011 #if SYSV386_COMPAT
7012 if (STACK_TOP_P (operands[0]))
7013 p = "{rp\t%0, %1|p\t%1, %0}";
7014 else
7015 p = "{p\t%1, %0|rp\t%0, %1}";
7016 #else
7017 if (STACK_TOP_P (operands[0]))
7018 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7019 else
7020 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7021 #endif
7022 break;
7023 }
7024
7025 if (STACK_TOP_P (operands[0]))
7026 {
7027 if (STACK_TOP_P (operands[1]))
7028 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7029 else
7030 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7031 break;
7032 }
7033 else if (STACK_TOP_P (operands[1]))
7034 {
7035 #if SYSV386_COMPAT
7036 p = "{\t%1, %0|r\t%0, %1}";
7037 #else
7038 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7039 #endif
7040 }
7041 else
7042 {
7043 #if SYSV386_COMPAT
7044 p = "{r\t%2, %0|\t%0, %2}";
7045 #else
7046 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7047 #endif
7048 }
7049 break;
7050
7051 default:
7052 abort ();
7053 }
7054
7055 strcat (buf, p);
7056 return buf;
7057 }
7058
7059 	/* Output code to initialize control word copies used by the
7060 	   trunc?f?i patterns.  NORMAL is set to the current control word, while
7061 	   ROUND_DOWN is set to a control word that rounds toward zero (truncates).  */
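/* The rounding-control field of the x87 control word is bits 10-11; OR-ing in
   0xc00 below sets it to 11b, i.e. round toward zero, which is what the
   fistp-based truncation patterns require.  */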
7062 void
7063 emit_i387_cw_initialization (normal, round_down)
7064 rtx normal, round_down;
7065 {
7066 rtx reg = gen_reg_rtx (HImode);
7067
7068 emit_insn (gen_x86_fnstcw_1 (normal));
7069 emit_move_insn (reg, normal);
7070 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7071 && !TARGET_64BIT)
7072 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7073 else
7074 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7075 emit_move_insn (round_down, reg);
7076 }
7077
7078 /* Output code for INSN to convert a float to a signed int. OPERANDS
7079 are the insn operands. The output may be [HSD]Imode and the input
7080 operand may be [SDX]Fmode. */
7081
7082 const char *
7083 output_fix_trunc (insn, operands)
7084 rtx insn;
7085 rtx *operands;
7086 {
7087 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7088 int dimode_p = GET_MODE (operands[0]) == DImode;
7089
7090 /* Jump through a hoop or two for DImode, since the hardware has no
7091 non-popping instruction. We used to do this a different way, but
7092 that was somewhat fragile and broke with post-reload splitters. */
7093 if (dimode_p && !stack_top_dies)
7094 output_asm_insn ("fld\t%y1", operands);
7095
7096 if (!STACK_TOP_P (operands[1]))
7097 abort ();
7098
7099 if (GET_CODE (operands[0]) != MEM)
7100 abort ();
7101
7102 output_asm_insn ("fldcw\t%3", operands);
7103 if (stack_top_dies || dimode_p)
7104 output_asm_insn ("fistp%z0\t%0", operands);
7105 else
7106 output_asm_insn ("fist%z0\t%0", operands);
7107 output_asm_insn ("fldcw\t%2", operands);
7108
7109 return "";
7110 }
7111
7112 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7113 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7114 when fucom should be used. */
7115
7116 const char *
7117 output_fp_compare (insn, operands, eflags_p, unordered_p)
7118 rtx insn;
7119 rtx *operands;
7120 int eflags_p, unordered_p;
7121 {
7122 int stack_top_dies;
7123 rtx cmp_op0 = operands[0];
7124 rtx cmp_op1 = operands[1];
7125 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7126
7127 if (eflags_p == 2)
7128 {
7129 cmp_op0 = cmp_op1;
7130 cmp_op1 = operands[2];
7131 }
7132 if (is_sse)
7133 {
7134 if (GET_MODE (operands[0]) == SFmode)
7135 if (unordered_p)
7136 return "ucomiss\t{%1, %0|%0, %1}";
7137 else
7138 	        return "comiss\t{%1, %0|%0, %1}";
7139 else
7140 if (unordered_p)
7141 return "ucomisd\t{%1, %0|%0, %1}";
7142 else
7143 	        return "comisd\t{%1, %0|%0, %1}";
7144 }
7145
7146 if (! STACK_TOP_P (cmp_op0))
7147 abort ();
7148
7149 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7150
7151 if (STACK_REG_P (cmp_op1)
7152 && stack_top_dies
7153 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7154 && REGNO (cmp_op1) != FIRST_STACK_REG)
7155 {
7156 	      /* If both the top of the 387 stack and the other operand (also a
7157 	         stack register) die, then this must be an
7158 	         `fcompp' float compare.  */
7159
7160 if (eflags_p == 1)
7161 {
7162 /* There is no double popping fcomi variant. Fortunately,
7163 eflags is immune from the fstp's cc clobbering. */
7164 if (unordered_p)
7165 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7166 else
7167 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7168 return "fstp\t%y0";
7169 }
7170 else
7171 {
7172 if (eflags_p == 2)
7173 {
7174 if (unordered_p)
7175 return "fucompp\n\tfnstsw\t%0";
7176 else
7177 return "fcompp\n\tfnstsw\t%0";
7178 }
7179 else
7180 {
7181 if (unordered_p)
7182 return "fucompp";
7183 else
7184 return "fcompp";
7185 }
7186 }
7187 }
7188 else
7189 {
7190 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7191
7192 static const char * const alt[24] =
7193 {
7194 "fcom%z1\t%y1",
7195 "fcomp%z1\t%y1",
7196 "fucom%z1\t%y1",
7197 "fucomp%z1\t%y1",
7198
7199 "ficom%z1\t%y1",
7200 "ficomp%z1\t%y1",
7201 NULL,
7202 NULL,
7203
7204 "fcomi\t{%y1, %0|%0, %y1}",
7205 "fcomip\t{%y1, %0|%0, %y1}",
7206 "fucomi\t{%y1, %0|%0, %y1}",
7207 "fucomip\t{%y1, %0|%0, %y1}",
7208
7209 NULL,
7210 NULL,
7211 NULL,
7212 NULL,
7213
7214 "fcom%z2\t%y2\n\tfnstsw\t%0",
7215 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7216 "fucom%z2\t%y2\n\tfnstsw\t%0",
7217 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7218
7219 "ficom%z2\t%y2\n\tfnstsw\t%0",
7220 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7221 NULL,
7222 NULL
7223 };
7224
7225 int mask;
7226 const char *ret;
7227
7228 mask = eflags_p << 3;
7229 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7230 mask |= unordered_p << 1;
7231 mask |= stack_top_dies;
7232
7233 if (mask >= 24)
7234 abort ();
7235 ret = alt[mask];
7236 if (ret == NULL)
7237 abort ();
7238
7239 return ret;
7240 }
7241 }
7242
7243 void
7244 ix86_output_addr_vec_elt (file, value)
7245 FILE *file;
7246 int value;
7247 {
7248 const char *directive = ASM_LONG;
7249
7250 if (TARGET_64BIT)
7251 {
7252 #ifdef ASM_QUAD
7253 directive = ASM_QUAD;
7254 #else
7255 abort ();
7256 #endif
7257 }
7258
7259 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7260 }
7261
7262 void
7263 ix86_output_addr_diff_elt (file, value, rel)
7264 FILE *file;
7265 int value, rel;
7266 {
7267 if (TARGET_64BIT)
7268 fprintf (file, "%s%s%d-%s%d\n",
7269 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7270 else if (HAVE_AS_GOTOFF_IN_DATA)
7271 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7272 else
7273 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7274 ASM_LONG, LPREFIX, value);
7275 }
7276 \f
7277 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7278 for the target. */
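/* The xor form clobbers the condition codes, so when it is chosen the SET
   below is wrapped in a PARALLEL with a CLOBBER of the flags register,
   matching the movsi_xor and movdi_xor_rex64 patterns.  */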
7279
7280 void
7281 ix86_expand_clear (dest)
7282 rtx dest;
7283 {
7284 rtx tmp;
7285
7286 /* We play register width games, which are only valid after reload. */
7287 if (!reload_completed)
7288 abort ();
7289
7290 /* Avoid HImode and its attendant prefix byte. */
7291 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7292 dest = gen_rtx_REG (SImode, REGNO (dest));
7293
7294 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7295
7296 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7297 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7298 {
7299 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7300 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7301 }
7302
7303 emit_insn (tmp);
7304 }
7305
7306 /* X is an unchanging MEM. If it is a constant pool reference, return
7307 the constant pool rtx, else NULL. */
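/* With -fpic such a reference typically has the form
   (mem (plus pic_offset_table_rtx
              (const (unspec [(symbol_ref ...)] UNSPEC_GOTOFF)))),
   which is the shape unwrapped below.  */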
7308
7309 static rtx
7310 maybe_get_pool_constant (x)
7311 rtx x;
7312 {
7313 x = XEXP (x, 0);
7314
7315 if (flag_pic)
7316 {
7317 if (GET_CODE (x) != PLUS)
7318 return NULL_RTX;
7319 if (XEXP (x, 0) != pic_offset_table_rtx)
7320 return NULL_RTX;
7321 x = XEXP (x, 1);
7322 if (GET_CODE (x) != CONST)
7323 return NULL_RTX;
7324 x = XEXP (x, 0);
7325 if (GET_CODE (x) != UNSPEC)
7326 return NULL_RTX;
7327 if (XINT (x, 1) != UNSPEC_GOTOFF)
7328 return NULL_RTX;
7329 x = XVECEXP (x, 0, 0);
7330 }
7331
7332 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7333 return get_pool_constant (x);
7334
7335 return NULL_RTX;
7336 }
7337
7338 void
7339 ix86_expand_move (mode, operands)
7340 enum machine_mode mode;
7341 rtx operands[];
7342 {
7343 int strict = (reload_in_progress || reload_completed);
7344 rtx insn, op0, op1, tmp;
7345
7346 op0 = operands[0];
7347 op1 = operands[1];
7348
7349 /* ??? We have a slight problem. We need to say that tls symbols are
7350 not legitimate constants so that reload does not helpfully reload
7351 these constants from a REG_EQUIV, which we cannot handle. (Recall
7352 that general- and local-dynamic address resolution requires a
7353 function call.)
7354
7355 However, if we say that tls symbols are not legitimate constants,
7356 	     then emit_move_insn helpfully drops them into the constant pool.
7357
7358 It is far easier to work around emit_move_insn than reload. Recognize
7359 the MEM that we would have created and extract the symbol_ref. */
7360
7361 if (mode == Pmode
7362 && GET_CODE (op1) == MEM
7363 && RTX_UNCHANGING_P (op1))
7364 {
7365 tmp = maybe_get_pool_constant (op1);
7366 /* Note that we only care about symbolic constants here, which
7367 unlike CONST_INT will always have a proper mode. */
7368 if (tmp && GET_MODE (tmp) == Pmode)
7369 op1 = tmp;
7370 }
7371
7372 if (tls_symbolic_operand (op1, Pmode))
7373 {
7374 op1 = legitimize_address (op1, op1, VOIDmode);
7375 if (GET_CODE (op0) == MEM)
7376 {
7377 tmp = gen_reg_rtx (mode);
7378 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7379 op1 = tmp;
7380 }
7381 }
7382 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7383 {
7384 if (GET_CODE (op0) == MEM)
7385 op1 = force_reg (Pmode, op1);
7386 else
7387 {
7388 rtx temp = op0;
7389 if (GET_CODE (temp) != REG)
7390 temp = gen_reg_rtx (Pmode);
7391 temp = legitimize_pic_address (op1, temp);
7392 if (temp == op0)
7393 return;
7394 op1 = temp;
7395 }
7396 }
7397 else
7398 {
7399 if (GET_CODE (op0) == MEM
7400 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7401 || !push_operand (op0, mode))
7402 && GET_CODE (op1) == MEM)
7403 op1 = force_reg (mode, op1);
7404
7405 if (push_operand (op0, mode)
7406 && ! general_no_elim_operand (op1, mode))
7407 op1 = copy_to_mode_reg (mode, op1);
7408
7409 	      /* Force large constants in 64bit compilation into a register
7410 to get them CSEed. */
7411 if (TARGET_64BIT && mode == DImode
7412 && immediate_operand (op1, mode)
7413 && !x86_64_zero_extended_value (op1)
7414 && !register_operand (op0, mode)
7415 && optimize && !reload_completed && !reload_in_progress)
7416 op1 = copy_to_mode_reg (mode, op1);
7417
7418 if (FLOAT_MODE_P (mode))
7419 {
7420 /* If we are loading a floating point constant to a register,
7421 force the value to memory now, since we'll get better code
7422 out the back end. */
7423
7424 if (strict)
7425 ;
7426 else if (GET_CODE (op1) == CONST_DOUBLE
7427 && register_operand (op0, mode))
7428 op1 = validize_mem (force_const_mem (mode, op1));
7429 }
7430 }
7431
7432 insn = gen_rtx_SET (VOIDmode, op0, op1);
7433
7434 emit_insn (insn);
7435 }
7436
7437 void
7438 ix86_expand_vector_move (mode, operands)
7439 enum machine_mode mode;
7440 rtx operands[];
7441 {
7442 /* Force constants other than zero into memory. We do not know how
7443 the instructions used to build constants modify the upper 64 bits
7444 	     of the register; once we have that information we may be able
7445 to handle some of them more efficiently. */
7446 if ((reload_in_progress | reload_completed) == 0
7447 && register_operand (operands[0], mode)
7448 && CONSTANT_P (operands[1]))
7449 {
7450 rtx addr = gen_reg_rtx (Pmode);
7451 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7452 operands[1] = gen_rtx_MEM (mode, addr);
7453 }
7454
7455 /* Make operand1 a register if it isn't already. */
7456 if ((reload_in_progress | reload_completed) == 0
7457 && !register_operand (operands[0], mode)
7458 && !register_operand (operands[1], mode)
7459 && operands[1] != CONST0_RTX (mode))
7460 {
7461 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7462 emit_move_insn (operands[0], temp);
7463 return;
7464 }
7465
7466 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7467 }
7468
7469 /* Attempt to expand a binary operator. Make the expansion closer to the
7470 	   actual machine, than just general_operand, which will allow 3 separate
7471 memory references (one output, two input) in a single insn. */
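/* For example, if both sources of an addition are MEMs, one of them is forced
   into a register below, since x86 arithmetic insns allow at most one memory
   operand.  */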
7472
7473 void
7474 ix86_expand_binary_operator (code, mode, operands)
7475 enum rtx_code code;
7476 enum machine_mode mode;
7477 rtx operands[];
7478 {
7479 int matching_memory;
7480 rtx src1, src2, dst, op, clob;
7481
7482 dst = operands[0];
7483 src1 = operands[1];
7484 src2 = operands[2];
7485
7486 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7487 if (GET_RTX_CLASS (code) == 'c'
7488 && (rtx_equal_p (dst, src2)
7489 || immediate_operand (src1, mode)))
7490 {
7491 rtx temp = src1;
7492 src1 = src2;
7493 src2 = temp;
7494 }
7495
7496 /* If the destination is memory, and we do not have matching source
7497 operands, do things in registers. */
7498 matching_memory = 0;
7499 if (GET_CODE (dst) == MEM)
7500 {
7501 if (rtx_equal_p (dst, src1))
7502 matching_memory = 1;
7503 else if (GET_RTX_CLASS (code) == 'c'
7504 && rtx_equal_p (dst, src2))
7505 matching_memory = 2;
7506 else
7507 dst = gen_reg_rtx (mode);
7508 }
7509
7510 /* Both source operands cannot be in memory. */
7511 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7512 {
7513 if (matching_memory != 2)
7514 src2 = force_reg (mode, src2);
7515 else
7516 src1 = force_reg (mode, src1);
7517 }
7518
7519 	  /* If the operation is not commutative, source 1 cannot be a constant
7520 or non-matching memory. */
7521 if ((CONSTANT_P (src1)
7522 || (!matching_memory && GET_CODE (src1) == MEM))
7523 && GET_RTX_CLASS (code) != 'c')
7524 src1 = force_reg (mode, src1);
7525
7526 /* If optimizing, copy to regs to improve CSE */
7527 if (optimize && ! no_new_pseudos)
7528 {
7529 if (GET_CODE (dst) == MEM)
7530 dst = gen_reg_rtx (mode);
7531 if (GET_CODE (src1) == MEM)
7532 src1 = force_reg (mode, src1);
7533 if (GET_CODE (src2) == MEM)
7534 src2 = force_reg (mode, src2);
7535 }
7536
7537 /* Emit the instruction. */
7538
7539 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7540 if (reload_in_progress)
7541 {
7542 /* Reload doesn't know about the flags register, and doesn't know that
7543 it doesn't want to clobber it. We can only do this with PLUS. */
7544 if (code != PLUS)
7545 abort ();
7546 emit_insn (op);
7547 }
7548 else
7549 {
7550 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7551 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7552 }
7553
7554 /* Fix up the destination if needed. */
7555 if (dst != operands[0])
7556 emit_move_insn (operands[0], dst);
7557 }
7558
7559 /* Return TRUE or FALSE depending on whether the binary operator meets the
7560 appropriate constraints. */
7561
7562 int
7563 ix86_binary_operator_ok (code, mode, operands)
7564 enum rtx_code code;
7565 enum machine_mode mode ATTRIBUTE_UNUSED;
7566 rtx operands[3];
7567 {
7568 /* Both source operands cannot be in memory. */
7569 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7570 return 0;
7571 	  /* If the operation is not commutative, source 1 cannot be a constant.  */
7572 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7573 return 0;
7574 /* If the destination is memory, we must have a matching source operand. */
7575 if (GET_CODE (operands[0]) == MEM
7576 && ! (rtx_equal_p (operands[0], operands[1])
7577 || (GET_RTX_CLASS (code) == 'c'
7578 && rtx_equal_p (operands[0], operands[2]))))
7579 return 0;
7580 	  /* If the operation is not commutative and source 1 is memory, we must
7581 have a matching destination. */
7582 if (GET_CODE (operands[1]) == MEM
7583 && GET_RTX_CLASS (code) != 'c'
7584 && ! rtx_equal_p (operands[0], operands[1]))
7585 return 0;
7586 return 1;
7587 }
7588
7589 /* Attempt to expand a unary operator. Make the expansion closer to the
7590 	   actual machine, than just general_operand, which will allow 2 separate
7591 memory references (one output, one input) in a single insn. */
7592
7593 void
7594 ix86_expand_unary_operator (code, mode, operands)
7595 enum rtx_code code;
7596 enum machine_mode mode;
7597 rtx operands[];
7598 {
7599 int matching_memory;
7600 rtx src, dst, op, clob;
7601
7602 dst = operands[0];
7603 src = operands[1];
7604
7605 /* If the destination is memory, and we do not have matching source
7606 operands, do things in registers. */
7607 matching_memory = 0;
7608 if (GET_CODE (dst) == MEM)
7609 {
7610 if (rtx_equal_p (dst, src))
7611 matching_memory = 1;
7612 else
7613 dst = gen_reg_rtx (mode);
7614 }
7615
7616 /* When source operand is memory, destination must match. */
7617 if (!matching_memory && GET_CODE (src) == MEM)
7618 src = force_reg (mode, src);
7619
7620 /* If optimizing, copy to regs to improve CSE */
7621 if (optimize && ! no_new_pseudos)
7622 {
7623 if (GET_CODE (dst) == MEM)
7624 dst = gen_reg_rtx (mode);
7625 if (GET_CODE (src) == MEM)
7626 src = force_reg (mode, src);
7627 }
7628
7629 /* Emit the instruction. */
7630
7631 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7632 if (reload_in_progress || code == NOT)
7633 {
7634 /* Reload doesn't know about the flags register, and doesn't know that
7635 it doesn't want to clobber it. */
7636 if (code != NOT)
7637 abort ();
7638 emit_insn (op);
7639 }
7640 else
7641 {
7642 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7643 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7644 }
7645
7646 /* Fix up the destination if needed. */
7647 if (dst != operands[0])
7648 emit_move_insn (operands[0], dst);
7649 }
7650
7651 /* Return TRUE or FALSE depending on whether the unary operator meets the
7652 appropriate constraints. */
7653
7654 int
7655 ix86_unary_operator_ok (code, mode, operands)
7656 enum rtx_code code ATTRIBUTE_UNUSED;
7657 enum machine_mode mode ATTRIBUTE_UNUSED;
7658 rtx operands[2] ATTRIBUTE_UNUSED;
7659 {
7660 /* If one of operands is memory, source and destination must match. */
7661 if ((GET_CODE (operands[0]) == MEM
7662 || GET_CODE (operands[1]) == MEM)
7663 && ! rtx_equal_p (operands[0], operands[1]))
7664 return FALSE;
7665 return TRUE;
7666 }
7667
7668 /* Return TRUE or FALSE depending on whether the first SET in INSN
7669 has source and destination with matching CC modes, and whether the
7670 CC mode is at least as constrained as REQ_MODE. */
7671
7672 int
7673 ix86_match_ccmode (insn, req_mode)
7674 rtx insn;
7675 enum machine_mode req_mode;
7676 {
7677 rtx set;
7678 enum machine_mode set_mode;
7679
7680 set = PATTERN (insn);
7681 if (GET_CODE (set) == PARALLEL)
7682 set = XVECEXP (set, 0, 0);
7683 if (GET_CODE (set) != SET)
7684 abort ();
7685 if (GET_CODE (SET_SRC (set)) != COMPARE)
7686 abort ();
7687
7688 set_mode = GET_MODE (SET_DEST (set));
7689 switch (set_mode)
7690 {
7691 case CCNOmode:
7692 if (req_mode != CCNOmode
7693 && (req_mode != CCmode
7694 || XEXP (SET_SRC (set), 1) != const0_rtx))
7695 return 0;
7696 break;
7697 case CCmode:
7698 if (req_mode == CCGCmode)
7699 return 0;
7700 /* FALLTHRU */
7701 case CCGCmode:
7702 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7703 return 0;
7704 /* FALLTHRU */
7705 case CCGOCmode:
7706 if (req_mode == CCZmode)
7707 return 0;
7708 /* FALLTHRU */
7709 case CCZmode:
7710 break;
7711
7712 default:
7713 abort ();
7714 }
7715
7716 return (GET_MODE (SET_SRC (set)) == set_mode);
7717 }
7718
7719 /* Generate insn patterns to do an integer compare of OPERANDS. */
7720
7721 static rtx
7722 ix86_expand_int_compare (code, op0, op1)
7723 enum rtx_code code;
7724 rtx op0, op1;
7725 {
7726 enum machine_mode cmpmode;
7727 rtx tmp, flags;
7728
7729 cmpmode = SELECT_CC_MODE (code, op0, op1);
7730 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7731
7732 /* This is very simple, but making the interface the same as in the
7733 FP case makes the rest of the code easier. */
7734 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7735 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7736
7737 /* Return the test that should be put into the flags user, i.e.
7738 the bcc, scc, or cmov instruction. */
7739 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7740 }
7741
7742 /* Figure out whether to use ordered or unordered fp comparisons.
7743 Return the appropriate mode to use. */
7744
7745 enum machine_mode
7746 ix86_fp_compare_mode (code)
7747 enum rtx_code code ATTRIBUTE_UNUSED;
7748 {
7749 /* ??? In order to make all comparisons reversible, we do all comparisons
7750 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7751 all forms of trapping and nontrapping comparisons, we can make inequality
7752 comparisons trapping again, since that results in better code when using
7753 FCOM based compares. */
7754 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7755 }
7756
7757 enum machine_mode
7758 ix86_cc_mode (code, op0, op1)
7759 enum rtx_code code;
7760 rtx op0, op1;
7761 {
7762 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7763 return ix86_fp_compare_mode (code);
7764 switch (code)
7765 {
7766 /* Only zero flag is needed. */
7767 case EQ: /* ZF=0 */
7768 case NE: /* ZF!=0 */
7769 return CCZmode;
7770 /* Codes needing carry flag. */
7771 case GEU: /* CF=0 */
7772 case GTU: /* CF=0 & ZF=0 */
7773 case LTU: /* CF=1 */
7774 case LEU: /* CF=1 | ZF=1 */
7775 return CCmode;
7776 /* Codes possibly doable only with sign flag when
7777 comparing against zero. */
7778 case GE: /* SF=OF or SF=0 */
7779 case LT: /* SF<>OF or SF=1 */
7780 if (op1 == const0_rtx)
7781 return CCGOCmode;
7782 else
7783 /* For other cases Carry flag is not required. */
7784 return CCGCmode;
7785 /* Codes doable only with the sign flag when comparing
7786 against zero, but for which we lack a jump instruction,
7787 so we need to use relational tests against the overflow
7788 flag, which therefore must be zero. */
7789 case GT: /* ZF=0 & SF=OF */
7790 case LE: /* ZF=1 | SF<>OF */
7791 if (op1 == const0_rtx)
7792 return CCNOmode;
7793 else
7794 return CCGCmode;
7795 /* The strcmp pattern does (use flags), and combine may ask us for the
7796 proper mode. */
7797 case USE:
7798 return CCmode;
7799 default:
7800 abort ();
7801 }
7802 }
7803
7804 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7805
7806 int
7807 ix86_use_fcomi_compare (code)
7808 enum rtx_code code ATTRIBUTE_UNUSED;
7809 {
7810 enum rtx_code swapped_code = swap_condition (code);
7811 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7812 || (ix86_fp_comparison_cost (swapped_code)
7813 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7814 }
7815
7816 /* Swap, force into registers, or otherwise massage the two operands
7817 of an fp comparison. The operands are updated in place; the new
7818 comparison code is returned. */
7819
7820 static enum rtx_code
7821 ix86_prepare_fp_compare_args (code, pop0, pop1)
7822 enum rtx_code code;
7823 rtx *pop0, *pop1;
7824 {
7825 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7826 rtx op0 = *pop0, op1 = *pop1;
7827 enum machine_mode op_mode = GET_MODE (op0);
7828 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7829
7830 /* All of the unordered compare instructions only work on registers.
7831 The same is true of the XFmode compare instructions. The same is
7832 true of the fcomi compare instructions. */
7833
7834 if (!is_sse
7835 && (fpcmp_mode == CCFPUmode
7836 || op_mode == XFmode
7837 || op_mode == TFmode
7838 || ix86_use_fcomi_compare (code)))
7839 {
7840 op0 = force_reg (op_mode, op0);
7841 op1 = force_reg (op_mode, op1);
7842 }
7843 else
7844 {
7845 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7846 things around if they appear profitable, otherwise force op0
7847 into a register. */
7848
7849 if (standard_80387_constant_p (op0) == 0
7850 || (GET_CODE (op0) == MEM
7851 && ! (standard_80387_constant_p (op1) == 0
7852 || GET_CODE (op1) == MEM)))
7853 {
7854 rtx tmp;
7855 tmp = op0, op0 = op1, op1 = tmp;
7856 code = swap_condition (code);
7857 }
7858
7859 if (GET_CODE (op0) != REG)
7860 op0 = force_reg (op_mode, op0);
7861
7862 if (CONSTANT_P (op1))
7863 {
7864 if (standard_80387_constant_p (op1))
7865 op1 = force_reg (op_mode, op1);
7866 else
7867 op1 = validize_mem (force_const_mem (op_mode, op1));
7868 }
7869 }
7870
7871 /* Try to rearrange the comparison to make it cheaper. */
7872 if (ix86_fp_comparison_cost (code)
7873 > ix86_fp_comparison_cost (swap_condition (code))
7874 && (GET_CODE (op1) == REG || !no_new_pseudos))
7875 {
7876 rtx tmp;
7877 tmp = op0, op0 = op1, op1 = tmp;
7878 code = swap_condition (code);
7879 if (GET_CODE (op0) != REG)
7880 op0 = force_reg (op_mode, op0);
7881 }
7882
7883 *pop0 = op0;
7884 *pop1 = op1;
7885 return code;
7886 }
7887
7888 /* Convert the comparison codes we use to represent FP comparisons to the
7889 integer codes that will result in a proper branch. Return UNKNOWN if no
7890 such code is available. */
7891 static enum rtx_code
7892 ix86_fp_compare_code_to_integer (code)
7893 enum rtx_code code;
7894 {
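  /* Note: after an fcomi or fnstsw/sahf sequence the FP outcome lives in
     CF/ZF/PF, so ordered FP comparisons map onto their unsigned integer
     counterparts; see the flag table in ix86_fp_comparison_codes below.  */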
7895 switch (code)
7896 {
7897 case GT:
7898 return GTU;
7899 case GE:
7900 return GEU;
7901 case ORDERED:
7902 case UNORDERED:
7903 return code;
7904 break;
7905 case UNEQ:
7906 return EQ;
7907 break;
7908 case UNLT:
7909 return LTU;
7910 break;
7911 case UNLE:
7912 return LEU;
7913 break;
7914 case LTGT:
7915 return NE;
7916 break;
7917 default:
7918 return UNKNOWN;
7919 }
7920 }
7921
7922 /* Split comparison code CODE into comparisons we can do using branch
7923 instructions. BYPASS_CODE is the comparison code for a branch that will
7924 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7925 is not required, its code is set to NIL.
7926 We never require more than two branches. */
7927 static void
7928 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7929 enum rtx_code code, *bypass_code, *first_code, *second_code;
7930 {
7931 *first_code = code;
7932 *bypass_code = NIL;
7933 *second_code = NIL;
7934
7935 /* The fcomi comparison sets flags as follows:
7936
7937 cmp ZF PF CF
7938 > 0 0 0
7939 < 0 0 1
7940 = 1 0 0
7941 un 1 1 1 */
7942
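  /* Roughly: FIRST_CODE is tested with a branch to the target; BYPASS_CODE,
     when not NIL, branches around that test (used when FIRST_CODE would be
     spuriously true for unordered operands); SECOND_CODE, when not NIL, is an
     extra branch to the target (used when FIRST_CODE would be spuriously
     false for unordered operands).  For example, under IEEE, EQ becomes
     "branch around if UNORDERED, then branch if UNEQ", while NE becomes
     "branch if LTGT, also branch if UNORDERED".  */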
7943 switch (code)
7944 {
7945 case GT: /* GTU - CF=0 & ZF=0 */
7946 case GE: /* GEU - CF=0 */
7947 case ORDERED: /* PF=0 */
7948 case UNORDERED: /* PF=1 */
7949 case UNEQ: /* EQ - ZF=1 */
7950 case UNLT: /* LTU - CF=1 */
7951 case UNLE: /* LEU - CF=1 | ZF=1 */
7952 case LTGT: /* EQ - ZF=0 */
7953 break;
7954 case LT: /* LTU - CF=1 - fails on unordered */
7955 *first_code = UNLT;
7956 *bypass_code = UNORDERED;
7957 break;
7958 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7959 *first_code = UNLE;
7960 *bypass_code = UNORDERED;
7961 break;
7962 case EQ: /* EQ - ZF=1 - fails on unordered */
7963 *first_code = UNEQ;
7964 *bypass_code = UNORDERED;
7965 break;
7966 case NE: /* NE - ZF=0 - fails on unordered */
7967 *first_code = LTGT;
7968 *second_code = UNORDERED;
7969 break;
7970 case UNGE: /* GEU - CF=0 - fails on unordered */
7971 *first_code = GE;
7972 *second_code = UNORDERED;
7973 break;
7974 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7975 *first_code = GT;
7976 *second_code = UNORDERED;
7977 break;
7978 default:
7979 abort ();
7980 }
7981 if (!TARGET_IEEE_FP)
7982 {
7983 *second_code = NIL;
7984 *bypass_code = NIL;
7985 }
7986 }
7987
7988 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7989 All the following functions use the number of instructions as the cost metric.
7990 In the future this should be tweaked to compute bytes for optimize_size and
7991 take into account the performance of various instructions on various CPUs. */
7992 static int
7993 ix86_fp_comparison_arithmetics_cost (code)
7994 enum rtx_code code;
7995 {
7996 if (!TARGET_IEEE_FP)
7997 return 4;
7998 /* The cost of code output by ix86_expand_fp_compare. */
7999 switch (code)
8000 {
8001 case UNLE:
8002 case UNLT:
8003 case LTGT:
8004 case GT:
8005 case GE:
8006 case UNORDERED:
8007 case ORDERED:
8008 case UNEQ:
8009 return 4;
8010 break;
8011 case LT:
8012 case NE:
8013 case EQ:
8014 case UNGE:
8015 return 5;
8016 break;
8017 case LE:
8018 case UNGT:
8019 return 6;
8020 break;
8021 default:
8022 abort ();
8023 }
8024 }
8025
8026 /* Return cost of comparison done using fcomi operation.
8027 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8028 static int
8029 ix86_fp_comparison_fcomi_cost (code)
8030 enum rtx_code code;
8031 {
8032 enum rtx_code bypass_code, first_code, second_code;
8033 /* Return an arbitrarily high cost when the instruction is not supported - this
8034 prevents gcc from using it. */
8035 if (!TARGET_CMOVE)
8036 return 1024;
8037 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8038 return (bypass_code != NIL || second_code != NIL) + 2;
8039 }
8040
8041 /* Return cost of comparison done using sahf operation.
8042 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8043 static int
8044 ix86_fp_comparison_sahf_cost (code)
8045 enum rtx_code code;
8046 {
8047 enum rtx_code bypass_code, first_code, second_code;
8048 /* Return an arbitrarily high cost when the instruction is not preferred - this
8049 keeps gcc from using it. */
8050 if (!TARGET_USE_SAHF && !optimize_size)
8051 return 1024;
8052 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8053 return (bypass_code != NIL || second_code != NIL) + 3;
8054 }
8055
8056 /* Compute the minimum cost of the comparison over all available methods.
8057 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8058 static int
8059 ix86_fp_comparison_cost (code)
8060 enum rtx_code code;
8061 {
8062 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8063 int min;
8064
8065 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8066 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8067
8068 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8069 if (min > sahf_cost)
8070 min = sahf_cost;
8071 if (min > fcomi_cost)
8072 min = fcomi_cost;
8073 return min;
8074 }
8075
8076 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8077
8078 static rtx
8079 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8080 enum rtx_code code;
8081 rtx op0, op1, scratch;
8082 rtx *second_test;
8083 rtx *bypass_test;
8084 {
8085 enum machine_mode fpcmp_mode, intcmp_mode;
8086 rtx tmp, tmp2;
8087 int cost = ix86_fp_comparison_cost (code);
8088 enum rtx_code bypass_code, first_code, second_code;
8089
8090 fpcmp_mode = ix86_fp_compare_mode (code);
8091 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8092
8093 if (second_test)
8094 *second_test = NULL_RTX;
8095 if (bypass_test)
8096 *bypass_test = NULL_RTX;
8097
8098 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8099
8100 /* Do fcomi/sahf based test when profitable. */
8101 if ((bypass_code == NIL || bypass_test)
8102 && (second_code == NIL || second_test)
8103 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8104 {
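      /* With TARGET_CMOVE (and hence fcomi available) the comparison sets
	 EFLAGS directly; otherwise we store the FPU status word with fnstsw
	 and copy AH into EFLAGS with sahf.  */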
8105 if (TARGET_CMOVE)
8106 {
8107 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8108 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8109 tmp);
8110 emit_insn (tmp);
8111 }
8112 else
8113 {
8114 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8115 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8116 if (!scratch)
8117 scratch = gen_reg_rtx (HImode);
8118 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8119 emit_insn (gen_x86_sahf_1 (scratch));
8120 }
8121
8122 /* The FP codes work out to act like unsigned. */
8123 intcmp_mode = fpcmp_mode;
8124 code = first_code;
8125 if (bypass_code != NIL)
8126 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8127 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8128 const0_rtx);
8129 if (second_code != NIL)
8130 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8131 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8132 const0_rtx);
8133 }
8134 else
8135 {
8136 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8137 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8138 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8139 if (!scratch)
8140 scratch = gen_reg_rtx (HImode);
8141 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8142
8143 /* In the unordered case, we have to check C2 for NaNs, which
8144 doesn't happen to work out to anything nice combination-wise.
8145 So do some bit twiddling on the value we've got in AH to come
8146 up with an appropriate set of condition codes. */
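      /* In AH (after fnstsw) the FPU condition bits are: 0x01 = C0 (acts as
	 a carry flag), 0x04 = C2 (set for unordered results), 0x40 = C3 (acts
	 as a zero flag).  The masks 0x05, 0x44 and 0x45 used below are
	 combinations of these bits.  */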
8147
8148 intcmp_mode = CCNOmode;
8149 switch (code)
8150 {
8151 case GT:
8152 case UNGT:
8153 if (code == GT || !TARGET_IEEE_FP)
8154 {
8155 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8156 code = EQ;
8157 }
8158 else
8159 {
8160 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8161 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8162 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8163 intcmp_mode = CCmode;
8164 code = GEU;
8165 }
8166 break;
8167 case LT:
8168 case UNLT:
8169 if (code == LT && TARGET_IEEE_FP)
8170 {
8171 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8172 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8173 intcmp_mode = CCmode;
8174 code = EQ;
8175 }
8176 else
8177 {
8178 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8179 code = NE;
8180 }
8181 break;
8182 case GE:
8183 case UNGE:
8184 if (code == GE || !TARGET_IEEE_FP)
8185 {
8186 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8187 code = EQ;
8188 }
8189 else
8190 {
8191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8192 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8193 GEN_INT (0x01)));
8194 code = NE;
8195 }
8196 break;
8197 case LE:
8198 case UNLE:
8199 if (code == LE && TARGET_IEEE_FP)
8200 {
8201 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8202 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8203 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8204 intcmp_mode = CCmode;
8205 code = LTU;
8206 }
8207 else
8208 {
8209 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8210 code = NE;
8211 }
8212 break;
8213 case EQ:
8214 case UNEQ:
8215 if (code == EQ && TARGET_IEEE_FP)
8216 {
8217 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8218 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8219 intcmp_mode = CCmode;
8220 code = EQ;
8221 }
8222 else
8223 {
8224 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8225 code = NE;
8226 break;
8227 }
8228 break;
8229 case NE:
8230 case LTGT:
8231 if (code == NE && TARGET_IEEE_FP)
8232 {
8233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8234 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8235 GEN_INT (0x40)));
8236 code = NE;
8237 }
8238 else
8239 {
8240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8241 code = EQ;
8242 }
8243 break;
8244
8245 case UNORDERED:
8246 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8247 code = NE;
8248 break;
8249 case ORDERED:
8250 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8251 code = EQ;
8252 break;
8253
8254 default:
8255 abort ();
8256 }
8257 }
8258
8259 /* Return the test that should be put into the flags user, i.e.
8260 the bcc, scc, or cmov instruction. */
8261 return gen_rtx_fmt_ee (code, VOIDmode,
8262 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8263 const0_rtx);
8264 }
8265
8266 rtx
8267 ix86_expand_compare (code, second_test, bypass_test)
8268 enum rtx_code code;
8269 rtx *second_test, *bypass_test;
8270 {
8271 rtx op0, op1, ret;
8272 op0 = ix86_compare_op0;
8273 op1 = ix86_compare_op1;
8274
8275 if (second_test)
8276 *second_test = NULL_RTX;
8277 if (bypass_test)
8278 *bypass_test = NULL_RTX;
8279
8280 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8281 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8282 second_test, bypass_test);
8283 else
8284 ret = ix86_expand_int_compare (code, op0, op1);
8285
8286 return ret;
8287 }
8288
8289 /* Return true if the CODE will result in a nontrivial jump sequence. */
8290 bool
8291 ix86_fp_jump_nontrivial_p (code)
8292 enum rtx_code code;
8293 {
8294 enum rtx_code bypass_code, first_code, second_code;
8295 if (!TARGET_CMOVE)
8296 return true;
8297 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8298 return bypass_code != NIL || second_code != NIL;
8299 }
8300
8301 void
8302 ix86_expand_branch (code, label)
8303 enum rtx_code code;
8304 rtx label;
8305 {
8306 rtx tmp;
8307
8308 switch (GET_MODE (ix86_compare_op0))
8309 {
8310 case QImode:
8311 case HImode:
8312 case SImode:
8313 simple:
8314 tmp = ix86_expand_compare (code, NULL, NULL);
8315 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8316 gen_rtx_LABEL_REF (VOIDmode, label),
8317 pc_rtx);
8318 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8319 return;
8320
8321 case SFmode:
8322 case DFmode:
8323 case XFmode:
8324 case TFmode:
8325 {
8326 rtvec vec;
8327 int use_fcomi;
8328 enum rtx_code bypass_code, first_code, second_code;
8329
8330 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8331 &ix86_compare_op1);
8332
8333 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8334
8335 /* Check whether we will use the natural sequence with one jump. If
8336 so, we can expand the jump early. Otherwise delay expansion by
8337 creating a compound insn so as not to confuse the optimizers. */
8338 if (bypass_code == NIL && second_code == NIL
8339 && TARGET_CMOVE)
8340 {
8341 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8342 gen_rtx_LABEL_REF (VOIDmode, label),
8343 pc_rtx, NULL_RTX);
8344 }
8345 else
8346 {
8347 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8348 ix86_compare_op0, ix86_compare_op1);
8349 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8350 gen_rtx_LABEL_REF (VOIDmode, label),
8351 pc_rtx);
8352 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8353
8354 use_fcomi = ix86_use_fcomi_compare (code);
8355 vec = rtvec_alloc (3 + !use_fcomi);
8356 RTVEC_ELT (vec, 0) = tmp;
8357 RTVEC_ELT (vec, 1)
8358 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8359 RTVEC_ELT (vec, 2)
8360 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8361 if (! use_fcomi)
8362 RTVEC_ELT (vec, 3)
8363 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8364
8365 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8366 }
8367 return;
8368 }
8369
8370 case DImode:
8371 if (TARGET_64BIT)
8372 goto simple;
8373 /* Expand DImode branch into multiple compare+branch. */
8374 {
8375 rtx lo[2], hi[2], label2;
8376 enum rtx_code code1, code2, code3;
8377
8378 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8379 {
8380 tmp = ix86_compare_op0;
8381 ix86_compare_op0 = ix86_compare_op1;
8382 ix86_compare_op1 = tmp;
8383 code = swap_condition (code);
8384 }
8385 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8386 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8387
8388 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8389 avoid two branches. This costs one extra insn, so disable when
8390 optimizing for size. */
8391
8392 if ((code == EQ || code == NE)
8393 && (!optimize_size
8394 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8395 {
8396 rtx xor0, xor1;
8397
8398 xor1 = hi[0];
8399 if (hi[1] != const0_rtx)
8400 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8401 NULL_RTX, 0, OPTAB_WIDEN);
8402
8403 xor0 = lo[0];
8404 if (lo[1] != const0_rtx)
8405 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8406 NULL_RTX, 0, OPTAB_WIDEN);
8407
8408 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8409 NULL_RTX, 0, OPTAB_WIDEN);
8410
8411 ix86_compare_op0 = tmp;
8412 ix86_compare_op1 = const0_rtx;
8413 ix86_expand_branch (code, label);
8414 return;
8415 }
8416
8417 /* Otherwise, if we are doing a less-than or greater-or-equal-than
8418 comparison, op1 is a constant, and its low word is zero, then we can
8419 just examine the high word. */
8420
8421 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8422 switch (code)
8423 {
8424 case LT: case LTU: case GE: case GEU:
8425 ix86_compare_op0 = hi[0];
8426 ix86_compare_op1 = hi[1];
8427 ix86_expand_branch (code, label);
8428 return;
8429 default:
8430 break;
8431 }
8432
8433 /* Otherwise, we need two or three jumps. */
8434
8435 label2 = gen_label_rtx ();
8436
8437 code1 = code;
8438 code2 = swap_condition (code);
8439 code3 = unsigned_condition (code);
8440
8441 switch (code)
8442 {
8443 case LT: case GT: case LTU: case GTU:
8444 break;
8445
8446 case LE: code1 = LT; code2 = GT; break;
8447 case GE: code1 = GT; code2 = LT; break;
8448 case LEU: code1 = LTU; code2 = GTU; break;
8449 case GEU: code1 = GTU; code2 = LTU; break;
8450
8451 case EQ: code1 = NIL; code2 = NE; break;
8452 case NE: code2 = NIL; break;
8453
8454 default:
8455 abort ();
8456 }
8457
8458 /*
8459 * a < b =>
8460 * if (hi(a) < hi(b)) goto true;
8461 * if (hi(a) > hi(b)) goto false;
8462 * if (lo(a) < lo(b)) goto true;
8463 * false:
8464 */
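	 /*
	  * Similarly, a <= b (signed) becomes:
	  *    if (hi(a) < hi(b)) goto true;
	  *    if (hi(a) > hi(b)) goto false;
	  *    if (lo(a) <= lo(b)) goto true;   (unsigned compare of the low words)
	  *  false:
	  * using code1 = LT, code2 = GT and code3 = LEU from the table above.
	  */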
8465
8466 ix86_compare_op0 = hi[0];
8467 ix86_compare_op1 = hi[1];
8468
8469 if (code1 != NIL)
8470 ix86_expand_branch (code1, label);
8471 if (code2 != NIL)
8472 ix86_expand_branch (code2, label2);
8473
8474 ix86_compare_op0 = lo[0];
8475 ix86_compare_op1 = lo[1];
8476 ix86_expand_branch (code3, label);
8477
8478 if (code2 != NIL)
8479 emit_label (label2);
8480 return;
8481 }
8482
8483 default:
8484 abort ();
8485 }
8486 }
8487
8488 /* Split branch based on floating point condition. */
8489 void
8490 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8491 enum rtx_code code;
8492 rtx op1, op2, target1, target2, tmp;
8493 {
8494 rtx second, bypass;
8495 rtx label = NULL_RTX;
8496 rtx condition;
8497 int bypass_probability = -1, second_probability = -1, probability = -1;
8498 rtx i;
8499
8500 if (target2 != pc_rtx)
8501 {
8502 rtx tmp = target2;
8503 code = reverse_condition_maybe_unordered (code);
8504 target2 = target1;
8505 target1 = tmp;
8506 }
8507
8508 condition = ix86_expand_fp_compare (code, op1, op2,
8509 tmp, &second, &bypass);
8510
8511 if (split_branch_probability >= 0)
8512 {
8513 /* Distribute the probabilities across the jumps.
8514 Assume that BYPASS and SECOND always test
8515 for UNORDERED. */
8516 probability = split_branch_probability;
8517
8518 /* A value of 1 is low enough that the probability does not need
8519 to be updated. Later we may run some experiments and see
8520 whether unordered values are more frequent in practice. */
8521 if (bypass)
8522 bypass_probability = 1;
8523 if (second)
8524 second_probability = 1;
8525 }
8526 if (bypass != NULL_RTX)
8527 {
8528 label = gen_label_rtx ();
8529 i = emit_jump_insn (gen_rtx_SET
8530 (VOIDmode, pc_rtx,
8531 gen_rtx_IF_THEN_ELSE (VOIDmode,
8532 bypass,
8533 gen_rtx_LABEL_REF (VOIDmode,
8534 label),
8535 pc_rtx)));
8536 if (bypass_probability >= 0)
8537 REG_NOTES (i)
8538 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8539 GEN_INT (bypass_probability),
8540 REG_NOTES (i));
8541 }
8542 i = emit_jump_insn (gen_rtx_SET
8543 (VOIDmode, pc_rtx,
8544 gen_rtx_IF_THEN_ELSE (VOIDmode,
8545 condition, target1, target2)));
8546 if (probability >= 0)
8547 REG_NOTES (i)
8548 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8549 GEN_INT (probability),
8550 REG_NOTES (i));
8551 if (second != NULL_RTX)
8552 {
8553 i = emit_jump_insn (gen_rtx_SET
8554 (VOIDmode, pc_rtx,
8555 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8556 target2)));
8557 if (second_probability >= 0)
8558 REG_NOTES (i)
8559 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8560 GEN_INT (second_probability),
8561 REG_NOTES (i));
8562 }
8563 if (label != NULL_RTX)
8564 emit_label (label);
8565 }
8566
8567 int
8568 ix86_expand_setcc (code, dest)
8569 enum rtx_code code;
8570 rtx dest;
8571 {
8572 rtx ret, tmp, tmpreg;
8573 rtx second_test, bypass_test;
8574
8575 if (GET_MODE (ix86_compare_op0) == DImode
8576 && !TARGET_64BIT)
8577 return 0; /* FAIL */
8578
8579 if (GET_MODE (dest) != QImode)
8580 abort ();
8581
8582 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8583 PUT_MODE (ret, QImode);
8584
8585 tmp = dest;
8586 tmpreg = dest;
8587
8588 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8589 if (bypass_test || second_test)
8590 {
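      /* A bypass test means the main condition is only meaningful when the
	 bypass condition (unordered) is false, so its reverse is ANDed in;
	 a second test is an alternative way for the result to be true, so it
	 is ORed in.  */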
8591 rtx test = second_test;
8592 int bypass = 0;
8593 rtx tmp2 = gen_reg_rtx (QImode);
8594 if (bypass_test)
8595 {
8596 if (second_test)
8597 abort ();
8598 test = bypass_test;
8599 bypass = 1;
8600 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8601 }
8602 PUT_MODE (test, QImode);
8603 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8604
8605 if (bypass)
8606 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8607 else
8608 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8609 }
8610
8611 return 1; /* DONE */
8612 }
8613
8614 int
8615 ix86_expand_int_movcc (operands)
8616 rtx operands[];
8617 {
8618 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8619 rtx compare_seq, compare_op;
8620 rtx second_test, bypass_test;
8621 enum machine_mode mode = GET_MODE (operands[0]);
8622
8623 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8624 When the comparison is done against an immediate, we can convert it to
8625 LTU or GEU by adjusting the constant. */
8626
8627 if ((code == LEU || code == GTU)
8628 && GET_CODE (ix86_compare_op1) == CONST_INT
8629 && mode != HImode
8630 && INTVAL (ix86_compare_op1) != -1
8631 /* For x86-64, the immediate field in the instruction is 32-bit
8632 signed, so we can't increment a DImode value above 0x7fffffff. */
8633 && (!TARGET_64BIT
8634 || GET_MODE (ix86_compare_op0) != DImode
8635 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8636 && GET_CODE (operands[2]) == CONST_INT
8637 && GET_CODE (operands[3]) == CONST_INT)
8638 {
8639 if (code == LEU)
8640 code = LTU;
8641 else
8642 code = GEU;
8643 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8644 GET_MODE (ix86_compare_op0));
8645 }
8646
8647 start_sequence ();
8648 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8649 compare_seq = get_insns ();
8650 end_sequence ();
8651
8652 compare_code = GET_CODE (compare_op);
8653
8654 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8655 HImode insns, we'd be swallowed in word prefix ops. */
8656
8657 if (mode != HImode
8658 && (mode != DImode || TARGET_64BIT)
8659 && GET_CODE (operands[2]) == CONST_INT
8660 && GET_CODE (operands[3]) == CONST_INT)
8661 {
8662 rtx out = operands[0];
8663 HOST_WIDE_INT ct = INTVAL (operands[2]);
8664 HOST_WIDE_INT cf = INTVAL (operands[3]);
8665 HOST_WIDE_INT diff;
8666
8667 if ((compare_code == LTU || compare_code == GEU)
8668 && !second_test && !bypass_test)
8669 {
8670
8671 /* Detect overlap between destination and compare sources. */
8672 rtx tmp = out;
8673
8674 /* To simplify rest of code, restrict to the GEU case. */
8675 if (compare_code == LTU)
8676 {
8677 int tmp = ct;
8678 ct = cf;
8679 cf = tmp;
8680 compare_code = reverse_condition (compare_code);
8681 code = reverse_condition (code);
8682 }
8683 diff = ct - cf;
8684
8685 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8686 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8687 tmp = gen_reg_rtx (mode);
8688
8689 emit_insn (compare_seq);
8690 if (mode == DImode)
8691 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8692 else
8693 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8694
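	  /* The sbb emitted above leaves tmp = -1 when the carry is set (the
	     GEU condition is false) and tmp = 0 when it is true; the
	     arithmetic below maps this -1/0 onto cf/ct.  */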
8695 if (diff == 1)
8696 {
8697 /*
8698 * cmpl op0,op1
8699 * sbbl dest,dest
8700 * [addl dest, ct]
8701 *
8702 * Size 5 - 8.
8703 */
8704 if (ct)
8705 tmp = expand_simple_binop (mode, PLUS,
8706 tmp, GEN_INT (ct),
8707 tmp, 1, OPTAB_DIRECT);
8708 }
8709 else if (cf == -1)
8710 {
8711 /*
8712 * cmpl op0,op1
8713 * sbbl dest,dest
8714 * orl $ct, dest
8715 *
8716 * Size 8.
8717 */
8718 tmp = expand_simple_binop (mode, IOR,
8719 tmp, GEN_INT (ct),
8720 tmp, 1, OPTAB_DIRECT);
8721 }
8722 else if (diff == -1 && ct)
8723 {
8724 /*
8725 * cmpl op0,op1
8726 * sbbl dest,dest
8727 * xorl $-1, dest
8728 * [addl dest, cf]
8729 *
8730 * Size 8 - 11.
8731 */
8732 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8733 if (cf)
8734 tmp = expand_simple_binop (mode, PLUS,
8735 tmp, GEN_INT (cf),
8736 tmp, 1, OPTAB_DIRECT);
8737 }
8738 else
8739 {
8740 /*
8741 * cmpl op0,op1
8742 * sbbl dest,dest
8743 * andl cf - ct, dest
8744 * [addl dest, ct]
8745 *
8746 * Size 8 - 11.
8747 */
8748 tmp = expand_simple_binop (mode, AND,
8749 tmp,
8750 gen_int_mode (cf - ct, mode),
8751 tmp, 1, OPTAB_DIRECT);
8752 if (ct)
8753 tmp = expand_simple_binop (mode, PLUS,
8754 tmp, GEN_INT (ct),
8755 tmp, 1, OPTAB_DIRECT);
8756 }
8757
8758 if (tmp != out)
8759 emit_move_insn (out, tmp);
8760
8761 return 1; /* DONE */
8762 }
8763
8764 diff = ct - cf;
8765 if (diff < 0)
8766 {
8767 HOST_WIDE_INT tmp;
8768 tmp = ct, ct = cf, cf = tmp;
8769 diff = -diff;
8770 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8771 {
8772 /* We may be reversing an unordered compare to a normal compare, which
8773 is not valid in general (we may convert a non-trapping condition
8774 to a trapping one); however, on i386 we currently emit all
8775 comparisons unordered. */
8776 compare_code = reverse_condition_maybe_unordered (compare_code);
8777 code = reverse_condition_maybe_unordered (code);
8778 }
8779 else
8780 {
8781 compare_code = reverse_condition (compare_code);
8782 code = reverse_condition (code);
8783 }
8784 }
8785
8786 compare_code = NIL;
8787 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8788 && GET_CODE (ix86_compare_op1) == CONST_INT)
8789 {
8790 if (ix86_compare_op1 == const0_rtx
8791 && (code == LT || code == GE))
8792 compare_code = code;
8793 else if (ix86_compare_op1 == constm1_rtx)
8794 {
8795 if (code == LE)
8796 compare_code = LT;
8797 else if (code == GT)
8798 compare_code = GE;
8799 }
8800 }
8801
8802 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8803 if (compare_code != NIL
8804 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8805 && (cf == -1 || ct == -1))
8806 {
8807 /* If lea code below could be used, only optimize
8808 if it results in a 2 insn sequence. */
8809
8810 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8811 || diff == 3 || diff == 5 || diff == 9)
8812 || (compare_code == LT && ct == -1)
8813 || (compare_code == GE && cf == -1))
8814 {
8815 /*
8816 * notl op1 (if necessary)
8817 * sarl $31, op1
8818 * orl cf, op1
8819 */
8820 if (ct != -1)
8821 {
8822 cf = ct;
8823 ct = -1;
8824 code = reverse_condition (code);
8825 }
8826
8827 out = emit_store_flag (out, code, ix86_compare_op0,
8828 ix86_compare_op1, VOIDmode, 0, -1);
8829
8830 out = expand_simple_binop (mode, IOR,
8831 out, GEN_INT (cf),
8832 out, 1, OPTAB_DIRECT);
8833 if (out != operands[0])
8834 emit_move_insn (operands[0], out);
8835
8836 return 1; /* DONE */
8837 }
8838 }
8839
8840 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8841 || diff == 3 || diff == 5 || diff == 9)
8842 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8843 {
8844 /*
8845 * xorl dest,dest
8846 * cmpl op1,op2
8847 * setcc dest
8848 * lea cf(dest*(ct-cf)),dest
8849 *
8850 * Size 14.
8851 *
8852 * This also catches the degenerate setcc-only case.
8853 */
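	  /*
	   * For example, with ct = 5 and cf = 2 (diff = 3), setcc leaves 0 or 1
	   * in dest and the lea computes 2 + 3*dest, i.e. 2 or 5.
	   */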
8854
8855 rtx tmp;
8856 int nops;
8857
8858 out = emit_store_flag (out, code, ix86_compare_op0,
8859 ix86_compare_op1, VOIDmode, 0, 1);
8860
8861 nops = 0;
8862 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8863 arithmetic in the proper mode to match. */
8864 if (diff == 1)
8865 tmp = out;
8866 else
8867 {
8868 rtx out1;
8869 out1 = out;
8870 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8871 nops++;
8872 if (diff & 1)
8873 {
8874 tmp = gen_rtx_PLUS (mode, tmp, out1);
8875 nops++;
8876 }
8877 }
8878 if (cf != 0)
8879 {
8880 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8881 nops++;
8882 }
8883 if (tmp != out
8884 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8885 {
8886 if (nops == 1)
8887 {
8888 rtx clob;
8889
8890 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8891 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8892
8893 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8894 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8895 emit_insn (tmp);
8896 }
8897 else
8898 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8899 }
8900 if (out != operands[0])
8901 emit_move_insn (operands[0], out);
8902
8903 return 1; /* DONE */
8904 }
8905
8906 /*
8907 * General case: Jumpful:
8908 * xorl dest,dest cmpl op1, op2
8909 * cmpl op1, op2 movl ct, dest
8910 * setcc dest jcc 1f
8911 * decl dest movl cf, dest
8912 * andl (cf-ct),dest 1:
8913 * addl ct,dest
8914 *
8915 * Size 20. Size 14.
8916 *
8917 * This is reasonably steep, but branch mispredict costs are
8918 * high on modern cpus, so consider failing only if optimizing
8919 * for space.
8920 *
8921 * %%% Parameterize branch_cost on the tuning architecture, then
8922 * use that. The 80386 couldn't care less about mispredicts.
8923 */
8924
8925 if (!optimize_size && !TARGET_CMOVE)
8926 {
8927 if (ct == 0)
8928 {
8929 ct = cf;
8930 cf = 0;
8931 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8932 /* We may be reversing an unordered compare to a normal compare,
8933 which is not valid in general (we may convert a non-trapping
8934 condition to a trapping one); however, on i386 we currently
8935 emit all comparisons unordered. */
8936 code = reverse_condition_maybe_unordered (code);
8937 else
8938 {
8939 code = reverse_condition (code);
8940 if (compare_code != NIL)
8941 compare_code = reverse_condition (compare_code);
8942 }
8943 }
8944
8945 if (compare_code != NIL)
8946 {
8947 /* notl op1 (if needed)
8948 sarl $31, op1
8949 andl (cf-ct), op1
8950 addl ct, op1
8951
8952 For x < 0 (resp. x <= -1) there will be no notl,
8953 so if possible swap the constants to get rid of the
8954 complement.
8955 True/false will be -1/0 while code below (store flag
8956 followed by decrement) is 0/-1, so the constants need
8957 to be exchanged once more. */
8958
8959 if (compare_code == GE || !cf)
8960 {
8961 code = reverse_condition (code);
8962 compare_code = LT;
8963 }
8964 else
8965 {
8966 HOST_WIDE_INT tmp = cf;
8967 cf = ct;
8968 ct = tmp;
8969 }
8970
8971 out = emit_store_flag (out, code, ix86_compare_op0,
8972 ix86_compare_op1, VOIDmode, 0, -1);
8973 }
8974 else
8975 {
8976 out = emit_store_flag (out, code, ix86_compare_op0,
8977 ix86_compare_op1, VOIDmode, 0, 1);
8978
8979 out = expand_simple_binop (mode, PLUS,
8980 out, constm1_rtx,
8981 out, 1, OPTAB_DIRECT);
8982 }
8983
8984 out = expand_simple_binop (mode, AND,
8985 out,
8986 gen_int_mode (cf - ct, mode),
8987 out, 1, OPTAB_DIRECT);
8988 out = expand_simple_binop (mode, PLUS,
8989 out, GEN_INT (ct),
8990 out, 1, OPTAB_DIRECT);
8991 if (out != operands[0])
8992 emit_move_insn (operands[0], out);
8993
8994 return 1; /* DONE */
8995 }
8996 }
8997
8998 if (!TARGET_CMOVE)
8999 {
9000 /* Try a few more things with specific constants and a variable. */
9001
9002 optab op;
9003 rtx var, orig_out, out, tmp;
9004
9005 if (optimize_size)
9006 return 0; /* FAIL */
9007
9008 /* If one of the two operands is an interesting constant, load a 0/-1
9009 mask with the code above and mask the variable in with a logical operation. */
9010
9011 if (GET_CODE (operands[2]) == CONST_INT)
9012 {
9013 var = operands[3];
9014 if (INTVAL (operands[2]) == 0)
9015 operands[3] = constm1_rtx, op = and_optab;
9016 else if (INTVAL (operands[2]) == -1)
9017 operands[3] = const0_rtx, op = ior_optab;
9018 else
9019 return 0; /* FAIL */
9020 }
9021 else if (GET_CODE (operands[3]) == CONST_INT)
9022 {
9023 var = operands[2];
9024 if (INTVAL (operands[3]) == 0)
9025 operands[2] = constm1_rtx, op = and_optab;
9026 else if (INTVAL (operands[3]) == -1)
9027 operands[2] = const0_rtx, op = ior_optab;
9028 else
9029 return 0; /* FAIL */
9030 }
9031 else
9032 return 0; /* FAIL */
9033
9034 orig_out = operands[0];
9035 tmp = gen_reg_rtx (mode);
9036 operands[0] = tmp;
9037
9038 /* Recurse to get the constant loaded. */
9039 if (ix86_expand_int_movcc (operands) == 0)
9040 return 0; /* FAIL */
9041
9042 /* Mask in the interesting variable. */
9043 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9044 OPTAB_WIDEN);
9045 if (out != orig_out)
9046 emit_move_insn (orig_out, out);
9047
9048 return 1; /* DONE */
9049 }
9050
9051 /*
9052 * For comparison with above,
9053 *
9054 * movl cf,dest
9055 * movl ct,tmp
9056 * cmpl op1,op2
9057 * cmovcc tmp,dest
9058 *
9059 * Size 15.
9060 */
9061
9062 if (! nonimmediate_operand (operands[2], mode))
9063 operands[2] = force_reg (mode, operands[2]);
9064 if (! nonimmediate_operand (operands[3], mode))
9065 operands[3] = force_reg (mode, operands[3]);
9066
9067 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9068 {
9069 rtx tmp = gen_reg_rtx (mode);
9070 emit_move_insn (tmp, operands[3]);
9071 operands[3] = tmp;
9072 }
9073 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9074 {
9075 rtx tmp = gen_reg_rtx (mode);
9076 emit_move_insn (tmp, operands[2]);
9077 operands[2] = tmp;
9078 }
9079 if (! register_operand (operands[2], VOIDmode)
9080 && ! register_operand (operands[3], VOIDmode))
9081 operands[2] = force_reg (mode, operands[2]);
9082
9083 emit_insn (compare_seq);
9084 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9085 gen_rtx_IF_THEN_ELSE (mode,
9086 compare_op, operands[2],
9087 operands[3])));
9088 if (bypass_test)
9089 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9090 gen_rtx_IF_THEN_ELSE (mode,
9091 bypass_test,
9092 operands[3],
9093 operands[0])));
9094 if (second_test)
9095 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9096 gen_rtx_IF_THEN_ELSE (mode,
9097 second_test,
9098 operands[2],
9099 operands[0])));
9100
9101 return 1; /* DONE */
9102 }
9103
9104 int
9105 ix86_expand_fp_movcc (operands)
9106 rtx operands[];
9107 {
9108 enum rtx_code code;
9109 rtx tmp;
9110 rtx compare_op, second_test, bypass_test;
9111
9112 /* For SF/DFmode conditional moves based on comparisons
9113 in same mode, we may want to use SSE min/max instructions. */
9114 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9115 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9116 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9117 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9118 && (!TARGET_IEEE_FP
9119 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9120 /* We may be called from the post-reload splitter. */
9121 && (!REG_P (operands[0])
9122 || SSE_REG_P (operands[0])
9123 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9124 {
9125 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9126 code = GET_CODE (operands[1]);
9127
9128 /* See if we have a (cross) match between the comparison operands and
9129 the conditional move operands. */
9130 if (rtx_equal_p (operands[2], op1))
9131 {
9132 rtx tmp = op0;
9133 op0 = op1;
9134 op1 = tmp;
9135 code = reverse_condition_maybe_unordered (code);
9136 }
9137 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9138 {
9139 /* Check for min operation. */
9140 if (code == LT)
9141 {
9142 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9143 if (memory_operand (op0, VOIDmode))
9144 op0 = force_reg (GET_MODE (operands[0]), op0);
9145 if (GET_MODE (operands[0]) == SFmode)
9146 emit_insn (gen_minsf3 (operands[0], op0, op1));
9147 else
9148 emit_insn (gen_mindf3 (operands[0], op0, op1));
9149 return 1;
9150 }
9151 /* Check for max operation. */
9152 if (code == GT)
9153 {
9154 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9155 if (memory_operand (op0, VOIDmode))
9156 op0 = force_reg (GET_MODE (operands[0]), op0);
9157 if (GET_MODE (operands[0]) == SFmode)
9158 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9159 else
9160 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9161 return 1;
9162 }
9163 }
9164 /* Arrange for the condition to be an sse_comparison_operator. In case we
9165 are in non-IEEE mode, try to canonicalize the destination operand
9166 to be first in the comparison - this helps reload avoid extra
9167 moves. */
9168 if (!sse_comparison_operator (operands[1], VOIDmode)
9169 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9170 {
9171 rtx tmp = ix86_compare_op0;
9172 ix86_compare_op0 = ix86_compare_op1;
9173 ix86_compare_op1 = tmp;
9174 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9175 VOIDmode, ix86_compare_op0,
9176 ix86_compare_op1);
9177 }
9178 /* Similarly, try to arrange for the result to be the first operand of the
9179 conditional move. We also don't support the NE comparison on SSE, so
9180 try to avoid it. */
9181 if ((rtx_equal_p (operands[0], operands[3])
9182 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9183 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9184 {
9185 rtx tmp = operands[2];
9186 operands[2] = operands[3];
9187 operands[3] = tmp;
9188 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9189 (GET_CODE (operands[1])),
9190 VOIDmode, ix86_compare_op0,
9191 ix86_compare_op1);
9192 }
9193 if (GET_MODE (operands[0]) == SFmode)
9194 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9195 operands[2], operands[3],
9196 ix86_compare_op0, ix86_compare_op1));
9197 else
9198 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9199 operands[2], operands[3],
9200 ix86_compare_op0, ix86_compare_op1));
9201 return 1;
9202 }
9203
9204 /* The floating point conditional move instructions don't directly
9205 support conditions resulting from a signed integer comparison. */
9206
9207 code = GET_CODE (operands[1]);
9208 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9209
9210 /* The floating point conditional move instructions don't directly
9211 support signed integer comparisons. */
9212
9213 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9214 {
9215 if (second_test != NULL || bypass_test != NULL)
9216 abort ();
9217 tmp = gen_reg_rtx (QImode);
9218 ix86_expand_setcc (code, tmp);
9219 code = NE;
9220 ix86_compare_op0 = tmp;
9221 ix86_compare_op1 = const0_rtx;
9222 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9223 }
9224 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9225 {
9226 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9227 emit_move_insn (tmp, operands[3]);
9228 operands[3] = tmp;
9229 }
9230 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9231 {
9232 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9233 emit_move_insn (tmp, operands[2]);
9234 operands[2] = tmp;
9235 }
9236
9237 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9238 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9239 compare_op,
9240 operands[2],
9241 operands[3])));
9242 if (bypass_test)
9243 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9244 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9245 bypass_test,
9246 operands[3],
9247 operands[0])));
9248 if (second_test)
9249 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9250 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9251 second_test,
9252 operands[2],
9253 operands[0])));
9254
9255 return 1;
9256 }
9257
9258 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9259 works for floating point parameters and non-offsettable memories.
9260 For pushes, it returns just stack offsets; the values will be saved
9261 in the right order. At most three parts are generated. */
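/* For example, on IA-32 a DImode or DFmode value splits into two SImode
   parts and an XFmode/TFmode value into three; on x86-64 an XFmode or
   TFmode value splits into a DImode part plus an SImode part.  */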
9262
9263 static int
9264 ix86_split_to_parts (operand, parts, mode)
9265 rtx operand;
9266 rtx *parts;
9267 enum machine_mode mode;
9268 {
9269 int size;
9270
9271 if (!TARGET_64BIT)
9272 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9273 else
9274 size = (GET_MODE_SIZE (mode) + 4) / 8;
9275
9276 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9277 abort ();
9278 if (size < 2 || size > 3)
9279 abort ();
9280
9281 /* Optimize constant pool references into immediates. This is used by fp
9282 moves, which force all constants to memory to allow combining. */
9283 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9284 {
9285 rtx tmp = maybe_get_pool_constant (operand);
9286 if (tmp)
9287 operand = tmp;
9288 }
9289
9290 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9291 {
9292 /* The only non-offsettable memories we handle are pushes. */
9293 if (! push_operand (operand, VOIDmode))
9294 abort ();
9295
9296 operand = copy_rtx (operand);
9297 PUT_MODE (operand, Pmode);
9298 parts[0] = parts[1] = parts[2] = operand;
9299 }
9300 else if (!TARGET_64BIT)
9301 {
9302 if (mode == DImode)
9303 split_di (&operand, 1, &parts[0], &parts[1]);
9304 else
9305 {
9306 if (REG_P (operand))
9307 {
9308 if (!reload_completed)
9309 abort ();
9310 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9311 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9312 if (size == 3)
9313 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9314 }
9315 else if (offsettable_memref_p (operand))
9316 {
9317 operand = adjust_address (operand, SImode, 0);
9318 parts[0] = operand;
9319 parts[1] = adjust_address (operand, SImode, 4);
9320 if (size == 3)
9321 parts[2] = adjust_address (operand, SImode, 8);
9322 }
9323 else if (GET_CODE (operand) == CONST_DOUBLE)
9324 {
9325 REAL_VALUE_TYPE r;
9326 long l[4];
9327
9328 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9329 switch (mode)
9330 {
9331 case XFmode:
9332 case TFmode:
9333 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9334 parts[2] = gen_int_mode (l[2], SImode);
9335 break;
9336 case DFmode:
9337 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9338 break;
9339 default:
9340 abort ();
9341 }
9342 parts[1] = gen_int_mode (l[1], SImode);
9343 parts[0] = gen_int_mode (l[0], SImode);
9344 }
9345 else
9346 abort ();
9347 }
9348 }
9349 else
9350 {
9351 if (mode == TImode)
9352 split_ti (&operand, 1, &parts[0], &parts[1]);
9353 if (mode == XFmode || mode == TFmode)
9354 {
9355 if (REG_P (operand))
9356 {
9357 if (!reload_completed)
9358 abort ();
9359 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9360 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9361 }
9362 else if (offsettable_memref_p (operand))
9363 {
9364 operand = adjust_address (operand, DImode, 0);
9365 parts[0] = operand;
9366 parts[1] = adjust_address (operand, SImode, 8);
9367 }
9368 else if (GET_CODE (operand) == CONST_DOUBLE)
9369 {
9370 REAL_VALUE_TYPE r;
9371 long l[3];
9372
9373 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9374 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9375 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9376 if (HOST_BITS_PER_WIDE_INT >= 64)
9377 parts[0]
9378 = gen_int_mode
9379 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9380 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9381 DImode);
9382 else
9383 parts[0] = immed_double_const (l[0], l[1], DImode);
9384 parts[1] = gen_int_mode (l[2], SImode);
9385 }
9386 else
9387 abort ();
9388 }
9389 }
9390
9391 return size;
9392 }
9393
9394 /* Emit insns to perform a move or push of DI, DF, and XF values.
9395 Return false when normal moves are needed; true when all required
9396 insns have been emitted. Operands 2-4 contain the input values
9397 in the correct order; operands 5-7 contain the output values. */
9398
9399 void
9400 ix86_split_long_move (operands)
9401 rtx operands[];
9402 {
9403 rtx part[2][3];
9404 int nparts;
9405 int push = 0;
9406 int collisions = 0;
9407 enum machine_mode mode = GET_MODE (operands[0]);
9408
9409 /* The DFmode expanders may ask us to move a double.
9410 For a 64-bit target this is a single move. By hiding that fact
9411 here we simplify the i386.md splitters. */
9412 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9413 {
9414 /* Optimize constant pool references into immediates. This is used by
9415 fp moves, which force all constants to memory to allow combining. */
9416
9417 if (GET_CODE (operands[1]) == MEM
9418 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9419 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9420 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9421 if (push_operand (operands[0], VOIDmode))
9422 {
9423 operands[0] = copy_rtx (operands[0]);
9424 PUT_MODE (operands[0], Pmode);
9425 }
9426 else
9427 operands[0] = gen_lowpart (DImode, operands[0]);
9428 operands[1] = gen_lowpart (DImode, operands[1]);
9429 emit_move_insn (operands[0], operands[1]);
9430 return;
9431 }
9432
9433 /* The only non-offsettable memory we handle is push. */
9434 if (push_operand (operands[0], VOIDmode))
9435 push = 1;
9436 else if (GET_CODE (operands[0]) == MEM
9437 && ! offsettable_memref_p (operands[0]))
9438 abort ();
9439
9440 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9441 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9442
9443 /* When emitting a push, take care with source operands on the stack. */
9444 if (push && GET_CODE (operands[1]) == MEM
9445 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9446 {
9447 if (nparts == 3)
9448 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9449 XEXP (part[1][2], 0));
9450 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9451 XEXP (part[1][1], 0));
9452 }
9453
9454 /* We need to do the copies in the right order in case an address register
9455 of the source overlaps the destination. */
9456 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9457 {
9458 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9459 collisions++;
9460 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9461 collisions++;
9462 if (nparts == 3
9463 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9464 collisions++;
9465
9466 /* Collision in the middle part can be handled by reordering. */
9467 if (collisions == 1 && nparts == 3
9468 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9469 {
9470 rtx tmp;
9471 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9472 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9473 }
9474
9475 /* If there are more collisions, we can't handle them by reordering.
9476 Do an lea to the last part and use only one colliding move. */
9477 else if (collisions > 1)
9478 {
9479 collisions = 1;
9480 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9481 XEXP (part[1][0], 0)));
9482 part[1][0] = change_address (part[1][0],
9483 TARGET_64BIT ? DImode : SImode,
9484 part[0][nparts - 1]);
9485 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9486 if (nparts == 3)
9487 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9488 }
9489 }
9490
9491 if (push)
9492 {
9493 if (!TARGET_64BIT)
9494 {
9495 if (nparts == 3)
9496 {
9497 /* We use only the first 12 bytes of the TFmode value, but for pushing we
9498 are required to adjust the stack as if we were pushing a real 16-byte
9499 value. */
9500 if (mode == TFmode && !TARGET_64BIT)
9501 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9502 GEN_INT (-4)));
9503 emit_move_insn (part[0][2], part[1][2]);
9504 }
9505 }
9506 else
9507 {
9508 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
9509 register, that is OK - we will just use the larger counterpart. We also
9510 retype the memory - this comes from an attempt to avoid the REX prefix
9511 when moving the second half of a TFmode value. */
9512 if (GET_MODE (part[1][1]) == SImode)
9513 {
9514 if (GET_CODE (part[1][1]) == MEM)
9515 part[1][1] = adjust_address (part[1][1], DImode, 0);
9516 else if (REG_P (part[1][1]))
9517 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9518 else
9519 abort ();
9520 if (GET_MODE (part[1][0]) == SImode)
9521 part[1][0] = part[1][1];
9522 }
9523 }
9524 emit_move_insn (part[0][1], part[1][1]);
9525 emit_move_insn (part[0][0], part[1][0]);
9526 return;
9527 }
9528
9529 /* Choose the correct order so as not to overwrite the source before it is copied. */
9530 if ((REG_P (part[0][0])
9531 && REG_P (part[1][1])
9532 && (REGNO (part[0][0]) == REGNO (part[1][1])
9533 || (nparts == 3
9534 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9535 || (collisions > 0
9536 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9537 {
9538 if (nparts == 3)
9539 {
9540 operands[2] = part[0][2];
9541 operands[3] = part[0][1];
9542 operands[4] = part[0][0];
9543 operands[5] = part[1][2];
9544 operands[6] = part[1][1];
9545 operands[7] = part[1][0];
9546 }
9547 else
9548 {
9549 operands[2] = part[0][1];
9550 operands[3] = part[0][0];
9551 operands[5] = part[1][1];
9552 operands[6] = part[1][0];
9553 }
9554 }
9555 else
9556 {
9557 if (nparts == 3)
9558 {
9559 operands[2] = part[0][0];
9560 operands[3] = part[0][1];
9561 operands[4] = part[0][2];
9562 operands[5] = part[1][0];
9563 operands[6] = part[1][1];
9564 operands[7] = part[1][2];
9565 }
9566 else
9567 {
9568 operands[2] = part[0][0];
9569 operands[3] = part[0][1];
9570 operands[5] = part[1][0];
9571 operands[6] = part[1][1];
9572 }
9573 }
9574 emit_move_insn (operands[2], operands[5]);
9575 emit_move_insn (operands[3], operands[6]);
9576 if (nparts == 3)
9577 emit_move_insn (operands[4], operands[7]);
9578
9579 return;
9580 }
9581
9582 void
9583 ix86_split_ashldi (operands, scratch)
9584 rtx *operands, scratch;
9585 {
9586 rtx low[2], high[2];
9587 int count;
9588
9589 if (GET_CODE (operands[2]) == CONST_INT)
9590 {
9591 split_di (operands, 2, low, high);
9592 count = INTVAL (operands[2]) & 63;
9593
9594 if (count >= 32)
9595 {
9596 emit_move_insn (high[0], low[1]);
9597 emit_move_insn (low[0], const0_rtx);
9598
9599 if (count > 32)
9600 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9601 }
9602 else
9603 {
9604 if (!rtx_equal_p (operands[0], operands[1]))
9605 emit_move_insn (operands[0], operands[1]);
9606 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9607 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9608 }
9609 }
9610 else
9611 {
9612 if (!rtx_equal_p (operands[0], operands[1]))
9613 emit_move_insn (operands[0], operands[1]);
9614
9615 split_di (operands, 1, low, high);
9616
9617 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9618 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9619
9620 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
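      /* The shld/shl pair above only handles shift counts modulo 32.  If the
	 runtime count turns out to be 32..63, the x86_shift_adj patterns below
	 fix up the result - using cmov when available, and a conditional jump
	 otherwise.  */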
9621 {
9622 if (! no_new_pseudos)
9623 scratch = force_reg (SImode, const0_rtx);
9624 else
9625 emit_move_insn (scratch, const0_rtx);
9626
9627 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9628 scratch));
9629 }
9630 else
9631 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9632 }
9633 }
9634
9635 void
9636 ix86_split_ashrdi (operands, scratch)
9637 rtx *operands, scratch;
9638 {
9639 rtx low[2], high[2];
9640 int count;
9641
9642 if (GET_CODE (operands[2]) == CONST_INT)
9643 {
9644 split_di (operands, 2, low, high);
9645 count = INTVAL (operands[2]) & 63;
9646
9647 if (count >= 32)
9648 {
9649 emit_move_insn (low[0], high[1]);
9650
9651 if (! reload_completed)
9652 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9653 else
9654 {
9655 emit_move_insn (high[0], low[0]);
9656 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9657 }
9658
9659 if (count > 32)
9660 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9661 }
9662 else
9663 {
9664 if (!rtx_equal_p (operands[0], operands[1]))
9665 emit_move_insn (operands[0], operands[1]);
9666 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9667 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9668 }
9669 }
9670 else
9671 {
9672 if (!rtx_equal_p (operands[0], operands[1]))
9673 emit_move_insn (operands[0], operands[1]);
9674
9675 split_di (operands, 1, low, high);
9676
9677 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9678 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9679
9680 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9681 {
9682 if (! no_new_pseudos)
9683 scratch = gen_reg_rtx (SImode);
9684 emit_move_insn (scratch, high[0]);
9685 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9686 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9687 scratch));
9688 }
9689 else
9690 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9691 }
9692 }
9693
9694 void
9695 ix86_split_lshrdi (operands, scratch)
9696 rtx *operands, scratch;
9697 {
9698 rtx low[2], high[2];
9699 int count;
9700
9701 if (GET_CODE (operands[2]) == CONST_INT)
9702 {
9703 split_di (operands, 2, low, high);
9704 count = INTVAL (operands[2]) & 63;
9705
9706 if (count >= 32)
9707 {
9708 emit_move_insn (low[0], high[1]);
9709 emit_move_insn (high[0], const0_rtx);
9710
9711 if (count > 32)
9712 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9713 }
9714 else
9715 {
9716 if (!rtx_equal_p (operands[0], operands[1]))
9717 emit_move_insn (operands[0], operands[1]);
9718 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9719 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9720 }
9721 }
9722 else
9723 {
9724 if (!rtx_equal_p (operands[0], operands[1]))
9725 emit_move_insn (operands[0], operands[1]);
9726
9727 split_di (operands, 1, low, high);
9728
9729 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9730 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9731
9732 /* Heh. By reversing the arguments, we can reuse this pattern. */
9733 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9734 {
9735 if (! no_new_pseudos)
9736 scratch = force_reg (SImode, const0_rtx);
9737 else
9738 emit_move_insn (scratch, const0_rtx);
9739
9740 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9741 scratch));
9742 }
9743 else
9744 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9745 }
9746 }
9747
9748 /* Helper function for the string operations below. Test whether VARIABLE
9749 is aligned to VALUE bytes. If so, jump to the returned label. */
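/* For example, ix86_expand_aligntest (destreg, 1) emits a test of the low
   bit of DESTREG and a jump to the returned label when that bit is clear,
   so the fall-through path is the one that still needs a one-byte step
   before the pointer becomes 2-byte aligned.  */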
9750 static rtx
9751 ix86_expand_aligntest (variable, value)
9752 rtx variable;
9753 int value;
9754 {
9755 rtx label = gen_label_rtx ();
9756 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9757 if (GET_MODE (variable) == DImode)
9758 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9759 else
9760 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9761 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9762 1, label);
9763 return label;
9764 }
9765
9766 /* Adjust COUNTER by the VALUE. */
9767 static void
9768 ix86_adjust_counter (countreg, value)
9769 rtx countreg;
9770 HOST_WIDE_INT value;
9771 {
9772 if (GET_MODE (countreg) == DImode)
9773 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9774 else
9775 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9776 }
9777
9778 /* Zero extend the possibly-SImode EXP to a Pmode register. */
9779 rtx
9780 ix86_zero_extend_to_Pmode (exp)
9781 rtx exp;
9782 {
9783 rtx r;
9784 if (GET_MODE (exp) == VOIDmode)
9785 return force_reg (Pmode, exp);
9786 if (GET_MODE (exp) == Pmode)
9787 return copy_to_mode_reg (Pmode, exp);
9788 r = gen_reg_rtx (Pmode);
9789 emit_insn (gen_zero_extendsidi2 (r, exp));
9790 return r;
9791 }
9792
9793 /* Expand string move (memcpy) operation. Use i386 string operations when
9794 profitable. expand_clrstr contains similar code. */
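/* A summary of the strategy below: when optimizing for size, or when the
   count is not a compile-time multiple of 4, emit a single rep movsb;
   for constant counts that are small or sufficiently aligned, emit
   rep movsl (or rep movsq on 64-bit) followed by a short tail of word,
   half-word and byte moves; otherwise align the destination first, do the
   bulk copy with a rep prefix, and clean up the remainder afterwards.  */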
9795 int
9796 ix86_expand_movstr (dst, src, count_exp, align_exp)
9797 rtx dst, src, count_exp, align_exp;
9798 {
9799 rtx srcreg, destreg, countreg;
9800 enum machine_mode counter_mode;
9801 HOST_WIDE_INT align = 0;
9802 unsigned HOST_WIDE_INT count = 0;
9803 rtx insns;
9804
9805 start_sequence ();
9806
9807 if (GET_CODE (align_exp) == CONST_INT)
9808 align = INTVAL (align_exp);
9809
9810 /* This simple hack avoids all inlining code and simplifies code below. */
9811 if (!TARGET_ALIGN_STRINGOPS)
9812 align = 64;
9813
9814 if (GET_CODE (count_exp) == CONST_INT)
9815 count = INTVAL (count_exp);
9816
9817 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9818 for 64 bits use SImode when possible, otherwise DImode.
9819 Set count to the number of bytes copied when known at compile time. */
9820 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9821 || x86_64_zero_extended_value (count_exp))
9822 counter_mode = SImode;
9823 else
9824 counter_mode = DImode;
9825
9826 if (counter_mode != SImode && counter_mode != DImode)
9827 abort ();
9828
9829 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9830 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9831
9832 emit_insn (gen_cld ());
9833
9834 /* When optimizing for size, emit a simple rep ; movsb instruction for
9835 counts not divisible by 4. */
9836
9837 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9838 {
9839 countreg = ix86_zero_extend_to_Pmode (count_exp);
9840 if (TARGET_64BIT)
9841 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9842 destreg, srcreg, countreg));
9843 else
9844 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9845 destreg, srcreg, countreg));
9846 }
9847
9848 /* For constant aligned (or small unaligned) copies use rep movsl
9849 followed by code copying the rest. For PentiumPro ensure 8 byte
9850 alignment to allow rep movsl acceleration. */
9851
9852 else if (count != 0
9853 && (align >= 8
9854 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9855 || optimize_size || count < (unsigned int) 64))
9856 {
9857 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9858 if (count & ~(size - 1))
9859 {
9860 countreg = copy_to_mode_reg (counter_mode,
9861 GEN_INT ((count >> (size == 4 ? 2 : 3))
9862 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9863 countreg = ix86_zero_extend_to_Pmode (countreg);
9864 if (size == 4)
9865 {
9866 if (TARGET_64BIT)
9867 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9868 destreg, srcreg, countreg));
9869 else
9870 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9871 destreg, srcreg, countreg));
9872 }
9873 else
9874 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9875 destreg, srcreg, countreg));
9876 }
9877 if (size == 8 && (count & 0x04))
9878 emit_insn (gen_strmovsi (destreg, srcreg));
9879 if (count & 0x02)
9880 emit_insn (gen_strmovhi (destreg, srcreg));
9881 if (count & 0x01)
9882 emit_insn (gen_strmovqi (destreg, srcreg));
9883 }
9884 /* The generic code based on the glibc implementation:
9885 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9886 allowing accelerated copying there)
9887 - copy the data using rep movsl
9888 - copy the rest. */
9889 else
9890 {
9891 rtx countreg2;
9892 rtx label = NULL;
9893 int desired_alignment = (TARGET_PENTIUMPRO
9894 && (count == 0 || count >= (unsigned int) 260)
9895 ? 8 : UNITS_PER_WORD);
9896
9897 /* In case we don't know anything about the alignment, default to
9898 the library version, since it is usually equally fast and results in
9899 shorter code. */
9900 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9901 {
9902 end_sequence ();
9903 return 0;
9904 }
9905
9906 if (TARGET_SINGLE_STRINGOP)
9907 emit_insn (gen_cld ());
9908
9909 countreg2 = gen_reg_rtx (Pmode);
9910 countreg = copy_to_mode_reg (counter_mode, count_exp);
9911
9912 /* We don't use loops to align the destination and to copy parts smaller
9913 than 4 bytes, because gcc is able to optimize such code better (in
9914 the case the destination or the count really is aligned, gcc is often
9915 able to predict the branches) and also it is friendlier to the
9916 hardware branch prediction.
9917
9918 Using loops is beneficial for the generic case, because we can
9919 handle small counts using the loops. Many CPUs (such as Athlon)
9920 have large REP prefix setup costs.
9921
9922 This is quite costly. Maybe we can revisit this decision later or
9923 add some customizability to this code. */
9924
9925 if (count == 0 && align < desired_alignment)
9926 {
9927 label = gen_label_rtx ();
9928 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9929 LEU, 0, counter_mode, 1, label);
9930 }
9931 if (align <= 1)
9932 {
9933 rtx label = ix86_expand_aligntest (destreg, 1);
9934 emit_insn (gen_strmovqi (destreg, srcreg));
9935 ix86_adjust_counter (countreg, 1);
9936 emit_label (label);
9937 LABEL_NUSES (label) = 1;
9938 }
9939 if (align <= 2)
9940 {
9941 rtx label = ix86_expand_aligntest (destreg, 2);
9942 emit_insn (gen_strmovhi (destreg, srcreg));
9943 ix86_adjust_counter (countreg, 2);
9944 emit_label (label);
9945 LABEL_NUSES (label) = 1;
9946 }
9947 if (align <= 4 && desired_alignment > 4)
9948 {
9949 rtx label = ix86_expand_aligntest (destreg, 4);
9950 emit_insn (gen_strmovsi (destreg, srcreg));
9951 ix86_adjust_counter (countreg, 4);
9952 emit_label (label);
9953 LABEL_NUSES (label) = 1;
9954 }
9955
9956 if (label && desired_alignment > 4 && !TARGET_64BIT)
9957 {
9958 emit_label (label);
9959 LABEL_NUSES (label) = 1;
9960 label = NULL_RTX;
9961 }
9962 if (!TARGET_SINGLE_STRINGOP)
9963 emit_insn (gen_cld ());
9964 if (TARGET_64BIT)
9965 {
9966 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9967 GEN_INT (3)));
9968 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9969 destreg, srcreg, countreg2));
9970 }
9971 else
9972 {
9973 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9974 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9975 destreg, srcreg, countreg2));
9976 }
9977
9978 if (label)
9979 {
9980 emit_label (label);
9981 LABEL_NUSES (label) = 1;
9982 }
9983 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9984 emit_insn (gen_strmovsi (destreg, srcreg));
9985 if ((align <= 4 || count == 0) && TARGET_64BIT)
9986 {
9987 rtx label = ix86_expand_aligntest (countreg, 4);
9988 emit_insn (gen_strmovsi (destreg, srcreg));
9989 emit_label (label);
9990 LABEL_NUSES (label) = 1;
9991 }
9992 if (align > 2 && count != 0 && (count & 2))
9993 emit_insn (gen_strmovhi (destreg, srcreg));
9994 if (align <= 2 || count == 0)
9995 {
9996 rtx label = ix86_expand_aligntest (countreg, 2);
9997 emit_insn (gen_strmovhi (destreg, srcreg));
9998 emit_label (label);
9999 LABEL_NUSES (label) = 1;
10000 }
10001 if (align > 1 && count != 0 && (count & 1))
10002 emit_insn (gen_strmovqi (destreg, srcreg));
10003 if (align <= 1 || count == 0)
10004 {
10005 rtx label = ix86_expand_aligntest (countreg, 1);
10006 emit_insn (gen_strmovqi (destreg, srcreg));
10007 emit_label (label);
10008 LABEL_NUSES (label) = 1;
10009 }
10010 }
10011
10012 insns = get_insns ();
10013 end_sequence ();
10014
10015 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10016 emit_insn (insns);
10017 return 1;
10018 }
10019
10020 /* Expand string clear operation (bzero). Use i386 string operations when
10021 profitable. expand_movstr contains similar code. */
10022 int
10023 ix86_expand_clrstr (src, count_exp, align_exp)
10024 rtx src, count_exp, align_exp;
10025 {
10026 rtx destreg, zeroreg, countreg;
10027 enum machine_mode counter_mode;
10028 HOST_WIDE_INT align = 0;
10029 unsigned HOST_WIDE_INT count = 0;
10030
10031 if (GET_CODE (align_exp) == CONST_INT)
10032 align = INTVAL (align_exp);
10033
10034 /* This simple hack avoids all inlining code and simplifies code below. */
10035 if (!TARGET_ALIGN_STRINGOPS)
10036 align = 32;
10037
10038 if (GET_CODE (count_exp) == CONST_INT)
10039 count = INTVAL (count_exp);
10040 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
10041 for 64 bits use SImode when possible, otherwise DImode.
10042 Set count to the number of bytes to clear when known at compile time. */
10043 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10044 || x86_64_zero_extended_value (count_exp))
10045 counter_mode = SImode;
10046 else
10047 counter_mode = DImode;
10048
10049 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10050
10051 emit_insn (gen_cld ());
10052
10053 /* When optimizing for size, emit a simple rep ; stosb instruction for
10054 counts not divisible by 4. */
10055
10056 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10057 {
10058 countreg = ix86_zero_extend_to_Pmode (count_exp);
10059 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10060 if (TARGET_64BIT)
10061 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10062 destreg, countreg));
10063 else
10064 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10065 destreg, countreg));
10066 }
10067 else if (count != 0
10068 && (align >= 8
10069 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10070 || optimize_size || count < (unsigned int) 64))
10071 {
10072 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10073 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10074 if (count & ~(size - 1))
10075 {
10076 countreg = copy_to_mode_reg (counter_mode,
10077 GEN_INT ((count >> (size == 4 ? 2 : 3))
10078 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10079 countreg = ix86_zero_extend_to_Pmode (countreg);
10080 if (size == 4)
10081 {
10082 if (TARGET_64BIT)
10083 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10084 destreg, countreg));
10085 else
10086 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10087 destreg, countreg));
10088 }
10089 else
10090 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10091 destreg, countreg));
10092 }
10093 if (size == 8 && (count & 0x04))
10094 emit_insn (gen_strsetsi (destreg,
10095 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10096 if (count & 0x02)
10097 emit_insn (gen_strsethi (destreg,
10098 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10099 if (count & 0x01)
10100 emit_insn (gen_strsetqi (destreg,
10101 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10102 }
10103 else
10104 {
10105 rtx countreg2;
10106 rtx label = NULL;
10107 /* Compute desired alignment of the string operation. */
10108 int desired_alignment = (TARGET_PENTIUMPRO
10109 && (count == 0 || count >= (unsigned int) 260)
10110 ? 8 : UNITS_PER_WORD);
10111
10112 /* In case we don't know anything about the alignment, default to
10113 the library version, since it is usually equally fast and results in
10114 shorter code. */
10115 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10116 return 0;
10117
10118 if (TARGET_SINGLE_STRINGOP)
10119 emit_insn (gen_cld ());
10120
10121 countreg2 = gen_reg_rtx (Pmode);
10122 countreg = copy_to_mode_reg (counter_mode, count_exp);
10123 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10124
10125 if (count == 0 && align < desired_alignment)
10126 {
10127 label = gen_label_rtx ();
10128 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10129 LEU, 0, counter_mode, 1, label);
10130 }
10131 if (align <= 1)
10132 {
10133 rtx label = ix86_expand_aligntest (destreg, 1);
10134 emit_insn (gen_strsetqi (destreg,
10135 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10136 ix86_adjust_counter (countreg, 1);
10137 emit_label (label);
10138 LABEL_NUSES (label) = 1;
10139 }
10140 if (align <= 2)
10141 {
10142 rtx label = ix86_expand_aligntest (destreg, 2);
10143 emit_insn (gen_strsethi (destreg,
10144 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10145 ix86_adjust_counter (countreg, 2);
10146 emit_label (label);
10147 LABEL_NUSES (label) = 1;
10148 }
10149 if (align <= 4 && desired_alignment > 4)
10150 {
10151 rtx label = ix86_expand_aligntest (destreg, 4);
10152 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10153 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10154 : zeroreg)));
10155 ix86_adjust_counter (countreg, 4);
10156 emit_label (label);
10157 LABEL_NUSES (label) = 1;
10158 }
10159
10160 if (label && desired_alignment > 4 && !TARGET_64BIT)
10161 {
10162 emit_label (label);
10163 LABEL_NUSES (label) = 1;
10164 label = NULL_RTX;
10165 }
10166
10167 if (!TARGET_SINGLE_STRINGOP)
10168 emit_insn (gen_cld ());
10169 if (TARGET_64BIT)
10170 {
10171 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10172 GEN_INT (3)));
10173 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10174 destreg, countreg2));
10175 }
10176 else
10177 {
10178 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10179 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10180 destreg, countreg2));
10181 }
10182 if (label)
10183 {
10184 emit_label (label);
10185 LABEL_NUSES (label) = 1;
10186 }
10187
10188 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10189 emit_insn (gen_strsetsi (destreg,
10190 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10191 if (TARGET_64BIT && (align <= 4 || count == 0))
10192 {
10193 rtx label = ix86_expand_aligntest (countreg, 4);
10194 emit_insn (gen_strsetsi (destreg,
10195 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10196 emit_label (label);
10197 LABEL_NUSES (label) = 1;
10198 }
10199 if (align > 2 && count != 0 && (count & 2))
10200 emit_insn (gen_strsethi (destreg,
10201 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10202 if (align <= 2 || count == 0)
10203 {
10204 rtx label = ix86_expand_aligntest (countreg, 2);
10205 emit_insn (gen_strsethi (destreg,
10206 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10207 emit_label (label);
10208 LABEL_NUSES (label) = 1;
10209 }
10210 if (align > 1 && count != 0 && (count & 1))
10211 emit_insn (gen_strsetqi (destreg,
10212 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10213 if (align <= 1 || count == 0)
10214 {
10215 rtx label = ix86_expand_aligntest (countreg, 1);
10216 emit_insn (gen_strsetqi (destreg,
10217 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10218 emit_label (label);
10219 LABEL_NUSES (label) = 1;
10220 }
10221 }
10222 return 1;
10223 }
10224 /* Expand strlen. */
10225 int
10226 ix86_expand_strlen (out, src, eoschar, align)
10227 rtx out, src, eoschar, align;
10228 {
10229 rtx addr, scratch1, scratch2, scratch3, scratch4;
10230
10231 /* The generic case of the strlen expander is long. Avoid expanding
10232 it unless TARGET_INLINE_ALL_STRINGOPS. */
10233
10234 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10235 && !TARGET_INLINE_ALL_STRINGOPS
10236 && !optimize_size
10237 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10238 return 0;
10239
10240 addr = force_reg (Pmode, XEXP (src, 0));
10241 scratch1 = gen_reg_rtx (Pmode);
10242
10243 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10244 && !optimize_size)
10245 {
10246 /* Well it seems that some optimizer does not combine a call like
10247 foo(strlen(bar), strlen(bar));
10248 when the move and the subtraction are done here. It does calculate
10249 the length just once when these instructions are done inside of
10250 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10251 often used and I use one fewer register for the lifetime of
10252 output_strlen_unroll() this is better. */
10253
10254 emit_move_insn (out, addr);
10255
10256 ix86_expand_strlensi_unroll_1 (out, align);
10257
10258 /* strlensi_unroll_1 returns the address of the zero at the end of
10259 the string, like memchr(), so compute the length by subtracting
10260 the start address. */
10261 if (TARGET_64BIT)
10262 emit_insn (gen_subdi3 (out, out, addr));
10263 else
10264 emit_insn (gen_subsi3 (out, out, addr));
10265 }
10266 else
10267 {
10268 scratch2 = gen_reg_rtx (Pmode);
10269 scratch3 = gen_reg_rtx (Pmode);
10270 scratch4 = force_reg (Pmode, constm1_rtx);
10271
10272 emit_move_insn (scratch3, addr);
10273 eoschar = force_reg (QImode, eoschar);
10274
10275 emit_insn (gen_cld ());
10276 if (TARGET_64BIT)
10277 {
10278 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10279 align, scratch4, scratch3));
10280 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10281 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10282 }
10283 else
10284 {
10285 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10286 align, scratch4, scratch3));
10287 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10288 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10289 }
10290 }
10291 return 1;
10292 }
10293
10294 /* Expand the appropriate insns for doing strlen if not just doing
10295 repnz; scasb
10296
10297 out = result, initialized with the start address
10298 align_rtx = alignment of the address.
10299 scratch = scratch register, initialized with the start address when
10300 not aligned, otherwise undefined
10301
10302 This is just the body. It needs the initializations mentioned above and
10303 some address computation at the end. These things are done in i386.md. */
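/* Roughly (a sketch in C, not the exact RTL that is emitted):

     while ((unsigned long) out & 3)          -- the 1..3 unaligned-byte checks
       if (*(char *) out == 0) goto done; else out++;
     for (;;)
       {
         unsigned int w = *(unsigned int *) out;  out += 4;
         if (((w - 0x01010101) & ~w & 0x80808080) != 0)
           break;                              -- some byte of W is zero
       }
     ... back up to the first zero byte ...
   done:                                       -- OUT points at the zero.  */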
10304
10305 static void
10306 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10307 rtx out, align_rtx;
10308 {
10309 int align;
10310 rtx tmp;
10311 rtx align_2_label = NULL_RTX;
10312 rtx align_3_label = NULL_RTX;
10313 rtx align_4_label = gen_label_rtx ();
10314 rtx end_0_label = gen_label_rtx ();
10315 rtx mem;
10316 rtx tmpreg = gen_reg_rtx (SImode);
10317 rtx scratch = gen_reg_rtx (SImode);
10318
10319 align = 0;
10320 if (GET_CODE (align_rtx) == CONST_INT)
10321 align = INTVAL (align_rtx);
10322
10323 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10324
10325 /* Is there a known alignment and is it less than 4? */
10326 if (align < 4)
10327 {
10328 rtx scratch1 = gen_reg_rtx (Pmode);
10329 emit_move_insn (scratch1, out);
10330 /* Is there a known alignment and is it not 2? */
10331 if (align != 2)
10332 {
10333 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10334 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10335
10336 /* Leave just the 3 lower bits. */
10337 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10338 NULL_RTX, 0, OPTAB_WIDEN);
10339
10340 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10341 Pmode, 1, align_4_label);
10342 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10343 Pmode, 1, align_2_label);
10344 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10345 Pmode, 1, align_3_label);
10346 }
10347 else
10348 {
10349 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10350 check whether it is aligned to a 4-byte boundary. */
10351
10352 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10353 NULL_RTX, 0, OPTAB_WIDEN);
10354
10355 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10356 Pmode, 1, align_4_label);
10357 }
10358
10359 mem = gen_rtx_MEM (QImode, out);
10360
10361 /* Now compare the bytes. */
10362
10363 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10364 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10365 QImode, 1, end_0_label);
10366
10367 /* Increment the address. */
10368 if (TARGET_64BIT)
10369 emit_insn (gen_adddi3 (out, out, const1_rtx));
10370 else
10371 emit_insn (gen_addsi3 (out, out, const1_rtx));
10372
10373 /* Not needed with an alignment of 2 */
10374 if (align != 2)
10375 {
10376 emit_label (align_2_label);
10377
10378 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10379 end_0_label);
10380
10381 if (TARGET_64BIT)
10382 emit_insn (gen_adddi3 (out, out, const1_rtx));
10383 else
10384 emit_insn (gen_addsi3 (out, out, const1_rtx));
10385
10386 emit_label (align_3_label);
10387 }
10388
10389 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10390 end_0_label);
10391
10392 if (TARGET_64BIT)
10393 emit_insn (gen_adddi3 (out, out, const1_rtx));
10394 else
10395 emit_insn (gen_addsi3 (out, out, const1_rtx));
10396 }
10397
10398 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10399 align this loop; it only enlarges the program and does not help
10400 to speed it up. */
10401 emit_label (align_4_label);
10402
10403 mem = gen_rtx_MEM (SImode, out);
10404 emit_move_insn (scratch, mem);
10405 if (TARGET_64BIT)
10406 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10407 else
10408 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10409
10410 /* This formula yields a nonzero result iff one of the bytes is zero.
10411 This saves three branches inside the loop and many cycles. */
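/* That is, (w - 0x01010101) & ~w & 0x80808080 is nonzero iff some byte
   of W is zero: a zero byte borrows to 0xff in the subtraction and has
   its sign bit set in ~w, while a nonzero byte can never have the sign
   bit set in both terms at once (bytes above the first zero do not
   matter, since the loop exits as soon as the result is nonzero).  */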
10412
10413 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10414 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10415 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10416 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10417 gen_int_mode (0x80808080, SImode)));
10418 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10419 align_4_label);
10420
10421 if (TARGET_CMOVE)
10422 {
10423 rtx reg = gen_reg_rtx (SImode);
10424 rtx reg2 = gen_reg_rtx (Pmode);
10425 emit_move_insn (reg, tmpreg);
10426 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10427
10428 /* If zero is not in the first two bytes, move two bytes forward. */
10429 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10430 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10431 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10432 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10433 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10434 reg,
10435 tmpreg)));
10436 /* Emit lea manually to avoid clobbering of flags. */
10437 emit_insn (gen_rtx_SET (SImode, reg2,
10438 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10439
10440 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10441 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10442 emit_insn (gen_rtx_SET (VOIDmode, out,
10443 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10444 reg2,
10445 out)));
10446
10447 }
10448 else
10449 {
10450 rtx end_2_label = gen_label_rtx ();
10451 /* Is zero in the first two bytes? */
10452
10453 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10454 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10455 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10456 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10457 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10458 pc_rtx);
10459 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10460 JUMP_LABEL (tmp) = end_2_label;
10461
10462 /* Not in the first two. Move two bytes forward. */
10463 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10464 if (TARGET_64BIT)
10465 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10466 else
10467 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10468
10469 emit_label (end_2_label);
10470
10471 }
10472
10473 /* Avoid branch in fixing the byte. */
10474 tmpreg = gen_lowpart (QImode, tmpreg);
10475 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10476 if (TARGET_64BIT)
10477 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10478 else
10479 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10480
10481 emit_label (end_0_label);
10482 }
10483
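/* Output the call described by the operands.  RETVAL, if nonzero, receives
   the return value; FNADDR is a MEM wrapping the call target; CALLARG1 is
   the argument-size operand of the CALL rtx; CALLARG2, when nonnegative
   on x86-64, is the number of SSE registers used by a varargs call and is
   passed in %al; POP is the number of bytes the callee pops, or const0_rtx
   for none.  */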
10484 void
10485 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10486 rtx retval, fnaddr, callarg1, callarg2, pop;
10487 {
10488 rtx use = NULL, call;
10489
10490 if (pop == const0_rtx)
10491 pop = NULL;
10492 if (TARGET_64BIT && pop)
10493 abort ();
10494
10495 /* Static functions and indirect calls don't need the pic register. */
10496 if (! TARGET_64BIT && flag_pic
10497 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10498 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10499 use_reg (&use, pic_offset_table_rtx);
10500
10501 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10502 {
10503 rtx al = gen_rtx_REG (QImode, 0);
10504 emit_move_insn (al, callarg2);
10505 use_reg (&use, al);
10506 }
10507
10508 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10509 {
10510 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10511 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10512 }
10513
10514 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10515 if (retval)
10516 call = gen_rtx_SET (VOIDmode, retval, call);
10517 if (pop)
10518 {
10519 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10520 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10521 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10522 }
10523
10524 call = emit_call_insn (call);
10525 if (use)
10526 CALL_INSN_FUNCTION_USAGE (call) = use;
10527 }
10528
10529 \f
10530 /* Clear stack slot assignments remembered from previous functions.
10531 This is called from INIT_EXPANDERS once before RTL is emitted for each
10532 function. */
10533
10534 static struct machine_function *
10535 ix86_init_machine_status ()
10536 {
10537 return ggc_alloc_cleared (sizeof (struct machine_function));
10538 }
10539
10540 /* Return a MEM corresponding to a stack slot with mode MODE.
10541 Allocate a new slot if necessary.
10542
10543 The RTL for a function can have several slots available: N is
10544 which slot to use. */
10545
10546 rtx
10547 assign_386_stack_local (mode, n)
10548 enum machine_mode mode;
10549 int n;
10550 {
10551 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10552 abort ();
10553
10554 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10555 ix86_stack_locals[(int) mode][n]
10556 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10557
10558 return ix86_stack_locals[(int) mode][n];
10559 }
10560
10561 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10562
10563 static GTY(()) rtx ix86_tls_symbol;
10564 rtx
10565 ix86_tls_get_addr ()
10566 {
10567
10568 if (!ix86_tls_symbol)
10569 {
10570 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10571 ? "___tls_get_addr"
10572 : "__tls_get_addr"));
10573 }
10574
10575 return ix86_tls_symbol;
10576 }
10577 \f
10578 /* Calculate the length of the memory address in the instruction
10579 encoding. Does not include the one-byte modrm, opcode, or prefix. */
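/* For example (32-bit addressing): "(%ecx)" needs no extra bytes, "(%esp)"
   and "(%ebp)" need one (SIB or disp8 forms), "12(%ecx)" adds a disp8,
   a bare symbol adds a disp32, and any indexed form such as
   "(%ecx,%edx,4)" adds the SIB byte on top of whatever displacement is
   present.  */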
10580
10581 static int
10582 memory_address_length (addr)
10583 rtx addr;
10584 {
10585 struct ix86_address parts;
10586 rtx base, index, disp;
10587 int len;
10588
10589 if (GET_CODE (addr) == PRE_DEC
10590 || GET_CODE (addr) == POST_INC
10591 || GET_CODE (addr) == PRE_MODIFY
10592 || GET_CODE (addr) == POST_MODIFY)
10593 return 0;
10594
10595 if (! ix86_decompose_address (addr, &parts))
10596 abort ();
10597
10598 base = parts.base;
10599 index = parts.index;
10600 disp = parts.disp;
10601 len = 0;
10602
10603 /* Register Indirect. */
10604 if (base && !index && !disp)
10605 {
10606 /* Special cases: ebp and esp need the two-byte modrm form. */
10607 if (addr == stack_pointer_rtx
10608 || addr == arg_pointer_rtx
10609 || addr == frame_pointer_rtx
10610 || addr == hard_frame_pointer_rtx)
10611 len = 1;
10612 }
10613
10614 /* Direct Addressing. */
10615 else if (disp && !base && !index)
10616 len = 4;
10617
10618 else
10619 {
10620 /* Find the length of the displacement constant. */
10621 if (disp)
10622 {
10623 if (GET_CODE (disp) == CONST_INT
10624 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10625 len = 1;
10626 else
10627 len = 4;
10628 }
10629
10630 /* An index requires the two-byte modrm form. */
10631 if (index)
10632 len += 1;
10633 }
10634
10635 return len;
10636 }
10637
10638 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
10639 is set, expect that the insn has an 8-bit immediate alternative. */
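/* For example, "addl $3, %eax" has an imm8 alternative and so counts one
   byte here, while "addl $300, %eax" needs a full 32-bit immediate; a
   HImode operation still only carries a 16-bit immediate.  */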
10640 int
10641 ix86_attr_length_immediate_default (insn, shortform)
10642 rtx insn;
10643 int shortform;
10644 {
10645 int len = 0;
10646 int i;
10647 extract_insn_cached (insn);
10648 for (i = recog_data.n_operands - 1; i >= 0; --i)
10649 if (CONSTANT_P (recog_data.operand[i]))
10650 {
10651 if (len)
10652 abort ();
10653 if (shortform
10654 && GET_CODE (recog_data.operand[i]) == CONST_INT
10655 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10656 len = 1;
10657 else
10658 {
10659 switch (get_attr_mode (insn))
10660 {
10661 case MODE_QI:
10662 len+=1;
10663 break;
10664 case MODE_HI:
10665 len+=2;
10666 break;
10667 case MODE_SI:
10668 len+=4;
10669 break;
10670 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10671 case MODE_DI:
10672 len+=4;
10673 break;
10674 default:
10675 fatal_insn ("unknown insn mode", insn);
10676 }
10677 }
10678 }
10679 return len;
10680 }
10681 /* Compute default value for "length_address" attribute. */
10682 int
10683 ix86_attr_length_address_default (insn)
10684 rtx insn;
10685 {
10686 int i;
10687 extract_insn_cached (insn);
10688 for (i = recog_data.n_operands - 1; i >= 0; --i)
10689 if (GET_CODE (recog_data.operand[i]) == MEM)
10690 {
10691 return memory_address_length (XEXP (recog_data.operand[i], 0));
10692 break;
10693 }
10694 return 0;
10695 }
10696 \f
10697 /* Return the maximum number of instructions a cpu can issue. */
10698
10699 static int
10700 ix86_issue_rate ()
10701 {
10702 switch (ix86_cpu)
10703 {
10704 case PROCESSOR_PENTIUM:
10705 case PROCESSOR_K6:
10706 return 2;
10707
10708 case PROCESSOR_PENTIUMPRO:
10709 case PROCESSOR_PENTIUM4:
10710 case PROCESSOR_ATHLON:
10711 return 3;
10712
10713 default:
10714 return 1;
10715 }
10716 }
10717
10718 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
10719 by DEP_INSN and nothing else set by DEP_INSN. */
10720
10721 static int
10722 ix86_flags_dependant (insn, dep_insn, insn_type)
10723 rtx insn, dep_insn;
10724 enum attr_type insn_type;
10725 {
10726 rtx set, set2;
10727
10728 /* Simplify the test for uninteresting insns. */
10729 if (insn_type != TYPE_SETCC
10730 && insn_type != TYPE_ICMOV
10731 && insn_type != TYPE_FCMOV
10732 && insn_type != TYPE_IBR)
10733 return 0;
10734
10735 if ((set = single_set (dep_insn)) != 0)
10736 {
10737 set = SET_DEST (set);
10738 set2 = NULL_RTX;
10739 }
10740 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10741 && XVECLEN (PATTERN (dep_insn), 0) == 2
10742 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10743 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10744 {
10745 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10746 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10747 }
10748 else
10749 return 0;
10750
10751 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10752 return 0;
10753
10754 /* This test is true if the dependent insn reads the flags but
10755 not any other potentially set register. */
10756 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10757 return 0;
10758
10759 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10760 return 0;
10761
10762 return 1;
10763 }
10764
10765 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10766 address with operands set by DEP_INSN. */
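/* For example, on the Pentium "addl $4, %eax" immediately followed by
   "movl (%eax), %ebx" suffers an address generation interlock, because
   the load's address uses a register written in the previous cycle.  */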
10767
10768 static int
10769 ix86_agi_dependant (insn, dep_insn, insn_type)
10770 rtx insn, dep_insn;
10771 enum attr_type insn_type;
10772 {
10773 rtx addr;
10774
10775 if (insn_type == TYPE_LEA
10776 && TARGET_PENTIUM)
10777 {
10778 addr = PATTERN (insn);
10779 if (GET_CODE (addr) == SET)
10780 ;
10781 else if (GET_CODE (addr) == PARALLEL
10782 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10783 addr = XVECEXP (addr, 0, 0);
10784 else
10785 abort ();
10786 addr = SET_SRC (addr);
10787 }
10788 else
10789 {
10790 int i;
10791 extract_insn_cached (insn);
10792 for (i = recog_data.n_operands - 1; i >= 0; --i)
10793 if (GET_CODE (recog_data.operand[i]) == MEM)
10794 {
10795 addr = XEXP (recog_data.operand[i], 0);
10796 goto found;
10797 }
10798 return 0;
10799 found:;
10800 }
10801
10802 return modified_in_p (addr, dep_insn);
10803 }
10804
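/* Implement the adjust_cost scheduler hook: given that INSN depends on
   DEP_INSN through LINK, return the latency the scheduler should assume
   instead of COST, tuned per processor below.  */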
10805 static int
10806 ix86_adjust_cost (insn, link, dep_insn, cost)
10807 rtx insn, link, dep_insn;
10808 int cost;
10809 {
10810 enum attr_type insn_type, dep_insn_type;
10811 enum attr_memory memory, dep_memory;
10812 rtx set, set2;
10813 int dep_insn_code_number;
10814
10815 /* Anti and output dependencies have zero cost on all CPUs. */
10816 if (REG_NOTE_KIND (link) != 0)
10817 return 0;
10818
10819 dep_insn_code_number = recog_memoized (dep_insn);
10820
10821 /* If we can't recognize the insns, we can't really do anything. */
10822 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10823 return cost;
10824
10825 insn_type = get_attr_type (insn);
10826 dep_insn_type = get_attr_type (dep_insn);
10827
10828 switch (ix86_cpu)
10829 {
10830 case PROCESSOR_PENTIUM:
10831 /* Address Generation Interlock adds a cycle of latency. */
10832 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10833 cost += 1;
10834
10835 /* ??? Compares pair with jump/setcc. */
10836 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10837 cost = 0;
10838
10839 /* Floating point stores require the value to be ready one cycle earlier. */
10840 if (insn_type == TYPE_FMOV
10841 && get_attr_memory (insn) == MEMORY_STORE
10842 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10843 cost += 1;
10844 break;
10845
10846 case PROCESSOR_PENTIUMPRO:
10847 memory = get_attr_memory (insn);
10848 dep_memory = get_attr_memory (dep_insn);
10849
10850 /* Since we can't represent delayed latencies of load+operation,
10851 increase the cost here for non-imov insns. */
10852 if (dep_insn_type != TYPE_IMOV
10853 && dep_insn_type != TYPE_FMOV
10854 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10855 cost += 1;
10856
10857 /* INT->FP conversion is expensive. */
10858 if (get_attr_fp_int_src (dep_insn))
10859 cost += 5;
10860
10861 /* There is one cycle extra latency between an FP op and a store. */
10862 if (insn_type == TYPE_FMOV
10863 && (set = single_set (dep_insn)) != NULL_RTX
10864 && (set2 = single_set (insn)) != NULL_RTX
10865 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10866 && GET_CODE (SET_DEST (set2)) == MEM)
10867 cost += 1;
10868
10869 /* Show the ability of the reorder buffer to hide the latency of a load
10870 by executing it in parallel with the previous instruction when the
10871 previous instruction is not needed to compute the address. */
10872 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10873 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10874 {
10875 /* Claim that moves take one cycle, as the core can issue one load
10876 at a time and the next load can start a cycle later. */
10877 if (dep_insn_type == TYPE_IMOV
10878 || dep_insn_type == TYPE_FMOV)
10879 cost = 1;
10880 else if (cost > 1)
10881 cost--;
10882 }
10883 break;
10884
10885 case PROCESSOR_K6:
10886 memory = get_attr_memory (insn);
10887 dep_memory = get_attr_memory (dep_insn);
10888 /* The esp dependency is resolved before the instruction is really
10889 finished. */
10890 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10891 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10892 return 1;
10893
10894 /* Since we can't represent delayed latencies of load+operation,
10895 increase the cost here for non-imov insns. */
10896 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10897 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10898
10899 /* INT->FP conversion is expensive. */
10900 if (get_attr_fp_int_src (dep_insn))
10901 cost += 5;
10902
10903 /* Show the ability of the reorder buffer to hide the latency of a load
10904 by executing it in parallel with the previous instruction when the
10905 previous instruction is not needed to compute the address. */
10906 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10907 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10908 {
10909 /* Claim that moves take one cycle, as the core can issue one load
10910 at a time and the next load can start a cycle later. */
10911 if (dep_insn_type == TYPE_IMOV
10912 || dep_insn_type == TYPE_FMOV)
10913 cost = 1;
10914 else if (cost > 2)
10915 cost -= 2;
10916 else
10917 cost = 1;
10918 }
10919 break;
10920
10921 case PROCESSOR_ATHLON:
10922 memory = get_attr_memory (insn);
10923 dep_memory = get_attr_memory (dep_insn);
10924
10925 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10926 {
10927 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10928 cost += 2;
10929 else
10930 cost += 3;
10931 }
10932 /* Show the ability of the reorder buffer to hide the latency of a load
10933 by executing it in parallel with the previous instruction when the
10934 previous instruction is not needed to compute the address. */
10935 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10936 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10937 {
10938 /* Claim that moves take one cycle, as the core can issue one load
10939 at a time and the next load can start a cycle later. */
10940 if (dep_insn_type == TYPE_IMOV
10941 || dep_insn_type == TYPE_FMOV)
10942 cost = 0;
10943 else if (cost >= 3)
10944 cost -= 3;
10945 else
10946 cost = 0;
10947 }
10948
10949 default:
10950 break;
10951 }
10952
10953 return cost;
10954 }
10955
10956 static union
10957 {
10958 struct ppro_sched_data
10959 {
10960 rtx decode[3];
10961 int issued_this_cycle;
10962 } ppro;
10963 } ix86_sched_data;
10964
10965 static enum attr_ppro_uops
10966 ix86_safe_ppro_uops (insn)
10967 rtx insn;
10968 {
10969 if (recog_memoized (insn) >= 0)
10970 return get_attr_ppro_uops (insn);
10971 else
10972 return PPRO_UOPS_MANY;
10973 }
10974
10975 static void
10976 ix86_dump_ppro_packet (dump)
10977 FILE *dump;
10978 {
10979 if (ix86_sched_data.ppro.decode[0])
10980 {
10981 fprintf (dump, "PPRO packet: %d",
10982 INSN_UID (ix86_sched_data.ppro.decode[0]));
10983 if (ix86_sched_data.ppro.decode[1])
10984 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10985 if (ix86_sched_data.ppro.decode[2])
10986 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10987 fputc ('\n', dump);
10988 }
10989 }
10990
10991 /* We're beginning a new block. Initialize data structures as necessary. */
10992
10993 static void
10994 ix86_sched_init (dump, sched_verbose, veclen)
10995 FILE *dump ATTRIBUTE_UNUSED;
10996 int sched_verbose ATTRIBUTE_UNUSED;
10997 int veclen ATTRIBUTE_UNUSED;
10998 {
10999 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11000 }
11001
11002 /* Shift INSN to SLOT, and shift everything else down. */
11003
11004 static void
11005 ix86_reorder_insn (insnp, slot)
11006 rtx *insnp, *slot;
11007 {
11008 if (insnp != slot)
11009 {
11010 rtx insn = *insnp;
11011 do
11012 insnp[0] = insnp[1];
11013 while (++insnp != slot);
11014 *insnp = insn;
11015 }
11016 }
11017
11018 static void
11019 ix86_sched_reorder_ppro (ready, e_ready)
11020 rtx *ready;
11021 rtx *e_ready;
11022 {
11023 rtx decode[3];
11024 enum attr_ppro_uops cur_uops;
11025 int issued_this_cycle;
11026 rtx *insnp;
11027 int i;
11028
11029 /* At this point .ppro.decode contains the state of the three
11030 decoders from last "cycle". That is, those insns that were
11031 actually independent. But here we're scheduling for the
11032 decoder, and we may find things that are decodable in the
11033 same cycle. */
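/* The PPro/PII front end decodes up to three insns per cycle in a 4-1-1
   pattern: decoder 0 accepts insns of up to four uops, decoders 1 and 2
   accept single-uop insns only, and anything longer is handed to the
   microcode sequencer -- which is what PPRO_UOPS_MANY models here.  */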
11034
11035 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11036 issued_this_cycle = 0;
11037
11038 insnp = e_ready;
11039 cur_uops = ix86_safe_ppro_uops (*insnp);
11040
11041 /* If the decoders are empty, and we have a complex insn at the
11042 head of the priority queue, let it issue without complaint. */
11043 if (decode[0] == NULL)
11044 {
11045 if (cur_uops == PPRO_UOPS_MANY)
11046 {
11047 decode[0] = *insnp;
11048 goto ppro_done;
11049 }
11050
11051 /* Otherwise, search for a 2-4 uop insn to issue. */
11052 while (cur_uops != PPRO_UOPS_FEW)
11053 {
11054 if (insnp == ready)
11055 break;
11056 cur_uops = ix86_safe_ppro_uops (*--insnp);
11057 }
11058
11059 /* If so, move it to the head of the line. */
11060 if (cur_uops == PPRO_UOPS_FEW)
11061 ix86_reorder_insn (insnp, e_ready);
11062
11063 /* Issue the head of the queue. */
11064 issued_this_cycle = 1;
11065 decode[0] = *e_ready--;
11066 }
11067
11068 /* Look for simple insns to fill in the other two slots. */
11069 for (i = 1; i < 3; ++i)
11070 if (decode[i] == NULL)
11071 {
11072 if (ready > e_ready)
11073 goto ppro_done;
11074
11075 insnp = e_ready;
11076 cur_uops = ix86_safe_ppro_uops (*insnp);
11077 while (cur_uops != PPRO_UOPS_ONE)
11078 {
11079 if (insnp == ready)
11080 break;
11081 cur_uops = ix86_safe_ppro_uops (*--insnp);
11082 }
11083
11084 /* Found one. Move it to the head of the queue and issue it. */
11085 if (cur_uops == PPRO_UOPS_ONE)
11086 {
11087 ix86_reorder_insn (insnp, e_ready);
11088 decode[i] = *e_ready--;
11089 issued_this_cycle++;
11090 continue;
11091 }
11092
11093 /* ??? Didn't find one. Ideally, here we would do a lazy split
11094 of 2-uop insns, issue one and queue the other. */
11095 }
11096
11097 ppro_done:
11098 if (issued_this_cycle == 0)
11099 issued_this_cycle = 1;
11100 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11101 }
11102
11103 /* We are about to begin issuing insns for this clock cycle.
11104 Override the default sort algorithm to better slot instructions. */
11105 static int
11106 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11107 FILE *dump ATTRIBUTE_UNUSED;
11108 int sched_verbose ATTRIBUTE_UNUSED;
11109 rtx *ready;
11110 int *n_readyp;
11111 int clock_var ATTRIBUTE_UNUSED;
11112 {
11113 int n_ready = *n_readyp;
11114 rtx *e_ready = ready + n_ready - 1;
11115
11116 /* Make sure to go ahead and initialize key items in
11117 ix86_sched_data if we are not going to bother trying to
11118 reorder the ready queue. */
11119 if (n_ready < 2)
11120 {
11121 ix86_sched_data.ppro.issued_this_cycle = 1;
11122 goto out;
11123 }
11124
11125 switch (ix86_cpu)
11126 {
11127 default:
11128 break;
11129
11130 case PROCESSOR_PENTIUMPRO:
11131 ix86_sched_reorder_ppro (ready, e_ready);
11132 break;
11133 }
11134
11135 out:
11136 return ix86_issue_rate ();
11137 }
11138
11139 /* We are about to issue INSN. Return the number of insns left on the
11140 ready queue that can be issued this cycle. */
11141
11142 static int
11143 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11144 FILE *dump;
11145 int sched_verbose;
11146 rtx insn;
11147 int can_issue_more;
11148 {
11149 int i;
11150 switch (ix86_cpu)
11151 {
11152 default:
11153 return can_issue_more - 1;
11154
11155 case PROCESSOR_PENTIUMPRO:
11156 {
11157 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11158
11159 if (uops == PPRO_UOPS_MANY)
11160 {
11161 if (sched_verbose)
11162 ix86_dump_ppro_packet (dump);
11163 ix86_sched_data.ppro.decode[0] = insn;
11164 ix86_sched_data.ppro.decode[1] = NULL;
11165 ix86_sched_data.ppro.decode[2] = NULL;
11166 if (sched_verbose)
11167 ix86_dump_ppro_packet (dump);
11168 ix86_sched_data.ppro.decode[0] = NULL;
11169 }
11170 else if (uops == PPRO_UOPS_FEW)
11171 {
11172 if (sched_verbose)
11173 ix86_dump_ppro_packet (dump);
11174 ix86_sched_data.ppro.decode[0] = insn;
11175 ix86_sched_data.ppro.decode[1] = NULL;
11176 ix86_sched_data.ppro.decode[2] = NULL;
11177 }
11178 else
11179 {
11180 for (i = 0; i < 3; ++i)
11181 if (ix86_sched_data.ppro.decode[i] == NULL)
11182 {
11183 ix86_sched_data.ppro.decode[i] = insn;
11184 break;
11185 }
11186 if (i == 3)
11187 abort ();
11188 if (i == 2)
11189 {
11190 if (sched_verbose)
11191 ix86_dump_ppro_packet (dump);
11192 ix86_sched_data.ppro.decode[0] = NULL;
11193 ix86_sched_data.ppro.decode[1] = NULL;
11194 ix86_sched_data.ppro.decode[2] = NULL;
11195 }
11196 }
11197 }
11198 return --ix86_sched_data.ppro.issued_this_cycle;
11199 }
11200 }
11201
11202 static int
11203 ia32_use_dfa_pipeline_interface ()
11204 {
11205 if (ix86_cpu == PROCESSOR_PENTIUM)
11206 return 1;
11207 return 0;
11208 }
11209
11210 /* How many alternative schedules to try. This should be as wide as the
11211 scheduling freedom in the DFA, but no wider. Making this value too
11212 large results in extra work for the scheduler. */
11213
11214 static int
11215 ia32_multipass_dfa_lookahead ()
11216 {
11217 if (ix86_cpu == PROCESSOR_PENTIUM)
11218 return 2;
11219 else
11220 return 0;
11221 }
11222
11223 \f
11224 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11225 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11226 appropriate. */
11227
11228 void
11229 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11230 rtx insns;
11231 rtx dstref, srcref, dstreg, srcreg;
11232 {
11233 rtx insn;
11234
11235 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11236 if (INSN_P (insn))
11237 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11238 dstreg, srcreg);
11239 }
11240
11241 /* Subroutine of above to actually do the updating by recursively walking
11242 the rtx. */
11243
11244 static void
11245 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11246 rtx x;
11247 rtx dstref, srcref, dstreg, srcreg;
11248 {
11249 enum rtx_code code = GET_CODE (x);
11250 const char *format_ptr = GET_RTX_FORMAT (code);
11251 int i, j;
11252
11253 if (code == MEM && XEXP (x, 0) == dstreg)
11254 MEM_COPY_ATTRIBUTES (x, dstref);
11255 else if (code == MEM && XEXP (x, 0) == srcreg)
11256 MEM_COPY_ATTRIBUTES (x, srcref);
11257
11258 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11259 {
11260 if (*format_ptr == 'e')
11261 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11262 dstreg, srcreg);
11263 else if (*format_ptr == 'E')
11264 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11265 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11266 dstreg, srcreg);
11267 }
11268 }
11269 \f
11270 /* Compute the alignment given to a constant that is being placed in memory.
11271 EXP is the constant and ALIGN is the alignment that the object would
11272 ordinarily have.
11273 The value of this function is used instead of that alignment to align
11274 the object. */
11275
11276 int
11277 ix86_constant_alignment (exp, align)
11278 tree exp;
11279 int align;
11280 {
11281 if (TREE_CODE (exp) == REAL_CST)
11282 {
11283 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11284 return 64;
11285 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11286 return 128;
11287 }
11288 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11289 && align < 256)
11290 return 256;
11291
11292 return align;
11293 }
11294
11295 /* Compute the alignment for a static variable.
11296 TYPE is the data type, and ALIGN is the alignment that
11297 the object would ordinarily have. The value of this function is used
11298 instead of that alignment to align the object. */
11299
11300 int
11301 ix86_data_alignment (type, align)
11302 tree type;
11303 int align;
11304 {
11305 if (AGGREGATE_TYPE_P (type)
11306 && TYPE_SIZE (type)
11307 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11308 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11309 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11310 return 256;
11311
11312 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11313 to a 16-byte boundary. */
11314 if (TARGET_64BIT)
11315 {
11316 if (AGGREGATE_TYPE_P (type)
11317 && TYPE_SIZE (type)
11318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11319 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11320 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11321 return 128;
11322 }
11323
11324 if (TREE_CODE (type) == ARRAY_TYPE)
11325 {
11326 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11327 return 64;
11328 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11329 return 128;
11330 }
11331 else if (TREE_CODE (type) == COMPLEX_TYPE)
11332 {
11333
11334 if (TYPE_MODE (type) == DCmode && align < 64)
11335 return 64;
11336 if (TYPE_MODE (type) == XCmode && align < 128)
11337 return 128;
11338 }
11339 else if ((TREE_CODE (type) == RECORD_TYPE
11340 || TREE_CODE (type) == UNION_TYPE
11341 || TREE_CODE (type) == QUAL_UNION_TYPE)
11342 && TYPE_FIELDS (type))
11343 {
11344 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11345 return 64;
11346 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11347 return 128;
11348 }
11349 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11350 || TREE_CODE (type) == INTEGER_TYPE)
11351 {
11352 if (TYPE_MODE (type) == DFmode && align < 64)
11353 return 64;
11354 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11355 return 128;
11356 }
11357
11358 return align;
11359 }
11360
11361 /* Compute the alignment for a local variable.
11362 TYPE is the data type, and ALIGN is the alignment that
11363 the object would ordinarily have. The value of this macro is used
11364 instead of that alignment to align the object. */
11365
11366 int
11367 ix86_local_alignment (type, align)
11368 tree type;
11369 int align;
11370 {
11371 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11372 to a 16-byte boundary. */
11373 if (TARGET_64BIT)
11374 {
11375 if (AGGREGATE_TYPE_P (type)
11376 && TYPE_SIZE (type)
11377 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11378 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11379 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11380 return 128;
11381 }
11382 if (TREE_CODE (type) == ARRAY_TYPE)
11383 {
11384 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11385 return 64;
11386 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11387 return 128;
11388 }
11389 else if (TREE_CODE (type) == COMPLEX_TYPE)
11390 {
11391 if (TYPE_MODE (type) == DCmode && align < 64)
11392 return 64;
11393 if (TYPE_MODE (type) == XCmode && align < 128)
11394 return 128;
11395 }
11396 else if ((TREE_CODE (type) == RECORD_TYPE
11397 || TREE_CODE (type) == UNION_TYPE
11398 || TREE_CODE (type) == QUAL_UNION_TYPE)
11399 && TYPE_FIELDS (type))
11400 {
11401 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11402 return 64;
11403 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11404 return 128;
11405 }
11406 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11407 || TREE_CODE (type) == INTEGER_TYPE)
11408 {
11409
11410 if (TYPE_MODE (type) == DFmode && align < 64)
11411 return 64;
11412 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11413 return 128;
11414 }
11415 return align;
11416 }
11417 \f
11418 /* Emit RTL insns to initialize the variable parts of a trampoline.
11419 FNADDR is an RTX for the address of the function's pure code.
11420 CXT is an RTX for the static chain value for the function. */
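/* The 32-bit trampoline written below is, in bytes:
       b9 <cxt32>     movl  $CXT, %ecx     (static chain)
       e9 <disp32>    jmp   FNADDR         (pc-relative)
   and the 64-bit one loads FNADDR into %r11 (movl or movabs), CXT into
   %r10 with a movabs, and ends with a "jmp *%r11" (49 ff e3).  */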
11421 void
11422 x86_initialize_trampoline (tramp, fnaddr, cxt)
11423 rtx tramp, fnaddr, cxt;
11424 {
11425 if (!TARGET_64BIT)
11426 {
11427 /* Compute offset from the end of the jmp to the target function. */
11428 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11429 plus_constant (tramp, 10),
11430 NULL_RTX, 1, OPTAB_DIRECT);
11431 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11432 gen_int_mode (0xb9, QImode));
11433 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11434 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11435 gen_int_mode (0xe9, QImode));
11436 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11437 }
11438 else
11439 {
11440 int offset = 0;
11441 /* Try to load address using shorter movl instead of movabs.
11442 We may want to support movq for kernel mode, but kernel does not use
11443 trampolines at the moment. */
11444 if (x86_64_zero_extended_value (fnaddr))
11445 {
11446 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11447 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11448 gen_int_mode (0xbb41, HImode));
11449 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11450 gen_lowpart (SImode, fnaddr));
11451 offset += 6;
11452 }
11453 else
11454 {
11455 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11456 gen_int_mode (0xbb49, HImode));
11457 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11458 fnaddr);
11459 offset += 10;
11460 }
11461 /* Load static chain using movabs to r10. */
11462 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11463 gen_int_mode (0xba49, HImode));
11464 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11465 cxt);
11466 offset += 10;
11467 /* Jump to r11. */
11468 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11469 gen_int_mode (0xff49, HImode));
11470 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11471 gen_int_mode (0xe3, QImode));
11472 offset += 3;
11473 if (offset > TRAMPOLINE_SIZE)
11474 abort ();
11475 }
11476 }
11477 \f
11478 #define def_builtin(MASK, NAME, TYPE, CODE) \
11479 do { \
11480 if ((MASK) & target_flags) \
11481 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11482 NULL, NULL_TREE); \
11483 } while (0)
11484
11485 struct builtin_description
11486 {
11487 const unsigned int mask;
11488 const enum insn_code icode;
11489 const char *const name;
11490 const enum ix86_builtins code;
11491 const enum rtx_code comparison;
11492 const unsigned int flag;
11493 };
11494
11495 /* Used for builtins that are enabled both by -msse and -msse2. */
11496 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11497
11498 static const struct builtin_description bdesc_comi[] =
11499 {
11500 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11501 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11502 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11503 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11504 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11505 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11506 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11507 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11508 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11509 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11510 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11511 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11512 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11513 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11514 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11515 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11516 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11517 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11518 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11519 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11520 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11521 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11522 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11523 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11524 };
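/* Illustrative user-level use of one of the comi builtins above (a
   sketch only; the vector typedef is illustrative, along the lines of
   the one in xmmintrin.h):

     typedef float __v4sf __attribute__ ((vector_size (16)));

     int
     first_elt_less (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }

   Note that the GT/GE entries reuse the LT/LE rtx codes with a nonzero
   flag field, presumably so the expander can swap the operands.  */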
11525
11526 static const struct builtin_description bdesc_2arg[] =
11527 {
11528 /* SSE */
11529 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11530 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11531 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11532 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11533 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11534 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11535 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11536 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11537
11538 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11539 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11540 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11541 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11542 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11543 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11544 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11545 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11546 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11547 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11548 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11549 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11550 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11551 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11552 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11553 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11554 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11555 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11556 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11557 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11558 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11559 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11560 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11561 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11562
11563 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11564 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11565 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11566 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11567
11568 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11569 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11570 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11571 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11572 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11573
11574 /* MMX */
11575 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11576 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11577 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11578 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11579 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11580 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11581
11582 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11583 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11584 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11585 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11586 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11587 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11588 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11589 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11590
11591 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11592 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11593 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11594
11595 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11596 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11597 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11598 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11599
11600 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11601 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11602
11603 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11604 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11605 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11606 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11607 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11608 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11609
11610 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11611 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11612 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11613 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11614
11615 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11616 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11617 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11618 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11619 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11620 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11621
11622 /* Special. */
11623 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11624 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11625 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11626
11627 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11628 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11629
11630 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11631 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11632 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11633 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11634 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11635 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11636
11637 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11638 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11639 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11640 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11641 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11642 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11643
11644 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11645 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11646 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11647 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11648
11649 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11650 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11651
11652 /* SSE2 */
11653 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11654 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11655 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11656 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11657 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11658 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11659 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11660 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11661
11662 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11663 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11664 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11665 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11666 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11667 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11668 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11669 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11670 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11671 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11672 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11673 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11674 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11675 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11676 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11677 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11678 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11679 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11680 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11681 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11682 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11683 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11684 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11685 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11686
11687 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11688 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11689 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11690 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11691
11692 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11693 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11694 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11695 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11696
11697 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11698 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11699 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11700
11701 /* SSE2 MMX */
11702 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11703 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11704 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11705 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11706 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11707 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11708 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11709 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11710
11711 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11712 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11713 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11714 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11715 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11716 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11717 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11718 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11719
11720 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11721 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11722 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11723 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11724
11725 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11726 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11728 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11729
11730 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11731 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11732
11733 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11735 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11736 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11737 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11738 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11739
11740 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11743 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11744
11745 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11751
11752 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11754 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11755
11756 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11757 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11758
11759 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11760 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11761 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11762 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11764 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11765
11766 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11767 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11768 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11769 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11770 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11771 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11772
11773 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11774 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11775 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11776 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11777
11778 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11779
11780 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11781 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11782 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11783 };
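/* Illustrative user-level use of the two-operand builtins above (a
   sketch only; the vector typedefs are illustrative, along the lines of
   those in mmintrin.h and xmmintrin.h):

     typedef int   __v2si __attribute__ ((vector_size (8)));
     typedef float __v4sf __attribute__ ((vector_size (16)));

     __v4sf
     scale (__v4sf x, __v4sf y)
     {
       return __builtin_ia32_mulps (x, y);
     }

     __v2si
     sum (__v2si x, __v2si y)
     {
       return __builtin_ia32_paddd (x, y);
     }

   Entries whose name field is 0 (the shifts, packs and conversions
   marked "Special") are instead given hand-written prototypes in
   ix86_init_mmx_sse_builtins.  */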
11784
11785 static const struct builtin_description bdesc_1arg[] =
11786 {
11787 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11788 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11789
11790 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11791 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11792 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11793
11794 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11795 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11796 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11797 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11798
11799 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11800 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11801 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11802
11803 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11804
11805 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11807
11808 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11813
11814 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11815
11816 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11818
11819 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11820 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11821 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11822 };
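/* Illustrative user-level use of the one-operand builtins above (a
   sketch only; the vector typedef is illustrative):

     typedef float __v4sf __attribute__ ((vector_size (16)));

     __v4sf
     recip_sqrt (__v4sf x)
     {
       return __builtin_ia32_rsqrtps (x);
     }

     int
     truncate_low (__v4sf x)
     {
       return __builtin_ia32_cvttss2si (x);
     }  */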
11823
11824 void
11825 ix86_init_builtins ()
11826 {
11827 if (TARGET_MMX)
11828 ix86_init_mmx_sse_builtins ();
11829 }
11830
11831 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
11832 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
11833 are defined.  */
11834 static void
11835 ix86_init_mmx_sse_builtins ()
11836 {
11837 const struct builtin_description * d;
11838 size_t i;
11839
11840 tree pchar_type_node = build_pointer_type (char_type_node);
11841 tree pfloat_type_node = build_pointer_type (float_type_node);
11842 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11843 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11844 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11845
11846 /* Comparisons. */
11847 tree int_ftype_v4sf_v4sf
11848 = build_function_type_list (integer_type_node,
11849 V4SF_type_node, V4SF_type_node, NULL_TREE);
11850 tree v4si_ftype_v4sf_v4sf
11851 = build_function_type_list (V4SI_type_node,
11852 V4SF_type_node, V4SF_type_node, NULL_TREE);
11853 /* MMX/SSE/integer conversions. */
11854 tree int_ftype_v4sf
11855 = build_function_type_list (integer_type_node,
11856 V4SF_type_node, NULL_TREE);
11857 tree int_ftype_v8qi
11858 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11859 tree v4sf_ftype_v4sf_int
11860 = build_function_type_list (V4SF_type_node,
11861 V4SF_type_node, integer_type_node, NULL_TREE);
11862 tree v4sf_ftype_v4sf_v2si
11863 = build_function_type_list (V4SF_type_node,
11864 V4SF_type_node, V2SI_type_node, NULL_TREE);
11865 tree int_ftype_v4hi_int
11866 = build_function_type_list (integer_type_node,
11867 V4HI_type_node, integer_type_node, NULL_TREE);
11868 tree v4hi_ftype_v4hi_int_int
11869 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11870 integer_type_node, integer_type_node,
11871 NULL_TREE);
11872 /* Miscellaneous. */
11873 tree v8qi_ftype_v4hi_v4hi
11874 = build_function_type_list (V8QI_type_node,
11875 V4HI_type_node, V4HI_type_node, NULL_TREE);
11876 tree v4hi_ftype_v2si_v2si
11877 = build_function_type_list (V4HI_type_node,
11878 V2SI_type_node, V2SI_type_node, NULL_TREE);
11879 tree v4sf_ftype_v4sf_v4sf_int
11880 = build_function_type_list (V4SF_type_node,
11881 V4SF_type_node, V4SF_type_node,
11882 integer_type_node, NULL_TREE);
11883 tree v2si_ftype_v4hi_v4hi
11884 = build_function_type_list (V2SI_type_node,
11885 V4HI_type_node, V4HI_type_node, NULL_TREE);
11886 tree v4hi_ftype_v4hi_int
11887 = build_function_type_list (V4HI_type_node,
11888 V4HI_type_node, integer_type_node, NULL_TREE);
11889 tree v4hi_ftype_v4hi_di
11890 = build_function_type_list (V4HI_type_node,
11891 V4HI_type_node, long_long_unsigned_type_node,
11892 NULL_TREE);
11893 tree v2si_ftype_v2si_di
11894 = build_function_type_list (V2SI_type_node,
11895 V2SI_type_node, long_long_unsigned_type_node,
11896 NULL_TREE);
11897 tree void_ftype_void
11898 = build_function_type (void_type_node, void_list_node);
11899 tree void_ftype_unsigned
11900 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
11901 tree unsigned_ftype_void
11902 = build_function_type (unsigned_type_node, void_list_node);
11903 tree di_ftype_void
11904 = build_function_type (long_long_unsigned_type_node, void_list_node);
11905 tree v4sf_ftype_void
11906 = build_function_type (V4SF_type_node, void_list_node);
11907 tree v2si_ftype_v4sf
11908 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
11909 /* Loads/stores. */
11910 tree void_ftype_v8qi_v8qi_pchar
11911 = build_function_type_list (void_type_node,
11912 V8QI_type_node, V8QI_type_node,
11913 pchar_type_node, NULL_TREE);
11914 tree v4sf_ftype_pfloat
11915 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
11916 /* @@@ the type is bogus */
11917 tree v4sf_ftype_v4sf_pv2si
11918 = build_function_type_list (V4SF_type_node,
11919 V4SF_type_node, pv2di_type_node, NULL_TREE);
11920 tree void_ftype_pv2si_v4sf
11921 = build_function_type_list (void_type_node,
11922 pv2di_type_node, V4SF_type_node, NULL_TREE);
11923 tree void_ftype_pfloat_v4sf
11924 = build_function_type_list (void_type_node,
11925 pfloat_type_node, V4SF_type_node, NULL_TREE);
11926 tree void_ftype_pdi_di
11927 = build_function_type_list (void_type_node,
11928 pdi_type_node, long_long_unsigned_type_node,
11929 NULL_TREE);
11930 tree void_ftype_pv2di_v2di
11931 = build_function_type_list (void_type_node,
11932 pv2di_type_node, V2DI_type_node, NULL_TREE);
11933 /* Normal vector unops. */
11934 tree v4sf_ftype_v4sf
11935 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
11936
11937 /* Normal vector binops. */
11938 tree v4sf_ftype_v4sf_v4sf
11939 = build_function_type_list (V4SF_type_node,
11940 V4SF_type_node, V4SF_type_node, NULL_TREE);
11941 tree v8qi_ftype_v8qi_v8qi
11942 = build_function_type_list (V8QI_type_node,
11943 V8QI_type_node, V8QI_type_node, NULL_TREE);
11944 tree v4hi_ftype_v4hi_v4hi
11945 = build_function_type_list (V4HI_type_node,
11946 V4HI_type_node, V4HI_type_node, NULL_TREE);
11947 tree v2si_ftype_v2si_v2si
11948 = build_function_type_list (V2SI_type_node,
11949 V2SI_type_node, V2SI_type_node, NULL_TREE);
11950 tree di_ftype_di_di
11951 = build_function_type_list (long_long_unsigned_type_node,
11952 long_long_unsigned_type_node,
11953 long_long_unsigned_type_node, NULL_TREE);
11954
11955 tree v2si_ftype_v2sf
11956 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
11957 tree v2sf_ftype_v2si
11958 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
11959 tree v2si_ftype_v2si
11960 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
11961 tree v2sf_ftype_v2sf
11962 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
11963 tree v2sf_ftype_v2sf_v2sf
11964 = build_function_type_list (V2SF_type_node,
11965 V2SF_type_node, V2SF_type_node, NULL_TREE);
11966 tree v2si_ftype_v2sf_v2sf
11967 = build_function_type_list (V2SI_type_node,
11968 V2SF_type_node, V2SF_type_node, NULL_TREE);
11969 tree pint_type_node = build_pointer_type (integer_type_node);
11970 tree pdouble_type_node = build_pointer_type (double_type_node);
11971 tree int_ftype_v2df_v2df
11972 = build_function_type_list (integer_type_node,
11973 V2DF_type_node, V2DF_type_node, NULL_TREE);
11974
11975 tree ti_ftype_void
11976 = build_function_type (intTI_type_node, void_list_node);
11977 tree ti_ftype_ti_ti
11978 = build_function_type_list (intTI_type_node,
11979 intTI_type_node, intTI_type_node, NULL_TREE);
11980 tree void_ftype_pvoid
11981 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
11982 tree v2di_ftype_di
11983 = build_function_type_list (V2DI_type_node,
11984 long_long_unsigned_type_node, NULL_TREE);
11985 tree v4sf_ftype_v4si
11986 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
11987 tree v4si_ftype_v4sf
11988 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
11989 tree v2df_ftype_v4si
11990 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
11991 tree v4si_ftype_v2df
11992 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
11993 tree v2si_ftype_v2df
11994 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
11995 tree v4sf_ftype_v2df
11996 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
11997 tree v2df_ftype_v2si
11998 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
11999 tree v2df_ftype_v4sf
12000 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12001 tree int_ftype_v2df
12002 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12003 tree v2df_ftype_v2df_int
12004 = build_function_type_list (V2DF_type_node,
12005 V2DF_type_node, integer_type_node, NULL_TREE);
12006 tree v4sf_ftype_v4sf_v2df
12007 = build_function_type_list (V4SF_type_node,
12008 V4SF_type_node, V2DF_type_node, NULL_TREE);
12009 tree v2df_ftype_v2df_v4sf
12010 = build_function_type_list (V2DF_type_node,
12011 V2DF_type_node, V4SF_type_node, NULL_TREE);
12012 tree v2df_ftype_v2df_v2df_int
12013 = build_function_type_list (V2DF_type_node,
12014 V2DF_type_node, V2DF_type_node,
12015 integer_type_node,
12016 NULL_TREE);
12017 tree v2df_ftype_v2df_pv2si
12018 = build_function_type_list (V2DF_type_node,
12019 V2DF_type_node, pv2si_type_node, NULL_TREE);
12020 tree void_ftype_pv2si_v2df
12021 = build_function_type_list (void_type_node,
12022 pv2si_type_node, V2DF_type_node, NULL_TREE);
12023 tree void_ftype_pdouble_v2df
12024 = build_function_type_list (void_type_node,
12025 pdouble_type_node, V2DF_type_node, NULL_TREE);
12026 tree void_ftype_pint_int
12027 = build_function_type_list (void_type_node,
12028 pint_type_node, integer_type_node, NULL_TREE);
12029 tree void_ftype_v16qi_v16qi_pchar
12030 = build_function_type_list (void_type_node,
12031 V16QI_type_node, V16QI_type_node,
12032 pchar_type_node, NULL_TREE);
12033 tree v2df_ftype_pdouble
12034 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12035 tree v2df_ftype_v2df_v2df
12036 = build_function_type_list (V2DF_type_node,
12037 V2DF_type_node, V2DF_type_node, NULL_TREE);
12038 tree v16qi_ftype_v16qi_v16qi
12039 = build_function_type_list (V16QI_type_node,
12040 V16QI_type_node, V16QI_type_node, NULL_TREE);
12041 tree v8hi_ftype_v8hi_v8hi
12042 = build_function_type_list (V8HI_type_node,
12043 V8HI_type_node, V8HI_type_node, NULL_TREE);
12044 tree v4si_ftype_v4si_v4si
12045 = build_function_type_list (V4SI_type_node,
12046 V4SI_type_node, V4SI_type_node, NULL_TREE);
12047 tree v2di_ftype_v2di_v2di
12048 = build_function_type_list (V2DI_type_node,
12049 V2DI_type_node, V2DI_type_node, NULL_TREE);
12050 tree v2di_ftype_v2df_v2df
12051 = build_function_type_list (V2DI_type_node,
12052 V2DF_type_node, V2DF_type_node, NULL_TREE);
12053 tree v2df_ftype_v2df
12054 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12055 tree v2df_ftype_double
12056 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12057 tree v2df_ftype_double_double
12058 = build_function_type_list (V2DF_type_node,
12059 double_type_node, double_type_node, NULL_TREE);
12060 tree int_ftype_v8hi_int
12061 = build_function_type_list (integer_type_node,
12062 V8HI_type_node, integer_type_node, NULL_TREE);
12063 tree v8hi_ftype_v8hi_int_int
12064 = build_function_type_list (V8HI_type_node,
12065 V8HI_type_node, integer_type_node,
12066 integer_type_node, NULL_TREE);
12067 tree v2di_ftype_v2di_int
12068 = build_function_type_list (V2DI_type_node,
12069 V2DI_type_node, integer_type_node, NULL_TREE);
12070 tree v4si_ftype_v4si_int
12071 = build_function_type_list (V4SI_type_node,
12072 V4SI_type_node, integer_type_node, NULL_TREE);
12073 tree v8hi_ftype_v8hi_int
12074 = build_function_type_list (V8HI_type_node,
12075 V8HI_type_node, integer_type_node, NULL_TREE);
12076 tree v8hi_ftype_v8hi_v2di
12077 = build_function_type_list (V8HI_type_node,
12078 V8HI_type_node, V2DI_type_node, NULL_TREE);
12079 tree v4si_ftype_v4si_v2di
12080 = build_function_type_list (V4SI_type_node,
12081 V4SI_type_node, V2DI_type_node, NULL_TREE);
12082 tree v4si_ftype_v8hi_v8hi
12083 = build_function_type_list (V4SI_type_node,
12084 V8HI_type_node, V8HI_type_node, NULL_TREE);
12085 tree di_ftype_v8qi_v8qi
12086 = build_function_type_list (long_long_unsigned_type_node,
12087 V8QI_type_node, V8QI_type_node, NULL_TREE);
12088 tree v2di_ftype_v16qi_v16qi
12089 = build_function_type_list (V2DI_type_node,
12090 V16QI_type_node, V16QI_type_node, NULL_TREE);
12091 tree int_ftype_v16qi
12092 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12093
12094 /* Add all builtins that are more or less simple operations on two
12095 operands. */
12096 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12097 {
12098 /* Use one of the operands; the target can have a different mode for
12099 mask-generating compares. */
12100 enum machine_mode mode;
12101 tree type;
12102
12103 if (d->name == 0)
12104 continue;
12105 mode = insn_data[d->icode].operand[1].mode;
12106
12107 switch (mode)
12108 {
12109 case V16QImode:
12110 type = v16qi_ftype_v16qi_v16qi;
12111 break;
12112 case V8HImode:
12113 type = v8hi_ftype_v8hi_v8hi;
12114 break;
12115 case V4SImode:
12116 type = v4si_ftype_v4si_v4si;
12117 break;
12118 case V2DImode:
12119 type = v2di_ftype_v2di_v2di;
12120 break;
12121 case V2DFmode:
12122 type = v2df_ftype_v2df_v2df;
12123 break;
12124 case TImode:
12125 type = ti_ftype_ti_ti;
12126 break;
12127 case V4SFmode:
12128 type = v4sf_ftype_v4sf_v4sf;
12129 break;
12130 case V8QImode:
12131 type = v8qi_ftype_v8qi_v8qi;
12132 break;
12133 case V4HImode:
12134 type = v4hi_ftype_v4hi_v4hi;
12135 break;
12136 case V2SImode:
12137 type = v2si_ftype_v2si_v2si;
12138 break;
12139 case DImode:
12140 type = di_ftype_di_di;
12141 break;
12142
12143 default:
12144 abort ();
12145 }
12146
12147 /* Override for comparisons. */
12148 if (d->icode == CODE_FOR_maskcmpv4sf3
12149 || d->icode == CODE_FOR_maskncmpv4sf3
12150 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12151 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12152 type = v4si_ftype_v4sf_v4sf;
12153
12154 if (d->icode == CODE_FOR_maskcmpv2df3
12155 || d->icode == CODE_FOR_maskncmpv2df3
12156 || d->icode == CODE_FOR_vmmaskcmpv2df3
12157 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12158 type = v2di_ftype_v2df_v2df;
12159
12160 def_builtin (d->mask, d->name, type, d->code);
12161 }
12162
12163 /* Add the remaining MMX insns with somewhat more complicated types. */
12164 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12165 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12166 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12167 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12168 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12169 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12170 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12171
12172 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12173 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12174 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12175
12176 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12177 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12178
12179 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12180 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12181
12182 /* comi/ucomi insns. */
12183 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12184 if (d->mask == MASK_SSE2)
12185 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12186 else
12187 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12188
12189 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12190 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12191 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12192
12193 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12194 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12195 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12196 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12197 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12198 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12199
12200 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12201 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12202 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12203 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12204
12205 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12206 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12207
12208 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12209
12210 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12211 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12212 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12213 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12214 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12215 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12216
12217 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12218 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12219 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12220 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12221
12222 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12223 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12224 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12225 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12226
12227 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12228
12229 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12230
12231 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12232 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12233 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12234 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12235 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12236 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12237
12238 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12239
12240 /* Original 3DNow! */
12241 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12242 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12243 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12244 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12245 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12256 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12257 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12258 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12259 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12260 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12261
12262 /* 3DNow! extension as used in the Athlon CPU. */
12263 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12264 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12265 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12266 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12267 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12268 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12269
12270 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12271
12272 /* SSE2 */
12273 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12274 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12275
12276 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12277 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12278
12279 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12280 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12281 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12282 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12283 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12284 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12285
12286 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12287 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12288 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12289 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12290
12291 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12292 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12293 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12294 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12295 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12296
12297 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12298 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12299 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12300 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12301
12302 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12303 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12304
12305 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12306
12307 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12308 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12309
12310 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12311 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12312 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12313 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12314 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12315
12316 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12317
12318 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12319 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12320
12321 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12322 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12323 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12324
12325 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12326 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12327 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12328
12329 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12330 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12331 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12332 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12333 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12334 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12335 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12336
12337 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12338 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12339 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12340
12341 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12342 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12343 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12344
12345 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12346 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12347 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12348
12349 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12350 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12351
12352 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12353 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12354 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12355
12356 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12357 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12358 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12359
12360 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12361 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12362
12363 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12364 }
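/* Illustrative user-level use of one of the hand-prototyped builtins
   registered above, e.g. an MMX shift whose count argument is DImode
   (a sketch only; the vector typedef is illustrative):

     typedef short __v4hi __attribute__ ((vector_size (8)));

     __v4hi
     shift_words_left (__v4hi x, unsigned long long count)
     {
       return __builtin_ia32_psllw (x, count);
     }  */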
12365
12366 /* Errors in the source file can cause expand_expr to return const0_rtx
12367 where we expect a vector. To avoid crashing, use one of the vector
12368 clear instructions. */
12369 static rtx
12370 safe_vector_operand (x, mode)
12371 rtx x;
12372 enum machine_mode mode;
12373 {
12374 if (x != const0_rtx)
12375 return x;
12376 x = gen_reg_rtx (mode);
12377
12378 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12379 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12380 : gen_rtx_SUBREG (DImode, x, 0)));
12381 else
12382 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12383 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12384 return x;
12385 }
12386
12387 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12388
12389 static rtx
12390 ix86_expand_binop_builtin (icode, arglist, target)
12391 enum insn_code icode;
12392 tree arglist;
12393 rtx target;
12394 {
12395 rtx pat;
12396 tree arg0 = TREE_VALUE (arglist);
12397 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12398 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12399 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12400 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12401 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12402 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12403
12404 if (VECTOR_MODE_P (mode0))
12405 op0 = safe_vector_operand (op0, mode0);
12406 if (VECTOR_MODE_P (mode1))
12407 op1 = safe_vector_operand (op1, mode1);
12408
12409 if (! target
12410 || GET_MODE (target) != tmode
12411 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12412 target = gen_reg_rtx (tmode);
12413
12414 /* In case the insn wants input operands in modes different from
12415 the result, abort. */
12416 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12417 abort ();
12418
12419 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12420 op0 = copy_to_mode_reg (mode0, op0);
12421 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12422 op1 = copy_to_mode_reg (mode1, op1);
12423
12424 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12425 yet one of the two must not be a memory. This is normally enforced
12426 by expanders, but we didn't bother to create one here. */
12427 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12428 op0 = copy_to_mode_reg (mode0, op0);
12429
12430 pat = GEN_FCN (icode) (target, op0, op1);
12431 if (! pat)
12432 return 0;
12433 emit_insn (pat);
12434 return target;
12435 }
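
/* As an illustration: a two-operand builtin such as
   __builtin_ia32_pmaddwd128 (declared above with type
   v4si_ftype_v8hi_v8hi) would normally reach this routine through the
   bdesc_2arg loop in ix86_expand_builtin below; its two arguments are
   expanded, forced into suitable registers, and fed to the named
   pattern.  */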
12436
12437 /* In type_for_mode we restrict the ability to create TImode types
12438 to hosts with a 64-bit HOST_WIDE_INT. So we've defined the SSE logicals
12439 to have a V4SFmode signature. Convert them in-place to TImode. */
12440
12441 static rtx
12442 ix86_expand_timode_binop_builtin (icode, arglist, target)
12443 enum insn_code icode;
12444 tree arglist;
12445 rtx target;
12446 {
12447 rtx pat;
12448 tree arg0 = TREE_VALUE (arglist);
12449 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12450 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12451 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12452
12453 op0 = gen_lowpart (TImode, op0);
12454 op1 = gen_lowpart (TImode, op1);
12455 target = gen_reg_rtx (TImode);
12456
12457 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12458 op0 = copy_to_mode_reg (TImode, op0);
12459 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12460 op1 = copy_to_mode_reg (TImode, op1);
12461
12462 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12463 yet one of the two must not be a memory. This is normally enforced
12464 by expanders, but we didn't bother to create one here. */
12465 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12466 op0 = copy_to_mode_reg (TImode, op0);
12467
12468 pat = GEN_FCN (icode) (target, op0, op1);
12469 if (! pat)
12470 return 0;
12471 emit_insn (pat);
12472
12473 return gen_lowpart (V4SFmode, target);
12474 }
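
/* For example, __builtin_ia32_andps is declared with a V4SFmode
   signature, yet the IX86_BUILTIN_ANDPS case below expands it through
   CODE_FOR_sse_andti3; this routine reinterprets the operands as TImode
   and hands the result back as V4SFmode.  */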
12475
12476 /* Subroutine of ix86_expand_builtin to take care of stores. */
12477
12478 static rtx
12479 ix86_expand_store_builtin (icode, arglist)
12480 enum insn_code icode;
12481 tree arglist;
12482 {
12483 rtx pat;
12484 tree arg0 = TREE_VALUE (arglist);
12485 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12486 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12487 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12488 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12489 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12490
12491 if (VECTOR_MODE_P (mode1))
12492 op1 = safe_vector_operand (op1, mode1);
12493
12494 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12495
12496 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12497 op1 = copy_to_mode_reg (mode1, op1);
12498
12499 pat = GEN_FCN (icode) (op0, op1);
12500 if (pat)
12501 emit_insn (pat);
12502 return 0;
12503 }
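
/* For example, __builtin_ia32_storeaps (IX86_BUILTIN_STOREAPS below)
   goes through here: the first argument is a pointer that becomes the
   destination MEM, the second is the vector value being stored, and
   nothing useful is returned.  */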
12504
12505 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12506
12507 static rtx
12508 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12509 enum insn_code icode;
12510 tree arglist;
12511 rtx target;
12512 int do_load;
12513 {
12514 rtx pat;
12515 tree arg0 = TREE_VALUE (arglist);
12516 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12517 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12518 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12519
12520 if (! target
12521 || GET_MODE (target) != tmode
12522 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12523 target = gen_reg_rtx (tmode);
12524 if (do_load)
12525 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12526 else
12527 {
12528 if (VECTOR_MODE_P (mode0))
12529 op0 = safe_vector_operand (op0, mode0);
12530
12531 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12532 op0 = copy_to_mode_reg (mode0, op0);
12533 }
12534
12535 pat = GEN_FCN (icode) (target, op0);
12536 if (! pat)
12537 return 0;
12538 emit_insn (pat);
12539 return target;
12540 }
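
/* For example, __builtin_ia32_loadaps (IX86_BUILTIN_LOADAPS below) is
   expanded with DO_LOAD nonzero, so its single pointer argument is
   wrapped in a MEM; the one-operand builtins dispatched through
   bdesc_1arg instead pass DO_LOAD as zero.  */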
12541
12542 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12543 sqrtss, rsqrtss, rcpss. */
12544
12545 static rtx
12546 ix86_expand_unop1_builtin (icode, arglist, target)
12547 enum insn_code icode;
12548 tree arglist;
12549 rtx target;
12550 {
12551 rtx pat;
12552 tree arg0 = TREE_VALUE (arglist);
12553 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12554 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12555 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12556
12557 if (! target
12558 || GET_MODE (target) != tmode
12559 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12560 target = gen_reg_rtx (tmode);
12561
12562 if (VECTOR_MODE_P (mode0))
12563 op0 = safe_vector_operand (op0, mode0);
12564
12565 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12566 op0 = copy_to_mode_reg (mode0, op0);
12567
12568 op1 = op0;
12569 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12570 op1 = copy_to_mode_reg (mode0, op1);
12571
12572 pat = GEN_FCN (icode) (target, op0, op1);
12573 if (! pat)
12574 return 0;
12575 emit_insn (pat);
12576 return target;
12577 }
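
/* The vm* patterns used for these scalar insns take two vector
   operands, the second supplying the elements that the scalar operation
   leaves untouched; since the builtin has only one argument, OP0 is
   simply reused as OP1 above.  */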
12578
12579 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12580
12581 static rtx
12582 ix86_expand_sse_compare (d, arglist, target)
12583 const struct builtin_description *d;
12584 tree arglist;
12585 rtx target;
12586 {
12587 rtx pat;
12588 tree arg0 = TREE_VALUE (arglist);
12589 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12590 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12591 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12592 rtx op2;
12593 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12594 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12595 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12596 enum rtx_code comparison = d->comparison;
12597
12598 if (VECTOR_MODE_P (mode0))
12599 op0 = safe_vector_operand (op0, mode0);
12600 if (VECTOR_MODE_P (mode1))
12601 op1 = safe_vector_operand (op1, mode1);
12602
12603 /* Swap operands if we have a comparison that isn't available in
12604 hardware. */
12605 if (d->flag)
12606 {
12607 rtx tmp = gen_reg_rtx (mode1);
12608 emit_move_insn (tmp, op1);
12609 op1 = op0;
12610 op0 = tmp;
12611 }
12612
12613 if (! target
12614 || GET_MODE (target) != tmode
12615 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12616 target = gen_reg_rtx (tmode);
12617
12618 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12619 op0 = copy_to_mode_reg (mode0, op0);
12620 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12621 op1 = copy_to_mode_reg (mode1, op1);
12622
12623 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12624 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12625 if (! pat)
12626 return 0;
12627 emit_insn (pat);
12628 return target;
12629 }
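
/* The D->FLAG swap covers comparisons such as "greater than" that have
   no direct SSE encoding; they are emitted as the mirrored comparison
   (e.g. "less than") with the two operands exchanged.  */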
12630
12631 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12632
12633 static rtx
12634 ix86_expand_sse_comi (d, arglist, target)
12635 const struct builtin_description *d;
12636 tree arglist;
12637 rtx target;
12638 {
12639 rtx pat;
12640 tree arg0 = TREE_VALUE (arglist);
12641 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12642 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12643 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12644 rtx op2;
12645 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12646 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12647 enum rtx_code comparison = d->comparison;
12648
12649 if (VECTOR_MODE_P (mode0))
12650 op0 = safe_vector_operand (op0, mode0);
12651 if (VECTOR_MODE_P (mode1))
12652 op1 = safe_vector_operand (op1, mode1);
12653
12654 /* Swap operands if we have a comparison that isn't available in
12655 hardware. */
12656 if (d->flag)
12657 {
12658 rtx tmp = op1;
12659 op1 = op0;
12660 op0 = tmp;
12661 }
12662
12663 target = gen_reg_rtx (SImode);
12664 emit_move_insn (target, const0_rtx);
12665 target = gen_rtx_SUBREG (QImode, target, 0);
12666
12667 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12668 op0 = copy_to_mode_reg (mode0, op0);
12669 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12670 op1 = copy_to_mode_reg (mode1, op1);
12671
12672 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12673 pat = GEN_FCN (d->icode) (op0, op1, op2);
12674 if (! pat)
12675 return 0;
12676 emit_insn (pat);
12677 emit_insn (gen_rtx_SET (VOIDmode,
12678 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12679 gen_rtx_fmt_ee (comparison, QImode,
12680 gen_rtx_REG (CCmode, FLAGS_REG),
12681 const0_rtx)));
12682
12683 return SUBREG_REG (target);
12684 }
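
/* For example, __builtin_ia32_comieq ends up here: the compare insn is
   emitted, then the low byte of a zeroed SImode register is set from
   the resulting flags, so the builtin yields an int of 0 or 1.  */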
12685
12686 /* Expand an expression EXP that calls a built-in function,
12687 with result going to TARGET if that's convenient
12688 (and in mode MODE if that's convenient).
12689 SUBTARGET may be used as the target for computing one of EXP's operands.
12690 IGNORE is nonzero if the value is to be ignored. */
12691
12692 rtx
12693 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12694 tree exp;
12695 rtx target;
12696 rtx subtarget ATTRIBUTE_UNUSED;
12697 enum machine_mode mode ATTRIBUTE_UNUSED;
12698 int ignore ATTRIBUTE_UNUSED;
12699 {
12700 const struct builtin_description *d;
12701 size_t i;
12702 enum insn_code icode;
12703 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12704 tree arglist = TREE_OPERAND (exp, 1);
12705 tree arg0, arg1, arg2;
12706 rtx op0, op1, op2, pat;
12707 enum machine_mode tmode, mode0, mode1, mode2;
12708 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12709
12710 switch (fcode)
12711 {
12712 case IX86_BUILTIN_EMMS:
12713 emit_insn (gen_emms ());
12714 return 0;
12715
12716 case IX86_BUILTIN_SFENCE:
12717 emit_insn (gen_sfence ());
12718 return 0;
12719
12720 case IX86_BUILTIN_PEXTRW:
12721 case IX86_BUILTIN_PEXTRW128:
12722 icode = (fcode == IX86_BUILTIN_PEXTRW
12723 ? CODE_FOR_mmx_pextrw
12724 : CODE_FOR_sse2_pextrw);
12725 arg0 = TREE_VALUE (arglist);
12726 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12727 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12728 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12729 tmode = insn_data[icode].operand[0].mode;
12730 mode0 = insn_data[icode].operand[1].mode;
12731 mode1 = insn_data[icode].operand[2].mode;
12732
12733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12734 op0 = copy_to_mode_reg (mode0, op0);
12735 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12736 {
12737 /* @@@ better error message */
12738 error ("selector must be an immediate");
12739 return gen_reg_rtx (tmode);
12740 }
12741 if (target == 0
12742 || GET_MODE (target) != tmode
12743 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12744 target = gen_reg_rtx (tmode);
12745 pat = GEN_FCN (icode) (target, op0, op1);
12746 if (! pat)
12747 return 0;
12748 emit_insn (pat);
12749 return target;
12750
12751 case IX86_BUILTIN_PINSRW:
12752 case IX86_BUILTIN_PINSRW128:
12753 icode = (fcode == IX86_BUILTIN_PINSRW
12754 ? CODE_FOR_mmx_pinsrw
12755 : CODE_FOR_sse2_pinsrw);
12756 arg0 = TREE_VALUE (arglist);
12757 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12758 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12759 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12760 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12761 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12762 tmode = insn_data[icode].operand[0].mode;
12763 mode0 = insn_data[icode].operand[1].mode;
12764 mode1 = insn_data[icode].operand[2].mode;
12765 mode2 = insn_data[icode].operand[3].mode;
12766
12767 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12768 op0 = copy_to_mode_reg (mode0, op0);
12769 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12770 op1 = copy_to_mode_reg (mode1, op1);
12771 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12772 {
12773 /* @@@ better error message */
12774 error ("selector must be an immediate");
12775 return const0_rtx;
12776 }
12777 if (target == 0
12778 || GET_MODE (target) != tmode
12779 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12780 target = gen_reg_rtx (tmode);
12781 pat = GEN_FCN (icode) (target, op0, op1, op2);
12782 if (! pat)
12783 return 0;
12784 emit_insn (pat);
12785 return target;
12786
12787 case IX86_BUILTIN_MASKMOVQ:
12788 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12789 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12790 : CODE_FOR_sse2_maskmovdqu);
12791 /* Note the arg order is different from the operand order. */
12792 arg1 = TREE_VALUE (arglist);
12793 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12794 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12795 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12796 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12797 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12798 mode0 = insn_data[icode].operand[0].mode;
12799 mode1 = insn_data[icode].operand[1].mode;
12800 mode2 = insn_data[icode].operand[2].mode;
12801
12802 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12803 op0 = copy_to_mode_reg (mode0, op0);
12804 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12805 op1 = copy_to_mode_reg (mode1, op1);
12806 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12807 op2 = copy_to_mode_reg (mode2, op2);
12808 pat = GEN_FCN (icode) (op0, op1, op2);
12809 if (! pat)
12810 return 0;
12811 emit_insn (pat);
12812 return 0;
12813
12814 case IX86_BUILTIN_SQRTSS:
12815 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12816 case IX86_BUILTIN_RSQRTSS:
12817 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12818 case IX86_BUILTIN_RCPSS:
12819 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12820
12821 case IX86_BUILTIN_ANDPS:
12822 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12823 arglist, target);
12824 case IX86_BUILTIN_ANDNPS:
12825 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12826 arglist, target);
12827 case IX86_BUILTIN_ORPS:
12828 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12829 arglist, target);
12830 case IX86_BUILTIN_XORPS:
12831 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12832 arglist, target);
12833
12834 case IX86_BUILTIN_LOADAPS:
12835 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12836
12837 case IX86_BUILTIN_LOADUPS:
12838 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12839
12840 case IX86_BUILTIN_STOREAPS:
12841 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12842 case IX86_BUILTIN_STOREUPS:
12843 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12844
12845 case IX86_BUILTIN_LOADSS:
12846 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12847
12848 case IX86_BUILTIN_STORESS:
12849 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12850
12851 case IX86_BUILTIN_LOADHPS:
12852 case IX86_BUILTIN_LOADLPS:
12853 case IX86_BUILTIN_LOADHPD:
12854 case IX86_BUILTIN_LOADLPD:
12855 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12856 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12857 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12858 : CODE_FOR_sse2_movlpd);
12859 arg0 = TREE_VALUE (arglist);
12860 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12861 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12862 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12863 tmode = insn_data[icode].operand[0].mode;
12864 mode0 = insn_data[icode].operand[1].mode;
12865 mode1 = insn_data[icode].operand[2].mode;
12866
12867 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12868 op0 = copy_to_mode_reg (mode0, op0);
12869 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12870 if (target == 0
12871 || GET_MODE (target) != tmode
12872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12873 target = gen_reg_rtx (tmode);
12874 pat = GEN_FCN (icode) (target, op0, op1);
12875 if (! pat)
12876 return 0;
12877 emit_insn (pat);
12878 return target;
12879
12880 case IX86_BUILTIN_STOREHPS:
12881 case IX86_BUILTIN_STORELPS:
12882 case IX86_BUILTIN_STOREHPD:
12883 case IX86_BUILTIN_STORELPD:
12884 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12885 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12886 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12887 : CODE_FOR_sse2_movlpd);
12888 arg0 = TREE_VALUE (arglist);
12889 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12890 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12891 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12892 mode0 = insn_data[icode].operand[1].mode;
12893 mode1 = insn_data[icode].operand[2].mode;
12894
12895 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12896 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12897 op1 = copy_to_mode_reg (mode1, op1);
12898
12899 pat = GEN_FCN (icode) (op0, op0, op1);
12900 if (! pat)
12901 return 0;
12902 emit_insn (pat);
12903 return 0;
12904
12905 case IX86_BUILTIN_MOVNTPS:
12906 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12907 case IX86_BUILTIN_MOVNTQ:
12908 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12909
12910 case IX86_BUILTIN_LDMXCSR:
12911 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12912 target = assign_386_stack_local (SImode, 0);
12913 emit_move_insn (target, op0);
12914 emit_insn (gen_ldmxcsr (target));
12915 return 0;
12916
12917 case IX86_BUILTIN_STMXCSR:
12918 target = assign_386_stack_local (SImode, 0);
12919 emit_insn (gen_stmxcsr (target));
12920 return copy_to_mode_reg (SImode, target);
12921
12922 case IX86_BUILTIN_SHUFPS:
12923 case IX86_BUILTIN_SHUFPD:
12924 icode = (fcode == IX86_BUILTIN_SHUFPS
12925 ? CODE_FOR_sse_shufps
12926 : CODE_FOR_sse2_shufpd);
12927 arg0 = TREE_VALUE (arglist);
12928 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12929 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12930 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12931 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12932 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12933 tmode = insn_data[icode].operand[0].mode;
12934 mode0 = insn_data[icode].operand[1].mode;
12935 mode1 = insn_data[icode].operand[2].mode;
12936 mode2 = insn_data[icode].operand[3].mode;
12937
12938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12939 op0 = copy_to_mode_reg (mode0, op0);
12940 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12941 op1 = copy_to_mode_reg (mode1, op1);
12942 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12943 {
12944 /* @@@ better error message */
12945 error ("mask must be an immediate");
12946 return gen_reg_rtx (tmode);
12947 }
12948 if (target == 0
12949 || GET_MODE (target) != tmode
12950 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12951 target = gen_reg_rtx (tmode);
12952 pat = GEN_FCN (icode) (target, op0, op1, op2);
12953 if (! pat)
12954 return 0;
12955 emit_insn (pat);
12956 return target;
12957
12958 case IX86_BUILTIN_PSHUFW:
12959 case IX86_BUILTIN_PSHUFD:
12960 case IX86_BUILTIN_PSHUFHW:
12961 case IX86_BUILTIN_PSHUFLW:
12962 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12963 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12964 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12965 : CODE_FOR_mmx_pshufw);
12966 arg0 = TREE_VALUE (arglist);
12967 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12968 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12969 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12970 tmode = insn_data[icode].operand[0].mode;
12971 mode1 = insn_data[icode].operand[1].mode;
12972 mode2 = insn_data[icode].operand[2].mode;
12973
12974 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12975 op0 = copy_to_mode_reg (mode1, op0);
12976 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12977 {
12978 /* @@@ better error message */
12979 error ("mask must be an immediate");
12980 return const0_rtx;
12981 }
12982 if (target == 0
12983 || GET_MODE (target) != tmode
12984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12985 target = gen_reg_rtx (tmode);
12986 pat = GEN_FCN (icode) (target, op0, op1);
12987 if (! pat)
12988 return 0;
12989 emit_insn (pat);
12990 return target;
12991
12992 case IX86_BUILTIN_FEMMS:
12993 emit_insn (gen_femms ());
12994 return NULL_RTX;
12995
12996 case IX86_BUILTIN_PAVGUSB:
12997 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12998
12999 case IX86_BUILTIN_PF2ID:
13000 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13001
13002 case IX86_BUILTIN_PFACC:
13003 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13004
13005 case IX86_BUILTIN_PFADD:
13006 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13007
13008 case IX86_BUILTIN_PFCMPEQ:
13009 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13010
13011 case IX86_BUILTIN_PFCMPGE:
13012 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13013
13014 case IX86_BUILTIN_PFCMPGT:
13015 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13016
13017 case IX86_BUILTIN_PFMAX:
13018 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13019
13020 case IX86_BUILTIN_PFMIN:
13021 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13022
13023 case IX86_BUILTIN_PFMUL:
13024 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13025
13026 case IX86_BUILTIN_PFRCP:
13027 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13028
13029 case IX86_BUILTIN_PFRCPIT1:
13030 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13031
13032 case IX86_BUILTIN_PFRCPIT2:
13033 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13034
13035 case IX86_BUILTIN_PFRSQIT1:
13036 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13037
13038 case IX86_BUILTIN_PFRSQRT:
13039 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13040
13041 case IX86_BUILTIN_PFSUB:
13042 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13043
13044 case IX86_BUILTIN_PFSUBR:
13045 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13046
13047 case IX86_BUILTIN_PI2FD:
13048 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13049
13050 case IX86_BUILTIN_PMULHRW:
13051 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13052
13053 case IX86_BUILTIN_PF2IW:
13054 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13055
13056 case IX86_BUILTIN_PFNACC:
13057 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13058
13059 case IX86_BUILTIN_PFPNACC:
13060 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13061
13062 case IX86_BUILTIN_PI2FW:
13063 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13064
13065 case IX86_BUILTIN_PSWAPDSI:
13066 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13067
13068 case IX86_BUILTIN_PSWAPDSF:
13069 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13070
13071 case IX86_BUILTIN_SSE_ZERO:
13072 target = gen_reg_rtx (V4SFmode);
13073 emit_insn (gen_sse_clrv4sf (target));
13074 return target;
13075
13076 case IX86_BUILTIN_MMX_ZERO:
13077 target = gen_reg_rtx (DImode);
13078 emit_insn (gen_mmx_clrdi (target));
13079 return target;
13080
13081 case IX86_BUILTIN_SQRTSD:
13082 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13083 case IX86_BUILTIN_LOADAPD:
13084 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13085 case IX86_BUILTIN_LOADUPD:
13086 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13087
13088 case IX86_BUILTIN_STOREAPD:
13089 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13090 case IX86_BUILTIN_STOREUPD:
13091 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13092
13093 case IX86_BUILTIN_LOADSD:
13094 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13095
13096 case IX86_BUILTIN_STORESD:
13097 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13098
13099 case IX86_BUILTIN_SETPD1:
13100 target = assign_386_stack_local (DFmode, 0);
13101 arg0 = TREE_VALUE (arglist);
13102 emit_move_insn (adjust_address (target, DFmode, 0),
13103 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13104 op0 = gen_reg_rtx (V2DFmode);
13105 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13106 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13107 return op0;
13108
13109 case IX86_BUILTIN_SETPD:
13110 target = assign_386_stack_local (V2DFmode, 0);
13111 arg0 = TREE_VALUE (arglist);
13112 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13113 emit_move_insn (adjust_address (target, DFmode, 0),
13114 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13115 emit_move_insn (adjust_address (target, DFmode, 8),
13116 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13117 op0 = gen_reg_rtx (V2DFmode);
13118 emit_insn (gen_sse2_movapd (op0, target));
13119 return op0;
13120
13121 case IX86_BUILTIN_LOADRPD:
13122 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13123 gen_reg_rtx (V2DFmode), 1);
13124 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13125 return target;
13126
13127 case IX86_BUILTIN_LOADPD1:
13128 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13129 gen_reg_rtx (V2DFmode), 1);
13130 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13131 return target;
13132
13133 case IX86_BUILTIN_STOREPD1:
13134 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13135 case IX86_BUILTIN_STORERPD:
13136 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13137
13138 case IX86_BUILTIN_MFENCE:
13139 emit_insn (gen_sse2_mfence ());
13140 return 0;
13141 case IX86_BUILTIN_LFENCE:
13142 emit_insn (gen_sse2_lfence ());
13143 return 0;
13144
13145 case IX86_BUILTIN_CLFLUSH:
13146 arg0 = TREE_VALUE (arglist);
13147 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13148 icode = CODE_FOR_sse2_clflush;
13149 mode0 = insn_data[icode].operand[0].mode;
13150 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13151 op0 = copy_to_mode_reg (mode0, op0);
13152
13153 emit_insn (gen_sse2_clflush (op0));
13154 return 0;
13155
13156 case IX86_BUILTIN_MOVNTPD:
13157 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13158 case IX86_BUILTIN_MOVNTDQ:
13159 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13160 case IX86_BUILTIN_MOVNTI:
13161 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13162
13163 default:
13164 break;
13165 }
13166
13167 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13168 if (d->code == fcode)
13169 {
13170 /* Compares are treated specially. */
13171 if (d->icode == CODE_FOR_maskcmpv4sf3
13172 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13173 || d->icode == CODE_FOR_maskncmpv4sf3
13174 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13175 || d->icode == CODE_FOR_maskcmpv2df3
13176 || d->icode == CODE_FOR_vmmaskcmpv2df3
13177 || d->icode == CODE_FOR_maskncmpv2df3
13178 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13179 return ix86_expand_sse_compare (d, arglist, target);
13180
13181 return ix86_expand_binop_builtin (d->icode, arglist, target);
13182 }
13183
13184 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13185 if (d->code == fcode)
13186 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13187
13188 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13189 if (d->code == fcode)
13190 return ix86_expand_sse_comi (d, arglist, target);
13191
13192 /* @@@ Should really do something sensible here. */
13193 return 0;
13194 }
13195
13196 /* Store OPERAND to memory after reload has completed. This means
13197 that we can't easily use assign_stack_local. */
13198 rtx
13199 ix86_force_to_memory (mode, operand)
13200 enum machine_mode mode;
13201 rtx operand;
13202 {
13203 rtx result;
13204 if (!reload_completed)
13205 abort ();
13206 if (TARGET_64BIT && TARGET_RED_ZONE)
13207 {
13208 result = gen_rtx_MEM (mode,
13209 gen_rtx_PLUS (Pmode,
13210 stack_pointer_rtx,
13211 GEN_INT (-RED_ZONE_SIZE)));
13212 emit_move_insn (result, operand);
13213 }
13214 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13215 {
13216 switch (mode)
13217 {
13218 case HImode:
13219 case SImode:
13220 operand = gen_lowpart (DImode, operand);
13221 /* FALLTHRU */
13222 case DImode:
13223 emit_insn (
13224 gen_rtx_SET (VOIDmode,
13225 gen_rtx_MEM (DImode,
13226 gen_rtx_PRE_DEC (DImode,
13227 stack_pointer_rtx)),
13228 operand));
13229 break;
13230 default:
13231 abort ();
13232 }
13233 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13234 }
13235 else
13236 {
13237 switch (mode)
13238 {
13239 case DImode:
13240 {
13241 rtx operands[2];
13242 split_di (&operand, 1, operands, operands + 1);
13243 emit_insn (
13244 gen_rtx_SET (VOIDmode,
13245 gen_rtx_MEM (SImode,
13246 gen_rtx_PRE_DEC (Pmode,
13247 stack_pointer_rtx)),
13248 operands[1]));
13249 emit_insn (
13250 gen_rtx_SET (VOIDmode,
13251 gen_rtx_MEM (SImode,
13252 gen_rtx_PRE_DEC (Pmode,
13253 stack_pointer_rtx)),
13254 operands[0]));
13255 }
13256 break;
13257 case HImode:
13258 /* It is better to store HImodes as SImodes. */
13259 if (!TARGET_PARTIAL_REG_STALL)
13260 operand = gen_lowpart (SImode, operand);
13261 /* FALLTHRU */
13262 case SImode:
13263 emit_insn (
13264 gen_rtx_SET (VOIDmode,
13265 gen_rtx_MEM (GET_MODE (operand),
13266 gen_rtx_PRE_DEC (SImode,
13267 stack_pointer_rtx)),
13268 operand));
13269 break;
13270 default:
13271 abort ();
13272 }
13273 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13274 }
13275 return result;
13276 }
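
/* For instance, on 64-bit targets with the red zone available the
   operand is simply stored below the stack pointer at -RED_ZONE_SIZE
   without adjusting it; the other cases push the value with PRE_DEC
   addresses and return a MEM at the new top of stack.  */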
13277
13278 /* Release the memory allocated by ix86_force_to_memory. */
13279 void
13280 ix86_free_from_memory (mode)
13281 enum machine_mode mode;
13282 {
13283 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13284 {
13285 int size;
13286
13287 if (mode == DImode || TARGET_64BIT)
13288 size = 8;
13289 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13290 size = 2;
13291 else
13292 size = 4;
13293 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13294 to a pop or an add instruction if registers are available. */
13295 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13296 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13297 GEN_INT (size))));
13298 }
13299 }
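
/* A rough sketch of the effect for an SImode slot on ia32: the SET
   emitted above amounts to something like

	lea	4(%esp), %esp

   which peephole2 may later turn into a pop or an add.  */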
13300
13301 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13302 QImode must go into class Q_REGS.
13303 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
13304 movdf to do mem-to-mem moves through integer regs. */
13305 enum reg_class
13306 ix86_preferred_reload_class (x, class)
13307 rtx x;
13308 enum reg_class class;
13309 {
13310 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13311 {
13312 /* SSE can't load any constant directly yet. */
13313 if (SSE_CLASS_P (class))
13314 return NO_REGS;
13315 /* Floats can load 0 and 1. */
13316 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13317 {
13318 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13319 if (MAYBE_SSE_CLASS_P (class))
13320 return (reg_class_subset_p (class, GENERAL_REGS)
13321 ? GENERAL_REGS : FLOAT_REGS);
13322 else
13323 return class;
13324 }
13325 /* General regs can load everything. */
13326 if (reg_class_subset_p (class, GENERAL_REGS))
13327 return GENERAL_REGS;
13328 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13329 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13330 return NO_REGS;
13331 }
13332 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13333 return NO_REGS;
13334 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13335 return Q_REGS;
13336 return class;
13337 }
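
/* For example, a DFmode CONST_DOUBLE headed for an SSE class yields
   NO_REGS, so the constant ends up in the constant pool and is loaded
   from memory; 0.0 or 1.0 headed for the 387 stack may stay in
   FLOAT_REGS, since fldz/fld1 can materialize them directly.  */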
13338
13339 /* If we are copying between general and FP registers, we need a memory
13340 location. The same is true for SSE and MMX registers.
13341
13342 The macro can't work reliably when one of the CLASSES is a class containing
13343 registers from multiple units (SSE, MMX, integer). We avoid this by never
13344 combining those units in a single alternative in the machine description.
13345 Ensure that this constraint holds to avoid unexpected surprises.
13346
13347 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13348 enforce these sanity checks. */
13349 int
13350 ix86_secondary_memory_needed (class1, class2, mode, strict)
13351 enum reg_class class1, class2;
13352 enum machine_mode mode;
13353 int strict;
13354 {
13355 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13356 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13357 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13358 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13359 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13360 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13361 {
13362 if (strict)
13363 abort ();
13364 else
13365 return 1;
13366 }
13367 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13368 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13369 && (mode) != SImode)
13370 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13371 && (mode) != SImode));
13372 }
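
/* Concretely: a DFmode copy between an SSE register and an x87 or
   integer register is routed through memory, while an SImode copy
   between SSE and integer registers is not, since a plain movd
   suffices.  */
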
13373 /* Return the cost of moving data from a register in class CLASS1 to
13374 one in class CLASS2.
13375
13376 It is not required that the cost always equal 2 when FROM is the same as TO;
13377 on some machines it is expensive to move between registers if they are not
13378 general registers. */
13379 int
13380 ix86_register_move_cost (mode, class1, class2)
13381 enum machine_mode mode;
13382 enum reg_class class1, class2;
13383 {
13384 /* In case we require secondary memory, compute the cost of the store
13385 followed by the load. When copying from a general purpose register we
13386 may emit multiple stores followed by a single load, causing a memory
13387 size mismatch stall. Count this as an arbitrarily high cost of 20. */
13388 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13389 {
13390 int add_cost = 0;
13391 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13392 add_cost = 20;
13393 return (MEMORY_MOVE_COST (mode, class1, 0)
13394 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13395 }
13396 /* Moves between SSE/MMX and integer unit are expensive. */
13397 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13398 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13399 return ix86_cost->mmxsse_to_integer;
13400 if (MAYBE_FLOAT_CLASS_P (class1))
13401 return ix86_cost->fp_move;
13402 if (MAYBE_SSE_CLASS_P (class1))
13403 return ix86_cost->sse_move;
13404 if (MAYBE_MMX_CLASS_P (class1))
13405 return ix86_cost->mmx_move;
13406 return 2;
13407 }
13408
13409 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13410 int
13411 ix86_hard_regno_mode_ok (regno, mode)
13412 int regno;
13413 enum machine_mode mode;
13414 {
13415 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
13416 if (CC_REGNO_P (regno))
13417 return GET_MODE_CLASS (mode) == MODE_CC;
13418 if (GET_MODE_CLASS (mode) == MODE_CC
13419 || GET_MODE_CLASS (mode) == MODE_RANDOM
13420 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13421 return 0;
13422 if (FP_REGNO_P (regno))
13423 return VALID_FP_MODE_P (mode);
13424 if (SSE_REGNO_P (regno))
13425 return VALID_SSE_REG_MODE (mode);
13426 if (MMX_REGNO_P (regno))
13427 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13428 /* We handle both integers and floats in the general purpose registers.
13429 In the future we should be able to handle vector modes as well. */
13430 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13431 return 0;
13432 /* Take care with QImode values - they can live in non-QI regs, but then
13433 they do cause partial register stalls. */
13434 if (regno < 4 || mode != QImode || TARGET_64BIT)
13435 return 1;
13436 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13437 }
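
/* For example, a QImode value may always live in one of the four
   registers with byte parts (%eax, %ebx, %ecx, %edx); letting it live
   elsewhere is accepted in 64-bit mode or once reload has started, but
   is otherwise refused when partial register stalls are being
   avoided.  */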
13438
13439 /* Return the cost of moving data of mode M between a
13440 register and memory. A value of 2 is the default; this cost is
13441 relative to those in `REGISTER_MOVE_COST'.
13442
13443 If moving between registers and memory is more expensive than
13444 between two registers, you should define this macro to express the
13445 relative cost.
13446
13447 Also model the increased cost of moving QImode values in registers
13448 outside the Q_REGS classes.
13449 */
13450 int
13451 ix86_memory_move_cost (mode, class, in)
13452 enum machine_mode mode;
13453 enum reg_class class;
13454 int in;
13455 {
13456 if (FLOAT_CLASS_P (class))
13457 {
13458 int index;
13459 switch (mode)
13460 {
13461 case SFmode:
13462 index = 0;
13463 break;
13464 case DFmode:
13465 index = 1;
13466 break;
13467 case XFmode:
13468 case TFmode:
13469 index = 2;
13470 break;
13471 default:
13472 return 100;
13473 }
13474 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13475 }
13476 if (SSE_CLASS_P (class))
13477 {
13478 int index;
13479 switch (GET_MODE_SIZE (mode))
13480 {
13481 case 4:
13482 index = 0;
13483 break;
13484 case 8:
13485 index = 1;
13486 break;
13487 case 16:
13488 index = 2;
13489 break;
13490 default:
13491 return 100;
13492 }
13493 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13494 }
13495 if (MMX_CLASS_P (class))
13496 {
13497 int index;
13498 switch (GET_MODE_SIZE (mode))
13499 {
13500 case 4:
13501 index = 0;
13502 break;
13503 case 8:
13504 index = 1;
13505 break;
13506 default:
13507 return 100;
13508 }
13509 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13510 }
13511 switch (GET_MODE_SIZE (mode))
13512 {
13513 case 1:
13514 if (in)
13515 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13516 : ix86_cost->movzbl_load);
13517 else
13518 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13519 : ix86_cost->int_store[0] + 4);
13520 break;
13521 case 2:
13522 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13523 default:
13524 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13525 if (mode == TFmode)
13526 mode = XFmode;
13527 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13528 * (int) GET_MODE_SIZE (mode) / 4);
13529 }
13530 }
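
/* For instance, the default branch above makes a DImode value in a
   general register class cost twice the SImode load (or store) figure,
   since GET_MODE_SIZE (DImode) / 4 == 2.  */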
13531
13532 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13533 static void
13534 ix86_svr3_asm_out_constructor (symbol, priority)
13535 rtx symbol;
13536 int priority ATTRIBUTE_UNUSED;
13537 {
13538 init_section ();
13539 fputs ("\tpushl $", asm_out_file);
13540 assemble_name (asm_out_file, XSTR (symbol, 0));
13541 fputc ('\n', asm_out_file);
13542 }
13543 #endif
13544
13545 /* Order the registers for register allocator. */
13546
13547 void
13548 x86_order_regs_for_local_alloc ()
13549 {
13550 int pos = 0;
13551 int i;
13552
13553 /* First allocate the call-used (caller-clobbered) general purpose registers. */
13554 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13555 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13556 reg_alloc_order [pos++] = i;
13557
13558 /* Then the call-saved general purpose registers. */
13559 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13560 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13561 reg_alloc_order [pos++] = i;
13562
13563 /* x87 registers come first when we are doing FP math
13564 with them. */
13565 if (!TARGET_SSE_MATH)
13566 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13567 reg_alloc_order [pos++] = i;
13568
13569 /* SSE registers. */
13570 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13571 reg_alloc_order [pos++] = i;
13572 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13573 reg_alloc_order [pos++] = i;
13574
13575 /* x87 registers. */
13576 if (TARGET_SSE_MATH)
13577 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13578 reg_alloc_order [pos++] = i;
13579
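/* MMX registers (these alias the x87 stack registers). */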
13580 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13581 reg_alloc_order [pos++] = i;
13582
13583 /* Initialize the rest of the array, as some registers are not
13584 allocated at all. */
13585 while (pos < FIRST_PSEUDO_REGISTER)
13586 reg_alloc_order [pos++] = 0;
13587 }
13588
13589 void
13590 x86_output_mi_thunk (file, delta, function)
13591 FILE *file;
13592 int delta;
13593 tree function;
13594 {
13595 tree parm;
13596 rtx xops[3];
13597
13598 if (ix86_regparm > 0)
13599 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13600 else
13601 parm = NULL_TREE;
13602 for (; parm; parm = TREE_CHAIN (parm))
13603 if (TREE_VALUE (parm) == void_type_node)
13604 break;
13605
13606 xops[0] = GEN_INT (delta);
13607 if (TARGET_64BIT)
13608 {
13609 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13610 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13611 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13612 if (flag_pic)
13613 {
13614 fprintf (file, "\tjmp *");
13615 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13616 fprintf (file, "@GOTPCREL(%%rip)\n");
13617 }
13618 else
13619 {
13620 fprintf (file, "\tjmp ");
13621 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13622 fprintf (file, "\n");
13623 }
13624 }
13625 else
13626 {
13627 if (parm)
13628 xops[1] = gen_rtx_REG (SImode, 0);
13629 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13630 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13631 else
13632 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13633 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13634
13635 if (flag_pic)
13636 {
13637 xops[0] = pic_offset_table_rtx;
13638 xops[1] = gen_label_rtx ();
13639 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13640
13641 if (ix86_regparm > 2)
13642 abort ();
13643 output_asm_insn ("push{l}\t%0", xops);
13644 output_asm_insn ("call\t%P1", xops);
13645 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13646 output_asm_insn ("pop{l}\t%0", xops);
13647 output_asm_insn
13648 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13649 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13650 output_asm_insn
13651 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13652 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13653 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13654 }
13655 else
13656 {
13657 fprintf (file, "\tjmp ");
13658 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13659 fprintf (file, "\n");
13660 }
13661 }
13662 }
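
/* A rough sketch of the output for the simplest case (ia32, no PIC, no
   register-passed 'this', no aggregate return value):

	addl	$DELTA, 4(%esp)
	jmp	FUNCTION

   i.e. the thunk adjusts the incoming 'this' pointer in its stack slot
   and tail-jumps to the real function.  */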
13663
13664 int
13665 x86_field_alignment (field, computed)
13666 tree field;
13667 int computed;
13668 {
13669 enum machine_mode mode;
13670 if (TARGET_64BIT || DECL_USER_ALIGN (field) || TARGET_ALIGN_DOUBLE)
13671 return computed;
13672 mode = TYPE_MODE (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
13673 ? get_inner_array_type (field) : TREE_TYPE (field));
13674 if ((mode == DFmode || mode == DCmode
13675 || mode == DImode || mode == CDImode)
13676 && !TARGET_ALIGN_DOUBLE)
13677 return MIN (32, computed);
13678 return computed;
13679 }
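
/* For example, without -malign-double a DFmode (double) field is capped
   at 32-bit alignment on ia32, preserving the traditional struct layout
   at the cost of potentially misaligned 8-byte accesses.  */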
13680
13681 #include "gt-i386.h"