1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = {	/* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73   {2, 2, 2},				/* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110   {8, 8, 8},				/* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147   {8, 8, 8},				/* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184   {4, 4, 6},				/* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221   {4, 4, 6},				/* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258   {4, 4, 4},				/* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295   {4, 4, 16},				/* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332   {4, 4, 6},				/* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
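
/* Illustrative sketch of how the tables above are consumed (the
   authoritative users are the cost macros in i386.h, which read the
   fields through ix86_cost): the cost of multiplying by a constant
   works out to roughly

     ix86_cost->mult_init + (bits set in the constant) * ix86_cost->mult_bit

   so when tuning for the 386 (mult_init 6, mult_bit 1) multiplies are far
   more expensive relative to an add than when tuning for the K6
   (mult_init 3, mult_bit 0).  */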
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
380 const int x86_single_stringop = m_386 | m_PENT4;
381 const int x86_qimode_math = ~(0);
382 const int x86_promote_qi_regs = 0;
383 const int x86_himode_math = ~(m_PPRO);
384 const int x86_promote_hi_regs = m_PPRO;
385 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
386 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
387 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
388 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
389 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
390 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
391 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
392 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_decompose_lea = m_PENT4;
396 const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;
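
/* Each mask above is a bitset over processor types and is tested against
   the processor being tuned for or targeted; for example, override_options
   below checks

     x86_arch_always_fancy_math_387 & (1 << ix86_arch)

   and the remaining tunables are normally consulted through TARGET_*
   wrapper macros in i386.h that test the bit for ix86_cpu the same way.  */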
397
398 /* In case the average insn count for a single function invocation is
399 lower than this constant, emit fast (but longer) prologue and
400 epilogue code. */
401 #define FAST_PROLOGUE_INSN_COUNT 30
402 /* Set by prologue expander and used by epilogue expander to determine
403 the style used. */
404 static int use_fast_prologue_epilogue;
405
406 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
407
408 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
409 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
410 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
411
412 /* Array of the smallest class containing reg number REGNO, indexed by
413 REGNO. Used by REGNO_REG_CLASS in i386.h. */
414
415 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
416 {
417 /* ax, dx, cx, bx */
418 AREG, DREG, CREG, BREG,
419 /* si, di, bp, sp */
420 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
421 /* FP registers */
422 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
423 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
424 /* arg pointer */
425 NON_Q_REGS,
426 /* flags, fpsr, dirflag, frame */
427 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
428 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
429 SSE_REGS, SSE_REGS,
430 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
431 MMX_REGS, MMX_REGS,
432 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
433 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
434 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
435 SSE_REGS, SSE_REGS,
436 };
437
438 /* The "default" register map used in 32bit mode. */
439
440 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
441 {
442 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
443 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
445 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
446 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
447 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
448 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
449 };
450
451 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
452 1 /*RDX*/, 2 /*RCX*/,
453 FIRST_REX_INT_REG /*R8 */,
454 FIRST_REX_INT_REG + 1 /*R9 */};
455 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};
456
457 /* The "default" register map used in 64bit mode. */
458 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
459 {
460 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
461   33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
462 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
463 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
464 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
465 8,9,10,11,12,13,14,15, /* extended integer registers */
466 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
467 };
468
469 /* Define the register numbers to be used in Dwarf debugging information.
470 The SVR4 reference port C compiler uses the following register numbers
471 in its Dwarf output code:
472 0 for %eax (gcc regno = 0)
473 1 for %ecx (gcc regno = 2)
474 2 for %edx (gcc regno = 1)
475 3 for %ebx (gcc regno = 3)
476 4 for %esp (gcc regno = 7)
477 5 for %ebp (gcc regno = 6)
478 6 for %esi (gcc regno = 4)
479 7 for %edi (gcc regno = 5)
480 The following three DWARF register numbers are never generated by
481 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
482 believes these numbers have these meanings.
483 8 for %eip (no gcc equivalent)
484 9 for %eflags (gcc regno = 17)
485 10 for %trapno (no gcc equivalent)
486 It is not at all clear how we should number the FP stack registers
487 for the x86 architecture. If the version of SDB on x86/svr4 were
488 a bit less brain dead with respect to floating-point then we would
489 have a precedent to follow with respect to DWARF register numbers
490 for x86 FP registers, but the SDB on x86/svr4 is so completely
491 broken with respect to FP registers that it is hardly worth thinking
492 of it as something to strive for compatibility with.
493 The version of x86/svr4 SDB I have at the moment does (partially)
494 seem to believe that DWARF register number 11 is associated with
495 the x86 register %st(0), but that's about all. Higher DWARF
496 register numbers don't seem to be associated with anything in
497 particular, and even for DWARF regno 11, SDB only seems to under-
498 stand that it should say that a variable lives in %st(0) (when
499 asked via an `=' command) if we said it was in DWARF regno 11,
500 but SDB still prints garbage when asked for the value of the
501 variable in question (via a `/' command).
502 (Also note that the labels SDB prints for various FP stack regs
503 when doing an `x' command are all wrong.)
504 Note that these problems generally don't affect the native SVR4
505 C compiler because it doesn't allow the use of -O with -g and
506 because when it is *not* optimizing, it allocates a memory
507 location for each floating-point variable, and the memory
508 location is what gets described in the DWARF AT_location
509 attribute for the variable in question.
510 Regardless of the severe mental illness of the x86/svr4 SDB, we
511 do something sensible here and we use the following DWARF
512 register numbers. Note that these are all stack-top-relative
513 numbers.
514 11 for %st(0) (gcc regno = 8)
515 12 for %st(1) (gcc regno = 9)
516 13 for %st(2) (gcc regno = 10)
517 14 for %st(3) (gcc regno = 11)
518 15 for %st(4) (gcc regno = 12)
519 16 for %st(5) (gcc regno = 13)
520 17 for %st(6) (gcc regno = 14)
521 18 for %st(7) (gcc regno = 15)
522 */
523 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
524 {
525 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
526 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
527 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
528 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
529 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
530   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
531   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
532 };
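
/* A concrete reading of the table above: gcc regno 4 is %esi and
   svr4_dbx_register_map[4] is 6, matching the "6 for %esi" line in the
   numbering comment; likewise the FP stack registers (gcc regnos 8-15)
   map to DWARF registers 11-18.  */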
533
534 /* Test and compare insns in i386.md store the information needed to
535 generate branch and scc insns here. */
536
537 rtx ix86_compare_op0 = NULL_RTX;
538 rtx ix86_compare_op1 = NULL_RTX;
539
540 #define MAX_386_STACK_LOCALS 3
541 /* Size of the register save area. */
542 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
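
/* Illustrative arithmetic, assuming the 64-bit values of REGPARM_MAX (6)
   and SSE_REGPARM_MAX (8) from i386.h: the va_arg register save area is
   6 * 8 + 8 * 16 = 176 bytes, the size mandated by the x86-64 ABI.  */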
543
544 /* Define the structure for the machine field in struct function. */
545 struct machine_function
546 {
547 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
548 int save_varrargs_registers;
549 int accesses_prev_frame;
550 };
551
552 #define ix86_stack_locals (cfun->machine->stack_locals)
553 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
554
555 /* Structure describing stack frame layout.
556 Stack grows downward:
557
558 [arguments]
559 <- ARG_POINTER
560 saved pc
561
562 saved frame pointer if frame_pointer_needed
563 <- HARD_FRAME_POINTER
564 [saved regs]
565
566 [padding1] \
567 )
568 [va_arg registers] (
569 > to_allocate <- FRAME_POINTER
570 [frame] (
571 )
572 [padding2] /
573 */
574 struct ix86_frame
575 {
576 int nregs;
577 int padding1;
578 int va_arg_size;
579 HOST_WIDE_INT frame;
580 int padding2;
581 int outgoing_arguments_size;
582 int red_zone_size;
583
584 HOST_WIDE_INT to_allocate;
585 /* The offsets relative to ARG_POINTER. */
586 HOST_WIDE_INT frame_pointer_offset;
587 HOST_WIDE_INT hard_frame_pointer_offset;
588 HOST_WIDE_INT stack_pointer_offset;
589 };
590
591 /* Used to enable/disable debugging features. */
592 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
593 /* Code model option as passed by user. */
594 const char *ix86_cmodel_string;
595 /* Parsed value. */
596 enum cmodel ix86_cmodel;
597 /* Asm dialect. */
598 const char *ix86_asm_string;
599 enum asm_dialect ix86_asm_dialect = ASM_ATT;
600
601 /* which cpu are we scheduling for */
602 enum processor_type ix86_cpu;
603
604 /* which unit we are generating floating point math for */
605 enum fpmath_unit ix86_fpmath;
606
607 /* which instruction set architecture to use. */
608 int ix86_arch;
609
610 /* Strings to hold which cpu and instruction set architecture to use. */
611 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
612 const char *ix86_arch_string; /* for -march=<xxx> */
613 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
614
615 /* # of registers to use to pass arguments. */
616 const char *ix86_regparm_string;
617
618 /* true if sse prefetch instruction is not NOOP. */
619 int x86_prefetch_sse;
620
621 /* ix86_regparm_string as a number */
622 int ix86_regparm;
623
624 /* Alignment to use for loops and jumps: */
625
626 /* Power of two alignment for loops. */
627 const char *ix86_align_loops_string;
628
629 /* Power of two alignment for non-loop jumps. */
630 const char *ix86_align_jumps_string;
631
632 /* Power of two alignment for stack boundary in bytes. */
633 const char *ix86_preferred_stack_boundary_string;
634
635 /* Preferred alignment for stack boundary in bits. */
636 int ix86_preferred_stack_boundary;
637
638 /* Values 1-5: see jump.c */
639 int ix86_branch_cost;
640 const char *ix86_branch_cost_string;
641
642 /* Power of two alignment for functions. */
643 const char *ix86_align_funcs_string;
644
645 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
646 static char internal_label_prefix[16];
647 static int internal_label_prefix_len;
648 \f
649 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
650 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
651 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
652 int, int, FILE *));
653 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
654 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
655 rtx *, rtx *));
656 static rtx gen_push PARAMS ((rtx));
657 static int memory_address_length PARAMS ((rtx addr));
658 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
659 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
660 static int ix86_safe_length PARAMS ((rtx));
661 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
662 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
663 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
664 static void ix86_dump_ppro_packet PARAMS ((FILE *));
665 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
666 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
667 rtx));
668 static void ix86_init_machine_status PARAMS ((struct function *));
669 static void ix86_mark_machine_status PARAMS ((struct function *));
670 static void ix86_free_machine_status PARAMS ((struct function *));
671 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
672 static int ix86_safe_length_prefix PARAMS ((rtx));
673 static int ix86_nsaved_regs PARAMS ((void));
674 static void ix86_emit_save_regs PARAMS ((void));
675 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
676 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
677 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
678 static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
679 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
680 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
681 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
682 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
683 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
684 static int ix86_issue_rate PARAMS ((void));
685 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
686 static void ix86_sched_init PARAMS ((FILE *, int, int));
687 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
688 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
689 static void ix86_init_mmx_sse_builtins PARAMS ((void));
690
691 struct ix86_address
692 {
693 rtx base, index, disp;
694 HOST_WIDE_INT scale;
695 };
696
697 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
698
699 struct builtin_description;
700 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
701 tree, rtx));
702 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
703 tree, rtx));
704 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
705 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
706 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
707 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
708 tree, rtx));
709 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
710 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
711 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
712 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
713 enum rtx_code *,
714 enum rtx_code *,
715 enum rtx_code *));
716 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
717 rtx *, rtx *));
718 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
719 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
720 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
721 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
722 static int ix86_save_reg PARAMS ((int, int));
723 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
724 static int ix86_comp_type_attributes PARAMS ((tree, tree));
725 const struct attribute_spec ix86_attribute_table[];
726 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
727 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
728
729 #ifdef DO_GLOBAL_CTORS_BODY
730 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
731 #endif
732
733 /* Register class used for passing given 64bit part of the argument.
734    These represent classes as documented by the PS ABI, with the exception
735    of the SSESF and SSEDF classes, which are basically the SSE class, except
736    that gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
737
738    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
739    whenever possible (the upper half contains only padding).
740 */
741 enum x86_64_reg_class
742 {
743 X86_64_NO_CLASS,
744 X86_64_INTEGER_CLASS,
745 X86_64_INTEGERSI_CLASS,
746 X86_64_SSE_CLASS,
747 X86_64_SSESF_CLASS,
748 X86_64_SSEDF_CLASS,
749 X86_64_SSEUP_CLASS,
750 X86_64_X87_CLASS,
751 X86_64_X87UP_CLASS,
752 X86_64_MEMORY_CLASS
753 };
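
/* For instance, an argument of type  struct { double d; int i; }  spans
   two 8-byte chunks; classify_argument below would typically give the
   first chunk X86_64_SSEDF_CLASS (passed in an SSE register via a DFmode
   move) and the second X86_64_INTEGERSI_CLASS (a general register, moved
   in SImode).  */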
754 static const char * const x86_64_reg_class_name[] =
755 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
756
757 #define MAX_CLASSES 4
758 static int classify_argument PARAMS ((enum machine_mode, tree,
759 enum x86_64_reg_class [MAX_CLASSES],
760 int));
761 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
762 int *));
763 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
764 const int *, int));
765 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
766 enum x86_64_reg_class));
767 \f
768 /* Initialize the GCC target structure. */
769 #undef TARGET_ATTRIBUTE_TABLE
770 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
771 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
772 # undef TARGET_MERGE_DECL_ATTRIBUTES
773 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
774 #endif
775
776 #undef TARGET_COMP_TYPE_ATTRIBUTES
777 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
778
779 #undef TARGET_INIT_BUILTINS
780 #define TARGET_INIT_BUILTINS ix86_init_builtins
781
782 #undef TARGET_EXPAND_BUILTIN
783 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
784
785 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
786 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
787 HOST_WIDE_INT));
788 # undef TARGET_ASM_FUNCTION_PROLOGUE
789 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
790 #endif
791
792 #undef TARGET_ASM_OPEN_PAREN
793 #define TARGET_ASM_OPEN_PAREN ""
794 #undef TARGET_ASM_CLOSE_PAREN
795 #define TARGET_ASM_CLOSE_PAREN ""
796
797 #undef TARGET_ASM_ALIGNED_HI_OP
798 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
799 #undef TARGET_ASM_ALIGNED_SI_OP
800 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
801 #ifdef ASM_QUAD
802 #undef TARGET_ASM_ALIGNED_DI_OP
803 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
804 #endif
805
806 #undef TARGET_ASM_UNALIGNED_HI_OP
807 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
808 #undef TARGET_ASM_UNALIGNED_SI_OP
809 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
810 #undef TARGET_ASM_UNALIGNED_DI_OP
811 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
812
813 #undef TARGET_SCHED_ADJUST_COST
814 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
815 #undef TARGET_SCHED_ISSUE_RATE
816 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
817 #undef TARGET_SCHED_VARIABLE_ISSUE
818 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
819 #undef TARGET_SCHED_INIT
820 #define TARGET_SCHED_INIT ix86_sched_init
821 #undef TARGET_SCHED_REORDER
822 #define TARGET_SCHED_REORDER ix86_sched_reorder
823
824 struct gcc_target targetm = TARGET_INITIALIZER;
825 \f
826 /* Sometimes certain combinations of command options do not make
827 sense on a particular target machine. You can define a macro
828 `OVERRIDE_OPTIONS' to take account of this. This macro, if
829 defined, is executed once just after all the command options have
830 been parsed.
831
832 Don't use this macro to turn on various extra optimizations for
833 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
834
835 void
836 override_options ()
837 {
838 int i;
839 /* Comes from final.c -- no real reason to change it. */
840 #define MAX_CODE_ALIGN 16
841
842 static struct ptt
843 {
844 const struct processor_costs *cost; /* Processor costs */
845 const int target_enable; /* Target flags to enable. */
846 const int target_disable; /* Target flags to disable. */
847 const int align_loop; /* Default alignments. */
848 const int align_loop_max_skip;
849 const int align_jump;
850 const int align_jump_max_skip;
851 const int align_func;
852 const int branch_cost;
853 }
854 const processor_target_table[PROCESSOR_max] =
855 {
856 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
857 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
858 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
859 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
860 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
861 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
862 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
863 };
864
865 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
866 static struct pta
867 {
868 const char *const name; /* processor name or nickname. */
869 const enum processor_type processor;
870 const enum pta_flags
871 {
872 PTA_SSE = 1,
873 PTA_SSE2 = 2,
874 PTA_MMX = 4,
875 PTA_PREFETCH_SSE = 8,
876 PTA_3DNOW = 16,
877 PTA_3DNOW_A = 64
878 } flags;
879 }
880 const processor_alias_table[] =
881 {
882 {"i386", PROCESSOR_I386, 0},
883 {"i486", PROCESSOR_I486, 0},
884 {"i586", PROCESSOR_PENTIUM, 0},
885 {"pentium", PROCESSOR_PENTIUM, 0},
886 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
887 {"i686", PROCESSOR_PENTIUMPRO, 0},
888 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
889 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
890 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
891 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
892 PTA_MMX | PTA_PREFETCH_SSE},
893 {"k6", PROCESSOR_K6, PTA_MMX},
894 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
895 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
896 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
897 | PTA_3DNOW_A},
898 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
899 | PTA_3DNOW | PTA_3DNOW_A},
900 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
901 | PTA_3DNOW_A | PTA_SSE},
902 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
903 | PTA_3DNOW_A | PTA_SSE},
904 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
905 | PTA_3DNOW_A | PTA_SSE},
906 };
907
908 int const pta_size = ARRAY_SIZE (processor_alias_table);
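
  /* For example, -march=athlon-xp selects PROCESSOR_ATHLON and, through the
     PTA_* flags of its table entry, the -march loop below turns on MASK_MMX,
     MASK_3DNOW, MASK_3DNOW_A and MASK_SSE (unless the user already set them
     explicitly) and sets x86_prefetch_sse since PTA_PREFETCH_SSE is present.  */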
909
910 #ifdef SUBTARGET_OVERRIDE_OPTIONS
911 SUBTARGET_OVERRIDE_OPTIONS;
912 #endif
913
914 if (!ix86_cpu_string && ix86_arch_string)
915 ix86_cpu_string = ix86_arch_string;
916 if (!ix86_cpu_string)
917 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
918 if (!ix86_arch_string)
919 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
920
921 if (ix86_cmodel_string != 0)
922 {
923 if (!strcmp (ix86_cmodel_string, "small"))
924 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
925 else if (flag_pic)
926 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
927 else if (!strcmp (ix86_cmodel_string, "32"))
928 ix86_cmodel = CM_32;
929 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
930 ix86_cmodel = CM_KERNEL;
931 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
932 ix86_cmodel = CM_MEDIUM;
933 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
934 ix86_cmodel = CM_LARGE;
935 else
936 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
937 }
938 else
939 {
940 ix86_cmodel = CM_32;
941 if (TARGET_64BIT)
942 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
943 }
944 if (ix86_asm_string != 0)
945 {
946 if (!strcmp (ix86_asm_string, "intel"))
947 ix86_asm_dialect = ASM_INTEL;
948 else if (!strcmp (ix86_asm_string, "att"))
949 ix86_asm_dialect = ASM_ATT;
950 else
951 error ("bad value (%s) for -masm= switch", ix86_asm_string);
952 }
953 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
954 error ("code model `%s' not supported in the %s bit mode",
955 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
956 if (ix86_cmodel == CM_LARGE)
957 sorry ("code model `large' not supported yet");
958 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
959 sorry ("%i-bit mode not compiled in",
960 (target_flags & MASK_64BIT) ? 64 : 32);
961
962 for (i = 0; i < pta_size; i++)
963 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
964 {
965 ix86_arch = processor_alias_table[i].processor;
966 /* Default cpu tuning to the architecture. */
967 ix86_cpu = ix86_arch;
968 if (processor_alias_table[i].flags & PTA_MMX
969 && !(target_flags & MASK_MMX_SET))
970 target_flags |= MASK_MMX;
971 if (processor_alias_table[i].flags & PTA_3DNOW
972 && !(target_flags & MASK_3DNOW_SET))
973 target_flags |= MASK_3DNOW;
974 if (processor_alias_table[i].flags & PTA_3DNOW_A
975 && !(target_flags & MASK_3DNOW_A_SET))
976 target_flags |= MASK_3DNOW_A;
977 if (processor_alias_table[i].flags & PTA_SSE
978 && !(target_flags & MASK_SSE_SET))
979 target_flags |= MASK_SSE;
980 if (processor_alias_table[i].flags & PTA_SSE2
981 && !(target_flags & MASK_SSE2_SET))
982 target_flags |= MASK_SSE2;
983 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
984 x86_prefetch_sse = true;
985 break;
986 }
987
988 if (i == pta_size)
989 error ("bad value (%s) for -march= switch", ix86_arch_string);
990
991 for (i = 0; i < pta_size; i++)
992 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
993 {
994 ix86_cpu = processor_alias_table[i].processor;
995 break;
996 }
997 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
998 x86_prefetch_sse = true;
999 if (i == pta_size)
1000 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1001
1002 if (optimize_size)
1003 ix86_cost = &size_cost;
1004 else
1005 ix86_cost = processor_target_table[ix86_cpu].cost;
1006 target_flags |= processor_target_table[ix86_cpu].target_enable;
1007 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1008
1009 /* Arrange to set up i386_stack_locals for all functions. */
1010 init_machine_status = ix86_init_machine_status;
1011 mark_machine_status = ix86_mark_machine_status;
1012 free_machine_status = ix86_free_machine_status;
1013
1014 /* Validate -mregparm= value. */
1015 if (ix86_regparm_string)
1016 {
1017 i = atoi (ix86_regparm_string);
1018 if (i < 0 || i > REGPARM_MAX)
1019 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1020 else
1021 ix86_regparm = i;
1022 }
1023 else
1024 if (TARGET_64BIT)
1025 ix86_regparm = REGPARM_MAX;
1026
1027 /* If the user has provided any of the -malign-* options,
1028 warn and use that value only if -falign-* is not set.
1029 Remove this code in GCC 3.2 or later. */
1030 if (ix86_align_loops_string)
1031 {
1032 warning ("-malign-loops is obsolete, use -falign-loops");
1033 if (align_loops == 0)
1034 {
1035 i = atoi (ix86_align_loops_string);
1036 if (i < 0 || i > MAX_CODE_ALIGN)
1037 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1038 else
1039 align_loops = 1 << i;
1040 }
1041 }
1042
1043 if (ix86_align_jumps_string)
1044 {
1045 warning ("-malign-jumps is obsolete, use -falign-jumps");
1046 if (align_jumps == 0)
1047 {
1048 i = atoi (ix86_align_jumps_string);
1049 if (i < 0 || i > MAX_CODE_ALIGN)
1050 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1051 else
1052 align_jumps = 1 << i;
1053 }
1054 }
1055
1056 if (ix86_align_funcs_string)
1057 {
1058 warning ("-malign-functions is obsolete, use -falign-functions");
1059 if (align_functions == 0)
1060 {
1061 i = atoi (ix86_align_funcs_string);
1062 if (i < 0 || i > MAX_CODE_ALIGN)
1063 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 else
1065 align_functions = 1 << i;
1066 }
1067 }
1068
1069 /* Default align_* from the processor table. */
1070 if (align_loops == 0)
1071 {
1072 align_loops = processor_target_table[ix86_cpu].align_loop;
1073 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1074 }
1075 if (align_jumps == 0)
1076 {
1077 align_jumps = processor_target_table[ix86_cpu].align_jump;
1078 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1079 }
1080 if (align_functions == 0)
1081 {
1082 align_functions = processor_target_table[ix86_cpu].align_func;
1083 }
1084
1085 /* Validate -mpreferred-stack-boundary= value, or provide default.
1086 The default of 128 bits is for Pentium III's SSE __m128, but we
1087 don't want additional code to keep the stack aligned when
1088 optimizing for code size. */
1089 ix86_preferred_stack_boundary = (optimize_size
1090 ? TARGET_64BIT ? 64 : 32
1091 : 128);
1092 if (ix86_preferred_stack_boundary_string)
1093 {
1094 i = atoi (ix86_preferred_stack_boundary_string);
1095 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1096 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1097 TARGET_64BIT ? 3 : 2);
1098 else
1099 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1100 }
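
  /* For instance, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
     = 128, i.e. a 16 byte aligned stack, which is also the default chosen
     above when not optimizing for size.  */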
1101
1102 /* Validate -mbranch-cost= value, or provide default. */
1103 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1104 if (ix86_branch_cost_string)
1105 {
1106 i = atoi (ix86_branch_cost_string);
1107 if (i < 0 || i > 5)
1108 error ("-mbranch-cost=%d is not between 0 and 5", i);
1109 else
1110 ix86_branch_cost = i;
1111 }
1112
1113 /* Keep nonleaf frame pointers. */
1114 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1115 flag_omit_frame_pointer = 1;
1116
1117 /* If we're doing fast math, we don't care about comparison order
1118 wrt NaNs. This lets us use a shorter comparison sequence. */
1119 if (flag_unsafe_math_optimizations)
1120 target_flags &= ~MASK_IEEE_FP;
1121
1122 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1123 since the insns won't need emulation. */
1124 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1125 target_flags &= ~MASK_NO_FANCY_MATH_387;
1126
1127 if (TARGET_64BIT)
1128 {
1129 if (TARGET_ALIGN_DOUBLE)
1130 error ("-malign-double makes no sense in the 64bit mode");
1131 if (TARGET_RTD)
1132 error ("-mrtd calling convention not supported in the 64bit mode");
1133 /* Enable by default the SSE and MMX builtins. */
1134 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1135 ix86_fpmath = FPMATH_SSE;
1136 }
1137 else
1138 ix86_fpmath = FPMATH_387;
1139
1140 if (ix86_fpmath_string != 0)
1141 {
1142 if (! strcmp (ix86_fpmath_string, "387"))
1143 ix86_fpmath = FPMATH_387;
1144 else if (! strcmp (ix86_fpmath_string, "sse"))
1145 {
1146 if (!TARGET_SSE)
1147 {
1148 warning ("SSE instruction set disabled, using 387 arithmetics");
1149 ix86_fpmath = FPMATH_387;
1150 }
1151 else
1152 ix86_fpmath = FPMATH_SSE;
1153 }
1154 else if (! strcmp (ix86_fpmath_string, "387,sse")
1155 || ! strcmp (ix86_fpmath_string, "sse,387"))
1156 {
1157 if (!TARGET_SSE)
1158 {
1159 warning ("SSE instruction set disabled, using 387 arithmetics");
1160 ix86_fpmath = FPMATH_387;
1161 }
1162 else if (!TARGET_80387)
1163 {
1164 warning ("387 instruction set disabled, using SSE arithmetics");
1165 ix86_fpmath = FPMATH_SSE;
1166 }
1167 else
1168 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1169 }
1170 else
1171 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1172 }
1173
1174 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1175 on by -msse. */
1176 if (TARGET_SSE)
1177 {
1178 target_flags |= MASK_MMX;
1179 x86_prefetch_sse = true;
1180 }
1181
1182 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1183 if (TARGET_3DNOW)
1184 {
1185 target_flags |= MASK_MMX;
1186       /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1187 extensions it adds. */
1188 if (x86_3dnow_a & (1 << ix86_arch))
1189 target_flags |= MASK_3DNOW_A;
1190 }
1191 if ((x86_accumulate_outgoing_args & CPUMASK)
1192 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1193 && !optimize_size)
1194 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1195
1196 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1197 {
1198 char *p;
1199 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1200 p = strchr (internal_label_prefix, 'X');
1201 internal_label_prefix_len = p - internal_label_prefix;
1202 *p = '\0';
1203 }
1204 }
1205 \f
1206 void
1207 optimization_options (level, size)
1208 int level;
1209 int size ATTRIBUTE_UNUSED;
1210 {
1211 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1212 make the problem with not enough registers even worse. */
1213 #ifdef INSN_SCHEDULING
1214 if (level > 1)
1215 flag_schedule_insns = 0;
1216 #endif
1217 if (TARGET_64BIT && optimize >= 1)
1218 flag_omit_frame_pointer = 1;
1219 if (TARGET_64BIT)
1220 {
1221 flag_pcc_struct_return = 0;
1222 flag_asynchronous_unwind_tables = 1;
1223 }
1224 }
1225 \f
1226 /* Table of valid machine attributes. */
1227 const struct attribute_spec ix86_attribute_table[] =
1228 {
1229 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1230 /* Stdcall attribute says callee is responsible for popping arguments
1231 if they are not variable. */
1232 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1233 /* Cdecl attribute says the callee is a normal C declaration */
1234 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1235 /* Regparm attribute specifies how many integer arguments are to be
1236 passed in registers. */
1237 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1238 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1239 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1240 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1241 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1242 #endif
1243 { NULL, 0, 0, false, false, false, NULL }
1244 };
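
/* As a usage illustration (the declarations are hypothetical, not from
   this file), the table above routes

     int __attribute__ ((stdcall)) raise_event (int code);
     int __attribute__ ((regparm (2))) add2 (int a, int b);

   to ix86_handle_cdecl_attribute and ix86_handle_regparm_attribute
   respectively.  */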
1245
1246 /* Handle a "cdecl" or "stdcall" attribute;
1247 arguments as in struct attribute_spec.handler. */
1248 static tree
1249 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1250 tree *node;
1251 tree name;
1252 tree args ATTRIBUTE_UNUSED;
1253 int flags ATTRIBUTE_UNUSED;
1254 bool *no_add_attrs;
1255 {
1256 if (TREE_CODE (*node) != FUNCTION_TYPE
1257 && TREE_CODE (*node) != METHOD_TYPE
1258 && TREE_CODE (*node) != FIELD_DECL
1259 && TREE_CODE (*node) != TYPE_DECL)
1260 {
1261 warning ("`%s' attribute only applies to functions",
1262 IDENTIFIER_POINTER (name));
1263 *no_add_attrs = true;
1264 }
1265
1266 if (TARGET_64BIT)
1267 {
1268 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1269 *no_add_attrs = true;
1270 }
1271
1272 return NULL_TREE;
1273 }
1274
1275 /* Handle a "regparm" attribute;
1276 arguments as in struct attribute_spec.handler. */
1277 static tree
1278 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1279 tree *node;
1280 tree name;
1281 tree args;
1282 int flags ATTRIBUTE_UNUSED;
1283 bool *no_add_attrs;
1284 {
1285 if (TREE_CODE (*node) != FUNCTION_TYPE
1286 && TREE_CODE (*node) != METHOD_TYPE
1287 && TREE_CODE (*node) != FIELD_DECL
1288 && TREE_CODE (*node) != TYPE_DECL)
1289 {
1290 warning ("`%s' attribute only applies to functions",
1291 IDENTIFIER_POINTER (name));
1292 *no_add_attrs = true;
1293 }
1294 else
1295 {
1296 tree cst;
1297
1298 cst = TREE_VALUE (args);
1299 if (TREE_CODE (cst) != INTEGER_CST)
1300 {
1301 warning ("`%s' attribute requires an integer constant argument",
1302 IDENTIFIER_POINTER (name));
1303 *no_add_attrs = true;
1304 }
1305 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1306 {
1307 warning ("argument to `%s' attribute larger than %d",
1308 IDENTIFIER_POINTER (name), REGPARM_MAX);
1309 *no_add_attrs = true;
1310 }
1311 }
1312
1313 return NULL_TREE;
1314 }
1315
1316 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1317
1318 /* Generate the assembly code for function entry. FILE is a stdio
1319 stream to output the code to. SIZE is an int: how many units of
1320 temporary storage to allocate.
1321
1322 Refer to the array `regs_ever_live' to determine which registers to
1323 save; `regs_ever_live[I]' is nonzero if register number I is ever
1324 used in the function. This function is responsible for knowing
1325 which registers should not be saved even if used.
1326
1327 We override it here to allow for the new profiling code to go before
1328 the prologue and the old mcount code to go after the prologue (and
1329 after %ebx has been set up for ELF shared library support). */
1330
1331 static void
1332 ix86_osf_output_function_prologue (file, size)
1333 FILE *file;
1334 HOST_WIDE_INT size;
1335 {
1336 const char *prefix = "";
1337 const char *const lprefix = LPREFIX;
1338 int labelno = profile_label_no;
1339
1340 #ifdef OSF_OS
1341
1342 if (TARGET_UNDERSCORES)
1343 prefix = "_";
1344
1345 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1346 {
1347 if (!flag_pic && !HALF_PIC_P ())
1348 {
1349 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1350 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1351 }
1352
1353 else if (HALF_PIC_P ())
1354 {
1355 rtx symref;
1356
1357 HALF_PIC_EXTERNAL ("_mcount_ptr");
1358 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1359 "_mcount_ptr"));
1360
1361 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1362 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1363 XSTR (symref, 0));
1364 fprintf (file, "\tcall *(%%eax)\n");
1365 }
1366
1367 else
1368 {
1369 static int call_no = 0;
1370
1371 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1372 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1373 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1374 lprefix, call_no++);
1375 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1376 lprefix, labelno);
1377 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1378 prefix);
1379 fprintf (file, "\tcall *(%%eax)\n");
1380 }
1381 }
1382
1383 #else /* !OSF_OS */
1384
1385 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1386 {
1387 if (!flag_pic)
1388 {
1389 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1390 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1391 }
1392
1393 else
1394 {
1395 static int call_no = 0;
1396
1397 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1398 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1399 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1400 lprefix, call_no++);
1401 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1402 lprefix, labelno);
1403 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1404 prefix);
1405 fprintf (file, "\tcall *(%%eax)\n");
1406 }
1407 }
1408 #endif /* !OSF_OS */
1409
1410 function_prologue (file, size);
1411 }
1412
1413 #endif /* OSF_OS || TARGET_OSF1ELF */
1414
1415 /* Return 0 if the attributes for two types are incompatible, 1 if they
1416 are compatible, and 2 if they are nearly compatible (which causes a
1417 warning to be generated). */
1418
1419 static int
1420 ix86_comp_type_attributes (type1, type2)
1421 tree type1;
1422 tree type2;
1423 {
1424 /* Check for mismatch of non-default calling convention. */
1425 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1426
1427 if (TREE_CODE (type1) != FUNCTION_TYPE)
1428 return 1;
1429
1430 /* Check for mismatched return types (cdecl vs stdcall). */
1431 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1432 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1433 return 0;
1434 return 1;
1435 }
1436 \f
1437 /* Value is the number of bytes of arguments automatically
1438 popped when returning from a subroutine call.
1439 FUNDECL is the declaration node of the function (as a tree),
1440 FUNTYPE is the data type of the function (as a tree),
1441 or for a library call it is an identifier node for the subroutine name.
1442 SIZE is the number of bytes of arguments passed on the stack.
1443
1444 On the 80386, the RTD insn may be used to pop them if the number
1445 of args is fixed, but if the number is variable then the caller
1446 must pop them all. RTD can't be used for library calls now
1447 because the library is compiled with the Unix compiler.
1448 Use of RTD is a selectable option, since it is incompatible with
1449 standard Unix calling sequences. If the option is not selected,
1450 the caller must always pop the args.
1451
1452 The attribute stdcall is equivalent to RTD on a per module basis. */
1453
1454 int
1455 ix86_return_pops_args (fundecl, funtype, size)
1456 tree fundecl;
1457 tree funtype;
1458 int size;
1459 {
1460 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1461
1462 /* Cdecl functions override -mrtd, and never pop the stack. */
1463 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1464
1465 /* Stdcall functions will pop the stack if not variable args. */
1466 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1467 rtd = 1;
1468
1469 if (rtd
1470 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1471 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1472 == void_type_node)))
1473 return size;
1474 }
1475
1476 /* Lose any fake structure return argument if it is passed on the stack. */
1477 if (aggregate_value_p (TREE_TYPE (funtype))
1478 && !TARGET_64BIT)
1479 {
1480 int nregs = ix86_regparm;
1481
1482 if (funtype)
1483 {
1484 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1485
1486 if (attr)
1487 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1488 }
1489
1490 if (!nregs)
1491 return GET_MODE_SIZE (Pmode);
1492 }
1493
1494 return 0;
1495 }
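
/* Worked example of the rules above (hypothetical declaration): for
     void __attribute__ ((stdcall)) f (int a, int b);
   the argument list is fixed, so ix86_return_pops_args returns 8 and the
   callee pops both words with `ret $8'; give f a variable argument list
   instead and 0 is returned, leaving the caller to pop.  */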
1496 \f
1497 /* Argument support functions. */
1498
1499 /* Return true when register may be used to pass function parameters. */
1500 bool
1501 ix86_function_arg_regno_p (regno)
1502 int regno;
1503 {
1504 int i;
1505 if (!TARGET_64BIT)
1506 return (regno < REGPARM_MAX
1507 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1508 if (SSE_REGNO_P (regno) && TARGET_SSE)
1509 return true;
1510 /* RAX is used as hidden argument to va_arg functions. */
1511 if (!regno)
1512 return true;
1513 for (i = 0; i < REGPARM_MAX; i++)
1514 if (regno == x86_64_int_parameter_registers[i])
1515 return true;
1516 return false;
1517 }
1518
1519 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1520 for a call to a function whose data type is FNTYPE.
1521 For a library call, FNTYPE is 0. */
1522
1523 void
1524 init_cumulative_args (cum, fntype, libname)
1525 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1526 tree fntype; /* tree ptr for function decl */
1527 rtx libname; /* SYMBOL_REF of library name or 0 */
1528 {
1529 static CUMULATIVE_ARGS zero_cum;
1530 tree param, next_param;
1531
1532 if (TARGET_DEBUG_ARG)
1533 {
1534 fprintf (stderr, "\ninit_cumulative_args (");
1535 if (fntype)
1536 fprintf (stderr, "fntype code = %s, ret code = %s",
1537 tree_code_name[(int) TREE_CODE (fntype)],
1538 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1539 else
1540 fprintf (stderr, "no fntype");
1541
1542 if (libname)
1543 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1544 }
1545
1546 *cum = zero_cum;
1547
1548 /* Set up the number of registers to use for passing arguments. */
1549 cum->nregs = ix86_regparm;
1550 cum->sse_nregs = SSE_REGPARM_MAX;
1551 if (fntype && !TARGET_64BIT)
1552 {
1553 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1554
1555 if (attr)
1556 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1557 }
1558 cum->maybe_vaarg = false;
1559
1560 /* Determine if this function has variable arguments. This is
1561      indicated by the last argument being 'void_type_node' if there
1562      are no variable arguments.  If there are variable arguments, then
1563      we won't pass anything in registers.  */
1564
1565 if (cum->nregs)
1566 {
1567 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1568 param != 0; param = next_param)
1569 {
1570 next_param = TREE_CHAIN (param);
1571 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1572 {
1573 if (!TARGET_64BIT)
1574 cum->nregs = 0;
1575 cum->maybe_vaarg = true;
1576 }
1577 }
1578 }
1579 if ((!fntype && !libname)
1580 || (fntype && !TYPE_ARG_TYPES (fntype)))
1581 cum->maybe_vaarg = 1;
1582
1583 if (TARGET_DEBUG_ARG)
1584 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1585
1586 return;
1587 }
1588
1589 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
1590    The goal of this code is to classify each eightbyte of an incoming argument
1591    by its register class and assign registers accordingly.  */
1592
1593 /* Return the union class of CLASS1 and CLASS2.
1594 See the x86-64 PS ABI for details. */
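/* For instance, merging X86_64_INTEGER_CLASS with X86_64_SSE_CLASS yields
   X86_64_INTEGER_CLASS (rule #4 below), while merging X86_64_X87_CLASS with
   X86_64_SSE_CLASS yields X86_64_MEMORY_CLASS (rule #5).  */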
1595
1596 static enum x86_64_reg_class
1597 merge_classes (class1, class2)
1598 enum x86_64_reg_class class1, class2;
1599 {
1600 /* Rule #1: If both classes are equal, this is the resulting class. */
1601 if (class1 == class2)
1602 return class1;
1603
1604 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1605 the other class. */
1606 if (class1 == X86_64_NO_CLASS)
1607 return class2;
1608 if (class2 == X86_64_NO_CLASS)
1609 return class1;
1610
1611 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1612 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1613 return X86_64_MEMORY_CLASS;
1614
1615 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1616 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1617 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1618 return X86_64_INTEGERSI_CLASS;
1619 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1620 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1621 return X86_64_INTEGER_CLASS;
1622
1623 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1624 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1625 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1626 return X86_64_MEMORY_CLASS;
1627
1628 /* Rule #6: Otherwise class SSE is used. */
1629 return X86_64_SSE_CLASS;
1630 }
1631
1632 /* Classify the argument of type TYPE and mode MODE.
1633 CLASSES will be filled by the register class used to pass each word
1634 of the operand. The number of words is returned. In case the parameter
1635 should be passed in memory, 0 is returned. As a special case for zero
1636 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1637
1638    BIT_OFFSET is used internally for handling records and specifies the
1639    offset in bits modulo 256 to avoid overflow cases.
1640
1641 See the x86-64 PS ABI for details.
1642 */
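/* As a worked example: for

     struct { double d; int i; };

   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second as
   X86_64_INTEGER_CLASS, so the structure ends up being passed in one SSE
   register and one integer register.  */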
1643
1644 static int
1645 classify_argument (mode, type, classes, bit_offset)
1646 enum machine_mode mode;
1647 tree type;
1648 enum x86_64_reg_class classes[MAX_CLASSES];
1649 int bit_offset;
1650 {
1651 int bytes =
1652 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1653 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1654
1655 if (type && AGGREGATE_TYPE_P (type))
1656 {
1657 int i;
1658 tree field;
1659 enum x86_64_reg_class subclasses[MAX_CLASSES];
1660
1661 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1662 if (bytes > 16)
1663 return 0;
1664
1665 for (i = 0; i < words; i++)
1666 classes[i] = X86_64_NO_CLASS;
1667
1668       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
1669          signal the memory class, so handle this as a special case.  */
1670 if (!words)
1671 {
1672 classes[0] = X86_64_NO_CLASS;
1673 return 1;
1674 }
1675
1676 /* Classify each field of record and merge classes. */
1677 if (TREE_CODE (type) == RECORD_TYPE)
1678 {
1679 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1680 {
1681 if (TREE_CODE (field) == FIELD_DECL)
1682 {
1683 int num;
1684
1685 /* Bitfields are always classified as integer. Handle them
1686 early, since later code would consider them to be
1687 misaligned integers. */
1688 if (DECL_BIT_FIELD (field))
1689 {
1690 for (i = int_bit_position (field) / 8 / 8;
1691 i < (int_bit_position (field)
1692 + tree_low_cst (DECL_SIZE (field), 0)
1693 + 63) / 8 / 8; i++)
1694 classes[i] =
1695 merge_classes (X86_64_INTEGER_CLASS,
1696 classes[i]);
1697 }
1698 else
1699 {
1700 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1701 TREE_TYPE (field), subclasses,
1702 (int_bit_position (field)
1703 + bit_offset) % 256);
1704 if (!num)
1705 return 0;
1706 for (i = 0; i < num; i++)
1707 {
1708 int pos =
1709 (int_bit_position (field) + bit_offset) / 8 / 8;
1710 classes[i + pos] =
1711 merge_classes (subclasses[i], classes[i + pos]);
1712 }
1713 }
1714 }
1715 }
1716 }
1717 /* Arrays are handled as small records. */
1718 else if (TREE_CODE (type) == ARRAY_TYPE)
1719 {
1720 int num;
1721 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1722 TREE_TYPE (type), subclasses, bit_offset);
1723 if (!num)
1724 return 0;
1725
1726 /* The partial classes are now full classes. */
1727 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1728 subclasses[0] = X86_64_SSE_CLASS;
1729 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1730 subclasses[0] = X86_64_INTEGER_CLASS;
1731
1732 for (i = 0; i < words; i++)
1733 classes[i] = subclasses[i % num];
1734 }
1735 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1736 else if (TREE_CODE (type) == UNION_TYPE
1737 || TREE_CODE (type) == QUAL_UNION_TYPE)
1738 {
1739 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1740 {
1741 if (TREE_CODE (field) == FIELD_DECL)
1742 {
1743 int num;
1744 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1745 TREE_TYPE (field), subclasses,
1746 bit_offset);
1747 if (!num)
1748 return 0;
1749 for (i = 0; i < num; i++)
1750 classes[i] = merge_classes (subclasses[i], classes[i]);
1751 }
1752 }
1753 }
1754 else
1755 abort ();
1756
1757 /* Final merger cleanup. */
1758 for (i = 0; i < words; i++)
1759 {
1760 /* If one class is MEMORY, everything should be passed in
1761 memory. */
1762 if (classes[i] == X86_64_MEMORY_CLASS)
1763 return 0;
1764
1765       /* X86_64_SSEUP_CLASS should always be preceded by
1766          X86_64_SSE_CLASS.  */
1767 if (classes[i] == X86_64_SSEUP_CLASS
1768 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1769 classes[i] = X86_64_SSE_CLASS;
1770
1771 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1772 if (classes[i] == X86_64_X87UP_CLASS
1773 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1774 classes[i] = X86_64_SSE_CLASS;
1775 }
1776 return words;
1777 }
1778
1779   /* Compute the alignment needed.  We align all types to their natural
1780      boundaries, with the exception of XFmode, which is aligned to 64bits.  */
1781 if (mode != VOIDmode && mode != BLKmode)
1782 {
1783 int mode_alignment = GET_MODE_BITSIZE (mode);
1784
1785 if (mode == XFmode)
1786 mode_alignment = 128;
1787 else if (mode == XCmode)
1788 mode_alignment = 256;
1789 /* Misaligned fields are always returned in memory. */
1790 if (bit_offset % mode_alignment)
1791 return 0;
1792 }
1793
1794 /* Classification of atomic types. */
1795 switch (mode)
1796 {
1797 case DImode:
1798 case SImode:
1799 case HImode:
1800 case QImode:
1801 case CSImode:
1802 case CHImode:
1803 case CQImode:
1804 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1805 classes[0] = X86_64_INTEGERSI_CLASS;
1806 else
1807 classes[0] = X86_64_INTEGER_CLASS;
1808 return 1;
1809 case CDImode:
1810 case TImode:
1811 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1812 return 2;
1813 case CTImode:
1814 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1815 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1816 return 4;
1817 case SFmode:
1818 if (!(bit_offset % 64))
1819 classes[0] = X86_64_SSESF_CLASS;
1820 else
1821 classes[0] = X86_64_SSE_CLASS;
1822 return 1;
1823 case DFmode:
1824 classes[0] = X86_64_SSEDF_CLASS;
1825 return 1;
1826 case TFmode:
1827 classes[0] = X86_64_X87_CLASS;
1828 classes[1] = X86_64_X87UP_CLASS;
1829 return 2;
1830 case TCmode:
1831 classes[0] = X86_64_X87_CLASS;
1832 classes[1] = X86_64_X87UP_CLASS;
1833 classes[2] = X86_64_X87_CLASS;
1834 classes[3] = X86_64_X87UP_CLASS;
1835 return 4;
1836 case DCmode:
1837 classes[0] = X86_64_SSEDF_CLASS;
1838 classes[1] = X86_64_SSEDF_CLASS;
1839 return 2;
1840 case SCmode:
1841 classes[0] = X86_64_SSE_CLASS;
1842 return 1;
1843 case V4SFmode:
1844 case V4SImode:
1845 classes[0] = X86_64_SSE_CLASS;
1846 classes[1] = X86_64_SSEUP_CLASS;
1847 return 2;
1848 case V2SFmode:
1849 case V2SImode:
1850 case V4HImode:
1851 case V8QImode:
1852 classes[0] = X86_64_SSE_CLASS;
1853 return 1;
1854 case BLKmode:
1855 case VOIDmode:
1856 return 0;
1857 default:
1858 abort ();
1859 }
1860 }
1861
1862 /* Examine the argument and return the number of registers required in each
1863    class.  Return 0 iff the parameter should be passed in memory.  */
1864 static int
1865 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1866 enum machine_mode mode;
1867 tree type;
1868 int *int_nregs, *sse_nregs;
1869 int in_return;
1870 {
1871 enum x86_64_reg_class class[MAX_CLASSES];
1872 int n = classify_argument (mode, type, class, 0);
1873
1874 *int_nregs = 0;
1875 *sse_nregs = 0;
1876 if (!n)
1877 return 0;
1878 for (n--; n >= 0; n--)
1879 switch (class[n])
1880 {
1881 case X86_64_INTEGER_CLASS:
1882 case X86_64_INTEGERSI_CLASS:
1883 (*int_nregs)++;
1884 break;
1885 case X86_64_SSE_CLASS:
1886 case X86_64_SSESF_CLASS:
1887 case X86_64_SSEDF_CLASS:
1888 (*sse_nregs)++;
1889 break;
1890 case X86_64_NO_CLASS:
1891 case X86_64_SSEUP_CLASS:
1892 break;
1893 case X86_64_X87_CLASS:
1894 case X86_64_X87UP_CLASS:
1895 if (!in_return)
1896 return 0;
1897 break;
1898 case X86_64_MEMORY_CLASS:
1899 abort ();
1900 }
1901 return 1;
1902 }
1903 /* Construct the container for the argument as used by the GCC interface.  See
1904    FUNCTION_ARG for the detailed description.  */
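/* Continuing the struct { double d; int i; } example above: the container
   built below is a PARALLEL holding (reg:DF xmm0) at byte offset 0 and
   (reg:DI rdi) at byte offset 8, assuming this is the first argument of
   the call.  */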
1905 static rtx
1906 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1907 enum machine_mode mode;
1908 tree type;
1909 int in_return;
1910 int nintregs, nsseregs;
1911 const int * intreg;
1912 int sse_regno;
1913 {
1914 enum machine_mode tmpmode;
1915 int bytes =
1916 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1917 enum x86_64_reg_class class[MAX_CLASSES];
1918 int n;
1919 int i;
1920 int nexps = 0;
1921 int needed_sseregs, needed_intregs;
1922 rtx exp[MAX_CLASSES];
1923 rtx ret;
1924
1925 n = classify_argument (mode, type, class, 0);
1926 if (TARGET_DEBUG_ARG)
1927 {
1928 if (!n)
1929 fprintf (stderr, "Memory class\n");
1930 else
1931 {
1932 fprintf (stderr, "Classes:");
1933 for (i = 0; i < n; i++)
1934 {
1935 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1936 }
1937 fprintf (stderr, "\n");
1938 }
1939 }
1940 if (!n)
1941 return NULL;
1942 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1943 return NULL;
1944 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1945 return NULL;
1946
1947 /* First construct simple cases. Avoid SCmode, since we want to use
1948      a single register to pass this type.  */
1949 if (n == 1 && mode != SCmode)
1950 switch (class[0])
1951 {
1952 case X86_64_INTEGER_CLASS:
1953 case X86_64_INTEGERSI_CLASS:
1954 return gen_rtx_REG (mode, intreg[0]);
1955 case X86_64_SSE_CLASS:
1956 case X86_64_SSESF_CLASS:
1957 case X86_64_SSEDF_CLASS:
1958 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1959 case X86_64_X87_CLASS:
1960 return gen_rtx_REG (mode, FIRST_STACK_REG);
1961 case X86_64_NO_CLASS:
1962 /* Zero sized array, struct or class. */
1963 return NULL;
1964 default:
1965 abort ();
1966 }
1967 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1968 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1969 if (n == 2
1970 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1971 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1972 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1973 && class[1] == X86_64_INTEGER_CLASS
1974 && (mode == CDImode || mode == TImode)
1975 && intreg[0] + 1 == intreg[1])
1976 return gen_rtx_REG (mode, intreg[0]);
1977 if (n == 4
1978 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1979 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1980 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1981
1982 /* Otherwise figure out the entries of the PARALLEL. */
1983 for (i = 0; i < n; i++)
1984 {
1985 switch (class[i])
1986 {
1987 case X86_64_NO_CLASS:
1988 break;
1989 case X86_64_INTEGER_CLASS:
1990 case X86_64_INTEGERSI_CLASS:
1991 	  /* Merge TImodes on aligned occasions here too.  */
1992 if (i * 8 + 8 > bytes)
1993 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1994 else if (class[i] == X86_64_INTEGERSI_CLASS)
1995 tmpmode = SImode;
1996 else
1997 tmpmode = DImode;
1998 	  /* We've requested a size (such as 24 bits) that no integer mode provides; use DImode.  */
1999 if (tmpmode == BLKmode)
2000 tmpmode = DImode;
2001 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2002 gen_rtx_REG (tmpmode, *intreg),
2003 GEN_INT (i*8));
2004 intreg++;
2005 break;
2006 case X86_64_SSESF_CLASS:
2007 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2008 gen_rtx_REG (SFmode,
2009 SSE_REGNO (sse_regno)),
2010 GEN_INT (i*8));
2011 sse_regno++;
2012 break;
2013 case X86_64_SSEDF_CLASS:
2014 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2015 gen_rtx_REG (DFmode,
2016 SSE_REGNO (sse_regno)),
2017 GEN_INT (i*8));
2018 sse_regno++;
2019 break;
2020 case X86_64_SSE_CLASS:
2021 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2022 tmpmode = TImode, i++;
2023 else
2024 tmpmode = DImode;
2025 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2026 gen_rtx_REG (tmpmode,
2027 SSE_REGNO (sse_regno)),
2028 GEN_INT (i*8));
2029 sse_regno++;
2030 break;
2031 default:
2032 abort ();
2033 }
2034 }
2035 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2036 for (i = 0; i < nexps; i++)
2037 XVECEXP (ret, 0, i) = exp [i];
2038 return ret;
2039 }
2040
2041 /* Update the data in CUM to advance over an argument
2042 of mode MODE and data type TYPE.
2043 (TYPE is null for libcalls where that information may not be available.) */
2044
2045 void
2046 function_arg_advance (cum, mode, type, named)
2047 CUMULATIVE_ARGS *cum; /* current arg information */
2048 enum machine_mode mode; /* current arg mode */
2049 tree type; /* type of the argument or 0 if lib support */
2050 int named; /* whether or not the argument was named */
2051 {
2052 int bytes =
2053 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2054 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2055
2056 if (TARGET_DEBUG_ARG)
2057 fprintf (stderr,
2058 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2059 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2060 if (TARGET_64BIT)
2061 {
2062 int int_nregs, sse_nregs;
2063 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2064 cum->words += words;
2065 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2066 {
2067 cum->nregs -= int_nregs;
2068 cum->sse_nregs -= sse_nregs;
2069 cum->regno += int_nregs;
2070 cum->sse_regno += sse_nregs;
2071 }
2072 else
2073 cum->words += words;
2074 }
2075 else
2076 {
2077 if (TARGET_SSE && mode == TImode)
2078 {
2079 cum->sse_words += words;
2080 cum->sse_nregs -= 1;
2081 cum->sse_regno += 1;
2082 if (cum->sse_nregs <= 0)
2083 {
2084 cum->sse_nregs = 0;
2085 cum->sse_regno = 0;
2086 }
2087 }
2088 else
2089 {
2090 cum->words += words;
2091 cum->nregs -= words;
2092 cum->regno += words;
2093
2094 if (cum->nregs <= 0)
2095 {
2096 cum->nregs = 0;
2097 cum->regno = 0;
2098 }
2099 }
2100 }
2101 return;
2102 }
2103
2104 /* Define where to put the arguments to a function.
2105 Value is zero to push the argument on the stack,
2106 or a hard register in which to store the argument.
2107
2108 MODE is the argument's machine mode.
2109 TYPE is the data type of the argument (as a tree).
2110 This is null for libcalls where that information may
2111 not be available.
2112 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2113 the preceding args and about the function being called.
2114 NAMED is nonzero if this argument is a named parameter
2115 (otherwise it is an extra parameter matching an ellipsis). */
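/* For example, on x86-64 the first six integer arguments go in rdi, rsi,
   rdx, rcx, r8 and r9 and the first eight SSE arguments in xmm0-xmm7; on
   ia32 with regparm (3) the first three integer words go in eax, edx and
   ecx, and everything else is pushed on the stack.  */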
2116
2117 rtx
2118 function_arg (cum, mode, type, named)
2119 CUMULATIVE_ARGS *cum; /* current arg information */
2120 enum machine_mode mode; /* current arg mode */
2121 tree type; /* type of the argument or 0 if lib support */
2122 int named; /* != 0 for normal args, == 0 for ... args */
2123 {
2124 rtx ret = NULL_RTX;
2125 int bytes =
2126 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2127 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2128
2129   /* Handle a hidden AL argument containing the number of SSE registers used
2130      when calling varargs x86-64 functions.  For the i386 ABI just return
2131      constm1_rtx to avoid any AL settings.  */
2132 if (mode == VOIDmode)
2133 {
2134 if (TARGET_64BIT)
2135 return GEN_INT (cum->maybe_vaarg
2136 ? (cum->sse_nregs < 0
2137 ? SSE_REGPARM_MAX
2138 : cum->sse_regno)
2139 : -1);
2140 else
2141 return constm1_rtx;
2142 }
2143 if (TARGET_64BIT)
2144 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2145 &x86_64_int_parameter_registers [cum->regno],
2146 cum->sse_regno);
2147 else
2148 switch (mode)
2149 {
2150 /* For now, pass fp/complex values on the stack. */
2151 default:
2152 break;
2153
2154 case BLKmode:
2155 case DImode:
2156 case SImode:
2157 case HImode:
2158 case QImode:
2159 if (words <= cum->nregs)
2160 ret = gen_rtx_REG (mode, cum->regno);
2161 break;
2162 case TImode:
2163 if (cum->sse_nregs)
2164 ret = gen_rtx_REG (mode, cum->sse_regno);
2165 break;
2166 }
2167
2168 if (TARGET_DEBUG_ARG)
2169 {
2170 fprintf (stderr,
2171 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2172 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2173
2174 if (ret)
2175 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2176 else
2177 fprintf (stderr, ", stack");
2178
2179 fprintf (stderr, " )\n");
2180 }
2181
2182 return ret;
2183 }
2184
2185 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2186 and type. */
2187
2188 int
2189 ix86_function_arg_boundary (mode, type)
2190 enum machine_mode mode;
2191 tree type;
2192 {
2193 int align;
2194 if (!TARGET_64BIT)
2195 return PARM_BOUNDARY;
2196 if (type)
2197 align = TYPE_ALIGN (type);
2198 else
2199 align = GET_MODE_ALIGNMENT (mode);
2200 if (align < PARM_BOUNDARY)
2201 align = PARM_BOUNDARY;
2202 if (align > 128)
2203 align = 128;
2204 return align;
2205 }
2206
2207 /* Return true if REGNO is a possible register number for a function value.  */
2208 bool
2209 ix86_function_value_regno_p (regno)
2210 int regno;
2211 {
2212 if (!TARGET_64BIT)
2213 {
2214 return ((regno) == 0
2215 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2216 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2217 }
2218 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2219 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2220 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2221 }
2222
2223 /* Define how to find the value returned by a function.
2224 VALTYPE is the data type of the value (as a tree).
2225 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2226 otherwise, FUNC is 0. */
2227 rtx
2228 ix86_function_value (valtype)
2229 tree valtype;
2230 {
2231 if (TARGET_64BIT)
2232 {
2233 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2234 REGPARM_MAX, SSE_REGPARM_MAX,
2235 x86_64_int_return_registers, 0);
2236       /* For zero-sized structures, construct_container returns NULL, but we need
2237 	 to keep the rest of the compiler happy by returning a meaningful value.  */
2238 if (!ret)
2239 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2240 return ret;
2241 }
2242 else
2243 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2244 }
2245
2246 /* Return nonzero iff the type is returned in memory.  */
2247 int
2248 ix86_return_in_memory (type)
2249 tree type;
2250 {
2251 int needed_intregs, needed_sseregs;
2252 if (TARGET_64BIT)
2253 {
2254 return !examine_argument (TYPE_MODE (type), type, 1,
2255 &needed_intregs, &needed_sseregs);
2256 }
2257 else
2258 {
2259 if (TYPE_MODE (type) == BLKmode
2260 || (VECTOR_MODE_P (TYPE_MODE (type))
2261 && int_size_in_bytes (type) == 8)
2262 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2263 && TYPE_MODE (type) != TFmode
2264 && !VECTOR_MODE_P (TYPE_MODE (type))))
2265 return 1;
2266 return 0;
2267 }
2268 }
2269
2270 /* Define how to find the value returned by a library function
2271 assuming the value has mode MODE. */
2272 rtx
2273 ix86_libcall_value (mode)
2274 enum machine_mode mode;
2275 {
2276 if (TARGET_64BIT)
2277 {
2278 switch (mode)
2279 {
2280 case SFmode:
2281 case SCmode:
2282 case DFmode:
2283 case DCmode:
2284 return gen_rtx_REG (mode, FIRST_SSE_REG);
2285 case TFmode:
2286 case TCmode:
2287 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2288 default:
2289 return gen_rtx_REG (mode, 0);
2290 }
2291 }
2292 else
2293 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2294 }
2295 \f
2296 /* Create the va_list data type. */
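/* On x86-64 the record built below corresponds roughly to

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   as mandated by the psABI; on i386 va_list is just a `char *'.  */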
2297
2298 tree
2299 ix86_build_va_list ()
2300 {
2301 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2302
2303   /* For i386 we use a plain pointer to the argument area.  */
2304 if (!TARGET_64BIT)
2305 return build_pointer_type (char_type_node);
2306
2307 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2308 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2309
2310 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2311 unsigned_type_node);
2312 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2313 unsigned_type_node);
2314 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2315 ptr_type_node);
2316 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2317 ptr_type_node);
2318
2319 DECL_FIELD_CONTEXT (f_gpr) = record;
2320 DECL_FIELD_CONTEXT (f_fpr) = record;
2321 DECL_FIELD_CONTEXT (f_ovf) = record;
2322 DECL_FIELD_CONTEXT (f_sav) = record;
2323
2324 TREE_CHAIN (record) = type_decl;
2325 TYPE_NAME (record) = type_decl;
2326 TYPE_FIELDS (record) = f_gpr;
2327 TREE_CHAIN (f_gpr) = f_fpr;
2328 TREE_CHAIN (f_fpr) = f_ovf;
2329 TREE_CHAIN (f_ovf) = f_sav;
2330
2331 layout_type (record);
2332
2333 /* The correct type is an array type of one element. */
2334 return build_array_type (record, build_index_type (size_zero_node));
2335 }
2336
2337 /* Perform any actions needed for a function that is receiving a
2338 variable number of arguments.
2339
2340 CUM is as above.
2341
2342 MODE and TYPE are the mode and type of the current parameter.
2343
2344 PRETEND_SIZE is a variable that should be set to the amount of stack
2345 that must be pushed by the prolog to pretend that our caller pushed
2346 it.
2347
2348 Normally, this macro will push all remaining incoming registers on the
2349 stack and set PRETEND_SIZE to the length of the registers pushed. */
2350
2351 void
2352 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2353 CUMULATIVE_ARGS *cum;
2354 enum machine_mode mode;
2355 tree type;
2356 int *pretend_size ATTRIBUTE_UNUSED;
2357 int no_rtl;
2358
2359 {
2360 CUMULATIVE_ARGS next_cum;
2361 rtx save_area = NULL_RTX, mem;
2362 rtx label;
2363 rtx label_ref;
2364 rtx tmp_reg;
2365 rtx nsse_reg;
2366 int set;
2367 tree fntype;
2368 int stdarg_p;
2369 int i;
2370
2371 if (!TARGET_64BIT)
2372 return;
2373
2374   /* Indicate that we need to allocate stack space for the varargs save area.  */
2375 ix86_save_varrargs_registers = 1;
2376
2377 fntype = TREE_TYPE (current_function_decl);
2378 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2379 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2380 != void_type_node));
2381
2382 /* For varargs, we do not want to skip the dummy va_dcl argument.
2383 For stdargs, we do want to skip the last named argument. */
2384 next_cum = *cum;
2385 if (stdarg_p)
2386 function_arg_advance (&next_cum, mode, type, 1);
2387
2388 if (!no_rtl)
2389 save_area = frame_pointer_rtx;
2390
2391 set = get_varargs_alias_set ();
2392
2393 for (i = next_cum.regno; i < ix86_regparm; i++)
2394 {
2395 mem = gen_rtx_MEM (Pmode,
2396 plus_constant (save_area, i * UNITS_PER_WORD));
2397 set_mem_alias_set (mem, set);
2398 emit_move_insn (mem, gen_rtx_REG (Pmode,
2399 x86_64_int_parameter_registers[i]));
2400 }
2401
2402 if (next_cum.sse_nregs)
2403 {
2404       /* Now emit code to save the SSE registers.  The AL register contains the
2405 	 number of SSE argument registers used to call this function.  We use the
2406 	 sse_prologue_save insn template, which produces a computed jump across the
2407 	 SSE saves.  We need some preparation work to get this working.  */
2408
2409 label = gen_label_rtx ();
2410 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2411
2412       /* Compute the address to jump to:
2413 	 label + nnamed_sse_arguments * 4 - eax * 4.  */
2414 tmp_reg = gen_reg_rtx (Pmode);
2415 nsse_reg = gen_reg_rtx (Pmode);
2416 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2417 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2418 gen_rtx_MULT (Pmode, nsse_reg,
2419 GEN_INT (4))));
2420 if (next_cum.sse_regno)
2421 emit_move_insn
2422 (nsse_reg,
2423 gen_rtx_CONST (DImode,
2424 gen_rtx_PLUS (DImode,
2425 label_ref,
2426 GEN_INT (next_cum.sse_regno * 4))));
2427 else
2428 emit_move_insn (nsse_reg, label_ref);
2429 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2430
2431       /* Compute the address of the memory block we save into.  We always use a
2432 	 pointer pointing 127 bytes past the first byte to store into; this keeps each
2433 	 save instruction's displacement within a signed byte, limiting the instruction size to 4 bytes.  */
2434 tmp_reg = gen_reg_rtx (Pmode);
2435 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2436 plus_constant (save_area,
2437 8 * REGPARM_MAX + 127)));
2438 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2439 set_mem_alias_set (mem, set);
2440 set_mem_align (mem, BITS_PER_WORD);
2441
2442 /* And finally do the dirty job! */
2443 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2444 GEN_INT (next_cum.sse_regno), label));
2445 }
2446
2447 }
2448
2449 /* Implement va_start. */
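/* The register save area laid out by the prologue places the six integer
   argument registers first (8 bytes each), followed by the SSE registers
   (16 bytes each); hence fp_offset below starts at 8 * REGPARM_MAX.  */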
2450
2451 void
2452 ix86_va_start (stdarg_p, valist, nextarg)
2453 int stdarg_p;
2454 tree valist;
2455 rtx nextarg;
2456 {
2457 HOST_WIDE_INT words, n_gpr, n_fpr;
2458 tree f_gpr, f_fpr, f_ovf, f_sav;
2459 tree gpr, fpr, ovf, sav, t;
2460
2461   /* Only the 64-bit target needs something special.  */
2462 if (!TARGET_64BIT)
2463 {
2464 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2465 return;
2466 }
2467
2468 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2469 f_fpr = TREE_CHAIN (f_gpr);
2470 f_ovf = TREE_CHAIN (f_fpr);
2471 f_sav = TREE_CHAIN (f_ovf);
2472
2473 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2474 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2475 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2476 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2477 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2478
2479 /* Count number of gp and fp argument registers used. */
2480 words = current_function_args_info.words;
2481 n_gpr = current_function_args_info.regno;
2482 n_fpr = current_function_args_info.sse_regno;
2483
2484 if (TARGET_DEBUG_ARG)
2485 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2486 (int) words, (int) n_gpr, (int) n_fpr);
2487
2488 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2489 build_int_2 (n_gpr * 8, 0));
2490 TREE_SIDE_EFFECTS (t) = 1;
2491 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2492
2493 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2494 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2495 TREE_SIDE_EFFECTS (t) = 1;
2496 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2497
2498 /* Find the overflow area. */
2499 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2500 if (words != 0)
2501 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2502 build_int_2 (words * UNITS_PER_WORD, 0));
2503 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2504 TREE_SIDE_EFFECTS (t) = 1;
2505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2506
2507 /* Find the register save area.
2508      The function prologue saves it right above the stack frame.  */
2509 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2510 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2511 TREE_SIDE_EFFECTS (t) = 1;
2512 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2513 }
2514
2515 /* Implement va_arg. */
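/* The expansion below first checks gp_offset/fp_offset against the size of
   the register save area; if the argument still fits there it is fetched from
   reg_save_area (copying through a temporary when the pieces are not
   contiguous), otherwise it is taken from overflow_arg_area, which is then
   advanced past the argument.  */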
2516 rtx
2517 ix86_va_arg (valist, type)
2518 tree valist, type;
2519 {
2520 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2521 tree f_gpr, f_fpr, f_ovf, f_sav;
2522 tree gpr, fpr, ovf, sav, t;
2523 int size, rsize;
2524 rtx lab_false, lab_over = NULL_RTX;
2525 rtx addr_rtx, r;
2526 rtx container;
2527
2528   /* Only the 64-bit target needs something special.  */
2529 if (!TARGET_64BIT)
2530 {
2531 return std_expand_builtin_va_arg (valist, type);
2532 }
2533
2534 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2535 f_fpr = TREE_CHAIN (f_gpr);
2536 f_ovf = TREE_CHAIN (f_fpr);
2537 f_sav = TREE_CHAIN (f_ovf);
2538
2539 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2540 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2541 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2542 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2543 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2544
2545 size = int_size_in_bytes (type);
2546 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2547
2548 container = construct_container (TYPE_MODE (type), type, 0,
2549 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2550   /* Pull the value out of the saved registers...  */
2553
2554 addr_rtx = gen_reg_rtx (Pmode);
2555
2556 if (container)
2557 {
2558 rtx int_addr_rtx, sse_addr_rtx;
2559 int needed_intregs, needed_sseregs;
2560 int need_temp;
2561
2562 lab_over = gen_label_rtx ();
2563 lab_false = gen_label_rtx ();
2564
2565 examine_argument (TYPE_MODE (type), type, 0,
2566 &needed_intregs, &needed_sseregs);
2567
2568
2569 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2570 || TYPE_ALIGN (type) > 128);
2571
2572       /* If we are passing a structure, verify that it forms a consecutive block
2573 	 in the register save area.  If not, we need to do moves.  */
2574 if (!need_temp && !REG_P (container))
2575 {
2576 	  /* Verify that all registers are strictly consecutive.  */
2577 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2578 {
2579 int i;
2580
2581 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2582 {
2583 rtx slot = XVECEXP (container, 0, i);
2584 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2585 || INTVAL (XEXP (slot, 1)) != i * 16)
2586 need_temp = 1;
2587 }
2588 }
2589 else
2590 {
2591 int i;
2592
2593 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2594 {
2595 rtx slot = XVECEXP (container, 0, i);
2596 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2597 || INTVAL (XEXP (slot, 1)) != i * 8)
2598 need_temp = 1;
2599 }
2600 }
2601 }
2602 if (!need_temp)
2603 {
2604 int_addr_rtx = addr_rtx;
2605 sse_addr_rtx = addr_rtx;
2606 }
2607 else
2608 {
2609 int_addr_rtx = gen_reg_rtx (Pmode);
2610 sse_addr_rtx = gen_reg_rtx (Pmode);
2611 }
2612 /* First ensure that we fit completely in registers. */
2613 if (needed_intregs)
2614 {
2615 emit_cmp_and_jump_insns (expand_expr
2616 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2617 GEN_INT ((REGPARM_MAX - needed_intregs +
2618 1) * 8), GE, const1_rtx, SImode,
2619 1, lab_false);
2620 }
2621 if (needed_sseregs)
2622 {
2623 emit_cmp_and_jump_insns (expand_expr
2624 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2625 GEN_INT ((SSE_REGPARM_MAX -
2626 needed_sseregs + 1) * 16 +
2627 REGPARM_MAX * 8), GE, const1_rtx,
2628 SImode, 1, lab_false);
2629 }
2630
2631 /* Compute index to start of area used for integer regs. */
2632 if (needed_intregs)
2633 {
2634 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2635 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2636 if (r != int_addr_rtx)
2637 emit_move_insn (int_addr_rtx, r);
2638 }
2639 if (needed_sseregs)
2640 {
2641 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2642 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2643 if (r != sse_addr_rtx)
2644 emit_move_insn (sse_addr_rtx, r);
2645 }
2646 if (need_temp)
2647 {
2648 int i;
2649 rtx mem;
2650
2651 /* Never use the memory itself, as it has the alias set. */
2652 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2653 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2654 set_mem_alias_set (mem, get_varargs_alias_set ());
2655 set_mem_align (mem, BITS_PER_UNIT);
2656
2657 for (i = 0; i < XVECLEN (container, 0); i++)
2658 {
2659 rtx slot = XVECEXP (container, 0, i);
2660 rtx reg = XEXP (slot, 0);
2661 enum machine_mode mode = GET_MODE (reg);
2662 rtx src_addr;
2663 rtx src_mem;
2664 int src_offset;
2665 rtx dest_mem;
2666
2667 if (SSE_REGNO_P (REGNO (reg)))
2668 {
2669 src_addr = sse_addr_rtx;
2670 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2671 }
2672 else
2673 {
2674 src_addr = int_addr_rtx;
2675 src_offset = REGNO (reg) * 8;
2676 }
2677 src_mem = gen_rtx_MEM (mode, src_addr);
2678 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2679 src_mem = adjust_address (src_mem, mode, src_offset);
2680 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2681 emit_move_insn (dest_mem, src_mem);
2682 }
2683 }
2684
2685 if (needed_intregs)
2686 {
2687 t =
2688 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2689 build_int_2 (needed_intregs * 8, 0));
2690 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2691 TREE_SIDE_EFFECTS (t) = 1;
2692 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2693 }
2694 if (needed_sseregs)
2695 {
2696 t =
2697 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2698 build_int_2 (needed_sseregs * 16, 0));
2699 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2700 TREE_SIDE_EFFECTS (t) = 1;
2701 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2702 }
2703
2704 emit_jump_insn (gen_jump (lab_over));
2705 emit_barrier ();
2706 emit_label (lab_false);
2707 }
2708
2709 /* ... otherwise out of the overflow area. */
2710
2711 /* Care for on-stack alignment if needed. */
2712 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2713 t = ovf;
2714 else
2715 {
2716 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2717 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2718 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2719 }
2720 t = save_expr (t);
2721
2722 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2723 if (r != addr_rtx)
2724 emit_move_insn (addr_rtx, r);
2725
2726 t =
2727 build (PLUS_EXPR, TREE_TYPE (t), t,
2728 build_int_2 (rsize * UNITS_PER_WORD, 0));
2729 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2730 TREE_SIDE_EFFECTS (t) = 1;
2731 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2732
2733 if (container)
2734 emit_label (lab_over);
2735
2736 return addr_rtx;
2737 }
2738 \f
2739 /* Return nonzero if OP is a general operand representable on x86_64.  */
2740
2741 int
2742 x86_64_general_operand (op, mode)
2743 rtx op;
2744 enum machine_mode mode;
2745 {
2746 if (!TARGET_64BIT)
2747 return general_operand (op, mode);
2748 if (nonimmediate_operand (op, mode))
2749 return 1;
2750 return x86_64_sign_extended_value (op);
2751 }
2752
2753 /* Return nonzero if OP is a general operand representable on x86_64
2754    as either a sign-extended or zero-extended constant.  */
2755
2756 int
2757 x86_64_szext_general_operand (op, mode)
2758 rtx op;
2759 enum machine_mode mode;
2760 {
2761 if (!TARGET_64BIT)
2762 return general_operand (op, mode);
2763 if (nonimmediate_operand (op, mode))
2764 return 1;
2765 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2766 }
2767
2768 /* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
2769
2770 int
2771 x86_64_nonmemory_operand (op, mode)
2772 rtx op;
2773 enum machine_mode mode;
2774 {
2775 if (!TARGET_64BIT)
2776 return nonmemory_operand (op, mode);
2777 if (register_operand (op, mode))
2778 return 1;
2779 return x86_64_sign_extended_value (op);
2780 }
2781
2782 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
2783
2784 int
2785 x86_64_movabs_operand (op, mode)
2786 rtx op;
2787 enum machine_mode mode;
2788 {
2789 if (!TARGET_64BIT || !flag_pic)
2790 return nonmemory_operand (op, mode);
2791 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2792 return 1;
2793 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2794 return 1;
2795 return 0;
2796 }
2797
2798 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign-extended or zero-extended constant.  */
2799
2800 int
2801 x86_64_szext_nonmemory_operand (op, mode)
2802 rtx op;
2803 enum machine_mode mode;
2804 {
2805 if (!TARGET_64BIT)
2806 return nonmemory_operand (op, mode);
2807 if (register_operand (op, mode))
2808 return 1;
2809 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2810 }
2811
2812 /* Return nonzero if OP is an immediate operand representable on x86_64.  */
2813
2814 int
2815 x86_64_immediate_operand (op, mode)
2816 rtx op;
2817 enum machine_mode mode;
2818 {
2819 if (!TARGET_64BIT)
2820 return immediate_operand (op, mode);
2821 return x86_64_sign_extended_value (op);
2822 }
2823
2824 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant.  */
2825
2826 int
2827 x86_64_zext_immediate_operand (op, mode)
2828 rtx op;
2829 enum machine_mode mode ATTRIBUTE_UNUSED;
2830 {
2831 return x86_64_zero_extended_value (op);
2832 }
2833
2834 /* Return nonzero if OP is (const_int 1), else return zero. */
2835
2836 int
2837 const_int_1_operand (op, mode)
2838 rtx op;
2839 enum machine_mode mode ATTRIBUTE_UNUSED;
2840 {
2841 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2842 }
2843
2844 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2845 reference and a constant. */
2846
2847 int
2848 symbolic_operand (op, mode)
2849 register rtx op;
2850 enum machine_mode mode ATTRIBUTE_UNUSED;
2851 {
2852 switch (GET_CODE (op))
2853 {
2854 case SYMBOL_REF:
2855 case LABEL_REF:
2856 return 1;
2857
2858 case CONST:
2859 op = XEXP (op, 0);
2860 if (GET_CODE (op) == SYMBOL_REF
2861 || GET_CODE (op) == LABEL_REF
2862 || (GET_CODE (op) == UNSPEC
2863 && (XINT (op, 1) == 6
2864 || XINT (op, 1) == 7
2865 || XINT (op, 1) == 15)))
2866 return 1;
2867 if (GET_CODE (op) != PLUS
2868 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2869 return 0;
2870
2871 op = XEXP (op, 0);
2872 if (GET_CODE (op) == SYMBOL_REF
2873 || GET_CODE (op) == LABEL_REF)
2874 return 1;
2875 /* Only @GOTOFF gets offsets. */
2876 if (GET_CODE (op) != UNSPEC
2877 || XINT (op, 1) != 7)
2878 return 0;
2879
2880 op = XVECEXP (op, 0, 0);
2881 if (GET_CODE (op) == SYMBOL_REF
2882 || GET_CODE (op) == LABEL_REF)
2883 return 1;
2884 return 0;
2885
2886 default:
2887 return 0;
2888 }
2889 }
2890
2891 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2892
2893 int
2894 pic_symbolic_operand (op, mode)
2895 register rtx op;
2896 enum machine_mode mode ATTRIBUTE_UNUSED;
2897 {
2898 if (GET_CODE (op) != CONST)
2899 return 0;
2900 op = XEXP (op, 0);
2901 if (TARGET_64BIT)
2902 {
2903 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2904 return 1;
2905 }
2906 else
2907 {
2908 if (GET_CODE (op) == UNSPEC)
2909 return 1;
2910 if (GET_CODE (op) != PLUS
2911 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2912 return 0;
2913 op = XEXP (op, 0);
2914 if (GET_CODE (op) == UNSPEC)
2915 return 1;
2916 }
2917 return 0;
2918 }
2919
2920 /* Return true if OP is a symbolic operand that resolves locally. */
2921
2922 static int
2923 local_symbolic_operand (op, mode)
2924 rtx op;
2925 enum machine_mode mode ATTRIBUTE_UNUSED;
2926 {
2927 if (GET_CODE (op) == LABEL_REF)
2928 return 1;
2929
2930 if (GET_CODE (op) == CONST
2931 && GET_CODE (XEXP (op, 0)) == PLUS
2932 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2933 op = XEXP (XEXP (op, 0), 0);
2934
2935 if (GET_CODE (op) != SYMBOL_REF)
2936 return 0;
2937
2938 /* These we've been told are local by varasm and encode_section_info
2939 respectively. */
2940 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2941 return 1;
2942
2943 /* There is, however, a not insubstantial body of code in the rest of
2944 the compiler that assumes it can just stick the results of
2945 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2946 /* ??? This is a hack. Should update the body of the compiler to
2947      always create a DECL and invoke ENCODE_SECTION_INFO.  */
2948 if (strncmp (XSTR (op, 0), internal_label_prefix,
2949 internal_label_prefix_len) == 0)
2950 return 1;
2951
2952 return 0;
2953 }
2954
2955 /* Test for a valid operand for a call instruction. Don't allow the
2956 arg pointer register or virtual regs since they may decay into
2957 reg + const, which the patterns can't handle. */
2958
2959 int
2960 call_insn_operand (op, mode)
2961 rtx op;
2962 enum machine_mode mode ATTRIBUTE_UNUSED;
2963 {
2964 /* Disallow indirect through a virtual register. This leads to
2965 compiler aborts when trying to eliminate them. */
2966 if (GET_CODE (op) == REG
2967 && (op == arg_pointer_rtx
2968 || op == frame_pointer_rtx
2969 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2970 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2971 return 0;
2972
2973 /* Disallow `call 1234'. Due to varying assembler lameness this
2974 gets either rejected or translated to `call .+1234'. */
2975 if (GET_CODE (op) == CONST_INT)
2976 return 0;
2977
2978 /* Explicitly allow SYMBOL_REF even if pic. */
2979 if (GET_CODE (op) == SYMBOL_REF)
2980 return 1;
2981
2982 /* Half-pic doesn't allow anything but registers and constants.
2983      We've just taken care of the latter.  */
2984 if (HALF_PIC_P ())
2985 return register_operand (op, Pmode);
2986
2987 /* Otherwise we can allow any general_operand in the address. */
2988 return general_operand (op, Pmode);
2989 }
2990
2991 int
2992 constant_call_address_operand (op, mode)
2993 rtx op;
2994 enum machine_mode mode ATTRIBUTE_UNUSED;
2995 {
2996 if (GET_CODE (op) == CONST
2997 && GET_CODE (XEXP (op, 0)) == PLUS
2998 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2999 op = XEXP (XEXP (op, 0), 0);
3000 return GET_CODE (op) == SYMBOL_REF;
3001 }
3002
3003 /* Match exactly zero and one. */
3004
3005 int
3006 const0_operand (op, mode)
3007 register rtx op;
3008 enum machine_mode mode;
3009 {
3010 return op == CONST0_RTX (mode);
3011 }
3012
3013 int
3014 const1_operand (op, mode)
3015 register rtx op;
3016 enum machine_mode mode ATTRIBUTE_UNUSED;
3017 {
3018 return op == const1_rtx;
3019 }
3020
3021 /* Match 2, 4, or 8. Used for leal multiplicands. */
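/* E.g. the scale factor in `leal (%ebx,%ecx,4), %eax'.  */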
3022
3023 int
3024 const248_operand (op, mode)
3025 register rtx op;
3026 enum machine_mode mode ATTRIBUTE_UNUSED;
3027 {
3028 return (GET_CODE (op) == CONST_INT
3029 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3030 }
3031
3032 /* True if this is a constant appropriate for an increment or decrement.  */
3033
3034 int
3035 incdec_operand (op, mode)
3036 register rtx op;
3037 enum machine_mode mode ATTRIBUTE_UNUSED;
3038 {
3039   /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3040      the flags register, since the carry flag is not set.  */
3041 if (TARGET_PENTIUM4 && !optimize_size)
3042 return 0;
3043 return op == const1_rtx || op == constm1_rtx;
3044 }
3045
3046 /* Return nonzero if OP is acceptable as operand of DImode shift
3047 expander. */
3048
3049 int
3050 shiftdi_operand (op, mode)
3051 rtx op;
3052 enum machine_mode mode ATTRIBUTE_UNUSED;
3053 {
3054 if (TARGET_64BIT)
3055 return nonimmediate_operand (op, mode);
3056 else
3057 return register_operand (op, mode);
3058 }
3059
3060 /* Return false if this is the stack pointer, or any other fake
3061 register eliminable to the stack pointer. Otherwise, this is
3062 a register operand.
3063
3064    This is used to prevent esp from being used as an index register,
3065    which would only happen in pathological cases.  */
3066
3067 int
3068 reg_no_sp_operand (op, mode)
3069 register rtx op;
3070 enum machine_mode mode;
3071 {
3072 rtx t = op;
3073 if (GET_CODE (t) == SUBREG)
3074 t = SUBREG_REG (t);
3075 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3076 return 0;
3077
3078 return register_operand (op, mode);
3079 }
3080
3081 int
3082 mmx_reg_operand (op, mode)
3083 register rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 {
3086 return MMX_REG_P (op);
3087 }
3088
3089 /* Return false if this is any eliminable register. Otherwise
3090 general_operand. */
3091
3092 int
3093 general_no_elim_operand (op, mode)
3094 register rtx op;
3095 enum machine_mode mode;
3096 {
3097 rtx t = op;
3098 if (GET_CODE (t) == SUBREG)
3099 t = SUBREG_REG (t);
3100 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3101 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3102 || t == virtual_stack_dynamic_rtx)
3103 return 0;
3104 if (REG_P (t)
3105 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3106 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3107 return 0;
3108
3109 return general_operand (op, mode);
3110 }
3111
3112 /* Return false if this is any eliminable register. Otherwise
3113 register_operand or const_int. */
3114
3115 int
3116 nonmemory_no_elim_operand (op, mode)
3117 register rtx op;
3118 enum machine_mode mode;
3119 {
3120 rtx t = op;
3121 if (GET_CODE (t) == SUBREG)
3122 t = SUBREG_REG (t);
3123 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3124 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3125 || t == virtual_stack_dynamic_rtx)
3126 return 0;
3127
3128 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3129 }
3130
3131 /* Return true if op is a Q_REGS class register. */
3132
3133 int
3134 q_regs_operand (op, mode)
3135 register rtx op;
3136 enum machine_mode mode;
3137 {
3138 if (mode != VOIDmode && GET_MODE (op) != mode)
3139 return 0;
3140 if (GET_CODE (op) == SUBREG)
3141 op = SUBREG_REG (op);
3142 return QI_REG_P (op);
3143 }
3144
3145 /* Return true if op is a NON_Q_REGS class register. */
3146
3147 int
3148 non_q_regs_operand (op, mode)
3149 register rtx op;
3150 enum machine_mode mode;
3151 {
3152 if (mode != VOIDmode && GET_MODE (op) != mode)
3153 return 0;
3154 if (GET_CODE (op) == SUBREG)
3155 op = SUBREG_REG (op);
3156 return NON_QI_REG_P (op);
3157 }
3158
3159 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3160 insns. */
3161 int
3162 sse_comparison_operator (op, mode)
3163 rtx op;
3164 enum machine_mode mode ATTRIBUTE_UNUSED;
3165 {
3166 enum rtx_code code = GET_CODE (op);
3167 switch (code)
3168 {
3169 /* Operations supported directly. */
3170 case EQ:
3171 case LT:
3172 case LE:
3173 case UNORDERED:
3174 case NE:
3175 case UNGE:
3176 case UNGT:
3177 case ORDERED:
3178 return 1;
3179 /* These are equivalent to ones above in non-IEEE comparisons. */
3180 case UNEQ:
3181 case UNLT:
3182 case UNLE:
3183 case LTGT:
3184 case GE:
3185 case GT:
3186 return !TARGET_IEEE_FP;
3187 default:
3188 return 0;
3189 }
3190 }
3191 /* Return 1 if OP is a valid comparison operator in valid mode. */
3192 int
3193 ix86_comparison_operator (op, mode)
3194 register rtx op;
3195 enum machine_mode mode;
3196 {
3197 enum machine_mode inmode;
3198 enum rtx_code code = GET_CODE (op);
3199 if (mode != VOIDmode && GET_MODE (op) != mode)
3200 return 0;
3201 if (GET_RTX_CLASS (code) != '<')
3202 return 0;
3203 inmode = GET_MODE (XEXP (op, 0));
3204
3205 if (inmode == CCFPmode || inmode == CCFPUmode)
3206 {
3207 enum rtx_code second_code, bypass_code;
3208 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3209 return (bypass_code == NIL && second_code == NIL);
3210 }
3211 switch (code)
3212 {
3213 case EQ: case NE:
3214 return 1;
3215 case LT: case GE:
3216 if (inmode == CCmode || inmode == CCGCmode
3217 || inmode == CCGOCmode || inmode == CCNOmode)
3218 return 1;
3219 return 0;
3220 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3221 if (inmode == CCmode)
3222 return 1;
3223 return 0;
3224 case GT: case LE:
3225 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3226 return 1;
3227 return 0;
3228 default:
3229 return 0;
3230 }
3231 }
3232
3233 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3234
3235 int
3236 fcmov_comparison_operator (op, mode)
3237 register rtx op;
3238 enum machine_mode mode;
3239 {
3240 enum machine_mode inmode;
3241 enum rtx_code code = GET_CODE (op);
3242 if (mode != VOIDmode && GET_MODE (op) != mode)
3243 return 0;
3244 if (GET_RTX_CLASS (code) != '<')
3245 return 0;
3246 inmode = GET_MODE (XEXP (op, 0));
3247 if (inmode == CCFPmode || inmode == CCFPUmode)
3248 {
3249 enum rtx_code second_code, bypass_code;
3250 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3251 if (bypass_code != NIL || second_code != NIL)
3252 return 0;
3253 code = ix86_fp_compare_code_to_integer (code);
3254 }
3255   /* The i387 supports only a limited set of condition codes.  */
3256 switch (code)
3257 {
3258 case LTU: case GTU: case LEU: case GEU:
3259 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3260 return 1;
3261 return 0;
3262 case ORDERED: case UNORDERED:
3263 case EQ: case NE:
3264 return 1;
3265 default:
3266 return 0;
3267 }
3268 }
3269
3270 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3271
3272 int
3273 promotable_binary_operator (op, mode)
3274 register rtx op;
3275 enum machine_mode mode ATTRIBUTE_UNUSED;
3276 {
3277 switch (GET_CODE (op))
3278 {
3279 case MULT:
3280       /* Modern CPUs have the same latency for HImode and SImode multiplies,
3281 	 but the 386 and 486 do HImode multiplies faster.  */
3282 return ix86_cpu > PROCESSOR_I486;
3283 case PLUS:
3284 case AND:
3285 case IOR:
3286 case XOR:
3287 case ASHIFT:
3288 return 1;
3289 default:
3290 return 0;
3291 }
3292 }
3293
3294 /* Nearly general operand, but accept any const_double, since we wish
3295 to be able to drop them into memory rather than have them get pulled
3296 into registers. */
3297
3298 int
3299 cmp_fp_expander_operand (op, mode)
3300 register rtx op;
3301 enum machine_mode mode;
3302 {
3303 if (mode != VOIDmode && mode != GET_MODE (op))
3304 return 0;
3305 if (GET_CODE (op) == CONST_DOUBLE)
3306 return 1;
3307 return general_operand (op, mode);
3308 }
3309
3310 /* Match an SImode or HImode register (or DImode on 64-bit targets) for a zero_extract.  */
3311
3312 int
3313 ext_register_operand (op, mode)
3314 register rtx op;
3315 enum machine_mode mode ATTRIBUTE_UNUSED;
3316 {
3317 int regno;
3318 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3319 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3320 return 0;
3321
3322 if (!register_operand (op, VOIDmode))
3323 return 0;
3324
3325   /* Be careful to accept only registers that have upper parts.  */
3326 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3327 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3328 }
3329
3330 /* Return 1 if this is a valid binary floating-point operation.
3331 OP is the expression matched, and MODE is its mode. */
3332
3333 int
3334 binary_fp_operator (op, mode)
3335 register rtx op;
3336 enum machine_mode mode;
3337 {
3338 if (mode != VOIDmode && mode != GET_MODE (op))
3339 return 0;
3340
3341 switch (GET_CODE (op))
3342 {
3343 case PLUS:
3344 case MINUS:
3345 case MULT:
3346 case DIV:
3347 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3348
3349 default:
3350 return 0;
3351 }
3352 }
3353
3354 int
3355 mult_operator (op, mode)
3356 register rtx op;
3357 enum machine_mode mode ATTRIBUTE_UNUSED;
3358 {
3359 return GET_CODE (op) == MULT;
3360 }
3361
3362 int
3363 div_operator (op, mode)
3364 register rtx op;
3365 enum machine_mode mode ATTRIBUTE_UNUSED;
3366 {
3367 return GET_CODE (op) == DIV;
3368 }
3369
3370 int
3371 arith_or_logical_operator (op, mode)
3372 rtx op;
3373 enum machine_mode mode;
3374 {
3375 return ((mode == VOIDmode || GET_MODE (op) == mode)
3376 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3377 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3378 }
3379
3380 /* Returns 1 if OP is a memory operand with a displacement.  */
3381
3382 int
3383 memory_displacement_operand (op, mode)
3384 register rtx op;
3385 enum machine_mode mode;
3386 {
3387 struct ix86_address parts;
3388
3389 if (! memory_operand (op, mode))
3390 return 0;
3391
3392 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3393 abort ();
3394
3395 return parts.disp != NULL_RTX;
3396 }
3397
3398 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3399 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3400
3401 ??? It seems likely that this will only work because cmpsi is an
3402 expander, and no actual insns use this. */
3403
3404 int
3405 cmpsi_operand (op, mode)
3406 rtx op;
3407 enum machine_mode mode;
3408 {
3409 if (nonimmediate_operand (op, mode))
3410 return 1;
3411
3412 if (GET_CODE (op) == AND
3413 && GET_MODE (op) == SImode
3414 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3415 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3416 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3417 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3418 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3419 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3420 return 1;
3421
3422 return 0;
3423 }
3424
3425 /* Returns 1 if OP is a memory operand that cannot be represented by the
3426    modRM array.  */
3427
3428 int
3429 long_memory_operand (op, mode)
3430 register rtx op;
3431 enum machine_mode mode;
3432 {
3433 if (! memory_operand (op, mode))
3434 return 0;
3435
3436 return memory_address_length (op) != 0;
3437 }
3438
3439 /* Return nonzero if the rtx is known to be aligned.  */
3440
3441 int
3442 aligned_operand (op, mode)
3443 rtx op;
3444 enum machine_mode mode;
3445 {
3446 struct ix86_address parts;
3447
3448 if (!general_operand (op, mode))
3449 return 0;
3450
3451 /* Registers and immediate operands are always "aligned". */
3452 if (GET_CODE (op) != MEM)
3453 return 1;
3454
3455 /* Don't even try to do any aligned optimizations with volatiles. */
3456 if (MEM_VOLATILE_P (op))
3457 return 0;
3458
3459 op = XEXP (op, 0);
3460
3461 /* Pushes and pops are only valid on the stack pointer. */
3462 if (GET_CODE (op) == PRE_DEC
3463 || GET_CODE (op) == POST_INC)
3464 return 1;
3465
3466 /* Decode the address. */
3467 if (! ix86_decompose_address (op, &parts))
3468 abort ();
3469
3470 if (parts.base && GET_CODE (parts.base) == SUBREG)
3471 parts.base = SUBREG_REG (parts.base);
3472 if (parts.index && GET_CODE (parts.index) == SUBREG)
3473 parts.index = SUBREG_REG (parts.index);
3474
3475 /* Look for some component that isn't known to be aligned. */
3476 if (parts.index)
3477 {
3478 if (parts.scale < 4
3479 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3480 return 0;
3481 }
3482 if (parts.base)
3483 {
3484 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3485 return 0;
3486 }
3487 if (parts.disp)
3488 {
3489 if (GET_CODE (parts.disp) != CONST_INT
3490 || (INTVAL (parts.disp) & 3) != 0)
3491 return 0;
3492 }
3493
3494 /* Didn't find one -- this must be an aligned address. */
3495 return 1;
3496 }
3497 \f
3498 /* Return true if the constant is something that can be loaded with
3499 a special instruction. Only handle 0.0 and 1.0; others are less
3500 worthwhile. */
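/* A return value of 1 corresponds to 0.0 (loadable with fldz) and 2 to 1.0
   (loadable with fld1); 0 means the constant must be loaded from memory, and
   -1 that it is not a floating-point CONST_DOUBLE at all.  */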
3501
3502 int
3503 standard_80387_constant_p (x)
3504 rtx x;
3505 {
3506 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3507 return -1;
3508   /* Note that the 80387 has other constants, such as pi, that we should
3509      support too.  On some machines, these are much slower to load as a
3510      standard constant than to load from doubles in memory.  */
3511 if (x == CONST0_RTX (GET_MODE (x)))
3512 return 1;
3513 if (x == CONST1_RTX (GET_MODE (x)))
3514 return 2;
3515 return 0;
3516 }
3517
3518 /* Return 1 if X is an FP constant we can load into an SSE register
3519    without using memory.  */
3520 int
3521 standard_sse_constant_p (x)
3522 rtx x;
3523 {
3524 if (GET_CODE (x) != CONST_DOUBLE)
3525 return -1;
3526 return (x == CONST0_RTX (GET_MODE (x)));
3527 }
3528
3529 /* Returns 1 if OP contains a symbol reference */
3530
3531 int
3532 symbolic_reference_mentioned_p (op)
3533 rtx op;
3534 {
3535 register const char *fmt;
3536 register int i;
3537
3538 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3539 return 1;
3540
3541 fmt = GET_RTX_FORMAT (GET_CODE (op));
3542 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3543 {
3544 if (fmt[i] == 'E')
3545 {
3546 register int j;
3547
3548 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3549 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3550 return 1;
3551 }
3552
3553 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3554 return 1;
3555 }
3556
3557 return 0;
3558 }
3559
3560 /* Return 1 if it is appropriate to emit `ret' instructions in the
3561 body of a function. Do this only if the epilogue is simple, needing a
3562 couple of insns. Prior to reloading, we can't tell how many registers
3563 must be saved, so return 0 then. Return 0 if there is no frame
3564 marker to de-allocate.
3565
3566 If NON_SAVING_SETJMP is defined and true, then it is not possible
3567 for the epilogue to be simple, so return 0. This is a special case
3568 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3569 until final, but jump_optimize may need to know sooner if a
3570 `return' is OK. */
3571
3572 int
3573 ix86_can_use_return_insn_p ()
3574 {
3575 struct ix86_frame frame;
3576
3577 #ifdef NON_SAVING_SETJMP
3578 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3579 return 0;
3580 #endif
3581
3582 if (! reload_completed || frame_pointer_needed)
3583 return 0;
3584
3585 /* Don't allow popping more than 32768 bytes of arguments, since that's
3586 all we handle with one instruction. */
3587 if (current_function_pops_args
3588 && current_function_args_size >= 32768)
3589 return 0;
3590
3591 ix86_compute_frame_layout (&frame);
3592 return frame.to_allocate == 0 && frame.nregs == 0;
3593 }
3594 \f
3595 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
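/* For example, (const_int 0x7fffffff) is accepted here, while
   (const_int 0x80000000) is not, since the latter sign-extends to
   0xffffffff80000000 rather than to itself.  */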
3596 int
3597 x86_64_sign_extended_value (value)
3598 rtx value;
3599 {
3600 switch (GET_CODE (value))
3601 {
3602 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3603 to be at least 32 and thus all acceptable constants are
3604 represented as CONST_INT. */
3605 case CONST_INT:
3606 if (HOST_BITS_PER_WIDE_INT == 32)
3607 return 1;
3608 else
3609 {
3610 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3611 return trunc_int_for_mode (val, SImode) == val;
3612 }
3613 break;
3614
3615 /* For certain code models, the symbolic references are known to fit. */
3616 case SYMBOL_REF:
3617 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3618
3619 /* For certain code models, the code is near as well. */
3620 case LABEL_REF:
3621 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3622
3623 /* We also may accept the offsetted memory references in certain special
3624 cases. */
3625 case CONST:
3626 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3627 && XVECLEN (XEXP (value, 0), 0) == 1
3628 && XINT (XEXP (value, 0), 1) == 15)
3629 return 1;
3630 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3631 {
3632 rtx op1 = XEXP (XEXP (value, 0), 0);
3633 rtx op2 = XEXP (XEXP (value, 0), 1);
3634 HOST_WIDE_INT offset;
3635
3636 if (ix86_cmodel == CM_LARGE)
3637 return 0;
3638 if (GET_CODE (op2) != CONST_INT)
3639 return 0;
3640 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3641 switch (GET_CODE (op1))
3642 {
3643 case SYMBOL_REF:
3644 /* For CM_SMALL assume that the latest object is 1MB before
3645 the end of the 31-bit boundary. We may also accept pretty
3646 large negative constants, knowing that all objects are
3647 in the positive half of the address space. */
3648 if (ix86_cmodel == CM_SMALL
3649 && offset < 1024*1024*1024
3650 && trunc_int_for_mode (offset, SImode) == offset)
3651 return 1;
3652 /* For CM_KERNEL we know that all objects reside in the
3653 negative half of the 32-bit address space. We may not
3654 accept negative offsets, since they may be just out of
3655 range, but we may accept pretty large positive ones. */
3656 if (ix86_cmodel == CM_KERNEL
3657 && offset > 0
3658 && trunc_int_for_mode (offset, SImode) == offset)
3659 return 1;
3660 break;
3661 case LABEL_REF:
3662 /* These conditions are similar to SYMBOL_REF ones, just the
3663 constraints for code models differ. */
3664 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3665 && offset < 1024*1024*1024
3666 && trunc_int_for_mode (offset, SImode) == offset)
3667 return 1;
3668 if (ix86_cmodel == CM_KERNEL
3669 && offset > 0
3670 && trunc_int_for_mode (offset, SImode) == offset)
3671 return 1;
3672 break;
3673 default:
3674 return 0;
3675 }
3676 }
3677 return 0;
3678 default:
3679 return 0;
3680 }
3681 }
3682
3683 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
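/* For example, (const_int 0x80000000) is accepted here, since it fits an
   unsigned 32-bit immediate, whereas (const_int -1) is not.  */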
3684 int
3685 x86_64_zero_extended_value (value)
3686 rtx value;
3687 {
3688 switch (GET_CODE (value))
3689 {
3690 case CONST_DOUBLE:
3691 if (HOST_BITS_PER_WIDE_INT == 32)
3692 return (GET_MODE (value) == VOIDmode
3693 && !CONST_DOUBLE_HIGH (value));
3694 else
3695 return 0;
3696 case CONST_INT:
3697 if (HOST_BITS_PER_WIDE_INT == 32)
3698 return INTVAL (value) >= 0;
3699 else
3700 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3701 break;
3702
3703 /* For certain code models, the symbolic references are known to fit. */
3704 case SYMBOL_REF:
3705 return ix86_cmodel == CM_SMALL;
3706
3707 /* For certain code models, the code is near as well. */
3708 case LABEL_REF:
3709 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3710
3711 /* We also may accept the offsetted memory references in certain special
3712 cases. */
3713 case CONST:
3714 if (GET_CODE (XEXP (value, 0)) == PLUS)
3715 {
3716 rtx op1 = XEXP (XEXP (value, 0), 0);
3717 rtx op2 = XEXP (XEXP (value, 0), 1);
3718
3719 if (ix86_cmodel == CM_LARGE)
3720 return 0;
3721 switch (GET_CODE (op1))
3722 {
3723 case SYMBOL_REF:
3724 return 0;
3725 /* For the small code model we may accept pretty large positive
3726 offsets, since one bit is available for free. Negative
3727 offsets are limited by the size of the NULL pointer area
3728 specified by the ABI. */
3729 if (ix86_cmodel == CM_SMALL
3730 && GET_CODE (op2) == CONST_INT
3731 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3732 && (trunc_int_for_mode (INTVAL (op2), SImode)
3733 == INTVAL (op2)))
3734 return 1;
3735 /* ??? For the kernel, we may accept adjustment of
3736 -0x10000000, since we know that it will just convert
3737 negative address space to positive, but perhaps this
3738 is not worthwhile. */
3739 break;
3740 case LABEL_REF:
3741 /* These conditions are similar to SYMBOL_REF ones, just the
3742 constraints for code models differ. */
3743 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3744 && GET_CODE (op2) == CONST_INT
3745 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3746 && (trunc_int_for_mode (INTVAL (op2), SImode)
3747 == INTVAL (op2)))
3748 return 1;
3749 break;
3750 default:
3751 return 0;
3752 }
3753 }
3754 return 0;
3755 default:
3756 return 0;
3757 }
3758 }
3759
3760 /* Value should be nonzero if functions must have frame pointers.
3761 Zero means the frame pointer need not be set up (and parms may
3762 be accessed via the stack pointer) in functions that seem suitable. */
3763
3764 int
3765 ix86_frame_pointer_required ()
3766 {
3767 /* If we accessed previous frames, then the generated code expects
3768 to be able to access the saved ebp value in our frame. */
3769 if (cfun->machine->accesses_prev_frame)
3770 return 1;
3771
3772 /* Several x86 OSes need a frame pointer for other reasons,
3773 usually pertaining to setjmp. */
3774 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3775 return 1;
3776
3777 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3778 the frame pointer by default. Turn it back on now if we've not
3779 got a leaf function. */
3780 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3781 return 1;
3782
3783 return 0;
3784 }
3785
3786 /* Record that the current function accesses previous call frames. */
3787
3788 void
3789 ix86_setup_frame_addresses ()
3790 {
3791 cfun->machine->accesses_prev_frame = 1;
3792 }
3793 \f
3794 static char pic_label_name[32];
3795
3796 /* This function generates code for -fpic that loads %ebx with
3797 the return address of the caller and then returns. */
3798
3799 void
3800 ix86_asm_file_end (file)
3801 FILE *file;
3802 {
3803 rtx xops[2];
3804
3805 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3806 return;
3807
3808 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3809 to updating relocations against a section being discarded, such that
3810 this doesn't work. Ought to detect this at configure time. */
3811 #if 0
3812 /* The trick here is to create a linkonce section containing the
3813 pic label thunk, but to refer to it with an internal label.
3814 Because the label is internal, we don't have inter-dso name
3815 binding issues on hosts that don't support ".hidden".
3816
3817 In order to use these macros, however, we must create a fake
3818 function decl. */
3819 if (targetm.have_named_sections)
3820 {
3821 tree decl = build_decl (FUNCTION_DECL,
3822 get_identifier ("i686.get_pc_thunk"),
3823 error_mark_node);
3824 DECL_ONE_ONLY (decl) = 1;
3825 UNIQUE_SECTION (decl, 0);
3826 named_section (decl, NULL);
3827 }
3828 else
3829 #else
3830 text_section ();
3831 #endif
3832
3833 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3834 internal (non-global) label that's being emitted, it didn't make
3835 sense to have .type information for local labels. This caused
3836 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3837 me debug info for a label that you're declaring non-global?), so
3838 this was changed to call ASM_OUTPUT_LABEL() instead. */
3839
3840 ASM_OUTPUT_LABEL (file, pic_label_name);
3841
3842 xops[0] = pic_offset_table_rtx;
3843 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3844 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3845 output_asm_insn ("ret", xops);
3846 }
3847
3848 void
3849 load_pic_register ()
3850 {
3851 rtx gotsym, pclab;
3852
3853 if (TARGET_64BIT)
3854 abort ();
3855
3856 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3857
3858 if (TARGET_DEEP_BRANCH_PREDICTION)
3859 {
3860 if (! pic_label_name[0])
3861 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3862 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3863 }
3864 else
3865 {
3866 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3867 }
3868
3869 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3870
3871 if (! TARGET_DEEP_BRANCH_PREDICTION)
3872 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3873
3874 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3875 }
3876
3877 /* Generate a "push" pattern for input ARG. */
3878
3879 static rtx
3880 gen_push (arg)
3881 rtx arg;
3882 {
3883 return gen_rtx_SET (VOIDmode,
3884 gen_rtx_MEM (Pmode,
3885 gen_rtx_PRE_DEC (Pmode,
3886 stack_pointer_rtx)),
3887 arg);
3888 }
3889
3890 /* Return 1 if we need to save REGNO. */
3891 static int
3892 ix86_save_reg (regno, maybe_eh_return)
3893 int regno;
3894 int maybe_eh_return;
3895 {
3896 if (regno == PIC_OFFSET_TABLE_REGNUM
3897 && (current_function_uses_pic_offset_table
3898 || current_function_uses_const_pool
3899 || current_function_calls_eh_return))
3900 return 1;
3901
3902 if (current_function_calls_eh_return && maybe_eh_return)
3903 {
3904 unsigned i;
3905 for (i = 0; ; i++)
3906 {
3907 unsigned test = EH_RETURN_DATA_REGNO (i);
3908 if (test == INVALID_REGNUM)
3909 break;
3910 if (test == (unsigned) regno)
3911 return 1;
3912 }
3913 }
3914
3915 return (regs_ever_live[regno]
3916 && !call_used_regs[regno]
3917 && !fixed_regs[regno]
3918 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3919 }
3920
3921 /* Return number of registers to be saved on the stack. */
3922
3923 static int
3924 ix86_nsaved_regs ()
3925 {
3926 int nregs = 0;
3927 int regno;
3928
3929 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3930 if (ix86_save_reg (regno, true))
3931 nregs++;
3932 return nregs;
3933 }
3934
3935 /* Return the offset between two registers, one to be eliminated, and the other
3936 its replacement, at the start of a routine. */
3937
3938 HOST_WIDE_INT
3939 ix86_initial_elimination_offset (from, to)
3940 int from;
3941 int to;
3942 {
3943 struct ix86_frame frame;
3944 ix86_compute_frame_layout (&frame);
3945
3946 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3947 return frame.hard_frame_pointer_offset;
3948 else if (from == FRAME_POINTER_REGNUM
3949 && to == HARD_FRAME_POINTER_REGNUM)
3950 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3951 else
3952 {
3953 if (to != STACK_POINTER_REGNUM)
3954 abort ();
3955 else if (from == ARG_POINTER_REGNUM)
3956 return frame.stack_pointer_offset;
3957 else if (from != FRAME_POINTER_REGNUM)
3958 abort ();
3959 else
3960 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3961 }
3962 }
3963
3964 /* Fill structure ix86_frame about frame of currently computed function. */
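/* Roughly, the layout computed below is, going from the incoming stack
   pointer towards lower addresses:

       return address
       saved frame pointer          (only if frame_pointer_needed)
       register save area           (nregs * UNITS_PER_WORD)
       va-arg save area             (only when saving varargs registers)
       padding1                     (aligns the start of the local frame)
       local variables              (get_frame_size ())
       outgoing arguments area      (with ACCUMULATE_OUTGOING_ARGS)
       padding2                     (aligns the stack boundary)

   frame_pointer_offset and stack_pointer_offset record where the soft
   frame pointer and the final stack pointer land in this layout; on
   x86-64 leaf functions part of the allocation may be taken from the
   red zone instead.  */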
3965
3966 static void
3967 ix86_compute_frame_layout (frame)
3968 struct ix86_frame *frame;
3969 {
3970 HOST_WIDE_INT total_size;
3971 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3972 int offset;
3973 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3974 HOST_WIDE_INT size = get_frame_size ();
3975
3976 frame->nregs = ix86_nsaved_regs ();
3977 total_size = size;
3978
3979 /* Skip return address and saved base pointer. */
3980 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3981
3982 frame->hard_frame_pointer_offset = offset;
3983
3984 /* Do some sanity checking of stack_alignment_needed and
3985 preferred_alignment, since the i386 port is the only one using these
3986 features, which may break easily. */
3987
3988 if (size && !stack_alignment_needed)
3989 abort ();
3990 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3991 abort ();
3992 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3993 abort ();
3994 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3995 abort ();
3996
3997 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3998 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3999
4000 /* Register save area */
4001 offset += frame->nregs * UNITS_PER_WORD;
4002
4003 /* Va-arg area */
4004 if (ix86_save_varrargs_registers)
4005 {
4006 offset += X86_64_VARARGS_SIZE;
4007 frame->va_arg_size = X86_64_VARARGS_SIZE;
4008 }
4009 else
4010 frame->va_arg_size = 0;
4011
4012 /* Align start of frame for local function. */
4013 frame->padding1 = ((offset + stack_alignment_needed - 1)
4014 & -stack_alignment_needed) - offset;
4015
4016 offset += frame->padding1;
4017
4018 /* Frame pointer points here. */
4019 frame->frame_pointer_offset = offset;
4020
4021 offset += size;
4022
4023 /* Add outgoing arguments area. */
4024 if (ACCUMULATE_OUTGOING_ARGS)
4025 {
4026 offset += current_function_outgoing_args_size;
4027 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4028 }
4029 else
4030 frame->outgoing_arguments_size = 0;
4031
4032 /* Align stack boundary. */
4033 frame->padding2 = ((offset + preferred_alignment - 1)
4034 & -preferred_alignment) - offset;
4035
4036 offset += frame->padding2;
4037
4038 /* We've reached end of stack frame. */
4039 frame->stack_pointer_offset = offset;
4040
4041 /* Size prologue needs to allocate. */
4042 frame->to_allocate =
4043 (size + frame->padding1 + frame->padding2
4044 + frame->outgoing_arguments_size + frame->va_arg_size);
4045
4046 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4047 && current_function_is_leaf)
4048 {
4049 frame->red_zone_size = frame->to_allocate;
4050 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4051 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4052 }
4053 else
4054 frame->red_zone_size = 0;
4055 frame->to_allocate -= frame->red_zone_size;
4056 frame->stack_pointer_offset -= frame->red_zone_size;
4057 #if 0
4058 fprintf (stderr, "nregs: %i\n", frame->nregs);
4059 fprintf (stderr, "size: %i\n", size);
4060 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4061 fprintf (stderr, "padding1: %i\n", frame->padding1);
4062 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4063 fprintf (stderr, "padding2: %i\n", frame->padding2);
4064 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4065 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4066 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4067 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4068 frame->hard_frame_pointer_offset);
4069 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4070 #endif
4071 }
4072
4073 /* Emit code to save registers in the prologue. */
4074
4075 static void
4076 ix86_emit_save_regs ()
4077 {
4078 register int regno;
4079 rtx insn;
4080
4081 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4082 if (ix86_save_reg (regno, true))
4083 {
4084 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4085 RTX_FRAME_RELATED_P (insn) = 1;
4086 }
4087 }
4088
4089 /* Emit code to save registers using MOV insns. First register
4090 is saved at POINTER + OFFSET. */
4091 static void
4092 ix86_emit_save_regs_using_mov (pointer, offset)
4093 rtx pointer;
4094 HOST_WIDE_INT offset;
4095 {
4096 int regno;
4097 rtx insn;
4098
4099 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4100 if (ix86_save_reg (regno, true))
4101 {
4102 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4103 Pmode, offset),
4104 gen_rtx_REG (Pmode, regno));
4105 RTX_FRAME_RELATED_P (insn) = 1;
4106 offset += UNITS_PER_WORD;
4107 }
4108 }
4109
4110 /* Expand the prologue into a bunch of separate insns. */
4111
4112 void
4113 ix86_expand_prologue ()
4114 {
4115 rtx insn;
4116 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4117 || current_function_uses_const_pool)
4118 && !TARGET_64BIT);
4119 struct ix86_frame frame;
4120 int use_mov = 0;
4121 HOST_WIDE_INT allocate;
4122
4123 if (!optimize_size)
4124 {
4125 use_fast_prologue_epilogue
4126 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4127 if (TARGET_PROLOGUE_USING_MOVE)
4128 use_mov = use_fast_prologue_epilogue;
4129 }
4130 ix86_compute_frame_layout (&frame);
4131
4132 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4133 slower on all targets. Also sdb doesn't like it. */
4134
4135 if (frame_pointer_needed)
4136 {
4137 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4138 RTX_FRAME_RELATED_P (insn) = 1;
4139
4140 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4141 RTX_FRAME_RELATED_P (insn) = 1;
4142 }
4143
4144 allocate = frame.to_allocate;
4145 /* In case we are dealing only with a single register and an empty frame,
4146 push is equivalent to the mov+add sequence. */
4147 if (allocate == 0 && frame.nregs <= 1)
4148 use_mov = 0;
4149
4150 if (!use_mov)
4151 ix86_emit_save_regs ();
4152 else
4153 allocate += frame.nregs * UNITS_PER_WORD;
4154
4155 if (allocate == 0)
4156 ;
4157 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4158 {
4159 insn = emit_insn (gen_pro_epilogue_adjust_stack
4160 (stack_pointer_rtx, stack_pointer_rtx,
4161 GEN_INT (-allocate)));
4162 RTX_FRAME_RELATED_P (insn) = 1;
4163 }
4164 else
4165 {
4166 /* ??? Is this only valid for Win32? */
4167
4168 rtx arg0, sym;
4169
4170 if (TARGET_64BIT)
4171 abort ();
4172
4173 arg0 = gen_rtx_REG (SImode, 0);
4174 emit_move_insn (arg0, GEN_INT (allocate));
4175
4176 sym = gen_rtx_MEM (FUNCTION_MODE,
4177 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4178 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4179
4180 CALL_INSN_FUNCTION_USAGE (insn)
4181 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4182 CALL_INSN_FUNCTION_USAGE (insn));
4183 }
4184 if (use_mov)
4185 {
4186 if (!frame_pointer_needed || !frame.to_allocate)
4187 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4188 else
4189 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4190 -frame.nregs * UNITS_PER_WORD);
4191 }
4192
4193 #ifdef SUBTARGET_PROLOGUE
4194 SUBTARGET_PROLOGUE;
4195 #endif
4196
4197 if (pic_reg_used)
4198 load_pic_register ();
4199
4200 /* If we are profiling, make sure no instructions are scheduled before
4201 the call to mcount. However, if -fpic, the above call will have
4202 done that. */
4203 if (current_function_profile && ! pic_reg_used)
4204 emit_insn (gen_blockage ());
4205 }
4206
4207 /* Emit code to restore saved registers using MOV insns. First register
4208 is restored from POINTER + OFFSET. */
4209 static void
4210 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4211 rtx pointer;
4212 int offset;
4213 int maybe_eh_return;
4214 {
4215 int regno;
4216
4217 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4218 if (ix86_save_reg (regno, maybe_eh_return))
4219 {
4220 emit_move_insn (gen_rtx_REG (Pmode, regno),
4221 adjust_address (gen_rtx_MEM (Pmode, pointer),
4222 Pmode, offset));
4223 offset += UNITS_PER_WORD;
4224 }
4225 }
4226
4227 /* Restore function stack, frame, and registers. */
4228
4229 void
4230 ix86_expand_epilogue (style)
4231 int style;
4232 {
4233 int regno;
4234 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4235 struct ix86_frame frame;
4236 HOST_WIDE_INT offset;
4237
4238 ix86_compute_frame_layout (&frame);
4239
4240 /* Calculate start of saved registers relative to ebp. Special care
4241 must be taken for the normal return case of a function using
4242 eh_return: the eax and edx registers are marked as saved, but not
4243 restored along this path. */
4244 offset = frame.nregs;
4245 if (current_function_calls_eh_return && style != 2)
4246 offset -= 2;
4247 offset *= -UNITS_PER_WORD;
4248
4249 /* If we're only restoring one register and sp is not valid then
4250 use a move instruction to restore the register, since it's
4251 less work than reloading sp and popping the register.
4252
4253 The default code results in a stack adjustment using an add/lea
4254 instruction, while this code results in a LEAVE instruction (or discrete
4255 equivalent), so it is profitable in some other cases as well, especially
4256 when there are no registers to restore. We also use this code when
4257 TARGET_USE_LEAVE is set and there is exactly one register to pop. This
4258 heuristic may need some tuning in the future. */
4259 if ((!sp_valid && frame.nregs <= 1)
4260 || (TARGET_EPILOGUE_USING_MOVE
4261 && use_fast_prologue_epilogue
4262 && (frame.nregs > 1 || frame.to_allocate))
4263 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4264 || (frame_pointer_needed && TARGET_USE_LEAVE
4265 && use_fast_prologue_epilogue && frame.nregs == 1)
4266 || current_function_calls_eh_return)
4267 {
4268 /* Restore registers. We can use ebp or esp to address the memory
4269 locations. If both are available, default to ebp, since offsets
4270 are known to be small. The only exception is esp pointing directly to
4271 the end of the block of saved registers, where we may simplify the
4272 addressing mode. */
4273
4274 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4275 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4276 frame.to_allocate, style == 2);
4277 else
4278 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4279 offset, style == 2);
4280
4281 /* eh_return epilogues need %ecx added to the stack pointer. */
4282 if (style == 2)
4283 {
4284 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4285
4286 if (frame_pointer_needed)
4287 {
4288 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4289 tmp = plus_constant (tmp, UNITS_PER_WORD);
4290 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4291
4292 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4293 emit_move_insn (hard_frame_pointer_rtx, tmp);
4294
4295 emit_insn (gen_pro_epilogue_adjust_stack
4296 (stack_pointer_rtx, sa, const0_rtx));
4297 }
4298 else
4299 {
4300 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4301 tmp = plus_constant (tmp, (frame.to_allocate
4302 + frame.nregs * UNITS_PER_WORD));
4303 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4304 }
4305 }
4306 else if (!frame_pointer_needed)
4307 emit_insn (gen_pro_epilogue_adjust_stack
4308 (stack_pointer_rtx, stack_pointer_rtx,
4309 GEN_INT (frame.to_allocate
4310 + frame.nregs * UNITS_PER_WORD)));
4311 /* If not an i386, mov & pop is faster than "leave". */
4312 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4313 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4314 else
4315 {
4316 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4317 hard_frame_pointer_rtx,
4318 const0_rtx));
4319 if (TARGET_64BIT)
4320 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4321 else
4322 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4323 }
4324 }
4325 else
4326 {
4327 /* First step is to deallocate the stack frame so that we can
4328 pop the registers. */
4329 if (!sp_valid)
4330 {
4331 if (!frame_pointer_needed)
4332 abort ();
4333 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4334 hard_frame_pointer_rtx,
4335 GEN_INT (offset)));
4336 }
4337 else if (frame.to_allocate)
4338 emit_insn (gen_pro_epilogue_adjust_stack
4339 (stack_pointer_rtx, stack_pointer_rtx,
4340 GEN_INT (frame.to_allocate)));
4341
4342 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4343 if (ix86_save_reg (regno, false))
4344 {
4345 if (TARGET_64BIT)
4346 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4347 else
4348 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4349 }
4350 if (frame_pointer_needed)
4351 {
4352 /* Leave results in shorter dependency chains on CPUs that are
4353 able to grok it fast. */
4354 if (TARGET_USE_LEAVE)
4355 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4356 else if (TARGET_64BIT)
4357 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4358 else
4359 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4360 }
4361 }
4362
4363 /* Sibcall epilogues don't want a return instruction. */
4364 if (style == 0)
4365 return;
4366
4367 if (current_function_pops_args && current_function_args_size)
4368 {
4369 rtx popc = GEN_INT (current_function_pops_args);
4370
4371 /* i386 can only pop 64K bytes. If asked to pop more, pop
4372 return address, do explicit add, and jump indirectly to the
4373 caller. */
4374
4375 if (current_function_pops_args >= 65536)
4376 {
4377 rtx ecx = gen_rtx_REG (SImode, 2);
4378
4379 /* There is no "pascal" calling convention in the 64-bit ABI. */
4380 if (TARGET_64BIT)
4381 abort ();
4382
4383 emit_insn (gen_popsi1 (ecx));
4384 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4385 emit_jump_insn (gen_return_indirect_internal (ecx));
4386 }
4387 else
4388 emit_jump_insn (gen_return_pop_internal (popc));
4389 }
4390 else
4391 emit_jump_insn (gen_return_internal ());
4392 }
4393 \f
4394 /* Extract the parts of an RTL expression that is a valid memory address
4395 for an instruction. Return 0 if the structure of the address is
4396 grossly off. Return -1 if the address contains ASHIFT, so it is not
4397 strictly valid, but still used for computing the length of a lea
4398 instruction. */
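/* As an illustration (the particular registers are arbitrary), an address
   of the form

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                       (reg:SI %ebx))
              (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 12.  */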
4399
4400 static int
4401 ix86_decompose_address (addr, out)
4402 register rtx addr;
4403 struct ix86_address *out;
4404 {
4405 rtx base = NULL_RTX;
4406 rtx index = NULL_RTX;
4407 rtx disp = NULL_RTX;
4408 HOST_WIDE_INT scale = 1;
4409 rtx scale_rtx = NULL_RTX;
4410 int retval = 1;
4411
4412 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4413 base = addr;
4414 else if (GET_CODE (addr) == PLUS)
4415 {
4416 rtx op0 = XEXP (addr, 0);
4417 rtx op1 = XEXP (addr, 1);
4418 enum rtx_code code0 = GET_CODE (op0);
4419 enum rtx_code code1 = GET_CODE (op1);
4420
4421 if (code0 == REG || code0 == SUBREG)
4422 {
4423 if (code1 == REG || code1 == SUBREG)
4424 index = op0, base = op1; /* index + base */
4425 else
4426 base = op0, disp = op1; /* base + displacement */
4427 }
4428 else if (code0 == MULT)
4429 {
4430 index = XEXP (op0, 0);
4431 scale_rtx = XEXP (op0, 1);
4432 if (code1 == REG || code1 == SUBREG)
4433 base = op1; /* index*scale + base */
4434 else
4435 disp = op1; /* index*scale + disp */
4436 }
4437 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4438 {
4439 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4440 scale_rtx = XEXP (XEXP (op0, 0), 1);
4441 base = XEXP (op0, 1);
4442 disp = op1;
4443 }
4444 else if (code0 == PLUS)
4445 {
4446 index = XEXP (op0, 0); /* index + base + disp */
4447 base = XEXP (op0, 1);
4448 disp = op1;
4449 }
4450 else
4451 return 0;
4452 }
4453 else if (GET_CODE (addr) == MULT)
4454 {
4455 index = XEXP (addr, 0); /* index*scale */
4456 scale_rtx = XEXP (addr, 1);
4457 }
4458 else if (GET_CODE (addr) == ASHIFT)
4459 {
4460 rtx tmp;
4461
4462 /* We're called for lea too, which implements ashift on occasion. */
4463 index = XEXP (addr, 0);
4464 tmp = XEXP (addr, 1);
4465 if (GET_CODE (tmp) != CONST_INT)
4466 return 0;
4467 scale = INTVAL (tmp);
4468 if ((unsigned HOST_WIDE_INT) scale > 3)
4469 return 0;
4470 scale = 1 << scale;
4471 retval = -1;
4472 }
4473 else
4474 disp = addr; /* displacement */
4475
4476 /* Extract the integral value of scale. */
4477 if (scale_rtx)
4478 {
4479 if (GET_CODE (scale_rtx) != CONST_INT)
4480 return 0;
4481 scale = INTVAL (scale_rtx);
4482 }
4483
4484 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4485 if (base && index && scale == 1
4486 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4487 || index == stack_pointer_rtx))
4488 {
4489 rtx tmp = base;
4490 base = index;
4491 index = tmp;
4492 }
4493
4494 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4495 if ((base == hard_frame_pointer_rtx
4496 || base == frame_pointer_rtx
4497 || base == arg_pointer_rtx) && !disp)
4498 disp = const0_rtx;
4499
4500 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
4501 Avoid this by transforming to [%esi+0]. */
4502 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4503 && base && !index && !disp
4504 && REG_P (base)
4505 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4506 disp = const0_rtx;
4507
4508 /* Special case: encode reg+reg instead of reg*2. */
4509 if (!base && index && scale && scale == 2)
4510 base = index, scale = 1;
4511
4512 /* Special case: scaling cannot be encoded without base or displacement. */
4513 if (!base && !disp && index && scale != 1)
4514 disp = const0_rtx;
4515
4516 out->base = base;
4517 out->index = index;
4518 out->disp = disp;
4519 out->scale = scale;
4520
4521 return retval;
4522 }
4523 \f
4524 /* Return the cost of the memory address X.
4525 For i386, it is better to use a complex address than let gcc copy
4526 the address into a reg and make a new pseudo. But not if the address
4527 requires two regs - that would mean more pseudos with longer
4528 lifetimes. */
4529 int
4530 ix86_address_cost (x)
4531 rtx x;
4532 {
4533 struct ix86_address parts;
4534 int cost = 1;
4535
4536 if (!ix86_decompose_address (x, &parts))
4537 abort ();
4538
4539 if (parts.base && GET_CODE (parts.base) == SUBREG)
4540 parts.base = SUBREG_REG (parts.base);
4541 if (parts.index && GET_CODE (parts.index) == SUBREG)
4542 parts.index = SUBREG_REG (parts.index);
4543
4544 /* More complex memory references are better. */
4545 if (parts.disp && parts.disp != const0_rtx)
4546 cost--;
4547
4548 /* Attempt to minimize number of registers in the address. */
4549 if ((parts.base
4550 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4551 || (parts.index
4552 && (!REG_P (parts.index)
4553 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4554 cost++;
4555
4556 if (parts.base
4557 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4558 && parts.index
4559 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4560 && parts.base != parts.index)
4561 cost++;
4562
4563 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4564 since its predecode logic can't detect the length of such instructions
4565 and decoding degenerates to vector decoding. Increase the cost of such
4566 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4567 to split such addresses or even refuse them entirely.
4568
4569 The following addressing modes are affected:
4570 [base+scale*index]
4571 [scale*index+disp]
4572 [base+index]
4573
4574 The first and last case may be avoidable by explicitly coding the zero
4575 into the memory address, but I don't have an AMD-K6 machine handy to
4576 check this theory. */
4577
4578 if (TARGET_K6
4579 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4580 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4581 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4582 cost += 10;
4583
4584 return cost;
4585 }
4586 \f
4587 /* If X is a machine specific address (i.e. a symbol or label being
4588 referenced as a displacement from the GOT implemented using an
4589 UNSPEC), then return the base term. Otherwise return X. */
4590
4591 rtx
4592 ix86_find_base_term (x)
4593 rtx x;
4594 {
4595 rtx term;
4596
4597 if (TARGET_64BIT)
4598 {
4599 if (GET_CODE (x) != CONST)
4600 return x;
4601 term = XEXP (x, 0);
4602 if (GET_CODE (term) == PLUS
4603 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4604 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4605 term = XEXP (term, 0);
4606 if (GET_CODE (term) != UNSPEC
4607 || XVECLEN (term, 0) != 1
4608 || XINT (term, 1) != 15)
4609 return x;
4610
4611 term = XVECEXP (term, 0, 0);
4612
4613 if (GET_CODE (term) != SYMBOL_REF
4614 && GET_CODE (term) != LABEL_REF)
4615 return x;
4616
4617 return term;
4618 }
4619
4620 if (GET_CODE (x) != PLUS
4621 || XEXP (x, 0) != pic_offset_table_rtx
4622 || GET_CODE (XEXP (x, 1)) != CONST)
4623 return x;
4624
4625 term = XEXP (XEXP (x, 1), 0);
4626
4627 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4628 term = XEXP (term, 0);
4629
4630 if (GET_CODE (term) != UNSPEC
4631 || XVECLEN (term, 0) != 1
4632 || XINT (term, 1) != 7)
4633 return x;
4634
4635 term = XVECEXP (term, 0, 0);
4636
4637 if (GET_CODE (term) != SYMBOL_REF
4638 && GET_CODE (term) != LABEL_REF)
4639 return x;
4640
4641 return term;
4642 }
4643 \f
4644 /* Determine if a given CONST RTX is a valid memory displacement
4645 in PIC mode. */
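/* In 32-bit PIC code the accepted displacements look like

     (const (unspec [(symbol_ref "foo")] 6))                      foo@GOT
     (const (unspec [(symbol_ref "foo")] 7))                      foo@GOTOFF
     (const (plus (unspec [(symbol_ref "foo")] 7) (const_int 4)))

   while a bare (symbol_ref "foo") is rejected; "foo" here is just a
   placeholder name.  */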
4646
4647 int
4648 legitimate_pic_address_disp_p (disp)
4649 register rtx disp;
4650 {
4651 /* In 64bit mode we can allow direct addresses of symbols and labels
4652 when they are not dynamic symbols. */
4653 if (TARGET_64BIT)
4654 {
4655 rtx x = disp;
4656 if (GET_CODE (disp) == CONST)
4657 x = XEXP (disp, 0);
4658 /* ??? Handle PIC code models */
4659 if (GET_CODE (x) == PLUS
4660 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4661 && ix86_cmodel == CM_SMALL_PIC
4662 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4663 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4664 x = XEXP (x, 0);
4665 if (local_symbolic_operand (x, Pmode))
4666 return 1;
4667 }
4668 if (GET_CODE (disp) != CONST)
4669 return 0;
4670 disp = XEXP (disp, 0);
4671
4672 if (TARGET_64BIT)
4673 {
4674 /* It is unsafe to allow PLUS expressions here; that would limit the
4675 allowed distance of GOT tables. We should not need these anyway. */
4676 if (GET_CODE (disp) != UNSPEC
4677 || XVECLEN (disp, 0) != 1
4678 || XINT (disp, 1) != 15)
4679 return 0;
4680
4681 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4682 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4683 return 0;
4684 return 1;
4685 }
4686
4687 if (GET_CODE (disp) == PLUS)
4688 {
4689 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4690 return 0;
4691 disp = XEXP (disp, 0);
4692 }
4693
4694 if (GET_CODE (disp) != UNSPEC
4695 || XVECLEN (disp, 0) != 1)
4696 return 0;
4697
4698 /* Must be @GOT or @GOTOFF. */
4699 switch (XINT (disp, 1))
4700 {
4701 case 6: /* @GOT */
4702 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4703
4704 case 7: /* @GOTOFF */
4705 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4706 }
4707
4708 return 0;
4709 }
4710
4711 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4712 memory address for an instruction. The MODE argument is the machine mode
4713 for the MEM expression that wants to use this address.
4714
4715 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4716 convert common non-canonical forms to canonical form so that they will
4717 be recognized. */
4718
4719 int
4720 legitimate_address_p (mode, addr, strict)
4721 enum machine_mode mode;
4722 register rtx addr;
4723 int strict;
4724 {
4725 struct ix86_address parts;
4726 rtx base, index, disp;
4727 HOST_WIDE_INT scale;
4728 const char *reason = NULL;
4729 rtx reason_rtx = NULL_RTX;
4730
4731 if (TARGET_DEBUG_ADDR)
4732 {
4733 fprintf (stderr,
4734 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4735 GET_MODE_NAME (mode), strict);
4736 debug_rtx (addr);
4737 }
4738
4739 if (ix86_decompose_address (addr, &parts) <= 0)
4740 {
4741 reason = "decomposition failed";
4742 goto report_error;
4743 }
4744
4745 base = parts.base;
4746 index = parts.index;
4747 disp = parts.disp;
4748 scale = parts.scale;
4749
4750 /* Validate base register.
4751
4752 Don't allow SUBREGs here; it can lead to spill failures when the base
4753 is one word out of a two word structure, which is represented internally
4754 as a DImode int. */
4755
4756 if (base)
4757 {
4758 rtx reg;
4759 reason_rtx = base;
4760
4761 if (GET_CODE (base) == SUBREG)
4762 reg = SUBREG_REG (base);
4763 else
4764 reg = base;
4765
4766 if (GET_CODE (reg) != REG)
4767 {
4768 reason = "base is not a register";
4769 goto report_error;
4770 }
4771
4772 if (GET_MODE (base) != Pmode)
4773 {
4774 reason = "base is not in Pmode";
4775 goto report_error;
4776 }
4777
4778 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
4779 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
4780 {
4781 reason = "base is not valid";
4782 goto report_error;
4783 }
4784 }
4785
4786 /* Validate index register.
4787
4788 Don't allow SUBREGs here; it can lead to spill failures when the index
4789 is one word out of a two word structure, which is represented internally
4790 as a DImode int. */
4791
4792 if (index)
4793 {
4794 rtx reg;
4795 reason_rtx = index;
4796
4797 if (GET_CODE (index) == SUBREG)
4798 reg = SUBREG_REG (index);
4799 else
4800 reg = index;
4801
4802 if (GET_CODE (reg) != REG)
4803 {
4804 reason = "index is not a register";
4805 goto report_error;
4806 }
4807
4808 if (GET_MODE (index) != Pmode)
4809 {
4810 reason = "index is not in Pmode";
4811 goto report_error;
4812 }
4813
4814 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
4815 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
4816 {
4817 reason = "index is not valid";
4818 goto report_error;
4819 }
4820 }
4821
4822 /* Validate scale factor. */
4823 if (scale != 1)
4824 {
4825 reason_rtx = GEN_INT (scale);
4826 if (!index)
4827 {
4828 reason = "scale without index";
4829 goto report_error;
4830 }
4831
4832 if (scale != 2 && scale != 4 && scale != 8)
4833 {
4834 reason = "scale is not a valid multiplier";
4835 goto report_error;
4836 }
4837 }
4838
4839 /* Validate displacement. */
4840 if (disp)
4841 {
4842 reason_rtx = disp;
4843
4844 if (!CONSTANT_ADDRESS_P (disp))
4845 {
4846 reason = "displacement is not constant";
4847 goto report_error;
4848 }
4849
4850 if (TARGET_64BIT)
4851 {
4852 if (!x86_64_sign_extended_value (disp))
4853 {
4854 reason = "displacement is out of range";
4855 goto report_error;
4856 }
4857 }
4858 else
4859 {
4860 if (GET_CODE (disp) == CONST_DOUBLE)
4861 {
4862 reason = "displacement is a const_double";
4863 goto report_error;
4864 }
4865 }
4866
4867 if (flag_pic && SYMBOLIC_CONST (disp))
4868 {
4869 if (TARGET_64BIT && (index || base))
4870 {
4871 reason = "non-constant pic memory reference";
4872 goto report_error;
4873 }
4874 if (! legitimate_pic_address_disp_p (disp))
4875 {
4876 reason = "displacement is an invalid pic construct";
4877 goto report_error;
4878 }
4879
4880 /* This code used to verify that a symbolic pic displacement
4881 includes the pic_offset_table_rtx register.
4882
4883 While this is a good idea, unfortunately these constructs may
4884 be created by the "adds using lea" optimization for incorrect
4885 code like:
4886
4887 int a;
4888 int foo(int i)
4889 {
4890 return *(&a+i);
4891 }
4892
4893 This code is nonsensical, but results in addressing the
4894 GOT table with a pic_offset_table_rtx base. We can't
4895 just refuse it easily, since it gets matched by the
4896 "addsi3" pattern, which later gets split to lea in the
4897 case the output register differs from the input. While this
4898 could be handled by a separate addsi pattern for this case
4899 that never results in lea, disabling this test seems to be
4900 the easier and correct fix for the crash. */
4901 }
4902 else if (HALF_PIC_P ())
4903 {
4904 if (! HALF_PIC_ADDRESS_P (disp)
4905 || (base != NULL_RTX || index != NULL_RTX))
4906 {
4907 reason = "displacement is an invalid half-pic reference";
4908 goto report_error;
4909 }
4910 }
4911 }
4912
4913 /* Everything looks valid. */
4914 if (TARGET_DEBUG_ADDR)
4915 fprintf (stderr, "Success.\n");
4916 return TRUE;
4917
4918 report_error:
4919 if (TARGET_DEBUG_ADDR)
4920 {
4921 fprintf (stderr, "Error: %s\n", reason);
4922 debug_rtx (reason_rtx);
4923 }
4924 return FALSE;
4925 }
4926 \f
4927 /* Return a unique alias set for the GOT. */
4928
4929 static HOST_WIDE_INT
4930 ix86_GOT_alias_set ()
4931 {
4932 static HOST_WIDE_INT set = -1;
4933 if (set == -1)
4934 set = new_alias_set ();
4935 return set;
4936 }
4937
4938 /* Return a legitimate reference for ORIG (an address) using the
4939 register REG. If REG is 0, a new pseudo is generated.
4940
4941 There are two types of references that must be handled:
4942
4943 1. Global data references must load the address from the GOT, via
4944 the PIC reg. An insn is emitted to do this load, and the reg is
4945 returned.
4946
4947 2. Static data references, constant pool addresses, and code labels
4948 compute the address as an offset from the GOT, whose base is in
4949 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4950 differentiate them from global data objects. The returned
4951 address is the PIC reg + an unspec constant.
4952
4953 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4954 reg also appears in the address. */
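/* So, roughly, a global symbol yields an address of the form

     (mem:SI (plus:SI (reg:SI %ebx) (const:SI (unspec [sym] 6))))

   (a load of sym@GOT), while a local symbol or label yields

     (plus:SI (reg:SI %ebx) (const:SI (unspec [sym] 7)))

   i.e. sym@GOTOFF added to the PIC register; %ebx stands for
   pic_offset_table_rtx in these sketches.  */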
4955
4956 rtx
4957 legitimize_pic_address (orig, reg)
4958 rtx orig;
4959 rtx reg;
4960 {
4961 rtx addr = orig;
4962 rtx new = orig;
4963 rtx base;
4964
4965 if (local_symbolic_operand (addr, Pmode))
4966 {
4967 /* In 64bit mode we can address such objects directly. */
4968 if (TARGET_64BIT)
4969 new = addr;
4970 else
4971 {
4972 /* This symbol may be referenced via a displacement from the PIC
4973 base address (@GOTOFF). */
4974
4975 current_function_uses_pic_offset_table = 1;
4976 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4977 new = gen_rtx_CONST (Pmode, new);
4978 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4979
4980 if (reg != 0)
4981 {
4982 emit_move_insn (reg, new);
4983 new = reg;
4984 }
4985 }
4986 }
4987 else if (GET_CODE (addr) == SYMBOL_REF)
4988 {
4989 if (TARGET_64BIT)
4990 {
4991 current_function_uses_pic_offset_table = 1;
4992 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4993 new = gen_rtx_CONST (Pmode, new);
4994 new = gen_rtx_MEM (Pmode, new);
4995 RTX_UNCHANGING_P (new) = 1;
4996 set_mem_alias_set (new, ix86_GOT_alias_set ());
4997
4998 if (reg == 0)
4999 reg = gen_reg_rtx (Pmode);
5000 /* Use gen_movsi directly, otherwise the address is loaded
5001 into a register for CSE. We don't want to CSE these addresses;
5002 instead we CSE addresses from the GOT table, so skip this. */
5003 emit_insn (gen_movsi (reg, new));
5004 new = reg;
5005 }
5006 else
5007 {
5008 /* This symbol must be referenced via a load from the
5009 Global Offset Table (@GOT). */
5010
5011 current_function_uses_pic_offset_table = 1;
5012 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
5013 new = gen_rtx_CONST (Pmode, new);
5014 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5015 new = gen_rtx_MEM (Pmode, new);
5016 RTX_UNCHANGING_P (new) = 1;
5017 set_mem_alias_set (new, ix86_GOT_alias_set ());
5018
5019 if (reg == 0)
5020 reg = gen_reg_rtx (Pmode);
5021 emit_move_insn (reg, new);
5022 new = reg;
5023 }
5024 }
5025 else
5026 {
5027 if (GET_CODE (addr) == CONST)
5028 {
5029 addr = XEXP (addr, 0);
5030
5031 /* We must match stuff we generate before. Assume the only
5032 unspecs that can get here are ours. Not that we could do
5033 anything with them anyway... */
5034 if (GET_CODE (addr) == UNSPEC
5035 || (GET_CODE (addr) == PLUS
5036 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5037 return orig;
5038 if (GET_CODE (addr) != PLUS)
5039 abort ();
5040 }
5041 if (GET_CODE (addr) == PLUS)
5042 {
5043 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5044
5045 /* Check first to see if this is a constant offset from a @GOTOFF
5046 symbol reference. */
5047 if (local_symbolic_operand (op0, Pmode)
5048 && GET_CODE (op1) == CONST_INT)
5049 {
5050 if (!TARGET_64BIT)
5051 {
5052 current_function_uses_pic_offset_table = 1;
5053 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5054 new = gen_rtx_PLUS (Pmode, new, op1);
5055 new = gen_rtx_CONST (Pmode, new);
5056 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5057
5058 if (reg != 0)
5059 {
5060 emit_move_insn (reg, new);
5061 new = reg;
5062 }
5063 }
5064 else
5065 {
5066 /* ??? We need to limit offsets here. */
5067 }
5068 }
5069 else
5070 {
5071 base = legitimize_pic_address (XEXP (addr, 0), reg);
5072 new = legitimize_pic_address (XEXP (addr, 1),
5073 base == reg ? NULL_RTX : reg);
5074
5075 if (GET_CODE (new) == CONST_INT)
5076 new = plus_constant (base, INTVAL (new));
5077 else
5078 {
5079 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5080 {
5081 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5082 new = XEXP (new, 1);
5083 }
5084 new = gen_rtx_PLUS (Pmode, base, new);
5085 }
5086 }
5087 }
5088 }
5089 return new;
5090 }
5091 \f
5092 /* Try machine-dependent ways of modifying an illegitimate address
5093 to be legitimate. If we find one, return the new, valid address.
5094 This macro is used in only one place: `memory_address' in explow.c.
5095
5096 OLDX is the address as it was before break_out_memory_refs was called.
5097 In some cases it is useful to look at this to decide what needs to be done.
5098
5099 MODE and WIN are passed so that this macro can use
5100 GO_IF_LEGITIMATE_ADDRESS.
5101
5102 It is always safe for this macro to do nothing. It exists to recognize
5103 opportunities to optimize the output.
5104
5105 For the 80386, we handle X+REG by loading X into a register R and
5106 using R+REG. R will go in a general reg and indexing will be used.
5107 However, if REG is a broken-out memory address or multiplication,
5108 nothing needs to be done because REG can certainly go in a general reg.
5109
5110 When -fpic is used, special handling is needed for symbolic references.
5111 See comments by legitimize_pic_address in i386.c for details. */
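/* As an example of the canonicalizations below, an address such as

     (plus (ashift (reg) (const_int 2)) (reg))

   is first rewritten into (plus (mult (reg) (const_int 4)) (reg)), which
   GO_IF_LEGITIMATE_ADDRESS can then accept as a scaled-index address.  */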
5112
5113 rtx
5114 legitimize_address (x, oldx, mode)
5115 register rtx x;
5116 register rtx oldx ATTRIBUTE_UNUSED;
5117 enum machine_mode mode;
5118 {
5119 int changed = 0;
5120 unsigned log;
5121
5122 if (TARGET_DEBUG_ADDR)
5123 {
5124 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5125 GET_MODE_NAME (mode));
5126 debug_rtx (x);
5127 }
5128
5129 if (flag_pic && SYMBOLIC_CONST (x))
5130 return legitimize_pic_address (x, 0);
5131
5132 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5133 if (GET_CODE (x) == ASHIFT
5134 && GET_CODE (XEXP (x, 1)) == CONST_INT
5135 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5136 {
5137 changed = 1;
5138 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5139 GEN_INT (1 << log));
5140 }
5141
5142 if (GET_CODE (x) == PLUS)
5143 {
5144 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5145
5146 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5147 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5148 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5149 {
5150 changed = 1;
5151 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5152 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5153 GEN_INT (1 << log));
5154 }
5155
5156 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5157 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5158 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5159 {
5160 changed = 1;
5161 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5162 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5163 GEN_INT (1 << log));
5164 }
5165
5166 /* Put multiply first if it isn't already. */
5167 if (GET_CODE (XEXP (x, 1)) == MULT)
5168 {
5169 rtx tmp = XEXP (x, 0);
5170 XEXP (x, 0) = XEXP (x, 1);
5171 XEXP (x, 1) = tmp;
5172 changed = 1;
5173 }
5174
5175 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5176 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5177 created by virtual register instantiation, register elimination, and
5178 similar optimizations. */
5179 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5180 {
5181 changed = 1;
5182 x = gen_rtx_PLUS (Pmode,
5183 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5184 XEXP (XEXP (x, 1), 0)),
5185 XEXP (XEXP (x, 1), 1));
5186 }
5187
5188 /* Canonicalize
5189 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5190 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5191 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5193 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5194 && CONSTANT_P (XEXP (x, 1)))
5195 {
5196 rtx constant;
5197 rtx other = NULL_RTX;
5198
5199 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5200 {
5201 constant = XEXP (x, 1);
5202 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5203 }
5204 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5205 {
5206 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5207 other = XEXP (x, 1);
5208 }
5209 else
5210 constant = 0;
5211
5212 if (constant)
5213 {
5214 changed = 1;
5215 x = gen_rtx_PLUS (Pmode,
5216 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5217 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5218 plus_constant (other, INTVAL (constant)));
5219 }
5220 }
5221
5222 if (changed && legitimate_address_p (mode, x, FALSE))
5223 return x;
5224
5225 if (GET_CODE (XEXP (x, 0)) == MULT)
5226 {
5227 changed = 1;
5228 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5229 }
5230
5231 if (GET_CODE (XEXP (x, 1)) == MULT)
5232 {
5233 changed = 1;
5234 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5235 }
5236
5237 if (changed
5238 && GET_CODE (XEXP (x, 1)) == REG
5239 && GET_CODE (XEXP (x, 0)) == REG)
5240 return x;
5241
5242 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5243 {
5244 changed = 1;
5245 x = legitimize_pic_address (x, 0);
5246 }
5247
5248 if (changed && legitimate_address_p (mode, x, FALSE))
5249 return x;
5250
5251 if (GET_CODE (XEXP (x, 0)) == REG)
5252 {
5253 register rtx temp = gen_reg_rtx (Pmode);
5254 register rtx val = force_operand (XEXP (x, 1), temp);
5255 if (val != temp)
5256 emit_move_insn (temp, val);
5257
5258 XEXP (x, 1) = temp;
5259 return x;
5260 }
5261
5262 else if (GET_CODE (XEXP (x, 1)) == REG)
5263 {
5264 register rtx temp = gen_reg_rtx (Pmode);
5265 register rtx val = force_operand (XEXP (x, 0), temp);
5266 if (val != temp)
5267 emit_move_insn (temp, val);
5268
5269 XEXP (x, 0) = temp;
5270 return x;
5271 }
5272 }
5273
5274 return x;
5275 }
5276 \f
5277 /* Print an integer constant expression in assembler syntax. Addition
5278 and subtraction are the only arithmetic that may appear in these
5279 expressions. FILE is the stdio stream to write to, X is the rtx, and
5280 CODE is the operand print code from the output string. */
5281
5282 static void
5283 output_pic_addr_const (file, x, code)
5284 FILE *file;
5285 rtx x;
5286 int code;
5287 {
5288 char buf[256];
5289
5290 switch (GET_CODE (x))
5291 {
5292 case PC:
5293 if (flag_pic)
5294 putc ('.', file);
5295 else
5296 abort ();
5297 break;
5298
5299 case SYMBOL_REF:
5300 assemble_name (file, XSTR (x, 0));
5301 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5302 fputs ("@PLT", file);
5303 break;
5304
5305 case LABEL_REF:
5306 x = XEXP (x, 0);
5307 /* FALLTHRU */
5308 case CODE_LABEL:
5309 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5310 assemble_name (asm_out_file, buf);
5311 break;
5312
5313 case CONST_INT:
5314 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5315 break;
5316
5317 case CONST:
5318 /* This used to output parentheses around the expression,
5319 but that does not work on the 386 (either ATT or BSD assembler). */
5320 output_pic_addr_const (file, XEXP (x, 0), code);
5321 break;
5322
5323 case CONST_DOUBLE:
5324 if (GET_MODE (x) == VOIDmode)
5325 {
5326 /* We can use %d if the number is <32 bits and positive. */
5327 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5328 fprintf (file, "0x%lx%08lx",
5329 (unsigned long) CONST_DOUBLE_HIGH (x),
5330 (unsigned long) CONST_DOUBLE_LOW (x));
5331 else
5332 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5333 }
5334 else
5335 /* We can't handle floating point constants;
5336 PRINT_OPERAND must handle them. */
5337 output_operand_lossage ("floating constant misused");
5338 break;
5339
5340 case PLUS:
5341 /* Some assemblers need integer constants to appear first. */
5342 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5343 {
5344 output_pic_addr_const (file, XEXP (x, 0), code);
5345 putc ('+', file);
5346 output_pic_addr_const (file, XEXP (x, 1), code);
5347 }
5348 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5349 {
5350 output_pic_addr_const (file, XEXP (x, 1), code);
5351 putc ('+', file);
5352 output_pic_addr_const (file, XEXP (x, 0), code);
5353 }
5354 else
5355 abort ();
5356 break;
5357
5358 case MINUS:
5359 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5360 output_pic_addr_const (file, XEXP (x, 0), code);
5361 putc ('-', file);
5362 output_pic_addr_const (file, XEXP (x, 1), code);
5363 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5364 break;
5365
5366 case UNSPEC:
5367 if (XVECLEN (x, 0) != 1)
5368 abort ();
5369 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5370 switch (XINT (x, 1))
5371 {
5372 case 6:
5373 fputs ("@GOT", file);
5374 break;
5375 case 7:
5376 fputs ("@GOTOFF", file);
5377 break;
5378 case 8:
5379 fputs ("@PLT", file);
5380 break;
5381 case 15:
5382 fputs ("@GOTPCREL(%RIP)", file);
5383 break;
5384 default:
5385 output_operand_lossage ("invalid UNSPEC as operand");
5386 break;
5387 }
5388 break;
5389
5390 default:
5391 output_operand_lossage ("invalid expression as operand");
5392 }
5393 }
5394
5395 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5396 We need to handle our special PIC relocations. */
5397
5398 void
5399 i386_dwarf_output_addr_const (file, x)
5400 FILE *file;
5401 rtx x;
5402 {
5403 #ifdef ASM_QUAD
5404 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5405 #else
5406 if (TARGET_64BIT)
5407 abort ();
5408 fprintf (file, "%s", ASM_LONG);
5409 #endif
5410 if (flag_pic)
5411 output_pic_addr_const (file, x, '\0');
5412 else
5413 output_addr_const (file, x);
5414 fputc ('\n', file);
5415 }
5416
5417 /* In the name of slightly smaller debug output, and to cater to
5418 general assembler lossage, recognize PIC+GOTOFF and turn it back
5419 into a direct symbol reference. */
5420
5421 rtx
5422 i386_simplify_dwarf_addr (orig_x)
5423 rtx orig_x;
5424 {
5425 rtx x = orig_x, y;
5426
5427 if (GET_CODE (x) == MEM)
5428 x = XEXP (x, 0);
5429
5430 if (TARGET_64BIT)
5431 {
5432 if (GET_CODE (x) != CONST
5433 || GET_CODE (XEXP (x, 0)) != UNSPEC
5434 || XINT (XEXP (x, 0), 1) != 15
5435 || GET_CODE (orig_x) != MEM)
5436 return orig_x;
5437 return XVECEXP (XEXP (x, 0), 0, 0);
5438 }
5439
5440 if (GET_CODE (x) != PLUS
5441 || GET_CODE (XEXP (x, 1)) != CONST)
5442 return orig_x;
5443
5444 if (GET_CODE (XEXP (x, 0)) == REG
5445 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5446 /* %ebx + GOT/GOTOFF */
5447 y = NULL;
5448 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5449 {
5450 /* %ebx + %reg * scale + GOT/GOTOFF */
5451 y = XEXP (x, 0);
5452 if (GET_CODE (XEXP (y, 0)) == REG
5453 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5454 y = XEXP (y, 1);
5455 else if (GET_CODE (XEXP (y, 1)) == REG
5456 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5457 y = XEXP (y, 0);
5458 else
5459 return orig_x;
5460 if (GET_CODE (y) != REG
5461 && GET_CODE (y) != MULT
5462 && GET_CODE (y) != ASHIFT)
5463 return orig_x;
5464 }
5465 else
5466 return orig_x;
5467
5468 x = XEXP (XEXP (x, 1), 0);
5469 if (GET_CODE (x) == UNSPEC
5470 && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM)
5471 || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM)))
5472 {
5473 if (y)
5474 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5475 return XVECEXP (x, 0, 0);
5476 }
5477
5478 if (GET_CODE (x) == PLUS
5479 && GET_CODE (XEXP (x, 0)) == UNSPEC
5480 && GET_CODE (XEXP (x, 1)) == CONST_INT
5481 && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM)
5482 || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM)))
5483 {
5484 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5485 if (y)
5486 return gen_rtx_PLUS (Pmode, y, x);
5487 return x;
5488 }
5489
5490 return orig_x;
5491 }
5492 \f
5493 static void
5494 put_condition_code (code, mode, reverse, fp, file)
5495 enum rtx_code code;
5496 enum machine_mode mode;
5497 int reverse, fp;
5498 FILE *file;
5499 {
5500 const char *suffix;
5501
5502 if (mode == CCFPmode || mode == CCFPUmode)
5503 {
5504 enum rtx_code second_code, bypass_code;
5505 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5506 if (bypass_code != NIL || second_code != NIL)
5507 abort ();
5508 code = ix86_fp_compare_code_to_integer (code);
5509 mode = CCmode;
5510 }
5511 if (reverse)
5512 code = reverse_condition (code);
5513
5514 switch (code)
5515 {
5516 case EQ:
5517 suffix = "e";
5518 break;
5519 case NE:
5520 suffix = "ne";
5521 break;
5522 case GT:
5523 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5524 abort ();
5525 suffix = "g";
5526 break;
5527 case GTU:
5528 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5529 Those same assemblers have the same but opposite lossage on cmov. */
5530 if (mode != CCmode)
5531 abort ();
5532 suffix = fp ? "nbe" : "a";
5533 break;
5534 case LT:
5535 if (mode == CCNOmode || mode == CCGOCmode)
5536 suffix = "s";
5537 else if (mode == CCmode || mode == CCGCmode)
5538 suffix = "l";
5539 else
5540 abort ();
5541 break;
5542 case LTU:
5543 if (mode != CCmode)
5544 abort ();
5545 suffix = "b";
5546 break;
5547 case GE:
5548 if (mode == CCNOmode || mode == CCGOCmode)
5549 suffix = "ns";
5550 else if (mode == CCmode || mode == CCGCmode)
5551 suffix = "ge";
5552 else
5553 abort ();
5554 break;
5555 case GEU:
5556 /* ??? As above. */
5557 if (mode != CCmode)
5558 abort ();
5559 suffix = fp ? "nb" : "ae";
5560 break;
5561 case LE:
5562 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5563 abort ();
5564 suffix = "le";
5565 break;
5566 case LEU:
5567 if (mode != CCmode)
5568 abort ();
5569 suffix = "be";
5570 break;
5571 case UNORDERED:
5572 suffix = fp ? "u" : "p";
5573 break;
5574 case ORDERED:
5575 suffix = fp ? "nu" : "np";
5576 break;
5577 default:
5578 abort ();
5579 }
5580 fputs (suffix, file);
5581 }
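/* Illustrative sketch only, not used by the compiler: the mapping above
   means a GTU comparison in CCmode prints "a", while the reversed form
   prints "be".  asm_out_file is the real output stream; the calls
   themselves are hypothetical.  */
#if 0
  put_condition_code (GTU, CCmode, 0, 0, asm_out_file);  /* "a",  as in "ja"  */
  put_condition_code (GTU, CCmode, 1, 0, asm_out_file);  /* "be", as in "jbe" */
#endif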
5582
5583 void
5584 print_reg (x, code, file)
5585 rtx x;
5586 int code;
5587 FILE *file;
5588 {
5589 if (REGNO (x) == ARG_POINTER_REGNUM
5590 || REGNO (x) == FRAME_POINTER_REGNUM
5591 || REGNO (x) == FLAGS_REG
5592 || REGNO (x) == FPSR_REG)
5593 abort ();
5594
5595 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5596 putc ('%', file);
5597
5598 if (code == 'w' || MMX_REG_P (x))
5599 code = 2;
5600 else if (code == 'b')
5601 code = 1;
5602 else if (code == 'k')
5603 code = 4;
5604 else if (code == 'q')
5605 code = 8;
5606 else if (code == 'y')
5607 code = 3;
5608 else if (code == 'h')
5609 code = 0;
5610 else
5611 code = GET_MODE_SIZE (GET_MODE (x));
5612
5613 /* Irritatingly, the AMD extended registers use a different naming
5614 convention than the normal registers do. */
5615 if (REX_INT_REG_P (x))
5616 {
5617 if (!TARGET_64BIT)
5618 abort ();
5619 switch (code)
5620 {
5621 case 0:
5622 error ("extended registers have no high halves");
5623 break;
5624 case 1:
5625 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5626 break;
5627 case 2:
5628 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5629 break;
5630 case 4:
5631 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5632 break;
5633 case 8:
5634 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5635 break;
5636 default:
5637 error ("unsupported operand size for extended register");
5638 break;
5639 }
5640 return;
5641 }
5642 switch (code)
5643 {
5644 case 3:
5645 if (STACK_TOP_P (x))
5646 {
5647 fputs ("st(0)", file);
5648 break;
5649 }
5650 /* FALLTHRU */
5651 case 8:
5652 case 4:
5653 case 12:
5654 if (! ANY_FP_REG_P (x))
5655 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5656 /* FALLTHRU */
5657 case 16:
5658 case 2:
5659 fputs (hi_reg_name[REGNO (x)], file);
5660 break;
5661 case 1:
5662 fputs (qi_reg_name[REGNO (x)], file);
5663 break;
5664 case 0:
5665 fputs (qi_high_reg_name[REGNO (x)], file);
5666 break;
5667 default:
5668 abort ();
5669 }
5670 }
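/* Illustrative sketch only (hypothetical calls, AT&T dialect assumed):
   with hard register 0 (%eax on IA-32) the size code selects the
   width-specific register name.  */
#if 0
  print_reg (gen_rtx_REG (SImode, 0), 'b', asm_out_file);  /* prints "%al"  */
  print_reg (gen_rtx_REG (SImode, 0), 'w', asm_out_file);  /* prints "%ax"  */
  print_reg (gen_rtx_REG (SImode, 0), 'k', asm_out_file);  /* prints "%eax" */
#endif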
5671
5672 /* Meaning of CODE:
5673 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5674 C -- print opcode suffix for set/cmov insn.
5675 c -- like C, but print reversed condition
5676 F,f -- likewise, but for floating-point.
5677 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5678 nothing
5679 R -- print the prefix for register names.
5680 z -- print the opcode suffix for the size of the current operand.
5681 * -- print a star (in certain assembler syntax)
5682 A -- print an absolute memory reference.
5683 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5684 s -- print a shift double count, followed by the assembler's argument
5685 delimiter.
5686 b -- print the QImode name of the register for the indicated operand.
5687 %b0 would print %al if operands[0] is reg 0.
5688 w -- likewise, print the HImode name of the register.
5689 k -- likewise, print the SImode name of the register.
5690 q -- likewise, print the DImode name of the register.
5691 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5692 y -- print "st(0)" instead of "st" as a register.
5693 D -- print condition for SSE cmp instruction.
5694 P -- if PIC, print an @PLT suffix.
5695 X -- don't print any sort of PIC '@' suffix for a symbol.
5696 */
5697
5698 void
5699 print_operand (file, x, code)
5700 FILE *file;
5701 rtx x;
5702 int code;
5703 {
5704 if (code)
5705 {
5706 switch (code)
5707 {
5708 case '*':
5709 if (ASSEMBLER_DIALECT == ASM_ATT)
5710 putc ('*', file);
5711 return;
5712
5713 case 'A':
5714 if (ASSEMBLER_DIALECT == ASM_ATT)
5715 putc ('*', file);
5716 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5717 {
5718 /* Intel syntax. For absolute addresses, registers should not
5719 be surrounded by brackets. */
5720 if (GET_CODE (x) != REG)
5721 {
5722 putc ('[', file);
5723 PRINT_OPERAND (file, x, 0);
5724 putc (']', file);
5725 return;
5726 }
5727 }
5728 else
5729 abort ();
5730
5731 PRINT_OPERAND (file, x, 0);
5732 return;
5733
5734
5735 case 'L':
5736 if (ASSEMBLER_DIALECT == ASM_ATT)
5737 putc ('l', file);
5738 return;
5739
5740 case 'W':
5741 if (ASSEMBLER_DIALECT == ASM_ATT)
5742 putc ('w', file);
5743 return;
5744
5745 case 'B':
5746 if (ASSEMBLER_DIALECT == ASM_ATT)
5747 putc ('b', file);
5748 return;
5749
5750 case 'Q':
5751 if (ASSEMBLER_DIALECT == ASM_ATT)
5752 putc ('l', file);
5753 return;
5754
5755 case 'S':
5756 if (ASSEMBLER_DIALECT == ASM_ATT)
5757 putc ('s', file);
5758 return;
5759
5760 case 'T':
5761 if (ASSEMBLER_DIALECT == ASM_ATT)
5762 putc ('t', file);
5763 return;
5764
5765 case 'z':
5766 /* 387 opcodes don't get size suffixes if the operands are
5767 registers. */
5768 if (STACK_REG_P (x))
5769 return;
5770
5771 /* Likewise if using Intel opcodes. */
5772 if (ASSEMBLER_DIALECT == ASM_INTEL)
5773 return;
5774
5775 /* This is the size of op from size of operand. */
5776 switch (GET_MODE_SIZE (GET_MODE (x)))
5777 {
5778 case 2:
5779 #ifdef HAVE_GAS_FILDS_FISTS
5780 putc ('s', file);
5781 #endif
5782 return;
5783
5784 case 4:
5785 if (GET_MODE (x) == SFmode)
5786 {
5787 putc ('s', file);
5788 return;
5789 }
5790 else
5791 putc ('l', file);
5792 return;
5793
5794 case 12:
5795 case 16:
5796 putc ('t', file);
5797 return;
5798
5799 case 8:
5800 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5801 {
5802 #ifdef GAS_MNEMONICS
5803 putc ('q', file);
5804 #else
5805 putc ('l', file);
5806 putc ('l', file);
5807 #endif
5808 }
5809 else
5810 putc ('l', file);
5811 return;
5812
5813 default:
5814 abort ();
5815 }
5816
5817 case 'b':
5818 case 'w':
5819 case 'k':
5820 case 'q':
5821 case 'h':
5822 case 'y':
5823 case 'X':
5824 case 'P':
5825 break;
5826
5827 case 's':
5828 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5829 {
5830 PRINT_OPERAND (file, x, 0);
5831 putc (',', file);
5832 }
5833 return;
5834
5835 case 'D':
5836 /* Little bit of braindamage here. The SSE compare instructions
5837 use completely different names for the comparisons than the
5838 fp conditional moves do. */
5839 switch (GET_CODE (x))
5840 {
5841 case EQ:
5842 case UNEQ:
5843 fputs ("eq", file);
5844 break;
5845 case LT:
5846 case UNLT:
5847 fputs ("lt", file);
5848 break;
5849 case LE:
5850 case UNLE:
5851 fputs ("le", file);
5852 break;
5853 case UNORDERED:
5854 fputs ("unord", file);
5855 break;
5856 case NE:
5857 case LTGT:
5858 fputs ("neq", file);
5859 break;
5860 case UNGE:
5861 case GE:
5862 fputs ("nlt", file);
5863 break;
5864 case UNGT:
5865 case GT:
5866 fputs ("nle", file);
5867 break;
5868 case ORDERED:
5869 fputs ("ord", file);
5870 break;
5871 default:
5872 abort ();
5873 break;
5874 }
5875 return;
5876 case 'O':
5877 #ifdef CMOV_SUN_AS_SYNTAX
5878 if (ASSEMBLER_DIALECT == ASM_ATT)
5879 {
5880 switch (GET_MODE (x))
5881 {
5882 case HImode: putc ('w', file); break;
5883 case SImode:
5884 case SFmode: putc ('l', file); break;
5885 case DImode:
5886 case DFmode: putc ('q', file); break;
5887 default: abort ();
5888 }
5889 putc ('.', file);
5890 }
5891 #endif
5892 return;
5893 case 'C':
5894 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5895 return;
5896 case 'F':
5897 #ifdef CMOV_SUN_AS_SYNTAX
5898 if (ASSEMBLER_DIALECT == ASM_ATT)
5899 putc ('.', file);
5900 #endif
5901 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5902 return;
5903
5904 /* Like above, but reverse condition */
5905 case 'c':
5906 /* Check to see if argument to %c is really a constant
5907 and not a condition code which needs to be reversed. */
5908 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5909 {
5910 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5911 return;
5912 }
5913 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5914 return;
5915 case 'f':
5916 #ifdef CMOV_SUN_AS_SYNTAX
5917 if (ASSEMBLER_DIALECT == ASM_ATT)
5918 putc ('.', file);
5919 #endif
5920 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5921 return;
5922 case '+':
5923 {
5924 rtx x;
5925
5926 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5927 return;
5928
5929 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5930 if (x)
5931 {
5932 int pred_val = INTVAL (XEXP (x, 0));
5933
5934 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5935 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5936 {
5937 int taken = pred_val > REG_BR_PROB_BASE / 2;
5938 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5939
5940 /* Emit hints only in the case the default branch prediction
5941 heuristics would fail. */
5942 if (taken != cputaken)
5943 {
5944 /* We use 3e (DS) prefix for taken branches and
5945 2e (CS) prefix for not taken branches. */
5946 if (taken)
5947 fputs ("ds ; ", file);
5948 else
5949 fputs ("cs ; ", file);
5950 }
5951 }
5952 }
5953 return;
5954 }
5955 default:
5956 output_operand_lossage ("invalid operand code `%c'", code);
5957 }
5958 }
5959
5960 if (GET_CODE (x) == REG)
5961 {
5962 PRINT_REG (x, code, file);
5963 }
5964
5965 else if (GET_CODE (x) == MEM)
5966 {
5967 /* No `byte ptr' prefix for call instructions. */
5968 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5969 {
5970 const char * size;
5971 switch (GET_MODE_SIZE (GET_MODE (x)))
5972 {
5973 case 1: size = "BYTE"; break;
5974 case 2: size = "WORD"; break;
5975 case 4: size = "DWORD"; break;
5976 case 8: size = "QWORD"; break;
5977 case 12: size = "XWORD"; break;
5978 case 16: size = "XMMWORD"; break;
5979 default:
5980 abort ();
5981 }
5982
5983 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5984 if (code == 'b')
5985 size = "BYTE";
5986 else if (code == 'w')
5987 size = "WORD";
5988 else if (code == 'k')
5989 size = "DWORD";
5990
5991 fputs (size, file);
5992 fputs (" PTR ", file);
5993 }
5994
5995 x = XEXP (x, 0);
5996 if (flag_pic && CONSTANT_ADDRESS_P (x))
5997 output_pic_addr_const (file, x, code);
5998 /* Avoid (%rip) for call operands. */
5999 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6000 && GET_CODE (x) != CONST_INT)
6001 output_addr_const (file, x);
6002 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6003 output_operand_lossage ("invalid constraints for operand");
6004 else
6005 output_address (x);
6006 }
6007
6008 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6009 {
6010 REAL_VALUE_TYPE r;
6011 long l;
6012
6013 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6014 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6015
6016 if (ASSEMBLER_DIALECT == ASM_ATT)
6017 putc ('$', file);
6018 fprintf (file, "0x%lx", l);
6019 }
6020
6021 /* These float cases don't actually occur as immediate operands. */
6022 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6023 {
6024 REAL_VALUE_TYPE r;
6025 char dstr[30];
6026
6027 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6028 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6029 fprintf (file, "%s", dstr);
6030 }
6031
6032 else if (GET_CODE (x) == CONST_DOUBLE
6033 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6034 {
6035 REAL_VALUE_TYPE r;
6036 char dstr[30];
6037
6038 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6039 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6040 fprintf (file, "%s", dstr);
6041 }
6042 else
6043 {
6044 if (code != 'P')
6045 {
6046 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6047 {
6048 if (ASSEMBLER_DIALECT == ASM_ATT)
6049 putc ('$', file);
6050 }
6051 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6052 || GET_CODE (x) == LABEL_REF)
6053 {
6054 if (ASSEMBLER_DIALECT == ASM_ATT)
6055 putc ('$', file);
6056 else
6057 fputs ("OFFSET FLAT:", file);
6058 }
6059 }
6060 if (GET_CODE (x) == CONST_INT)
6061 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6062 else if (flag_pic)
6063 output_pic_addr_const (file, x, code);
6064 else
6065 output_addr_const (file, x);
6066 }
6067 }
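/* Illustrative sketch only (hypothetical calls, 32-bit target assumed),
   showing how the operand codes documented above print_operand behave
   for a couple of common operand kinds.  */
#if 0
  print_operand (asm_out_file, GEN_INT (42), 0);
    /* "$42" in AT&T syntax, plain "42" in Intel syntax.  */
  print_operand (asm_out_file, gen_rtx_MEM (HImode, stack_pointer_rtx), 0);
    /* "(%esp)" in AT&T syntax, roughly "WORD PTR [esp]" in Intel syntax.  */
#endif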
6068 \f
6069 /* Print a memory operand whose address is ADDR. */
6070
6071 void
6072 print_operand_address (file, addr)
6073 FILE *file;
6074 register rtx addr;
6075 {
6076 struct ix86_address parts;
6077 rtx base, index, disp;
6078 int scale;
6079
6080 if (! ix86_decompose_address (addr, &parts))
6081 abort ();
6082
6083 base = parts.base;
6084 index = parts.index;
6085 disp = parts.disp;
6086 scale = parts.scale;
6087
6088 if (!base && !index)
6089 {
6090 /* A displacement-only address requires special attention. */
6091
6092 if (GET_CODE (disp) == CONST_INT)
6093 {
6094 if (ASSEMBLER_DIALECT == ASM_INTEL)
6095 {
6096 if (USER_LABEL_PREFIX[0] == 0)
6097 putc ('%', file);
6098 fputs ("ds:", file);
6099 }
6100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6101 }
6102 else if (flag_pic)
6103 output_pic_addr_const (file, addr, 0);
6104 else
6105 output_addr_const (file, addr);
6106
6107 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
6108 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6109 fputs ("(%rip)", file);
6110 }
6111 else
6112 {
6113 if (ASSEMBLER_DIALECT == ASM_ATT)
6114 {
6115 if (disp)
6116 {
6117 if (flag_pic)
6118 output_pic_addr_const (file, disp, 0);
6119 else if (GET_CODE (disp) == LABEL_REF)
6120 output_asm_label (disp);
6121 else
6122 output_addr_const (file, disp);
6123 }
6124
6125 putc ('(', file);
6126 if (base)
6127 PRINT_REG (base, 0, file);
6128 if (index)
6129 {
6130 putc (',', file);
6131 PRINT_REG (index, 0, file);
6132 if (scale != 1)
6133 fprintf (file, ",%d", scale);
6134 }
6135 putc (')', file);
6136 }
6137 else
6138 {
6139 rtx offset = NULL_RTX;
6140
6141 if (disp)
6142 {
6143 /* Pull out the offset of a symbol; print any symbol itself. */
6144 if (GET_CODE (disp) == CONST
6145 && GET_CODE (XEXP (disp, 0)) == PLUS
6146 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6147 {
6148 offset = XEXP (XEXP (disp, 0), 1);
6149 disp = gen_rtx_CONST (VOIDmode,
6150 XEXP (XEXP (disp, 0), 0));
6151 }
6152
6153 if (flag_pic)
6154 output_pic_addr_const (file, disp, 0);
6155 else if (GET_CODE (disp) == LABEL_REF)
6156 output_asm_label (disp);
6157 else if (GET_CODE (disp) == CONST_INT)
6158 offset = disp;
6159 else
6160 output_addr_const (file, disp);
6161 }
6162
6163 putc ('[', file);
6164 if (base)
6165 {
6166 PRINT_REG (base, 0, file);
6167 if (offset)
6168 {
6169 if (INTVAL (offset) >= 0)
6170 putc ('+', file);
6171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6172 }
6173 }
6174 else if (offset)
6175 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6176 else
6177 putc ('0', file);
6178
6179 if (index)
6180 {
6181 putc ('+', file);
6182 PRINT_REG (index, 0, file);
6183 if (scale != 1)
6184 fprintf (file, "*%d", scale);
6185 }
6186 putc (']', file);
6187 }
6188 }
6189 }
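/* Illustrative example only: a base %eax, index %ebx, scale 4 and
   displacement 16 come out of the code above as "16(%eax,%ebx,4)" in
   AT&T syntax and as "[eax+16+ebx*4]" in Intel syntax.  */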
6190 \f
6191 /* Split one or more DImode RTL references into pairs of SImode
6192 references. The RTL can be REG, offsettable MEM, integer constant, or
6193 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6194 split and "num" is its length. lo_half and hi_half are output arrays
6195 that parallel "operands". */
6196
6197 void
6198 split_di (operands, num, lo_half, hi_half)
6199 rtx operands[];
6200 int num;
6201 rtx lo_half[], hi_half[];
6202 {
6203 while (num--)
6204 {
6205 rtx op = operands[num];
6206
6207 /* simplify_subreg refuses to split volatile memory addresses,
6208 but we still have to handle them. */
6209 if (GET_CODE (op) == MEM)
6210 {
6211 lo_half[num] = adjust_address (op, SImode, 0);
6212 hi_half[num] = adjust_address (op, SImode, 4);
6213 }
6214 else
6215 {
6216 lo_half[num] = simplify_gen_subreg (SImode, op,
6217 GET_MODE (op) == VOIDmode
6218 ? DImode : GET_MODE (op), 0);
6219 hi_half[num] = simplify_gen_subreg (SImode, op,
6220 GET_MODE (op) == VOIDmode
6221 ? DImode : GET_MODE (op), 4);
6222 }
6223 }
6224 }
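/* Illustrative sketch only (ops, lo and hi are hypothetical locals):  */
#if 0
  rtx ops[1], lo[1], hi[1];
  ops[0] = gen_rtx_REG (DImode, 0);
  split_di (ops, 1, lo, hi);
  /* lo[0] is now the SImode low half and hi[0] the SImode high half
     (the subreg at byte offset 4) of the DImode register.  */
#endif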
6225 /* Split one or more TImode RTL references into pairs of DImode
6226 references. The RTL can be REG, offsettable MEM, integer constant, or
6227 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6228 split and "num" is its length. lo_half and hi_half are output arrays
6229 that parallel "operands". */
6230
6231 void
6232 split_ti (operands, num, lo_half, hi_half)
6233 rtx operands[];
6234 int num;
6235 rtx lo_half[], hi_half[];
6236 {
6237 while (num--)
6238 {
6239 rtx op = operands[num];
6240
6241 /* simplify_subreg refuses to split volatile memory addresses, but we
6242 still have to handle them. */
6243 if (GET_CODE (op) == MEM)
6244 {
6245 lo_half[num] = adjust_address (op, DImode, 0);
6246 hi_half[num] = adjust_address (op, DImode, 8);
6247 }
6248 else
6249 {
6250 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6251 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6252 }
6253 }
6254 }
6255 \f
6256 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6257 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6258 is the expression of the binary operation. The output may either be
6259 emitted here, or returned to the caller, like all output_* functions.
6260
6261 There is no guarantee that the operands are the same mode, as they
6262 might be within FLOAT or FLOAT_EXTEND expressions. */
6263
6264 #ifndef SYSV386_COMPAT
6265 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6266 wants to fix the assemblers because that causes incompatibility
6267 with gcc. No-one wants to fix gcc because that causes
6268 incompatibility with assemblers... You can use the option of
6269 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6270 #define SYSV386_COMPAT 1
6271 #endif
6272
6273 const char *
6274 output_387_binary_op (insn, operands)
6275 rtx insn;
6276 rtx *operands;
6277 {
6278 static char buf[30];
6279 const char *p;
6280 const char *ssep;
6281 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6282
6283 #ifdef ENABLE_CHECKING
6284 /* Even if we do not want to check the inputs, this documents the input
6285 constraints, which helps in understanding the following code. */
6286 if (STACK_REG_P (operands[0])
6287 && ((REG_P (operands[1])
6288 && REGNO (operands[0]) == REGNO (operands[1])
6289 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6290 || (REG_P (operands[2])
6291 && REGNO (operands[0]) == REGNO (operands[2])
6292 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6293 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6294 ; /* ok */
6295 else if (!is_sse)
6296 abort ();
6297 #endif
6298
6299 switch (GET_CODE (operands[3]))
6300 {
6301 case PLUS:
6302 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6303 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6304 p = "fiadd";
6305 else
6306 p = "fadd";
6307 ssep = "add";
6308 break;
6309
6310 case MINUS:
6311 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6312 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6313 p = "fisub";
6314 else
6315 p = "fsub";
6316 ssep = "sub";
6317 break;
6318
6319 case MULT:
6320 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6321 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6322 p = "fimul";
6323 else
6324 p = "fmul";
6325 ssep = "mul";
6326 break;
6327
6328 case DIV:
6329 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6330 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6331 p = "fidiv";
6332 else
6333 p = "fdiv";
6334 ssep = "div";
6335 break;
6336
6337 default:
6338 abort ();
6339 }
6340
6341 if (is_sse)
6342 {
6343 strcpy (buf, ssep);
6344 if (GET_MODE (operands[0]) == SFmode)
6345 strcat (buf, "ss\t{%2, %0|%0, %2}");
6346 else
6347 strcat (buf, "sd\t{%2, %0|%0, %2}");
6348 return buf;
6349 }
6350 strcpy (buf, p);
6351
6352 switch (GET_CODE (operands[3]))
6353 {
6354 case MULT:
6355 case PLUS:
6356 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6357 {
6358 rtx temp = operands[2];
6359 operands[2] = operands[1];
6360 operands[1] = temp;
6361 }
6362
6363 /* We know operands[0] == operands[1] at this point. */
6364
6365 if (GET_CODE (operands[2]) == MEM)
6366 {
6367 p = "%z2\t%2";
6368 break;
6369 }
6370
6371 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6372 {
6373 if (STACK_TOP_P (operands[0]))
6374 /* How is it that we are storing to a dead operand[2]?
6375 Well, presumably operands[1] is dead too. We can't
6376 store the result to st(0) as st(0) gets popped on this
6377 instruction. Instead store to operands[2] (which I
6378 think has to be st(1)). st(1) will be popped later.
6379 gcc <= 2.8.1 didn't have this check and generated
6380 assembly code that the Unixware assembler rejected. */
6381 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6382 else
6383 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6384 break;
6385 }
6386
6387 if (STACK_TOP_P (operands[0]))
6388 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6389 else
6390 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6391 break;
6392
6393 case MINUS:
6394 case DIV:
6395 if (GET_CODE (operands[1]) == MEM)
6396 {
6397 p = "r%z1\t%1";
6398 break;
6399 }
6400
6401 if (GET_CODE (operands[2]) == MEM)
6402 {
6403 p = "%z2\t%2";
6404 break;
6405 }
6406
6407 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6408 {
6409 #if SYSV386_COMPAT
6410 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6411 derived assemblers, confusingly reverse the direction of
6412 the operation for fsub{r} and fdiv{r} when the
6413 destination register is not st(0). The Intel assembler
6414 doesn't have this brain damage. Read !SYSV386_COMPAT to
6415 figure out what the hardware really does. */
6416 if (STACK_TOP_P (operands[0]))
6417 p = "{p\t%0, %2|rp\t%2, %0}";
6418 else
6419 p = "{rp\t%2, %0|p\t%0, %2}";
6420 #else
6421 if (STACK_TOP_P (operands[0]))
6422 /* As above for fmul/fadd, we can't store to st(0). */
6423 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6424 else
6425 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6426 #endif
6427 break;
6428 }
6429
6430 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6431 {
6432 #if SYSV386_COMPAT
6433 if (STACK_TOP_P (operands[0]))
6434 p = "{rp\t%0, %1|p\t%1, %0}";
6435 else
6436 p = "{p\t%1, %0|rp\t%0, %1}";
6437 #else
6438 if (STACK_TOP_P (operands[0]))
6439 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6440 else
6441 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6442 #endif
6443 break;
6444 }
6445
6446 if (STACK_TOP_P (operands[0]))
6447 {
6448 if (STACK_TOP_P (operands[1]))
6449 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6450 else
6451 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6452 break;
6453 }
6454 else if (STACK_TOP_P (operands[1]))
6455 {
6456 #if SYSV386_COMPAT
6457 p = "{\t%1, %0|r\t%0, %1}";
6458 #else
6459 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6460 #endif
6461 }
6462 else
6463 {
6464 #if SYSV386_COMPAT
6465 p = "{r\t%2, %0|\t%0, %2}";
6466 #else
6467 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6468 #endif
6469 }
6470 break;
6471
6472 default:
6473 abort ();
6474 }
6475
6476 strcat (buf, p);
6477 return buf;
6478 }
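/* Illustrative examples only: for an SSE SFmode add this function
   returns "addss\t{%2, %0|%0, %2}"; for a 387 add with operands[2] in
   memory it returns "fadd%z2\t%2", where %z2 expands to the size suffix
   of the memory operand.  */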
6479
6480 /* Output code to initialize the control word copies used by the
6481 trunc?f?i patterns. NORMAL is set to the current control word, while
6482 ROUND_DOWN is set to a control word that rounds toward zero (truncation). */
6483 void
6484 emit_i387_cw_initialization (normal, round_down)
6485 rtx normal, round_down;
6486 {
6487 rtx reg = gen_reg_rtx (HImode);
6488
6489 emit_insn (gen_x86_fnstcw_1 (normal));
6490 emit_move_insn (reg, normal);
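/* Both branches below set bits 10-11 (the rounding-control field) of the
   387 control word; 0xc00 selects round-toward-zero, which is what the
   truncating conversion patterns need.  The insv form writes the same
   value through the high byte of the word, i.e. the same two bits.  */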
6491 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6492 && !TARGET_64BIT)
6493 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6494 else
6495 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6496 emit_move_insn (round_down, reg);
6497 }
6498
6499 /* Output code for INSN to convert a float to a signed int. OPERANDS
6500 are the insn operands. The output may be [HSD]Imode and the input
6501 operand may be [SDX]Fmode. */
6502
6503 const char *
6504 output_fix_trunc (insn, operands)
6505 rtx insn;
6506 rtx *operands;
6507 {
6508 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6509 int dimode_p = GET_MODE (operands[0]) == DImode;
6510
6511 /* Jump through a hoop or two for DImode, since the hardware has no
6512 non-popping instruction. We used to do this a different way, but
6513 that was somewhat fragile and broke with post-reload splitters. */
6514 if (dimode_p && !stack_top_dies)
6515 output_asm_insn ("fld\t%y1", operands);
6516
6517 if (!STACK_TOP_P (operands[1]))
6518 abort ();
6519
6520 if (GET_CODE (operands[0]) != MEM)
6521 abort ();
6522
6523 output_asm_insn ("fldcw\t%3", operands);
6524 if (stack_top_dies || dimode_p)
6525 output_asm_insn ("fistp%z0\t%0", operands);
6526 else
6527 output_asm_insn ("fist%z0\t%0", operands);
6528 output_asm_insn ("fldcw\t%2", operands);
6529
6530 return "";
6531 }
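/* Illustrative example only: for an SImode destination whose st(0)
   input dies, the code above emits roughly
       fldcw   %3   (the round-toward-zero control word copy)
       fistpl  %0
       fldcw   %2   (the saved control word)
   where the two control word copies were set up by
   emit_i387_cw_initialization.  */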
6532
6533 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6534 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6535 when fucom should be used. */
6536
6537 const char *
6538 output_fp_compare (insn, operands, eflags_p, unordered_p)
6539 rtx insn;
6540 rtx *operands;
6541 int eflags_p, unordered_p;
6542 {
6543 int stack_top_dies;
6544 rtx cmp_op0 = operands[0];
6545 rtx cmp_op1 = operands[1];
6546 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6547
6548 if (eflags_p == 2)
6549 {
6550 cmp_op0 = cmp_op1;
6551 cmp_op1 = operands[2];
6552 }
6553 if (is_sse)
6554 {
6555 if (GET_MODE (operands[0]) == SFmode)
6556 if (unordered_p)
6557 return "ucomiss\t{%1, %0|%0, %1}";
6558 else
6559 return "comiss\t{%1, %0|%0, %1}";
6560 else
6561 if (unordered_p)
6562 return "ucomisd\t{%1, %0|%0, %1}";
6563 else
6564 return "comisd\t{%1, %0|%0, %1}";
6565 }
6566
6567 if (! STACK_TOP_P (cmp_op0))
6568 abort ();
6569
6570 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6571
6572 if (STACK_REG_P (cmp_op1)
6573 && stack_top_dies
6574 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6575 && REGNO (cmp_op1) != FIRST_STACK_REG)
6576 {
6577 /* If the top of the 387 stack dies, and the other operand
6578 is also a stack register that dies, then this must be an
6579 `fcompp' float compare. */
6580
6581 if (eflags_p == 1)
6582 {
6583 /* There is no double popping fcomi variant. Fortunately,
6584 eflags is immune from the fstp's cc clobbering. */
6585 if (unordered_p)
6586 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6587 else
6588 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6589 return "fstp\t%y0";
6590 }
6591 else
6592 {
6593 if (eflags_p == 2)
6594 {
6595 if (unordered_p)
6596 return "fucompp\n\tfnstsw\t%0";
6597 else
6598 return "fcompp\n\tfnstsw\t%0";
6599 }
6600 else
6601 {
6602 if (unordered_p)
6603 return "fucompp";
6604 else
6605 return "fcompp";
6606 }
6607 }
6608 }
6609 else
6610 {
6611 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6612
6613 static const char * const alt[24] =
6614 {
6615 "fcom%z1\t%y1",
6616 "fcomp%z1\t%y1",
6617 "fucom%z1\t%y1",
6618 "fucomp%z1\t%y1",
6619
6620 "ficom%z1\t%y1",
6621 "ficomp%z1\t%y1",
6622 NULL,
6623 NULL,
6624
6625 "fcomi\t{%y1, %0|%0, %y1}",
6626 "fcomip\t{%y1, %0|%0, %y1}",
6627 "fucomi\t{%y1, %0|%0, %y1}",
6628 "fucomip\t{%y1, %0|%0, %y1}",
6629
6630 NULL,
6631 NULL,
6632 NULL,
6633 NULL,
6634
6635 "fcom%z2\t%y2\n\tfnstsw\t%0",
6636 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6637 "fucom%z2\t%y2\n\tfnstsw\t%0",
6638 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6639
6640 "ficom%z2\t%y2\n\tfnstsw\t%0",
6641 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6642 NULL,
6643 NULL
6644 };
6645
6646 int mask;
6647 const char *ret;
6648
6649 mask = eflags_p << 3;
6650 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6651 mask |= unordered_p << 1;
6652 mask |= stack_top_dies;
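/* For instance, eflags_p == 1 (fcomi), an FP operand, an ordered compare
   and a dying st(0) give mask == 9, selecting
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */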
6653
6654 if (mask >= 24)
6655 abort ();
6656 ret = alt[mask];
6657 if (ret == NULL)
6658 abort ();
6659
6660 return ret;
6661 }
6662 }
6663
6664 void
6665 ix86_output_addr_vec_elt (file, value)
6666 FILE *file;
6667 int value;
6668 {
6669 const char *directive = ASM_LONG;
6670
6671 if (TARGET_64BIT)
6672 {
6673 #ifdef ASM_QUAD
6674 directive = ASM_QUAD;
6675 #else
6676 abort ();
6677 #endif
6678 }
6679
6680 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6681 }
6682
6683 void
6684 ix86_output_addr_diff_elt (file, value, rel)
6685 FILE *file;
6686 int value, rel;
6687 {
6688 if (TARGET_64BIT)
6689 fprintf (file, "%s%s%d-.+(.-%s%d)\n",
6690 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6691 else if (HAVE_AS_GOTOFF_IN_DATA)
6692 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6693 else
6694 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6695 ASM_LONG, LPREFIX, value);
6696 }
6697 \f
6698 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6699 for the target. */
6700
6701 void
6702 ix86_expand_clear (dest)
6703 rtx dest;
6704 {
6705 rtx tmp;
6706
6707 /* We play register width games, which are only valid after reload. */
6708 if (!reload_completed)
6709 abort ();
6710
6711 /* Avoid HImode and its attendant prefix byte. */
6712 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6713 dest = gen_rtx_REG (SImode, REGNO (dest));
6714
6715 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6716
6717 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6718 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6719 {
6720 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6721 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6722 }
6723
6724 emit_insn (tmp);
6725 }
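/* Illustrative sketch only (hypothetical call): after reload, clearing
   %eax this way expands to "xorl %eax, %eax" plus a flags clobber on
   most tunings, or to "movl $0, %eax" when TARGET_USE_MOV0 is set and
   we are not optimizing for size.  */
#if 0
  ix86_expand_clear (gen_rtx_REG (SImode, 0));
#endif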
6726
6727 void
6728 ix86_expand_move (mode, operands)
6729 enum machine_mode mode;
6730 rtx operands[];
6731 {
6732 int strict = (reload_in_progress || reload_completed);
6733 rtx insn;
6734
6735 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6736 {
6737 /* Emit insns to move operands[1] into operands[0]. */
6738
6739 if (GET_CODE (operands[0]) == MEM)
6740 operands[1] = force_reg (Pmode, operands[1]);
6741 else
6742 {
6743 rtx temp = operands[0];
6744 if (GET_CODE (temp) != REG)
6745 temp = gen_reg_rtx (Pmode);
6746 temp = legitimize_pic_address (operands[1], temp);
6747 if (temp == operands[0])
6748 return;
6749 operands[1] = temp;
6750 }
6751 }
6752 else
6753 {
6754 if (GET_CODE (operands[0]) == MEM
6755 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6756 || !push_operand (operands[0], mode))
6757 && GET_CODE (operands[1]) == MEM)
6758 operands[1] = force_reg (mode, operands[1]);
6759
6760 if (push_operand (operands[0], mode)
6761 && ! general_no_elim_operand (operands[1], mode))
6762 operands[1] = copy_to_mode_reg (mode, operands[1]);
6763
6764 /* Force large constants in 64-bit compilation into a register
6765 to get them CSEd. */
6766 if (TARGET_64BIT && mode == DImode
6767 && immediate_operand (operands[1], mode)
6768 && !x86_64_zero_extended_value (operands[1])
6769 && !register_operand (operands[0], mode)
6770 && optimize && !reload_completed && !reload_in_progress)
6771 operands[1] = copy_to_mode_reg (mode, operands[1]);
6772
6773 if (FLOAT_MODE_P (mode))
6774 {
6775 /* If we are loading a floating point constant to a register,
6776 force the value to memory now, since we'll get better code
6777 out the back end. */
6778
6779 if (strict)
6780 ;
6781 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6782 && register_operand (operands[0], mode))
6783 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6784 }
6785 }
6786
6787 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6788
6789 emit_insn (insn);
6790 }
6791
6792 void
6793 ix86_expand_vector_move (mode, operands)
6794 enum machine_mode mode;
6795 rtx operands[];
6796 {
6797 /* Force constants other than zero into memory. We do not know how
6798 the instructions used to build constants modify the upper 64 bits
6799 of the register; once we have that information we may be able
6800 to handle some of them more efficiently. */
6801 if ((reload_in_progress | reload_completed) == 0
6802 && register_operand (operands[0], mode)
6803 && CONSTANT_P (operands[1]))
6804 {
6805 rtx addr = gen_reg_rtx (Pmode);
6806 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6807 operands[1] = gen_rtx_MEM (mode, addr);
6808 }
6809
6810 /* Make operand1 a register if it isn't already. */
6811 if ((reload_in_progress | reload_completed) == 0
6812 && !register_operand (operands[0], mode)
6813 && !register_operand (operands[1], mode)
6814 && operands[1] != CONST0_RTX (mode))
6815 {
6816 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6817 emit_move_insn (operands[0], temp);
6818 return;
6819 }
6820
6821 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6822 }
6823
6824 /* Attempt to expand a binary operator. Make the expansion closer to the
6825 actual machine than just general_operand, which would allow 3 separate
6826 memory references (one output, two input) in a single insn. */
6827
6828 void
6829 ix86_expand_binary_operator (code, mode, operands)
6830 enum rtx_code code;
6831 enum machine_mode mode;
6832 rtx operands[];
6833 {
6834 int matching_memory;
6835 rtx src1, src2, dst, op, clob;
6836
6837 dst = operands[0];
6838 src1 = operands[1];
6839 src2 = operands[2];
6840
6841 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6842 if (GET_RTX_CLASS (code) == 'c'
6843 && (rtx_equal_p (dst, src2)
6844 || immediate_operand (src1, mode)))
6845 {
6846 rtx temp = src1;
6847 src1 = src2;
6848 src2 = temp;
6849 }
6850
6851 /* If the destination is memory, and we do not have matching source
6852 operands, do things in registers. */
6853 matching_memory = 0;
6854 if (GET_CODE (dst) == MEM)
6855 {
6856 if (rtx_equal_p (dst, src1))
6857 matching_memory = 1;
6858 else if (GET_RTX_CLASS (code) == 'c'
6859 && rtx_equal_p (dst, src2))
6860 matching_memory = 2;
6861 else
6862 dst = gen_reg_rtx (mode);
6863 }
6864
6865 /* Both source operands cannot be in memory. */
6866 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6867 {
6868 if (matching_memory != 2)
6869 src2 = force_reg (mode, src2);
6870 else
6871 src1 = force_reg (mode, src1);
6872 }
6873
6874 /* If the operation is not commutable, source 1 cannot be a constant
6875 or non-matching memory. */
6876 if ((CONSTANT_P (src1)
6877 || (!matching_memory && GET_CODE (src1) == MEM))
6878 && GET_RTX_CLASS (code) != 'c')
6879 src1 = force_reg (mode, src1);
6880
6881 /* If optimizing, copy to regs to improve CSE */
6882 if (optimize && ! no_new_pseudos)
6883 {
6884 if (GET_CODE (dst) == MEM)
6885 dst = gen_reg_rtx (mode);
6886 if (GET_CODE (src1) == MEM)
6887 src1 = force_reg (mode, src1);
6888 if (GET_CODE (src2) == MEM)
6889 src2 = force_reg (mode, src2);
6890 }
6891
6892 /* Emit the instruction. */
6893
6894 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6895 if (reload_in_progress)
6896 {
6897 /* Reload doesn't know about the flags register, and doesn't know that
6898 it doesn't want to clobber it. We can only do this with PLUS. */
6899 if (code != PLUS)
6900 abort ();
6901 emit_insn (op);
6902 }
6903 else
6904 {
6905 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6906 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6907 }
6908
6909 /* Fix up the destination if needed. */
6910 if (dst != operands[0])
6911 emit_move_insn (operands[0], dst);
6912 }
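/* Illustrative example only: expanding "memA = memB + memC" above
   replaces the destination with a fresh register, forces memC (and,
   when optimizing, memB as well) into a register, emits the add with a
   flags clobber, and finally stores the result back to memA with a
   separate move.  */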
6913
6914 /* Return TRUE or FALSE depending on whether the binary operator meets the
6915 appropriate constraints. */
6916
6917 int
6918 ix86_binary_operator_ok (code, mode, operands)
6919 enum rtx_code code;
6920 enum machine_mode mode ATTRIBUTE_UNUSED;
6921 rtx operands[3];
6922 {
6923 /* Both source operands cannot be in memory. */
6924 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6925 return 0;
6926 /* If the operation is not commutable, source 1 cannot be a constant. */
6927 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6928 return 0;
6929 /* If the destination is memory, we must have a matching source operand. */
6930 if (GET_CODE (operands[0]) == MEM
6931 && ! (rtx_equal_p (operands[0], operands[1])
6932 || (GET_RTX_CLASS (code) == 'c'
6933 && rtx_equal_p (operands[0], operands[2]))))
6934 return 0;
6935 /* If the operation is not commutable and the source 1 is memory, we must
6936 have a matching destination. */
6937 if (GET_CODE (operands[1]) == MEM
6938 && GET_RTX_CLASS (code) != 'c'
6939 && ! rtx_equal_p (operands[0], operands[1]))
6940 return 0;
6941 return 1;
6942 }
6943
6944 /* Attempt to expand a unary operator. Make the expansion closer to the
6945 actual machine than just general_operand, which would allow 2 separate
6946 memory references (one output, one input) in a single insn. */
6947
6948 void
6949 ix86_expand_unary_operator (code, mode, operands)
6950 enum rtx_code code;
6951 enum machine_mode mode;
6952 rtx operands[];
6953 {
6954 int matching_memory;
6955 rtx src, dst, op, clob;
6956
6957 dst = operands[0];
6958 src = operands[1];
6959
6960 /* If the destination is memory, and we do not have matching source
6961 operands, do things in registers. */
6962 matching_memory = 0;
6963 if (GET_CODE (dst) == MEM)
6964 {
6965 if (rtx_equal_p (dst, src))
6966 matching_memory = 1;
6967 else
6968 dst = gen_reg_rtx (mode);
6969 }
6970
6971 /* When source operand is memory, destination must match. */
6972 if (!matching_memory && GET_CODE (src) == MEM)
6973 src = force_reg (mode, src);
6974
6975 /* If optimizing, copy to regs to improve CSE */
6976 if (optimize && ! no_new_pseudos)
6977 {
6978 if (GET_CODE (dst) == MEM)
6979 dst = gen_reg_rtx (mode);
6980 if (GET_CODE (src) == MEM)
6981 src = force_reg (mode, src);
6982 }
6983
6984 /* Emit the instruction. */
6985
6986 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6987 if (reload_in_progress || code == NOT)
6988 {
6989 /* Reload doesn't know about the flags register, and doesn't know that
6990 it doesn't want to clobber it. */
6991 if (code != NOT)
6992 abort ();
6993 emit_insn (op);
6994 }
6995 else
6996 {
6997 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6998 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6999 }
7000
7001 /* Fix up the destination if needed. */
7002 if (dst != operands[0])
7003 emit_move_insn (operands[0], dst);
7004 }
7005
7006 /* Return TRUE or FALSE depending on whether the unary operator meets the
7007 appropriate constraints. */
7008
7009 int
7010 ix86_unary_operator_ok (code, mode, operands)
7011 enum rtx_code code ATTRIBUTE_UNUSED;
7012 enum machine_mode mode ATTRIBUTE_UNUSED;
7013 rtx operands[2] ATTRIBUTE_UNUSED;
7014 {
7015 /* If one of operands is memory, source and destination must match. */
7016 if ((GET_CODE (operands[0]) == MEM
7017 || GET_CODE (operands[1]) == MEM)
7018 && ! rtx_equal_p (operands[0], operands[1]))
7019 return FALSE;
7020 return TRUE;
7021 }
7022
7023 /* Return TRUE or FALSE depending on whether the first SET in INSN
7024 has source and destination with matching CC modes, and that the
7025 CC mode is at least as constrained as REQ_MODE. */
7026
7027 int
7028 ix86_match_ccmode (insn, req_mode)
7029 rtx insn;
7030 enum machine_mode req_mode;
7031 {
7032 rtx set;
7033 enum machine_mode set_mode;
7034
7035 set = PATTERN (insn);
7036 if (GET_CODE (set) == PARALLEL)
7037 set = XVECEXP (set, 0, 0);
7038 if (GET_CODE (set) != SET)
7039 abort ();
7040 if (GET_CODE (SET_SRC (set)) != COMPARE)
7041 abort ();
7042
7043 set_mode = GET_MODE (SET_DEST (set));
7044 switch (set_mode)
7045 {
7046 case CCNOmode:
7047 if (req_mode != CCNOmode
7048 && (req_mode != CCmode
7049 || XEXP (SET_SRC (set), 1) != const0_rtx))
7050 return 0;
7051 break;
7052 case CCmode:
7053 if (req_mode == CCGCmode)
7054 return 0;
7055 /* FALLTHRU */
7056 case CCGCmode:
7057 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7058 return 0;
7059 /* FALLTHRU */
7060 case CCGOCmode:
7061 if (req_mode == CCZmode)
7062 return 0;
7063 /* FALLTHRU */
7064 case CCZmode:
7065 break;
7066
7067 default:
7068 abort ();
7069 }
7070
7071 return (GET_MODE (SET_SRC (set)) == set_mode);
7072 }
7073
7074 /* Generate insn patterns to do an integer compare of OPERANDS. */
7075
7076 static rtx
7077 ix86_expand_int_compare (code, op0, op1)
7078 enum rtx_code code;
7079 rtx op0, op1;
7080 {
7081 enum machine_mode cmpmode;
7082 rtx tmp, flags;
7083
7084 cmpmode = SELECT_CC_MODE (code, op0, op1);
7085 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7086
7087 /* This is very simple, but making the interface the same as in the
7088 FP case makes the rest of the code easier. */
7089 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7090 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7091
7092 /* Return the test that should be put into the flags user, i.e.
7093 the bcc, scc, or cmov instruction. */
7094 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7095 }
7096
7097 /* Figure out whether to use ordered or unordered fp comparisons.
7098 Return the appropriate mode to use. */
7099
7100 enum machine_mode
7101 ix86_fp_compare_mode (code)
7102 enum rtx_code code ATTRIBUTE_UNUSED;
7103 {
7104 /* ??? In order to make all comparisons reversible, we do all comparisons
7105 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7106 between the trapping and nontrapping forms of all comparisons, we can make
7107 inequality comparisons trapping again, since that results in better code
7108 when using FCOM based compares. */
7109 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7110 }
7111
7112 enum machine_mode
7113 ix86_cc_mode (code, op0, op1)
7114 enum rtx_code code;
7115 rtx op0, op1;
7116 {
7117 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7118 return ix86_fp_compare_mode (code);
7119 switch (code)
7120 {
7121 /* Only zero flag is needed. */
7122 case EQ: /* ZF=0 */
7123 case NE: /* ZF!=0 */
7124 return CCZmode;
7125 /* Codes needing carry flag. */
7126 case GEU: /* CF=0 */
7127 case GTU: /* CF=0 & ZF=0 */
7128 case LTU: /* CF=1 */
7129 case LEU: /* CF=1 | ZF=1 */
7130 return CCmode;
7131 /* Codes possibly doable only with sign flag when
7132 comparing against zero. */
7133 case GE: /* SF=OF or SF=0 */
7134 case LT: /* SF<>OF or SF=1 */
7135 if (op1 == const0_rtx)
7136 return CCGOCmode;
7137 else
7138 /* For other cases Carry flag is not required. */
7139 return CCGCmode;
7140 /* Codes doable only with the sign flag when comparing
7141 against zero, but for which there is no jump instruction,
7142 so we need to use relational tests against overflow,
7143 which thus needs to be zero. */
7144 case GT: /* ZF=0 & SF=OF */
7145 case LE: /* ZF=1 | SF<>OF */
7146 if (op1 == const0_rtx)
7147 return CCNOmode;
7148 else
7149 return CCGCmode;
7150 /* The strcmp pattern does a (use flags), and combine may ask us for the
7151 proper mode. */
7152 case USE:
7153 return CCmode;
7154 default:
7155 abort ();
7156 }
7157 }
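/* Illustrative sketch only (op0 and op1 are hypothetical operands),
   showing a few of the selections made above.  */
#if 0
  ix86_cc_mode (EQ, op0, op1);         /* CCZmode   - only ZF is needed      */
  ix86_cc_mode (LTU, op0, op1);        /* CCmode    - needs the carry flag   */
  ix86_cc_mode (LT, op0, const0_rtx);  /* CCGOCmode - sign flag against zero */
  ix86_cc_mode (LT, op0, op1);         /* CCGCmode  - general signed compare */
#endif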
7158
7159 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7160
7161 int
7162 ix86_use_fcomi_compare (code)
7163 enum rtx_code code ATTRIBUTE_UNUSED;
7164 {
7165 enum rtx_code swapped_code = swap_condition (code);
7166 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7167 || (ix86_fp_comparison_cost (swapped_code)
7168 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7169 }
7170
7171 /* Swap, force into registers, or otherwise massage the two operands
7172 to a fp comparison. The operands are updated in place; the new
7173 comparison code is returned. */
7174
7175 static enum rtx_code
7176 ix86_prepare_fp_compare_args (code, pop0, pop1)
7177 enum rtx_code code;
7178 rtx *pop0, *pop1;
7179 {
7180 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7181 rtx op0 = *pop0, op1 = *pop1;
7182 enum machine_mode op_mode = GET_MODE (op0);
7183 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7184
7185 /* All of the unordered compare instructions only work on registers.
7186 The same is true of the XFmode compare instructions. The same is
7187 true of the fcomi compare instructions. */
7188
7189 if (!is_sse
7190 && (fpcmp_mode == CCFPUmode
7191 || op_mode == XFmode
7192 || op_mode == TFmode
7193 || ix86_use_fcomi_compare (code)))
7194 {
7195 op0 = force_reg (op_mode, op0);
7196 op1 = force_reg (op_mode, op1);
7197 }
7198 else
7199 {
7200 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7201 things around if they appear profitable, otherwise force op0
7202 into a register. */
7203
7204 if (standard_80387_constant_p (op0) == 0
7205 || (GET_CODE (op0) == MEM
7206 && ! (standard_80387_constant_p (op1) == 0
7207 || GET_CODE (op1) == MEM)))
7208 {
7209 rtx tmp;
7210 tmp = op0, op0 = op1, op1 = tmp;
7211 code = swap_condition (code);
7212 }
7213
7214 if (GET_CODE (op0) != REG)
7215 op0 = force_reg (op_mode, op0);
7216
7217 if (CONSTANT_P (op1))
7218 {
7219 if (standard_80387_constant_p (op1))
7220 op1 = force_reg (op_mode, op1);
7221 else
7222 op1 = validize_mem (force_const_mem (op_mode, op1));
7223 }
7224 }
7225
7226 /* Try to rearrange the comparison to make it cheaper. */
7227 if (ix86_fp_comparison_cost (code)
7228 > ix86_fp_comparison_cost (swap_condition (code))
7229 && (GET_CODE (op1) == REG || !no_new_pseudos))
7230 {
7231 rtx tmp;
7232 tmp = op0, op0 = op1, op1 = tmp;
7233 code = swap_condition (code);
7234 if (GET_CODE (op0) != REG)
7235 op0 = force_reg (op_mode, op0);
7236 }
7237
7238 *pop0 = op0;
7239 *pop1 = op1;
7240 return code;
7241 }
7242
7243 /* Convert the comparison codes we use to represent FP comparisons to the
7244 integer code that will result in the proper branch. Return UNKNOWN if no
7245 such code is available. */
7246 static enum rtx_code
7247 ix86_fp_compare_code_to_integer (code)
7248 enum rtx_code code;
7249 {
7250 switch (code)
7251 {
7252 case GT:
7253 return GTU;
7254 case GE:
7255 return GEU;
7256 case ORDERED:
7257 case UNORDERED:
7258 return code;
7259 break;
7260 case UNEQ:
7261 return EQ;
7262 break;
7263 case UNLT:
7264 return LTU;
7265 break;
7266 case UNLE:
7267 return LEU;
7268 break;
7269 case LTGT:
7270 return NE;
7271 break;
7272 default:
7273 return UNKNOWN;
7274 }
7275 }
7276
7277 /* Split comparison code CODE into comparisons we can do using branch
7278 instructions. BYPASS_CODE is the comparison code for a branch that will
7279 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7280 is not required, its value is set to NIL.
7281 We never require more than two branches. */
7282 static void
7283 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7284 enum rtx_code code, *bypass_code, *first_code, *second_code;
7285 {
7286 *first_code = code;
7287 *bypass_code = NIL;
7288 *second_code = NIL;
7289
7290 /* The fcomi comparison sets flags as follows:
7291
7292 cmp ZF PF CF
7293 > 0 0 0
7294 < 0 0 1
7295 = 1 0 0
7296 un 1 1 1 */
7297
7298 switch (code)
7299 {
7300 case GT: /* GTU - CF=0 & ZF=0 */
7301 case GE: /* GEU - CF=0 */
7302 case ORDERED: /* PF=0 */
7303 case UNORDERED: /* PF=1 */
7304 case UNEQ: /* EQ - ZF=1 */
7305 case UNLT: /* LTU - CF=1 */
7306 case UNLE: /* LEU - CF=1 | ZF=1 */
7307 case LTGT: /* EQ - ZF=0 */
7308 break;
7309 case LT: /* LTU - CF=1 - fails on unordered */
7310 *first_code = UNLT;
7311 *bypass_code = UNORDERED;
7312 break;
7313 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7314 *first_code = UNLE;
7315 *bypass_code = UNORDERED;
7316 break;
7317 case EQ: /* EQ - ZF=1 - fails on unordered */
7318 *first_code = UNEQ;
7319 *bypass_code = UNORDERED;
7320 break;
7321 case NE: /* NE - ZF=0 - fails on unordered */
7322 *first_code = LTGT;
7323 *second_code = UNORDERED;
7324 break;
7325 case UNGE: /* GEU - CF=0 - fails on unordered */
7326 *first_code = GE;
7327 *second_code = UNORDERED;
7328 break;
7329 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7330 *first_code = GT;
7331 *second_code = UNORDERED;
7332 break;
7333 default:
7334 abort ();
7335 }
7336 if (!TARGET_IEEE_FP)
7337 {
7338 *second_code = NIL;
7339 *bypass_code = NIL;
7340 }
7341 }
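/* Illustrative examples only: with TARGET_IEEE_FP, LT splits into
   first_code = UNLT with bypass_code = UNORDERED (branch around the
   test on NaN), while NE splits into first_code = LTGT plus
   second_code = UNORDERED (also take the branch on NaN).  */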
7342
7343 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7344 All of the following functions use the number of instructions as a cost metric.
7345 In the future this should be tweaked to compute bytes for optimize_size and
7346 take into account the performance of various instructions on various CPUs. */
7347 static int
7348 ix86_fp_comparison_arithmetics_cost (code)
7349 enum rtx_code code;
7350 {
7351 if (!TARGET_IEEE_FP)
7352 return 4;
7353 /* The cost of code output by ix86_expand_fp_compare. */
7354 switch (code)
7355 {
7356 case UNLE:
7357 case UNLT:
7358 case LTGT:
7359 case GT:
7360 case GE:
7361 case UNORDERED:
7362 case ORDERED:
7363 case UNEQ:
7364 return 4;
7365 break;
7366 case LT:
7367 case NE:
7368 case EQ:
7369 case UNGE:
7370 return 5;
7371 break;
7372 case LE:
7373 case UNGT:
7374 return 6;
7375 break;
7376 default:
7377 abort ();
7378 }
7379 }
7380
7381 /* Return cost of comparison done using fcomi operation.
7382 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7383 static int
7384 ix86_fp_comparison_fcomi_cost (code)
7385 enum rtx_code code;
7386 {
7387 enum rtx_code bypass_code, first_code, second_code;
7388 /* Return an arbitrarily high cost when the instruction is not supported -
7389 this prevents gcc from using it. */
7390 if (!TARGET_CMOVE)
7391 return 1024;
7392 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7393 return (bypass_code != NIL || second_code != NIL) + 2;
7394 }
7395
7396 /* Return cost of comparison done using sahf operation.
7397 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7398 static int
7399 ix86_fp_comparison_sahf_cost (code)
7400 enum rtx_code code;
7401 {
7402 enum rtx_code bypass_code, first_code, second_code;
7403 /* Return an arbitrarily high cost when the instruction is not preferred -
7404 this keeps gcc from using it. */
7405 if (!TARGET_USE_SAHF && !optimize_size)
7406 return 1024;
7407 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7408 return (bypass_code != NIL || second_code != NIL) + 3;
7409 }
7410
7411 /* Compute cost of the comparison done using any method.
7412 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7413 static int
7414 ix86_fp_comparison_cost (code)
7415 enum rtx_code code;
7416 {
7417 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7418 int min;
7419
7420 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7421 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7422
7423 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7424 if (min > sahf_cost)
7425 min = sahf_cost;
7426 if (min > fcomi_cost)
7427 min = fcomi_cost;
7428 return min;
7429 }
7430
7431 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7432
7433 static rtx
7434 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7435 enum rtx_code code;
7436 rtx op0, op1, scratch;
7437 rtx *second_test;
7438 rtx *bypass_test;
7439 {
7440 enum machine_mode fpcmp_mode, intcmp_mode;
7441 rtx tmp, tmp2;
7442 int cost = ix86_fp_comparison_cost (code);
7443 enum rtx_code bypass_code, first_code, second_code;
7444
7445 fpcmp_mode = ix86_fp_compare_mode (code);
7446 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7447
7448 if (second_test)
7449 *second_test = NULL_RTX;
7450 if (bypass_test)
7451 *bypass_test = NULL_RTX;
7452
7453 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7454
7455 /* Do fcomi/sahf based test when profitable. */
7456 if ((bypass_code == NIL || bypass_test)
7457 && (second_code == NIL || second_test)
7458 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7459 {
7460 if (TARGET_CMOVE)
7461 {
7462 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7463 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7464 tmp);
7465 emit_insn (tmp);
7466 }
7467 else
7468 {
7469 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7470 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7471 if (!scratch)
7472 scratch = gen_reg_rtx (HImode);
7473 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7474 emit_insn (gen_x86_sahf_1 (scratch));
7475 }
7476
7477 /* The FP codes work out to act like unsigned. */
7478 intcmp_mode = fpcmp_mode;
7479 code = first_code;
7480 if (bypass_code != NIL)
7481 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7482 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7483 const0_rtx);
7484 if (second_code != NIL)
7485 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7486 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7487 const0_rtx);
7488 }
7489 else
7490 {
7491 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7492 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7493 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7494 if (!scratch)
7495 scratch = gen_reg_rtx (HImode);
7496 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7497
7498 /* In the unordered case, we have to check C2 for NaN's, which
7499 doesn't happen to work out to anything nice combination-wise.
7500 So do some bit twiddling on the value we've got in AH to come
7501 up with an appropriate set of condition codes. */
7502
7503 intcmp_mode = CCNOmode;
7504 switch (code)
7505 {
7506 case GT:
7507 case UNGT:
7508 if (code == GT || !TARGET_IEEE_FP)
7509 {
7510 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7511 code = EQ;
7512 }
7513 else
7514 {
7515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7516 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7517 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7518 intcmp_mode = CCmode;
7519 code = GEU;
7520 }
7521 break;
7522 case LT:
7523 case UNLT:
7524 if (code == LT && TARGET_IEEE_FP)
7525 {
7526 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7527 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7528 intcmp_mode = CCmode;
7529 code = EQ;
7530 }
7531 else
7532 {
7533 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7534 code = NE;
7535 }
7536 break;
7537 case GE:
7538 case UNGE:
7539 if (code == GE || !TARGET_IEEE_FP)
7540 {
7541 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7542 code = EQ;
7543 }
7544 else
7545 {
7546 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7547 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7548 GEN_INT (0x01)));
7549 code = NE;
7550 }
7551 break;
7552 case LE:
7553 case UNLE:
7554 if (code == LE && TARGET_IEEE_FP)
7555 {
7556 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7557 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7558 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7559 intcmp_mode = CCmode;
7560 code = LTU;
7561 }
7562 else
7563 {
7564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7565 code = NE;
7566 }
7567 break;
7568 case EQ:
7569 case UNEQ:
7570 if (code == EQ && TARGET_IEEE_FP)
7571 {
7572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7573 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7574 intcmp_mode = CCmode;
7575 code = EQ;
7576 }
7577 else
7578 {
7579 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7580 code = NE;
7581 break;
7582 }
7583 break;
7584 case NE:
7585 case LTGT:
7586 if (code == NE && TARGET_IEEE_FP)
7587 {
7588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7589 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7590 GEN_INT (0x40)));
7591 code = NE;
7592 }
7593 else
7594 {
7595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7596 code = EQ;
7597 }
7598 break;
7599
7600 case UNORDERED:
7601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7602 code = NE;
7603 break;
7604 case ORDERED:
7605 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7606 code = EQ;
7607 break;
7608
7609 default:
7610 abort ();
7611 }
7612 }
7613
7614 /* Return the test that should be put into the flags user, i.e.
7615 the bcc, scc, or cmov instruction. */
7616 return gen_rtx_fmt_ee (code, VOIDmode,
7617 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7618 const0_rtx);
7619 }
7620
7621 rtx
7622 ix86_expand_compare (code, second_test, bypass_test)
7623 enum rtx_code code;
7624 rtx *second_test, *bypass_test;
7625 {
7626 rtx op0, op1, ret;
7627 op0 = ix86_compare_op0;
7628 op1 = ix86_compare_op1;
7629
7630 if (second_test)
7631 *second_test = NULL_RTX;
7632 if (bypass_test)
7633 *bypass_test = NULL_RTX;
7634
7635 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7636 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7637 second_test, bypass_test);
7638 else
7639 ret = ix86_expand_int_compare (code, op0, op1);
7640
7641 return ret;
7642 }
7643
7644 /* Return true if the CODE will result in a nontrivial jump sequence. */
7645 bool
7646 ix86_fp_jump_nontrivial_p (code)
7647 enum rtx_code code;
7648 {
7649 enum rtx_code bypass_code, first_code, second_code;
7650 if (!TARGET_CMOVE)
7651 return true;
7652 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7653 return bypass_code != NIL || second_code != NIL;
7654 }
7655
7656 void
7657 ix86_expand_branch (code, label)
7658 enum rtx_code code;
7659 rtx label;
7660 {
7661 rtx tmp;
7662
7663 switch (GET_MODE (ix86_compare_op0))
7664 {
7665 case QImode:
7666 case HImode:
7667 case SImode:
7668 simple:
7669 tmp = ix86_expand_compare (code, NULL, NULL);
7670 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7671 gen_rtx_LABEL_REF (VOIDmode, label),
7672 pc_rtx);
7673 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7674 return;
7675
7676 case SFmode:
7677 case DFmode:
7678 case XFmode:
7679 case TFmode:
7680 {
7681 rtvec vec;
7682 int use_fcomi;
7683 enum rtx_code bypass_code, first_code, second_code;
7684
7685 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7686 &ix86_compare_op1);
7687
7688 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7689
7690 /* Check whether we will use the natural sequence with one jump. If
7691 so, we can expand the jump early. Otherwise delay expansion by
7692 creating a compound insn so as not to confuse the optimizers. */
7693 if (bypass_code == NIL && second_code == NIL
7694 && TARGET_CMOVE)
7695 {
7696 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7697 gen_rtx_LABEL_REF (VOIDmode, label),
7698 pc_rtx, NULL_RTX);
7699 }
7700 else
7701 {
7702 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7703 ix86_compare_op0, ix86_compare_op1);
7704 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7705 gen_rtx_LABEL_REF (VOIDmode, label),
7706 pc_rtx);
7707 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7708
7709 use_fcomi = ix86_use_fcomi_compare (code);
7710 vec = rtvec_alloc (3 + !use_fcomi);
7711 RTVEC_ELT (vec, 0) = tmp;
7712 RTVEC_ELT (vec, 1)
7713 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7714 RTVEC_ELT (vec, 2)
7715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7716 if (! use_fcomi)
7717 RTVEC_ELT (vec, 3)
7718 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7719
7720 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7721 }
7722 return;
7723 }
7724
7725 case DImode:
7726 if (TARGET_64BIT)
7727 goto simple;
7728 /* Expand DImode branch into multiple compare+branch. */
7729 {
7730 rtx lo[2], hi[2], label2;
7731 enum rtx_code code1, code2, code3;
7732
7733 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7734 {
7735 tmp = ix86_compare_op0;
7736 ix86_compare_op0 = ix86_compare_op1;
7737 ix86_compare_op1 = tmp;
7738 code = swap_condition (code);
7739 }
7740 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7741 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7742
7743 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7744 avoid two branches. This costs one extra insn, so disable when
7745 optimizing for size. */
7746
7747 if ((code == EQ || code == NE)
7748 && (!optimize_size
7749 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7750 {
7751 rtx xor0, xor1;
7752
7753 xor1 = hi[0];
7754 if (hi[1] != const0_rtx)
7755 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7756 NULL_RTX, 0, OPTAB_WIDEN);
7757
7758 xor0 = lo[0];
7759 if (lo[1] != const0_rtx)
7760 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7761 NULL_RTX, 0, OPTAB_WIDEN);
7762
7763 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7764 NULL_RTX, 0, OPTAB_WIDEN);
7765
7766 ix86_compare_op0 = tmp;
7767 ix86_compare_op1 = const0_rtx;
7768 ix86_expand_branch (code, label);
7769 return;
7770 }
7771
7772 /* Otherwise, if we are doing a less-than or greater-or-equal-than
7773 comparison, op1 is a constant, and the low word is zero, then we
7774 can just examine the high word. */
7775
7776 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7777 switch (code)
7778 {
7779 case LT: case LTU: case GE: case GEU:
7780 ix86_compare_op0 = hi[0];
7781 ix86_compare_op1 = hi[1];
7782 ix86_expand_branch (code, label);
7783 return;
7784 default:
7785 break;
7786 }
7787
7788 /* Otherwise, we need two or three jumps. */
7789
7790 label2 = gen_label_rtx ();
7791
7792 code1 = code;
7793 code2 = swap_condition (code);
7794 code3 = unsigned_condition (code);
7795
7796 switch (code)
7797 {
7798 case LT: case GT: case LTU: case GTU:
7799 break;
7800
7801 case LE: code1 = LT; code2 = GT; break;
7802 case GE: code1 = GT; code2 = LT; break;
7803 case LEU: code1 = LTU; code2 = GTU; break;
7804 case GEU: code1 = GTU; code2 = LTU; break;
7805
7806 case EQ: code1 = NIL; code2 = NE; break;
7807 case NE: code2 = NIL; break;
7808
7809 default:
7810 abort ();
7811 }
7812
7813 /*
7814 * a < b =>
7815 * if (hi(a) < hi(b)) goto true;
7816 * if (hi(a) > hi(b)) goto false;
7817 * if (lo(a) < lo(b)) goto true;
7818 * false:
7819 */
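/* For example, a signed LE ends up as code1 = LT, code2 = GT and
   code3 = LEU: jump to the target when the high words compare LT,
   to label2 (false) when they compare GT, and otherwise decide on an
   unsigned LEU comparison of the low words.  */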
7820
7821 ix86_compare_op0 = hi[0];
7822 ix86_compare_op1 = hi[1];
7823
7824 if (code1 != NIL)
7825 ix86_expand_branch (code1, label);
7826 if (code2 != NIL)
7827 ix86_expand_branch (code2, label2);
7828
7829 ix86_compare_op0 = lo[0];
7830 ix86_compare_op1 = lo[1];
7831 ix86_expand_branch (code3, label);
7832
7833 if (code2 != NIL)
7834 emit_label (label2);
7835 return;
7836 }
7837
7838 default:
7839 abort ();
7840 }
7841 }
7842
7843 /* Split branch based on floating point condition. */
7844 void
7845 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7846 enum rtx_code code;
7847 rtx op1, op2, target1, target2, tmp;
7848 {
7849 rtx second, bypass;
7850 rtx label = NULL_RTX;
7851 rtx condition;
7852 int bypass_probability = -1, second_probability = -1, probability = -1;
7853 rtx i;
7854
7855 if (target2 != pc_rtx)
7856 {
7857 rtx tmp = target2;
7858 code = reverse_condition_maybe_unordered (code);
7859 target2 = target1;
7860 target1 = tmp;
7861 }
7862
7863 condition = ix86_expand_fp_compare (code, op1, op2,
7864 tmp, &second, &bypass);
7865
7866 if (split_branch_probability >= 0)
7867 {
7868 /* Distribute the probabilities across the jumps.
7869 Assume that BYPASS and SECOND always test
7870 for UNORDERED. */
7871 probability = split_branch_probability;
7872
7873 /* A value of 1 is low enough that the probability does not need
7874 to be updated. Later we may run some experiments and see
7875 whether unordered values are more frequent in practice. */
7876 if (bypass)
7877 bypass_probability = 1;
7878 if (second)
7879 second_probability = 1;
7880 }
7881 if (bypass != NULL_RTX)
7882 {
7883 label = gen_label_rtx ();
7884 i = emit_jump_insn (gen_rtx_SET
7885 (VOIDmode, pc_rtx,
7886 gen_rtx_IF_THEN_ELSE (VOIDmode,
7887 bypass,
7888 gen_rtx_LABEL_REF (VOIDmode,
7889 label),
7890 pc_rtx)));
7891 if (bypass_probability >= 0)
7892 REG_NOTES (i)
7893 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7894 GEN_INT (bypass_probability),
7895 REG_NOTES (i));
7896 }
7897 i = emit_jump_insn (gen_rtx_SET
7898 (VOIDmode, pc_rtx,
7899 gen_rtx_IF_THEN_ELSE (VOIDmode,
7900 condition, target1, target2)));
7901 if (probability >= 0)
7902 REG_NOTES (i)
7903 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7904 GEN_INT (probability),
7905 REG_NOTES (i));
7906 if (second != NULL_RTX)
7907 {
7908 i = emit_jump_insn (gen_rtx_SET
7909 (VOIDmode, pc_rtx,
7910 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7911 target2)));
7912 if (second_probability >= 0)
7913 REG_NOTES (i)
7914 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7915 GEN_INT (second_probability),
7916 REG_NOTES (i));
7917 }
7918 if (label != NULL_RTX)
7919 emit_label (label);
7920 }
7921
7922 int
7923 ix86_expand_setcc (code, dest)
7924 enum rtx_code code;
7925 rtx dest;
7926 {
7927 rtx ret, tmp, tmpreg;
7928 rtx second_test, bypass_test;
7929
7930 if (GET_MODE (ix86_compare_op0) == DImode
7931 && !TARGET_64BIT)
7932 return 0; /* FAIL */
7933
7934 if (GET_MODE (dest) != QImode)
7935 abort ();
7936
7937 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7938 PUT_MODE (ret, QImode);
7939
7940 tmp = dest;
7941 tmpreg = dest;
7942
7943 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7944 if (bypass_test || second_test)
7945 {
7946 rtx test = second_test;
7947 int bypass = 0;
7948 rtx tmp2 = gen_reg_rtx (QImode);
7949 if (bypass_test)
7950 {
7951 if (second_test)
7952 abort ();
7953 test = bypass_test;
7954 bypass = 1;
7955 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7956 }
7957 PUT_MODE (test, QImode);
7958 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7959
7960 if (bypass)
7961 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7962 else
7963 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7964 }
7965
7966 return 1; /* DONE */
7967 }
7968
7969 int
7970 ix86_expand_int_movcc (operands)
7971 rtx operands[];
7972 {
7973 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7974 rtx compare_seq, compare_op;
7975 rtx second_test, bypass_test;
7976 enum machine_mode mode = GET_MODE (operands[0]);
7977
7978 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
7979 When the comparison is done with an immediate, we can convert it to
7980 LTU or GEU by adjusting the constant. */
7981
7982 if ((code == LEU || code == GTU)
7983 && GET_CODE (ix86_compare_op1) == CONST_INT
7984 && mode != HImode
7985 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7986 && GET_CODE (operands[2]) == CONST_INT
7987 && GET_CODE (operands[3]) == CONST_INT)
7988 {
7989 if (code == LEU)
7990 code = LTU;
7991 else
7992 code = GEU;
7993 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7994 }
7995
7996 start_sequence ();
7997 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7998 compare_seq = gen_sequence ();
7999 end_sequence ();
8000
8001 compare_code = GET_CODE (compare_op);
8002
8003 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8004 HImode insns, we'd be swallowed in word prefix ops. */
8005
8006 if (mode != HImode
8007 && (mode != DImode || TARGET_64BIT)
8008 && GET_CODE (operands[2]) == CONST_INT
8009 && GET_CODE (operands[3]) == CONST_INT)
8010 {
8011 rtx out = operands[0];
8012 HOST_WIDE_INT ct = INTVAL (operands[2]);
8013 HOST_WIDE_INT cf = INTVAL (operands[3]);
8014 HOST_WIDE_INT diff;
8015
8016 if ((compare_code == LTU || compare_code == GEU)
8017 && !second_test && !bypass_test)
8018 {
8019
8020 /* Detect overlap between destination and compare sources. */
8021 rtx tmp = out;
8022
8023 /* To simplify the rest of the code, restrict to the GEU case. */
8024 if (compare_code == LTU)
8025 {
8026 int tmp = ct;
8027 ct = cf;
8028 cf = tmp;
8029 compare_code = reverse_condition (compare_code);
8030 code = reverse_condition (code);
8031 }
8032 diff = ct - cf;
8033
8034 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8035 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8036 tmp = gen_reg_rtx (mode);
8037
8038 emit_insn (compare_seq);
8039 if (mode == DImode)
8040 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8041 else
8042 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8043
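/* The movdicc_0_m1/movsicc_0_m1 patterns in effect emit the
   "sbbl dest,dest" shown in the comments below, leaving tmp == -1
   when the carry from the comparison is set (the LTU case reversed
   away above) and 0 when GEU holds; the arithmetic that follows then
   maps {0, -1} onto {ct, cf}.  */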
8044 if (diff == 1)
8045 {
8046 /*
8047 * cmpl op0,op1
8048 * sbbl dest,dest
8049 * [addl dest, ct]
8050 *
8051 * Size 5 - 8.
8052 */
8053 if (ct)
8054 tmp = expand_simple_binop (mode, PLUS,
8055 tmp, GEN_INT (ct),
8056 tmp, 1, OPTAB_DIRECT);
8057 }
8058 else if (cf == -1)
8059 {
8060 /*
8061 * cmpl op0,op1
8062 * sbbl dest,dest
8063 * orl $ct, dest
8064 *
8065 * Size 8.
8066 */
8067 tmp = expand_simple_binop (mode, IOR,
8068 tmp, GEN_INT (ct),
8069 tmp, 1, OPTAB_DIRECT);
8070 }
8071 else if (diff == -1 && ct)
8072 {
8073 /*
8074 * cmpl op0,op1
8075 * sbbl dest,dest
8076 * xorl $-1, dest
8077 * [addl dest, cf]
8078 *
8079 * Size 8 - 11.
8080 */
8081 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8082 if (cf)
8083 tmp = expand_simple_binop (mode, PLUS,
8084 tmp, GEN_INT (cf),
8085 tmp, 1, OPTAB_DIRECT);
8086 }
8087 else
8088 {
8089 /*
8090 * cmpl op0,op1
8091 * sbbl dest,dest
8092 * andl cf - ct, dest
8093 * [addl dest, ct]
8094 *
8095 * Size 8 - 11.
8096 */
8097 tmp = expand_simple_binop (mode, AND,
8098 tmp,
8099 gen_int_mode (cf - ct, mode),
8100 tmp, 1, OPTAB_DIRECT);
8101 if (ct)
8102 tmp = expand_simple_binop (mode, PLUS,
8103 tmp, GEN_INT (ct),
8104 tmp, 1, OPTAB_DIRECT);
8105 }
8106
8107 if (tmp != out)
8108 emit_move_insn (out, tmp);
8109
8110 return 1; /* DONE */
8111 }
8112
8113 diff = ct - cf;
8114 if (diff < 0)
8115 {
8116 HOST_WIDE_INT tmp;
8117 tmp = ct, ct = cf, cf = tmp;
8118 diff = -diff;
8119 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8120 {
8121 /* We may be reversing an unordered compare to a normal compare, which
8122 is not valid in general (we may convert a non-trapping condition
8123 to a trapping one); however, on i386 we currently emit all
8124 comparisons unordered. */
8125 compare_code = reverse_condition_maybe_unordered (compare_code);
8126 code = reverse_condition_maybe_unordered (code);
8127 }
8128 else
8129 {
8130 compare_code = reverse_condition (compare_code);
8131 code = reverse_condition (code);
8132 }
8133 }
8134 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8135 || diff == 3 || diff == 5 || diff == 9)
8136 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8137 {
8138 /*
8139 * xorl dest,dest
8140 * cmpl op1,op2
8141 * setcc dest
8142 * lea cf(dest*(ct-cf)),dest
8143 *
8144 * Size 14.
8145 *
8146 * This also catches the degenerate setcc-only case.
8147 */
8148
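/* For example, with ct == 5 and cf == 2 (diff == 3) this becomes a
   setcc producing 0/1 followed by something like
   "leal 2(%eax,%eax,2), %eax", i.e. dest = dest * 3 + 2.  */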
8149 rtx tmp;
8150 int nops;
8151
8152 out = emit_store_flag (out, code, ix86_compare_op0,
8153 ix86_compare_op1, VOIDmode, 0, 1);
8154
8155 nops = 0;
8156 /* On x86_64 the lea instruction operates on Pmode, so we need the
8157 arithmetic done in the proper mode to match. */
8158 if (diff == 1)
8159 tmp = out;
8160 else
8161 {
8162 rtx out1;
8163 out1 = out;
8164 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8165 nops++;
8166 if (diff & 1)
8167 {
8168 tmp = gen_rtx_PLUS (mode, tmp, out1);
8169 nops++;
8170 }
8171 }
8172 if (cf != 0)
8173 {
8174 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8175 nops++;
8176 }
8177 if (tmp != out
8178 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8179 {
8180 if (nops == 1)
8181 {
8182 rtx clob;
8183
8184 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8185 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8186
8187 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8188 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8189 emit_insn (tmp);
8190 }
8191 else
8192 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8193 }
8194 if (out != operands[0])
8195 emit_move_insn (operands[0], out);
8196
8197 return 1; /* DONE */
8198 }
8199
8200 /*
8201 * General case: Jumpful:
8202 * xorl dest,dest cmpl op1, op2
8203 * cmpl op1, op2 movl ct, dest
8204 * setcc dest jcc 1f
8205 * decl dest movl cf, dest
8206 * andl (cf-ct),dest 1:
8207 * addl ct,dest
8208 *
8209 * Size 20. Size 14.
8210 *
8211 * This is reasonably steep, but branch mispredict costs are
8212 * high on modern cpus, so consider failing only if optimizing
8213 * for space.
8214 *
8215 * %%% Parameterize branch_cost on the tuning architecture, then
8216 * use that. The 80386 couldn't care less about mispredicts.
8217 */
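/* In other words: setcc leaves 0/1 in dest, the decrement turns that
   into -1/0, the AND with (cf - ct) gives (cf - ct)/0, and the final
   add of ct yields cf when the condition is false and ct when it is
   true.  */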
8218
8219 if (!optimize_size && !TARGET_CMOVE)
8220 {
8221 if (ct == 0)
8222 {
8223 ct = cf;
8224 cf = 0;
8225 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8226 {
8227 /* We may be reversing an unordered compare to a normal compare,
8228 which is not valid in general (we may convert a non-trapping
8229 condition to a trapping one); however, on i386 we currently
8230 emit all comparisons unordered. */
8231 compare_code = reverse_condition_maybe_unordered (compare_code);
8232 code = reverse_condition_maybe_unordered (code);
8233 }
8234 else
8235 {
8236 compare_code = reverse_condition (compare_code);
8237 code = reverse_condition (code);
8238 }
8239 }
8240
8241 out = emit_store_flag (out, code, ix86_compare_op0,
8242 ix86_compare_op1, VOIDmode, 0, 1);
8243
8244 out = expand_simple_binop (mode, PLUS,
8245 out, constm1_rtx,
8246 out, 1, OPTAB_DIRECT);
8247 out = expand_simple_binop (mode, AND,
8248 out,
8249 gen_int_mode (cf - ct, mode),
8250 out, 1, OPTAB_DIRECT);
8251 out = expand_simple_binop (mode, PLUS,
8252 out, GEN_INT (ct),
8253 out, 1, OPTAB_DIRECT);
8254 if (out != operands[0])
8255 emit_move_insn (operands[0], out);
8256
8257 return 1; /* DONE */
8258 }
8259 }
8260
8261 if (!TARGET_CMOVE)
8262 {
8263 /* Try a few things more with specific constants and a variable. */
8264
8265 optab op;
8266 rtx var, orig_out, out, tmp;
8267
8268 if (optimize_size)
8269 return 0; /* FAIL */
8270
8271 /* If one of the two operands is an interesting constant (0 or -1), load
8272 that constant via the recursion below and mask the variable in with a logical operation. */
8273
8274 if (GET_CODE (operands[2]) == CONST_INT)
8275 {
8276 var = operands[3];
8277 if (INTVAL (operands[2]) == 0)
8278 operands[3] = constm1_rtx, op = and_optab;
8279 else if (INTVAL (operands[2]) == -1)
8280 operands[3] = const0_rtx, op = ior_optab;
8281 else
8282 return 0; /* FAIL */
8283 }
8284 else if (GET_CODE (operands[3]) == CONST_INT)
8285 {
8286 var = operands[2];
8287 if (INTVAL (operands[3]) == 0)
8288 operands[2] = constm1_rtx, op = and_optab;
8289 else if (INTVAL (operands[3]) == -1)
8290 operands[2] = const0_rtx, op = ior_optab;
8291 else
8292 return 0; /* FAIL */
8293 }
8294 else
8295 return 0; /* FAIL */
8296
8297 orig_out = operands[0];
8298 tmp = gen_reg_rtx (mode);
8299 operands[0] = tmp;
8300
8301 /* Recurse to get the constant loaded. */
8302 if (ix86_expand_int_movcc (operands) == 0)
8303 return 0; /* FAIL */
8304
8305 /* Mask in the interesting variable. */
8306 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8307 OPTAB_WIDEN);
8308 if (out != orig_out)
8309 emit_move_insn (orig_out, out);
8310
8311 return 1; /* DONE */
8312 }
8313
8314 /*
8315 * For comparison with above,
8316 *
8317 * movl cf,dest
8318 * movl ct,tmp
8319 * cmpl op1,op2
8320 * cmovcc tmp,dest
8321 *
8322 * Size 15.
8323 */
8324
8325 if (! nonimmediate_operand (operands[2], mode))
8326 operands[2] = force_reg (mode, operands[2]);
8327 if (! nonimmediate_operand (operands[3], mode))
8328 operands[3] = force_reg (mode, operands[3]);
8329
8330 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8331 {
8332 rtx tmp = gen_reg_rtx (mode);
8333 emit_move_insn (tmp, operands[3]);
8334 operands[3] = tmp;
8335 }
8336 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8337 {
8338 rtx tmp = gen_reg_rtx (mode);
8339 emit_move_insn (tmp, operands[2]);
8340 operands[2] = tmp;
8341 }
8342 if (! register_operand (operands[2], VOIDmode)
8343 && ! register_operand (operands[3], VOIDmode))
8344 operands[2] = force_reg (mode, operands[2]);
8345
8346 emit_insn (compare_seq);
8347 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8348 gen_rtx_IF_THEN_ELSE (mode,
8349 compare_op, operands[2],
8350 operands[3])));
8351 if (bypass_test)
8352 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8353 gen_rtx_IF_THEN_ELSE (mode,
8354 bypass_test,
8355 operands[3],
8356 operands[0])));
8357 if (second_test)
8358 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8359 gen_rtx_IF_THEN_ELSE (mode,
8360 second_test,
8361 operands[2],
8362 operands[0])));
8363
8364 return 1; /* DONE */
8365 }
8366
8367 int
8368 ix86_expand_fp_movcc (operands)
8369 rtx operands[];
8370 {
8371 enum rtx_code code;
8372 rtx tmp;
8373 rtx compare_op, second_test, bypass_test;
8374
8375 /* For SF/DFmode conditional moves based on comparisons
8376 in same mode, we may want to use SSE min/max instructions. */
8377 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8378 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8379 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8380 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8381 && (!TARGET_IEEE_FP
8382 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8383 /* We may be called from the post-reload splitter. */
8384 && (!REG_P (operands[0])
8385 || SSE_REG_P (operands[0])
8386 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8387 {
8388 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8389 code = GET_CODE (operands[1]);
8390
8391 /* See if we have (cross) match between comparison operands and
8392 conditional move operands. */
8393 if (rtx_equal_p (operands[2], op1))
8394 {
8395 rtx tmp = op0;
8396 op0 = op1;
8397 op1 = tmp;
8398 code = reverse_condition_maybe_unordered (code);
8399 }
8400 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8401 {
8402 /* Check for min operation. */
8403 if (code == LT)
8404 {
8405 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8406 if (memory_operand (op0, VOIDmode))
8407 op0 = force_reg (GET_MODE (operands[0]), op0);
8408 if (GET_MODE (operands[0]) == SFmode)
8409 emit_insn (gen_minsf3 (operands[0], op0, op1));
8410 else
8411 emit_insn (gen_mindf3 (operands[0], op0, op1));
8412 return 1;
8413 }
8414 /* Check for max operation. */
8415 if (code == GT)
8416 {
8417 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8418 if (memory_operand (op0, VOIDmode))
8419 op0 = force_reg (GET_MODE (operands[0]), op0);
8420 if (GET_MODE (operands[0]) == SFmode)
8421 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8422 else
8423 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8424 return 1;
8425 }
8426 }
8427 /* Arrange for the condition to be a sse_comparison_operator. In
8428 non-IEEE mode, try to canonicalize the destination operand
8429 to be first in the comparison - this helps reload avoid extra
8430 moves. */
8431 if (!sse_comparison_operator (operands[1], VOIDmode)
8432 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8433 {
8434 rtx tmp = ix86_compare_op0;
8435 ix86_compare_op0 = ix86_compare_op1;
8436 ix86_compare_op1 = tmp;
8437 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8438 VOIDmode, ix86_compare_op0,
8439 ix86_compare_op1);
8440 }
8441 /* Similarly, try to arrange for the result to be the first operand of
8442 the conditional move. We also don't support the NE comparison on SSE,
8443 so try to avoid it. */
8444 if ((rtx_equal_p (operands[0], operands[3])
8445 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8446 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8447 {
8448 rtx tmp = operands[2];
8449 operands[2] = operands[3];
8450 operands[3] = tmp;
8451 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8452 (GET_CODE (operands[1])),
8453 VOIDmode, ix86_compare_op0,
8454 ix86_compare_op1);
8455 }
8456 if (GET_MODE (operands[0]) == SFmode)
8457 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8458 operands[2], operands[3],
8459 ix86_compare_op0, ix86_compare_op1));
8460 else
8461 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8462 operands[2], operands[3],
8463 ix86_compare_op0, ix86_compare_op1));
8464 return 1;
8465 }
8466
8467 /* The floating point conditional move instructions don't directly
8468 support conditions resulting from a signed integer comparison. */
8469
8470 code = GET_CODE (operands[1]);
8471 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8472
8473 /* The floating point conditional move instructions don't directly
8474 support signed integer comparisons. */
8475
8476 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8477 {
8478 if (second_test != NULL || bypass_test != NULL)
8479 abort ();
8480 tmp = gen_reg_rtx (QImode);
8481 ix86_expand_setcc (code, tmp);
8482 code = NE;
8483 ix86_compare_op0 = tmp;
8484 ix86_compare_op1 = const0_rtx;
8485 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8486 }
8487 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8488 {
8489 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8490 emit_move_insn (tmp, operands[3]);
8491 operands[3] = tmp;
8492 }
8493 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8494 {
8495 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8496 emit_move_insn (tmp, operands[2]);
8497 operands[2] = tmp;
8498 }
8499
8500 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8501 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8502 compare_op,
8503 operands[2],
8504 operands[3])));
8505 if (bypass_test)
8506 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8507 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8508 bypass_test,
8509 operands[3],
8510 operands[0])));
8511 if (second_test)
8512 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8513 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8514 second_test,
8515 operands[2],
8516 operands[0])));
8517
8518 return 1;
8519 }
8520
8521 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8522 works for floating point operands and non-offsettable memories.
8523 For pushes, it returns just stack offsets; the values will be saved
8524 in the right order. At most three parts are generated. */
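/* For example, on !TARGET_64BIT a DFmode register pair splits into two
   consecutive SImode hard registers, while an XFmode or TFmode
   CONST_DOUBLE becomes three SImode immediates obtained via
   REAL_VALUE_TO_TARGET_LONG_DOUBLE.  */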
8525
8526 static int
8527 ix86_split_to_parts (operand, parts, mode)
8528 rtx operand;
8529 rtx *parts;
8530 enum machine_mode mode;
8531 {
8532 int size;
8533
8534 if (!TARGET_64BIT)
8535 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8536 else
8537 size = (GET_MODE_SIZE (mode) + 4) / 8;
8538
8539 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8540 abort ();
8541 if (size < 2 || size > 3)
8542 abort ();
8543
8544 /* Optimize constant pool reference to immediates. This is used by fp moves,
8545 that force all constants to memory to allow combining. */
8546
8547 if (GET_CODE (operand) == MEM
8548 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8549 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8550 operand = get_pool_constant (XEXP (operand, 0));
8551
8552 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8553 {
8554 /* The only non-offsetable memories we handle are pushes. */
8555 if (! push_operand (operand, VOIDmode))
8556 abort ();
8557
8558 operand = copy_rtx (operand);
8559 PUT_MODE (operand, Pmode);
8560 parts[0] = parts[1] = parts[2] = operand;
8561 }
8562 else if (!TARGET_64BIT)
8563 {
8564 if (mode == DImode)
8565 split_di (&operand, 1, &parts[0], &parts[1]);
8566 else
8567 {
8568 if (REG_P (operand))
8569 {
8570 if (!reload_completed)
8571 abort ();
8572 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8573 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8574 if (size == 3)
8575 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8576 }
8577 else if (offsettable_memref_p (operand))
8578 {
8579 operand = adjust_address (operand, SImode, 0);
8580 parts[0] = operand;
8581 parts[1] = adjust_address (operand, SImode, 4);
8582 if (size == 3)
8583 parts[2] = adjust_address (operand, SImode, 8);
8584 }
8585 else if (GET_CODE (operand) == CONST_DOUBLE)
8586 {
8587 REAL_VALUE_TYPE r;
8588 long l[4];
8589
8590 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8591 switch (mode)
8592 {
8593 case XFmode:
8594 case TFmode:
8595 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8596 parts[2] = gen_int_mode (l[2], SImode);
8597 break;
8598 case DFmode:
8599 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8600 break;
8601 default:
8602 abort ();
8603 }
8604 parts[1] = gen_int_mode (l[1], SImode);
8605 parts[0] = gen_int_mode (l[0], SImode);
8606 }
8607 else
8608 abort ();
8609 }
8610 }
8611 else
8612 {
8613 if (mode == TImode)
8614 split_ti (&operand, 1, &parts[0], &parts[1]);
8615 if (mode == XFmode || mode == TFmode)
8616 {
8617 if (REG_P (operand))
8618 {
8619 if (!reload_completed)
8620 abort ();
8621 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8622 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8623 }
8624 else if (offsettable_memref_p (operand))
8625 {
8626 operand = adjust_address (operand, DImode, 0);
8627 parts[0] = operand;
8628 parts[1] = adjust_address (operand, SImode, 8);
8629 }
8630 else if (GET_CODE (operand) == CONST_DOUBLE)
8631 {
8632 REAL_VALUE_TYPE r;
8633 long l[3];
8634
8635 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8636 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8637 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8638 if (HOST_BITS_PER_WIDE_INT >= 64)
8639 parts[0]
8640 = gen_int_mode
8641 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8642 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8643 DImode);
8644 else
8645 parts[0] = immed_double_const (l[0], l[1], DImode);
8646 parts[1] = gen_int_mode (l[2], SImode);
8647 }
8648 else
8649 abort ();
8650 }
8651 }
8652
8653 return size;
8654 }
8655
8656 /* Emit insns to perform a move or push of DI, DF, and XF values.
8657 All required insns are emitted here; no normal moves are left for
8658 the caller. Operands 2-4 receive the destination parts in the
8659 correct order; operands 5-7 receive the source parts. */
8660
8661 void
8662 ix86_split_long_move (operands)
8663 rtx operands[];
8664 {
8665 rtx part[2][3];
8666 int nparts;
8667 int push = 0;
8668 int collisions = 0;
8669 enum machine_mode mode = GET_MODE (operands[0]);
8670
8671 /* The DFmode expanders may ask us to move double.
8672 For 64bit target this is single move. By hiding the fact
8673 here we simplify i386.md splitters. */
8674 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8675 {
8676 /* Optimize constant pool reference to immediates. This is used by
8677 fp moves, that force all constants to memory to allow combining. */
8678
8679 if (GET_CODE (operands[1]) == MEM
8680 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8681 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8682 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8683 if (push_operand (operands[0], VOIDmode))
8684 {
8685 operands[0] = copy_rtx (operands[0]);
8686 PUT_MODE (operands[0], Pmode);
8687 }
8688 else
8689 operands[0] = gen_lowpart (DImode, operands[0]);
8690 operands[1] = gen_lowpart (DImode, operands[1]);
8691 emit_move_insn (operands[0], operands[1]);
8692 return;
8693 }
8694
8695 /* The only non-offsettable memory we handle is push. */
8696 if (push_operand (operands[0], VOIDmode))
8697 push = 1;
8698 else if (GET_CODE (operands[0]) == MEM
8699 && ! offsettable_memref_p (operands[0]))
8700 abort ();
8701
8702 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8703 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8704
8705 /* When emitting push, take care for source operands on the stack. */
8706 if (push && GET_CODE (operands[1]) == MEM
8707 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8708 {
8709 if (nparts == 3)
8710 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8711 XEXP (part[1][2], 0));
8712 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8713 XEXP (part[1][1], 0));
8714 }
8715
8716 /* We need to do the copy in the right order in case an address register
8717 of the source overlaps the destination. */
8718 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8719 {
8720 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8721 collisions++;
8722 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8723 collisions++;
8724 if (nparts == 3
8725 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8726 collisions++;
8727
8728 /* Collision in the middle part can be handled by reordering. */
8729 if (collisions == 1 && nparts == 3
8730 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8731 {
8732 rtx tmp;
8733 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8734 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8735 }
8736
8737 /* If there are more collisions, we can't handle it by reordering.
8738 Do an lea to the last part and use only one colliding move. */
8739 else if (collisions > 1)
8740 {
8741 collisions = 1;
8742 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8743 XEXP (part[1][0], 0)));
8744 part[1][0] = change_address (part[1][0],
8745 TARGET_64BIT ? DImode : SImode,
8746 part[0][nparts - 1]);
8747 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8748 if (nparts == 3)
8749 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8750 }
8751 }
8752
8753 if (push)
8754 {
8755 if (!TARGET_64BIT)
8756 {
8757 if (nparts == 3)
8758 {
8759 /* We use only the first 12 bytes of the TFmode value, but for pushing
8760 we are required to adjust the stack as if we were pushing a real
8761 16-byte value. */
8762 if (mode == TFmode && !TARGET_64BIT)
8763 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8764 GEN_INT (-4)));
8765 emit_move_insn (part[0][2], part[1][2]);
8766 }
8767 }
8768 else
8769 {
8770 /* In 64-bit mode we don't have a 32-bit push available. If this is a
8771 register, that is OK - we will just use the larger counterpart. We also
8772 retype memory - this comes from an attempt to avoid the REX prefix on
8773 moving the second half of a TFmode value. */
8774 if (GET_MODE (part[1][1]) == SImode)
8775 {
8776 if (GET_CODE (part[1][1]) == MEM)
8777 part[1][1] = adjust_address (part[1][1], DImode, 0);
8778 else if (REG_P (part[1][1]))
8779 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8780 else
8781 abort ();
8782 if (GET_MODE (part[1][0]) == SImode)
8783 part[1][0] = part[1][1];
8784 }
8785 }
8786 emit_move_insn (part[0][1], part[1][1]);
8787 emit_move_insn (part[0][0], part[1][0]);
8788 return;
8789 }
8790
8791 /* Choose the correct order so as not to overwrite the source before it is copied. */
8792 if ((REG_P (part[0][0])
8793 && REG_P (part[1][1])
8794 && (REGNO (part[0][0]) == REGNO (part[1][1])
8795 || (nparts == 3
8796 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8797 || (collisions > 0
8798 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8799 {
8800 if (nparts == 3)
8801 {
8802 operands[2] = part[0][2];
8803 operands[3] = part[0][1];
8804 operands[4] = part[0][0];
8805 operands[5] = part[1][2];
8806 operands[6] = part[1][1];
8807 operands[7] = part[1][0];
8808 }
8809 else
8810 {
8811 operands[2] = part[0][1];
8812 operands[3] = part[0][0];
8813 operands[5] = part[1][1];
8814 operands[6] = part[1][0];
8815 }
8816 }
8817 else
8818 {
8819 if (nparts == 3)
8820 {
8821 operands[2] = part[0][0];
8822 operands[3] = part[0][1];
8823 operands[4] = part[0][2];
8824 operands[5] = part[1][0];
8825 operands[6] = part[1][1];
8826 operands[7] = part[1][2];
8827 }
8828 else
8829 {
8830 operands[2] = part[0][0];
8831 operands[3] = part[0][1];
8832 operands[5] = part[1][0];
8833 operands[6] = part[1][1];
8834 }
8835 }
8836 emit_move_insn (operands[2], operands[5]);
8837 emit_move_insn (operands[3], operands[6]);
8838 if (nparts == 3)
8839 emit_move_insn (operands[4], operands[7]);
8840
8841 return;
8842 }
8843
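/* Split a DImode left shift into SImode operations: for a constant
   count this is a move plus an ashlsi (count >= 32) or a shld/ashlsi
   pair (count < 32); for a variable count, a shld/ashlsi pair followed
   by an adjustment that handles counts of 32 and above.  */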
8844 void
8845 ix86_split_ashldi (operands, scratch)
8846 rtx *operands, scratch;
8847 {
8848 rtx low[2], high[2];
8849 int count;
8850
8851 if (GET_CODE (operands[2]) == CONST_INT)
8852 {
8853 split_di (operands, 2, low, high);
8854 count = INTVAL (operands[2]) & 63;
8855
8856 if (count >= 32)
8857 {
8858 emit_move_insn (high[0], low[1]);
8859 emit_move_insn (low[0], const0_rtx);
8860
8861 if (count > 32)
8862 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8863 }
8864 else
8865 {
8866 if (!rtx_equal_p (operands[0], operands[1]))
8867 emit_move_insn (operands[0], operands[1]);
8868 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8869 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8870 }
8871 }
8872 else
8873 {
8874 if (!rtx_equal_p (operands[0], operands[1]))
8875 emit_move_insn (operands[0], operands[1]);
8876
8877 split_di (operands, 1, low, high);
8878
8879 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8880 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8881
8882 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8883 {
8884 if (! no_new_pseudos)
8885 scratch = force_reg (SImode, const0_rtx);
8886 else
8887 emit_move_insn (scratch, const0_rtx);
8888
8889 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8890 scratch));
8891 }
8892 else
8893 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8894 }
8895 }
8896
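/* Split a DImode arithmetic right shift into SImode operations.  Same
   structure as ix86_split_ashldi above, except that the vacated high
   word is filled with copies of the sign bit (the ashrsi by 31).  */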
8897 void
8898 ix86_split_ashrdi (operands, scratch)
8899 rtx *operands, scratch;
8900 {
8901 rtx low[2], high[2];
8902 int count;
8903
8904 if (GET_CODE (operands[2]) == CONST_INT)
8905 {
8906 split_di (operands, 2, low, high);
8907 count = INTVAL (operands[2]) & 63;
8908
8909 if (count >= 32)
8910 {
8911 emit_move_insn (low[0], high[1]);
8912
8913 if (! reload_completed)
8914 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8915 else
8916 {
8917 emit_move_insn (high[0], low[0]);
8918 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8919 }
8920
8921 if (count > 32)
8922 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8923 }
8924 else
8925 {
8926 if (!rtx_equal_p (operands[0], operands[1]))
8927 emit_move_insn (operands[0], operands[1]);
8928 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8929 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8930 }
8931 }
8932 else
8933 {
8934 if (!rtx_equal_p (operands[0], operands[1]))
8935 emit_move_insn (operands[0], operands[1]);
8936
8937 split_di (operands, 1, low, high);
8938
8939 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8940 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8941
8942 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8943 {
8944 if (! no_new_pseudos)
8945 scratch = gen_reg_rtx (SImode);
8946 emit_move_insn (scratch, high[0]);
8947 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8948 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8949 scratch));
8950 }
8951 else
8952 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8953 }
8954 }
8955
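/* Split a DImode logical right shift into SImode operations; like the
   arithmetic variant above, but the vacated high word is simply
   zeroed.  */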
8956 void
8957 ix86_split_lshrdi (operands, scratch)
8958 rtx *operands, scratch;
8959 {
8960 rtx low[2], high[2];
8961 int count;
8962
8963 if (GET_CODE (operands[2]) == CONST_INT)
8964 {
8965 split_di (operands, 2, low, high);
8966 count = INTVAL (operands[2]) & 63;
8967
8968 if (count >= 32)
8969 {
8970 emit_move_insn (low[0], high[1]);
8971 emit_move_insn (high[0], const0_rtx);
8972
8973 if (count > 32)
8974 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8975 }
8976 else
8977 {
8978 if (!rtx_equal_p (operands[0], operands[1]))
8979 emit_move_insn (operands[0], operands[1]);
8980 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8981 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8982 }
8983 }
8984 else
8985 {
8986 if (!rtx_equal_p (operands[0], operands[1]))
8987 emit_move_insn (operands[0], operands[1]);
8988
8989 split_di (operands, 1, low, high);
8990
8991 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8992 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8993
8994 /* Heh. By reversing the arguments, we can reuse this pattern. */
8995 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8996 {
8997 if (! no_new_pseudos)
8998 scratch = force_reg (SImode, const0_rtx);
8999 else
9000 emit_move_insn (scratch, const0_rtx);
9001
9002 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9003 scratch));
9004 }
9005 else
9006 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9007 }
9008 }
9009
9010 /* Helper function for the string operations below. Emit a jump to the
9011 returned label, taken when the bits of VARIABLE selected by VALUE are zero. */
9012 static rtx
9013 ix86_expand_aligntest (variable, value)
9014 rtx variable;
9015 int value;
9016 {
9017 rtx label = gen_label_rtx ();
9018 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9019 if (GET_MODE (variable) == DImode)
9020 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9021 else
9022 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9023 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9024 1, label);
9025 return label;
9026 }
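/* A typical use in the string expanders below is:

     label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);

   i.e. the single-byte copy is skipped when the tested address bit is
   already clear.  */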
9027
9028 /* Decrease COUNTREG by VALUE. */
9029 static void
9030 ix86_adjust_counter (countreg, value)
9031 rtx countreg;
9032 HOST_WIDE_INT value;
9033 {
9034 if (GET_MODE (countreg) == DImode)
9035 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9036 else
9037 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9038 }
9039
9040 /* Zero extend possibly SImode EXP to Pmode register. */
9041 rtx
9042 ix86_zero_extend_to_Pmode (exp)
9043 rtx exp;
9044 {
9045 rtx r;
9046 if (GET_MODE (exp) == VOIDmode)
9047 return force_reg (Pmode, exp);
9048 if (GET_MODE (exp) == Pmode)
9049 return copy_to_mode_reg (Pmode, exp);
9050 r = gen_reg_rtx (Pmode);
9051 emit_insn (gen_zero_extendsidi2 (r, exp));
9052 return r;
9053 }
9054
9055 /* Expand string move (memcpy) operation. Use i386 string operations when
9056 profitable. expand_clrstr contains similar code. */
9057 int
9058 ix86_expand_movstr (dst, src, count_exp, align_exp)
9059 rtx dst, src, count_exp, align_exp;
9060 {
9061 rtx srcreg, destreg, countreg;
9062 enum machine_mode counter_mode;
9063 HOST_WIDE_INT align = 0;
9064 unsigned HOST_WIDE_INT count = 0;
9065 rtx insns;
9066
9067 start_sequence ();
9068
9069 if (GET_CODE (align_exp) == CONST_INT)
9070 align = INTVAL (align_exp);
9071
9072 /* This simple hack avoids all inlining code and simplifies code below. */
9073 if (!TARGET_ALIGN_STRINGOPS)
9074 align = 64;
9075
9076 if (GET_CODE (count_exp) == CONST_INT)
9077 count = INTVAL (count_exp);
9078
9079 /* Figure out the proper mode for the counter. For 32-bit targets it is
9080 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
9081 Set count to the number of bytes copied when known at compile time. */
9082 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9083 || x86_64_zero_extended_value (count_exp))
9084 counter_mode = SImode;
9085 else
9086 counter_mode = DImode;
9087
9088 if (counter_mode != SImode && counter_mode != DImode)
9089 abort ();
9090
9091 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9092 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9093
9094 emit_insn (gen_cld ());
9095
9096 /* When optimizing for size emit a simple rep ; movsb instruction for
9097 counts not divisible by 4. */
9098
9099 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9100 {
9101 countreg = ix86_zero_extend_to_Pmode (count_exp);
9102 if (TARGET_64BIT)
9103 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9104 destreg, srcreg, countreg));
9105 else
9106 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9107 destreg, srcreg, countreg));
9108 }
9109
9110 /* For constant aligned (or small unaligned) copies use rep movsl
9111 followed by code copying the rest. For PentiumPro ensure 8 byte
9112 alignment to allow rep movsl acceleration. */
9113
9114 else if (count != 0
9115 && (align >= 8
9116 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9117 || optimize_size || count < (unsigned int) 64))
9118 {
9119 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9120 if (count & ~(size - 1))
9121 {
9122 countreg = copy_to_mode_reg (counter_mode,
9123 GEN_INT ((count >> (size == 4 ? 2 : 3))
9124 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9125 countreg = ix86_zero_extend_to_Pmode (countreg);
9126 if (size == 4)
9127 {
9128 if (TARGET_64BIT)
9129 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9130 destreg, srcreg, countreg));
9131 else
9132 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9133 destreg, srcreg, countreg));
9134 }
9135 else
9136 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9137 destreg, srcreg, countreg));
9138 }
9139 if (size == 8 && (count & 0x04))
9140 emit_insn (gen_strmovsi (destreg, srcreg));
9141 if (count & 0x02)
9142 emit_insn (gen_strmovhi (destreg, srcreg));
9143 if (count & 0x01)
9144 emit_insn (gen_strmovqi (destreg, srcreg));
9145 }
9146 /* The generic code based on the glibc implementation:
9147 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9148 allowing accelerated copying there)
9149 - copy the data using rep movsl
9150 - copy the rest. */
9151 else
9152 {
9153 rtx countreg2;
9154 rtx label = NULL;
9155
9156 /* In case we don't know anything about the alignment, default to the
9157 library version, since it is usually equally fast and results in
9158 shorter code. */
9159 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9160 {
9161 end_sequence ();
9162 return 0;
9163 }
9164
9165 if (TARGET_SINGLE_STRINGOP)
9166 emit_insn (gen_cld ());
9167
9168 countreg2 = gen_reg_rtx (Pmode);
9169 countreg = copy_to_mode_reg (counter_mode, count_exp);
9170
9171 /* We don't use loops to align the destination or to copy parts smaller
9172 than 4 bytes, because gcc is able to optimize such code better (in
9173 the case the destination or the count really is aligned, gcc is often
9174 able to predict the branches) and also it is friendlier to the
9175 hardware branch prediction.
9176
9177 Using loops would be beneficial for the generic case, because we could
9178 handle small counts using them. Many CPUs (such as Athlon)
9179 have large REP prefix setup costs.
9180
9181 This is quite costly. Maybe we can revisit this decision later or
9182 add some customizability to this code. */
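/* The code below therefore emits an optional count check that sends
   short copies straight to the tail code, byte/word/(dword) alignment
   fixups guarded by ix86_expand_aligntest, the rep movsl (or rep movsq)
   body, and finally tail copies of the remaining bytes guarded by the
   same alignment and count tests.  */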
9183
9184 if (count == 0
9185 && align < (TARGET_PENTIUMPRO && (count == 0
9186 || count >= (unsigned int) 260)
9187 ? 8 : UNITS_PER_WORD))
9188 {
9189 label = gen_label_rtx ();
9190 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9191 LEU, 0, counter_mode, 1, label);
9192 }
9193 if (align <= 1)
9194 {
9195 rtx label = ix86_expand_aligntest (destreg, 1);
9196 emit_insn (gen_strmovqi (destreg, srcreg));
9197 ix86_adjust_counter (countreg, 1);
9198 emit_label (label);
9199 LABEL_NUSES (label) = 1;
9200 }
9201 if (align <= 2)
9202 {
9203 rtx label = ix86_expand_aligntest (destreg, 2);
9204 emit_insn (gen_strmovhi (destreg, srcreg));
9205 ix86_adjust_counter (countreg, 2);
9206 emit_label (label);
9207 LABEL_NUSES (label) = 1;
9208 }
9209 if (align <= 4
9210 && ((TARGET_PENTIUMPRO && (count == 0
9211 || count >= (unsigned int) 260))
9212 || TARGET_64BIT))
9213 {
9214 rtx label = ix86_expand_aligntest (destreg, 4);
9215 emit_insn (gen_strmovsi (destreg, srcreg));
9216 ix86_adjust_counter (countreg, 4);
9217 emit_label (label);
9218 LABEL_NUSES (label) = 1;
9219 }
9220
9221 if (!TARGET_SINGLE_STRINGOP)
9222 emit_insn (gen_cld ());
9223 if (TARGET_64BIT)
9224 {
9225 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9226 GEN_INT (3)));
9227 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9228 destreg, srcreg, countreg2));
9229 }
9230 else
9231 {
9232 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9233 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9234 destreg, srcreg, countreg2));
9235 }
9236
9237 if (label)
9238 {
9239 emit_label (label);
9240 LABEL_NUSES (label) = 1;
9241 }
9242 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9243 emit_insn (gen_strmovsi (destreg, srcreg));
9244 if ((align <= 4 || count == 0) && TARGET_64BIT)
9245 {
9246 rtx label = ix86_expand_aligntest (countreg, 4);
9247 emit_insn (gen_strmovsi (destreg, srcreg));
9248 emit_label (label);
9249 LABEL_NUSES (label) = 1;
9250 }
9251 if (align > 2 && count != 0 && (count & 2))
9252 emit_insn (gen_strmovhi (destreg, srcreg));
9253 if (align <= 2 || count == 0)
9254 {
9255 rtx label = ix86_expand_aligntest (countreg, 2);
9256 emit_insn (gen_strmovhi (destreg, srcreg));
9257 emit_label (label);
9258 LABEL_NUSES (label) = 1;
9259 }
9260 if (align > 1 && count != 0 && (count & 1))
9261 emit_insn (gen_strmovqi (destreg, srcreg));
9262 if (align <= 1 || count == 0)
9263 {
9264 rtx label = ix86_expand_aligntest (countreg, 1);
9265 emit_insn (gen_strmovqi (destreg, srcreg));
9266 emit_label (label);
9267 LABEL_NUSES (label) = 1;
9268 }
9269 }
9270
9271 insns = get_insns ();
9272 end_sequence ();
9273
9274 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9275 emit_insns (insns);
9276 return 1;
9277 }
9278
9279 /* Expand string clear operation (bzero). Use i386 string operations when
9280 profitable. expand_movstr contains similar code. */
9281 int
9282 ix86_expand_clrstr (src, count_exp, align_exp)
9283 rtx src, count_exp, align_exp;
9284 {
9285 rtx destreg, zeroreg, countreg;
9286 enum machine_mode counter_mode;
9287 HOST_WIDE_INT align = 0;
9288 unsigned HOST_WIDE_INT count = 0;
9289
9290 if (GET_CODE (align_exp) == CONST_INT)
9291 align = INTVAL (align_exp);
9292
9293 /* This simple hack avoids all inlining code and simplifies code below. */
9294 if (!TARGET_ALIGN_STRINGOPS)
9295 align = 32;
9296
9297 if (GET_CODE (count_exp) == CONST_INT)
9298 count = INTVAL (count_exp);
9299 /* Figure out the proper mode for the counter. For 32-bit targets it is
9300 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
9301 Set count to the number of bytes to clear when known at compile time. */
9302 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9303 || x86_64_zero_extended_value (count_exp))
9304 counter_mode = SImode;
9305 else
9306 counter_mode = DImode;
9307
9308 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9309
9310 emit_insn (gen_cld ());
9311
9312 /* When optimizing for size emit a simple rep ; stosb instruction for
9313 counts not divisible by 4. */
9314
9315 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9316 {
9317 countreg = ix86_zero_extend_to_Pmode (count_exp);
9318 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9319 if (TARGET_64BIT)
9320 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9321 destreg, countreg));
9322 else
9323 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9324 destreg, countreg));
9325 }
9326 else if (count != 0
9327 && (align >= 8
9328 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9329 || optimize_size || count < (unsigned int) 64))
9330 {
9331 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9332 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9333 if (count & ~(size - 1))
9334 {
9335 countreg = copy_to_mode_reg (counter_mode,
9336 GEN_INT ((count >> (size == 4 ? 2 : 3))
9337 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9338 countreg = ix86_zero_extend_to_Pmode (countreg);
9339 if (size == 4)
9340 {
9341 if (TARGET_64BIT)
9342 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9343 destreg, countreg));
9344 else
9345 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9346 destreg, countreg));
9347 }
9348 else
9349 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9350 destreg, countreg));
9351 }
9352 if (size == 8 && (count & 0x04))
9353 emit_insn (gen_strsetsi (destreg,
9354 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9355 if (count & 0x02)
9356 emit_insn (gen_strsethi (destreg,
9357 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9358 if (count & 0x01)
9359 emit_insn (gen_strsetqi (destreg,
9360 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9361 }
9362 else
9363 {
9364 rtx countreg2;
9365 rtx label = NULL;
9366
9367 /* In case we don't know anything about the alignment, default to the
9368 library version, since it is usually equally fast and results in
9369 shorter code. */
9370 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9371 return 0;
9372
9373 if (TARGET_SINGLE_STRINGOP)
9374 emit_insn (gen_cld ());
9375
9376 countreg2 = gen_reg_rtx (Pmode);
9377 countreg = copy_to_mode_reg (counter_mode, count_exp);
9378 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9379
9380 if (count == 0
9381 && align < (TARGET_PENTIUMPRO && (count == 0
9382 || count >= (unsigned int) 260)
9383 ? 8 : UNITS_PER_WORD))
9384 {
9385 label = gen_label_rtx ();
9386 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9387 LEU, 0, counter_mode, 1, label);
9388 }
9389 if (align <= 1)
9390 {
9391 rtx label = ix86_expand_aligntest (destreg, 1);
9392 emit_insn (gen_strsetqi (destreg,
9393 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9394 ix86_adjust_counter (countreg, 1);
9395 emit_label (label);
9396 LABEL_NUSES (label) = 1;
9397 }
9398 if (align <= 2)
9399 {
9400 rtx label = ix86_expand_aligntest (destreg, 2);
9401 emit_insn (gen_strsethi (destreg,
9402 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9403 ix86_adjust_counter (countreg, 2);
9404 emit_label (label);
9405 LABEL_NUSES (label) = 1;
9406 }
9407 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9408 || count >= (unsigned int) 260))
9409 {
9410 rtx label = ix86_expand_aligntest (destreg, 4);
9411 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9412 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9413 : zeroreg)));
9414 ix86_adjust_counter (countreg, 4);
9415 emit_label (label);
9416 LABEL_NUSES (label) = 1;
9417 }
9418
9419 if (!TARGET_SINGLE_STRINGOP)
9420 emit_insn (gen_cld ());
9421 if (TARGET_64BIT)
9422 {
9423 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9424 GEN_INT (3)));
9425 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9426 destreg, countreg2));
9427 }
9428 else
9429 {
9430 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9431 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9432 destreg, countreg2));
9433 }
9434
9435 if (label)
9436 {
9437 emit_label (label);
9438 LABEL_NUSES (label) = 1;
9439 }
9440 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9441 emit_insn (gen_strsetsi (destreg,
9442 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9443 if (TARGET_64BIT && (align <= 4 || count == 0))
9444 {
9445 rtx label = ix86_expand_aligntest (destreg, 2);
9446 emit_insn (gen_strsetsi (destreg,
9447 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9448 emit_label (label);
9449 LABEL_NUSES (label) = 1;
9450 }
9451 if (align > 2 && count != 0 && (count & 2))
9452 emit_insn (gen_strsethi (destreg,
9453 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9454 if (align <= 2 || count == 0)
9455 {
9456 rtx label = ix86_expand_aligntest (destreg, 2);
9457 emit_insn (gen_strsethi (destreg,
9458 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9459 emit_label (label);
9460 LABEL_NUSES (label) = 1;
9461 }
9462 if (align > 1 && count != 0 && (count & 1))
9463 emit_insn (gen_strsetqi (destreg,
9464 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9465 if (align <= 1 || count == 0)
9466 {
9467 rtx label = ix86_expand_aligntest (destreg, 1);
9468 emit_insn (gen_strsetqi (destreg,
9469 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9470 emit_label (label);
9471 LABEL_NUSES (label) = 1;
9472 }
9473 }
9474 return 1;
9475 }
9476 /* Expand strlen. */
9477 int
9478 ix86_expand_strlen (out, src, eoschar, align)
9479 rtx out, src, eoschar, align;
9480 {
9481 rtx addr, scratch1, scratch2, scratch3, scratch4;
9482
9483 /* The generic case of the strlen expander is long. Avoid expanding
9484 it unless TARGET_INLINE_ALL_STRINGOPS. */
9485
9486 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9487 && !TARGET_INLINE_ALL_STRINGOPS
9488 && !optimize_size
9489 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9490 return 0;
9491
9492 addr = force_reg (Pmode, XEXP (src, 0));
9493 scratch1 = gen_reg_rtx (Pmode);
9494
9495 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9496 && !optimize_size)
9497 {
9498 /* It seems that some optimizers do not combine a call like
9499 foo (strlen (bar), strlen (bar));
9500 when the move and the subtraction are done here. They do calculate
9501 the length just once when these instructions are done inside
9502 output_strlen_unroll(). But since &bar[strlen(bar)] is often
9503 used and this way one fewer register is live for the lifetime of
9504 output_strlen_unroll(), this is better. */
9505
9506 emit_move_insn (out, addr);
9507
9508 ix86_expand_strlensi_unroll_1 (out, align);
9509
9510 /* strlensi_unroll_1 returns the address of the zero at the end of
9511 the string, like memchr(), so compute the length by subtracting
9512 the start address. */
9513 if (TARGET_64BIT)
9514 emit_insn (gen_subdi3 (out, out, addr));
9515 else
9516 emit_insn (gen_subsi3 (out, out, addr));
9517 }
9518 else
9519 {
9520 scratch2 = gen_reg_rtx (Pmode);
9521 scratch3 = gen_reg_rtx (Pmode);
9522 scratch4 = force_reg (Pmode, constm1_rtx);
9523
9524 emit_move_insn (scratch3, addr);
9525 eoschar = force_reg (QImode, eoschar);
9526
9527 emit_insn (gen_cld ());
9528 if (TARGET_64BIT)
9529 {
9530 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9531 align, scratch4, scratch3));
9532 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9533 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9534 }
9535 else
9536 {
9537 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9538 align, scratch4, scratch3));
9539 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9540 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9541 }
9542 }
9543 return 1;
9544 }
9545
9546 /* Expand the appropriate insns for doing strlen if not just doing
9547 repnz; scasb
9548
9549 out = result, initialized with the start address
9550 align_rtx = alignment of the address.
9551 scratch = scratch register, initialized with the start address when
9552 not aligned, otherwise undefined
9553
9554 This is just the body. It needs the initialisations mentioned above and
9555 some address computation at the end. These things are done in the caller (see ix86_expand_strlen above). */
9556
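/* A sketch of the caller's side, mirroring what ix86_expand_strlen above
   does (illustrative only):

	emit_move_insn (out, addr);
	ix86_expand_strlensi_unroll_1 (out, align);
	if (TARGET_64BIT)
	  emit_insn (gen_subdi3 (out, out, addr));
	else
	  emit_insn (gen_subsi3 (out, out, addr));

   i.e. OUT starts as the start address and ends up holding the length.  */
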
9557 static void
9558 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9559 rtx out, align_rtx;
9560 {
9561 int align;
9562 rtx tmp;
9563 rtx align_2_label = NULL_RTX;
9564 rtx align_3_label = NULL_RTX;
9565 rtx align_4_label = gen_label_rtx ();
9566 rtx end_0_label = gen_label_rtx ();
9567 rtx mem;
9568 rtx tmpreg = gen_reg_rtx (SImode);
9569 rtx scratch = gen_reg_rtx (SImode);
9570
9571 align = 0;
9572 if (GET_CODE (align_rtx) == CONST_INT)
9573 align = INTVAL (align_rtx);
9574
9575 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9576
9577 /* Is there a known alignment and is it less than 4? */
9578 if (align < 4)
9579 {
9580 rtx scratch1 = gen_reg_rtx (Pmode);
9581 emit_move_insn (scratch1, out);
9582 /* Is there a known alignment and is it not 2? */
9583 if (align != 2)
9584 {
9585 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9586 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9587
9588 /* Leave just the 3 lower bits. */
9589 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9590 NULL_RTX, 0, OPTAB_WIDEN);
9591
9592 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9593 Pmode, 1, align_4_label);
9594 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9595 Pmode, 1, align_2_label);
9596 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9597 Pmode, 1, align_3_label);
9598 }
9599 else
9600 {
9601 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9602 check whether it is aligned to a 4-byte boundary. */
9603
9604 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9605 NULL_RTX, 0, OPTAB_WIDEN);
9606
9607 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9608 Pmode, 1, align_4_label);
9609 }
9610
9611 mem = gen_rtx_MEM (QImode, out);
9612
9613 /* Now compare the bytes. */
9614
9615 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9616 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9617 QImode, 1, end_0_label);
9618
9619 /* Increment the address. */
9620 if (TARGET_64BIT)
9621 emit_insn (gen_adddi3 (out, out, const1_rtx));
9622 else
9623 emit_insn (gen_addsi3 (out, out, const1_rtx));
9624
9625 /* Not needed with an alignment of 2 */
9626 if (align != 2)
9627 {
9628 emit_label (align_2_label);
9629
9630 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9631 end_0_label);
9632
9633 if (TARGET_64BIT)
9634 emit_insn (gen_adddi3 (out, out, const1_rtx));
9635 else
9636 emit_insn (gen_addsi3 (out, out, const1_rtx));
9637
9638 emit_label (align_3_label);
9639 }
9640
9641 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9642 end_0_label);
9643
9644 if (TARGET_64BIT)
9645 emit_insn (gen_adddi3 (out, out, const1_rtx));
9646 else
9647 emit_insn (gen_addsi3 (out, out, const1_rtx));
9648 }
9649
9650 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
9651 align this loop; it only makes the program larger and does not help
9652 to speed it up. */
9653 emit_label (align_4_label);
9654
9655 mem = gen_rtx_MEM (SImode, out);
9656 emit_move_insn (scratch, mem);
9657 if (TARGET_64BIT)
9658 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9659 else
9660 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9661
9662 /* This formula yields a nonzero result iff one of the bytes is zero.
9663 This saves three branches inside the loop and many cycles. */
9664
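/* In scalar terms the insns emitted below compute, roughly,
	(x - 0x01010101) & ~x & 0x80808080
   which is nonzero exactly when some byte of x is zero (a sketch of the
   classic zero-byte test; the RTL below is the authoritative form).  */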
9665 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9666 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9667 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9668 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9669 gen_int_mode (0x80808080, SImode)));
9670 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9671 align_4_label);
9672
9673 if (TARGET_CMOVE)
9674 {
9675 rtx reg = gen_reg_rtx (SImode);
9676 rtx reg2 = gen_reg_rtx (Pmode);
9677 emit_move_insn (reg, tmpreg);
9678 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9679
9680 /* If zero is not in the first two bytes, move two bytes forward. */
9681 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9682 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9683 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9684 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9685 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9686 reg,
9687 tmpreg)));
9688 /* Emit lea manually to avoid clobbering of flags. */
9689 emit_insn (gen_rtx_SET (SImode, reg2,
9690 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9691
9692 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9693 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9694 emit_insn (gen_rtx_SET (VOIDmode, out,
9695 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9696 reg2,
9697 out)));
9698
9699 }
9700 else
9701 {
9702 rtx end_2_label = gen_label_rtx ();
9703 /* Is zero in the first two bytes? */
9704
9705 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9706 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9707 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9708 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9709 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9710 pc_rtx);
9711 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9712 JUMP_LABEL (tmp) = end_2_label;
9713
9714 /* Not in the first two. Move two bytes forward. */
9715 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9716 if (TARGET_64BIT)
9717 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9718 else
9719 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9720
9721 emit_label (end_2_label);
9722
9723 }
9724
9725 /* Avoid branch in fixing the byte. */
9726 tmpreg = gen_lowpart (QImode, tmpreg);
9727 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9728 if (TARGET_64BIT)
9729 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9730 else
9731 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9732
9733 emit_label (end_0_label);
9734 }
9735 \f
9736 /* Clear stack slot assignments remembered from previous functions.
9737 This is called from INIT_EXPANDERS once before RTL is emitted for each
9738 function. */
9739
9740 static void
9741 ix86_init_machine_status (p)
9742 struct function *p;
9743 {
9744 p->machine = (struct machine_function *)
9745 xcalloc (1, sizeof (struct machine_function));
9746 }
9747
9748 /* Mark machine specific bits of P for GC. */
9749 static void
9750 ix86_mark_machine_status (p)
9751 struct function *p;
9752 {
9753 struct machine_function *machine = p->machine;
9754 enum machine_mode mode;
9755 int n;
9756
9757 if (! machine)
9758 return;
9759
9760 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9761 mode = (enum machine_mode) ((int) mode + 1))
9762 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9763 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9764 }
9765
9766 static void
9767 ix86_free_machine_status (p)
9768 struct function *p;
9769 {
9770 free (p->machine);
9771 p->machine = NULL;
9772 }
9773
9774 /* Return a MEM corresponding to a stack slot with mode MODE.
9775 Allocate a new slot if necessary.
9776
9777 The RTL for a function can have several slots available: N is
9778 which slot to use. */
9779
9780 rtx
9781 assign_386_stack_local (mode, n)
9782 enum machine_mode mode;
9783 int n;
9784 {
9785 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9786 abort ();
9787
9788 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9789 ix86_stack_locals[(int) mode][n]
9790 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9791
9792 return ix86_stack_locals[(int) mode][n];
9793 }
9794 \f
9795 /* Calculate the length of the memory address in the instruction
9796 encoding. Does not include the one-byte modrm, opcode, or prefix. */
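/* For instance (illustrative only): a register-indirect address such as
   (%ebx) adds 0 bytes, a disp32-only address adds 4, and 4(%ebx,%esi,2)
   adds 2 -- one byte for the disp8 plus one for the SIB byte implied by
   the index.  */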
9797
9798 static int
9799 memory_address_length (addr)
9800 rtx addr;
9801 {
9802 struct ix86_address parts;
9803 rtx base, index, disp;
9804 int len;
9805
9806 if (GET_CODE (addr) == PRE_DEC
9807 || GET_CODE (addr) == POST_INC
9808 || GET_CODE (addr) == PRE_MODIFY
9809 || GET_CODE (addr) == POST_MODIFY)
9810 return 0;
9811
9812 if (! ix86_decompose_address (addr, &parts))
9813 abort ();
9814
9815 base = parts.base;
9816 index = parts.index;
9817 disp = parts.disp;
9818 len = 0;
9819
9820 /* Register Indirect. */
9821 if (base && !index && !disp)
9822 {
9823 /* Special cases: ebp and esp need the two-byte modrm form. */
9824 if (addr == stack_pointer_rtx
9825 || addr == arg_pointer_rtx
9826 || addr == frame_pointer_rtx
9827 || addr == hard_frame_pointer_rtx)
9828 len = 1;
9829 }
9830
9831 /* Direct Addressing. */
9832 else if (disp && !base && !index)
9833 len = 4;
9834
9835 else
9836 {
9837 /* Find the length of the displacement constant. */
9838 if (disp)
9839 {
9840 if (GET_CODE (disp) == CONST_INT
9841 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9842 len = 1;
9843 else
9844 len = 4;
9845 }
9846
9847 /* An index requires the two-byte modrm form. */
9848 if (index)
9849 len += 1;
9850 }
9851
9852 return len;
9853 }
9854
9855 /* Compute the default value for the "length_immediate" attribute. When
9856 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
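/* For example (illustrative): with SHORTFORM set, an SImode insn whose
   constant operand fits in a signed 8-bit field counts 1 byte; otherwise
   the immediate contributes the full operand width -- 4 bytes for MODE_SI
   and, as a sign-extended imm32, 4 bytes for MODE_DI as well.  */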
9857 int
9858 ix86_attr_length_immediate_default (insn, shortform)
9859 rtx insn;
9860 int shortform;
9861 {
9862 int len = 0;
9863 int i;
9864 extract_insn_cached (insn);
9865 for (i = recog_data.n_operands - 1; i >= 0; --i)
9866 if (CONSTANT_P (recog_data.operand[i]))
9867 {
9868 if (len)
9869 abort ();
9870 if (shortform
9871 && GET_CODE (recog_data.operand[i]) == CONST_INT
9872 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9873 len = 1;
9874 else
9875 {
9876 switch (get_attr_mode (insn))
9877 {
9878 case MODE_QI:
9879 len+=1;
9880 break;
9881 case MODE_HI:
9882 len+=2;
9883 break;
9884 case MODE_SI:
9885 len+=4;
9886 break;
9887 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9888 case MODE_DI:
9889 len+=4;
9890 break;
9891 default:
9892 fatal_insn ("unknown insn mode", insn);
9893 }
9894 }
9895 }
9896 return len;
9897 }
9898 /* Compute default value for "length_address" attribute. */
9899 int
9900 ix86_attr_length_address_default (insn)
9901 rtx insn;
9902 {
9903 int i;
9904 extract_insn_cached (insn);
9905 for (i = recog_data.n_operands - 1; i >= 0; --i)
9906 if (GET_CODE (recog_data.operand[i]) == MEM)
9907 {
9908 return memory_address_length (XEXP (recog_data.operand[i], 0));
9910 }
9911 return 0;
9912 }
9913 \f
9914 /* Return the maximum number of instructions a cpu can issue. */
9915
9916 static int
9917 ix86_issue_rate ()
9918 {
9919 switch (ix86_cpu)
9920 {
9921 case PROCESSOR_PENTIUM:
9922 case PROCESSOR_K6:
9923 return 2;
9924
9925 case PROCESSOR_PENTIUMPRO:
9926 case PROCESSOR_PENTIUM4:
9927 case PROCESSOR_ATHLON:
9928 return 3;
9929
9930 default:
9931 return 1;
9932 }
9933 }
9934
9935 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9936 by DEP_INSN and nothing else set by DEP_INSN. */
9937
9938 static int
9939 ix86_flags_dependant (insn, dep_insn, insn_type)
9940 rtx insn, dep_insn;
9941 enum attr_type insn_type;
9942 {
9943 rtx set, set2;
9944
9945 /* Simplify the test for uninteresting insns. */
9946 if (insn_type != TYPE_SETCC
9947 && insn_type != TYPE_ICMOV
9948 && insn_type != TYPE_FCMOV
9949 && insn_type != TYPE_IBR)
9950 return 0;
9951
9952 if ((set = single_set (dep_insn)) != 0)
9953 {
9954 set = SET_DEST (set);
9955 set2 = NULL_RTX;
9956 }
9957 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9958 && XVECLEN (PATTERN (dep_insn), 0) == 2
9959 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9960 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9961 {
9962 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9963 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9964 }
9965 else
9966 return 0;
9967
9968 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9969 return 0;
9970
9971 /* This test is true if the dependent insn reads the flags but
9972 not any other potentially set register. */
9973 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9974 return 0;
9975
9976 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9977 return 0;
9978
9979 return 1;
9980 }
9981
9982 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9983 address with operands set by DEP_INSN. */
9984
9985 static int
9986 ix86_agi_dependant (insn, dep_insn, insn_type)
9987 rtx insn, dep_insn;
9988 enum attr_type insn_type;
9989 {
9990 rtx addr;
9991
9992 if (insn_type == TYPE_LEA
9993 && TARGET_PENTIUM)
9994 {
9995 addr = PATTERN (insn);
9996 if (GET_CODE (addr) == SET)
9997 ;
9998 else if (GET_CODE (addr) == PARALLEL
9999 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10000 addr = XVECEXP (addr, 0, 0);
10001 else
10002 abort ();
10003 addr = SET_SRC (addr);
10004 }
10005 else
10006 {
10007 int i;
10008 extract_insn_cached (insn);
10009 for (i = recog_data.n_operands - 1; i >= 0; --i)
10010 if (GET_CODE (recog_data.operand[i]) == MEM)
10011 {
10012 addr = XEXP (recog_data.operand[i], 0);
10013 goto found;
10014 }
10015 return 0;
10016 found:;
10017 }
10018
10019 return modified_in_p (addr, dep_insn);
10020 }
10021
10022 static int
10023 ix86_adjust_cost (insn, link, dep_insn, cost)
10024 rtx insn, link, dep_insn;
10025 int cost;
10026 {
10027 enum attr_type insn_type, dep_insn_type;
10028 enum attr_memory memory, dep_memory;
10029 rtx set, set2;
10030 int dep_insn_code_number;
10031
10032 /* Anti and output dependencies have zero cost on all CPUs. */
10033 if (REG_NOTE_KIND (link) != 0)
10034 return 0;
10035
10036 dep_insn_code_number = recog_memoized (dep_insn);
10037
10038 /* If we can't recognize the insns, we can't really do anything. */
10039 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10040 return cost;
10041
10042 insn_type = get_attr_type (insn);
10043 dep_insn_type = get_attr_type (dep_insn);
10044
10045 switch (ix86_cpu)
10046 {
10047 case PROCESSOR_PENTIUM:
10048 /* Address Generation Interlock adds a cycle of latency. */
10049 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10050 cost += 1;
10051
10052 /* ??? Compares pair with jump/setcc. */
10053 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10054 cost = 0;
10055
10056 /* Floating point stores require the value to be ready one cycle earlier. */
10057 if (insn_type == TYPE_FMOV
10058 && get_attr_memory (insn) == MEMORY_STORE
10059 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10060 cost += 1;
10061 break;
10062
10063 case PROCESSOR_PENTIUMPRO:
10064 memory = get_attr_memory (insn);
10065 dep_memory = get_attr_memory (dep_insn);
10066
10067 /* Since we can't represent delayed latencies of load+operation,
10068 increase the cost here for non-imov insns. */
10069 if (dep_insn_type != TYPE_IMOV
10070 && dep_insn_type != TYPE_FMOV
10071 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10072 cost += 1;
10073
10074 /* INT->FP conversion is expensive. */
10075 if (get_attr_fp_int_src (dep_insn))
10076 cost += 5;
10077
10078 /* There is one cycle extra latency between an FP op and a store. */
10079 if (insn_type == TYPE_FMOV
10080 && (set = single_set (dep_insn)) != NULL_RTX
10081 && (set2 = single_set (insn)) != NULL_RTX
10082 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10083 && GET_CODE (SET_DEST (set2)) == MEM)
10084 cost += 1;
10085
10086 /* Show the ability of the reorder buffer to hide the latency of a load
10087 by executing it in parallel with the previous instruction when the
10088 previous instruction is not needed to compute the address. */
10089 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10090 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10091 {
10092 /* Claim that moves take one cycle, as the core can issue one load
10093 at a time and the next load can start a cycle later. */
10094 if (dep_insn_type == TYPE_IMOV
10095 || dep_insn_type == TYPE_FMOV)
10096 cost = 1;
10097 else if (cost > 1)
10098 cost--;
10099 }
10100 break;
10101
10102 case PROCESSOR_K6:
10103 memory = get_attr_memory (insn);
10104 dep_memory = get_attr_memory (dep_insn);
10105 /* The esp dependency is resolved before the instruction is really
10106 finished. */
10107 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10108 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10109 return 1;
10110
10111 /* Since we can't represent delayed latencies of load+operation,
10112 increase the cost here for non-imov insns. */
10113 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10114 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10115
10116 /* INT->FP conversion is expensive. */
10117 if (get_attr_fp_int_src (dep_insn))
10118 cost += 5;
10119
10120 /* Show the ability of the reorder buffer to hide the latency of a load
10121 by executing it in parallel with the previous instruction when the
10122 previous instruction is not needed to compute the address. */
10123 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10124 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10125 {
10126 /* Claim that moves take one cycle, as the core can issue one load
10127 at a time and the next load can start a cycle later. */
10128 if (dep_insn_type == TYPE_IMOV
10129 || dep_insn_type == TYPE_FMOV)
10130 cost = 1;
10131 else if (cost > 2)
10132 cost -= 2;
10133 else
10134 cost = 1;
10135 }
10136 break;
10137
10138 case PROCESSOR_ATHLON:
10139 memory = get_attr_memory (insn);
10140 dep_memory = get_attr_memory (dep_insn);
10141
10142 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10143 {
10144 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10145 cost += 2;
10146 else
10147 cost += 3;
10148 }
10149 /* Show the ability of the reorder buffer to hide the latency of a load
10150 by executing it in parallel with the previous instruction when the
10151 previous instruction is not needed to compute the address. */
10152 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10153 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10154 {
10155 /* Claim that moves take one cycle, as the core can issue one load
10156 at a time and the next load can start a cycle later. */
10157 if (dep_insn_type == TYPE_IMOV
10158 || dep_insn_type == TYPE_FMOV)
10159 cost = 0;
10160 else if (cost >= 3)
10161 cost -= 3;
10162 else
10163 cost = 0;
10164 }
10165
10166 default:
10167 break;
10168 }
10169
10170 return cost;
10171 }
10172
10173 static union
10174 {
10175 struct ppro_sched_data
10176 {
10177 rtx decode[3];
10178 int issued_this_cycle;
10179 } ppro;
10180 } ix86_sched_data;
10181
10182 static int
10183 ix86_safe_length (insn)
10184 rtx insn;
10185 {
10186 if (recog_memoized (insn) >= 0)
10187 return get_attr_length (insn);
10188 else
10189 return 128;
10190 }
10191
10192 static int
10193 ix86_safe_length_prefix (insn)
10194 rtx insn;
10195 {
10196 if (recog_memoized (insn) >= 0)
10197 return get_attr_length (insn);
10198 else
10199 return 0;
10200 }
10201
10202 static enum attr_memory
10203 ix86_safe_memory (insn)
10204 rtx insn;
10205 {
10206 if (recog_memoized (insn) >= 0)
10207 return get_attr_memory (insn);
10208 else
10209 return MEMORY_UNKNOWN;
10210 }
10211
10212 static enum attr_pent_pair
10213 ix86_safe_pent_pair (insn)
10214 rtx insn;
10215 {
10216 if (recog_memoized (insn) >= 0)
10217 return get_attr_pent_pair (insn);
10218 else
10219 return PENT_PAIR_NP;
10220 }
10221
10222 static enum attr_ppro_uops
10223 ix86_safe_ppro_uops (insn)
10224 rtx insn;
10225 {
10226 if (recog_memoized (insn) >= 0)
10227 return get_attr_ppro_uops (insn);
10228 else
10229 return PPRO_UOPS_MANY;
10230 }
10231
10232 static void
10233 ix86_dump_ppro_packet (dump)
10234 FILE *dump;
10235 {
10236 if (ix86_sched_data.ppro.decode[0])
10237 {
10238 fprintf (dump, "PPRO packet: %d",
10239 INSN_UID (ix86_sched_data.ppro.decode[0]));
10240 if (ix86_sched_data.ppro.decode[1])
10241 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10242 if (ix86_sched_data.ppro.decode[2])
10243 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10244 fputc ('\n', dump);
10245 }
10246 }
10247
10248 /* We're beginning a new block. Initialize data structures as necessary. */
10249
10250 static void
10251 ix86_sched_init (dump, sched_verbose, veclen)
10252 FILE *dump ATTRIBUTE_UNUSED;
10253 int sched_verbose ATTRIBUTE_UNUSED;
10254 int veclen ATTRIBUTE_UNUSED;
10255 {
10256 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10257 }
10258
10259 /* Shift INSN to SLOT, and shift everything else down. */
10260
10261 static void
10262 ix86_reorder_insn (insnp, slot)
10263 rtx *insnp, *slot;
10264 {
10265 if (insnp != slot)
10266 {
10267 rtx insn = *insnp;
10268 do
10269 insnp[0] = insnp[1];
10270 while (++insnp != slot);
10271 *insnp = insn;
10272 }
10273 }
10274
10275 /* Find an instruction with the given pairability and the minimal number of
10276 cycles lost by the fact that the CPU waits for both pipelines to finish
10277 before reading the next instructions. Also take care that the two
10278 instructions together do not exceed 7 bytes. */
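/* For instance (illustrative): pairing two one-cycle ALU insns loses no
   cycles, while pairing a one-cycle insn with a three-cycle one leaves the
   other pipe idle for two cycles -- hence the abs (cycles - secondcycles)
   term below.  */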
10279
10280 static rtx *
10281 ix86_pent_find_pair (e_ready, ready, type, first)
10282 rtx *e_ready;
10283 rtx *ready;
10284 enum attr_pent_pair type;
10285 rtx first;
10286 {
10287 int mincycles, cycles;
10288 enum attr_pent_pair tmp;
10289 enum attr_memory memory;
10290 rtx *insnp, *bestinsnp = NULL;
10291
10292 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10293 return NULL;
10294
10295 memory = ix86_safe_memory (first);
10296 cycles = result_ready_cost (first);
10297 mincycles = INT_MAX;
10298
10299 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10300 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10301 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10302 {
10303 enum attr_memory second_memory;
10304 int secondcycles, currentcycles;
10305
10306 second_memory = ix86_safe_memory (*insnp);
10307 secondcycles = result_ready_cost (*insnp);
10308 currentcycles = abs (cycles - secondcycles);
10309
10310 if (secondcycles >= 1 && cycles >= 1)
10311 {
10312 /* Two read/modify/write instructions together take two
10313 cycles longer. */
10314 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10315 currentcycles += 2;
10316
10317 /* A read/modify/write instruction followed by a read/modify
10318 instruction takes one cycle longer. */
10319 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10320 && tmp != PENT_PAIR_UV
10321 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10322 currentcycles += 1;
10323 }
10324 if (currentcycles < mincycles)
10325 bestinsnp = insnp, mincycles = currentcycles;
10326 }
10327
10328 return bestinsnp;
10329 }
10330
10331 /* Subroutines of ix86_sched_reorder. */
10332
10333 static void
10334 ix86_sched_reorder_pentium (ready, e_ready)
10335 rtx *ready;
10336 rtx *e_ready;
10337 {
10338 enum attr_pent_pair pair1, pair2;
10339 rtx *insnp;
10340
10341 /* This wouldn't be necessary if Haifa knew that static insn ordering
10342 matters for which pipe an insn is issued to. So we have to make
10343 some minor rearrangements. */
10344
10345 pair1 = ix86_safe_pent_pair (*e_ready);
10346
10347 /* If the first insn is non-pairable, let it be. */
10348 if (pair1 == PENT_PAIR_NP)
10349 return;
10350
10351 pair2 = PENT_PAIR_NP;
10352 insnp = 0;
10353
10354 /* If the first insn is UV or PV pairable, search for a PU
10355 insn to go with. */
10356 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10357 {
10358 insnp = ix86_pent_find_pair (e_ready-1, ready,
10359 PENT_PAIR_PU, *e_ready);
10360 if (insnp)
10361 pair2 = PENT_PAIR_PU;
10362 }
10363
10364 /* If the first insn is PU or UV pairable, search for a PV
10365 insn to go with. */
10366 if (pair2 == PENT_PAIR_NP
10367 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10368 {
10369 insnp = ix86_pent_find_pair (e_ready-1, ready,
10370 PENT_PAIR_PV, *e_ready);
10371 if (insnp)
10372 pair2 = PENT_PAIR_PV;
10373 }
10374
10375 /* If the first insn is pairable, search for a UV
10376 insn to go with. */
10377 if (pair2 == PENT_PAIR_NP)
10378 {
10379 insnp = ix86_pent_find_pair (e_ready-1, ready,
10380 PENT_PAIR_UV, *e_ready);
10381 if (insnp)
10382 pair2 = PENT_PAIR_UV;
10383 }
10384
10385 if (pair2 == PENT_PAIR_NP)
10386 return;
10387
10388 /* Found something! Decide if we need to swap the order. */
10389 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10390 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10391 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10392 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10393 ix86_reorder_insn (insnp, e_ready);
10394 else
10395 ix86_reorder_insn (insnp, e_ready - 1);
10396 }
10397
10398 static void
10399 ix86_sched_reorder_ppro (ready, e_ready)
10400 rtx *ready;
10401 rtx *e_ready;
10402 {
10403 rtx decode[3];
10404 enum attr_ppro_uops cur_uops;
10405 int issued_this_cycle;
10406 rtx *insnp;
10407 int i;
10408
10409 /* At this point .ppro.decode contains the state of the three
10410 decoders from last "cycle". That is, those insns that were
10411 actually independent. But here we're scheduling for the
10412 decoder, and we may find things that are decodable in the
10413 same cycle. */
10414
10415 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10416 issued_this_cycle = 0;
10417
10418 insnp = e_ready;
10419 cur_uops = ix86_safe_ppro_uops (*insnp);
10420
10421 /* If the decoders are empty, and we have a complex insn at the
10422 head of the priority queue, let it issue without complaint. */
10423 if (decode[0] == NULL)
10424 {
10425 if (cur_uops == PPRO_UOPS_MANY)
10426 {
10427 decode[0] = *insnp;
10428 goto ppro_done;
10429 }
10430
10431 /* Otherwise, search for a 2-4 uop insn to issue. */
10432 while (cur_uops != PPRO_UOPS_FEW)
10433 {
10434 if (insnp == ready)
10435 break;
10436 cur_uops = ix86_safe_ppro_uops (*--insnp);
10437 }
10438
10439 /* If so, move it to the head of the line. */
10440 if (cur_uops == PPRO_UOPS_FEW)
10441 ix86_reorder_insn (insnp, e_ready);
10442
10443 /* Issue the head of the queue. */
10444 issued_this_cycle = 1;
10445 decode[0] = *e_ready--;
10446 }
10447
10448 /* Look for simple insns to fill in the other two slots. */
10449 for (i = 1; i < 3; ++i)
10450 if (decode[i] == NULL)
10451 {
10452 if (ready >= e_ready)
10453 goto ppro_done;
10454
10455 insnp = e_ready;
10456 cur_uops = ix86_safe_ppro_uops (*insnp);
10457 while (cur_uops != PPRO_UOPS_ONE)
10458 {
10459 if (insnp == ready)
10460 break;
10461 cur_uops = ix86_safe_ppro_uops (*--insnp);
10462 }
10463
10464 /* Found one. Move it to the head of the queue and issue it. */
10465 if (cur_uops == PPRO_UOPS_ONE)
10466 {
10467 ix86_reorder_insn (insnp, e_ready);
10468 decode[i] = *e_ready--;
10469 issued_this_cycle++;
10470 continue;
10471 }
10472
10473 /* ??? Didn't find one. Ideally, here we would do a lazy split
10474 of 2-uop insns, issue one and queue the other. */
10475 }
10476
10477 ppro_done:
10478 if (issued_this_cycle == 0)
10479 issued_this_cycle = 1;
10480 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10481 }
10482
10483 /* We are about to begin issuing insns for this clock cycle.
10484 Override the default sort algorithm to better slot instructions. */
10485 static int
10486 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10487 FILE *dump ATTRIBUTE_UNUSED;
10488 int sched_verbose ATTRIBUTE_UNUSED;
10489 rtx *ready;
10490 int *n_readyp;
10491 int clock_var ATTRIBUTE_UNUSED;
10492 {
10493 int n_ready = *n_readyp;
10494 rtx *e_ready = ready + n_ready - 1;
10495
10496 if (n_ready < 2)
10497 goto out;
10498
10499 switch (ix86_cpu)
10500 {
10501 default:
10502 break;
10503
10504 case PROCESSOR_PENTIUM:
10505 ix86_sched_reorder_pentium (ready, e_ready);
10506 break;
10507
10508 case PROCESSOR_PENTIUMPRO:
10509 ix86_sched_reorder_ppro (ready, e_ready);
10510 break;
10511 }
10512
10513 out:
10514 return ix86_issue_rate ();
10515 }
10516
10517 /* We are about to issue INSN. Return the number of insns left on the
10518 ready queue that can be issued this cycle. */
10519
10520 static int
10521 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10522 FILE *dump;
10523 int sched_verbose;
10524 rtx insn;
10525 int can_issue_more;
10526 {
10527 int i;
10528 switch (ix86_cpu)
10529 {
10530 default:
10531 return can_issue_more - 1;
10532
10533 case PROCESSOR_PENTIUMPRO:
10534 {
10535 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10536
10537 if (uops == PPRO_UOPS_MANY)
10538 {
10539 if (sched_verbose)
10540 ix86_dump_ppro_packet (dump);
10541 ix86_sched_data.ppro.decode[0] = insn;
10542 ix86_sched_data.ppro.decode[1] = NULL;
10543 ix86_sched_data.ppro.decode[2] = NULL;
10544 if (sched_verbose)
10545 ix86_dump_ppro_packet (dump);
10546 ix86_sched_data.ppro.decode[0] = NULL;
10547 }
10548 else if (uops == PPRO_UOPS_FEW)
10549 {
10550 if (sched_verbose)
10551 ix86_dump_ppro_packet (dump);
10552 ix86_sched_data.ppro.decode[0] = insn;
10553 ix86_sched_data.ppro.decode[1] = NULL;
10554 ix86_sched_data.ppro.decode[2] = NULL;
10555 }
10556 else
10557 {
10558 for (i = 0; i < 3; ++i)
10559 if (ix86_sched_data.ppro.decode[i] == NULL)
10560 {
10561 ix86_sched_data.ppro.decode[i] = insn;
10562 break;
10563 }
10564 if (i == 3)
10565 abort ();
10566 if (i == 2)
10567 {
10568 if (sched_verbose)
10569 ix86_dump_ppro_packet (dump);
10570 ix86_sched_data.ppro.decode[0] = NULL;
10571 ix86_sched_data.ppro.decode[1] = NULL;
10572 ix86_sched_data.ppro.decode[2] = NULL;
10573 }
10574 }
10575 }
10576 return --ix86_sched_data.ppro.issued_this_cycle;
10577 }
10578 }
10579 \f
10580 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10581 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10582 appropriate. */
10583
10584 void
10585 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10586 rtx insns;
10587 rtx dstref, srcref, dstreg, srcreg;
10588 {
10589 rtx insn;
10590
10591 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10592 if (INSN_P (insn))
10593 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10594 dstreg, srcreg);
10595 }
10596
10597 /* Subroutine of above to actually do the updating by recursively walking
10598 the rtx. */
10599
10600 static void
10601 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10602 rtx x;
10603 rtx dstref, srcref, dstreg, srcreg;
10604 {
10605 enum rtx_code code = GET_CODE (x);
10606 const char *format_ptr = GET_RTX_FORMAT (code);
10607 int i, j;
10608
10609 if (code == MEM && XEXP (x, 0) == dstreg)
10610 MEM_COPY_ATTRIBUTES (x, dstref);
10611 else if (code == MEM && XEXP (x, 0) == srcreg)
10612 MEM_COPY_ATTRIBUTES (x, srcref);
10613
10614 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10615 {
10616 if (*format_ptr == 'e')
10617 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10618 dstreg, srcreg);
10619 else if (*format_ptr == 'E')
10620 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10621 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10622 dstreg, srcreg);
10623 }
10624 }
10625 \f
10626 /* Compute the alignment given to a constant that is being placed in memory.
10627 EXP is the constant and ALIGN is the alignment that the object would
10628 ordinarily have.
10629 The value of this function is used instead of that alignment to align
10630 the object. */
10631
10632 int
10633 ix86_constant_alignment (exp, align)
10634 tree exp;
10635 int align;
10636 {
10637 if (TREE_CODE (exp) == REAL_CST)
10638 {
10639 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10640 return 64;
10641 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10642 return 128;
10643 }
10644 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10645 && align < 256)
10646 return 256;
10647
10648 return align;
10649 }
10650
10651 /* Compute the alignment for a static variable.
10652 TYPE is the data type, and ALIGN is the alignment that
10653 the object would ordinarily have. The value of this function is used
10654 instead of that alignment to align the object. */
10655
10656 int
10657 ix86_data_alignment (type, align)
10658 tree type;
10659 int align;
10660 {
10661 if (AGGREGATE_TYPE_P (type)
10662 && TYPE_SIZE (type)
10663 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10664 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10665 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10666 return 256;
10667
10668 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10669 to a 16-byte boundary. */
10670 if (TARGET_64BIT)
10671 {
10672 if (AGGREGATE_TYPE_P (type)
10673 && TYPE_SIZE (type)
10674 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10675 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10676 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10677 return 128;
10678 }
10679
10680 if (TREE_CODE (type) == ARRAY_TYPE)
10681 {
10682 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10683 return 64;
10684 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10685 return 128;
10686 }
10687 else if (TREE_CODE (type) == COMPLEX_TYPE)
10688 {
10689
10690 if (TYPE_MODE (type) == DCmode && align < 64)
10691 return 64;
10692 if (TYPE_MODE (type) == XCmode && align < 128)
10693 return 128;
10694 }
10695 else if ((TREE_CODE (type) == RECORD_TYPE
10696 || TREE_CODE (type) == UNION_TYPE
10697 || TREE_CODE (type) == QUAL_UNION_TYPE)
10698 && TYPE_FIELDS (type))
10699 {
10700 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10701 return 64;
10702 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10703 return 128;
10704 }
10705 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10706 || TREE_CODE (type) == INTEGER_TYPE)
10707 {
10708 if (TYPE_MODE (type) == DFmode && align < 64)
10709 return 64;
10710 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10711 return 128;
10712 }
10713
10714 return align;
10715 }
10716
10717 /* Compute the alignment for a local variable.
10718 TYPE is the data type, and ALIGN is the alignment that
10719 the object would ordinarily have. The value of this macro is used
10720 instead of that alignment to align the object. */
10721
10722 int
10723 ix86_local_alignment (type, align)
10724 tree type;
10725 int align;
10726 {
10727 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10728 to a 16-byte boundary. */
10729 if (TARGET_64BIT)
10730 {
10731 if (AGGREGATE_TYPE_P (type)
10732 && TYPE_SIZE (type)
10733 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10734 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10735 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10736 return 128;
10737 }
10738 if (TREE_CODE (type) == ARRAY_TYPE)
10739 {
10740 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10741 return 64;
10742 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10743 return 128;
10744 }
10745 else if (TREE_CODE (type) == COMPLEX_TYPE)
10746 {
10747 if (TYPE_MODE (type) == DCmode && align < 64)
10748 return 64;
10749 if (TYPE_MODE (type) == XCmode && align < 128)
10750 return 128;
10751 }
10752 else if ((TREE_CODE (type) == RECORD_TYPE
10753 || TREE_CODE (type) == UNION_TYPE
10754 || TREE_CODE (type) == QUAL_UNION_TYPE)
10755 && TYPE_FIELDS (type))
10756 {
10757 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10758 return 64;
10759 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10760 return 128;
10761 }
10762 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10763 || TREE_CODE (type) == INTEGER_TYPE)
10764 {
10765
10766 if (TYPE_MODE (type) == DFmode && align < 64)
10767 return 64;
10768 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10769 return 128;
10770 }
10771 return align;
10772 }
10773 \f
10774 /* Emit RTL insns to initialize the variable parts of a trampoline.
10775 FNADDR is an RTX for the address of the function's pure code.
10776 CXT is an RTX for the static chain value for the function. */
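/* A sketch of the byte layout emitted below (illustrative restatement; the
   emit_move_insn calls are the authoritative form):

     32 bit:  b9 <cxt:4>          movl   $CXT, %ecx
	      e9 <disp:4>         jmp    FNADDR    (rel32 from tramp+10)

     64 bit:  49 bb <fnaddr:8>    movabs $FNADDR, %r11
				  (or 41 bb <imm32> when FNADDR zero-extends)
	      49 ba <cxt:8>       movabs $CXT, %r10
	      49 ff e3            jmp    *%r11  */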
10777 void
10778 x86_initialize_trampoline (tramp, fnaddr, cxt)
10779 rtx tramp, fnaddr, cxt;
10780 {
10781 if (!TARGET_64BIT)
10782 {
10783 /* Compute offset from the end of the jmp to the target function. */
10784 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10785 plus_constant (tramp, 10),
10786 NULL_RTX, 1, OPTAB_DIRECT);
10787 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10788 gen_int_mode (0xb9, QImode));
10789 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10790 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10791 gen_int_mode (0xe9, QImode));
10792 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10793 }
10794 else
10795 {
10796 int offset = 0;
10797 /* Try to load the address using the shorter movl instead of movabs.
10798 We may want to support movq for kernel mode, but the kernel does not use
10799 trampolines at the moment. */
10800 if (x86_64_zero_extended_value (fnaddr))
10801 {
10802 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10803 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10804 gen_int_mode (0xbb41, HImode));
10805 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10806 gen_lowpart (SImode, fnaddr));
10807 offset += 6;
10808 }
10809 else
10810 {
10811 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10812 gen_int_mode (0xbb49, HImode));
10813 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10814 fnaddr);
10815 offset += 10;
10816 }
10817 /* Load static chain using movabs to r10. */
10818 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10819 gen_int_mode (0xba49, HImode));
10820 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10821 cxt);
10822 offset += 10;
10823 /* Jump to r11. */
10824 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10825 gen_int_mode (0xff49, HImode));
10826 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10827 gen_int_mode (0xe3, QImode));
10828 offset += 3;
10829 if (offset > TRAMPOLINE_SIZE)
10830 abort ();
10831 }
10832 }
10833 \f
10834 #define def_builtin(MASK, NAME, TYPE, CODE) \
10835 do { \
10836 if ((MASK) & target_flags) \
10837 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10838 } while (0)
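/* A typical use (illustrative; most registrations below go through the
   bdesc_* tables) would be:

     def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat,
		  IX86_BUILTIN_LOADAPS);

   i.e. the builtin is created only when the corresponding MASK bit is
   present in target_flags.  */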
10839
10840 struct builtin_description
10841 {
10842 const unsigned int mask;
10843 const enum insn_code icode;
10844 const char *const name;
10845 const enum ix86_builtins code;
10846 const enum rtx_code comparison;
10847 const unsigned int flag;
10848 };
10849
10850 static const struct builtin_description bdesc_comi[] =
10851 {
10852 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10853 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10854 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10855 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10856 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10857 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10858 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10859 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10860 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10861 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10862 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10863 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10864 };
10865
10866 static const struct builtin_description bdesc_2arg[] =
10867 {
10868 /* SSE */
10869 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10870 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10871 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10872 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10873 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10874 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10875 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10876 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10877
10878 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10879 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10880 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10881 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10882 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10883 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10884 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10885 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10886 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10887 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10888 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10889 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10890 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10891 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10892 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10893 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10894 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10895 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10896 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10897 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10898 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10899 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10900 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10901 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10902
10903 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10904 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10905 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10906 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10907
10908 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10909 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10910 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10911 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10912 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10913
10914 /* MMX */
10915 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10916 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10917 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10918 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10919 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10920 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10921
10922 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10923 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10924 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10925 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10926 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10927 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10928 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10929 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10930
10931 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10932 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10933 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10934
10935 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10936 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10937 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10938 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10939
10940 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10941 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10942
10943 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10944 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10945 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10946 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10947 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10948 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10949
10950 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10951 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10952 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10953 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10954
10955 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10956 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10957 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10958 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10959 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10960 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10961
10962 /* Special. */
10963 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10964 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10965 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10966
10967 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10968 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10969
10970 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10971 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10972 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10973 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10974 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10975 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10976
10977 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10978 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10979 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10980 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10981 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10982 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10983
10984 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10985 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10986 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10987 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10988
10989 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10990 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10991
10992 };
10993
10994 static const struct builtin_description bdesc_1arg[] =
10995 {
10996 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10997 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10998
10999 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11000 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11001 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11002
11003 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11004 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11005 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11006 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
11007
11008 };
11009
11010 void
11011 ix86_init_builtins ()
11012 {
11013 if (TARGET_MMX)
11014 ix86_init_mmx_sse_builtins ();
11015 }
11016
11017 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11018 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11019 builtins. */
11020 static void
11021 ix86_init_mmx_sse_builtins ()
11022 {
11023 const struct builtin_description * d;
11024 size_t i;
11025 tree endlink = void_list_node;
11026
11027 tree pchar_type_node = build_pointer_type (char_type_node);
11028 tree pfloat_type_node = build_pointer_type (float_type_node);
11029 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11030 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11031
11032 /* Comparisons. */
11033 tree int_ftype_v4sf_v4sf
11034 = build_function_type (integer_type_node,
11035 tree_cons (NULL_TREE, V4SF_type_node,
11036 tree_cons (NULL_TREE,
11037 V4SF_type_node,
11038 endlink)));
11039 tree v4si_ftype_v4sf_v4sf
11040 = build_function_type (V4SI_type_node,
11041 tree_cons (NULL_TREE, V4SF_type_node,
11042 tree_cons (NULL_TREE,
11043 V4SF_type_node,
11044 endlink)));
11045 /* MMX/SSE/integer conversions. */
11046 tree int_ftype_v4sf
11047 = build_function_type (integer_type_node,
11048 tree_cons (NULL_TREE, V4SF_type_node,
11049 endlink));
11050 tree int_ftype_v8qi
11051 = build_function_type (integer_type_node,
11052 tree_cons (NULL_TREE, V8QI_type_node,
11053 endlink));
11054 tree v4sf_ftype_v4sf_int
11055 = build_function_type (V4SF_type_node,
11056 tree_cons (NULL_TREE, V4SF_type_node,
11057 tree_cons (NULL_TREE, integer_type_node,
11058 endlink)));
11059 tree v4sf_ftype_v4sf_v2si
11060 = build_function_type (V4SF_type_node,
11061 tree_cons (NULL_TREE, V4SF_type_node,
11062 tree_cons (NULL_TREE, V2SI_type_node,
11063 endlink)));
11064 tree int_ftype_v4hi_int
11065 = build_function_type (integer_type_node,
11066 tree_cons (NULL_TREE, V4HI_type_node,
11067 tree_cons (NULL_TREE, integer_type_node,
11068 endlink)));
11069 tree v4hi_ftype_v4hi_int_int
11070 = build_function_type (V4HI_type_node,
11071 tree_cons (NULL_TREE, V4HI_type_node,
11072 tree_cons (NULL_TREE, integer_type_node,
11073 tree_cons (NULL_TREE,
11074 integer_type_node,
11075 endlink))));
11076 /* Miscellaneous. */
11077 tree v8qi_ftype_v4hi_v4hi
11078 = build_function_type (V8QI_type_node,
11079 tree_cons (NULL_TREE, V4HI_type_node,
11080 tree_cons (NULL_TREE, V4HI_type_node,
11081 endlink)));
11082 tree v4hi_ftype_v2si_v2si
11083 = build_function_type (V4HI_type_node,
11084 tree_cons (NULL_TREE, V2SI_type_node,
11085 tree_cons (NULL_TREE, V2SI_type_node,
11086 endlink)));
11087 tree v4sf_ftype_v4sf_v4sf_int
11088 = build_function_type (V4SF_type_node,
11089 tree_cons (NULL_TREE, V4SF_type_node,
11090 tree_cons (NULL_TREE, V4SF_type_node,
11091 tree_cons (NULL_TREE,
11092 integer_type_node,
11093 endlink))));
11094 tree v4hi_ftype_v8qi_v8qi
11095 = build_function_type (V4HI_type_node,
11096 tree_cons (NULL_TREE, V8QI_type_node,
11097 tree_cons (NULL_TREE, V8QI_type_node,
11098 endlink)));
11099 tree v2si_ftype_v4hi_v4hi
11100 = build_function_type (V2SI_type_node,
11101 tree_cons (NULL_TREE, V4HI_type_node,
11102 tree_cons (NULL_TREE, V4HI_type_node,
11103 endlink)));
11104 tree v4hi_ftype_v4hi_int
11105 = build_function_type (V4HI_type_node,
11106 tree_cons (NULL_TREE, V4HI_type_node,
11107 tree_cons (NULL_TREE, integer_type_node,
11108 endlink)));
11109 tree v4hi_ftype_v4hi_di
11110 = build_function_type (V4HI_type_node,
11111 tree_cons (NULL_TREE, V4HI_type_node,
11112 tree_cons (NULL_TREE,
11113 long_long_integer_type_node,
11114 endlink)));
11115 tree v2si_ftype_v2si_di
11116 = build_function_type (V2SI_type_node,
11117 tree_cons (NULL_TREE, V2SI_type_node,
11118 tree_cons (NULL_TREE,
11119 long_long_integer_type_node,
11120 endlink)));
11121 tree void_ftype_void
11122 = build_function_type (void_type_node, endlink);
11123 tree void_ftype_unsigned
11124 = build_function_type (void_type_node,
11125 tree_cons (NULL_TREE, unsigned_type_node,
11126 endlink));
11127 tree unsigned_ftype_void
11128 = build_function_type (unsigned_type_node, endlink);
11129 tree di_ftype_void
11130 = build_function_type (long_long_unsigned_type_node, endlink);
11131 tree v4sf_ftype_void
11132 = build_function_type (V4SF_type_node, endlink);
11133 tree v2si_ftype_v4sf
11134 = build_function_type (V2SI_type_node,
11135 tree_cons (NULL_TREE, V4SF_type_node,
11136 endlink));
11137 /* Loads/stores. */
11138 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11139 tree_cons (NULL_TREE, V8QI_type_node,
11140 tree_cons (NULL_TREE,
11141 pchar_type_node,
11142 endlink)));
11143 tree void_ftype_v8qi_v8qi_pchar
11144 = build_function_type (void_type_node, maskmovq_args);
11145 tree v4sf_ftype_pfloat
11146 = build_function_type (V4SF_type_node,
11147 tree_cons (NULL_TREE, pfloat_type_node,
11148 endlink));
11149 /* @@@ the type is bogus */
11150 tree v4sf_ftype_v4sf_pv2si
11151 = build_function_type (V4SF_type_node,
11152 tree_cons (NULL_TREE, V4SF_type_node,
11153 tree_cons (NULL_TREE, pv2si_type_node,
11154 endlink)));
11155 tree void_ftype_pv2si_v4sf
11156 = build_function_type (void_type_node,
11157 tree_cons (NULL_TREE, pv2si_type_node,
11158 tree_cons (NULL_TREE, V4SF_type_node,
11159 endlink)));
11160 tree void_ftype_pfloat_v4sf
11161 = build_function_type (void_type_node,
11162 tree_cons (NULL_TREE, pfloat_type_node,
11163 tree_cons (NULL_TREE, V4SF_type_node,
11164 endlink)));
11165 tree void_ftype_pdi_di
11166 = build_function_type (void_type_node,
11167 tree_cons (NULL_TREE, pdi_type_node,
11168 tree_cons (NULL_TREE,
11169 long_long_unsigned_type_node,
11170 endlink)));
11171 /* Normal vector unops. */
11172 tree v4sf_ftype_v4sf
11173 = build_function_type (V4SF_type_node,
11174 tree_cons (NULL_TREE, V4SF_type_node,
11175 endlink));
11176
11177 /* Normal vector binops. */
11178 tree v4sf_ftype_v4sf_v4sf
11179 = build_function_type (V4SF_type_node,
11180 tree_cons (NULL_TREE, V4SF_type_node,
11181 tree_cons (NULL_TREE, V4SF_type_node,
11182 endlink)));
11183 tree v8qi_ftype_v8qi_v8qi
11184 = build_function_type (V8QI_type_node,
11185 tree_cons (NULL_TREE, V8QI_type_node,
11186 tree_cons (NULL_TREE, V8QI_type_node,
11187 endlink)));
11188 tree v4hi_ftype_v4hi_v4hi
11189 = build_function_type (V4HI_type_node,
11190 tree_cons (NULL_TREE, V4HI_type_node,
11191 tree_cons (NULL_TREE, V4HI_type_node,
11192 endlink)));
11193 tree v2si_ftype_v2si_v2si
11194 = build_function_type (V2SI_type_node,
11195 tree_cons (NULL_TREE, V2SI_type_node,
11196 tree_cons (NULL_TREE, V2SI_type_node,
11197 endlink)));
11198 tree di_ftype_di_di
11199 = build_function_type (long_long_unsigned_type_node,
11200 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11201 tree_cons (NULL_TREE,
11202 long_long_unsigned_type_node,
11203 endlink)));
11204
11205 tree v2si_ftype_v2sf
11206 = build_function_type (V2SI_type_node,
11207 tree_cons (NULL_TREE, V2SF_type_node,
11208 endlink));
11209 tree v2sf_ftype_v2si
11210 = build_function_type (V2SF_type_node,
11211 tree_cons (NULL_TREE, V2SI_type_node,
11212 endlink));
11213 tree v2si_ftype_v2si
11214 = build_function_type (V2SI_type_node,
11215 tree_cons (NULL_TREE, V2SI_type_node,
11216 endlink));
11217 tree v2sf_ftype_v2sf
11218 = build_function_type (V2SF_type_node,
11219 tree_cons (NULL_TREE, V2SF_type_node,
11220 endlink));
11221 tree v2sf_ftype_v2sf_v2sf
11222 = build_function_type (V2SF_type_node,
11223 tree_cons (NULL_TREE, V2SF_type_node,
11224 tree_cons (NULL_TREE,
11225 V2SF_type_node,
11226 endlink)));
11227 tree v2si_ftype_v2sf_v2sf
11228 = build_function_type (V2SI_type_node,
11229 tree_cons (NULL_TREE, V2SF_type_node,
11230 tree_cons (NULL_TREE,
11231 V2SF_type_node,
11232 endlink)));
11233
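/* For illustration (a sketch, not used by the compiler): each node above is
   build_function_type applied to a return type and an endlink-terminated
   tree_cons chain of argument types, so v4sf_ftype_v4sf_v4sf describes a
   builtin with a prototype along the lines of

       __v4sf __builtin_ia32_addps (__v4sf, __v4sf);

   where __v4sf stands for the V4SFmode vector type.  */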
11234 /* Add all builtins that are more or less simple operations on two
11235 operands. */
11236 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11237 {
11238 /* Use one of the operands; the target can have a different mode for
11239 mask-generating compares. */
11240 enum machine_mode mode;
11241 tree type;
11242
11243 if (d->name == 0)
11244 continue;
11245 mode = insn_data[d->icode].operand[1].mode;
11246
11247 switch (mode)
11248 {
11249 case V4SFmode:
11250 type = v4sf_ftype_v4sf_v4sf;
11251 break;
11252 case V8QImode:
11253 type = v8qi_ftype_v8qi_v8qi;
11254 break;
11255 case V4HImode:
11256 type = v4hi_ftype_v4hi_v4hi;
11257 break;
11258 case V2SImode:
11259 type = v2si_ftype_v2si_v2si;
11260 break;
11261 case DImode:
11262 type = di_ftype_di_di;
11263 break;
11264
11265 default:
11266 abort ();
11267 }
11268
11269 /* Override for comparisons. */
11270 if (d->icode == CODE_FOR_maskcmpv4sf3
11271 || d->icode == CODE_FOR_maskncmpv4sf3
11272 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11273 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11274 type = v4si_ftype_v4sf_v4sf;
11275
11276 def_builtin (d->mask, d->name, type, d->code);
11277 }
11278
11279 /* Add the remaining MMX insns with somewhat more complicated types. */
11280 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11281 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11282 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11283 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11284 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11285 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11286 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11287
11288 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11289 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11290 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11291
11292 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11293 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11294
11295 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11296 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11297
11298 /* comi/ucomi insns. */
11299 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
11300 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11301
11302 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11303 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11304 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11305
11306 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11307 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11308 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11309 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11310 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11311 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11312
11313 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11314 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11315 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11316 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11317
11318 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11319 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11320
11321 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11322
11323 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11324 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11325 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11326 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11327 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11328 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11329
11330 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11331 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11332 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11333 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11334
11335 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11336 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11337 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11338 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11339
11340 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11341
11342 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11343
11344 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11345 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11346 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11347 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11348 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11349 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11350
11351 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11352
11353 /* Original 3DNow! */
11354 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11355 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11356 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11357 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11358 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11359 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11360 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11361 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11362 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11363 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11364 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11365 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11366 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11367 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11368 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11369 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11370 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11371 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11372 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11373 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11374
11375 /* 3DNow! extension as used in the Athlon CPU. */
11376 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11377 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11378 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11379 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11380 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11381 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11382
11383 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11384 }
11385
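/* A minimal usage sketch (illustrative assumption, not part of the
   compiler): with -msse the builtins registered above are directly
   callable from user code, e.g.

       typedef float __v4sf __attribute__ ((mode (V4SF)));

       __v4sf
       scaled_sqrt (__v4sf x)
       {
         return __builtin_ia32_sqrtps (__builtin_ia32_mulps (x, x));
       }

   The <xmmintrin.h> intrinsics are thin inline wrappers around these
   builtin names.  */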
11386 /* Errors in the source file can cause expand_expr to return const0_rtx
11387 where we expect a vector. To avoid crashing, use one of the vector
11388 clear instructions. */
11389 static rtx
11390 safe_vector_operand (x, mode)
11391 rtx x;
11392 enum machine_mode mode;
11393 {
11394 if (x != const0_rtx)
11395 return x;
11396 x = gen_reg_rtx (mode);
11397
11398 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11399 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11400 : gen_rtx_SUBREG (DImode, x, 0)));
11401 else
11402 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11403 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11404 return x;
11405 }
11406
11407 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11408
11409 static rtx
11410 ix86_expand_binop_builtin (icode, arglist, target)
11411 enum insn_code icode;
11412 tree arglist;
11413 rtx target;
11414 {
11415 rtx pat;
11416 tree arg0 = TREE_VALUE (arglist);
11417 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11418 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11419 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11420 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11421 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11422 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11423
11424 if (VECTOR_MODE_P (mode0))
11425 op0 = safe_vector_operand (op0, mode0);
11426 if (VECTOR_MODE_P (mode1))
11427 op1 = safe_vector_operand (op1, mode1);
11428
11429 if (! target
11430 || GET_MODE (target) != tmode
11431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11432 target = gen_reg_rtx (tmode);
11433
11434 /* In case the insn wants input operands in modes different from
11435 the result, abort. */
11436 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11437 abort ();
11438
11439 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11440 op0 = copy_to_mode_reg (mode0, op0);
11441 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11442 op1 = copy_to_mode_reg (mode1, op1);
11443
11444 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11445 yet one of the two must not be a memory. This is normally enforced
11446 by expanders, but we didn't bother to create one here. */
11447 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11448 op0 = copy_to_mode_reg (mode0, op0);
11449
11450 pat = GEN_FCN (icode) (target, op0, op1);
11451 if (! pat)
11452 return 0;
11453 emit_insn (pat);
11454 return target;
11455 }
11456
11457 /* In type_for_mode we restrict the ability to create TImode types
11458 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11459 to have a V4SFmode signature. Convert them in-place to TImode. */
11460
11461 static rtx
11462 ix86_expand_timode_binop_builtin (icode, arglist, target)
11463 enum insn_code icode;
11464 tree arglist;
11465 rtx target;
11466 {
11467 rtx pat;
11468 tree arg0 = TREE_VALUE (arglist);
11469 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11470 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11471 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11472
11473 op0 = gen_lowpart (TImode, op0);
11474 op1 = gen_lowpart (TImode, op1);
11475 target = gen_reg_rtx (TImode);
11476
11477 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11478 op0 = copy_to_mode_reg (TImode, op0);
11479 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11480 op1 = copy_to_mode_reg (TImode, op1);
11481
11482 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11483 yet one of the two must not be a memory. This is normally enforced
11484 by expanders, but we didn't bother to create one here. */
11485 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11486 op0 = copy_to_mode_reg (TImode, op0);
11487
11488 pat = GEN_FCN (icode) (target, op0, op1);
11489 if (! pat)
11490 return 0;
11491 emit_insn (pat);
11492
11493 return gen_lowpart (V4SFmode, target);
11494 }
11495
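/* For example (a sketch): __builtin_ia32_andps is registered with a
   V4SFmode signature, but the routine above reinterprets both operands as
   TImode via gen_lowpart, emits the TImode logical insn (CODE_FOR_sse_andti3
   in that case), and returns the result as the V4SFmode lowpart of the
   TImode target.  */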
11496 /* Subroutine of ix86_expand_builtin to take care of stores. */
11497
11498 static rtx
11499 ix86_expand_store_builtin (icode, arglist)
11500 enum insn_code icode;
11501 tree arglist;
11502 {
11503 rtx pat;
11504 tree arg0 = TREE_VALUE (arglist);
11505 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11506 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11507 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11508 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11509 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11510
11511 if (VECTOR_MODE_P (mode1))
11512 op1 = safe_vector_operand (op1, mode1);
11513
11514 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11515
11516 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11517 op1 = copy_to_mode_reg (mode1, op1);
11518
11519 pat = GEN_FCN (icode) (op0, op1);
11520 if (pat)
11521 emit_insn (pat);
11522 return 0;
11523 }
11524
11525 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11526
11527 static rtx
11528 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11529 enum insn_code icode;
11530 tree arglist;
11531 rtx target;
11532 int do_load;
11533 {
11534 rtx pat;
11535 tree arg0 = TREE_VALUE (arglist);
11536 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11537 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11538 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11539
11540 if (! target
11541 || GET_MODE (target) != tmode
11542 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11543 target = gen_reg_rtx (tmode);
11544 if (do_load)
11545 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11546 else
11547 {
11548 if (VECTOR_MODE_P (mode0))
11549 op0 = safe_vector_operand (op0, mode0);
11550
11551 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11552 op0 = copy_to_mode_reg (mode0, op0);
11553 }
11554
11555 pat = GEN_FCN (icode) (target, op0);
11556 if (! pat)
11557 return 0;
11558 emit_insn (pat);
11559 return target;
11560 }
11561
11562 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11563 sqrtss, rsqrtss, rcpss. */
11564
11565 static rtx
11566 ix86_expand_unop1_builtin (icode, arglist, target)
11567 enum insn_code icode;
11568 tree arglist;
11569 rtx target;
11570 {
11571 rtx pat;
11572 tree arg0 = TREE_VALUE (arglist);
11573 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11574 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11575 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11576
11577 if (! target
11578 || GET_MODE (target) != tmode
11579 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11580 target = gen_reg_rtx (tmode);
11581
11582 if (VECTOR_MODE_P (mode0))
11583 op0 = safe_vector_operand (op0, mode0);
11584
11585 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11586 op0 = copy_to_mode_reg (mode0, op0);
11587
11588 op1 = op0;
11589 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
11590 op1 = copy_to_mode_reg (mode0, op1);
11591
11592 pat = GEN_FCN (icode) (target, op0, op1);
11593 if (! pat)
11594 return 0;
11595 emit_insn (pat);
11596 return target;
11597 }
11598
11599 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11600
11601 static rtx
11602 ix86_expand_sse_compare (d, arglist, target)
11603 const struct builtin_description *d;
11604 tree arglist;
11605 rtx target;
11606 {
11607 rtx pat;
11608 tree arg0 = TREE_VALUE (arglist);
11609 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11610 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11611 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11612 rtx op2;
11613 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11614 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11615 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11616 enum rtx_code comparison = d->comparison;
11617
11618 if (VECTOR_MODE_P (mode0))
11619 op0 = safe_vector_operand (op0, mode0);
11620 if (VECTOR_MODE_P (mode1))
11621 op1 = safe_vector_operand (op1, mode1);
11622
11623 /* Swap operands if we have a comparison that isn't available in
11624 hardware. */
11625 if (d->flag)
11626 {
11627 rtx tmp = gen_reg_rtx (mode1);
11628 emit_move_insn (tmp, op1);
11629 op1 = op0;
11630 op0 = tmp;
11631 }
11632
11633 if (! target
11634 || GET_MODE (target) != tmode
11635 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11636 target = gen_reg_rtx (tmode);
11637
11638 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11639 op0 = copy_to_mode_reg (mode0, op0);
11640 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11641 op1 = copy_to_mode_reg (mode1, op1);
11642
11643 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11644 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11645 if (! pat)
11646 return 0;
11647 emit_insn (pat);
11648 return target;
11649 }
11650
11651 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11652
11653 static rtx
11654 ix86_expand_sse_comi (d, arglist, target)
11655 const struct builtin_description *d;
11656 tree arglist;
11657 rtx target;
11658 {
11659 rtx pat;
11660 tree arg0 = TREE_VALUE (arglist);
11661 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11662 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11663 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11664 rtx op2;
11665 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11666 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11667 enum rtx_code comparison = d->comparison;
11668
11669 if (VECTOR_MODE_P (mode0))
11670 op0 = safe_vector_operand (op0, mode0);
11671 if (VECTOR_MODE_P (mode1))
11672 op1 = safe_vector_operand (op1, mode1);
11673
11674 /* Swap operands if we have a comparison that isn't available in
11675 hardware. */
11676 if (d->flag)
11677 {
11678 rtx tmp = op1;
11679 op1 = op0;
11680 op0 = tmp;
11681 }
11682
11683 target = gen_reg_rtx (SImode);
11684 emit_move_insn (target, const0_rtx);
11685 target = gen_rtx_SUBREG (QImode, target, 0);
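/* TARGET is now the QImode low part of an SImode pseudo that was just
   zeroed; the SET of its STRICT_LOW_PART below only touches the low
   byte, so SUBREG_REG (target) ends up holding the zero-extended 0/1
   result of the comparison.  */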
11686
11687 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11688 op0 = copy_to_mode_reg (mode0, op0);
11689 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11690 op1 = copy_to_mode_reg (mode1, op1);
11691
11692 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11693 pat = GEN_FCN (d->icode) (op0, op1, op2);
11694 if (! pat)
11695 return 0;
11696 emit_insn (pat);
11697 emit_insn (gen_rtx_SET (VOIDmode,
11698 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11699 gen_rtx_fmt_ee (comparison, QImode,
11700 gen_rtx_REG (CCmode, FLAGS_REG),
11701 const0_rtx)));
11702
11703 return SUBREG_REG (target);
11704 }
11705
11706 /* Expand an expression EXP that calls a built-in function,
11707 with result going to TARGET if that's convenient
11708 (and in mode MODE if that's convenient).
11709 SUBTARGET may be used as the target for computing one of EXP's operands.
11710 IGNORE is nonzero if the value is to be ignored. */
11711
11712 rtx
11713 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11714 tree exp;
11715 rtx target;
11716 rtx subtarget ATTRIBUTE_UNUSED;
11717 enum machine_mode mode ATTRIBUTE_UNUSED;
11718 int ignore ATTRIBUTE_UNUSED;
11719 {
11720 const struct builtin_description *d;
11721 size_t i;
11722 enum insn_code icode;
11723 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11724 tree arglist = TREE_OPERAND (exp, 1);
11725 tree arg0, arg1, arg2;
11726 rtx op0, op1, op2, pat;
11727 enum machine_mode tmode, mode0, mode1, mode2;
11728 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11729
11730 switch (fcode)
11731 {
11732 case IX86_BUILTIN_EMMS:
11733 emit_insn (gen_emms ());
11734 return 0;
11735
11736 case IX86_BUILTIN_SFENCE:
11737 emit_insn (gen_sfence ());
11738 return 0;
11739
11740 case IX86_BUILTIN_PEXTRW:
11741 icode = CODE_FOR_mmx_pextrw;
11742 arg0 = TREE_VALUE (arglist);
11743 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11744 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11745 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11746 tmode = insn_data[icode].operand[0].mode;
11747 mode0 = insn_data[icode].operand[1].mode;
11748 mode1 = insn_data[icode].operand[2].mode;
11749
11750 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11751 op0 = copy_to_mode_reg (mode0, op0);
11752 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11753 {
11754 /* @@@ better error message */
11755 error ("selector must be an immediate");
11756 return gen_reg_rtx (tmode);
11757 }
11758 if (target == 0
11759 || GET_MODE (target) != tmode
11760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11761 target = gen_reg_rtx (tmode);
11762 pat = GEN_FCN (icode) (target, op0, op1);
11763 if (! pat)
11764 return 0;
11765 emit_insn (pat);
11766 return target;
11767
11768 case IX86_BUILTIN_PINSRW:
11769 icode = CODE_FOR_mmx_pinsrw;
11770 arg0 = TREE_VALUE (arglist);
11771 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11772 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11773 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11774 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11775 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11776 tmode = insn_data[icode].operand[0].mode;
11777 mode0 = insn_data[icode].operand[1].mode;
11778 mode1 = insn_data[icode].operand[2].mode;
11779 mode2 = insn_data[icode].operand[3].mode;
11780
11781 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11782 op0 = copy_to_mode_reg (mode0, op0);
11783 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11784 op1 = copy_to_mode_reg (mode1, op1);
11785 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11786 {
11787 /* @@@ better error message */
11788 error ("selector must be an immediate");
11789 return const0_rtx;
11790 }
11791 if (target == 0
11792 || GET_MODE (target) != tmode
11793 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11794 target = gen_reg_rtx (tmode);
11795 pat = GEN_FCN (icode) (target, op0, op1, op2);
11796 if (! pat)
11797 return 0;
11798 emit_insn (pat);
11799 return target;
11800
11801 case IX86_BUILTIN_MASKMOVQ:
11802 icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
11803 /* Note the arg order is different from the operand order. */
11804 arg1 = TREE_VALUE (arglist);
11805 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11806 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11807 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11808 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11809 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11810 mode0 = insn_data[icode].operand[0].mode;
11811 mode1 = insn_data[icode].operand[1].mode;
11812 mode2 = insn_data[icode].operand[2].mode;
11813
11814 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11815 op0 = copy_to_mode_reg (mode0, op0);
11816 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11817 op1 = copy_to_mode_reg (mode1, op1);
11818 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11819 op2 = copy_to_mode_reg (mode2, op2);
11820 pat = GEN_FCN (icode) (op0, op1, op2);
11821 if (! pat)
11822 return 0;
11823 emit_insn (pat);
11824 return 0;
11825
11826 case IX86_BUILTIN_SQRTSS:
11827 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11828 case IX86_BUILTIN_RSQRTSS:
11829 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11830 case IX86_BUILTIN_RCPSS:
11831 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11832
11833 case IX86_BUILTIN_ANDPS:
11834 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11835 arglist, target);
11836 case IX86_BUILTIN_ANDNPS:
11837 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11838 arglist, target);
11839 case IX86_BUILTIN_ORPS:
11840 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11841 arglist, target);
11842 case IX86_BUILTIN_XORPS:
11843 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11844 arglist, target);
11845
11846 case IX86_BUILTIN_LOADAPS:
11847 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11848
11849 case IX86_BUILTIN_LOADUPS:
11850 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11851
11852 case IX86_BUILTIN_STOREAPS:
11853 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
11854 case IX86_BUILTIN_STOREUPS:
11855 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
11856
11857 case IX86_BUILTIN_LOADSS:
11858 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11859
11860 case IX86_BUILTIN_STORESS:
11861 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
11862
11863 case IX86_BUILTIN_LOADHPS:
11864 case IX86_BUILTIN_LOADLPS:
11865 icode = (fcode == IX86_BUILTIN_LOADHPS
11866 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11867 arg0 = TREE_VALUE (arglist);
11868 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11869 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11870 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11871 tmode = insn_data[icode].operand[0].mode;
11872 mode0 = insn_data[icode].operand[1].mode;
11873 mode1 = insn_data[icode].operand[2].mode;
11874
11875 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11876 op0 = copy_to_mode_reg (mode0, op0);
11877 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11878 if (target == 0
11879 || GET_MODE (target) != tmode
11880 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11881 target = gen_reg_rtx (tmode);
11882 pat = GEN_FCN (icode) (target, op0, op1);
11883 if (! pat)
11884 return 0;
11885 emit_insn (pat);
11886 return target;
11887
11888 case IX86_BUILTIN_STOREHPS:
11889 case IX86_BUILTIN_STORELPS:
11890 icode = (fcode == IX86_BUILTIN_STOREHPS
11891 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11892 arg0 = TREE_VALUE (arglist);
11893 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11894 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11895 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11896 mode0 = insn_data[icode].operand[1].mode;
11897 mode1 = insn_data[icode].operand[2].mode;
11898
11899 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11900 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11901 op1 = copy_to_mode_reg (mode1, op1);
11902
11903 pat = GEN_FCN (icode) (op0, op0, op1);
11904 if (! pat)
11905 return 0;
11906 emit_insn (pat);
11907 return 0;
11908
11909 case IX86_BUILTIN_MOVNTPS:
11910 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
11911 case IX86_BUILTIN_MOVNTQ:
11912 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
11913
11914 case IX86_BUILTIN_LDMXCSR:
11915 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11916 target = assign_386_stack_local (SImode, 0);
11917 emit_move_insn (target, op0);
11918 emit_insn (gen_ldmxcsr (target));
11919 return 0;
11920
11921 case IX86_BUILTIN_STMXCSR:
11922 target = assign_386_stack_local (SImode, 0);
11923 emit_insn (gen_stmxcsr (target));
11924 return copy_to_mode_reg (SImode, target);
11925
11926 case IX86_BUILTIN_SHUFPS:
11927 icode = CODE_FOR_sse_shufps;
11928 arg0 = TREE_VALUE (arglist);
11929 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11930 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11931 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11932 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11933 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11934 tmode = insn_data[icode].operand[0].mode;
11935 mode0 = insn_data[icode].operand[1].mode;
11936 mode1 = insn_data[icode].operand[2].mode;
11937 mode2 = insn_data[icode].operand[3].mode;
11938
11939 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11940 op0 = copy_to_mode_reg (mode0, op0);
11941 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11942 op1 = copy_to_mode_reg (mode1, op1);
11943 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11944 {
11945 /* @@@ better error message */
11946 error ("mask must be an immediate");
11947 return gen_reg_rtx (tmode);
11948 }
11949 if (target == 0
11950 || GET_MODE (target) != tmode
11951 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11952 target = gen_reg_rtx (tmode);
11953 pat = GEN_FCN (icode) (target, op0, op1, op2);
11954 if (! pat)
11955 return 0;
11956 emit_insn (pat);
11957 return target;
11958
11959 case IX86_BUILTIN_PSHUFW:
11960 icode = CODE_FOR_mmx_pshufw;
11961 arg0 = TREE_VALUE (arglist);
11962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11963 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11964 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11965 tmode = insn_data[icode].operand[0].mode;
11966 mode1 = insn_data[icode].operand[1].mode;
11967 mode2 = insn_data[icode].operand[2].mode;
11968
11969 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11970 op0 = copy_to_mode_reg (mode1, op0);
11971 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11972 {
11973 /* @@@ better error message */
11974 error ("mask must be an immediate");
11975 return const0_rtx;
11976 }
11977 if (target == 0
11978 || GET_MODE (target) != tmode
11979 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11980 target = gen_reg_rtx (tmode);
11981 pat = GEN_FCN (icode) (target, op0, op1);
11982 if (! pat)
11983 return 0;
11984 emit_insn (pat);
11985 return target;
11986
11987 case IX86_BUILTIN_FEMMS:
11988 emit_insn (gen_femms ());
11989 return NULL_RTX;
11990
11991 case IX86_BUILTIN_PAVGUSB:
11992 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11993
11994 case IX86_BUILTIN_PF2ID:
11995 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11996
11997 case IX86_BUILTIN_PFACC:
11998 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11999
12000 case IX86_BUILTIN_PFADD:
12001 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
12002
12003 case IX86_BUILTIN_PFCMPEQ:
12004 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
12005
12006 case IX86_BUILTIN_PFCMPGE:
12007 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
12008
12009 case IX86_BUILTIN_PFCMPGT:
12010 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
12011
12012 case IX86_BUILTIN_PFMAX:
12013 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
12014
12015 case IX86_BUILTIN_PFMIN:
12016 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
12017
12018 case IX86_BUILTIN_PFMUL:
12019 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
12020
12021 case IX86_BUILTIN_PFRCP:
12022 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
12023
12024 case IX86_BUILTIN_PFRCPIT1:
12025 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
12026
12027 case IX86_BUILTIN_PFRCPIT2:
12028 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
12029
12030 case IX86_BUILTIN_PFRSQIT1:
12031 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
12032
12033 case IX86_BUILTIN_PFRSQRT:
12034 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12035
12036 case IX86_BUILTIN_PFSUB:
12037 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12038
12039 case IX86_BUILTIN_PFSUBR:
12040 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12041
12042 case IX86_BUILTIN_PI2FD:
12043 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12044
12045 case IX86_BUILTIN_PMULHRW:
12046 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12047
12048 case IX86_BUILTIN_PF2IW:
12049 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12050
12051 case IX86_BUILTIN_PFNACC:
12052 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12053
12054 case IX86_BUILTIN_PFPNACC:
12055 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12056
12057 case IX86_BUILTIN_PI2FW:
12058 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12059
12060 case IX86_BUILTIN_PSWAPDSI:
12061 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12062
12063 case IX86_BUILTIN_PSWAPDSF:
12064 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12065
12066 case IX86_BUILTIN_SSE_ZERO:
12067 target = gen_reg_rtx (V4SFmode);
12068 emit_insn (gen_sse_clrv4sf (target));
12069 return target;
12070
12071 case IX86_BUILTIN_MMX_ZERO:
12072 target = gen_reg_rtx (DImode);
12073 emit_insn (gen_mmx_clrdi (target));
12074 return target;
12075
12076 default:
12077 break;
12078 }
12079
12080 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12081 if (d->code == fcode)
12082 {
12083 /* Compares are treated specially. */
12084 if (d->icode == CODE_FOR_maskcmpv4sf3
12085 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12086 || d->icode == CODE_FOR_maskncmpv4sf3
12087 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12088 return ix86_expand_sse_compare (d, arglist, target);
12089
12090 return ix86_expand_binop_builtin (d->icode, arglist, target);
12091 }
12092
12093 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12094 if (d->code == fcode)
12095 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12096
12097 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12098 if (d->code == fcode)
12099 return ix86_expand_sse_comi (d, arglist, target);
12100
12101 /* @@@ Should really do something sensible here. */
12102 return 0;
12103 }
12104
12105 /* Store OPERAND to memory after reload is completed. This means
12106 that we can't easily use assign_stack_local. */
12107 rtx
12108 ix86_force_to_memory (mode, operand)
12109 enum machine_mode mode;
12110 rtx operand;
12111 {
12112 rtx result;
12113 if (!reload_completed)
12114 abort ();
12115 if (TARGET_64BIT && TARGET_RED_ZONE)
12116 {
12117 result = gen_rtx_MEM (mode,
12118 gen_rtx_PLUS (Pmode,
12119 stack_pointer_rtx,
12120 GEN_INT (-RED_ZONE_SIZE)));
12121 emit_move_insn (result, operand);
12122 }
12123 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12124 {
12125 switch (mode)
12126 {
12127 case HImode:
12128 case SImode:
12129 operand = gen_lowpart (DImode, operand);
12130 /* FALLTHRU */
12131 case DImode:
12132 emit_insn (
12133 gen_rtx_SET (VOIDmode,
12134 gen_rtx_MEM (DImode,
12135 gen_rtx_PRE_DEC (DImode,
12136 stack_pointer_rtx)),
12137 operand));
12138 break;
12139 default:
12140 abort ();
12141 }
12142 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12143 }
12144 else
12145 {
12146 switch (mode)
12147 {
12148 case DImode:
12149 {
12150 rtx operands[2];
12151 split_di (&operand, 1, operands, operands + 1);
12152 emit_insn (
12153 gen_rtx_SET (VOIDmode,
12154 gen_rtx_MEM (SImode,
12155 gen_rtx_PRE_DEC (Pmode,
12156 stack_pointer_rtx)),
12157 operands[1]));
12158 emit_insn (
12159 gen_rtx_SET (VOIDmode,
12160 gen_rtx_MEM (SImode,
12161 gen_rtx_PRE_DEC (Pmode,
12162 stack_pointer_rtx)),
12163 operands[0]));
12164 }
12165 break;
12166 case HImode:
12167 /* It is better to store HImodes as SImodes. */
12168 if (!TARGET_PARTIAL_REG_STALL)
12169 operand = gen_lowpart (SImode, operand);
12170 /* FALLTHRU */
12171 case SImode:
12172 emit_insn (
12173 gen_rtx_SET (VOIDmode,
12174 gen_rtx_MEM (GET_MODE (operand),
12175 gen_rtx_PRE_DEC (SImode,
12176 stack_pointer_rtx)),
12177 operand));
12178 break;
12179 default:
12180 abort ();
12181 }
12182 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12183 }
12184 return result;
12185 }
12186
12187 /* Free the operand from memory. */
12188 void
12189 ix86_free_from_memory (mode)
12190 enum machine_mode mode;
12191 {
12192 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12193 {
12194 int size;
12195
12196 if (mode == DImode || TARGET_64BIT)
12197 size = 8;
12198 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12199 size = 2;
12200 else
12201 size = 4;
12202 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12203 to a pop or add instruction if registers are available. */
12204 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12205 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12206 GEN_INT (size))));
12207 }
12208 }
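/* A sketch of the intended pairing of the two routines above (illustrative
   assumption only):

       rtx slot = ix86_force_to_memory (DImode, operand);
       ... use SLOT as a memory operand ...
       ix86_free_from_memory (DImode);

   Outside the 64-bit red zone the store is emitted as PRE_DEC pushes, so
   the matching free adjusts the stack pointer back by the same number of
   bytes, using an LEA that peephole2 may later turn into a pop or add.  */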
12209
12210 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12211 QImode must go into class Q_REGS.
12212 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
12213 movdf to do mem-to-mem moves through integer regs. */
12214 enum reg_class
12215 ix86_preferred_reload_class (x, class)
12216 rtx x;
12217 enum reg_class class;
12218 {
12219 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12220 {
12221 /* SSE can't load any constant directly yet. */
12222 if (SSE_CLASS_P (class))
12223 return NO_REGS;
12224 /* Floats can load 0 and 1. */
12225 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12226 {
12227 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12228 if (MAYBE_SSE_CLASS_P (class))
12229 return (reg_class_subset_p (class, GENERAL_REGS)
12230 ? GENERAL_REGS : FLOAT_REGS);
12231 else
12232 return class;
12233 }
12234 /* General regs can load everything. */
12235 if (reg_class_subset_p (class, GENERAL_REGS))
12236 return GENERAL_REGS;
12237 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12238 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12239 return NO_REGS;
12240 }
12241 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12242 return NO_REGS;
12243 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12244 return Q_REGS;
12245 return class;
12246 }
12247
12248 /* If we are copying between general and FP registers, we need a memory
12249 location. The same is true for SSE and MMX registers.
12250
12251 The macro can't work reliably when one of the CLASSES is a class containing
12252 registers from multiple units (SSE, MMX, integer). We avoid this by never
12253 combining those units in a single alternative in the machine description.
12254 Ensure that this constraint holds to avoid unexpected surprises.
12255
12256 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12257 enforce these sanity checks. */
12258 int
12259 ix86_secondary_memory_needed (class1, class2, mode, strict)
12260 enum reg_class class1, class2;
12261 enum machine_mode mode;
12262 int strict;
12263 {
12264 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12265 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12266 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12267 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12268 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12269 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12270 {
12271 if (strict)
12272 abort ();
12273 else
12274 return 1;
12275 }
12276 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12277 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12278 && (mode) != SImode)
12279 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12280 && (mode) != SImode));
12281 }
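/* For example (a sketch of the predicate above): a DFmode copy between
   FLOAT_REGS and SSE_REGS must go through memory, while an SImode copy
   between SSE_REGS and GENERAL_REGS does not, because the SImode
   exception in the return expression allows a direct move.  */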
12282 /* Return the cost of moving data from a register in class CLASS1 to
12283 one in class CLASS2.
12284
12285 It is not required that the cost always equal 2 when FROM is the same as TO;
12286 on some machines it is expensive to move between registers if they are not
12287 general registers. */
12288 int
12289 ix86_register_move_cost (mode, class1, class2)
12290 enum machine_mode mode;
12291 enum reg_class class1, class2;
12292 {
12293 /* In case we require secondary memory, compute the cost of the store
12294 followed by the load. When copying from a general purpose register we
12295 may emit multiple stores followed by a single load, causing a memory
12296 size mismatch stall. Count this as an arbitrarily high cost of 20. */
12297 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12298 {
12299 int add_cost = 0;
12300 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12301 add_cost = 20;
12302 return (MEMORY_MOVE_COST (mode, class1, 0)
12303 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12304 }
12305 /* Moves between SSE/MMX and integer unit are expensive. */
12306 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12307 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12308 return ix86_cost->mmxsse_to_integer;
12309 if (MAYBE_FLOAT_CLASS_P (class1))
12310 return ix86_cost->fp_move;
12311 if (MAYBE_SSE_CLASS_P (class1))
12312 return ix86_cost->sse_move;
12313 if (MAYBE_MMX_CLASS_P (class1))
12314 return ix86_cost->mmx_move;
12315 return 2;
12316 }
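/* Worked example (a sketch): copying DImode from GENERAL_REGS to SSE_REGS
   on a 32-bit target needs secondary memory; DImode occupies two general
   registers but a single SSE register, so the CLASS_MAX_NREGS test fires
   and the arbitrary penalty of 20 is added to the store plus load cost.  */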
12317
12318 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12319 int
12320 ix86_hard_regno_mode_ok (regno, mode)
12321 int regno;
12322 enum machine_mode mode;
12323 {
12324 /* Only the flags registers can hold CCmode values, and they can hold only CCmode values. */
12325 if (CC_REGNO_P (regno))
12326 return GET_MODE_CLASS (mode) == MODE_CC;
12327 if (GET_MODE_CLASS (mode) == MODE_CC
12328 || GET_MODE_CLASS (mode) == MODE_RANDOM
12329 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12330 return 0;
12331 if (FP_REGNO_P (regno))
12332 return VALID_FP_MODE_P (mode);
12333 if (SSE_REGNO_P (regno))
12334 return VALID_SSE_REG_MODE (mode);
12335 if (MMX_REGNO_P (regno))
12336 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12337 /* We handle both integers and floats in the general purpose registers.
12338 In the future we should be able to handle vector modes as well. */
12339 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12340 return 0;
12341 /* Take care with QImode values - they can live in non-QI regs, but then
12342 they cause partial register stalls. */
12343 if (regno < 4 || mode != QImode || TARGET_64BIT)
12344 return 1;
12345 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12346 }
12347
12348 /* Return the cost of moving data of mode M between a
12349 register and memory. A value of 2 is the default; this cost is
12350 relative to those in `REGISTER_MOVE_COST'.
12351
12352 If moving between registers and memory is more expensive than
12353 between two registers, you should define this macro to express the
12354 relative cost.
12355
12356 Also model the increased cost of moving QImode registers in
12357 non-Q_REGS classes.
12358 */
12359 int
12360 ix86_memory_move_cost (mode, class, in)
12361 enum machine_mode mode;
12362 enum reg_class class;
12363 int in;
12364 {
12365 if (FLOAT_CLASS_P (class))
12366 {
12367 int index;
12368 switch (mode)
12369 {
12370 case SFmode:
12371 index = 0;
12372 break;
12373 case DFmode:
12374 index = 1;
12375 break;
12376 case XFmode:
12377 case TFmode:
12378 index = 2;
12379 break;
12380 default:
12381 return 100;
12382 }
12383 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12384 }
12385 if (SSE_CLASS_P (class))
12386 {
12387 int index;
12388 switch (GET_MODE_SIZE (mode))
12389 {
12390 case 4:
12391 index = 0;
12392 break;
12393 case 8:
12394 index = 1;
12395 break;
12396 case 16:
12397 index = 2;
12398 break;
12399 default:
12400 return 100;
12401 }
12402 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12403 }
12404 if (MMX_CLASS_P (class))
12405 {
12406 int index;
12407 switch (GET_MODE_SIZE (mode))
12408 {
12409 case 4:
12410 index = 0;
12411 break;
12412 case 8:
12413 index = 1;
12414 break;
12415 default:
12416 return 100;
12417 }
12418 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12419 }
12420 switch (GET_MODE_SIZE (mode))
12421 {
12422 case 1:
12423 if (in)
12424 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12425 : ix86_cost->movzbl_load);
12426 else
12427 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12428 : ix86_cost->int_store[0] + 4);
12429 break;
12430 case 2:
12431 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12432 default:
12433 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
12434 if (mode == TFmode)
12435 mode = XFmode;
12436 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12437 * (int) GET_MODE_SIZE (mode) / 4);
12438 }
12439 }
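/* Worked example (a sketch): for DImode in an integer class the default
   arm applies, so the cost is the 32-bit integer load or store cost
   scaled by GET_MODE_SIZE (DImode) / 4, i.e. the price of two 32-bit
   moves.  */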
12440
12441 #ifdef DO_GLOBAL_CTORS_BODY
12442 static void
12443 ix86_svr3_asm_out_constructor (symbol, priority)
12444 rtx symbol;
12445 int priority ATTRIBUTE_UNUSED;
12446 {
12447 init_section ();
12448 fputs ("\tpushl $", asm_out_file);
12449 assemble_name (asm_out_file, XSTR (symbol, 0));
12450 fputc ('\n', asm_out_file);
12451 }
12452 #endif
12453
12454 /* Order the registers for the register allocator. */
12455
12456 void
12457 x86_order_regs_for_local_alloc ()
12458 {
12459 int pos = 0;
12460 int i;
12461
12462 /* First allocate the local general purpose registers. */
12463 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12464 if (GENERAL_REGNO_P (i) && call_used_regs[i])
12465 reg_alloc_order [pos++] = i;
12466
12467 /* Global general purpose registers. */
12468 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12469 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
12470 reg_alloc_order [pos++] = i;
12471
12472 /* x87 registers come first in case we are doing FP math
12473 using them. */
12474 if (!TARGET_SSE_MATH)
12475 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12476 reg_alloc_order [pos++] = i;
12477
12478 /* SSE registers. */
12479 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
12480 reg_alloc_order [pos++] = i;
12481 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
12482 reg_alloc_order [pos++] = i;
12483
12484 /* x87 registers. */
12485 if (TARGET_SSE_MATH)
12486 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12487 reg_alloc_order [pos++] = i;
12488
12489 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
12490 reg_alloc_order [pos++] = i;
12491
12492 /* Initialize the rest of the array, as we do not allocate some registers
12493 at all. */
12494 while (pos < FIRST_PSEUDO_REGISTER)
12495 reg_alloc_order [pos++] = 0;
12496 }