gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT (-1)
48 #endif
49
50 /* Processor costs (relative to an add) */
51 static const
52 struct processor_costs size_cost = { /* costs for tuning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
62 0, /* "large" insn */
63 2, /* MOVE_RATIO */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of storing fp registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
84 0, /* size of prefetch block */
85 0, /* number of parallel prefetches */
86 };
87 /* Processor costs (relative to an add) */
88 static const
89 struct processor_costs i386_cost = { /* 386 specific costs */
90 1, /* cost of an add instruction */
91 1, /* cost of a lea instruction */
92 3, /* variable shift costs */
93 2, /* constant shift costs */
94 6, /* cost of starting a multiply */
95 1, /* cost of multiply per each bit set */
96 23, /* cost of a divide/mod */
97 3, /* cost of movsx */
98 2, /* cost of movzx */
99 15, /* "large" insn */
100 3, /* MOVE_RATIO */
101 4, /* cost for loading QImode using movzbl */
102 {2, 4, 2}, /* cost of loading integer registers
103 in QImode, HImode and SImode.
104 Relative to reg-reg move (2). */
105 {2, 4, 2}, /* cost of storing integer registers */
106 2, /* cost of reg,reg fld/fst */
107 {8, 8, 8}, /* cost of loading fp registers
108 in SFmode, DFmode and XFmode */
109 {8, 8, 8}, /* cost of storing fp registers */
110 2, /* cost of moving MMX register */
111 {4, 8}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {4, 8}, /* cost of storing MMX registers
114 in SImode and DImode */
115 2, /* cost of moving SSE register */
116 {4, 8, 16}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {4, 8, 16}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
123 };
124
125 static const
126 struct processor_costs i486_cost = { /* 486 specific costs */
127 1, /* cost of an add instruction */
128 1, /* cost of a lea instruction */
129 3, /* variable shift costs */
130 2, /* constant shift costs */
131 12, /* cost of starting a multiply */
132 1, /* cost of multiply per each bit set */
133 40, /* cost of a divide/mod */
134 3, /* cost of movsx */
135 2, /* cost of movzx */
136 15, /* "large" insn */
137 3, /* MOVE_RATIO */
138 4, /* cost for loading QImode using movzbl */
139 {2, 4, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 4, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {8, 8, 8}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {8, 8, 8}, /* cost of storing fp registers */
147 2, /* cost of moving MMX register */
148 {4, 8}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {4, 8}, /* cost of storing MMX registers
151 in SImode and DImode */
152 2, /* cost of moving SSE register */
153 {4, 8, 16}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {4, 8, 16}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
157 3, /* MMX or SSE register to integer */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 };
161
162 static const
163 struct processor_costs pentium_cost = {
164 1, /* cost of an add instruction */
165 1, /* cost of a lea instruction */
166 4, /* variable shift costs */
167 1, /* constant shift costs */
168 11, /* cost of starting a multiply */
169 0, /* cost of multiply per each bit set */
170 25, /* cost of a divide/mod */
171 3, /* cost of movsx */
172 2, /* cost of movzx */
173 8, /* "large" insn */
174 6, /* MOVE_RATIO */
175 6, /* cost for loading QImode using movzbl */
176 {2, 4, 2}, /* cost of loading integer registers
177 in QImode, HImode and SImode.
178 Relative to reg-reg move (2). */
179 {2, 4, 2}, /* cost of storing integer registers */
180 2, /* cost of reg,reg fld/fst */
181 {2, 2, 6}, /* cost of loading fp registers
182 in SFmode, DFmode and XFmode */
183 {4, 4, 6}, /* cost of storing fp registers */
184 8, /* cost of moving MMX register */
185 {8, 8}, /* cost of loading MMX registers
186 in SImode and DImode */
187 {8, 8}, /* cost of storing MMX registers
188 in SImode and DImode */
189 2, /* cost of moving SSE register */
190 {4, 8, 16}, /* cost of loading SSE registers
191 in SImode, DImode and TImode */
192 {4, 8, 16}, /* cost of storing SSE registers
193 in SImode, DImode and TImode */
194 3, /* MMX or SSE register to integer */
195 0, /* size of prefetch block */
196 0, /* number of parallel prefetches */
197 };
198
199 static const
200 struct processor_costs pentiumpro_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 1, /* variable shift costs */
204 1, /* constant shift costs */
205 4, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 17, /* cost of a divide/mod */
208 1, /* cost of movsx */
209 1, /* cost of movzx */
210 8, /* "large" insn */
211 6, /* MOVE_RATIO */
212 2, /* cost for loading QImode using movzbl */
213 {4, 4, 4}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 2, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of storing fp registers */
221 2, /* cost of moving MMX register */
222 {2, 2}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {2, 2}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {2, 2, 8}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {2, 2, 8}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 32, /* size of prefetch block */
233 6, /* number of parallel prefetches */
234 };
235
236 static const
237 struct processor_costs k6_cost = {
238 1, /* cost of an add instruction */
239 2, /* cost of a lea instruction */
240 1, /* variable shift costs */
241 1, /* constant shift costs */
242 3, /* cost of starting a multiply */
243 0, /* cost of multiply per each bit set */
244 18, /* cost of a divide/mod */
245 2, /* cost of movsx */
246 2, /* cost of movzx */
247 8, /* "large" insn */
248 4, /* MOVE_RATIO */
249 3, /* cost for loading QImode using movzbl */
250 {4, 5, 4}, /* cost of loading integer registers
251 in QImode, HImode and SImode.
252 Relative to reg-reg move (2). */
253 {2, 3, 2}, /* cost of storing integer registers */
254 4, /* cost of reg,reg fld/fst */
255 {6, 6, 6}, /* cost of loading fp registers
256 in SFmode, DFmode and XFmode */
257 {4, 4, 4}, /* cost of storing fp registers */
258 2, /* cost of moving MMX register */
259 {2, 2}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {2, 2}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {2, 2, 8}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {2, 2, 8}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 6, /* MMX or SSE register to integer */
269 32, /* size of prefetch block */
270 1, /* number of parallel prefetches */
271 };
272
273 static const
274 struct processor_costs athlon_cost = {
275 1, /* cost of an add instruction */
276 2, /* cost of a lea instruction */
277 1, /* variable shift costs */
278 1, /* constant shift costs */
279 5, /* cost of starting a multiply */
280 0, /* cost of multiply per each bit set */
281 42, /* cost of a divide/mod */
282 1, /* cost of movsx */
283 1, /* cost of movzx */
284 8, /* "large" insn */
285 9, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {4, 5, 4}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 3, 2}, /* cost of storing integer registers */
291 4, /* cost of reg,reg fld/fst */
292 {6, 6, 20}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {4, 4, 16}, /* cost of storing fp registers */
295 2, /* cost of moving MMX register */
296 {2, 2}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {2, 2}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {2, 2, 8}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {2, 2, 8}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 6, /* MMX or SSE register to integer */
306 64, /* size of prefetch block */
307 6, /* number of parallel prefetches */
308 };
309
310 static const
311 struct processor_costs pentium4_cost = {
312 1, /* cost of an add instruction */
313 1, /* cost of a lea instruction */
314 8, /* variable shift costs */
315 8, /* constant shift costs */
316 30, /* cost of starting a multiply */
317 0, /* cost of multiply per each bit set */
318 112, /* cost of a divide/mod */
319 1, /* cost of movsx */
320 1, /* cost of movzx */
321 16, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 5, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 3, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers */
332 2, /* cost of moving MMX register */
333 {2, 2}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {2, 2}, /* cost of storing MMX registers
336 in SImode and DImode */
337 12, /* cost of moving SSE register */
338 {12, 12, 12}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {2, 2, 8}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 10, /* MMX or SSE register to integer */
343 64, /* size of prefetch block */
344 6, /* number of parallel prefetches */
345 };
346
347 const struct processor_costs *ix86_cost = &pentium_cost;
348
349 /* Processor feature/optimization bitmasks. */
350 #define m_386 (1<<PROCESSOR_I386)
351 #define m_486 (1<<PROCESSOR_I486)
352 #define m_PENT (1<<PROCESSOR_PENTIUM)
353 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
354 #define m_K6 (1<<PROCESSOR_K6)
355 #define m_ATHLON (1<<PROCESSOR_ATHLON)
356 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
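/* Each of the x86_* tuning words below is consumed through a TARGET_*
   macro in i386.h that tests the bit of the CPU currently being tuned
   for.  A minimal sketch of the pattern (illustrative only; the real
   macros live in i386.h, and CPUMASK is assumed to be (1 << ix86_cpu)):

       #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so a feature is in effect whenever the -mcpu target's bit is set in
   the corresponding constant.  */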
357
358 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
359 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
360 const int x86_zero_extend_with_and = m_486 | m_PENT;
361 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
362 const int x86_double_with_add = ~m_386;
363 const int x86_use_bit_test = m_386;
364 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
365 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
366 const int x86_3dnow_a = m_ATHLON;
367 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
368 const int x86_branch_hints = m_PENT4;
369 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
370 const int x86_partial_reg_stall = m_PPRO;
371 const int x86_use_loop = m_K6;
372 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
373 const int x86_use_mov0 = m_K6;
374 const int x86_use_cltd = ~(m_PENT | m_K6);
375 const int x86_read_modify_write = ~m_PENT;
376 const int x86_read_modify = ~(m_PENT | m_PPRO);
377 const int x86_split_long_moves = m_PPRO;
378 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
379 const int x86_single_stringop = m_386 | m_PENT4;
380 const int x86_qimode_math = ~(0);
381 const int x86_promote_qi_regs = 0;
382 const int x86_himode_math = ~(m_PPRO);
383 const int x86_promote_hi_regs = m_PPRO;
384 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
385 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
386 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
387 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
388 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
389 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
390 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
391 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
392 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_decompose_lea = m_PENT4;
395
396 /* In case the average insn count for a single function invocation is
397 lower than this constant, emit fast (but longer) prologue and
398 epilogue code. */
399 #define FAST_PROLOGUE_INSN_COUNT 30
400 /* Set by prologue expander and used by epilogue expander to determine
401 the style used. */
402 static int use_fast_prologue_epilogue;
403
404 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
405
406 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
407 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
408 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
409
410 /* Array of the smallest class containing reg number REGNO, indexed by
411 REGNO. Used by REGNO_REG_CLASS in i386.h. */
412
413 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
414 {
415 /* ax, dx, cx, bx */
416 AREG, DREG, CREG, BREG,
417 /* si, di, bp, sp */
418 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
419 /* FP registers */
420 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
421 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
422 /* arg pointer */
423 NON_Q_REGS,
424 /* flags, fpsr, dirflag, frame */
425 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
426 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
427 SSE_REGS, SSE_REGS,
428 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
429 MMX_REGS, MMX_REGS,
430 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
431 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 };
435
436 /* The "default" register map used in 32bit mode. */
437
438 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
439 {
440 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
441 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
442 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
443 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
444 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
445 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
446 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
447 };
448
449 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
450 1 /*RDX*/, 2 /*RCX*/,
451 FIRST_REX_INT_REG /*R8 */,
452 FIRST_REX_INT_REG + 1 /*R9 */};
453 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
454
455 /* The "default" register map used in 64bit mode. */
456 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
457 {
458 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
459 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
460 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
461 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
462 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
463 8,9,10,11,12,13,14,15, /* extended integer registers */
464 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
465 };
466
467 /* Define the register numbers to be used in Dwarf debugging information.
468 The SVR4 reference port C compiler uses the following register numbers
469 in its Dwarf output code:
470 0 for %eax (gcc regno = 0)
471 1 for %ecx (gcc regno = 2)
472 2 for %edx (gcc regno = 1)
473 3 for %ebx (gcc regno = 3)
474 4 for %esp (gcc regno = 7)
475 5 for %ebp (gcc regno = 6)
476 6 for %esi (gcc regno = 4)
477 7 for %edi (gcc regno = 5)
478 The following three DWARF register numbers are never generated by
479 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
480 believes these numbers have these meanings.
481 8 for %eip (no gcc equivalent)
482 9 for %eflags (gcc regno = 17)
483 10 for %trapno (no gcc equivalent)
484 It is not at all clear how we should number the FP stack registers
485 for the x86 architecture. If the version of SDB on x86/svr4 were
486 a bit less brain dead with respect to floating-point then we would
487 have a precedent to follow with respect to DWARF register numbers
488 for x86 FP registers, but the SDB on x86/svr4 is so completely
489 broken with respect to FP registers that it is hardly worth thinking
490 of it as something to strive for compatibility with.
491 The version of x86/svr4 SDB I have at the moment does (partially)
492 seem to believe that DWARF register number 11 is associated with
493 the x86 register %st(0), but that's about all. Higher DWARF
494 register numbers don't seem to be associated with anything in
495 particular, and even for DWARF regno 11, SDB only seems to under-
496 stand that it should say that a variable lives in %st(0) (when
497 asked via an `=' command) if we said it was in DWARF regno 11,
498 but SDB still prints garbage when asked for the value of the
499 variable in question (via a `/' command).
500 (Also note that the labels SDB prints for various FP stack regs
501 when doing an `x' command are all wrong.)
502 Note that these problems generally don't affect the native SVR4
503 C compiler because it doesn't allow the use of -O with -g and
504 because when it is *not* optimizing, it allocates a memory
505 location for each floating-point variable, and the memory
506 location is what gets described in the DWARF AT_location
507 attribute for the variable in question.
508 Regardless of the severe mental illness of the x86/svr4 SDB, we
509 do something sensible here and we use the following DWARF
510 register numbers. Note that these are all stack-top-relative
511 numbers.
512 11 for %st(0) (gcc regno = 8)
513 12 for %st(1) (gcc regno = 9)
514 13 for %st(2) (gcc regno = 10)
515 14 for %st(3) (gcc regno = 11)
516 15 for %st(4) (gcc regno = 12)
517 16 for %st(5) (gcc regno = 13)
518 17 for %st(6) (gcc regno = 14)
519 18 for %st(7) (gcc regno = 15)
520 */
521 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
522 {
523 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
524 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
525 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
526 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
527 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
528 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
530 };
531
532 /* Test and compare insns in i386.md store the information needed to
533 generate branch and scc insns here. */
534
535 rtx ix86_compare_op0 = NULL_RTX;
536 rtx ix86_compare_op1 = NULL_RTX;
537
538 #define MAX_386_STACK_LOCALS 3
539 /* Size of the register save area. */
540 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
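/* For instance, assuming REGPARM_MAX is 6 and SSE_REGPARM_MAX is 8 in
   64-bit mode (with UNITS_PER_WORD == 8), this works out to
   6 * 8 + 8 * 16 = 176 bytes, the size of the va_arg register save
   area described by the x86-64 psABI.  */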
541
542 /* Define the structure for the machine field in struct function. */
543 struct machine_function
544 {
545 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
546 int save_varrargs_registers;
547 int accesses_prev_frame;
548 };
549
550 #define ix86_stack_locals (cfun->machine->stack_locals)
551 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
552
553 /* Structure describing stack frame layout.
554 Stack grows downward:
555
556 [arguments]
557 <- ARG_POINTER
558 saved pc
559
560 saved frame pointer if frame_pointer_needed
561 <- HARD_FRAME_POINTER
562 [saved regs]
563
564 [padding1] \
565 )
566 [va_arg registers] (
567 > to_allocate <- FRAME_POINTER
568 [frame] (
569 )
570 [padding2] /
571 */
572 struct ix86_frame
573 {
574 int nregs;
575 int padding1;
576 int va_arg_size;
577 HOST_WIDE_INT frame;
578 int padding2;
579 int outgoing_arguments_size;
580 int red_zone_size;
581
582 HOST_WIDE_INT to_allocate;
583 /* The offsets relative to ARG_POINTER. */
584 HOST_WIDE_INT frame_pointer_offset;
585 HOST_WIDE_INT hard_frame_pointer_offset;
586 HOST_WIDE_INT stack_pointer_offset;
587 };
588
589 /* Used to enable/disable debugging features. */
590 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
591 /* Code model option as passed by user. */
592 const char *ix86_cmodel_string;
593 /* Parsed value. */
594 enum cmodel ix86_cmodel;
595 /* Asm dialect. */
596 const char *ix86_asm_string;
597 enum asm_dialect ix86_asm_dialect = ASM_ATT;
598
599 /* which cpu are we scheduling for */
600 enum processor_type ix86_cpu;
601
602 /* which unit we are generating floating point math for */
603 enum fpmath_unit ix86_fpmath;
604
605 /* which instruction set architecture to use. */
606 int ix86_arch;
607
608 /* Strings to hold which cpu and instruction set architecture to use. */
609 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
610 const char *ix86_arch_string; /* for -march=<xxx> */
611 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
612
613 /* # of registers to use to pass arguments. */
614 const char *ix86_regparm_string;
615
616 /* true if sse prefetch instruction is not NOOP. */
617 int x86_prefetch_sse;
618
619 /* ix86_regparm_string as a number */
620 int ix86_regparm;
621
622 /* Alignment to use for loops and jumps: */
623
624 /* Power of two alignment for loops. */
625 const char *ix86_align_loops_string;
626
627 /* Power of two alignment for non-loop jumps. */
628 const char *ix86_align_jumps_string;
629
630 /* Power of two alignment for stack boundary in bytes. */
631 const char *ix86_preferred_stack_boundary_string;
632
633 /* Preferred alignment for stack boundary in bits. */
634 int ix86_preferred_stack_boundary;
635
636 /* Values 1-5: see jump.c */
637 int ix86_branch_cost;
638 const char *ix86_branch_cost_string;
639
640 /* Power of two alignment for functions. */
641 const char *ix86_align_funcs_string;
642
643 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
644 static char internal_label_prefix[16];
645 static int internal_label_prefix_len;
646 \f
647 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
648 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
649 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
650 int, int, FILE *));
651 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
652 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
653 rtx *, rtx *));
654 static rtx gen_push PARAMS ((rtx));
655 static int memory_address_length PARAMS ((rtx addr));
656 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
657 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
658 static int ix86_safe_length PARAMS ((rtx));
659 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
660 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
661 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
662 static void ix86_dump_ppro_packet PARAMS ((FILE *));
663 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
664 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
665 rtx));
666 static void ix86_init_machine_status PARAMS ((struct function *));
667 static void ix86_mark_machine_status PARAMS ((struct function *));
668 static void ix86_free_machine_status PARAMS ((struct function *));
669 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
670 static int ix86_safe_length_prefix PARAMS ((rtx));
671 static int ix86_nsaved_regs PARAMS ((void));
672 static void ix86_emit_save_regs PARAMS ((void));
673 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
674 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
675 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
676 static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
677 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
678 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
679 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
680 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
681 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
682 static int ix86_issue_rate PARAMS ((void));
683 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
684 static void ix86_sched_init PARAMS ((FILE *, int, int));
685 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
686 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
687 static void ix86_init_mmx_sse_builtins PARAMS ((void));
688
689 struct ix86_address
690 {
691 rtx base, index, disp;
692 HOST_WIDE_INT scale;
693 };
694
695 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
696
697 struct builtin_description;
698 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
699 tree, rtx));
700 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
701 tree, rtx));
702 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
703 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
704 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
705 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
706 tree, rtx));
707 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
708 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
709 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
710 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
711 enum rtx_code *,
712 enum rtx_code *,
713 enum rtx_code *));
714 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
715 rtx *, rtx *));
716 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
717 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
718 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
719 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
720 static int ix86_save_reg PARAMS ((int, int));
721 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
722 static int ix86_comp_type_attributes PARAMS ((tree, tree));
723 const struct attribute_spec ix86_attribute_table[];
724 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
725 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
726
727 #ifdef DO_GLOBAL_CTORS_BODY
728 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
729 #endif
730
731 /* Register class used for passing given 64bit part of the argument.
732 These represent classes as documented by the PS ABI, with the exception
733 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
734 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
735
736 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
737 whenever possible (the upper half is just padding).
738 */
739 enum x86_64_reg_class
740 {
741 X86_64_NO_CLASS,
742 X86_64_INTEGER_CLASS,
743 X86_64_INTEGERSI_CLASS,
744 X86_64_SSE_CLASS,
745 X86_64_SSESF_CLASS,
746 X86_64_SSEDF_CLASS,
747 X86_64_SSEUP_CLASS,
748 X86_64_X87_CLASS,
749 X86_64_X87UP_CLASS,
750 X86_64_MEMORY_CLASS
751 };
752 static const char * const x86_64_reg_class_name[] =
753 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
754
755 #define MAX_CLASSES 4
756 static int classify_argument PARAMS ((enum machine_mode, tree,
757 enum x86_64_reg_class [MAX_CLASSES],
758 int));
759 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
760 int *));
761 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
762 const int *, int));
763 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
764 enum x86_64_reg_class));
765 \f
766 /* Initialize the GCC target structure. */
767 #undef TARGET_ATTRIBUTE_TABLE
768 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
769 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
770 # undef TARGET_MERGE_DECL_ATTRIBUTES
771 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
772 #endif
773
774 #undef TARGET_COMP_TYPE_ATTRIBUTES
775 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
776
777 #undef TARGET_INIT_BUILTINS
778 #define TARGET_INIT_BUILTINS ix86_init_builtins
779
780 #undef TARGET_EXPAND_BUILTIN
781 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
782
783 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
784 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
785 HOST_WIDE_INT));
786 # undef TARGET_ASM_FUNCTION_PROLOGUE
787 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
788 #endif
789
790 #undef TARGET_ASM_OPEN_PAREN
791 #define TARGET_ASM_OPEN_PAREN ""
792 #undef TARGET_ASM_CLOSE_PAREN
793 #define TARGET_ASM_CLOSE_PAREN ""
794
795 #undef TARGET_ASM_ALIGNED_HI_OP
796 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
797 #undef TARGET_ASM_ALIGNED_SI_OP
798 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
799 #ifdef ASM_QUAD
800 #undef TARGET_ASM_ALIGNED_DI_OP
801 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
802 #endif
803
804 #undef TARGET_ASM_UNALIGNED_HI_OP
805 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
806 #undef TARGET_ASM_UNALIGNED_SI_OP
807 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
808 #undef TARGET_ASM_UNALIGNED_DI_OP
809 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
810
811 #undef TARGET_SCHED_ADJUST_COST
812 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
813 #undef TARGET_SCHED_ISSUE_RATE
814 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
815 #undef TARGET_SCHED_VARIABLE_ISSUE
816 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
817 #undef TARGET_SCHED_INIT
818 #define TARGET_SCHED_INIT ix86_sched_init
819 #undef TARGET_SCHED_REORDER
820 #define TARGET_SCHED_REORDER ix86_sched_reorder
821
822 struct gcc_target targetm = TARGET_INITIALIZER;
823 \f
824 /* Sometimes certain combinations of command options do not make
825 sense on a particular target machine. You can define a macro
826 `OVERRIDE_OPTIONS' to take account of this. This macro, if
827 defined, is executed once just after all the command options have
828 been parsed.
829
830 Don't use this macro to turn on various extra optimizations for
831 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
832
833 void
834 override_options ()
835 {
836 int i;
837 /* Comes from final.c -- no real reason to change it. */
838 #define MAX_CODE_ALIGN 16
839
840 static struct ptt
841 {
842 const struct processor_costs *cost; /* Processor costs */
843 const int target_enable; /* Target flags to enable. */
844 const int target_disable; /* Target flags to disable. */
845 const int align_loop; /* Default alignments. */
846 const int align_loop_max_skip;
847 const int align_jump;
848 const int align_jump_max_skip;
849 const int align_func;
850 const int branch_cost;
851 }
852 const processor_target_table[PROCESSOR_max] =
853 {
854 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
855 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
856 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
857 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
858 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
859 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
860 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
861 };
862
863 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
864 static struct pta
865 {
866 const char *const name; /* processor name or nickname. */
867 const enum processor_type processor;
868 const enum pta_flags
869 {
870 PTA_SSE = 1,
871 PTA_SSE2 = 2,
872 PTA_MMX = 4,
873 PTA_PREFETCH_SSE = 8,
874 PTA_3DNOW = 16,
875 PTA_3DNOW_A = 64
876 } flags;
877 }
878 const processor_alias_table[] =
879 {
880 {"i386", PROCESSOR_I386, 0},
881 {"i486", PROCESSOR_I486, 0},
882 {"i586", PROCESSOR_PENTIUM, 0},
883 {"pentium", PROCESSOR_PENTIUM, 0},
884 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
885 {"i686", PROCESSOR_PENTIUMPRO, 0},
886 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
887 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
888 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
889 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
890 PTA_MMX | PTA_PREFETCH_SSE},
891 {"k6", PROCESSOR_K6, PTA_MMX},
892 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
893 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
894 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
895 | PTA_3DNOW_A},
896 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
897 | PTA_3DNOW | PTA_3DNOW_A},
898 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
899 | PTA_3DNOW_A | PTA_SSE},
900 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
901 | PTA_3DNOW_A | PTA_SSE},
902 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
903 | PTA_3DNOW_A | PTA_SSE},
904 };
905
906 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
907
908 #ifdef SUBTARGET_OVERRIDE_OPTIONS
909 SUBTARGET_OVERRIDE_OPTIONS;
910 #endif
911
912 if (!ix86_cpu_string && ix86_arch_string)
913 ix86_cpu_string = ix86_arch_string;
914 if (!ix86_cpu_string)
915 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
916 if (!ix86_arch_string)
917 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
918
919 if (ix86_cmodel_string != 0)
920 {
921 if (!strcmp (ix86_cmodel_string, "small"))
922 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
923 else if (flag_pic)
924 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
925 else if (!strcmp (ix86_cmodel_string, "32"))
926 ix86_cmodel = CM_32;
927 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
928 ix86_cmodel = CM_KERNEL;
929 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
930 ix86_cmodel = CM_MEDIUM;
931 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
932 ix86_cmodel = CM_LARGE;
933 else
934 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
935 }
936 else
937 {
938 ix86_cmodel = CM_32;
939 if (TARGET_64BIT)
940 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
941 }
942 if (ix86_asm_string != 0)
943 {
944 if (!strcmp (ix86_asm_string, "intel"))
945 ix86_asm_dialect = ASM_INTEL;
946 else if (!strcmp (ix86_asm_string, "att"))
947 ix86_asm_dialect = ASM_ATT;
948 else
949 error ("bad value (%s) for -masm= switch", ix86_asm_string);
950 }
951 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
952 error ("code model `%s' not supported in the %s bit mode",
953 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
954 if (ix86_cmodel == CM_LARGE)
955 sorry ("code model `large' not supported yet");
956 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
957 sorry ("%i-bit mode not compiled in",
958 (target_flags & MASK_64BIT) ? 64 : 32);
959
960 for (i = 0; i < pta_size; i++)
961 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
962 {
963 ix86_arch = processor_alias_table[i].processor;
964 /* Default cpu tuning to the architecture. */
965 ix86_cpu = ix86_arch;
966 if (processor_alias_table[i].flags & PTA_MMX
967 && !(target_flags & MASK_MMX_SET))
968 target_flags |= MASK_MMX;
969 if (processor_alias_table[i].flags & PTA_3DNOW
970 && !(target_flags & MASK_3DNOW_SET))
971 target_flags |= MASK_3DNOW;
972 if (processor_alias_table[i].flags & PTA_3DNOW_A
973 && !(target_flags & MASK_3DNOW_A_SET))
974 target_flags |= MASK_3DNOW_A;
975 if (processor_alias_table[i].flags & PTA_SSE
976 && !(target_flags & MASK_SSE_SET))
977 target_flags |= MASK_SSE;
978 if (processor_alias_table[i].flags & PTA_SSE2
979 && !(target_flags & MASK_SSE2_SET))
980 target_flags |= MASK_SSE2;
981 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
982 x86_prefetch_sse = true;
983 break;
984 }
985
986 if (i == pta_size)
987 error ("bad value (%s) for -march= switch", ix86_arch_string);
988
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
991 {
992 ix86_cpu = processor_alias_table[i].processor;
993 break;
994 }
995 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
996 x86_prefetch_sse = true;
997 if (i == pta_size)
998 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
999
1000 if (optimize_size)
1001 ix86_cost = &size_cost;
1002 else
1003 ix86_cost = processor_target_table[ix86_cpu].cost;
1004 target_flags |= processor_target_table[ix86_cpu].target_enable;
1005 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1006
1007 /* Arrange to set up i386_stack_locals for all functions. */
1008 init_machine_status = ix86_init_machine_status;
1009 mark_machine_status = ix86_mark_machine_status;
1010 free_machine_status = ix86_free_machine_status;
1011
1012 /* Validate -mregparm= value. */
1013 if (ix86_regparm_string)
1014 {
1015 i = atoi (ix86_regparm_string);
1016 if (i < 0 || i > REGPARM_MAX)
1017 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1018 else
1019 ix86_regparm = i;
1020 }
1021 else
1022 if (TARGET_64BIT)
1023 ix86_regparm = REGPARM_MAX;
1024
1025 /* If the user has provided any of the -malign-* options,
1026 warn and use that value only if -falign-* is not set.
1027 Remove this code in GCC 3.2 or later. */
1028 if (ix86_align_loops_string)
1029 {
1030 warning ("-malign-loops is obsolete, use -falign-loops");
1031 if (align_loops == 0)
1032 {
1033 i = atoi (ix86_align_loops_string);
1034 if (i < 0 || i > MAX_CODE_ALIGN)
1035 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1036 else
1037 align_loops = 1 << i;
1038 }
1039 }
1040
1041 if (ix86_align_jumps_string)
1042 {
1043 warning ("-malign-jumps is obsolete, use -falign-jumps");
1044 if (align_jumps == 0)
1045 {
1046 i = atoi (ix86_align_jumps_string);
1047 if (i < 0 || i > MAX_CODE_ALIGN)
1048 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1049 else
1050 align_jumps = 1 << i;
1051 }
1052 }
1053
1054 if (ix86_align_funcs_string)
1055 {
1056 warning ("-malign-functions is obsolete, use -falign-functions");
1057 if (align_functions == 0)
1058 {
1059 i = atoi (ix86_align_funcs_string);
1060 if (i < 0 || i > MAX_CODE_ALIGN)
1061 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1062 else
1063 align_functions = 1 << i;
1064 }
1065 }
1066
1067 /* Default align_* from the processor table. */
1068 if (align_loops == 0)
1069 {
1070 align_loops = processor_target_table[ix86_cpu].align_loop;
1071 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1072 }
1073 if (align_jumps == 0)
1074 {
1075 align_jumps = processor_target_table[ix86_cpu].align_jump;
1076 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1077 }
1078 if (align_functions == 0)
1079 {
1080 align_functions = processor_target_table[ix86_cpu].align_func;
1081 }
1082
1083 /* Validate -mpreferred-stack-boundary= value, or provide default.
1084 The default of 128 bits is for Pentium III's SSE __m128, but we
1085 don't want additional code to keep the stack aligned when
1086 optimizing for code size. */
1087 ix86_preferred_stack_boundary = (optimize_size
1088 ? TARGET_64BIT ? 64 : 32
1089 : 128);
1090 if (ix86_preferred_stack_boundary_string)
1091 {
1092 i = atoi (ix86_preferred_stack_boundary_string);
1093 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1094 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1095 TARGET_64BIT ? 3 : 2);
1096 else
1097 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1098 }
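/* For instance, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 128 bits, i.e. the 16 byte alignment that SSE's __m128 wants, while
   the minimum of 3 in 64-bit mode keeps the stack at least 8 byte
   aligned.  */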
1099
1100 /* Validate -mbranch-cost= value, or provide default. */
1101 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1102 if (ix86_branch_cost_string)
1103 {
1104 i = atoi (ix86_branch_cost_string);
1105 if (i < 0 || i > 5)
1106 error ("-mbranch-cost=%d is not between 0 and 5", i);
1107 else
1108 ix86_branch_cost = i;
1109 }
1110
1111 /* Keep nonleaf frame pointers. */
1112 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1113 flag_omit_frame_pointer = 1;
1114
1115 /* If we're doing fast math, we don't care about comparison order
1116 wrt NaNs. This lets us use a shorter comparison sequence. */
1117 if (flag_unsafe_math_optimizations)
1118 target_flags &= ~MASK_IEEE_FP;
1119
1120 if (TARGET_64BIT)
1121 {
1122 if (TARGET_ALIGN_DOUBLE)
1123 error ("-malign-double makes no sense in the 64bit mode");
1124 if (TARGET_RTD)
1125 error ("-mrtd calling convention not supported in the 64bit mode");
1126 /* Enable by default the SSE and MMX builtins. */
1127 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1128 ix86_fpmath = FPMATH_SSE;
1129 }
1130 else
1131 ix86_fpmath = FPMATH_387;
1132
1133 if (ix86_fpmath_string != 0)
1134 {
1135 if (! strcmp (ix86_fpmath_string, "387"))
1136 ix86_fpmath = FPMATH_387;
1137 else if (! strcmp (ix86_fpmath_string, "sse"))
1138 {
1139 if (!TARGET_SSE)
1140 {
1141 warning ("SSE instruction set disabled, using 387 arithmetics");
1142 ix86_fpmath = FPMATH_387;
1143 }
1144 else
1145 ix86_fpmath = FPMATH_SSE;
1146 }
1147 else if (! strcmp (ix86_fpmath_string, "387,sse")
1148 || ! strcmp (ix86_fpmath_string, "sse,387"))
1149 {
1150 if (!TARGET_SSE)
1151 {
1152 warning ("SSE instruction set disabled, using 387 arithmetics");
1153 ix86_fpmath = FPMATH_387;
1154 }
1155 else if (!TARGET_80387)
1156 {
1157 warning ("387 instruction set disabled, using SSE arithmetics");
1158 ix86_fpmath = FPMATH_SSE;
1159 }
1160 else
1161 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1162 }
1163 else
1164 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1165 }
1166
1167 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1168 on by -msse. */
1169 if (TARGET_SSE)
1170 {
1171 target_flags |= MASK_MMX;
1172 x86_prefetch_sse = true;
1173 }
1174
1175 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1176 if (TARGET_3DNOW)
1177 {
1178 target_flags |= MASK_MMX;
1179 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1180 extensions it adds. */
1181 if (x86_3dnow_a & (1 << ix86_arch))
1182 target_flags |= MASK_3DNOW_A;
1183 }
1184 if ((x86_accumulate_outgoing_args & CPUMASK)
1185 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1186 && !optimize_size)
1187 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1188
1189 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1190 {
1191 char *p;
1192 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1193 p = strchr (internal_label_prefix, 'X');
1194 internal_label_prefix_len = p - internal_label_prefix;
1195 *p = '\0';
1196 }
1197 }
1198 \f
1199 void
1200 optimization_options (level, size)
1201 int level;
1202 int size ATTRIBUTE_UNUSED;
1203 {
1204 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1205 make the problem with not enough registers even worse. */
1206 #ifdef INSN_SCHEDULING
1207 if (level > 1)
1208 flag_schedule_insns = 0;
1209 #endif
1210 if (TARGET_64BIT && optimize >= 1)
1211 flag_omit_frame_pointer = 1;
1212 if (TARGET_64BIT)
1213 {
1214 flag_pcc_struct_return = 0;
1215 flag_asynchronous_unwind_tables = 1;
1216 }
1217 }
1218 \f
1219 /* Table of valid machine attributes. */
1220 const struct attribute_spec ix86_attribute_table[] =
1221 {
1222 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1223 /* Stdcall attribute says callee is responsible for popping arguments
1224 if they are not variable. */
1225 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1226 /* Cdecl attribute says the callee is a normal C declaration */
1227 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1228 /* Regparm attribute specifies how many integer arguments are to be
1229 passed in registers. */
1230 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1231 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1232 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1233 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1234 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1235 #endif
1236 { NULL, 0, 0, false, false, false, NULL }
1237 };
1238
1239 /* Handle a "cdecl" or "stdcall" attribute;
1240 arguments as in struct attribute_spec.handler. */
1241 static tree
1242 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1243 tree *node;
1244 tree name;
1245 tree args ATTRIBUTE_UNUSED;
1246 int flags ATTRIBUTE_UNUSED;
1247 bool *no_add_attrs;
1248 {
1249 if (TREE_CODE (*node) != FUNCTION_TYPE
1250 && TREE_CODE (*node) != METHOD_TYPE
1251 && TREE_CODE (*node) != FIELD_DECL
1252 && TREE_CODE (*node) != TYPE_DECL)
1253 {
1254 warning ("`%s' attribute only applies to functions",
1255 IDENTIFIER_POINTER (name));
1256 *no_add_attrs = true;
1257 }
1258
1259 if (TARGET_64BIT)
1260 {
1261 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1262 *no_add_attrs = true;
1263 }
1264
1265 return NULL_TREE;
1266 }
1267
1268 /* Handle a "regparm" attribute;
1269 arguments as in struct attribute_spec.handler. */
1270 static tree
1271 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1272 tree *node;
1273 tree name;
1274 tree args;
1275 int flags ATTRIBUTE_UNUSED;
1276 bool *no_add_attrs;
1277 {
1278 if (TREE_CODE (*node) != FUNCTION_TYPE
1279 && TREE_CODE (*node) != METHOD_TYPE
1280 && TREE_CODE (*node) != FIELD_DECL
1281 && TREE_CODE (*node) != TYPE_DECL)
1282 {
1283 warning ("`%s' attribute only applies to functions",
1284 IDENTIFIER_POINTER (name));
1285 *no_add_attrs = true;
1286 }
1287 else
1288 {
1289 tree cst;
1290
1291 cst = TREE_VALUE (args);
1292 if (TREE_CODE (cst) != INTEGER_CST)
1293 {
1294 warning ("`%s' attribute requires an integer constant argument",
1295 IDENTIFIER_POINTER (name));
1296 *no_add_attrs = true;
1297 }
1298 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1299 {
1300 warning ("argument to `%s' attribute larger than %d",
1301 IDENTIFIER_POINTER (name), REGPARM_MAX);
1302 *no_add_attrs = true;
1303 }
1304 }
1305
1306 return NULL_TREE;
1307 }
1308
1309 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1310
1311 /* Generate the assembly code for function entry. FILE is a stdio
1312 stream to output the code to. SIZE is an int: how many units of
1313 temporary storage to allocate.
1314
1315 Refer to the array `regs_ever_live' to determine which registers to
1316 save; `regs_ever_live[I]' is nonzero if register number I is ever
1317 used in the function. This function is responsible for knowing
1318 which registers should not be saved even if used.
1319
1320 We override it here to allow for the new profiling code to go before
1321 the prologue and the old mcount code to go after the prologue (and
1322 after %ebx has been set up for ELF shared library support). */
1323
1324 static void
1325 ix86_osf_output_function_prologue (file, size)
1326 FILE *file;
1327 HOST_WIDE_INT size;
1328 {
1329 const char *prefix = "";
1330 const char *const lprefix = LPREFIX;
1331 int labelno = profile_label_no;
1332
1333 #ifdef OSF_OS
1334
1335 if (TARGET_UNDERSCORES)
1336 prefix = "_";
1337
1338 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1339 {
1340 if (!flag_pic && !HALF_PIC_P ())
1341 {
1342 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1343 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1344 }
1345
1346 else if (HALF_PIC_P ())
1347 {
1348 rtx symref;
1349
1350 HALF_PIC_EXTERNAL ("_mcount_ptr");
1351 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1352 "_mcount_ptr"));
1353
1354 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1355 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1356 XSTR (symref, 0));
1357 fprintf (file, "\tcall *(%%eax)\n");
1358 }
1359
1360 else
1361 {
1362 static int call_no = 0;
1363
1364 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1365 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1366 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1367 lprefix, call_no++);
1368 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1369 lprefix, labelno);
1370 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1371 prefix);
1372 fprintf (file, "\tcall *(%%eax)\n");
1373 }
1374 }
1375
1376 #else /* !OSF_OS */
1377
1378 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1379 {
1380 if (!flag_pic)
1381 {
1382 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1383 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1384 }
1385
1386 else
1387 {
1388 static int call_no = 0;
1389
1390 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1391 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1392 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1393 lprefix, call_no++);
1394 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1395 lprefix, labelno);
1396 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1397 prefix);
1398 fprintf (file, "\tcall *(%%eax)\n");
1399 }
1400 }
1401 #endif /* !OSF_OS */
1402
1403 function_prologue (file, size);
1404 }
1405
1406 #endif /* OSF_OS || TARGET_OSF1ELF */
1407
1408 /* Return 0 if the attributes for two types are incompatible, 1 if they
1409 are compatible, and 2 if they are nearly compatible (which causes a
1410 warning to be generated). */
1411
1412 static int
1413 ix86_comp_type_attributes (type1, type2)
1414 tree type1;
1415 tree type2;
1416 {
1417 /* Check for mismatch of non-default calling convention. */
1418 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1419
1420 if (TREE_CODE (type1) != FUNCTION_TYPE)
1421 return 1;
1422
1423 /* Check for mismatched return types (cdecl vs stdcall). */
1424 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1425 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1426 return 0;
1427 return 1;
1428 }
1429 \f
1430 /* Value is the number of bytes of arguments automatically
1431 popped when returning from a subroutine call.
1432 FUNDECL is the declaration node of the function (as a tree),
1433 FUNTYPE is the data type of the function (as a tree),
1434 or for a library call it is an identifier node for the subroutine name.
1435 SIZE is the number of bytes of arguments passed on the stack.
1436
1437 On the 80386, the RTD insn may be used to pop them if the number
1438 of args is fixed, but if the number is variable then the caller
1439 must pop them all. RTD can't be used for library calls now
1440 because the library is compiled with the Unix compiler.
1441 Use of RTD is a selectable option, since it is incompatible with
1442 standard Unix calling sequences. If the option is not selected,
1443 the caller must always pop the args.
1444
1445 The attribute stdcall is equivalent to RTD on a per module basis. */
1446
1447 int
1448 ix86_return_pops_args (fundecl, funtype, size)
1449 tree fundecl;
1450 tree funtype;
1451 int size;
1452 {
1453 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1454
1455 /* Cdecl functions override -mrtd, and never pop the stack. */
1456 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1457
1458 /* Stdcall functions will pop the stack if not variable args. */
1459 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1460 rtd = 1;
1461
1462 if (rtd
1463 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1464 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1465 == void_type_node)))
1466 return size;
1467 }
1468
1469 /* Lose any fake structure return argument. */
1470 if (aggregate_value_p (TREE_TYPE (funtype))
1471 && !TARGET_64BIT)
1472 return GET_MODE_SIZE (Pmode);
1473
1474 return 0;
1475 }
1476 \f
1477 /* Argument support functions. */
1478
1479 /* Return true when register may be used to pass function parameters. */
1480 bool
1481 ix86_function_arg_regno_p (regno)
1482 int regno;
1483 {
1484 int i;
1485 if (!TARGET_64BIT)
1486 return (regno < REGPARM_MAX
1487 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1488 if (SSE_REGNO_P (regno) && TARGET_SSE)
1489 return true;
1490 /* RAX is used as hidden argument to va_arg functions. */
1491 if (!regno)
1492 return true;
1493 for (i = 0; i < REGPARM_MAX; i++)
1494 if (regno == x86_64_int_parameter_registers[i])
1495 return true;
1496 return false;
1497 }
1498
1499 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1500 for a call to a function whose data type is FNTYPE.
1501 For a library call, FNTYPE is 0. */
1502
1503 void
1504 init_cumulative_args (cum, fntype, libname)
1505 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1506 tree fntype; /* tree ptr for function decl */
1507 rtx libname; /* SYMBOL_REF of library name or 0 */
1508 {
1509 static CUMULATIVE_ARGS zero_cum;
1510 tree param, next_param;
1511
1512 if (TARGET_DEBUG_ARG)
1513 {
1514 fprintf (stderr, "\ninit_cumulative_args (");
1515 if (fntype)
1516 fprintf (stderr, "fntype code = %s, ret code = %s",
1517 tree_code_name[(int) TREE_CODE (fntype)],
1518 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1519 else
1520 fprintf (stderr, "no fntype");
1521
1522 if (libname)
1523 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1524 }
1525
1526 *cum = zero_cum;
1527
1528 /* Set up the number of registers to use for passing arguments. */
1529 cum->nregs = ix86_regparm;
1530 cum->sse_nregs = SSE_REGPARM_MAX;
1531 if (fntype && !TARGET_64BIT)
1532 {
1533 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1534
1535 if (attr)
1536 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1537 }
1538 cum->maybe_vaarg = false;
1539
1540 /* Determine if this function has variable arguments. This is
1541 indicated by the last argument being 'void_type_node' if there
1542 are no variable arguments. If there are variable arguments, then
1543 we won't pass anything in registers. */
1544
1545 if (cum->nregs)
1546 {
1547 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1548 param != 0; param = next_param)
1549 {
1550 next_param = TREE_CHAIN (param);
1551 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1552 {
1553 if (!TARGET_64BIT)
1554 cum->nregs = 0;
1555 cum->maybe_vaarg = true;
1556 }
1557 }
1558 }
1559 if ((!fntype && !libname)
1560 || (fntype && !TYPE_ARG_TYPES (fntype)))
1561 cum->maybe_vaarg = 1;
1562
1563 if (TARGET_DEBUG_ARG)
1564 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1565
1566 return;
1567 }
1568
1569 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1570 of this code is to classify each eightbyte of an incoming argument by register
1571 class and assign registers accordingly. */
1572
1573 /* Return the union class of CLASS1 and CLASS2.
1574 See the x86-64 PS ABI for details. */
1575
1576 static enum x86_64_reg_class
1577 merge_classes (class1, class2)
1578 enum x86_64_reg_class class1, class2;
1579 {
1580 /* Rule #1: If both classes are equal, this is the resulting class. */
1581 if (class1 == class2)
1582 return class1;
1583
1584 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1585 the other class. */
1586 if (class1 == X86_64_NO_CLASS)
1587 return class2;
1588 if (class2 == X86_64_NO_CLASS)
1589 return class1;
1590
1591 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1592 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1593 return X86_64_MEMORY_CLASS;
1594
1595 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1596 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1597 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1598 return X86_64_INTEGERSI_CLASS;
1599 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1600 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1601 return X86_64_INTEGER_CLASS;
1602
1603 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1604 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1605 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1606 return X86_64_MEMORY_CLASS;
1607
1608 /* Rule #6: Otherwise class SSE is used. */
1609 return X86_64_SSE_CLASS;
1610 }
1611
1612 /* Classify the argument of type TYPE and mode MODE.
1613 CLASSES will be filled by the register class used to pass each word
1614 of the operand. The number of words is returned. In case the parameter
1615 should be passed in memory, 0 is returned. As a special case for zero
1616 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1617
1618 BIT_OFFSET is used internally for handling records and specifies the offset
1619 in bits modulo 256 to avoid overflow cases.
1620
1621 See the x86-64 PS ABI for details.
1622 */
1623
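/* A worked example (a sketch, assuming the standard x86-64 ABI): for

       struct s { long l; double d; };

   the first eightbyte (L) classifies as INTEGER and the second (D) as
   SSEDF, so classify_argument returns 2 with classes[0] ==
   X86_64_INTEGER_CLASS and classes[1] == X86_64_SSEDF_CLASS, and the
   struct is passed in one general purpose and one SSE register.  */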
1624 static int
1625 classify_argument (mode, type, classes, bit_offset)
1626 enum machine_mode mode;
1627 tree type;
1628 enum x86_64_reg_class classes[MAX_CLASSES];
1629 int bit_offset;
1630 {
1631 int bytes =
1632 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1633 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1634
1635 if (type && AGGREGATE_TYPE_P (type))
1636 {
1637 int i;
1638 tree field;
1639 enum x86_64_reg_class subclasses[MAX_CLASSES];
1640
1641 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1642 if (bytes > 16)
1643 return 0;
1644
1645 for (i = 0; i < words; i++)
1646 classes[i] = X86_64_NO_CLASS;
1647
1648 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
1649 signal the memory class, so handle this as a special case. */
1650 if (!words)
1651 {
1652 classes[0] = X86_64_NO_CLASS;
1653 return 1;
1654 }
1655
1656 /* Classify each field of record and merge classes. */
1657 if (TREE_CODE (type) == RECORD_TYPE)
1658 {
1659 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1660 {
1661 if (TREE_CODE (field) == FIELD_DECL)
1662 {
1663 int num;
1664
1665 /* Bitfields are always classified as integer. Handle them
1666 early, since later code would consider them to be
1667 misaligned integers. */
1668 if (DECL_BIT_FIELD (field))
1669 {
1670 for (i = int_bit_position (field) / 8 / 8;
1671 i < (int_bit_position (field)
1672 + tree_low_cst (DECL_SIZE (field), 0)
1673 + 63) / 8 / 8; i++)
1674 classes[i] =
1675 merge_classes (X86_64_INTEGER_CLASS,
1676 classes[i]);
1677 }
1678 else
1679 {
1680 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1681 TREE_TYPE (field), subclasses,
1682 (int_bit_position (field)
1683 + bit_offset) % 256);
1684 if (!num)
1685 return 0;
1686 for (i = 0; i < num; i++)
1687 {
1688 int pos =
1689 (int_bit_position (field) + bit_offset) / 8 / 8;
1690 classes[i + pos] =
1691 merge_classes (subclasses[i], classes[i + pos]);
1692 }
1693 }
1694 }
1695 }
1696 }
1697 /* Arrays are handled as small records. */
1698 else if (TREE_CODE (type) == ARRAY_TYPE)
1699 {
1700 int num;
1701 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1702 TREE_TYPE (type), subclasses, bit_offset);
1703 if (!num)
1704 return 0;
1705
1706 /* The partial classes are now full classes. */
1707 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1708 subclasses[0] = X86_64_SSE_CLASS;
1709 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1710 subclasses[0] = X86_64_INTEGER_CLASS;
1711
1712 for (i = 0; i < words; i++)
1713 classes[i] = subclasses[i % num];
1714 }
1715 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1716 else if (TREE_CODE (type) == UNION_TYPE)
1717 {
1718 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1719 {
1720 if (TREE_CODE (field) == FIELD_DECL)
1721 {
1722 int num;
1723 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1724 TREE_TYPE (field), subclasses,
1725 bit_offset);
1726 if (!num)
1727 return 0;
1728 for (i = 0; i < num; i++)
1729 classes[i] = merge_classes (subclasses[i], classes[i]);
1730 }
1731 }
1732 }
1733 else
1734 abort ();
1735
1736 /* Final merger cleanup. */
1737 for (i = 0; i < words; i++)
1738 {
1739 /* If one class is MEMORY, everything should be passed in
1740 memory. */
1741 if (classes[i] == X86_64_MEMORY_CLASS)
1742 return 0;
1743
1744 /* The X86_64_SSEUP_CLASS should always be preceded by
1745 X86_64_SSE_CLASS. */
1746 if (classes[i] == X86_64_SSEUP_CLASS
1747 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1748 classes[i] = X86_64_SSE_CLASS;
1749
1750 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1751 if (classes[i] == X86_64_X87UP_CLASS
1752 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1753 classes[i] = X86_64_SSE_CLASS;
1754 }
1755 return words;
1756 }
1757
1758 /* Compute the alignment needed. We align all types to natural boundaries, with
1759 the exception of XFmode, which is aligned to 64 bits. */
1760 if (mode != VOIDmode && mode != BLKmode)
1761 {
1762 int mode_alignment = GET_MODE_BITSIZE (mode);
1763
1764 if (mode == XFmode)
1765 mode_alignment = 128;
1766 else if (mode == XCmode)
1767 mode_alignment = 256;
1768 /* Misaligned fields are always returned in memory. */
1769 if (bit_offset % mode_alignment)
1770 return 0;
1771 }
1772
1773 /* Classification of atomic types. */
1774 switch (mode)
1775 {
1776 case DImode:
1777 case SImode:
1778 case HImode:
1779 case QImode:
1780 case CSImode:
1781 case CHImode:
1782 case CQImode:
1783 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1784 classes[0] = X86_64_INTEGERSI_CLASS;
1785 else
1786 classes[0] = X86_64_INTEGER_CLASS;
1787 return 1;
1788 case CDImode:
1789 case TImode:
1790 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1791 return 2;
1792 case CTImode:
1793 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1794 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1795 return 4;
1796 case SFmode:
1797 if (!(bit_offset % 64))
1798 classes[0] = X86_64_SSESF_CLASS;
1799 else
1800 classes[0] = X86_64_SSE_CLASS;
1801 return 1;
1802 case DFmode:
1803 classes[0] = X86_64_SSEDF_CLASS;
1804 return 1;
1805 case TFmode:
1806 classes[0] = X86_64_X87_CLASS;
1807 classes[1] = X86_64_X87UP_CLASS;
1808 return 2;
1809 case TCmode:
1810 classes[0] = X86_64_X87_CLASS;
1811 classes[1] = X86_64_X87UP_CLASS;
1812 classes[2] = X86_64_X87_CLASS;
1813 classes[3] = X86_64_X87UP_CLASS;
1814 return 4;
1815 case DCmode:
1816 classes[0] = X86_64_SSEDF_CLASS;
1817 classes[1] = X86_64_SSEDF_CLASS;
1818 return 2;
1819 case SCmode:
1820 classes[0] = X86_64_SSE_CLASS;
1821 return 1;
1822 case BLKmode:
1823 return 0;
1824 default:
1825 abort ();
1826 }
1827 }
1828
1829 /* Examine the argument and set the number of registers required in each
1830 class. Return 0 iff the parameter should be passed in memory. */
1831 static int
1832 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1833 enum machine_mode mode;
1834 tree type;
1835 int *int_nregs, *sse_nregs;
1836 int in_return;
1837 {
1838 enum x86_64_reg_class class[MAX_CLASSES];
1839 int n = classify_argument (mode, type, class, 0);
1840
1841 *int_nregs = 0;
1842 *sse_nregs = 0;
1843 if (!n)
1844 return 0;
1845 for (n--; n >= 0; n--)
1846 switch (class[n])
1847 {
1848 case X86_64_INTEGER_CLASS:
1849 case X86_64_INTEGERSI_CLASS:
1850 (*int_nregs)++;
1851 break;
1852 case X86_64_SSE_CLASS:
1853 case X86_64_SSESF_CLASS:
1854 case X86_64_SSEDF_CLASS:
1855 (*sse_nregs)++;
1856 break;
1857 case X86_64_NO_CLASS:
1858 case X86_64_SSEUP_CLASS:
1859 break;
1860 case X86_64_X87_CLASS:
1861 case X86_64_X87UP_CLASS:
1862 if (!in_return)
1863 return 0;
1864 break;
1865 case X86_64_MEMORY_CLASS:
1866 abort ();
1867 }
1868 return 1;
1869 }
1870 /* Construct container for the argument used by GCC interface. See
1871 FUNCTION_ARG for the detailed description. */
1872 static rtx
1873 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1874 enum machine_mode mode;
1875 tree type;
1876 int in_return;
1877 int nintregs, nsseregs;
1878 const int * intreg;
1879 int sse_regno;
1880 {
1881 enum machine_mode tmpmode;
1882 int bytes =
1883 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1884 enum x86_64_reg_class class[MAX_CLASSES];
1885 int n;
1886 int i;
1887 int nexps = 0;
1888 int needed_sseregs, needed_intregs;
1889 rtx exp[MAX_CLASSES];
1890 rtx ret;
1891
1892 n = classify_argument (mode, type, class, 0);
1893 if (TARGET_DEBUG_ARG)
1894 {
1895 if (!n)
1896 fprintf (stderr, "Memory class\n");
1897 else
1898 {
1899 fprintf (stderr, "Classes:");
1900 for (i = 0; i < n; i++)
1901 {
1902 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1903 }
1904 fprintf (stderr, "\n");
1905 }
1906 }
1907 if (!n)
1908 return NULL;
1909 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1910 return NULL;
1911 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1912 return NULL;
1913
1914 /* First construct the simple cases. Avoid SCmode, since we want to use
1915 a single register to pass this type. */
1916 if (n == 1 && mode != SCmode)
1917 switch (class[0])
1918 {
1919 case X86_64_INTEGER_CLASS:
1920 case X86_64_INTEGERSI_CLASS:
1921 return gen_rtx_REG (mode, intreg[0]);
1922 case X86_64_SSE_CLASS:
1923 case X86_64_SSESF_CLASS:
1924 case X86_64_SSEDF_CLASS:
1925 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1926 case X86_64_X87_CLASS:
1927 return gen_rtx_REG (mode, FIRST_STACK_REG);
1928 case X86_64_NO_CLASS:
1929 /* Zero sized array, struct or class. */
1930 return NULL;
1931 default:
1932 abort ();
1933 }
1934 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1935 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1936 if (n == 2
1937 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1938 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1939 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1940 && class[1] == X86_64_INTEGER_CLASS
1941 && (mode == CDImode || mode == TImode)
1942 && intreg[0] + 1 == intreg[1])
1943 return gen_rtx_REG (mode, intreg[0]);
1944 if (n == 4
1945 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1946 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1947 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1948
1949 /* Otherwise figure out the entries of the PARALLEL. */
1950 for (i = 0; i < n; i++)
1951 {
1952 switch (class[i])
1953 {
1954 case X86_64_NO_CLASS:
1955 break;
1956 case X86_64_INTEGER_CLASS:
1957 case X86_64_INTEGERSI_CLASS:
1958 /* Merge TImodes on aligned occasions here too. */
1959 if (i * 8 + 8 > bytes)
1960 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1961 else if (class[i] == X86_64_INTEGERSI_CLASS)
1962 tmpmode = SImode;
1963 else
1964 tmpmode = DImode;
1965 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
1966 if (tmpmode == BLKmode)
1967 tmpmode = DImode;
1968 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1969 gen_rtx_REG (tmpmode, *intreg),
1970 GEN_INT (i*8));
1971 intreg++;
1972 break;
1973 case X86_64_SSESF_CLASS:
1974 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1975 gen_rtx_REG (SFmode,
1976 SSE_REGNO (sse_regno)),
1977 GEN_INT (i*8));
1978 sse_regno++;
1979 break;
1980 case X86_64_SSEDF_CLASS:
1981 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1982 gen_rtx_REG (DFmode,
1983 SSE_REGNO (sse_regno)),
1984 GEN_INT (i*8));
1985 sse_regno++;
1986 break;
1987 case X86_64_SSE_CLASS:
1988 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1989 tmpmode = TImode, i++;
1990 else
1991 tmpmode = DImode;
1992 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1993 gen_rtx_REG (tmpmode,
1994 SSE_REGNO (sse_regno)),
1995 GEN_INT (i*8));
1996 sse_regno++;
1997 break;
1998 default:
1999 abort ();
2000 }
2001 }
2002 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2003 for (i = 0; i < nexps; i++)
2004 XVECEXP (ret, 0, i) = exp [i];
2005 return ret;
2006 }
2007
2008 /* Update the data in CUM to advance over an argument
2009 of mode MODE and data type TYPE.
2010 (TYPE is null for libcalls where that information may not be available.) */
2011
2012 void
2013 function_arg_advance (cum, mode, type, named)
2014 CUMULATIVE_ARGS *cum; /* current arg information */
2015 enum machine_mode mode; /* current arg mode */
2016 tree type; /* type of the argument or 0 if lib support */
2017 int named; /* whether or not the argument was named */
2018 {
2019 int bytes =
2020 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2021 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2022
2023 if (TARGET_DEBUG_ARG)
2024 fprintf (stderr,
2025 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2026 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2027 if (TARGET_64BIT)
2028 {
2029 int int_nregs, sse_nregs;
2030 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2031 cum->words += words;
2032 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2033 {
2034 cum->nregs -= int_nregs;
2035 cum->sse_nregs -= sse_nregs;
2036 cum->regno += int_nregs;
2037 cum->sse_regno += sse_nregs;
2038 }
2039 else
2040 cum->words += words;
2041 }
2042 else
2043 {
2044 if (TARGET_SSE && mode == TImode)
2045 {
2046 cum->sse_words += words;
2047 cum->sse_nregs -= 1;
2048 cum->sse_regno += 1;
2049 if (cum->sse_nregs <= 0)
2050 {
2051 cum->sse_nregs = 0;
2052 cum->sse_regno = 0;
2053 }
2054 }
2055 else
2056 {
2057 cum->words += words;
2058 cum->nregs -= words;
2059 cum->regno += words;
2060
2061 if (cum->nregs <= 0)
2062 {
2063 cum->nregs = 0;
2064 cum->regno = 0;
2065 }
2066 }
2067 }
2068 return;
2069 }
2070
2071 /* Define where to put the arguments to a function.
2072 Value is zero to push the argument on the stack,
2073 or a hard register in which to store the argument.
2074
2075 MODE is the argument's machine mode.
2076 TYPE is the data type of the argument (as a tree).
2077 This is null for libcalls where that information may
2078 not be available.
2079 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2080 the preceding args and about the function being called.
2081 NAMED is nonzero if this argument is a named parameter
2082 (otherwise it is an extra parameter matching an ellipsis). */
2083
2084 rtx
2085 function_arg (cum, mode, type, named)
2086 CUMULATIVE_ARGS *cum; /* current arg information */
2087 enum machine_mode mode; /* current arg mode */
2088 tree type; /* type of the argument or 0 if lib support */
2089 int named; /* != 0 for normal args, == 0 for ... args */
2090 {
2091 rtx ret = NULL_RTX;
2092 int bytes =
2093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2094 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2095
2096 /* Handle a hidden AL argument containing the number of registers for varargs
2097 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2098 any AL settings. */
2099 if (mode == VOIDmode)
2100 {
2101 if (TARGET_64BIT)
2102 return GEN_INT (cum->maybe_vaarg
2103 ? (cum->sse_nregs < 0
2104 ? SSE_REGPARM_MAX
2105 : cum->sse_regno)
2106 : -1);
2107 else
2108 return constm1_rtx;
2109 }
2110 if (TARGET_64BIT)
2111 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2112 &x86_64_int_parameter_registers [cum->regno],
2113 cum->sse_regno);
2114 else
2115 switch (mode)
2116 {
2117 /* For now, pass fp/complex values on the stack. */
2118 default:
2119 break;
2120
2121 case BLKmode:
2122 case DImode:
2123 case SImode:
2124 case HImode:
2125 case QImode:
2126 if (words <= cum->nregs)
2127 ret = gen_rtx_REG (mode, cum->regno);
2128 break;
2129 case TImode:
2130 if (cum->sse_nregs)
2131 ret = gen_rtx_REG (mode, cum->sse_regno);
2132 break;
2133 }
2134
2135 if (TARGET_DEBUG_ARG)
2136 {
2137 fprintf (stderr,
2138 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2139 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2140
2141 if (ret)
2142 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2143 else
2144 fprintf (stderr, ", stack");
2145
2146 fprintf (stderr, " )\n");
2147 }
2148
2149 return ret;
2150 }
2151
2152 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2153 and type. */
2154
2155 int
2156 ix86_function_arg_boundary (mode, type)
2157 enum machine_mode mode;
2158 tree type;
2159 {
2160 int align;
2161 if (!TARGET_64BIT)
2162 return PARM_BOUNDARY;
2163 if (type)
2164 align = TYPE_ALIGN (type);
2165 else
2166 align = GET_MODE_ALIGNMENT (mode);
2167 if (align < PARM_BOUNDARY)
2168 align = PARM_BOUNDARY;
2169 if (align > 128)
2170 align = 128;
2171 return align;
2172 }
2173
2174 /* Return true if N is a possible register number of function value. */
2175 bool
2176 ix86_function_value_regno_p (regno)
2177 int regno;
2178 {
2179 if (!TARGET_64BIT)
2180 {
2181 return ((regno) == 0
2182 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2183 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2184 }
2185 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2186 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2187 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2188 }
2189
2190 /* Define how to find the value returned by a function.
2191 VALTYPE is the data type of the value (as a tree).
2192 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2193 otherwise, FUNC is 0. */
2194 rtx
2195 ix86_function_value (valtype)
2196 tree valtype;
2197 {
2198 if (TARGET_64BIT)
2199 {
2200 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2201 REGPARM_MAX, SSE_REGPARM_MAX,
2202 x86_64_int_return_registers, 0);
2203 /* For zero-sized structures, construct_container returns NULL, but we need
2204 to keep the rest of the compiler happy by returning a meaningful value. */
2205 if (!ret)
2206 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2207 return ret;
2208 }
2209 else
2210 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2211 }
2212
2213 /* Return nonzero iff type is returned in memory. */
2214 int
2215 ix86_return_in_memory (type)
2216 tree type;
2217 {
2218 int needed_intregs, needed_sseregs;
2219 if (TARGET_64BIT)
2220 {
2221 return !examine_argument (TYPE_MODE (type), type, 1,
2222 &needed_intregs, &needed_sseregs);
2223 }
2224 else
2225 {
2226 if (TYPE_MODE (type) == BLKmode
2227 || (VECTOR_MODE_P (TYPE_MODE (type))
2228 && int_size_in_bytes (type) == 8)
2229 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2230 && TYPE_MODE (type) != TFmode
2231 && !VECTOR_MODE_P (TYPE_MODE (type))))
2232 return 1;
2233 return 0;
2234 }
2235 }
2236
2237 /* Define how to find the value returned by a library function
2238 assuming the value has mode MODE. */
2239 rtx
2240 ix86_libcall_value (mode)
2241 enum machine_mode mode;
2242 {
2243 if (TARGET_64BIT)
2244 {
2245 switch (mode)
2246 {
2247 case SFmode:
2248 case SCmode:
2249 case DFmode:
2250 case DCmode:
2251 return gen_rtx_REG (mode, FIRST_SSE_REG);
2252 case TFmode:
2253 case TCmode:
2254 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2255 default:
2256 return gen_rtx_REG (mode, 0);
2257 }
2258 }
2259 else
2260 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2261 }
2262 \f
2263 /* Create the va_list data type. */
2264
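/* For orientation, the record built below corresponds roughly to this C
   declaration (a sketch of the x86-64 ABI layout, not code from this file):

       struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       };
       typedef struct __va_list_tag va_list[1];  */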
2265 tree
2266 ix86_build_va_list ()
2267 {
2268 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2269
2270 /* For i386 we use plain pointer to argument area. */
2271 if (!TARGET_64BIT)
2272 return build_pointer_type (char_type_node);
2273
2274 record = make_lang_type (RECORD_TYPE);
2275 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2276
2277 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2278 unsigned_type_node);
2279 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2280 unsigned_type_node);
2281 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2282 ptr_type_node);
2283 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2284 ptr_type_node);
2285
2286 DECL_FIELD_CONTEXT (f_gpr) = record;
2287 DECL_FIELD_CONTEXT (f_fpr) = record;
2288 DECL_FIELD_CONTEXT (f_ovf) = record;
2289 DECL_FIELD_CONTEXT (f_sav) = record;
2290
2291 TREE_CHAIN (record) = type_decl;
2292 TYPE_NAME (record) = type_decl;
2293 TYPE_FIELDS (record) = f_gpr;
2294 TREE_CHAIN (f_gpr) = f_fpr;
2295 TREE_CHAIN (f_fpr) = f_ovf;
2296 TREE_CHAIN (f_ovf) = f_sav;
2297
2298 layout_type (record);
2299
2300 /* The correct type is an array type of one element. */
2301 return build_array_type (record, build_index_type (size_zero_node));
2302 }
2303
2304 /* Perform any actions needed for a function that is receiving a
2305 variable number of arguments.
2306
2307 CUM is as above.
2308
2309 MODE and TYPE are the mode and type of the current parameter.
2310
2311 PRETEND_SIZE is a variable that should be set to the amount of stack
2312 that must be pushed by the prolog to pretend that our caller pushed
2313 it.
2314
2315 Normally, this macro will push all remaining incoming registers on the
2316 stack and set PRETEND_SIZE to the length of the registers pushed. */
2317
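/* A rough sketch of the varargs register save area set up below (assuming
   the standard x86-64 ABI register order):

       offset 0 .. 8*REGPARM_MAX-1   : rdi, rsi, rdx, rcx, r8, r9 (8 bytes each)
       offset 8*REGPARM_MAX onwards  : xmm0 .. xmm7 (16 bytes each)

   which matches the gp_offset and fp_offset values stored by ix86_va_start.  */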
2318 void
2319 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2320 CUMULATIVE_ARGS *cum;
2321 enum machine_mode mode;
2322 tree type;
2323 int *pretend_size ATTRIBUTE_UNUSED;
2324 int no_rtl;
2325
2326 {
2327 CUMULATIVE_ARGS next_cum;
2328 rtx save_area = NULL_RTX, mem;
2329 rtx label;
2330 rtx label_ref;
2331 rtx tmp_reg;
2332 rtx nsse_reg;
2333 int set;
2334 tree fntype;
2335 int stdarg_p;
2336 int i;
2337
2338 if (!TARGET_64BIT)
2339 return;
2340
2341 /* Indicate to allocate space on the stack for varargs save area. */
2342 ix86_save_varrargs_registers = 1;
2343
2344 fntype = TREE_TYPE (current_function_decl);
2345 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2346 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2347 != void_type_node));
2348
2349 /* For varargs, we do not want to skip the dummy va_dcl argument.
2350 For stdargs, we do want to skip the last named argument. */
2351 next_cum = *cum;
2352 if (stdarg_p)
2353 function_arg_advance (&next_cum, mode, type, 1);
2354
2355 if (!no_rtl)
2356 save_area = frame_pointer_rtx;
2357
2358 set = get_varargs_alias_set ();
2359
2360 for (i = next_cum.regno; i < ix86_regparm; i++)
2361 {
2362 mem = gen_rtx_MEM (Pmode,
2363 plus_constant (save_area, i * UNITS_PER_WORD));
2364 set_mem_alias_set (mem, set);
2365 emit_move_insn (mem, gen_rtx_REG (Pmode,
2366 x86_64_int_parameter_registers[i]));
2367 }
2368
2369 if (next_cum.sse_nregs)
2370 {
2371 /* Now emit code to save SSE registers. The AX parameter contains the number
2372 of SSE parameter registers used to call this function. We use the
2373 sse_prologue_save insn template that produces a computed jump across
2374 the SSE saves. We need some preparation work to get this working. */
2375
2376 label = gen_label_rtx ();
2377 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2378
2379 /* Compute the address to jump to:
2380 label - 4*eax + nnamed_sse_arguments*4 */
2381 tmp_reg = gen_reg_rtx (Pmode);
2382 nsse_reg = gen_reg_rtx (Pmode);
2383 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2384 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2385 gen_rtx_MULT (Pmode, nsse_reg,
2386 GEN_INT (4))));
2387 if (next_cum.sse_regno)
2388 emit_move_insn
2389 (nsse_reg,
2390 gen_rtx_CONST (DImode,
2391 gen_rtx_PLUS (DImode,
2392 label_ref,
2393 GEN_INT (next_cum.sse_regno * 4))));
2394 else
2395 emit_move_insn (nsse_reg, label_ref);
2396 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2397
2398 /* Compute the address of the memory block we save into. We always use a pointer
2399 pointing 127 bytes after the first byte to store - this is needed to keep
2400 the instruction size limited to 4 bytes. */
2401 tmp_reg = gen_reg_rtx (Pmode);
2402 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2403 plus_constant (save_area,
2404 8 * REGPARM_MAX + 127)));
2405 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2406 set_mem_alias_set (mem, set);
2407 set_mem_align (mem, BITS_PER_WORD);
2408
2409 /* And finally do the dirty job! */
2410 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2411 GEN_INT (next_cum.sse_regno), label));
2412 }
2413
2414 }
2415
2416 /* Implement va_start. */
2417
2418 void
2419 ix86_va_start (stdarg_p, valist, nextarg)
2420 int stdarg_p;
2421 tree valist;
2422 rtx nextarg;
2423 {
2424 HOST_WIDE_INT words, n_gpr, n_fpr;
2425 tree f_gpr, f_fpr, f_ovf, f_sav;
2426 tree gpr, fpr, ovf, sav, t;
2427
2428 /* Only 64bit target needs something special. */
2429 if (!TARGET_64BIT)
2430 {
2431 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2432 return;
2433 }
2434
2435 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2436 f_fpr = TREE_CHAIN (f_gpr);
2437 f_ovf = TREE_CHAIN (f_fpr);
2438 f_sav = TREE_CHAIN (f_ovf);
2439
2440 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2441 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2442 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2443 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2444 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2445
2446 /* Count number of gp and fp argument registers used. */
2447 words = current_function_args_info.words;
2448 n_gpr = current_function_args_info.regno;
2449 n_fpr = current_function_args_info.sse_regno;
2450
2451 if (TARGET_DEBUG_ARG)
2452 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2453 (int) words, (int) n_gpr, (int) n_fpr);
2454
2455 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2456 build_int_2 (n_gpr * 8, 0));
2457 TREE_SIDE_EFFECTS (t) = 1;
2458 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2459
2460 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2461 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2462 TREE_SIDE_EFFECTS (t) = 1;
2463 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2464
2465 /* Find the overflow area. */
2466 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2467 if (words != 0)
2468 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2469 build_int_2 (words * UNITS_PER_WORD, 0));
2470 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2471 TREE_SIDE_EFFECTS (t) = 1;
2472 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2473
2474 /* Find the register save area.
2475 The prologue of the function saves it right above the stack frame. */
2476 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2477 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2478 TREE_SIDE_EFFECTS (t) = 1;
2479 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2480 }
2481
2482 /* Implement va_arg. */
2483 rtx
2484 ix86_va_arg (valist, type)
2485 tree valist, type;
2486 {
2487 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2488 tree f_gpr, f_fpr, f_ovf, f_sav;
2489 tree gpr, fpr, ovf, sav, t;
2490 int size, rsize;
2491 rtx lab_false, lab_over = NULL_RTX;
2492 rtx addr_rtx, r;
2493 rtx container;
2494
2495 /* Only 64bit target needs something special. */
2496 if (!TARGET_64BIT)
2497 {
2498 return std_expand_builtin_va_arg (valist, type);
2499 }
2500
2501 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2502 f_fpr = TREE_CHAIN (f_gpr);
2503 f_ovf = TREE_CHAIN (f_fpr);
2504 f_sav = TREE_CHAIN (f_ovf);
2505
2506 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2507 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2508 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2509 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2510 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2511
2512 size = int_size_in_bytes (type);
2513 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2514
2515 container = construct_container (TYPE_MODE (type), type, 0,
2516 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2517 /*
2518 * Pull the value out of the saved registers ...
2519 */
2520
2521 addr_rtx = gen_reg_rtx (Pmode);
2522
2523 if (container)
2524 {
2525 rtx int_addr_rtx, sse_addr_rtx;
2526 int needed_intregs, needed_sseregs;
2527 int need_temp;
2528
2529 lab_over = gen_label_rtx ();
2530 lab_false = gen_label_rtx ();
2531
2532 examine_argument (TYPE_MODE (type), type, 0,
2533 &needed_intregs, &needed_sseregs);
2534
2535
2536 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2537 || TYPE_ALIGN (type) > 128);
2538
2539 /* In case we are passing a structure, verify that it is a consecutive block
2540 in the register save area. If not, we need to do moves. */
2541 if (!need_temp && !REG_P (container))
2542 {
2543 /* Verify that all registers are strictly consecutive. */
2544 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2545 {
2546 int i;
2547
2548 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2549 {
2550 rtx slot = XVECEXP (container, 0, i);
2551 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2552 || INTVAL (XEXP (slot, 1)) != i * 16)
2553 need_temp = 1;
2554 }
2555 }
2556 else
2557 {
2558 int i;
2559
2560 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2561 {
2562 rtx slot = XVECEXP (container, 0, i);
2563 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2564 || INTVAL (XEXP (slot, 1)) != i * 8)
2565 need_temp = 1;
2566 }
2567 }
2568 }
2569 if (!need_temp)
2570 {
2571 int_addr_rtx = addr_rtx;
2572 sse_addr_rtx = addr_rtx;
2573 }
2574 else
2575 {
2576 int_addr_rtx = gen_reg_rtx (Pmode);
2577 sse_addr_rtx = gen_reg_rtx (Pmode);
2578 }
2579 /* First ensure that we fit completely in registers. */
2580 if (needed_intregs)
2581 {
2582 emit_cmp_and_jump_insns (expand_expr
2583 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2584 GEN_INT ((REGPARM_MAX - needed_intregs +
2585 1) * 8), GE, const1_rtx, SImode,
2586 1, lab_false);
2587 }
2588 if (needed_sseregs)
2589 {
2590 emit_cmp_and_jump_insns (expand_expr
2591 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2592 GEN_INT ((SSE_REGPARM_MAX -
2593 needed_sseregs + 1) * 16 +
2594 REGPARM_MAX * 8), GE, const1_rtx,
2595 SImode, 1, lab_false);
2596 }
2597
2598 /* Compute index to start of area used for integer regs. */
2599 if (needed_intregs)
2600 {
2601 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2602 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2603 if (r != int_addr_rtx)
2604 emit_move_insn (int_addr_rtx, r);
2605 }
2606 if (needed_sseregs)
2607 {
2608 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2609 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2610 if (r != sse_addr_rtx)
2611 emit_move_insn (sse_addr_rtx, r);
2612 }
2613 if (need_temp)
2614 {
2615 int i;
2616 rtx mem;
2617
2618 /* Never use the memory itself, as it has the alias set. */
2619 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2620 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2621 set_mem_alias_set (mem, get_varargs_alias_set ());
2622 set_mem_align (mem, BITS_PER_UNIT);
2623
2624 for (i = 0; i < XVECLEN (container, 0); i++)
2625 {
2626 rtx slot = XVECEXP (container, 0, i);
2627 rtx reg = XEXP (slot, 0);
2628 enum machine_mode mode = GET_MODE (reg);
2629 rtx src_addr;
2630 rtx src_mem;
2631 int src_offset;
2632 rtx dest_mem;
2633
2634 if (SSE_REGNO_P (REGNO (reg)))
2635 {
2636 src_addr = sse_addr_rtx;
2637 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2638 }
2639 else
2640 {
2641 src_addr = int_addr_rtx;
2642 src_offset = REGNO (reg) * 8;
2643 }
2644 src_mem = gen_rtx_MEM (mode, src_addr);
2645 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2646 src_mem = adjust_address (src_mem, mode, src_offset);
2647 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2648 emit_move_insn (dest_mem, src_mem);
2649 }
2650 }
2651
2652 if (needed_intregs)
2653 {
2654 t =
2655 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2656 build_int_2 (needed_intregs * 8, 0));
2657 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2658 TREE_SIDE_EFFECTS (t) = 1;
2659 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2660 }
2661 if (needed_sseregs)
2662 {
2663 t =
2664 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2665 build_int_2 (needed_sseregs * 16, 0));
2666 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2667 TREE_SIDE_EFFECTS (t) = 1;
2668 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2669 }
2670
2671 emit_jump_insn (gen_jump (lab_over));
2672 emit_barrier ();
2673 emit_label (lab_false);
2674 }
2675
2676 /* ... otherwise out of the overflow area. */
2677
2678 /* Care for on-stack alignment if needed. */
2679 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2680 t = ovf;
2681 else
2682 {
2683 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2684 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2685 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2686 }
2687 t = save_expr (t);
2688
2689 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2690 if (r != addr_rtx)
2691 emit_move_insn (addr_rtx, r);
2692
2693 t =
2694 build (PLUS_EXPR, TREE_TYPE (t), t,
2695 build_int_2 (rsize * UNITS_PER_WORD, 0));
2696 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2697 TREE_SIDE_EFFECTS (t) = 1;
2698 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2699
2700 if (container)
2701 emit_label (lab_over);
2702
2703 return addr_rtx;
2704 }
2705 \f
2706 /* Return nonzero if OP is general operand representable on x86_64. */
2707
2708 int
2709 x86_64_general_operand (op, mode)
2710 rtx op;
2711 enum machine_mode mode;
2712 {
2713 if (!TARGET_64BIT)
2714 return general_operand (op, mode);
2715 if (nonimmediate_operand (op, mode))
2716 return 1;
2717 return x86_64_sign_extended_value (op);
2718 }
2719
2720 /* Return nonzero if OP is general operand representable on x86_64
2721 as either sign extended or zero extended constant. */
2722
2723 int
2724 x86_64_szext_general_operand (op, mode)
2725 rtx op;
2726 enum machine_mode mode;
2727 {
2728 if (!TARGET_64BIT)
2729 return general_operand (op, mode);
2730 if (nonimmediate_operand (op, mode))
2731 return 1;
2732 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2733 }
2734
2735 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2736
2737 int
2738 x86_64_nonmemory_operand (op, mode)
2739 rtx op;
2740 enum machine_mode mode;
2741 {
2742 if (!TARGET_64BIT)
2743 return nonmemory_operand (op, mode);
2744 if (register_operand (op, mode))
2745 return 1;
2746 return x86_64_sign_extended_value (op);
2747 }
2748
2749 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2750
2751 int
2752 x86_64_movabs_operand (op, mode)
2753 rtx op;
2754 enum machine_mode mode;
2755 {
2756 if (!TARGET_64BIT || !flag_pic)
2757 return nonmemory_operand (op, mode);
2758 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2759 return 1;
2760 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2761 return 1;
2762 return 0;
2763 }
2764
2765 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2766
2767 int
2768 x86_64_szext_nonmemory_operand (op, mode)
2769 rtx op;
2770 enum machine_mode mode;
2771 {
2772 if (!TARGET_64BIT)
2773 return nonmemory_operand (op, mode);
2774 if (register_operand (op, mode))
2775 return 1;
2776 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2777 }
2778
2779 /* Return nonzero if OP is immediate operand representable on x86_64. */
2780
2781 int
2782 x86_64_immediate_operand (op, mode)
2783 rtx op;
2784 enum machine_mode mode;
2785 {
2786 if (!TARGET_64BIT)
2787 return immediate_operand (op, mode);
2788 return x86_64_sign_extended_value (op);
2789 }
2790
2791 /* Return nonzero if OP is immediate operand representable on x86_64. */
2792
2793 int
2794 x86_64_zext_immediate_operand (op, mode)
2795 rtx op;
2796 enum machine_mode mode ATTRIBUTE_UNUSED;
2797 {
2798 return x86_64_zero_extended_value (op);
2799 }
2800
2801 /* Return nonzero if OP is (const_int 1), else return zero. */
2802
2803 int
2804 const_int_1_operand (op, mode)
2805 rtx op;
2806 enum machine_mode mode ATTRIBUTE_UNUSED;
2807 {
2808 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2809 }
2810
2811 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2812 reference and a constant. */
2813
2814 int
2815 symbolic_operand (op, mode)
2816 register rtx op;
2817 enum machine_mode mode ATTRIBUTE_UNUSED;
2818 {
2819 switch (GET_CODE (op))
2820 {
2821 case SYMBOL_REF:
2822 case LABEL_REF:
2823 return 1;
2824
2825 case CONST:
2826 op = XEXP (op, 0);
2827 if (GET_CODE (op) == SYMBOL_REF
2828 || GET_CODE (op) == LABEL_REF
2829 || (GET_CODE (op) == UNSPEC
2830 && (XINT (op, 1) == 6
2831 || XINT (op, 1) == 7
2832 || XINT (op, 1) == 15)))
2833 return 1;
2834 if (GET_CODE (op) != PLUS
2835 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2836 return 0;
2837
2838 op = XEXP (op, 0);
2839 if (GET_CODE (op) == SYMBOL_REF
2840 || GET_CODE (op) == LABEL_REF)
2841 return 1;
2842 /* Only @GOTOFF gets offsets. */
2843 if (GET_CODE (op) != UNSPEC
2844 || XINT (op, 1) != 7)
2845 return 0;
2846
2847 op = XVECEXP (op, 0, 0);
2848 if (GET_CODE (op) == SYMBOL_REF
2849 || GET_CODE (op) == LABEL_REF)
2850 return 1;
2851 return 0;
2852
2853 default:
2854 return 0;
2855 }
2856 }
2857
2858 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2859
2860 int
2861 pic_symbolic_operand (op, mode)
2862 register rtx op;
2863 enum machine_mode mode ATTRIBUTE_UNUSED;
2864 {
2865 if (GET_CODE (op) != CONST)
2866 return 0;
2867 op = XEXP (op, 0);
2868 if (TARGET_64BIT)
2869 {
2870 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2871 return 1;
2872 }
2873 else
2874 {
2875 if (GET_CODE (op) == UNSPEC)
2876 return 1;
2877 if (GET_CODE (op) != PLUS
2878 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2879 return 0;
2880 op = XEXP (op, 0);
2881 if (GET_CODE (op) == UNSPEC)
2882 return 1;
2883 }
2884 return 0;
2885 }
2886
2887 /* Return true if OP is a symbolic operand that resolves locally. */
2888
2889 static int
2890 local_symbolic_operand (op, mode)
2891 rtx op;
2892 enum machine_mode mode ATTRIBUTE_UNUSED;
2893 {
2894 if (GET_CODE (op) == LABEL_REF)
2895 return 1;
2896
2897 if (GET_CODE (op) == CONST
2898 && GET_CODE (XEXP (op, 0)) == PLUS
2899 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2900 op = XEXP (XEXP (op, 0), 0);
2901
2902 if (GET_CODE (op) != SYMBOL_REF)
2903 return 0;
2904
2905 /* These we've been told are local by varasm and encode_section_info
2906 respectively. */
2907 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2908 return 1;
2909
2910 /* There is, however, a not insubstantial body of code in the rest of
2911 the compiler that assumes it can just stick the results of
2912 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2913 /* ??? This is a hack. Should update the body of the compiler to
2914 always create a DECL and invoke ENCODE_SECTION_INFO. */
2915 if (strncmp (XSTR (op, 0), internal_label_prefix,
2916 internal_label_prefix_len) == 0)
2917 return 1;
2918
2919 return 0;
2920 }
2921
2922 /* Test for a valid operand for a call instruction. Don't allow the
2923 arg pointer register or virtual regs since they may decay into
2924 reg + const, which the patterns can't handle. */
2925
2926 int
2927 call_insn_operand (op, mode)
2928 rtx op;
2929 enum machine_mode mode ATTRIBUTE_UNUSED;
2930 {
2931 /* Disallow indirect through a virtual register. This leads to
2932 compiler aborts when trying to eliminate them. */
2933 if (GET_CODE (op) == REG
2934 && (op == arg_pointer_rtx
2935 || op == frame_pointer_rtx
2936 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2937 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2938 return 0;
2939
2940 /* Disallow `call 1234'. Due to varying assembler lameness this
2941 gets either rejected or translated to `call .+1234'. */
2942 if (GET_CODE (op) == CONST_INT)
2943 return 0;
2944
2945 /* Explicitly allow SYMBOL_REF even if pic. */
2946 if (GET_CODE (op) == SYMBOL_REF)
2947 return 1;
2948
2949 /* Half-pic doesn't allow anything but registers and constants.
2950 We've just taken care of the latter. */
2951 if (HALF_PIC_P ())
2952 return register_operand (op, Pmode);
2953
2954 /* Otherwise we can allow any general_operand in the address. */
2955 return general_operand (op, Pmode);
2956 }
2957
2958 int
2959 constant_call_address_operand (op, mode)
2960 rtx op;
2961 enum machine_mode mode ATTRIBUTE_UNUSED;
2962 {
2963 if (GET_CODE (op) == CONST
2964 && GET_CODE (XEXP (op, 0)) == PLUS
2965 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2966 op = XEXP (XEXP (op, 0), 0);
2967 return GET_CODE (op) == SYMBOL_REF;
2968 }
2969
2970 /* Match exactly zero and one. */
2971
2972 int
2973 const0_operand (op, mode)
2974 register rtx op;
2975 enum machine_mode mode;
2976 {
2977 return op == CONST0_RTX (mode);
2978 }
2979
2980 int
2981 const1_operand (op, mode)
2982 register rtx op;
2983 enum machine_mode mode ATTRIBUTE_UNUSED;
2984 {
2985 return op == const1_rtx;
2986 }
2987
2988 /* Match 2, 4, or 8. Used for leal multiplicands. */
2989
2990 int
2991 const248_operand (op, mode)
2992 register rtx op;
2993 enum machine_mode mode ATTRIBUTE_UNUSED;
2994 {
2995 return (GET_CODE (op) == CONST_INT
2996 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2997 }
2998
2999 /* True if this is a constant appropriate for an increment or decrement. */
3000
3001 int
3002 incdec_operand (op, mode)
3003 register rtx op;
3004 enum machine_mode mode ATTRIBUTE_UNUSED;
3005 {
3006 /* On the Pentium 4, the inc and dec operations cause an extra dependency on the
3007 flags register, since the carry flag is not set. */
3008 if (TARGET_PENTIUM4 && !optimize_size)
3009 return 0;
3010 return op == const1_rtx || op == constm1_rtx;
3011 }
3012
3013 /* Return nonzero if OP is acceptable as operand of DImode shift
3014 expander. */
3015
3016 int
3017 shiftdi_operand (op, mode)
3018 rtx op;
3019 enum machine_mode mode ATTRIBUTE_UNUSED;
3020 {
3021 if (TARGET_64BIT)
3022 return nonimmediate_operand (op, mode);
3023 else
3024 return register_operand (op, mode);
3025 }
3026
3027 /* Return false if this is the stack pointer, or any other fake
3028 register eliminable to the stack pointer. Otherwise, this is
3029 a register operand.
3030
3031 This is used to prevent esp from being used as an index reg,
3032 which would only happen in pathological cases. */
3033
3034 int
3035 reg_no_sp_operand (op, mode)
3036 register rtx op;
3037 enum machine_mode mode;
3038 {
3039 rtx t = op;
3040 if (GET_CODE (t) == SUBREG)
3041 t = SUBREG_REG (t);
3042 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3043 return 0;
3044
3045 return register_operand (op, mode);
3046 }
3047
3048 int
3049 mmx_reg_operand (op, mode)
3050 register rtx op;
3051 enum machine_mode mode ATTRIBUTE_UNUSED;
3052 {
3053 return MMX_REG_P (op);
3054 }
3055
3056 /* Return false if this is any eliminable register. Otherwise
3057 general_operand. */
3058
3059 int
3060 general_no_elim_operand (op, mode)
3061 register rtx op;
3062 enum machine_mode mode;
3063 {
3064 rtx t = op;
3065 if (GET_CODE (t) == SUBREG)
3066 t = SUBREG_REG (t);
3067 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3068 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3069 || t == virtual_stack_dynamic_rtx)
3070 return 0;
3071 if (REG_P (t)
3072 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3073 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3074 return 0;
3075
3076 return general_operand (op, mode);
3077 }
3078
3079 /* Return false if this is any eliminable register. Otherwise
3080 register_operand or const_int. */
3081
3082 int
3083 nonmemory_no_elim_operand (op, mode)
3084 register rtx op;
3085 enum machine_mode mode;
3086 {
3087 rtx t = op;
3088 if (GET_CODE (t) == SUBREG)
3089 t = SUBREG_REG (t);
3090 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3091 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3092 || t == virtual_stack_dynamic_rtx)
3093 return 0;
3094
3095 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3096 }
3097
3098 /* Return true if op is a Q_REGS class register. */
3099
3100 int
3101 q_regs_operand (op, mode)
3102 register rtx op;
3103 enum machine_mode mode;
3104 {
3105 if (mode != VOIDmode && GET_MODE (op) != mode)
3106 return 0;
3107 if (GET_CODE (op) == SUBREG)
3108 op = SUBREG_REG (op);
3109 return QI_REG_P (op);
3110 }
3111
3112 /* Return true if op is a NON_Q_REGS class register. */
3113
3114 int
3115 non_q_regs_operand (op, mode)
3116 register rtx op;
3117 enum machine_mode mode;
3118 {
3119 if (mode != VOIDmode && GET_MODE (op) != mode)
3120 return 0;
3121 if (GET_CODE (op) == SUBREG)
3122 op = SUBREG_REG (op);
3123 return NON_QI_REG_P (op);
3124 }
3125
3126 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3127 insns. */
3128 int
3129 sse_comparison_operator (op, mode)
3130 rtx op;
3131 enum machine_mode mode ATTRIBUTE_UNUSED;
3132 {
3133 enum rtx_code code = GET_CODE (op);
3134 switch (code)
3135 {
3136 /* Operations supported directly. */
3137 case EQ:
3138 case LT:
3139 case LE:
3140 case UNORDERED:
3141 case NE:
3142 case UNGE:
3143 case UNGT:
3144 case ORDERED:
3145 return 1;
3146 /* These are equivalent to ones above in non-IEEE comparisons. */
3147 case UNEQ:
3148 case UNLT:
3149 case UNLE:
3150 case LTGT:
3151 case GE:
3152 case GT:
3153 return !TARGET_IEEE_FP;
3154 default:
3155 return 0;
3156 }
3157 }
3158 /* Return 1 if OP is a valid comparison operator in valid mode. */
3159 int
3160 ix86_comparison_operator (op, mode)
3161 register rtx op;
3162 enum machine_mode mode;
3163 {
3164 enum machine_mode inmode;
3165 enum rtx_code code = GET_CODE (op);
3166 if (mode != VOIDmode && GET_MODE (op) != mode)
3167 return 0;
3168 if (GET_RTX_CLASS (code) != '<')
3169 return 0;
3170 inmode = GET_MODE (XEXP (op, 0));
3171
3172 if (inmode == CCFPmode || inmode == CCFPUmode)
3173 {
3174 enum rtx_code second_code, bypass_code;
3175 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3176 return (bypass_code == NIL && second_code == NIL);
3177 }
3178 switch (code)
3179 {
3180 case EQ: case NE:
3181 return 1;
3182 case LT: case GE:
3183 if (inmode == CCmode || inmode == CCGCmode
3184 || inmode == CCGOCmode || inmode == CCNOmode)
3185 return 1;
3186 return 0;
3187 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3188 if (inmode == CCmode)
3189 return 1;
3190 return 0;
3191 case GT: case LE:
3192 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3193 return 1;
3194 return 0;
3195 default:
3196 return 0;
3197 }
3198 }
3199
3200 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3201
3202 int
3203 fcmov_comparison_operator (op, mode)
3204 register rtx op;
3205 enum machine_mode mode;
3206 {
3207 enum machine_mode inmode;
3208 enum rtx_code code = GET_CODE (op);
3209 if (mode != VOIDmode && GET_MODE (op) != mode)
3210 return 0;
3211 if (GET_RTX_CLASS (code) != '<')
3212 return 0;
3213 inmode = GET_MODE (XEXP (op, 0));
3214 if (inmode == CCFPmode || inmode == CCFPUmode)
3215 {
3216 enum rtx_code second_code, bypass_code;
3217 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3218 if (bypass_code != NIL || second_code != NIL)
3219 return 0;
3220 code = ix86_fp_compare_code_to_integer (code);
3221 }
3222 /* The i387 supports only a limited set of condition codes. */
3223 switch (code)
3224 {
3225 case LTU: case GTU: case LEU: case GEU:
3226 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3227 return 1;
3228 return 0;
3229 case ORDERED: case UNORDERED:
3230 case EQ: case NE:
3231 return 1;
3232 default:
3233 return 0;
3234 }
3235 }
3236
3237 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3238
3239 int
3240 promotable_binary_operator (op, mode)
3241 register rtx op;
3242 enum machine_mode mode ATTRIBUTE_UNUSED;
3243 {
3244 switch (GET_CODE (op))
3245 {
3246 case MULT:
3247 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3248 but the 386 and 486 do HImode multiplies faster. */
3249 return ix86_cpu > PROCESSOR_I486;
3250 case PLUS:
3251 case AND:
3252 case IOR:
3253 case XOR:
3254 case ASHIFT:
3255 return 1;
3256 default:
3257 return 0;
3258 }
3259 }
3260
3261 /* Nearly general operand, but accept any const_double, since we wish
3262 to be able to drop them into memory rather than have them get pulled
3263 into registers. */
3264
3265 int
3266 cmp_fp_expander_operand (op, mode)
3267 register rtx op;
3268 enum machine_mode mode;
3269 {
3270 if (mode != VOIDmode && mode != GET_MODE (op))
3271 return 0;
3272 if (GET_CODE (op) == CONST_DOUBLE)
3273 return 1;
3274 return general_operand (op, mode);
3275 }
3276
3277 /* Match an SI or HImode register for a zero_extract. */
3278
3279 int
3280 ext_register_operand (op, mode)
3281 register rtx op;
3282 enum machine_mode mode ATTRIBUTE_UNUSED;
3283 {
3284 int regno;
3285 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3286 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3287 return 0;
3288
3289 if (!register_operand (op, VOIDmode))
3290 return 0;
3291
3292 /* Be careful to accept only registers having upper parts. */
3293 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3294 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3295 }
3296
3297 /* Return 1 if this is a valid binary floating-point operation.
3298 OP is the expression matched, and MODE is its mode. */
3299
3300 int
3301 binary_fp_operator (op, mode)
3302 register rtx op;
3303 enum machine_mode mode;
3304 {
3305 if (mode != VOIDmode && mode != GET_MODE (op))
3306 return 0;
3307
3308 switch (GET_CODE (op))
3309 {
3310 case PLUS:
3311 case MINUS:
3312 case MULT:
3313 case DIV:
3314 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3315
3316 default:
3317 return 0;
3318 }
3319 }
3320
3321 int
3322 mult_operator (op, mode)
3323 register rtx op;
3324 enum machine_mode mode ATTRIBUTE_UNUSED;
3325 {
3326 return GET_CODE (op) == MULT;
3327 }
3328
3329 int
3330 div_operator (op, mode)
3331 register rtx op;
3332 enum machine_mode mode ATTRIBUTE_UNUSED;
3333 {
3334 return GET_CODE (op) == DIV;
3335 }
3336
3337 int
3338 arith_or_logical_operator (op, mode)
3339 rtx op;
3340 enum machine_mode mode;
3341 {
3342 return ((mode == VOIDmode || GET_MODE (op) == mode)
3343 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3344 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3345 }
3346
3347 /* Returns 1 if OP is memory operand with a displacement. */
3348
3349 int
3350 memory_displacement_operand (op, mode)
3351 register rtx op;
3352 enum machine_mode mode;
3353 {
3354 struct ix86_address parts;
3355
3356 if (! memory_operand (op, mode))
3357 return 0;
3358
3359 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3360 abort ();
3361
3362 return parts.disp != NULL_RTX;
3363 }
3364
3365 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3366 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3367
3368 ??? It seems likely that this will only work because cmpsi is an
3369 expander, and no actual insns use this. */
3370
3371 int
3372 cmpsi_operand (op, mode)
3373 rtx op;
3374 enum machine_mode mode;
3375 {
3376 if (nonimmediate_operand (op, mode))
3377 return 1;
3378
3379 if (GET_CODE (op) == AND
3380 && GET_MODE (op) == SImode
3381 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3382 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3383 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3384 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3385 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3386 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3387 return 1;
3388
3389 return 0;
3390 }
3391
3392 /* Returns 1 if OP is a memory operand that cannot be represented by the
3393 modRM array. */
3394
3395 int
3396 long_memory_operand (op, mode)
3397 register rtx op;
3398 enum machine_mode mode;
3399 {
3400 if (! memory_operand (op, mode))
3401 return 0;
3402
3403 return memory_address_length (op) != 0;
3404 }
3405
3406 /* Return nonzero if the rtx is known aligned. */
3407
3408 int
3409 aligned_operand (op, mode)
3410 rtx op;
3411 enum machine_mode mode;
3412 {
3413 struct ix86_address parts;
3414
3415 if (!general_operand (op, mode))
3416 return 0;
3417
3418 /* Registers and immediate operands are always "aligned". */
3419 if (GET_CODE (op) != MEM)
3420 return 1;
3421
3422 /* Don't even try to do any aligned optimizations with volatiles. */
3423 if (MEM_VOLATILE_P (op))
3424 return 0;
3425
3426 op = XEXP (op, 0);
3427
3428 /* Pushes and pops are only valid on the stack pointer. */
3429 if (GET_CODE (op) == PRE_DEC
3430 || GET_CODE (op) == POST_INC)
3431 return 1;
3432
3433 /* Decode the address. */
3434 if (! ix86_decompose_address (op, &parts))
3435 abort ();
3436
3437 /* Look for some component that isn't known to be aligned. */
3438 if (parts.index)
3439 {
3440 if (parts.scale < 4
3441 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3442 return 0;
3443 }
3444 if (parts.base)
3445 {
3446 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3447 return 0;
3448 }
3449 if (parts.disp)
3450 {
3451 if (GET_CODE (parts.disp) != CONST_INT
3452 || (INTVAL (parts.disp) & 3) != 0)
3453 return 0;
3454 }
3455
3456 /* Didn't find one -- this must be an aligned address. */
3457 return 1;
3458 }
3459 \f
3460 /* Return true if the constant is something that can be loaded with
3461 a special instruction. Only handle 0.0 and 1.0; others are less
3462 worthwhile. */
3463
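/* A usage sketch (illustrative, not from this file): the return value is
   meant to select the special load instruction, roughly

       standard_80387_constant_p (CONST0_RTX (DFmode)) == 1   (load with fldz)
       standard_80387_constant_p (CONST1_RTX (DFmode)) == 2   (load with fld1)

   while any other FP CONST_DOUBLE yields 0 and is loaded from memory.  */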
3464 int
3465 standard_80387_constant_p (x)
3466 rtx x;
3467 {
3468 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3469 return -1;
3470 /* Note that on the 80387 there are other constants, such as pi, that we should
3471 support too. On some machines, these are much slower to load as a standard
3472 constant than to load from doubles in memory. */
3473 if (x == CONST0_RTX (GET_MODE (x)))
3474 return 1;
3475 if (x == CONST1_RTX (GET_MODE (x)))
3476 return 2;
3477 return 0;
3478 }
3479
3480 /* Return 1 if X is an FP constant we can load into an SSE register without
3481 using memory. */
3482 int
3483 standard_sse_constant_p (x)
3484 rtx x;
3485 {
3486 if (GET_CODE (x) != CONST_DOUBLE)
3487 return -1;
3488 return (x == CONST0_RTX (GET_MODE (x)));
3489 }
3490
3491 /* Returns 1 if OP contains a symbol reference. */
3492
3493 int
3494 symbolic_reference_mentioned_p (op)
3495 rtx op;
3496 {
3497 register const char *fmt;
3498 register int i;
3499
3500 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3501 return 1;
3502
3503 fmt = GET_RTX_FORMAT (GET_CODE (op));
3504 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3505 {
3506 if (fmt[i] == 'E')
3507 {
3508 register int j;
3509
3510 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3511 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3512 return 1;
3513 }
3514
3515 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3516 return 1;
3517 }
3518
3519 return 0;
3520 }
3521
3522 /* Return 1 if it is appropriate to emit `ret' instructions in the
3523 body of a function. Do this only if the epilogue is simple, needing a
3524 couple of insns. Prior to reloading, we can't tell how many registers
3525 must be saved, so return 0 then. Return 0 if there is no frame
3526 marker to de-allocate.
3527
3528 If NON_SAVING_SETJMP is defined and true, then it is not possible
3529 for the epilogue to be simple, so return 0. This is a special case
3530 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3531 until final, but jump_optimize may need to know sooner if a
3532 `return' is OK. */
3533
3534 int
3535 ix86_can_use_return_insn_p ()
3536 {
3537 struct ix86_frame frame;
3538
3539 #ifdef NON_SAVING_SETJMP
3540 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3541 return 0;
3542 #endif
3543
3544 if (! reload_completed || frame_pointer_needed)
3545 return 0;
3546
3547 /* Don't allow more than 32k bytes of pop, since that's all we can do
3548 with one instruction.  */
3549 if (current_function_pops_args
3550 && current_function_args_size >= 32768)
3551 return 0;
3552
3553 ix86_compute_frame_layout (&frame);
3554 return frame.to_allocate == 0 && frame.nregs == 0;
3555 }
3556 \f
3557 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
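/* As a worked example: the sign-extended 32-bit immediate field covers
   [-0x80000000, 0x7fffffff], so -1 and 0x7fffffff qualify while
   0x80000000 and 0x100000000 do not.  */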
3558 int
3559 x86_64_sign_extended_value (value)
3560 rtx value;
3561 {
3562 switch (GET_CODE (value))
3563 {
3564 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3565 to be at least 32 and thus all acceptable constants are
3566 represented as CONST_INT.  */
3567 case CONST_INT:
3568 if (HOST_BITS_PER_WIDE_INT == 32)
3569 return 1;
3570 else
3571 {
3572 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3573 return trunc_int_for_mode (val, SImode) == val;
3574 }
3575 break;
3576
3577 /* For certain code models, the symbolic references are known to fit. */
3578 case SYMBOL_REF:
3579 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3580
3581 /* For certain code models, the code is near as well. */
3582 case LABEL_REF:
3583 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3584
3585 /* We also may accept the offsetted memory references in certain special
3586 cases. */
3587 case CONST:
3588 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3589 && XVECLEN (XEXP (value, 0), 0) == 1
3590 && XINT (XEXP (value, 0), 1) == 15)
3591 return 1;
3592 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3593 {
3594 rtx op1 = XEXP (XEXP (value, 0), 0);
3595 rtx op2 = XEXP (XEXP (value, 0), 1);
3596 HOST_WIDE_INT offset;
3597
3598 if (ix86_cmodel == CM_LARGE)
3599 return 0;
3600 if (GET_CODE (op2) != CONST_INT)
3601 return 0;
3602 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3603 switch (GET_CODE (op1))
3604 {
3605 case SYMBOL_REF:
3606 /* For CM_SMALL assume that the latest object is 1MB before the
3607 end of the 31-bit boundary.  We may also accept pretty
3608 large negative constants knowing that all objects are
3609 in the positive half of the address space.  */
3610 if (ix86_cmodel == CM_SMALL
3611 && offset < 1024*1024*1024
3612 && trunc_int_for_mode (offset, SImode) == offset)
3613 return 1;
3614 /* For CM_KERNEL we know that all objects reside in the
3615 negative half of the 32-bit address space.  We may not
3616 accept negative offsets, since they may be just off,
3617 and we may accept pretty large positive ones.  */
3618 if (ix86_cmodel == CM_KERNEL
3619 && offset > 0
3620 && trunc_int_for_mode (offset, SImode) == offset)
3621 return 1;
3622 break;
3623 case LABEL_REF:
3624 /* These conditions are similar to SYMBOL_REF ones, just the
3625 constraints for code models differ. */
3626 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3627 && offset < 1024*1024*1024
3628 && trunc_int_for_mode (offset, SImode) == offset)
3629 return 1;
3630 if (ix86_cmodel == CM_KERNEL
3631 && offset > 0
3632 && trunc_int_for_mode (offset, SImode) == offset)
3633 return 1;
3634 break;
3635 default:
3636 return 0;
3637 }
3638 }
3639 return 0;
3640 default:
3641 return 0;
3642 }
3643 }
3644
3645 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
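/* As a worked example: the zero-extended 32-bit immediate field covers
   [0, 0xffffffff], so 0xffffffff qualifies while -1 (all 64 bits set)
   and 0x100000000 do not.  */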
3646 int
3647 x86_64_zero_extended_value (value)
3648 rtx value;
3649 {
3650 switch (GET_CODE (value))
3651 {
3652 case CONST_DOUBLE:
3653 if (HOST_BITS_PER_WIDE_INT == 32)
3654 return (GET_MODE (value) == VOIDmode
3655 && !CONST_DOUBLE_HIGH (value));
3656 else
3657 return 0;
3658 case CONST_INT:
3659 if (HOST_BITS_PER_WIDE_INT == 32)
3660 return INTVAL (value) >= 0;
3661 else
3662 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3663 break;
3664
3665 /* For certain code models, the symbolic references are known to fit. */
3666 case SYMBOL_REF:
3667 return ix86_cmodel == CM_SMALL;
3668
3669 /* For certain code models, the code is near as well. */
3670 case LABEL_REF:
3671 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3672
3673 /* We also may accept the offsetted memory references in certain special
3674 cases. */
3675 case CONST:
3676 if (GET_CODE (XEXP (value, 0)) == PLUS)
3677 {
3678 rtx op1 = XEXP (XEXP (value, 0), 0);
3679 rtx op2 = XEXP (XEXP (value, 0), 1);
3680
3681 if (ix86_cmodel == CM_LARGE)
3682 return 0;
3683 switch (GET_CODE (op1))
3684 {
3685 case SYMBOL_REF:
3687 /* For small code model we may accept pretty large positive
3688 offsets, since one bit is available for free. Negative
3689 offsets are limited by the size of NULL pointer area
3690 specified by the ABI. */
3691 if (ix86_cmodel == CM_SMALL
3692 && GET_CODE (op2) == CONST_INT
3693 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3694 && (trunc_int_for_mode (INTVAL (op2), SImode)
3695 == INTVAL (op2)))
3696 return 1;
3697 /* ??? For the kernel, we may accept adjustment of
3698 -0x10000000, since we know that it will just convert
3699 negative address space to positive, but perhaps this
3700 is not worthwhile. */
3701 break;
3702 case LABEL_REF:
3703 /* These conditions are similar to SYMBOL_REF ones, just the
3704 constraints for code models differ. */
3705 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3706 && GET_CODE (op2) == CONST_INT
3707 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3708 && (trunc_int_for_mode (INTVAL (op2), SImode)
3709 == INTVAL (op2)))
3710 return 1;
3711 break;
3712 default:
3713 return 0;
3714 }
3715 }
3716 return 0;
3717 default:
3718 return 0;
3719 }
3720 }
3721
3722 /* Value should be nonzero if functions must have frame pointers.
3723 Zero means the frame pointer need not be set up (and parms may
3724 be accessed via the stack pointer) in functions that seem suitable. */
3725
3726 int
3727 ix86_frame_pointer_required ()
3728 {
3729 /* If we accessed previous frames, then the generated code expects
3730 to be able to access the saved ebp value in our frame. */
3731 if (cfun->machine->accesses_prev_frame)
3732 return 1;
3733
3734 /* Several x86 OSes need a frame pointer for other reasons,
3735 usually pertaining to setjmp. */
3736 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3737 return 1;
3738
3739 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3740 the frame pointer by default. Turn it back on now if we've not
3741 got a leaf function. */
3742 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3743 return 1;
3744
3745 return 0;
3746 }
3747
3748 /* Record that the current function accesses previous call frames. */
3749
3750 void
3751 ix86_setup_frame_addresses ()
3752 {
3753 cfun->machine->accesses_prev_frame = 1;
3754 }
3755 \f
3756 static char pic_label_name[32];
3757
3758 /* This function generates code for -fpic that loads %ebx with
3759 the return address of the caller and then returns. */
3760
3761 void
3762 ix86_asm_file_end (file)
3763 FILE *file;
3764 {
3765 rtx xops[2];
3766
3767 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3768 return;
3769
3770 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3771 to updating relocations to a section being discarded such that this
3772 doesn't work. Ought to detect this at configure time. */
3773 #if 0
3774 /* The trick here is to create a linkonce section containing the
3775 pic label thunk, but to refer to it with an internal label.
3776 Because the label is internal, we don't have inter-dso name
3777 binding issues on hosts that don't support ".hidden".
3778
3779 In order to use these macros, however, we must create a fake
3780 function decl. */
3781 if (targetm.have_named_sections)
3782 {
3783 tree decl = build_decl (FUNCTION_DECL,
3784 get_identifier ("i686.get_pc_thunk"),
3785 error_mark_node);
3786 DECL_ONE_ONLY (decl) = 1;
3787 UNIQUE_SECTION (decl, 0);
3788 named_section (decl, NULL);
3789 }
3790 else
3791 #else
3792 text_section ();
3793 #endif
3794
3795 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3796 internal (non-global) label that's being emitted, it didn't make
3797 sense to have .type information for local labels.  This caused
3798 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3799 me debug info for a label that you're declaring non-global?), so
3800 this was changed to call ASM_OUTPUT_LABEL() instead.  */
3801
3802 ASM_OUTPUT_LABEL (file, pic_label_name);
3803
3804 xops[0] = pic_offset_table_rtx;
3805 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3806 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3807 output_asm_insn ("ret", xops);
3808 }
3809
3810 void
3811 load_pic_register ()
3812 {
3813 rtx gotsym, pclab;
3814
3815 if (TARGET_64BIT)
3816 abort ();
3817
3818 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3819
3820 if (TARGET_DEEP_BRANCH_PREDICTION)
3821 {
3822 if (! pic_label_name[0])
3823 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3824 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3825 }
3826 else
3827 {
3828 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3829 }
3830
3831 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3832
3833 if (! TARGET_DEEP_BRANCH_PREDICTION)
3834 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3835
3836 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3837 }
3838
3839 /* Generate a "push" pattern for input ARG.  */
3840
3841 static rtx
3842 gen_push (arg)
3843 rtx arg;
3844 {
3845 return gen_rtx_SET (VOIDmode,
3846 gen_rtx_MEM (Pmode,
3847 gen_rtx_PRE_DEC (Pmode,
3848 stack_pointer_rtx)),
3849 arg);
3850 }
3851
3852 /* Return 1 if we need to save REGNO. */
3853 static int
3854 ix86_save_reg (regno, maybe_eh_return)
3855 int regno;
3856 int maybe_eh_return;
3857 {
3858 if (flag_pic
3859 && ! TARGET_64BIT
3860 && regno == PIC_OFFSET_TABLE_REGNUM
3861 && (current_function_uses_pic_offset_table
3862 || current_function_uses_const_pool
3863 || current_function_calls_eh_return))
3864 return 1;
3865
3866 if (current_function_calls_eh_return && maybe_eh_return)
3867 {
3868 unsigned i;
3869 for (i = 0; ; i++)
3870 {
3871 unsigned test = EH_RETURN_DATA_REGNO (i);
3872 if (test == INVALID_REGNUM)
3873 break;
3874 if (test == (unsigned) regno)
3875 return 1;
3876 }
3877 }
3878
3879 return (regs_ever_live[regno]
3880 && !call_used_regs[regno]
3881 && !fixed_regs[regno]
3882 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3883 }
3884
3885 /* Return number of registers to be saved on the stack. */
3886
3887 static int
3888 ix86_nsaved_regs ()
3889 {
3890 int nregs = 0;
3891 int regno;
3892
3893 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3894 if (ix86_save_reg (regno, true))
3895 nregs++;
3896 return nregs;
3897 }
3898
3899 /* Return the offset between two registers, one to be eliminated, and the other
3900 its replacement, at the start of a routine. */
3901
3902 HOST_WIDE_INT
3903 ix86_initial_elimination_offset (from, to)
3904 int from;
3905 int to;
3906 {
3907 struct ix86_frame frame;
3908 ix86_compute_frame_layout (&frame);
3909
3910 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3911 return frame.hard_frame_pointer_offset;
3912 else if (from == FRAME_POINTER_REGNUM
3913 && to == HARD_FRAME_POINTER_REGNUM)
3914 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3915 else
3916 {
3917 if (to != STACK_POINTER_REGNUM)
3918 abort ();
3919 else if (from == ARG_POINTER_REGNUM)
3920 return frame.stack_pointer_offset;
3921 else if (from != FRAME_POINTER_REGNUM)
3922 abort ();
3923 else
3924 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3925 }
3926 }
3927
3928 /* Fill the structure ix86_frame describing the frame of the currently compiled function.  */
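/* A rough sketch of the layout computed below, going from the incoming
   return address downwards (this is only a summary of the code that
   follows):

	return address
	saved %ebp (when a frame pointer is needed)
		<- hard_frame_pointer_offset
	saved registers (frame->nregs words)
	va-arg register save area (64-bit varargs only)
	padding1 (aligns the start of the local frame)
		<- frame_pointer_offset
	local variables (get_frame_size ())
	outgoing argument area (with ACCUMULATE_OUTGOING_ARGS)
	padding2 (aligns to the preferred stack boundary)
		<- stack_pointer_offset

   to_allocate covers everything below the saved registers; a 64-bit leaf
   function may carve part of it out of the red zone instead.  */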
3929
3930 static void
3931 ix86_compute_frame_layout (frame)
3932 struct ix86_frame *frame;
3933 {
3934 HOST_WIDE_INT total_size;
3935 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3936 int offset;
3937 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3938 HOST_WIDE_INT size = get_frame_size ();
3939
3940 frame->nregs = ix86_nsaved_regs ();
3941 total_size = size;
3942
3943 /* Skip the return address and, if needed, the saved base pointer.  */
3944 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3945
3946 frame->hard_frame_pointer_offset = offset;
3947
3948 /* Do some sanity checking of stack_alignment_needed and
3949 preferred_alignment, since the i386 port is the only one using these
3950 features, and they may break easily.  */
3951
3952 if (size && !stack_alignment_needed)
3953 abort ();
3954 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3955 abort ();
3956 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3957 abort ();
3958 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3959 abort ();
3960
3961 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3962 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3963
3964 /* Register save area */
3965 offset += frame->nregs * UNITS_PER_WORD;
3966
3967 /* Va-arg area */
3968 if (ix86_save_varrargs_registers)
3969 {
3970 offset += X86_64_VARARGS_SIZE;
3971 frame->va_arg_size = X86_64_VARARGS_SIZE;
3972 }
3973 else
3974 frame->va_arg_size = 0;
3975
3976 /* Align start of frame for local function. */
3977 frame->padding1 = ((offset + stack_alignment_needed - 1)
3978 & -stack_alignment_needed) - offset;
3979
3980 offset += frame->padding1;
3981
3982 /* Frame pointer points here. */
3983 frame->frame_pointer_offset = offset;
3984
3985 offset += size;
3986
3987 /* Add outgoing arguments area. */
3988 if (ACCUMULATE_OUTGOING_ARGS)
3989 {
3990 offset += current_function_outgoing_args_size;
3991 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3992 }
3993 else
3994 frame->outgoing_arguments_size = 0;
3995
3996 /* Align stack boundary. */
3997 frame->padding2 = ((offset + preferred_alignment - 1)
3998 & -preferred_alignment) - offset;
3999
4000 offset += frame->padding2;
4001
4002 /* We've reached end of stack frame. */
4003 frame->stack_pointer_offset = offset;
4004
4005 /* The size the prologue needs to allocate.  */
4006 frame->to_allocate =
4007 (size + frame->padding1 + frame->padding2
4008 + frame->outgoing_arguments_size + frame->va_arg_size);
4009
4010 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4011 && current_function_is_leaf)
4012 {
4013 frame->red_zone_size = frame->to_allocate;
4014 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4015 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4016 }
4017 else
4018 frame->red_zone_size = 0;
4019 frame->to_allocate -= frame->red_zone_size;
4020 frame->stack_pointer_offset -= frame->red_zone_size;
4021 #if 0
4022 fprintf (stderr, "nregs: %i\n", frame->nregs);
4023 fprintf (stderr, "size: %i\n", size);
4024 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4025 fprintf (stderr, "padding1: %i\n", frame->padding1);
4026 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4027 fprintf (stderr, "padding2: %i\n", frame->padding2);
4028 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4029 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4030 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4031 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4032 frame->hard_frame_pointer_offset);
4033 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4034 #endif
4035 }
4036
4037 /* Emit code to save registers in the prologue. */
4038
4039 static void
4040 ix86_emit_save_regs ()
4041 {
4042 register int regno;
4043 rtx insn;
4044
4045 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4046 if (ix86_save_reg (regno, true))
4047 {
4048 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4049 RTX_FRAME_RELATED_P (insn) = 1;
4050 }
4051 }
4052
4053 /* Emit code to save registers using MOV insns.  The first register
4054 is stored at POINTER + OFFSET.  */
4055 static void
4056 ix86_emit_save_regs_using_mov (pointer, offset)
4057 rtx pointer;
4058 HOST_WIDE_INT offset;
4059 {
4060 int regno;
4061 rtx insn;
4062
4063 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4064 if (ix86_save_reg (regno, true))
4065 {
4066 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4067 Pmode, offset),
4068 gen_rtx_REG (Pmode, regno));
4069 RTX_FRAME_RELATED_P (insn) = 1;
4070 offset += UNITS_PER_WORD;
4071 }
4072 }
4073
4074 /* Expand the prologue into a bunch of separate insns. */
4075
4076 void
4077 ix86_expand_prologue ()
4078 {
4079 rtx insn;
4080 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4081 || current_function_uses_const_pool)
4082 && !TARGET_64BIT);
4083 struct ix86_frame frame;
4084 int use_mov = 0;
4085 HOST_WIDE_INT allocate;
4086
4087 if (!optimize_size)
4088 {
4089 use_fast_prologue_epilogue
4090 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4091 if (TARGET_PROLOGUE_USING_MOVE)
4092 use_mov = use_fast_prologue_epilogue;
4093 }
4094 ix86_compute_frame_layout (&frame);
4095
4096 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4097 slower on all targets. Also sdb doesn't like it. */
4098
4099 if (frame_pointer_needed)
4100 {
4101 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4102 RTX_FRAME_RELATED_P (insn) = 1;
4103
4104 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4105 RTX_FRAME_RELATED_P (insn) = 1;
4106 }
4107
4108 allocate = frame.to_allocate;
4109 /* In case we are dealing with only a single register and an empty frame,
4110 a push is equivalent to the mov+add sequence.  */
4111 if (allocate == 0 && frame.nregs <= 1)
4112 use_mov = 0;
4113
4114 if (!use_mov)
4115 ix86_emit_save_regs ();
4116 else
4117 allocate += frame.nregs * UNITS_PER_WORD;
4118
4119 if (allocate == 0)
4120 ;
4121 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4122 {
4123 insn = emit_insn (gen_pro_epilogue_adjust_stack
4124 (stack_pointer_rtx, stack_pointer_rtx,
4125 GEN_INT (-allocate)));
4126 RTX_FRAME_RELATED_P (insn) = 1;
4127 }
4128 else
4129 {
4130 /* ??? Is this only valid for Win32? */
4131
4132 rtx arg0, sym;
4133
4134 if (TARGET_64BIT)
4135 abort ();
4136
4137 arg0 = gen_rtx_REG (SImode, 0);
4138 emit_move_insn (arg0, GEN_INT (allocate));
4139
4140 sym = gen_rtx_MEM (FUNCTION_MODE,
4141 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4142 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4143
4144 CALL_INSN_FUNCTION_USAGE (insn)
4145 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4146 CALL_INSN_FUNCTION_USAGE (insn));
4147 }
4148 if (use_mov)
4149 {
4150 if (!frame_pointer_needed || !frame.to_allocate)
4151 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4152 else
4153 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4154 -frame.nregs * UNITS_PER_WORD);
4155 }
4156
4157 #ifdef SUBTARGET_PROLOGUE
4158 SUBTARGET_PROLOGUE;
4159 #endif
4160
4161 if (pic_reg_used)
4162 load_pic_register ();
4163
4164 /* If we are profiling, make sure no instructions are scheduled before
4165 the call to mcount. However, if -fpic, the above call will have
4166 done that. */
4167 if (current_function_profile && ! pic_reg_used)
4168 emit_insn (gen_blockage ());
4169 }
4170
4171 /* Emit code to restore saved registers using MOV insns. First register
4172 is restored from POINTER + OFFSET. */
4173 static void
4174 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4175 rtx pointer;
4176 int offset;
4177 int maybe_eh_return;
4178 {
4179 int regno;
4180
4181 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4182 if (ix86_save_reg (regno, maybe_eh_return))
4183 {
4184 emit_move_insn (gen_rtx_REG (Pmode, regno),
4185 adjust_address (gen_rtx_MEM (Pmode, pointer),
4186 Pmode, offset));
4187 offset += UNITS_PER_WORD;
4188 }
4189 }
4190
4191 /* Restore function stack, frame, and registers. */
4192
4193 void
4194 ix86_expand_epilogue (style)
4195 int style;
4196 {
4197 int regno;
4198 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4199 struct ix86_frame frame;
4200 HOST_WIDE_INT offset;
4201
4202 ix86_compute_frame_layout (&frame);
4203
4204 /* Calculate start of saved registers relative to ebp. Special care
4205 must be taken for the normal return case of a function using
4206 eh_return: the eax and edx registers are marked as saved, but not
4207 restored along this path. */
4208 offset = frame.nregs;
4209 if (current_function_calls_eh_return && style != 2)
4210 offset -= 2;
4211 offset *= -UNITS_PER_WORD;
4212
4213 /* If we're only restoring one register and sp is not valid then
4214 use a move instruction to restore the register, since it's
4215 less work than reloading sp and popping the register.
4216 
4217 The default code results in a stack adjustment using an add/lea
4218 instruction, while this code results in a LEAVE instruction (or discrete
4219 equivalent), so it is profitable in some other cases as well, especially
4220 when there are no registers to restore.  We also use this code when
4221 TARGET_USE_LEAVE is set and there is exactly one register to pop.  This
4222 heuristic may need some tuning in the future.  */
4223 if ((!sp_valid && frame.nregs <= 1)
4224 || (TARGET_EPILOGUE_USING_MOVE
4225 && use_fast_prologue_epilogue
4226 && (frame.nregs > 1 || frame.to_allocate))
4227 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4228 || (frame_pointer_needed && TARGET_USE_LEAVE
4229 && use_fast_prologue_epilogue && frame.nregs == 1)
4230 || current_function_calls_eh_return)
4231 {
4232 /* Restore registers.  We can use ebp or esp to address the memory
4233 locations.  If both are available, default to ebp, since offsets
4234 are known to be small.  The only exception is esp pointing directly
4235 to the end of the block of saved registers, where we may simplify
4236 the addressing mode.  */
4237
4238 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4239 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4240 frame.to_allocate, style == 2);
4241 else
4242 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4243 offset, style == 2);
4244
4245 /* eh_return epilogues need %ecx added to the stack pointer. */
4246 if (style == 2)
4247 {
4248 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4249
4250 if (frame_pointer_needed)
4251 {
4252 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4253 tmp = plus_constant (tmp, UNITS_PER_WORD);
4254 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4255
4256 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4257 emit_move_insn (hard_frame_pointer_rtx, tmp);
4258
4259 emit_insn (gen_pro_epilogue_adjust_stack
4260 (stack_pointer_rtx, sa, const0_rtx));
4261 }
4262 else
4263 {
4264 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4265 tmp = plus_constant (tmp, (frame.to_allocate
4266 + frame.nregs * UNITS_PER_WORD));
4267 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4268 }
4269 }
4270 else if (!frame_pointer_needed)
4271 emit_insn (gen_pro_epilogue_adjust_stack
4272 (stack_pointer_rtx, stack_pointer_rtx,
4273 GEN_INT (frame.to_allocate
4274 + frame.nregs * UNITS_PER_WORD)));
4275 /* If not an i386, mov & pop is faster than "leave". */
4276 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4277 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4278 else
4279 {
4280 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4281 hard_frame_pointer_rtx,
4282 const0_rtx));
4283 if (TARGET_64BIT)
4284 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4285 else
4286 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4287 }
4288 }
4289 else
4290 {
4291 /* First step is to deallocate the stack frame so that we can
4292 pop the registers. */
4293 if (!sp_valid)
4294 {
4295 if (!frame_pointer_needed)
4296 abort ();
4297 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4298 hard_frame_pointer_rtx,
4299 GEN_INT (offset)));
4300 }
4301 else if (frame.to_allocate)
4302 emit_insn (gen_pro_epilogue_adjust_stack
4303 (stack_pointer_rtx, stack_pointer_rtx,
4304 GEN_INT (frame.to_allocate)));
4305
4306 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4307 if (ix86_save_reg (regno, false))
4308 {
4309 if (TARGET_64BIT)
4310 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4311 else
4312 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4313 }
4314 if (frame_pointer_needed)
4315 {
4316 /* Leave results in shorter dependency chains on CPUs that are
4317 able to grok it fast. */
4318 if (TARGET_USE_LEAVE)
4319 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4320 else if (TARGET_64BIT)
4321 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4322 else
4323 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4324 }
4325 }
4326
4327 /* Sibcall epilogues don't want a return instruction. */
4328 if (style == 0)
4329 return;
4330
4331 if (current_function_pops_args && current_function_args_size)
4332 {
4333 rtx popc = GEN_INT (current_function_pops_args);
4334
4335 /* i386 can only pop 64K bytes.  If asked to pop more, pop the
4336 return address, do an explicit add, and jump indirectly to the
4337 caller.  */
4338
4339 if (current_function_pops_args >= 65536)
4340 {
4341 rtx ecx = gen_rtx_REG (SImode, 2);
4342
4343 /* There is no "pascal" calling convention in the 64-bit ABI.  */
4344 if (TARGET_64BIT)
4345 abort ();
4346
4347 emit_insn (gen_popsi1 (ecx));
4348 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4349 emit_jump_insn (gen_return_indirect_internal (ecx));
4350 }
4351 else
4352 emit_jump_insn (gen_return_pop_internal (popc));
4353 }
4354 else
4355 emit_jump_insn (gen_return_internal ());
4356 }
4357 \f
4358 /* Extract the parts of an RTL expression that is a valid memory address
4359 for an instruction.  Return 0 if the structure of the address is
4360 grossly off.  Return -1 if the address contains ASHIFT, so it is not
4361 strictly valid, but still used for computing the length of an lea
4362 instruction.  */
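/* For example (illustrative), the address
	(plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))
   decomposes into base = B, index = A, scale = 4, disp = 12, while an
   lea source such as (ashift (reg A) (const_int 2)) yields index = A,
   scale = 4 and a return value of -1.  */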
4363
4364 static int
4365 ix86_decompose_address (addr, out)
4366 register rtx addr;
4367 struct ix86_address *out;
4368 {
4369 rtx base = NULL_RTX;
4370 rtx index = NULL_RTX;
4371 rtx disp = NULL_RTX;
4372 HOST_WIDE_INT scale = 1;
4373 rtx scale_rtx = NULL_RTX;
4374 int retval = 1;
4375
4376 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4377 base = addr;
4378 else if (GET_CODE (addr) == PLUS)
4379 {
4380 rtx op0 = XEXP (addr, 0);
4381 rtx op1 = XEXP (addr, 1);
4382 enum rtx_code code0 = GET_CODE (op0);
4383 enum rtx_code code1 = GET_CODE (op1);
4384
4385 if (code0 == REG || code0 == SUBREG)
4386 {
4387 if (code1 == REG || code1 == SUBREG)
4388 index = op0, base = op1; /* index + base */
4389 else
4390 base = op0, disp = op1; /* base + displacement */
4391 }
4392 else if (code0 == MULT)
4393 {
4394 index = XEXP (op0, 0);
4395 scale_rtx = XEXP (op0, 1);
4396 if (code1 == REG || code1 == SUBREG)
4397 base = op1; /* index*scale + base */
4398 else
4399 disp = op1; /* index*scale + disp */
4400 }
4401 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4402 {
4403 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4404 scale_rtx = XEXP (XEXP (op0, 0), 1);
4405 base = XEXP (op0, 1);
4406 disp = op1;
4407 }
4408 else if (code0 == PLUS)
4409 {
4410 index = XEXP (op0, 0); /* index + base + disp */
4411 base = XEXP (op0, 1);
4412 disp = op1;
4413 }
4414 else
4415 return 0;
4416 }
4417 else if (GET_CODE (addr) == MULT)
4418 {
4419 index = XEXP (addr, 0); /* index*scale */
4420 scale_rtx = XEXP (addr, 1);
4421 }
4422 else if (GET_CODE (addr) == ASHIFT)
4423 {
4424 rtx tmp;
4425
4426 /* We're called for lea too, which implements ashift on occasion. */
4427 index = XEXP (addr, 0);
4428 tmp = XEXP (addr, 1);
4429 if (GET_CODE (tmp) != CONST_INT)
4430 return 0;
4431 scale = INTVAL (tmp);
4432 if ((unsigned HOST_WIDE_INT) scale > 3)
4433 return 0;
4434 scale = 1 << scale;
4435 retval = -1;
4436 }
4437 else
4438 disp = addr; /* displacement */
4439
4440 /* Extract the integral value of scale. */
4441 if (scale_rtx)
4442 {
4443 if (GET_CODE (scale_rtx) != CONST_INT)
4444 return 0;
4445 scale = INTVAL (scale_rtx);
4446 }
4447
4448 /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
4449 if (base && index && scale == 1
4450 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4451 || index == stack_pointer_rtx))
4452 {
4453 rtx tmp = base;
4454 base = index;
4455 index = tmp;
4456 }
4457
4458 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4459 if ((base == hard_frame_pointer_rtx
4460 || base == frame_pointer_rtx
4461 || base == arg_pointer_rtx) && !disp)
4462 disp = const0_rtx;
4463
4464 /* Special case: on K6, [%esi] makes the instruction be vector decoded.
4465 Avoid this by transforming it to [%esi+0].  */
4466 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4467 && base && !index && !disp
4468 && REG_P (base)
4469 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4470 disp = const0_rtx;
4471
4472 /* Special case: encode reg+reg instead of reg*2. */
4473 if (!base && index && scale && scale == 2)
4474 base = index, scale = 1;
4475
4476 /* Special case: scaling cannot be encoded without base or displacement. */
4477 if (!base && !disp && index && scale != 1)
4478 disp = const0_rtx;
4479
4480 out->base = base;
4481 out->index = index;
4482 out->disp = disp;
4483 out->scale = scale;
4484
4485 return retval;
4486 }
4487 \f
4488 /* Return the cost of the memory address X.
4489 For i386, it is better to use a complex address than to let gcc copy
4490 the address into a reg and make a new pseudo.  But not if the address
4491 requires two regs - that would mean more pseudos with longer
4492 lifetimes.  */
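/* Illustratively, under the rules below [%ebp + 8] costs 0 (the non-zero
   displacement is a bonus), an address using a single pseudo register and
   no displacement costs 2, and one using two distinct pseudo registers and
   no displacement costs 3; the K6 penalty described further down adds 10
   on top of that.  */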
4493 int
4494 ix86_address_cost (x)
4495 rtx x;
4496 {
4497 struct ix86_address parts;
4498 int cost = 1;
4499
4500 if (!ix86_decompose_address (x, &parts))
4501 abort ();
4502
4503 /* More complex memory references are better. */
4504 if (parts.disp && parts.disp != const0_rtx)
4505 cost--;
4506
4507 /* Attempt to minimize number of registers in the address. */
4508 if ((parts.base
4509 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4510 || (parts.index
4511 && (!REG_P (parts.index)
4512 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4513 cost++;
4514
4515 if (parts.base
4516 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4517 && parts.index
4518 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4519 && parts.base != parts.index)
4520 cost++;
4521
4522 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
4523 00_xxx_100b, since its predecode logic can't detect the length of such
4524 instructions and they degenerate to vector decoded.  Increase the cost
4525 of such addresses here.  The penalty is at least 2 cycles.  It may be
4526 worthwhile to split such addresses or even to refuse them altogether.
4527 
4528 The following addressing modes are affected:
4529 [base+scale*index]
4530 [scale*index+disp]
4531 [base+index]
4532 
4533 The first and last cases may be avoidable by explicitly coding the zero
4534 into the memory address, but I don't have an AMD-K6 machine handy to
4535 check this theory. */
4536
4537 if (TARGET_K6
4538 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4539 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4540 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4541 cost += 10;
4542
4543 return cost;
4544 }
4545 \f
4546 /* If X is a machine specific address (i.e. a symbol or label being
4547 referenced as a displacement from the GOT implemented using an
4548 UNSPEC), then return the base term. Otherwise return X. */
4549
4550 rtx
4551 ix86_find_base_term (x)
4552 rtx x;
4553 {
4554 rtx term;
4555
4556 if (TARGET_64BIT)
4557 {
4558 if (GET_CODE (x) != CONST)
4559 return x;
4560 term = XEXP (x, 0);
4561 if (GET_CODE (term) == PLUS
4562 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4563 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4564 term = XEXP (term, 0);
4565 if (GET_CODE (term) != UNSPEC
4566 || XVECLEN (term, 0) != 1
4567 || XINT (term, 1) != 15)
4568 return x;
4569
4570 term = XVECEXP (term, 0, 0);
4571
4572 if (GET_CODE (term) != SYMBOL_REF
4573 && GET_CODE (term) != LABEL_REF)
4574 return x;
4575
4576 return term;
4577 }
4578
4579 if (GET_CODE (x) != PLUS
4580 || XEXP (x, 0) != pic_offset_table_rtx
4581 || GET_CODE (XEXP (x, 1)) != CONST)
4582 return x;
4583
4584 term = XEXP (XEXP (x, 1), 0);
4585
4586 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4587 term = XEXP (term, 0);
4588
4589 if (GET_CODE (term) != UNSPEC
4590 || XVECLEN (term, 0) != 1
4591 || XINT (term, 1) != 7)
4592 return x;
4593
4594 term = XVECEXP (term, 0, 0);
4595
4596 if (GET_CODE (term) != SYMBOL_REF
4597 && GET_CODE (term) != LABEL_REF)
4598 return x;
4599
4600 return term;
4601 }
4602 \f
4603 /* Determine if a given CONST RTX is a valid memory displacement
4604 in PIC mode. */
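/* For 32-bit PIC code a valid displacement looks like
	(const (unspec [(symbol_ref "foo")] 6))	   i.e. foo@GOT
   or	(const (unspec [(symbol_ref "foo")] 7))	   i.e. foo@GOTOFF,
   possibly with a CONST_INT offset added inside the CONST; 64-bit code
   uses unspec 15 (@GOTPCREL) instead.  (Illustrative forms only.)  */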
4605
4606 int
4607 legitimate_pic_address_disp_p (disp)
4608 register rtx disp;
4609 {
4610 /* In 64bit mode we can allow direct addresses of symbols and labels
4611 when they are not dynamic symbols. */
4612 if (TARGET_64BIT)
4613 {
4614 rtx x = disp;
4615 if (GET_CODE (disp) == CONST)
4616 x = XEXP (disp, 0);
4617 /* ??? Handle PIC code models */
4618 if (GET_CODE (x) == PLUS
4619 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4620 && ix86_cmodel == CM_SMALL_PIC
4621 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4622 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4623 x = XEXP (x, 0);
4624 if (local_symbolic_operand (x, Pmode))
4625 return 1;
4626 }
4627 if (GET_CODE (disp) != CONST)
4628 return 0;
4629 disp = XEXP (disp, 0);
4630
4631 if (TARGET_64BIT)
4632 {
4633 /* It is unsafe to allow PLUS expressions; this limits the allowed
4634 distance of GOT references.  We should not need these anyway.  */
4635 if (GET_CODE (disp) != UNSPEC
4636 || XVECLEN (disp, 0) != 1
4637 || XINT (disp, 1) != 15)
4638 return 0;
4639
4640 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4641 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4642 return 0;
4643 return 1;
4644 }
4645
4646 if (GET_CODE (disp) == PLUS)
4647 {
4648 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4649 return 0;
4650 disp = XEXP (disp, 0);
4651 }
4652
4653 if (GET_CODE (disp) != UNSPEC
4654 || XVECLEN (disp, 0) != 1)
4655 return 0;
4656
4657 /* Must be @GOT or @GOTOFF. */
4658 switch (XINT (disp, 1))
4659 {
4660 case 6: /* @GOT */
4661 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4662
4663 case 7: /* @GOTOFF */
4664 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4665 }
4666
4667 return 0;
4668 }
4669
4670 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4671 memory address for an instruction. The MODE argument is the machine mode
4672 for the MEM expression that wants to use this address.
4673
4674 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
4675 convert common non-canonical forms to canonical form so that they will
4676 be recognized. */
4677
4678 int
4679 legitimate_address_p (mode, addr, strict)
4680 enum machine_mode mode;
4681 register rtx addr;
4682 int strict;
4683 {
4684 struct ix86_address parts;
4685 rtx base, index, disp;
4686 HOST_WIDE_INT scale;
4687 const char *reason = NULL;
4688 rtx reason_rtx = NULL_RTX;
4689
4690 if (TARGET_DEBUG_ADDR)
4691 {
4692 fprintf (stderr,
4693 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4694 GET_MODE_NAME (mode), strict);
4695 debug_rtx (addr);
4696 }
4697
4698 if (ix86_decompose_address (addr, &parts) <= 0)
4699 {
4700 reason = "decomposition failed";
4701 goto report_error;
4702 }
4703
4704 base = parts.base;
4705 index = parts.index;
4706 disp = parts.disp;
4707 scale = parts.scale;
4708
4709 /* Validate base register.
4710
4711 Don't allow SUBREG's here, it can lead to spill failures when the base
4712 is one word out of a two word structure, which is represented internally
4713 as a DImode int. */
4714
4715 if (base)
4716 {
4717 reason_rtx = base;
4718
4719 if (GET_CODE (base) != REG)
4720 {
4721 reason = "base is not a register";
4722 goto report_error;
4723 }
4724
4725 if (GET_MODE (base) != Pmode)
4726 {
4727 reason = "base is not in Pmode";
4728 goto report_error;
4729 }
4730
4731 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4732 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4733 {
4734 reason = "base is not valid";
4735 goto report_error;
4736 }
4737 }
4738
4739 /* Validate index register.
4740
4741 Don't allow SUBREG's here, it can lead to spill failures when the index
4742 is one word out of a two word structure, which is represented internally
4743 as a DImode int. */
4744
4745 if (index)
4746 {
4747 reason_rtx = index;
4748
4749 if (GET_CODE (index) != REG)
4750 {
4751 reason = "index is not a register";
4752 goto report_error;
4753 }
4754
4755 if (GET_MODE (index) != Pmode)
4756 {
4757 reason = "index is not in Pmode";
4758 goto report_error;
4759 }
4760
4761 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4762 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4763 {
4764 reason = "index is not valid";
4765 goto report_error;
4766 }
4767 }
4768
4769 /* Validate scale factor. */
4770 if (scale != 1)
4771 {
4772 reason_rtx = GEN_INT (scale);
4773 if (!index)
4774 {
4775 reason = "scale without index";
4776 goto report_error;
4777 }
4778
4779 if (scale != 2 && scale != 4 && scale != 8)
4780 {
4781 reason = "scale is not a valid multiplier";
4782 goto report_error;
4783 }
4784 }
4785
4786 /* Validate displacement. */
4787 if (disp)
4788 {
4789 reason_rtx = disp;
4790
4791 if (!CONSTANT_ADDRESS_P (disp))
4792 {
4793 reason = "displacement is not constant";
4794 goto report_error;
4795 }
4796
4797 if (TARGET_64BIT)
4798 {
4799 if (!x86_64_sign_extended_value (disp))
4800 {
4801 reason = "displacement is out of range";
4802 goto report_error;
4803 }
4804 }
4805 else
4806 {
4807 if (GET_CODE (disp) == CONST_DOUBLE)
4808 {
4809 reason = "displacement is a const_double";
4810 goto report_error;
4811 }
4812 }
4813
4814 if (flag_pic && SYMBOLIC_CONST (disp))
4815 {
4816 if (TARGET_64BIT && (index || base))
4817 {
4818 reason = "non-constant pic memory reference";
4819 goto report_error;
4820 }
4821 if (! legitimate_pic_address_disp_p (disp))
4822 {
4823 reason = "displacement is an invalid pic construct";
4824 goto report_error;
4825 }
4826
4827 /* This code used to verify that a symbolic pic displacement
4828 includes the pic_offset_table_rtx register.
4829 
4830 While this is a good idea, unfortunately these constructs may
4831 be created by the "adds using lea" optimization for incorrect
4832 code like:
4833 
4834 int a;
4835 int foo(int i)
4836 {
4837 return *(&a+i);
4838 }
4839 
4840 This code is nonsensical, but results in addressing the
4841 GOT table with a pic_offset_table_rtx base.  We can't
4842 just reject it easily, since it gets matched by the
4843 "addsi3" pattern, which later gets split to lea when the
4844 output register differs from the input.  While this
4845 could be handled by a separate addsi pattern for this case
4846 that never results in lea, disabling this test seems to be
4847 the easier and correct fix for the crash.  */
4848 }
4849 else if (HALF_PIC_P ())
4850 {
4851 if (! HALF_PIC_ADDRESS_P (disp)
4852 || (base != NULL_RTX || index != NULL_RTX))
4853 {
4854 reason = "displacement is an invalid half-pic reference";
4855 goto report_error;
4856 }
4857 }
4858 }
4859
4860 /* Everything looks valid. */
4861 if (TARGET_DEBUG_ADDR)
4862 fprintf (stderr, "Success.\n");
4863 return TRUE;
4864
4865 report_error:
4866 if (TARGET_DEBUG_ADDR)
4867 {
4868 fprintf (stderr, "Error: %s\n", reason);
4869 debug_rtx (reason_rtx);
4870 }
4871 return FALSE;
4872 }
4873 \f
4874 /* Return a unique alias set for the GOT.  */
4875
4876 static HOST_WIDE_INT
4877 ix86_GOT_alias_set ()
4878 {
4879 static HOST_WIDE_INT set = -1;
4880 if (set == -1)
4881 set = new_alias_set ();
4882 return set;
4883 }
4884
4885 /* Return a legitimate reference for ORIG (an address) using the
4886 register REG. If REG is 0, a new pseudo is generated.
4887
4888 There are two types of references that must be handled:
4889
4890 1. Global data references must load the address from the GOT, via
4891 the PIC reg. An insn is emitted to do this load, and the reg is
4892 returned.
4893
4894 2. Static data references, constant pool addresses, and code labels
4895 compute the address as an offset from the GOT, whose base is in
4896 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4897 differentiate them from global data objects. The returned
4898 address is the PIC reg + an unspec constant.
4899
4900 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4901 reg also appears in the address. */
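/* Illustratively, on 32-bit targets a global symbol becomes
	(mem (plus %ebx (const (unspec [(symbol_ref "foo")] 6))))
   i.e. a load of foo's address from the GOT, while a local symbol becomes
	(plus %ebx (const (unspec [(symbol_ref "bar")] 7)))
   i.e. the PIC register plus a @GOTOFF offset; %ebx stands for
   pic_offset_table_rtx here.  */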
4902
4903 rtx
4904 legitimize_pic_address (orig, reg)
4905 rtx orig;
4906 rtx reg;
4907 {
4908 rtx addr = orig;
4909 rtx new = orig;
4910 rtx base;
4911
4912 if (local_symbolic_operand (addr, Pmode))
4913 {
4914 /* In 64bit mode we can address such objects directly. */
4915 if (TARGET_64BIT)
4916 new = addr;
4917 else
4918 {
4919 /* This symbol may be referenced via a displacement from the PIC
4920 base address (@GOTOFF). */
4921
4922 current_function_uses_pic_offset_table = 1;
4923 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4924 new = gen_rtx_CONST (Pmode, new);
4925 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4926
4927 if (reg != 0)
4928 {
4929 emit_move_insn (reg, new);
4930 new = reg;
4931 }
4932 }
4933 }
4934 else if (GET_CODE (addr) == SYMBOL_REF)
4935 {
4936 if (TARGET_64BIT)
4937 {
4938 current_function_uses_pic_offset_table = 1;
4939 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4940 new = gen_rtx_CONST (Pmode, new);
4941 new = gen_rtx_MEM (Pmode, new);
4942 RTX_UNCHANGING_P (new) = 1;
4943 set_mem_alias_set (new, ix86_GOT_alias_set ());
4944
4945 if (reg == 0)
4946 reg = gen_reg_rtx (Pmode);
4947 /* Use gen_movsi directly; otherwise the address is loaded
4948 into a register for CSE.  We don't want to CSE these
4949 addresses; instead we CSE addresses from the GOT table, so skip this.  */
4950 emit_insn (gen_movsi (reg, new));
4951 new = reg;
4952 }
4953 else
4954 {
4955 /* This symbol must be referenced via a load from the
4956 Global Offset Table (@GOT). */
4957
4958 current_function_uses_pic_offset_table = 1;
4959 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4960 new = gen_rtx_CONST (Pmode, new);
4961 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4962 new = gen_rtx_MEM (Pmode, new);
4963 RTX_UNCHANGING_P (new) = 1;
4964 set_mem_alias_set (new, ix86_GOT_alias_set ());
4965
4966 if (reg == 0)
4967 reg = gen_reg_rtx (Pmode);
4968 emit_move_insn (reg, new);
4969 new = reg;
4970 }
4971 }
4972 else
4973 {
4974 if (GET_CODE (addr) == CONST)
4975 {
4976 addr = XEXP (addr, 0);
4977 if (GET_CODE (addr) == UNSPEC)
4978 {
4979 /* Check that the unspec is one of the ones we generate? */
4980 }
4981 else if (GET_CODE (addr) != PLUS)
4982 abort ();
4983 }
4984 if (GET_CODE (addr) == PLUS)
4985 {
4986 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4987
4988 /* Check first to see if this is a constant offset from a @GOTOFF
4989 symbol reference. */
4990 if (local_symbolic_operand (op0, Pmode)
4991 && GET_CODE (op1) == CONST_INT)
4992 {
4993 if (!TARGET_64BIT)
4994 {
4995 current_function_uses_pic_offset_table = 1;
4996 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4997 new = gen_rtx_PLUS (Pmode, new, op1);
4998 new = gen_rtx_CONST (Pmode, new);
4999 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5000
5001 if (reg != 0)
5002 {
5003 emit_move_insn (reg, new);
5004 new = reg;
5005 }
5006 }
5007 else
5008 {
5009 /* ??? We need to limit offsets here. */
5010 }
5011 }
5012 else
5013 {
5014 base = legitimize_pic_address (XEXP (addr, 0), reg);
5015 new = legitimize_pic_address (XEXP (addr, 1),
5016 base == reg ? NULL_RTX : reg);
5017
5018 if (GET_CODE (new) == CONST_INT)
5019 new = plus_constant (base, INTVAL (new));
5020 else
5021 {
5022 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5023 {
5024 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5025 new = XEXP (new, 1);
5026 }
5027 new = gen_rtx_PLUS (Pmode, base, new);
5028 }
5029 }
5030 }
5031 }
5032 return new;
5033 }
5034 \f
5035 /* Try machine-dependent ways of modifying an illegitimate address
5036 to be legitimate. If we find one, return the new, valid address.
5037 This macro is used in only one place: `memory_address' in explow.c.
5038
5039 OLDX is the address as it was before break_out_memory_refs was called.
5040 In some cases it is useful to look at this to decide what needs to be done.
5041
5042 MODE and WIN are passed so that this macro can use
5043 GO_IF_LEGITIMATE_ADDRESS.
5044
5045 It is always safe for this macro to do nothing. It exists to recognize
5046 opportunities to optimize the output.
5047
5048 For the 80386, we handle X+REG by loading X into a register R and
5049 using R+REG. R will go in a general reg and indexing will be used.
5050 However, if REG is a broken-out memory address or multiplication,
5051 nothing needs to be done because REG can certainly go in a general reg.
5052
5053 When -fpic is used, special handling is needed for symbolic references.
5054 See comments by legitimize_pic_address in i386.c for details. */
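/* For instance (illustrative of the canonicalizations below),
	(plus (ashift (reg A) (const_int 2)) (reg B))
   is first rewritten as
	(plus (mult (reg A) (const_int 4)) (reg B))
   so that the usual base + index*scale addressing form is recognized.  */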
5055
5056 rtx
5057 legitimize_address (x, oldx, mode)
5058 register rtx x;
5059 register rtx oldx ATTRIBUTE_UNUSED;
5060 enum machine_mode mode;
5061 {
5062 int changed = 0;
5063 unsigned log;
5064
5065 if (TARGET_DEBUG_ADDR)
5066 {
5067 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5068 GET_MODE_NAME (mode));
5069 debug_rtx (x);
5070 }
5071
5072 if (flag_pic && SYMBOLIC_CONST (x))
5073 return legitimize_pic_address (x, 0);
5074
5075 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5076 if (GET_CODE (x) == ASHIFT
5077 && GET_CODE (XEXP (x, 1)) == CONST_INT
5078 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5079 {
5080 changed = 1;
5081 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5082 GEN_INT (1 << log));
5083 }
5084
5085 if (GET_CODE (x) == PLUS)
5086 {
5087 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5088
5089 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5090 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5091 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5092 {
5093 changed = 1;
5094 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5095 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5096 GEN_INT (1 << log));
5097 }
5098
5099 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5100 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5101 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5102 {
5103 changed = 1;
5104 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5105 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5106 GEN_INT (1 << log));
5107 }
5108
5109 /* Put multiply first if it isn't already. */
5110 if (GET_CODE (XEXP (x, 1)) == MULT)
5111 {
5112 rtx tmp = XEXP (x, 0);
5113 XEXP (x, 0) = XEXP (x, 1);
5114 XEXP (x, 1) = tmp;
5115 changed = 1;
5116 }
5117
5118 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5119 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5120 created by virtual register instantiation, register elimination, and
5121 similar optimizations. */
5122 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5123 {
5124 changed = 1;
5125 x = gen_rtx_PLUS (Pmode,
5126 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5127 XEXP (XEXP (x, 1), 0)),
5128 XEXP (XEXP (x, 1), 1));
5129 }
5130
5131 /* Canonicalize
5132 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5133 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5134 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5135 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5136 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5137 && CONSTANT_P (XEXP (x, 1)))
5138 {
5139 rtx constant;
5140 rtx other = NULL_RTX;
5141
5142 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5143 {
5144 constant = XEXP (x, 1);
5145 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5146 }
5147 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5148 {
5149 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5150 other = XEXP (x, 1);
5151 }
5152 else
5153 constant = 0;
5154
5155 if (constant)
5156 {
5157 changed = 1;
5158 x = gen_rtx_PLUS (Pmode,
5159 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5160 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5161 plus_constant (other, INTVAL (constant)));
5162 }
5163 }
5164
5165 if (changed && legitimate_address_p (mode, x, FALSE))
5166 return x;
5167
5168 if (GET_CODE (XEXP (x, 0)) == MULT)
5169 {
5170 changed = 1;
5171 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5172 }
5173
5174 if (GET_CODE (XEXP (x, 1)) == MULT)
5175 {
5176 changed = 1;
5177 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5178 }
5179
5180 if (changed
5181 && GET_CODE (XEXP (x, 1)) == REG
5182 && GET_CODE (XEXP (x, 0)) == REG)
5183 return x;
5184
5185 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5186 {
5187 changed = 1;
5188 x = legitimize_pic_address (x, 0);
5189 }
5190
5191 if (changed && legitimate_address_p (mode, x, FALSE))
5192 return x;
5193
5194 if (GET_CODE (XEXP (x, 0)) == REG)
5195 {
5196 register rtx temp = gen_reg_rtx (Pmode);
5197 register rtx val = force_operand (XEXP (x, 1), temp);
5198 if (val != temp)
5199 emit_move_insn (temp, val);
5200
5201 XEXP (x, 1) = temp;
5202 return x;
5203 }
5204
5205 else if (GET_CODE (XEXP (x, 1)) == REG)
5206 {
5207 register rtx temp = gen_reg_rtx (Pmode);
5208 register rtx val = force_operand (XEXP (x, 0), temp);
5209 if (val != temp)
5210 emit_move_insn (temp, val);
5211
5212 XEXP (x, 0) = temp;
5213 return x;
5214 }
5215 }
5216
5217 return x;
5218 }
5219 \f
5220 /* Print an integer constant expression in assembler syntax. Addition
5221 and subtraction are the only arithmetic that may appear in these
5222 expressions. FILE is the stdio stream to write to, X is the rtx, and
5223 CODE is the operand print code from the output string. */
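/* For example (illustrative of the cases below),
   (const (unspec [(symbol_ref "foo")] 7)) is printed as "foo@GOTOFF",
   unspec 6 as "foo@GOT", unspec 8 as "foo@PLT" and unspec 15 as
   "foo@GOTPCREL(%RIP)"; plain symbols, labels and integer constants are
   printed in the ordinary way.  */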
5224
5225 static void
5226 output_pic_addr_const (file, x, code)
5227 FILE *file;
5228 rtx x;
5229 int code;
5230 {
5231 char buf[256];
5232
5233 switch (GET_CODE (x))
5234 {
5235 case PC:
5236 if (flag_pic)
5237 putc ('.', file);
5238 else
5239 abort ();
5240 break;
5241
5242 case SYMBOL_REF:
5243 assemble_name (file, XSTR (x, 0));
5244 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5245 fputs ("@PLT", file);
5246 break;
5247
5248 case LABEL_REF:
5249 x = XEXP (x, 0);
5250 /* FALLTHRU */
5251 case CODE_LABEL:
5252 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5253 assemble_name (asm_out_file, buf);
5254 break;
5255
5256 case CONST_INT:
5257 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5258 break;
5259
5260 case CONST:
5261 /* This used to output parentheses around the expression,
5262 but that does not work on the 386 (either ATT or BSD assembler). */
5263 output_pic_addr_const (file, XEXP (x, 0), code);
5264 break;
5265
5266 case CONST_DOUBLE:
5267 if (GET_MODE (x) == VOIDmode)
5268 {
5269 /* We can use %d if the number is <32 bits and positive. */
5270 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5271 fprintf (file, "0x%lx%08lx",
5272 (unsigned long) CONST_DOUBLE_HIGH (x),
5273 (unsigned long) CONST_DOUBLE_LOW (x));
5274 else
5275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5276 }
5277 else
5278 /* We can't handle floating point constants;
5279 PRINT_OPERAND must handle them. */
5280 output_operand_lossage ("floating constant misused");
5281 break;
5282
5283 case PLUS:
5284 /* Some assemblers need integer constants to appear first. */
5285 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5286 {
5287 output_pic_addr_const (file, XEXP (x, 0), code);
5288 putc ('+', file);
5289 output_pic_addr_const (file, XEXP (x, 1), code);
5290 }
5291 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5292 {
5293 output_pic_addr_const (file, XEXP (x, 1), code);
5294 putc ('+', file);
5295 output_pic_addr_const (file, XEXP (x, 0), code);
5296 }
5297 else
5298 abort ();
5299 break;
5300
5301 case MINUS:
5302 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5303 output_pic_addr_const (file, XEXP (x, 0), code);
5304 putc ('-', file);
5305 output_pic_addr_const (file, XEXP (x, 1), code);
5306 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5307 break;
5308
5309 case UNSPEC:
5310 if (XVECLEN (x, 0) != 1)
5311 abort ();
5312 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5313 switch (XINT (x, 1))
5314 {
5315 case 6:
5316 fputs ("@GOT", file);
5317 break;
5318 case 7:
5319 fputs ("@GOTOFF", file);
5320 break;
5321 case 8:
5322 fputs ("@PLT", file);
5323 break;
5324 case 15:
5325 fputs ("@GOTPCREL(%RIP)", file);
5326 break;
5327 default:
5328 output_operand_lossage ("invalid UNSPEC as operand");
5329 break;
5330 }
5331 break;
5332
5333 default:
5334 output_operand_lossage ("invalid expression as operand");
5335 }
5336 }
5337
5338 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5339 We need to handle our special PIC relocations. */
5340
5341 void
5342 i386_dwarf_output_addr_const (file, x)
5343 FILE *file;
5344 rtx x;
5345 {
5346 #ifdef ASM_QUAD
5347 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5348 #else
5349 if (TARGET_64BIT)
5350 abort ();
5351 fprintf (file, "%s", ASM_LONG);
5352 #endif
5353 if (flag_pic)
5354 output_pic_addr_const (file, x, '\0');
5355 else
5356 output_addr_const (file, x);
5357 fputc ('\n', file);
5358 }
5359
5360 /* In the name of slightly smaller debug output, and to cater to
5361 general assembler lossage, recognize PIC+GOTOFF and turn it back
5362 into a direct symbol reference.  */
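/* For example (illustrative), (plus %ebx (const (unspec [(symbol_ref
   "foo")] 7))) is simplified back to (symbol_ref "foo") for the debug
   output; for the %ebx + %reg * scale forms only the PIC part is
   stripped and the rest of the address is kept.  */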
5363
5364 rtx
5365 i386_simplify_dwarf_addr (orig_x)
5366 rtx orig_x;
5367 {
5368 rtx x = orig_x, y;
5369
5370 if (TARGET_64BIT)
5371 {
5372 if (GET_CODE (x) != CONST
5373 || GET_CODE (XEXP (x, 0)) != UNSPEC
5374 || XINT (XEXP (x, 0), 1) != 15)
5375 return orig_x;
5376 return XVECEXP (XEXP (x, 0), 0, 0);
5377 }
5378
5379 if (GET_CODE (x) != PLUS
5380 || GET_CODE (XEXP (x, 1)) != CONST)
5381 return orig_x;
5382
5383 if (GET_CODE (XEXP (x, 0)) == REG
5384 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5385 /* %ebx + GOT/GOTOFF */
5386 y = NULL;
5387 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5388 {
5389 /* %ebx + %reg * scale + GOT/GOTOFF */
5390 y = XEXP (x, 0);
5391 if (GET_CODE (XEXP (y, 0)) == REG
5392 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5393 y = XEXP (y, 1);
5394 else if (GET_CODE (XEXP (y, 1)) == REG
5395 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5396 y = XEXP (y, 0);
5397 else
5398 return orig_x;
5399 if (GET_CODE (y) != REG
5400 && GET_CODE (y) != MULT
5401 && GET_CODE (y) != ASHIFT)
5402 return orig_x;
5403 }
5404 else
5405 return orig_x;
5406
5407 x = XEXP (XEXP (x, 1), 0);
5408 if (GET_CODE (x) == UNSPEC
5409 && (XINT (x, 1) == 6
5410 || XINT (x, 1) == 7))
5411 {
5412 if (y)
5413 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5414 return XVECEXP (x, 0, 0);
5415 }
5416
5417 if (GET_CODE (x) == PLUS
5418 && GET_CODE (XEXP (x, 0)) == UNSPEC
5419 && GET_CODE (XEXP (x, 1)) == CONST_INT
5420 && (XINT (XEXP (x, 0), 1) == 6
5421 || XINT (XEXP (x, 0), 1) == 7))
5422 {
5423 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5424 if (y)
5425 return gen_rtx_PLUS (Pmode, y, x);
5426 return x;
5427 }
5428
5429 return orig_x;
5430 }
5431 \f
5432 static void
5433 put_condition_code (code, mode, reverse, fp, file)
5434 enum rtx_code code;
5435 enum machine_mode mode;
5436 int reverse, fp;
5437 FILE *file;
5438 {
5439 const char *suffix;
5440
5441 if (mode == CCFPmode || mode == CCFPUmode)
5442 {
5443 enum rtx_code second_code, bypass_code;
5444 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5445 if (bypass_code != NIL || second_code != NIL)
5446 abort ();
5447 code = ix86_fp_compare_code_to_integer (code);
5448 mode = CCmode;
5449 }
5450 if (reverse)
5451 code = reverse_condition (code);
5452
5453 switch (code)
5454 {
5455 case EQ:
5456 suffix = "e";
5457 break;
5458 case NE:
5459 suffix = "ne";
5460 break;
5461 case GT:
5462 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5463 abort ();
5464 suffix = "g";
5465 break;
5466 case GTU:
5467 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5468 Those same assemblers have the same but opposite lossage on cmov.  */
5469 if (mode != CCmode)
5470 abort ();
5471 suffix = fp ? "nbe" : "a";
5472 break;
5473 case LT:
5474 if (mode == CCNOmode || mode == CCGOCmode)
5475 suffix = "s";
5476 else if (mode == CCmode || mode == CCGCmode)
5477 suffix = "l";
5478 else
5479 abort ();
5480 break;
5481 case LTU:
5482 if (mode != CCmode)
5483 abort ();
5484 suffix = "b";
5485 break;
5486 case GE:
5487 if (mode == CCNOmode || mode == CCGOCmode)
5488 suffix = "ns";
5489 else if (mode == CCmode || mode == CCGCmode)
5490 suffix = "ge";
5491 else
5492 abort ();
5493 break;
5494 case GEU:
5495 /* ??? As above. */
5496 if (mode != CCmode)
5497 abort ();
5498 suffix = fp ? "nb" : "ae";
5499 break;
5500 case LE:
5501 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5502 abort ();
5503 suffix = "le";
5504 break;
5505 case LEU:
5506 if (mode != CCmode)
5507 abort ();
5508 suffix = "be";
5509 break;
5510 case UNORDERED:
5511 suffix = fp ? "u" : "p";
5512 break;
5513 case ORDERED:
5514 suffix = fp ? "nu" : "np";
5515 break;
5516 default:
5517 abort ();
5518 }
5519 fputs (suffix, file);
5520 }
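
/* Illustrative examples (not from the original source): (gt ...) in
   CCGCmode prints "g", (ltu ...) in CCmode prints "b", and with REVERSE
   set the code is flipped first, so (eq ...) prints "ne".  FP comparisons
   are first mapped to their integer equivalents by
   ix86_fp_compare_code_to_integer.  */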
5521
5522 void
5523 print_reg (x, code, file)
5524 rtx x;
5525 int code;
5526 FILE *file;
5527 {
5528 if (REGNO (x) == ARG_POINTER_REGNUM
5529 || REGNO (x) == FRAME_POINTER_REGNUM
5530 || REGNO (x) == FLAGS_REG
5531 || REGNO (x) == FPSR_REG)
5532 abort ();
5533
5534 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5535 putc ('%', file);
5536
5537 if (code == 'w' || MMX_REG_P (x))
5538 code = 2;
5539 else if (code == 'b')
5540 code = 1;
5541 else if (code == 'k')
5542 code = 4;
5543 else if (code == 'q')
5544 code = 8;
5545 else if (code == 'y')
5546 code = 3;
5547 else if (code == 'h')
5548 code = 0;
5549 else
5550 code = GET_MODE_SIZE (GET_MODE (x));
5551
5552 /* Irritatingly, the AMD extended registers use a different naming
5553 convention from the normal registers. */
5554 if (REX_INT_REG_P (x))
5555 {
5556 if (!TARGET_64BIT)
5557 abort ();
5558 switch (code)
5559 {
5560 case 0:
5561 error ("extended registers have no high halves");
5562 break;
5563 case 1:
5564 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5565 break;
5566 case 2:
5567 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5568 break;
5569 case 4:
5570 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5571 break;
5572 case 8:
5573 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5574 break;
5575 default:
5576 error ("unsupported operand size for extended register");
5577 break;
5578 }
5579 return;
5580 }
5581 switch (code)
5582 {
5583 case 3:
5584 if (STACK_TOP_P (x))
5585 {
5586 fputs ("st(0)", file);
5587 break;
5588 }
5589 /* FALLTHRU */
5590 case 8:
5591 case 4:
5592 case 12:
5593 if (! ANY_FP_REG_P (x))
5594 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5595 /* FALLTHRU */
5596 case 16:
5597 case 2:
5598 fputs (hi_reg_name[REGNO (x)], file);
5599 break;
5600 case 1:
5601 fputs (qi_reg_name[REGNO (x)], file);
5602 break;
5603 case 0:
5604 fputs (qi_high_reg_name[REGNO (x)], file);
5605 break;
5606 default:
5607 abort ();
5608 }
5609 }
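
/* Illustrative examples (not from the original source), AT&T syntax: for
   (reg:SI %eax), code 'b' prints "%al", 'w' prints "%ax", 'k' prints
   "%eax", 'h' prints "%ah", and 'q' prints "%rax" in 64-bit mode; the
   extended registers use the r8b/r8w/r8d/r8 spellings instead.  */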
5610
5611 /* Meaning of CODE:
5612 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5613 C -- print opcode suffix for set/cmov insn.
5614 c -- like C, but print reversed condition
5615 F,f -- likewise, but for floating-point.
5616 R -- print the prefix for register names.
5617 z -- print the opcode suffix for the size of the current operand.
5618 * -- print a star (in certain assembler syntax)
5619 A -- print an absolute memory reference.
5620 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5621 s -- print a shift double count, followed by the assembler's argument
5622 delimiter.
5623 b -- print the QImode name of the register for the indicated operand.
5624 %b0 would print %al if operands[0] is reg 0.
5625 w -- likewise, print the HImode name of the register.
5626 k -- likewise, print the SImode name of the register.
5627 q -- likewise, print the DImode name of the register.
5628 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5629 y -- print "st(0)" instead of "st" as a register.
5630 D -- print condition for SSE cmp instruction.
5631 P -- if PIC, print an @PLT suffix.
5632 X -- don't print any sort of PIC '@' suffix for a symbol.
5633 */
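
/* As a usage sketch (hypothetical template, not taken from this port's
   .md file), an instruction pattern might combine these codes as
       "cmov%C1\t{%2, %0|%0, %2}"
   letting print_operand emit the condition suffix and the
   dialect-dependent operand order from a single template.  */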
5634
5635 void
5636 print_operand (file, x, code)
5637 FILE *file;
5638 rtx x;
5639 int code;
5640 {
5641 if (code)
5642 {
5643 switch (code)
5644 {
5645 case '*':
5646 if (ASSEMBLER_DIALECT == ASM_ATT)
5647 putc ('*', file);
5648 return;
5649
5650 case 'A':
5651 if (ASSEMBLER_DIALECT == ASM_ATT)
5652 putc ('*', file);
5653 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5654 {
5655 /* Intel syntax. For absolute addresses, registers should not
5656 be surrounded by brackets. */
5657 if (GET_CODE (x) != REG)
5658 {
5659 putc ('[', file);
5660 PRINT_OPERAND (file, x, 0);
5661 putc (']', file);
5662 return;
5663 }
5664 }
5665 else
5666 abort ();
5667
5668 PRINT_OPERAND (file, x, 0);
5669 return;
5670
5671
5672 case 'L':
5673 if (ASSEMBLER_DIALECT == ASM_ATT)
5674 putc ('l', file);
5675 return;
5676
5677 case 'W':
5678 if (ASSEMBLER_DIALECT == ASM_ATT)
5679 putc ('w', file);
5680 return;
5681
5682 case 'B':
5683 if (ASSEMBLER_DIALECT == ASM_ATT)
5684 putc ('b', file);
5685 return;
5686
5687 case 'Q':
5688 if (ASSEMBLER_DIALECT == ASM_ATT)
5689 putc ('l', file);
5690 return;
5691
5692 case 'S':
5693 if (ASSEMBLER_DIALECT == ASM_ATT)
5694 putc ('s', file);
5695 return;
5696
5697 case 'T':
5698 if (ASSEMBLER_DIALECT == ASM_ATT)
5699 putc ('t', file);
5700 return;
5701
5702 case 'z':
5703 /* 387 opcodes don't get size suffixes if the operands are
5704 registers. */
5705
5706 if (STACK_REG_P (x))
5707 return;
5708
5709 /* Determine the opcode suffix from the size of the operand. */
5710 switch (GET_MODE_SIZE (GET_MODE (x)))
5711 {
5712 case 2:
5713 #ifdef HAVE_GAS_FILDS_FISTS
5714 putc ('s', file);
5715 #endif
5716 return;
5717
5718 case 4:
5719 if (GET_MODE (x) == SFmode)
5720 {
5721 putc ('s', file);
5722 return;
5723 }
5724 else
5725 putc ('l', file);
5726 return;
5727
5728 case 12:
5729 case 16:
5730 putc ('t', file);
5731 return;
5732
5733 case 8:
5734 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5735 {
5736 #ifdef GAS_MNEMONICS
5737 putc ('q', file);
5738 #else
5739 putc ('l', file);
5740 putc ('l', file);
5741 #endif
5742 }
5743 else
5744 putc ('l', file);
5745 return;
5746
5747 default:
5748 abort ();
5749 }
5750
5751 case 'b':
5752 case 'w':
5753 case 'k':
5754 case 'q':
5755 case 'h':
5756 case 'y':
5757 case 'X':
5758 case 'P':
5759 break;
5760
5761 case 's':
5762 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5763 {
5764 PRINT_OPERAND (file, x, 0);
5765 putc (',', file);
5766 }
5767 return;
5768
5769 case 'D':
5770 /* A little bit of brain damage here. The SSE compare instructions
5771 use completely different names for the comparisons than the
5772 fp conditional moves do. */
5773 switch (GET_CODE (x))
5774 {
5775 case EQ:
5776 case UNEQ:
5777 fputs ("eq", file);
5778 break;
5779 case LT:
5780 case UNLT:
5781 fputs ("lt", file);
5782 break;
5783 case LE:
5784 case UNLE:
5785 fputs ("le", file);
5786 break;
5787 case UNORDERED:
5788 fputs ("unord", file);
5789 break;
5790 case NE:
5791 case LTGT:
5792 fputs ("neq", file);
5793 break;
5794 case UNGE:
5795 case GE:
5796 fputs ("nlt", file);
5797 break;
5798 case UNGT:
5799 case GT:
5800 fputs ("nle", file);
5801 break;
5802 case ORDERED:
5803 fputs ("ord", file);
5804 break;
5805 default:
5806 abort ();
5807 break;
5808 }
5809 return;
5810 case 'C':
5811 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5812 return;
5813 case 'F':
5814 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5815 return;
5816
5817 /* Like above, but reverse condition */
5818 case 'c':
5819 /* Check to see if argument to %c is really a constant
5820 and not a condition code which needs to be reversed. */
5821 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5822 {
5823 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5824 return;
5825 }
5826 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5827 return;
5828 case 'f':
5829 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5830 return;
5831 case '+':
5832 {
5833 rtx x;
5834
5835 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5836 return;
5837
5838 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5839 if (x)
5840 {
5841 int pred_val = INTVAL (XEXP (x, 0));
5842
5843 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5844 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5845 {
5846 int taken = pred_val > REG_BR_PROB_BASE / 2;
5847 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5848
5849 /* Emit hints only in the case the default branch prediction
5850 heuristics would fail. */
5851 if (taken != cputaken)
5852 {
5853 /* We use 3e (DS) prefix for taken branches and
5854 2e (CS) prefix for not taken branches. */
5855 if (taken)
5856 fputs ("ds ; ", file);
5857 else
5858 fputs ("cs ; ", file);
5859 }
5860 }
5861 }
5862 return;
5863 }
5864 default:
5865 {
5866 char str[50];
5867 sprintf (str, "invalid operand code `%c'", code);
5868 output_operand_lossage (str);
5869 }
5870 }
5871 }
5872
5873 if (GET_CODE (x) == REG)
5874 {
5875 PRINT_REG (x, code, file);
5876 }
5877
5878 else if (GET_CODE (x) == MEM)
5879 {
5880 /* No `byte ptr' prefix for call instructions. */
5881 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5882 {
5883 const char * size;
5884 switch (GET_MODE_SIZE (GET_MODE (x)))
5885 {
5886 case 1: size = "BYTE"; break;
5887 case 2: size = "WORD"; break;
5888 case 4: size = "DWORD"; break;
5889 case 8: size = "QWORD"; break;
5890 case 12: size = "XWORD"; break;
5891 case 16: size = "XMMWORD"; break;
5892 default:
5893 abort ();
5894 }
5895
5896 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5897 if (code == 'b')
5898 size = "BYTE";
5899 else if (code == 'w')
5900 size = "WORD";
5901 else if (code == 'k')
5902 size = "DWORD";
5903
5904 fputs (size, file);
5905 fputs (" PTR ", file);
5906 }
5907
5908 x = XEXP (x, 0);
5909 if (flag_pic && CONSTANT_ADDRESS_P (x))
5910 output_pic_addr_const (file, x, code);
5911 /* Avoid (%rip) for call operands. */
5912 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5913 && GET_CODE (x) != CONST_INT)
5914 output_addr_const (file, x);
5915 else
5916 output_address (x);
5917 }
5918
5919 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5920 {
5921 REAL_VALUE_TYPE r;
5922 long l;
5923
5924 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5925 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5926
5927 if (ASSEMBLER_DIALECT == ASM_ATT)
5928 putc ('$', file);
5929 fprintf (file, "0x%lx", l);
5930 }
5931
5932 /* These float cases don't actually occur as immediate operands. */
5933 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5934 {
5935 REAL_VALUE_TYPE r;
5936 char dstr[30];
5937
5938 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5939 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5940 fprintf (file, "%s", dstr);
5941 }
5942
5943 else if (GET_CODE (x) == CONST_DOUBLE
5944 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5945 {
5946 REAL_VALUE_TYPE r;
5947 char dstr[30];
5948
5949 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5950 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5951 fprintf (file, "%s", dstr);
5952 }
5953 else
5954 {
5955 if (code != 'P')
5956 {
5957 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5958 {
5959 if (ASSEMBLER_DIALECT == ASM_ATT)
5960 putc ('$', file);
5961 }
5962 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5963 || GET_CODE (x) == LABEL_REF)
5964 {
5965 if (ASSEMBLER_DIALECT == ASM_ATT)
5966 putc ('$', file);
5967 else
5968 fputs ("OFFSET FLAT:", file);
5969 }
5970 }
5971 if (GET_CODE (x) == CONST_INT)
5972 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5973 else if (flag_pic)
5974 output_pic_addr_const (file, x, code);
5975 else
5976 output_addr_const (file, x);
5977 }
5978 }
5979 \f
5980 /* Print a memory operand whose address is ADDR. */
5981
5982 void
5983 print_operand_address (file, addr)
5984 FILE *file;
5985 register rtx addr;
5986 {
5987 struct ix86_address parts;
5988 rtx base, index, disp;
5989 int scale;
5990
5991 if (! ix86_decompose_address (addr, &parts))
5992 abort ();
5993
5994 base = parts.base;
5995 index = parts.index;
5996 disp = parts.disp;
5997 scale = parts.scale;
5998
5999 if (!base && !index)
6000 {
6001 /* A displacement-only address requires special attention. */
6002
6003 if (GET_CODE (disp) == CONST_INT)
6004 {
6005 if (ASSEMBLER_DIALECT == ASM_INTEL)
6006 {
6007 if (USER_LABEL_PREFIX[0] == 0)
6008 putc ('%', file);
6009 fputs ("ds:", file);
6010 }
6011 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6012 }
6013 else if (flag_pic)
6014 output_pic_addr_const (file, addr, 0);
6015 else
6016 output_addr_const (file, addr);
6017
6018 /* Use the one byte shorter RIP relative addressing for 64-bit mode. */
6019 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6020 fputs ("(%rip)", file);
6021 }
6022 else
6023 {
6024 if (ASSEMBLER_DIALECT == ASM_ATT)
6025 {
6026 if (disp)
6027 {
6028 if (flag_pic)
6029 output_pic_addr_const (file, disp, 0);
6030 else if (GET_CODE (disp) == LABEL_REF)
6031 output_asm_label (disp);
6032 else
6033 output_addr_const (file, disp);
6034 }
6035
6036 putc ('(', file);
6037 if (base)
6038 PRINT_REG (base, 0, file);
6039 if (index)
6040 {
6041 putc (',', file);
6042 PRINT_REG (index, 0, file);
6043 if (scale != 1)
6044 fprintf (file, ",%d", scale);
6045 }
6046 putc (')', file);
6047 }
6048 else
6049 {
6050 rtx offset = NULL_RTX;
6051
6052 if (disp)
6053 {
6054 /* Pull out the offset of a symbol; print any symbol itself. */
6055 if (GET_CODE (disp) == CONST
6056 && GET_CODE (XEXP (disp, 0)) == PLUS
6057 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6058 {
6059 offset = XEXP (XEXP (disp, 0), 1);
6060 disp = gen_rtx_CONST (VOIDmode,
6061 XEXP (XEXP (disp, 0), 0));
6062 }
6063
6064 if (flag_pic)
6065 output_pic_addr_const (file, disp, 0);
6066 else if (GET_CODE (disp) == LABEL_REF)
6067 output_asm_label (disp);
6068 else if (GET_CODE (disp) == CONST_INT)
6069 offset = disp;
6070 else
6071 output_addr_const (file, disp);
6072 }
6073
6074 putc ('[', file);
6075 if (base)
6076 {
6077 PRINT_REG (base, 0, file);
6078 if (offset)
6079 {
6080 if (INTVAL (offset) >= 0)
6081 putc ('+', file);
6082 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6083 }
6084 }
6085 else if (offset)
6086 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6087 else
6088 putc ('0', file);
6089
6090 if (index)
6091 {
6092 putc ('+', file);
6093 PRINT_REG (index, 0, file);
6094 if (scale != 1)
6095 fprintf (file, "*%d", scale);
6096 }
6097 putc (']', file);
6098 }
6099 }
6100 }
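
/* Illustrative output (not from the original source): the address
     (plus (plus (reg %ebp) (mult (reg %eax) (const_int 4))) (const_int -4))
   prints as "-4(%ebp,%eax,4)" in AT&T syntax and, modulo the register
   prefix, as "[ebp-4+eax*4]" in Intel syntax, following the two branches
   above.  */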
6101 \f
6102 /* Split one or more DImode RTL references into pairs of SImode
6103 references. The RTL can be REG, offsettable MEM, integer constant, or
6104 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6105 split and "num" is its length. lo_half and hi_half are output arrays
6106 that parallel "operands". */
6107
6108 void
6109 split_di (operands, num, lo_half, hi_half)
6110 rtx operands[];
6111 int num;
6112 rtx lo_half[], hi_half[];
6113 {
6114 while (num--)
6115 {
6116 rtx op = operands[num];
6117
6118 /* simplify_subreg refuses to split volatile memory addresses,
6119 but we still have to handle them. */
6120 if (GET_CODE (op) == MEM)
6121 {
6122 lo_half[num] = adjust_address (op, SImode, 0);
6123 hi_half[num] = adjust_address (op, SImode, 4);
6124 }
6125 else
6126 {
6127 lo_half[num] = simplify_gen_subreg (SImode, op,
6128 GET_MODE (op) == VOIDmode
6129 ? DImode : GET_MODE (op), 0);
6130 hi_half[num] = simplify_gen_subreg (SImode, op,
6131 GET_MODE (op) == VOIDmode
6132 ? DImode : GET_MODE (op), 4);
6133 }
6134 }
6135 }
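
/* A minimal usage sketch (assumed caller, not from this file):
     rtx lo[2], hi[2];
     split_di (operands, 2, lo, hi);
   after which lo[i]/hi[i] hold the SImode halves of operands[i]; MEMs are
   split with adjust_address at offsets 0 and 4, everything else via
   simplify_gen_subreg.  */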
6136 /* Split one or more TImode RTL references into pairs of DImode
6137 references. The RTL can be REG, offsettable MEM, integer constant, or
6138 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6139 split and "num" is its length. lo_half and hi_half are output arrays
6140 that parallel "operands". */
6141
6142 void
6143 split_ti (operands, num, lo_half, hi_half)
6144 rtx operands[];
6145 int num;
6146 rtx lo_half[], hi_half[];
6147 {
6148 while (num--)
6149 {
6150 rtx op = operands[num];
6151
6152 /* simplify_subreg refuses to split volatile memory addresses, but we
6153 still have to handle them. */
6154 if (GET_CODE (op) == MEM)
6155 {
6156 lo_half[num] = adjust_address (op, DImode, 0);
6157 hi_half[num] = adjust_address (op, DImode, 8);
6158 }
6159 else
6160 {
6161 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6162 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6163 }
6164 }
6165 }
6166 \f
6167 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6168 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6169 is the expression of the binary operation. The output may either be
6170 emitted here, or returned to the caller, like all output_* functions.
6171
6172 There is no guarantee that the operands are the same mode, as they
6173 might be within FLOAT or FLOAT_EXTEND expressions. */
6174
6175 #ifndef SYSV386_COMPAT
6176 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6177 wants to fix the assemblers because that causes incompatibility
6178 with gcc. No-one wants to fix gcc because that causes
6179 incompatibility with assemblers... You can use the option of
6180 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6181 #define SYSV386_COMPAT 1
6182 #endif
6183
6184 const char *
6185 output_387_binary_op (insn, operands)
6186 rtx insn;
6187 rtx *operands;
6188 {
6189 static char buf[30];
6190 const char *p;
6191 const char *ssep;
6192 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6193
6194 #ifdef ENABLE_CHECKING
6195 /* Even if we do not want to check the inputs, this documents the input
6196 constraints, which helps in understanding the following code. */
6197 if (STACK_REG_P (operands[0])
6198 && ((REG_P (operands[1])
6199 && REGNO (operands[0]) == REGNO (operands[1])
6200 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6201 || (REG_P (operands[2])
6202 && REGNO (operands[0]) == REGNO (operands[2])
6203 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6204 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6205 ; /* ok */
6206 else if (!is_sse)
6207 abort ();
6208 #endif
6209
6210 switch (GET_CODE (operands[3]))
6211 {
6212 case PLUS:
6213 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6214 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6215 p = "fiadd";
6216 else
6217 p = "fadd";
6218 ssep = "add";
6219 break;
6220
6221 case MINUS:
6222 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6223 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6224 p = "fisub";
6225 else
6226 p = "fsub";
6227 ssep = "sub";
6228 break;
6229
6230 case MULT:
6231 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6232 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6233 p = "fimul";
6234 else
6235 p = "fmul";
6236 ssep = "mul";
6237 break;
6238
6239 case DIV:
6240 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6241 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6242 p = "fidiv";
6243 else
6244 p = "fdiv";
6245 ssep = "div";
6246 break;
6247
6248 default:
6249 abort ();
6250 }
6251
6252 if (is_sse)
6253 {
6254 strcpy (buf, ssep);
6255 if (GET_MODE (operands[0]) == SFmode)
6256 strcat (buf, "ss\t{%2, %0|%0, %2}");
6257 else
6258 strcat (buf, "sd\t{%2, %0|%0, %2}");
6259 return buf;
6260 }
6261 strcpy (buf, p);
6262
6263 switch (GET_CODE (operands[3]))
6264 {
6265 case MULT:
6266 case PLUS:
6267 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6268 {
6269 rtx temp = operands[2];
6270 operands[2] = operands[1];
6271 operands[1] = temp;
6272 }
6273
6274 /* We now know operands[0] == operands[1]. */
6275
6276 if (GET_CODE (operands[2]) == MEM)
6277 {
6278 p = "%z2\t%2";
6279 break;
6280 }
6281
6282 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6283 {
6284 if (STACK_TOP_P (operands[0]))
6285 /* How is it that we are storing to a dead operand[2]?
6286 Well, presumably operands[1] is dead too. We can't
6287 store the result to st(0) as st(0) gets popped on this
6288 instruction. Instead store to operands[2] (which I
6289 think has to be st(1)). st(1) will be popped later.
6290 gcc <= 2.8.1 didn't have this check and generated
6291 assembly code that the Unixware assembler rejected. */
6292 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6293 else
6294 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6295 break;
6296 }
6297
6298 if (STACK_TOP_P (operands[0]))
6299 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6300 else
6301 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6302 break;
6303
6304 case MINUS:
6305 case DIV:
6306 if (GET_CODE (operands[1]) == MEM)
6307 {
6308 p = "r%z1\t%1";
6309 break;
6310 }
6311
6312 if (GET_CODE (operands[2]) == MEM)
6313 {
6314 p = "%z2\t%2";
6315 break;
6316 }
6317
6318 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6319 {
6320 #if SYSV386_COMPAT
6321 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6322 derived assemblers, confusingly reverse the direction of
6323 the operation for fsub{r} and fdiv{r} when the
6324 destination register is not st(0). The Intel assembler
6325 doesn't have this brain damage. Read !SYSV386_COMPAT to
6326 figure out what the hardware really does. */
6327 if (STACK_TOP_P (operands[0]))
6328 p = "{p\t%0, %2|rp\t%2, %0}";
6329 else
6330 p = "{rp\t%2, %0|p\t%0, %2}";
6331 #else
6332 if (STACK_TOP_P (operands[0]))
6333 /* As above for fmul/fadd, we can't store to st(0). */
6334 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6335 else
6336 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6337 #endif
6338 break;
6339 }
6340
6341 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6342 {
6343 #if SYSV386_COMPAT
6344 if (STACK_TOP_P (operands[0]))
6345 p = "{rp\t%0, %1|p\t%1, %0}";
6346 else
6347 p = "{p\t%1, %0|rp\t%0, %1}";
6348 #else
6349 if (STACK_TOP_P (operands[0]))
6350 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6351 else
6352 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6353 #endif
6354 break;
6355 }
6356
6357 if (STACK_TOP_P (operands[0]))
6358 {
6359 if (STACK_TOP_P (operands[1]))
6360 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6361 else
6362 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6363 break;
6364 }
6365 else if (STACK_TOP_P (operands[1]))
6366 {
6367 #if SYSV386_COMPAT
6368 p = "{\t%1, %0|r\t%0, %1}";
6369 #else
6370 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6371 #endif
6372 }
6373 else
6374 {
6375 #if SYSV386_COMPAT
6376 p = "{r\t%2, %0|\t%0, %2}";
6377 #else
6378 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6379 #endif
6380 }
6381 break;
6382
6383 default:
6384 abort ();
6385 }
6386
6387 strcat (buf, p);
6388 return buf;
6389 }
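
/* Illustrative results (not from the original source): for
   st(0) = st(0) + mem this returns "fadd%z2\t%2", assembling to e.g.
   "faddl -8(%ebp)"; when the second register operand dies, the popping
   form such as "faddp\t{%2, %0|%0, %2}" is returned instead.  */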
6390
6391 /* Output code to initialize the control word copies used by the
6392 trunc?f?i patterns. NORMAL is set to the current control word, while
6393 ROUND_DOWN is set to a control word that rounds toward zero (truncates). */
6394 void
6395 emit_i387_cw_initialization (normal, round_down)
6396 rtx normal, round_down;
6397 {
6398 rtx reg = gen_reg_rtx (HImode);
6399
6400 emit_insn (gen_x86_fnstcw_1 (normal));
6401 emit_move_insn (reg, normal);
6402 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6403 && !TARGET_64BIT)
6404 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6405 else
6406 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6407 emit_move_insn (round_down, reg);
6408 }
6409
6410 /* Output code for INSN to convert a float to a signed int. OPERANDS
6411 are the insn operands. The output may be [HSD]Imode and the input
6412 operand may be [SDX]Fmode. */
6413
6414 const char *
6415 output_fix_trunc (insn, operands)
6416 rtx insn;
6417 rtx *operands;
6418 {
6419 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6420 int dimode_p = GET_MODE (operands[0]) == DImode;
6421
6422 /* Jump through a hoop or two for DImode, since the hardware has no
6423 non-popping instruction. We used to do this a different way, but
6424 that was somewhat fragile and broke with post-reload splitters. */
6425 if (dimode_p && !stack_top_dies)
6426 output_asm_insn ("fld\t%y1", operands);
6427
6428 if (!STACK_TOP_P (operands[1]))
6429 abort ();
6430
6431 if (GET_CODE (operands[0]) != MEM)
6432 abort ();
6433
6434 output_asm_insn ("fldcw\t%3", operands);
6435 if (stack_top_dies || dimode_p)
6436 output_asm_insn ("fistp%z0\t%0", operands);
6437 else
6438 output_asm_insn ("fist%z0\t%0", operands);
6439 output_asm_insn ("fldcw\t%2", operands);
6440
6441 return "";
6442 }
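
/* Typical emitted sequence (illustration only; the operand names are
   hypothetical):
       fldcw   round_down_cw       # switch to the truncating control word
       fistpl  -4(%ebp)            # store and pop when the stack top dies
       fldcw   normal_cw           # restore the saved control word
   The non-popping "fist" form is used when the value must stay live.  */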
6443
6444 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6445 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6446 when fucom should be used. */
6447
6448 const char *
6449 output_fp_compare (insn, operands, eflags_p, unordered_p)
6450 rtx insn;
6451 rtx *operands;
6452 int eflags_p, unordered_p;
6453 {
6454 int stack_top_dies;
6455 rtx cmp_op0 = operands[0];
6456 rtx cmp_op1 = operands[1];
6457 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6458
6459 if (eflags_p == 2)
6460 {
6461 cmp_op0 = cmp_op1;
6462 cmp_op1 = operands[2];
6463 }
6464 if (is_sse)
6465 {
6466 if (GET_MODE (operands[0]) == SFmode)
6467 if (unordered_p)
6468 return "ucomiss\t{%1, %0|%0, %1}";
6469 else
6470 return "comiss\t{%1, %0|%0, %1}";
6471 else
6472 if (unordered_p)
6473 return "ucomisd\t{%1, %0|%0, %1}";
6474 else
6475 return "comisd\t{%1, %0|%0, %1}";
6476 }
6477
6478 if (! STACK_TOP_P (cmp_op0))
6479 abort ();
6480
6481 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6482
6483 if (STACK_REG_P (cmp_op1)
6484 && stack_top_dies
6485 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6486 && REGNO (cmp_op1) != FIRST_STACK_REG)
6487 {
6488 /* If both the top of the 387 stack and the other operand
6489 (which is also a stack register) die, then this must be a
6490 `fcompp' float compare. */
6491
6492 if (eflags_p == 1)
6493 {
6494 /* There is no double popping fcomi variant. Fortunately,
6495 eflags is immune from the fstp's cc clobbering. */
6496 if (unordered_p)
6497 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6498 else
6499 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6500 return "fstp\t%y0";
6501 }
6502 else
6503 {
6504 if (eflags_p == 2)
6505 {
6506 if (unordered_p)
6507 return "fucompp\n\tfnstsw\t%0";
6508 else
6509 return "fcompp\n\tfnstsw\t%0";
6510 }
6511 else
6512 {
6513 if (unordered_p)
6514 return "fucompp";
6515 else
6516 return "fcompp";
6517 }
6518 }
6519 }
6520 else
6521 {
6522 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6523
6524 static const char * const alt[24] =
6525 {
6526 "fcom%z1\t%y1",
6527 "fcomp%z1\t%y1",
6528 "fucom%z1\t%y1",
6529 "fucomp%z1\t%y1",
6530
6531 "ficom%z1\t%y1",
6532 "ficomp%z1\t%y1",
6533 NULL,
6534 NULL,
6535
6536 "fcomi\t{%y1, %0|%0, %y1}",
6537 "fcomip\t{%y1, %0|%0, %y1}",
6538 "fucomi\t{%y1, %0|%0, %y1}",
6539 "fucomip\t{%y1, %0|%0, %y1}",
6540
6541 NULL,
6542 NULL,
6543 NULL,
6544 NULL,
6545
6546 "fcom%z2\t%y2\n\tfnstsw\t%0",
6547 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6548 "fucom%z2\t%y2\n\tfnstsw\t%0",
6549 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6550
6551 "ficom%z2\t%y2\n\tfnstsw\t%0",
6552 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6553 NULL,
6554 NULL
6555 };
6556
6557 int mask;
6558 const char *ret;
6559
6560 mask = eflags_p << 3;
6561 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6562 mask |= unordered_p << 1;
6563 mask |= stack_top_dies;
6564
6565 if (mask >= 24)
6566 abort ();
6567 ret = alt[mask];
6568 if (ret == NULL)
6569 abort ();
6570
6571 return ret;
6572 }
6573 }
6574
6575 void
6576 ix86_output_addr_vec_elt (file, value)
6577 FILE *file;
6578 int value;
6579 {
6580 const char *directive = ASM_LONG;
6581
6582 if (TARGET_64BIT)
6583 {
6584 #ifdef ASM_QUAD
6585 directive = ASM_QUAD;
6586 #else
6587 abort ();
6588 #endif
6589 }
6590
6591 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6592 }
6593
6594 void
6595 ix86_output_addr_diff_elt (file, value, rel)
6596 FILE *file;
6597 int value, rel;
6598 {
6599 if (TARGET_64BIT)
6600 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6601 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6602 else if (HAVE_AS_GOTOFF_IN_DATA)
6603 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6604 else
6605 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6606 ASM_LONG, LPREFIX, value);
6607 }
6608 \f
6609 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6610 for the target. */
6611
6612 void
6613 ix86_expand_clear (dest)
6614 rtx dest;
6615 {
6616 rtx tmp;
6617
6618 /* We play register width games, which are only valid after reload. */
6619 if (!reload_completed)
6620 abort ();
6621
6622 /* Avoid HImode and its attendant prefix byte. */
6623 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6624 dest = gen_rtx_REG (SImode, REGNO (dest));
6625
6626 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6627
6628 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6629 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6630 {
6631 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6632 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6633 }
6634
6635 emit_insn (tmp);
6636 }
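
/* For illustration (not from the original source): clearing %eax emits
   "xor %eax, %eax" together with a flags clobber when TARGET_USE_MOV0 is
   off or when optimizing for size, and a plain "mov $0, %eax" otherwise;
   QImode and HImode destinations are widened to SImode first to avoid the
   operand-size prefix.  */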
6637
6638 void
6639 ix86_expand_move (mode, operands)
6640 enum machine_mode mode;
6641 rtx operands[];
6642 {
6643 int strict = (reload_in_progress || reload_completed);
6644 rtx insn;
6645
6646 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6647 {
6648 /* Emit insns to move operands[1] into operands[0]. */
6649
6650 if (GET_CODE (operands[0]) == MEM)
6651 operands[1] = force_reg (Pmode, operands[1]);
6652 else
6653 {
6654 rtx temp = operands[0];
6655 if (GET_CODE (temp) != REG)
6656 temp = gen_reg_rtx (Pmode);
6657 temp = legitimize_pic_address (operands[1], temp);
6658 if (temp == operands[0])
6659 return;
6660 operands[1] = temp;
6661 }
6662 }
6663 else
6664 {
6665 if (GET_CODE (operands[0]) == MEM
6666 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6667 || !push_operand (operands[0], mode))
6668 && GET_CODE (operands[1]) == MEM)
6669 operands[1] = force_reg (mode, operands[1]);
6670
6671 if (push_operand (operands[0], mode)
6672 && ! general_no_elim_operand (operands[1], mode))
6673 operands[1] = copy_to_mode_reg (mode, operands[1]);
6674
6675 /* Force large constants in 64bit compilation into register
6676 to get them CSEed. */
6677 if (TARGET_64BIT && mode == DImode
6678 && immediate_operand (operands[1], mode)
6679 && !x86_64_zero_extended_value (operands[1])
6680 && !register_operand (operands[0], mode)
6681 && optimize && !reload_completed && !reload_in_progress)
6682 operands[1] = copy_to_mode_reg (mode, operands[1]);
6683
6684 if (FLOAT_MODE_P (mode))
6685 {
6686 /* If we are loading a floating point constant to a register,
6687 force the value to memory now, since we'll get better code
6688 out of the back end. */
6689
6690 if (strict)
6691 ;
6692 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6693 && register_operand (operands[0], mode))
6694 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6695 }
6696 }
6697
6698 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6699
6700 emit_insn (insn);
6701 }
6702
6703 void
6704 ix86_expand_vector_move (mode, operands)
6705 enum machine_mode mode;
6706 rtx operands[];
6707 {
6708 /* Force constants other than zero into memory. We do not know how
6709 the instructions used to build constants modify the upper 64 bits
6710 of the register; once we have that information we may be able
6711 to handle some of them more efficiently. */
6712 if ((reload_in_progress | reload_completed) == 0
6713 && register_operand (operands[0], mode)
6714 && CONSTANT_P (operands[1]))
6715 {
6716 rtx addr = gen_reg_rtx (Pmode);
6717 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6718 operands[1] = gen_rtx_MEM (mode, addr);
6719 }
6720
6721 /* Make operand1 a register if it isn't already. */
6722 if ((reload_in_progress | reload_completed) == 0
6723 && !register_operand (operands[0], mode)
6724 && !register_operand (operands[1], mode)
6725 && operands[1] != CONST0_RTX (mode))
6726 {
6727 rtx temp = force_reg (TImode, operands[1]);
6728 emit_move_insn (operands[0], temp);
6729 return;
6730 }
6731
6732 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6733 }
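
/* For illustration (not from the original source): a vector constant
   being loaded into a register is first spilled to the constant pool and
   reloaded through a pointer register; when neither operand is a
   register, the source (unless it is the all-zeros constant) is forced
   into a register so the move pattern sees at most one memory operand.  */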
6734
6735 /* Attempt to expand a binary operator. Make the expansion closer to the
6736 actual machine than just general_operand, which would allow 3 separate
6737 memory references (one output, two input) in a single insn. */
6738
6739 void
6740 ix86_expand_binary_operator (code, mode, operands)
6741 enum rtx_code code;
6742 enum machine_mode mode;
6743 rtx operands[];
6744 {
6745 int matching_memory;
6746 rtx src1, src2, dst, op, clob;
6747
6748 dst = operands[0];
6749 src1 = operands[1];
6750 src2 = operands[2];
6751
6752 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6753 if (GET_RTX_CLASS (code) == 'c'
6754 && (rtx_equal_p (dst, src2)
6755 || immediate_operand (src1, mode)))
6756 {
6757 rtx temp = src1;
6758 src1 = src2;
6759 src2 = temp;
6760 }
6761
6762 /* If the destination is memory, and we do not have matching source
6763 operands, do things in registers. */
6764 matching_memory = 0;
6765 if (GET_CODE (dst) == MEM)
6766 {
6767 if (rtx_equal_p (dst, src1))
6768 matching_memory = 1;
6769 else if (GET_RTX_CLASS (code) == 'c'
6770 && rtx_equal_p (dst, src2))
6771 matching_memory = 2;
6772 else
6773 dst = gen_reg_rtx (mode);
6774 }
6775
6776 /* Both source operands cannot be in memory. */
6777 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6778 {
6779 if (matching_memory != 2)
6780 src2 = force_reg (mode, src2);
6781 else
6782 src1 = force_reg (mode, src1);
6783 }
6784
6785 /* If the operation is not commutative, source 1 cannot be a constant
6786 or non-matching memory. */
6787 if ((CONSTANT_P (src1)
6788 || (!matching_memory && GET_CODE (src1) == MEM))
6789 && GET_RTX_CLASS (code) != 'c')
6790 src1 = force_reg (mode, src1);
6791
6792 /* If optimizing, copy to regs to improve CSE */
6793 if (optimize && ! no_new_pseudos)
6794 {
6795 if (GET_CODE (dst) == MEM)
6796 dst = gen_reg_rtx (mode);
6797 if (GET_CODE (src1) == MEM)
6798 src1 = force_reg (mode, src1);
6799 if (GET_CODE (src2) == MEM)
6800 src2 = force_reg (mode, src2);
6801 }
6802
6803 /* Emit the instruction. */
6804
6805 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6806 if (reload_in_progress)
6807 {
6808 /* Reload doesn't know about the flags register, and doesn't know that
6809 it doesn't want to clobber it. We can only do this with PLUS. */
6810 if (code != PLUS)
6811 abort ();
6812 emit_insn (op);
6813 }
6814 else
6815 {
6816 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6818 }
6819
6820 /* Fix up the destination if needed. */
6821 if (dst != operands[0])
6822 emit_move_insn (operands[0], dst);
6823 }
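
/* A usage sketch (assumed machine-description expander, not quoted from
   this port):
     ix86_expand_binary_operator (PLUS, SImode, operands); DONE;
   so an illegal combination such as mem = mem + mem is rewritten into
   mem = mem + reg (or routed through a temporary register) before the
   insn with its flags clobber is emitted.  */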
6824
6825 /* Return TRUE or FALSE depending on whether the binary operator meets the
6826 appropriate constraints. */
6827
6828 int
6829 ix86_binary_operator_ok (code, mode, operands)
6830 enum rtx_code code;
6831 enum machine_mode mode ATTRIBUTE_UNUSED;
6832 rtx operands[3];
6833 {
6834 /* Both source operands cannot be in memory. */
6835 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6836 return 0;
6837 /* If the operation is not commutative, source 1 cannot be a constant. */
6838 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6839 return 0;
6840 /* If the destination is memory, we must have a matching source operand. */
6841 if (GET_CODE (operands[0]) == MEM
6842 && ! (rtx_equal_p (operands[0], operands[1])
6843 || (GET_RTX_CLASS (code) == 'c'
6844 && rtx_equal_p (operands[0], operands[2]))))
6845 return 0;
6846 /* If the operation is not commutative and source 1 is memory, we must
6847 have a matching destination. */
6848 if (GET_CODE (operands[1]) == MEM
6849 && GET_RTX_CLASS (code) != 'c'
6850 && ! rtx_equal_p (operands[0], operands[1]))
6851 return 0;
6852 return 1;
6853 }
6854
6855 /* Attempt to expand a unary operator. Make the expansion closer to the
6856 actual machine than just general_operand, which would allow 2 separate
6857 memory references (one output, one input) in a single insn. */
6858
6859 void
6860 ix86_expand_unary_operator (code, mode, operands)
6861 enum rtx_code code;
6862 enum machine_mode mode;
6863 rtx operands[];
6864 {
6865 int matching_memory;
6866 rtx src, dst, op, clob;
6867
6868 dst = operands[0];
6869 src = operands[1];
6870
6871 /* If the destination is memory, and we do not have matching source
6872 operands, do things in registers. */
6873 matching_memory = 0;
6874 if (GET_CODE (dst) == MEM)
6875 {
6876 if (rtx_equal_p (dst, src))
6877 matching_memory = 1;
6878 else
6879 dst = gen_reg_rtx (mode);
6880 }
6881
6882 /* When source operand is memory, destination must match. */
6883 if (!matching_memory && GET_CODE (src) == MEM)
6884 src = force_reg (mode, src);
6885
6886 /* If optimizing, copy to regs to improve CSE */
6887 if (optimize && ! no_new_pseudos)
6888 {
6889 if (GET_CODE (dst) == MEM)
6890 dst = gen_reg_rtx (mode);
6891 if (GET_CODE (src) == MEM)
6892 src = force_reg (mode, src);
6893 }
6894
6895 /* Emit the instruction. */
6896
6897 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6898 if (reload_in_progress || code == NOT)
6899 {
6900 /* Reload doesn't know about the flags register, and doesn't know that
6901 it doesn't want to clobber it. */
6902 if (code != NOT)
6903 abort ();
6904 emit_insn (op);
6905 }
6906 else
6907 {
6908 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6909 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6910 }
6911
6912 /* Fix up the destination if needed. */
6913 if (dst != operands[0])
6914 emit_move_insn (operands[0], dst);
6915 }
6916
6917 /* Return TRUE or FALSE depending on whether the unary operator meets the
6918 appropriate constraints. */
6919
6920 int
6921 ix86_unary_operator_ok (code, mode, operands)
6922 enum rtx_code code ATTRIBUTE_UNUSED;
6923 enum machine_mode mode ATTRIBUTE_UNUSED;
6924 rtx operands[2] ATTRIBUTE_UNUSED;
6925 {
6926 /* If one of operands is memory, source and destination must match. */
6927 if ((GET_CODE (operands[0]) == MEM
6928 || GET_CODE (operands[1]) == MEM)
6929 && ! rtx_equal_p (operands[0], operands[1]))
6930 return FALSE;
6931 return TRUE;
6932 }
6933
6934 /* Return TRUE or FALSE depending on whether the first SET in INSN
6935 has source and destination with matching CC modes, and whether the
6936 CC mode is at least as constrained as REQ_MODE. */
6937
6938 int
6939 ix86_match_ccmode (insn, req_mode)
6940 rtx insn;
6941 enum machine_mode req_mode;
6942 {
6943 rtx set;
6944 enum machine_mode set_mode;
6945
6946 set = PATTERN (insn);
6947 if (GET_CODE (set) == PARALLEL)
6948 set = XVECEXP (set, 0, 0);
6949 if (GET_CODE (set) != SET)
6950 abort ();
6951 if (GET_CODE (SET_SRC (set)) != COMPARE)
6952 abort ();
6953
6954 set_mode = GET_MODE (SET_DEST (set));
6955 switch (set_mode)
6956 {
6957 case CCNOmode:
6958 if (req_mode != CCNOmode
6959 && (req_mode != CCmode
6960 || XEXP (SET_SRC (set), 1) != const0_rtx))
6961 return 0;
6962 break;
6963 case CCmode:
6964 if (req_mode == CCGCmode)
6965 return 0;
6966 /* FALLTHRU */
6967 case CCGCmode:
6968 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6969 return 0;
6970 /* FALLTHRU */
6971 case CCGOCmode:
6972 if (req_mode == CCZmode)
6973 return 0;
6974 /* FALLTHRU */
6975 case CCZmode:
6976 break;
6977
6978 default:
6979 abort ();
6980 }
6981
6982 return (GET_MODE (SET_SRC (set)) == set_mode);
6983 }
6984
6985 /* Generate insn patterns to do an integer compare of OPERANDS. */
6986
6987 static rtx
6988 ix86_expand_int_compare (code, op0, op1)
6989 enum rtx_code code;
6990 rtx op0, op1;
6991 {
6992 enum machine_mode cmpmode;
6993 rtx tmp, flags;
6994
6995 cmpmode = SELECT_CC_MODE (code, op0, op1);
6996 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6997
6998 /* This is very simple, but making the interface the same as in the
6999 FP case makes the rest of the code easier. */
7000 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7001 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7002
7003 /* Return the test that should be put into the flags user, i.e.
7004 the bcc, scc, or cmov instruction. */
7005 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7006 }
7007
7008 /* Figure out whether to use ordered or unordered fp comparisons.
7009 Return the appropriate mode to use. */
7010
7011 enum machine_mode
7012 ix86_fp_compare_mode (code)
7013 enum rtx_code code ATTRIBUTE_UNUSED;
7014 {
7015 /* ??? In order to make all comparisons reversible, we do all comparisons
7016 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7017 all forms of trapping and nontrapping comparisons, we can make inequality
7018 comparisons trapping again, since it results in better code when using
7019 FCOM based compares. */
7020 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7021 }
7022
7023 enum machine_mode
7024 ix86_cc_mode (code, op0, op1)
7025 enum rtx_code code;
7026 rtx op0, op1;
7027 {
7028 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7029 return ix86_fp_compare_mode (code);
7030 switch (code)
7031 {
7032 /* Only zero flag is needed. */
7033 case EQ: /* ZF=0 */
7034 case NE: /* ZF!=0 */
7035 return CCZmode;
7036 /* Codes needing carry flag. */
7037 case GEU: /* CF=0 */
7038 case GTU: /* CF=0 & ZF=0 */
7039 case LTU: /* CF=1 */
7040 case LEU: /* CF=1 | ZF=1 */
7041 return CCmode;
7042 /* Codes possibly doable only with sign flag when
7043 comparing against zero. */
7044 case GE: /* SF=OF or SF=0 */
7045 case LT: /* SF<>OF or SF=1 */
7046 if (op1 == const0_rtx)
7047 return CCGOCmode;
7048 else
7049 /* For other cases Carry flag is not required. */
7050 return CCGCmode;
7051 /* Codes doable only with the sign flag when comparing
7052 against zero, but we lack a jump instruction for them,
7053 so we need to use relational tests against the overflow
7054 flag, which thus needs to be zero. */
7055 case GT: /* ZF=0 & SF=OF */
7056 case LE: /* ZF=1 | SF<>OF */
7057 if (op1 == const0_rtx)
7058 return CCNOmode;
7059 else
7060 return CCGCmode;
7061 /* The strcmp pattern does a (use flags), and combine may ask us for the
7062 proper mode. */
7063 case USE:
7064 return CCmode;
7065 default:
7066 abort ();
7067 }
7068 }
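
/* Illustrative choices (not from the original source): EQ/NE get CCZmode,
   the unsigned comparisons get CCmode, and GT against const0_rtx gets
   CCNOmode while GT against anything else needs CCGCmode, matching the
   flag requirements listed above.  */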
7069
7070 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7071
7072 int
7073 ix86_use_fcomi_compare (code)
7074 enum rtx_code code ATTRIBUTE_UNUSED;
7075 {
7076 enum rtx_code swapped_code = swap_condition (code);
7077 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7078 || (ix86_fp_comparison_cost (swapped_code)
7079 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7080 }
7081
7082 /* Swap, force into registers, or otherwise massage the two operands
7083 to an fp comparison. The operands are updated in place; the new
7084 comparison code is returned. */
7085
7086 static enum rtx_code
7087 ix86_prepare_fp_compare_args (code, pop0, pop1)
7088 enum rtx_code code;
7089 rtx *pop0, *pop1;
7090 {
7091 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7092 rtx op0 = *pop0, op1 = *pop1;
7093 enum machine_mode op_mode = GET_MODE (op0);
7094 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7095
7096 /* All of the unordered compare instructions only work on registers.
7097 The same is true of the XFmode compare instructions. The same is
7098 true of the fcomi compare instructions. */
7099
7100 if (!is_sse
7101 && (fpcmp_mode == CCFPUmode
7102 || op_mode == XFmode
7103 || op_mode == TFmode
7104 || ix86_use_fcomi_compare (code)))
7105 {
7106 op0 = force_reg (op_mode, op0);
7107 op1 = force_reg (op_mode, op1);
7108 }
7109 else
7110 {
7111 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7112 things around if they appear profitable, otherwise force op0
7113 into a register. */
7114
7115 if (standard_80387_constant_p (op0) == 0
7116 || (GET_CODE (op0) == MEM
7117 && ! (standard_80387_constant_p (op1) == 0
7118 || GET_CODE (op1) == MEM)))
7119 {
7120 rtx tmp;
7121 tmp = op0, op0 = op1, op1 = tmp;
7122 code = swap_condition (code);
7123 }
7124
7125 if (GET_CODE (op0) != REG)
7126 op0 = force_reg (op_mode, op0);
7127
7128 if (CONSTANT_P (op1))
7129 {
7130 if (standard_80387_constant_p (op1))
7131 op1 = force_reg (op_mode, op1);
7132 else
7133 op1 = validize_mem (force_const_mem (op_mode, op1));
7134 }
7135 }
7136
7137 /* Try to rearrange the comparison to make it cheaper. */
7138 if (ix86_fp_comparison_cost (code)
7139 > ix86_fp_comparison_cost (swap_condition (code))
7140 && (GET_CODE (op1) == REG || !no_new_pseudos))
7141 {
7142 rtx tmp;
7143 tmp = op0, op0 = op1, op1 = tmp;
7144 code = swap_condition (code);
7145 if (GET_CODE (op0) != REG)
7146 op0 = force_reg (op_mode, op0);
7147 }
7148
7149 *pop0 = op0;
7150 *pop1 = op1;
7151 return code;
7152 }
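
/* For illustration (not from the original source): comparing a memory
   operand against st(0) is handled by swapping the operands and the
   condition (e.g. LT becomes GT), since only op1 may live in memory;
   unordered, XFmode/TFmode and fcomi compares force both operands into
   registers.  */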
7153
7154 /* Convert the comparison codes we use to represent an FP comparison to the
7155 integer code that will result in a proper branch. Return UNKNOWN if no
7156 such code is available. */
7157 static enum rtx_code
7158 ix86_fp_compare_code_to_integer (code)
7159 enum rtx_code code;
7160 {
7161 switch (code)
7162 {
7163 case GT:
7164 return GTU;
7165 case GE:
7166 return GEU;
7167 case ORDERED:
7168 case UNORDERED:
7169 return code;
7170 break;
7171 case UNEQ:
7172 return EQ;
7173 break;
7174 case UNLT:
7175 return LTU;
7176 break;
7177 case UNLE:
7178 return LEU;
7179 break;
7180 case LTGT:
7181 return NE;
7182 break;
7183 default:
7184 return UNKNOWN;
7185 }
7186 }
7187
7188 /* Split comparison code CODE into comparisons we can do using branch
7189 instructions. BYPASS_CODE is the comparison code for the branch that
7190 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
7191 is not required, its value is set to NIL.
7192 We never require more than two branches. */
7193 static void
7194 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7195 enum rtx_code code, *bypass_code, *first_code, *second_code;
7196 {
7197 *first_code = code;
7198 *bypass_code = NIL;
7199 *second_code = NIL;
7200
7201 /* The fcomi comparison sets flags as follows:
7202
7203 cmp ZF PF CF
7204 > 0 0 0
7205 < 0 0 1
7206 = 1 0 0
7207 un 1 1 1 */
7208
7209 switch (code)
7210 {
7211 case GT: /* GTU - CF=0 & ZF=0 */
7212 case GE: /* GEU - CF=0 */
7213 case ORDERED: /* PF=0 */
7214 case UNORDERED: /* PF=1 */
7215 case UNEQ: /* EQ - ZF=1 */
7216 case UNLT: /* LTU - CF=1 */
7217 case UNLE: /* LEU - CF=1 | ZF=1 */
7218 case LTGT: /* EQ - ZF=0 */
7219 break;
7220 case LT: /* LTU - CF=1 - fails on unordered */
7221 *first_code = UNLT;
7222 *bypass_code = UNORDERED;
7223 break;
7224 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7225 *first_code = UNLE;
7226 *bypass_code = UNORDERED;
7227 break;
7228 case EQ: /* EQ - ZF=1 - fails on unordered */
7229 *first_code = UNEQ;
7230 *bypass_code = UNORDERED;
7231 break;
7232 case NE: /* NE - ZF=0 - fails on unordered */
7233 *first_code = LTGT;
7234 *second_code = UNORDERED;
7235 break;
7236 case UNGE: /* GEU - CF=0 - fails on unordered */
7237 *first_code = GE;
7238 *second_code = UNORDERED;
7239 break;
7240 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7241 *first_code = GT;
7242 *second_code = UNORDERED;
7243 break;
7244 default:
7245 abort ();
7246 }
7247 if (!TARGET_IEEE_FP)
7248 {
7249 *second_code = NIL;
7250 *bypass_code = NIL;
7251 }
7252 }
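
/* Worked example (illustration only; the label is hypothetical): for
   code == LT with TARGET_IEEE_FP, first_code becomes UNLT and bypass_code
   becomes UNORDERED, so the later expansion is roughly
       jp   .Lskip       # bypass when the operands are unordered
       jb   target       # UNLT maps to LTU, i.e. "b"
     .Lskip:
   Without -mieee-fp both extra codes are cleared to NIL.  */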
7253
7254 /* Return the cost of a comparison done with fcom + arithmetic operations on
7255 AX. All of the following functions use the number of instructions as a
7256 cost metric. In the future this should be tweaked to compute bytes for
7257 optimize_size and to take into account how instructions perform on various CPUs. */
7258 static int
7259 ix86_fp_comparison_arithmetics_cost (code)
7260 enum rtx_code code;
7261 {
7262 if (!TARGET_IEEE_FP)
7263 return 4;
7264 /* The cost of code output by ix86_expand_fp_compare. */
7265 switch (code)
7266 {
7267 case UNLE:
7268 case UNLT:
7269 case LTGT:
7270 case GT:
7271 case GE:
7272 case UNORDERED:
7273 case ORDERED:
7274 case UNEQ:
7275 return 4;
7276 break;
7277 case LT:
7278 case NE:
7279 case EQ:
7280 case UNGE:
7281 return 5;
7282 break;
7283 case LE:
7284 case UNGT:
7285 return 6;
7286 break;
7287 default:
7288 abort ();
7289 }
7290 }
7291
7292 /* Return cost of comparison done using fcomi operation.
7293 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7294 static int
7295 ix86_fp_comparison_fcomi_cost (code)
7296 enum rtx_code code;
7297 {
7298 enum rtx_code bypass_code, first_code, second_code;
7299 /* Return an arbitrarily high cost when the instruction is not supported -
7300 this prevents gcc from using it. */
7301 if (!TARGET_CMOVE)
7302 return 1024;
7303 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7304 return (bypass_code != NIL || second_code != NIL) + 2;
7305 }
7306
7307 /* Return cost of comparison done using sahf operation.
7308 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7309 static int
7310 ix86_fp_comparison_sahf_cost (code)
7311 enum rtx_code code;
7312 {
7313 enum rtx_code bypass_code, first_code, second_code;
7314 /* Return an arbitrarily high cost when the instruction is not preferred -
7315 this prevents gcc from using it. */
7316 if (!TARGET_USE_SAHF && !optimize_size)
7317 return 1024;
7318 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7319 return (bypass_code != NIL || second_code != NIL) + 3;
7320 }
7321
7322 /* Compute cost of the comparison done using any method.
7323 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7324 static int
7325 ix86_fp_comparison_cost (code)
7326 enum rtx_code code;
7327 {
7328 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7329 int min;
7330
7331 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7332 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7333
7334 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7335 if (min > sahf_cost)
7336 min = sahf_cost;
7337 if (min > fcomi_cost)
7338 min = fcomi_cost;
7339 return min;
7340 }
7341
7342 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7343
7344 static rtx
7345 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7346 enum rtx_code code;
7347 rtx op0, op1, scratch;
7348 rtx *second_test;
7349 rtx *bypass_test;
7350 {
7351 enum machine_mode fpcmp_mode, intcmp_mode;
7352 rtx tmp, tmp2;
7353 int cost = ix86_fp_comparison_cost (code);
7354 enum rtx_code bypass_code, first_code, second_code;
7355
7356 fpcmp_mode = ix86_fp_compare_mode (code);
7357 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7358
7359 if (second_test)
7360 *second_test = NULL_RTX;
7361 if (bypass_test)
7362 *bypass_test = NULL_RTX;
7363
7364 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7365
7366 /* Do fcomi/sahf based test when profitable. */
7367 if ((bypass_code == NIL || bypass_test)
7368 && (second_code == NIL || second_test)
7369 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7370 {
7371 if (TARGET_CMOVE)
7372 {
7373 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7374 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7375 tmp);
7376 emit_insn (tmp);
7377 }
7378 else
7379 {
7380 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7381 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7382 if (!scratch)
7383 scratch = gen_reg_rtx (HImode);
7384 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7385 emit_insn (gen_x86_sahf_1 (scratch));
7386 }
7387
7388 /* The FP codes work out to act like unsigned. */
7389 intcmp_mode = fpcmp_mode;
7390 code = first_code;
7391 if (bypass_code != NIL)
7392 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7393 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7394 const0_rtx);
7395 if (second_code != NIL)
7396 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7397 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7398 const0_rtx);
7399 }
7400 else
7401 {
7402 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7403 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7404 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7405 if (!scratch)
7406 scratch = gen_reg_rtx (HImode);
7407 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7408
7409 /* In the unordered case, we have to check C2 for NaN's, which
7410 doesn't happen to work out to anything nice combination-wise.
7411 So do some bit twiddling on the value we've got in AH to come
7412 up with an appropriate set of condition codes. */
7413
7414 intcmp_mode = CCNOmode;
7415 switch (code)
7416 {
7417 case GT:
7418 case UNGT:
7419 if (code == GT || !TARGET_IEEE_FP)
7420 {
7421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7422 code = EQ;
7423 }
7424 else
7425 {
7426 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7427 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7428 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7429 intcmp_mode = CCmode;
7430 code = GEU;
7431 }
7432 break;
7433 case LT:
7434 case UNLT:
7435 if (code == LT && TARGET_IEEE_FP)
7436 {
7437 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7438 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7439 intcmp_mode = CCmode;
7440 code = EQ;
7441 }
7442 else
7443 {
7444 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7445 code = NE;
7446 }
7447 break;
7448 case GE:
7449 case UNGE:
7450 if (code == GE || !TARGET_IEEE_FP)
7451 {
7452 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7453 code = EQ;
7454 }
7455 else
7456 {
7457 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7458 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7459 GEN_INT (0x01)));
7460 code = NE;
7461 }
7462 break;
7463 case LE:
7464 case UNLE:
7465 if (code == LE && TARGET_IEEE_FP)
7466 {
7467 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7468 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7469 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7470 intcmp_mode = CCmode;
7471 code = LTU;
7472 }
7473 else
7474 {
7475 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7476 code = NE;
7477 }
7478 break;
7479 case EQ:
7480 case UNEQ:
7481 if (code == EQ && TARGET_IEEE_FP)
7482 {
7483 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7484 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7485 intcmp_mode = CCmode;
7486 code = EQ;
7487 }
7488 else
7489 {
7490 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7491 code = NE;
7492 break;
7493 }
7494 break;
7495 case NE:
7496 case LTGT:
7497 if (code == NE && TARGET_IEEE_FP)
7498 {
7499 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7500 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7501 GEN_INT (0x40)));
7502 code = NE;
7503 }
7504 else
7505 {
7506 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7507 code = EQ;
7508 }
7509 break;
7510
7511 case UNORDERED:
7512 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7513 code = NE;
7514 break;
7515 case ORDERED:
7516 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7517 code = EQ;
7518 break;
7519
7520 default:
7521 abort ();
7522 }
7523 }
7524
7525 /* Return the test that should be put into the flags user, i.e.
7526 the bcc, scc, or cmov instruction. */
7527 return gen_rtx_fmt_ee (code, VOIDmode,
7528 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7529 const0_rtx);
7530 }
7531
7532 rtx
7533 ix86_expand_compare (code, second_test, bypass_test)
7534 enum rtx_code code;
7535 rtx *second_test, *bypass_test;
7536 {
7537 rtx op0, op1, ret;
7538 op0 = ix86_compare_op0;
7539 op1 = ix86_compare_op1;
7540
7541 if (second_test)
7542 *second_test = NULL_RTX;
7543 if (bypass_test)
7544 *bypass_test = NULL_RTX;
7545
7546 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7547 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7548 second_test, bypass_test);
7549 else
7550 ret = ix86_expand_int_compare (code, op0, op1);
7551
7552 return ret;
7553 }
7554
7555 /* Return true if the CODE will result in nontrivial jump sequence. */
7556 bool
7557 ix86_fp_jump_nontrivial_p (code)
7558 enum rtx_code code;
7559 {
7560 enum rtx_code bypass_code, first_code, second_code;
7561 if (!TARGET_CMOVE)
7562 return true;
7563 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7564 return bypass_code != NIL || second_code != NIL;
7565 }
7566
7567 void
7568 ix86_expand_branch (code, label)
7569 enum rtx_code code;
7570 rtx label;
7571 {
7572 rtx tmp;
7573
7574 switch (GET_MODE (ix86_compare_op0))
7575 {
7576 case QImode:
7577 case HImode:
7578 case SImode:
7579 simple:
7580 tmp = ix86_expand_compare (code, NULL, NULL);
7581 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7582 gen_rtx_LABEL_REF (VOIDmode, label),
7583 pc_rtx);
7584 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7585 return;
7586
7587 case SFmode:
7588 case DFmode:
7589 case XFmode:
7590 case TFmode:
7591 {
7592 rtvec vec;
7593 int use_fcomi;
7594 enum rtx_code bypass_code, first_code, second_code;
7595
7596 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7597 &ix86_compare_op1);
7598
7599 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7600
7601 /* Check whether we will use the natural sequence with one jump. If
7602 so, we can expand the jump early. Otherwise delay expansion by
7603 creating a compound insn so as not to confuse the optimizers. */
7604 if (bypass_code == NIL && second_code == NIL
7605 && TARGET_CMOVE)
7606 {
7607 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7608 gen_rtx_LABEL_REF (VOIDmode, label),
7609 pc_rtx, NULL_RTX);
7610 }
7611 else
7612 {
7613 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7614 ix86_compare_op0, ix86_compare_op1);
7615 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7616 gen_rtx_LABEL_REF (VOIDmode, label),
7617 pc_rtx);
7618 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7619
7620 use_fcomi = ix86_use_fcomi_compare (code);
7621 vec = rtvec_alloc (3 + !use_fcomi);
7622 RTVEC_ELT (vec, 0) = tmp;
7623 RTVEC_ELT (vec, 1)
7624 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7625 RTVEC_ELT (vec, 2)
7626 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7627 if (! use_fcomi)
7628 RTVEC_ELT (vec, 3)
7629 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7630
7631 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7632 }
7633 return;
7634 }
7635
7636 case DImode:
7637 if (TARGET_64BIT)
7638 goto simple;
7639 /* Expand DImode branch into multiple compare+branch. */
7640 {
7641 rtx lo[2], hi[2], label2;
7642 enum rtx_code code1, code2, code3;
7643
7644 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7645 {
7646 tmp = ix86_compare_op0;
7647 ix86_compare_op0 = ix86_compare_op1;
7648 ix86_compare_op1 = tmp;
7649 code = swap_condition (code);
7650 }
7651 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7652 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7653
7654 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7655 avoid two branches. This costs one extra insn, so disable when
7656 optimizing for size. */
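/* Illustrative sketch (editorial addition, not from the original source):
   on a 32-bit target an EQ test of DImode values a and b becomes roughly
       xorl hi(b), hi(a)
       xorl lo(b), lo(a)
       orl  lo(a), hi(a)
       jz   label            (or jnz for NE)
   i.e. a single branch on the OR of the two XORed halves.  */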
7657
7658 if ((code == EQ || code == NE)
7659 && (!optimize_size
7660 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7661 {
7662 rtx xor0, xor1;
7663
7664 xor1 = hi[0];
7665 if (hi[1] != const0_rtx)
7666 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7667 NULL_RTX, 0, OPTAB_WIDEN);
7668
7669 xor0 = lo[0];
7670 if (lo[1] != const0_rtx)
7671 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7672 NULL_RTX, 0, OPTAB_WIDEN);
7673
7674 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7675 NULL_RTX, 0, OPTAB_WIDEN);
7676
7677 ix86_compare_op0 = tmp;
7678 ix86_compare_op1 = const0_rtx;
7679 ix86_expand_branch (code, label);
7680 return;
7681 }
7682
7683 /* Otherwise, if we are doing a less-than or greater-or-equal-than
7684 comparison, op1 is a constant and the low word is zero, then we can
7685 just examine the high word. */
7686
7687 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7688 switch (code)
7689 {
7690 case LT: case LTU: case GE: case GEU:
7691 ix86_compare_op0 = hi[0];
7692 ix86_compare_op1 = hi[1];
7693 ix86_expand_branch (code, label);
7694 return;
7695 default:
7696 break;
7697 }
7698
7699 /* Otherwise, we need two or three jumps. */
7700
7701 label2 = gen_label_rtx ();
7702
7703 code1 = code;
7704 code2 = swap_condition (code);
7705 code3 = unsigned_condition (code);
7706
7707 switch (code)
7708 {
7709 case LT: case GT: case LTU: case GTU:
7710 break;
7711
7712 case LE: code1 = LT; code2 = GT; break;
7713 case GE: code1 = GT; code2 = LT; break;
7714 case LEU: code1 = LTU; code2 = GTU; break;
7715 case GEU: code1 = GTU; code2 = LTU; break;
7716
7717 case EQ: code1 = NIL; code2 = NE; break;
7718 case NE: code2 = NIL; break;
7719
7720 default:
7721 abort ();
7722 }
7723
7724 /*
7725 * a < b =>
7726 * if (hi(a) < hi(b)) goto true;
7727 * if (hi(a) > hi(b)) goto false;
7728 * if (lo(a) < lo(b)) goto true;
7729 * false:
7730 */
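/* Worked example (editorial addition): for a signed LE comparison the
   switch above picks code1 = LT, code2 = GT and code3 = LEU, so the
   emitted sequence is roughly
       if (hi(a) < hi(b))  goto label;
       if (hi(a) > hi(b))  goto label2;
       if (lo(a) <= lo(b)) goto label;      -- unsigned compare
     label2:
   where label is the branch target and label2 falls through.  */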
7731
7732 ix86_compare_op0 = hi[0];
7733 ix86_compare_op1 = hi[1];
7734
7735 if (code1 != NIL)
7736 ix86_expand_branch (code1, label);
7737 if (code2 != NIL)
7738 ix86_expand_branch (code2, label2);
7739
7740 ix86_compare_op0 = lo[0];
7741 ix86_compare_op1 = lo[1];
7742 ix86_expand_branch (code3, label);
7743
7744 if (code2 != NIL)
7745 emit_label (label2);
7746 return;
7747 }
7748
7749 default:
7750 abort ();
7751 }
7752 }
7753
7754 /* Split branch based on floating point condition. */
7755 void
7756 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7757 enum rtx_code code;
7758 rtx op1, op2, target1, target2, tmp;
7759 {
7760 rtx second, bypass;
7761 rtx label = NULL_RTX;
7762 rtx condition;
7763 int bypass_probability = -1, second_probability = -1, probability = -1;
7764 rtx i;
7765
7766 if (target2 != pc_rtx)
7767 {
7768 rtx tmp = target2;
7769 code = reverse_condition_maybe_unordered (code);
7770 target2 = target1;
7771 target1 = tmp;
7772 }
7773
7774 condition = ix86_expand_fp_compare (code, op1, op2,
7775 tmp, &second, &bypass);
7776
7777 if (split_branch_probability >= 0)
7778 {
7779 /* Distribute the probabilities across the jumps.
7780 Assume that BYPASS and SECOND always test
7781 for UNORDERED. */
7782 probability = split_branch_probability;
7783
7784 /* A value of 1 is low enough that the probability does not need
7785 to be updated. Later we may run some experiments and see
7786 if unordered values are more frequent in practice. */
7787 if (bypass)
7788 bypass_probability = 1;
7789 if (second)
7790 second_probability = 1;
7791 }
7792 if (bypass != NULL_RTX)
7793 {
7794 label = gen_label_rtx ();
7795 i = emit_jump_insn (gen_rtx_SET
7796 (VOIDmode, pc_rtx,
7797 gen_rtx_IF_THEN_ELSE (VOIDmode,
7798 bypass,
7799 gen_rtx_LABEL_REF (VOIDmode,
7800 label),
7801 pc_rtx)));
7802 if (bypass_probability >= 0)
7803 REG_NOTES (i)
7804 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7805 GEN_INT (bypass_probability),
7806 REG_NOTES (i));
7807 }
7808 i = emit_jump_insn (gen_rtx_SET
7809 (VOIDmode, pc_rtx,
7810 gen_rtx_IF_THEN_ELSE (VOIDmode,
7811 condition, target1, target2)));
7812 if (probability >= 0)
7813 REG_NOTES (i)
7814 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7815 GEN_INT (probability),
7816 REG_NOTES (i));
7817 if (second != NULL_RTX)
7818 {
7819 i = emit_jump_insn (gen_rtx_SET
7820 (VOIDmode, pc_rtx,
7821 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7822 target2)));
7823 if (second_probability >= 0)
7824 REG_NOTES (i)
7825 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7826 GEN_INT (second_probability),
7827 REG_NOTES (i));
7828 }
7829 if (label != NULL_RTX)
7830 emit_label (label);
7831 }
7832
7833 int
7834 ix86_expand_setcc (code, dest)
7835 enum rtx_code code;
7836 rtx dest;
7837 {
7838 rtx ret, tmp, tmpreg;
7839 rtx second_test, bypass_test;
7840
7841 if (GET_MODE (ix86_compare_op0) == DImode
7842 && !TARGET_64BIT)
7843 return 0; /* FAIL */
7844
7845 if (GET_MODE (dest) != QImode)
7846 abort ();
7847
7848 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7849 PUT_MODE (ret, QImode);
7850
7851 tmp = dest;
7852 tmpreg = dest;
7853
7854 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7855 if (bypass_test || second_test)
7856 {
7857 rtx test = second_test;
7858 int bypass = 0;
7859 rtx tmp2 = gen_reg_rtx (QImode);
7860 if (bypass_test)
7861 {
7862 if (second_test)
7863 abort ();
7864 test = bypass_test;
7865 bypass = 1;
7866 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7867 }
7868 PUT_MODE (test, QImode);
7869 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7870
7871 if (bypass)
7872 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7873 else
7874 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7875 }
7876
7877 return 1; /* DONE */
7878 }
7879
7880 int
7881 ix86_expand_int_movcc (operands)
7882 rtx operands[];
7883 {
7884 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7885 rtx compare_seq, compare_op;
7886 rtx second_test, bypass_test;
7887 enum machine_mode mode = GET_MODE (operands[0]);
7888
7889 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
7890 When the comparison is done against an immediate, we can convert it to
7891 LTU or GEU by adjusting the integer. */
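/* For instance (editorial note): an unsigned "x <= 5" (LEU) with constant
   arms becomes "x < 6" (LTU), and "x > 5" (GTU) becomes "x >= 6" (GEU),
   so the sbb-based sequences below remain usable.  */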
7892
7893 if ((code == LEU || code == GTU)
7894 && GET_CODE (ix86_compare_op1) == CONST_INT
7895 && mode != HImode
7896 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7897 && GET_CODE (operands[2]) == CONST_INT
7898 && GET_CODE (operands[3]) == CONST_INT)
7899 {
7900 if (code == LEU)
7901 code = LTU;
7902 else
7903 code = GEU;
7904 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7905 }
7906
7907 start_sequence ();
7908 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7909 compare_seq = gen_sequence ();
7910 end_sequence ();
7911
7912 compare_code = GET_CODE (compare_op);
7913
7914 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7915 HImode insns, we'd be swallowed in word prefix ops. */
7916
7917 if (mode != HImode
7918 && (mode != DImode || TARGET_64BIT)
7919 && GET_CODE (operands[2]) == CONST_INT
7920 && GET_CODE (operands[3]) == CONST_INT)
7921 {
7922 rtx out = operands[0];
7923 HOST_WIDE_INT ct = INTVAL (operands[2]);
7924 HOST_WIDE_INT cf = INTVAL (operands[3]);
7925 HOST_WIDE_INT diff;
7926
7927 if ((compare_code == LTU || compare_code == GEU)
7928 && !second_test && !bypass_test)
7929 {
7930
7931 /* Detect overlap between destination and compare sources. */
7932 rtx tmp = out;
7933
7934 /* To simplify rest of code, restrict to the GEU case. */
7935 if (compare_code == LTU)
7936 {
7937 int tmp = ct;
7938 ct = cf;
7939 cf = tmp;
7940 compare_code = reverse_condition (compare_code);
7941 code = reverse_condition (code);
7942 }
7943 diff = ct - cf;
7944
7945 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7946 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7947 tmp = gen_reg_rtx (mode);
7948
7949 emit_insn (compare_seq);
7950 if (mode == DImode)
7951 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7952 else
7953 emit_insn (gen_x86_movsicc_0_m1 (tmp));
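/* Editorial note (an assumption about these named patterns, echoed by the
   asm sketches in the comments below): x86_mov{si,di}cc_0_m1 is expected
   to expand to a "sbb reg,reg" style instruction, leaving TMP equal to -1
   when the carry flag is set and 0 otherwise; the arithmetic below then
   maps that -1/0 mask onto CF and CT.  */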
7954
7955 if (diff == 1)
7956 {
7957 /*
7958 * cmpl op0,op1
7959 * sbbl dest,dest
7960 * [addl dest, ct]
7961 *
7962 * Size 5 - 8.
7963 */
7964 if (ct)
7965 tmp = expand_simple_binop (mode, PLUS,
7966 tmp, GEN_INT (ct),
7967 tmp, 1, OPTAB_DIRECT);
7968 }
7969 else if (cf == -1)
7970 {
7971 /*
7972 * cmpl op0,op1
7973 * sbbl dest,dest
7974 * orl $ct, dest
7975 *
7976 * Size 8.
7977 */
7978 tmp = expand_simple_binop (mode, IOR,
7979 tmp, GEN_INT (ct),
7980 tmp, 1, OPTAB_DIRECT);
7981 }
7982 else if (diff == -1 && ct)
7983 {
7984 /*
7985 * cmpl op0,op1
7986 * sbbl dest,dest
7987 * xorl $-1, dest
7988 * [addl dest, cf]
7989 *
7990 * Size 8 - 11.
7991 */
7992 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
7993 if (cf)
7994 tmp = expand_simple_binop (mode, PLUS,
7995 tmp, GEN_INT (cf),
7996 tmp, 1, OPTAB_DIRECT);
7997 }
7998 else
7999 {
8000 /*
8001 * cmpl op0,op1
8002 * sbbl dest,dest
8003 * andl cf - ct, dest
8004 * [addl dest, ct]
8005 *
8006 * Size 8 - 11.
8007 */
8008 tmp = expand_simple_binop (mode, AND,
8009 tmp,
8010 GEN_INT (trunc_int_for_mode
8011 (cf - ct, mode)),
8012 tmp, 1, OPTAB_DIRECT);
8013 if (ct)
8014 tmp = expand_simple_binop (mode, PLUS,
8015 tmp, GEN_INT (ct),
8016 tmp, 1, OPTAB_DIRECT);
8017 }
8018
8019 if (tmp != out)
8020 emit_move_insn (out, tmp);
8021
8022 return 1; /* DONE */
8023 }
8024
8025 diff = ct - cf;
8026 if (diff < 0)
8027 {
8028 HOST_WIDE_INT tmp;
8029 tmp = ct, ct = cf, cf = tmp;
8030 diff = -diff;
8031 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8032 {
8033 /* We may be reversing an unordered compare to a normal compare; that
8034 is not valid in general (we may convert a non-trapping condition
8035 into a trapping one), but on i386 we currently emit all
8036 comparisons unordered. */
8037 compare_code = reverse_condition_maybe_unordered (compare_code);
8038 code = reverse_condition_maybe_unordered (code);
8039 }
8040 else
8041 {
8042 compare_code = reverse_condition (compare_code);
8043 code = reverse_condition (code);
8044 }
8045 }
8046 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8047 || diff == 3 || diff == 5 || diff == 9)
8048 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8049 {
8050 /*
8051 * xorl dest,dest
8052 * cmpl op1,op2
8053 * setcc dest
8054 * lea cf(dest*(ct-cf)),dest
8055 *
8056 * Size 14.
8057 *
8058 * This also catches the degenerate setcc-only case.
8059 */
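/* Worked example (editorial addition): for a conditional move selecting 5
   when the comparison holds and 1 otherwise, DIFF is 4, so emit_store_flag
   leaves 0 or 1 in OUT and the code below builds "lea 1(,out,4), out",
   yielding 1 or 5 with no branch.  */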
8060
8061 rtx tmp;
8062 int nops;
8063
8064 out = emit_store_flag (out, code, ix86_compare_op0,
8065 ix86_compare_op1, VOIDmode, 0, 1);
8066
8067 nops = 0;
8068 /* On x86_64 the lea instruction operates on Pmode, so we need the
8069 arithmetic done in the proper mode to match. */
8070 if (diff == 1)
8071 tmp = out;
8072 else
8073 {
8074 rtx out1;
8075 out1 = out;
8076 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8077 nops++;
8078 if (diff & 1)
8079 {
8080 tmp = gen_rtx_PLUS (mode, tmp, out1);
8081 nops++;
8082 }
8083 }
8084 if (cf != 0)
8085 {
8086 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8087 nops++;
8088 }
8089 if (tmp != out
8090 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8091 {
8092 if (nops == 1)
8093 {
8094 rtx clob;
8095
8096 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8097 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8098
8099 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8100 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8101 emit_insn (tmp);
8102 }
8103 else
8104 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8105 }
8106 if (out != operands[0])
8107 emit_move_insn (operands[0], out);
8108
8109 return 1; /* DONE */
8110 }
8111
8112 /*
8113 * General case: Jumpful:
8114 * xorl dest,dest cmpl op1, op2
8115 * cmpl op1, op2 movl ct, dest
8116 * setcc dest jcc 1f
8117 * decl dest movl cf, dest
8118 * andl (cf-ct),dest 1:
8119 * addl ct,dest
8120 *
8121 * Size 20. Size 14.
8122 *
8123 * This is reasonably steep, but branch mispredict costs are
8124 * high on modern cpus, so consider failing only if optimizing
8125 * for space.
8126 *
8127 * %%% Parameterize branch_cost on the tuning architecture, then
8128 * use that. The 80386 couldn't care less about mispredicts.
8129 */
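/* Worked example for the jumpless sequence (editorial addition): selecting
   10 when the condition holds and 3 otherwise, setcc leaves 1/0 in OUT;
   the decrement gives 0/-1, ANDing with (cf - ct) = -7 gives 0/-7, and
   adding ct = 10 gives 10/3.  */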
8130
8131 if (!optimize_size && !TARGET_CMOVE)
8132 {
8133 if (ct == 0)
8134 {
8135 ct = cf;
8136 cf = 0;
8137 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8138 {
8139 /* We may be reversing an unordered compare to a normal compare;
8140 that is not valid in general (we may convert a non-trapping
8141 condition into a trapping one), but on i386 we currently
8142 emit all comparisons unordered. */
8143 compare_code = reverse_condition_maybe_unordered (compare_code);
8144 code = reverse_condition_maybe_unordered (code);
8145 }
8146 else
8147 {
8148 compare_code = reverse_condition (compare_code);
8149 code = reverse_condition (code);
8150 }
8151 }
8152
8153 out = emit_store_flag (out, code, ix86_compare_op0,
8154 ix86_compare_op1, VOIDmode, 0, 1);
8155
8156 out = expand_simple_binop (mode, PLUS,
8157 out, constm1_rtx,
8158 out, 1, OPTAB_DIRECT);
8159 out = expand_simple_binop (mode, AND,
8160 out,
8161 GEN_INT (trunc_int_for_mode
8162 (cf - ct, mode)),
8163 out, 1, OPTAB_DIRECT);
8164 out = expand_simple_binop (mode, PLUS,
8165 out, GEN_INT (ct),
8166 out, 1, OPTAB_DIRECT);
8167 if (out != operands[0])
8168 emit_move_insn (operands[0], out);
8169
8170 return 1; /* DONE */
8171 }
8172 }
8173
8174 if (!TARGET_CMOVE)
8175 {
8176 /* Try a few more things with specific constants and a variable. */
8177
8178 optab op;
8179 rtx var, orig_out, out, tmp;
8180
8181 if (optimize_size)
8182 return 0; /* FAIL */
8183
8184 /* If one of the two operands is an interesting constant (0 or -1), use
8185 the code above to load a 0/-1 mask and combine the variable in with a logical operation. */
8186
8187 if (GET_CODE (operands[2]) == CONST_INT)
8188 {
8189 var = operands[3];
8190 if (INTVAL (operands[2]) == 0)
8191 operands[3] = constm1_rtx, op = and_optab;
8192 else if (INTVAL (operands[2]) == -1)
8193 operands[3] = const0_rtx, op = ior_optab;
8194 else
8195 return 0; /* FAIL */
8196 }
8197 else if (GET_CODE (operands[3]) == CONST_INT)
8198 {
8199 var = operands[2];
8200 if (INTVAL (operands[3]) == 0)
8201 operands[2] = constm1_rtx, op = and_optab;
8202 else if (INTVAL (operands[3]) == -1)
8203 operands[2] = const0_rtx, op = ior_optab;
8204 else
8205 return 0; /* FAIL */
8206 }
8207 else
8208 return 0; /* FAIL */
8209
8210 orig_out = operands[0];
8211 tmp = gen_reg_rtx (mode);
8212 operands[0] = tmp;
8213
8214 /* Recurse to get the constant loaded. */
8215 if (ix86_expand_int_movcc (operands) == 0)
8216 return 0; /* FAIL */
8217
8218 /* Mask in the interesting variable. */
8219 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8220 OPTAB_WIDEN);
8221 if (out != orig_out)
8222 emit_move_insn (orig_out, out);
8223
8224 return 1; /* DONE */
8225 }
8226
8227 /*
8228 * For comparison with above,
8229 *
8230 * movl cf,dest
8231 * movl ct,tmp
8232 * cmpl op1,op2
8233 * cmovcc tmp,dest
8234 *
8235 * Size 15.
8236 */
8237
8238 if (! nonimmediate_operand (operands[2], mode))
8239 operands[2] = force_reg (mode, operands[2]);
8240 if (! nonimmediate_operand (operands[3], mode))
8241 operands[3] = force_reg (mode, operands[3]);
8242
8243 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8244 {
8245 rtx tmp = gen_reg_rtx (mode);
8246 emit_move_insn (tmp, operands[3]);
8247 operands[3] = tmp;
8248 }
8249 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8250 {
8251 rtx tmp = gen_reg_rtx (mode);
8252 emit_move_insn (tmp, operands[2]);
8253 operands[2] = tmp;
8254 }
8255 if (! register_operand (operands[2], VOIDmode)
8256 && ! register_operand (operands[3], VOIDmode))
8257 operands[2] = force_reg (mode, operands[2]);
8258
8259 emit_insn (compare_seq);
8260 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8261 gen_rtx_IF_THEN_ELSE (mode,
8262 compare_op, operands[2],
8263 operands[3])));
8264 if (bypass_test)
8265 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8266 gen_rtx_IF_THEN_ELSE (mode,
8267 bypass_test,
8268 operands[3],
8269 operands[0])));
8270 if (second_test)
8271 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8272 gen_rtx_IF_THEN_ELSE (mode,
8273 second_test,
8274 operands[2],
8275 operands[0])));
8276
8277 return 1; /* DONE */
8278 }
8279
8280 int
8281 ix86_expand_fp_movcc (operands)
8282 rtx operands[];
8283 {
8284 enum rtx_code code;
8285 rtx tmp;
8286 rtx compare_op, second_test, bypass_test;
8287
8288 /* For SF/DFmode conditional moves based on comparisons
8289 in same mode, we may want to use SSE min/max instructions. */
8290 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8291 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8292 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8293 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8294 && (!TARGET_IEEE_FP
8295 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8296 /* We may be called from the post-reload splitter. */
8297 && (!REG_P (operands[0])
8298 || SSE_REG_P (operands[0])
8299 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8300 {
8301 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8302 code = GET_CODE (operands[1]);
8303
8304 /* See if we have (cross) match between comparison operands and
8305 conditional move operands. */
8306 if (rtx_equal_p (operands[2], op1))
8307 {
8308 rtx tmp = op0;
8309 op0 = op1;
8310 op1 = tmp;
8311 code = reverse_condition_maybe_unordered (code);
8312 }
8313 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8314 {
8315 /* Check for min operation. */
8316 if (code == LT)
8317 {
8318 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8319 if (memory_operand (op0, VOIDmode))
8320 op0 = force_reg (GET_MODE (operands[0]), op0);
8321 if (GET_MODE (operands[0]) == SFmode)
8322 emit_insn (gen_minsf3 (operands[0], op0, op1));
8323 else
8324 emit_insn (gen_mindf3 (operands[0], op0, op1));
8325 return 1;
8326 }
8327 /* Check for max operation. */
8328 if (code == GT)
8329 {
8330 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8331 if (memory_operand (op0, VOIDmode))
8332 op0 = force_reg (GET_MODE (operands[0]), op0);
8333 if (GET_MODE (operands[0]) == SFmode)
8334 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8335 else
8336 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8337 return 1;
8338 }
8339 }
8340 /* Arrange for the condition to be a valid sse_comparison_operator. When
8341 in non-IEEE mode, also try to canonicalize the destination operand
8342 to be first in the comparison - this helps reload avoid extra
8343 moves. */
8344 if (!sse_comparison_operator (operands[1], VOIDmode)
8345 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8346 {
8347 rtx tmp = ix86_compare_op0;
8348 ix86_compare_op0 = ix86_compare_op1;
8349 ix86_compare_op1 = tmp;
8350 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8351 VOIDmode, ix86_compare_op0,
8352 ix86_compare_op1);
8353 }
8354 /* Similarly, try to arrange for the result to be the first operand of the
8355 conditional move. We also don't support the NE comparison on SSE, so
8356 try to avoid it. */
8357 if ((rtx_equal_p (operands[0], operands[3])
8358 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8359 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8360 {
8361 rtx tmp = operands[2];
8362 operands[2] = operands[3];
8363 operands[3] = tmp;
8364 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8365 (GET_CODE (operands[1])),
8366 VOIDmode, ix86_compare_op0,
8367 ix86_compare_op1);
8368 }
8369 if (GET_MODE (operands[0]) == SFmode)
8370 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8371 operands[2], operands[3],
8372 ix86_compare_op0, ix86_compare_op1));
8373 else
8374 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8375 operands[2], operands[3],
8376 ix86_compare_op0, ix86_compare_op1));
8377 return 1;
8378 }
8379
8380 /* The floating point conditional move instructions don't directly
8381 support conditions resulting from a signed integer comparison. */
8382
8383 code = GET_CODE (operands[1]);
8384 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8385
8386 /* The floating point conditional move instructions don't directly
8387 support signed integer comparisons. */
8388
8389 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8390 {
8391 if (second_test != NULL || bypass_test != NULL)
8392 abort ();
8393 tmp = gen_reg_rtx (QImode);
8394 ix86_expand_setcc (code, tmp);
8395 code = NE;
8396 ix86_compare_op0 = tmp;
8397 ix86_compare_op1 = const0_rtx;
8398 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8399 }
8400 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8401 {
8402 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8403 emit_move_insn (tmp, operands[3]);
8404 operands[3] = tmp;
8405 }
8406 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8407 {
8408 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8409 emit_move_insn (tmp, operands[2]);
8410 operands[2] = tmp;
8411 }
8412
8413 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8414 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8415 compare_op,
8416 operands[2],
8417 operands[3])));
8418 if (bypass_test)
8419 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8420 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8421 bypass_test,
8422 operands[3],
8423 operands[0])));
8424 if (second_test)
8425 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8426 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8427 second_test,
8428 operands[2],
8429 operands[0])));
8430
8431 return 1;
8432 }
8433
8434 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8435 works for floating point parameters and non-offsettable memories.
8436 For pushes, it returns just stack offsets; the values will be saved
8437 in the right order. At most three parts are generated. */
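/* Editorial example (derived from the size computation below): on a
   32-bit target a DImode or DFmode operand splits into two SImode parts
   and an XFmode/TFmode operand into three, while on a 64-bit target an
   XFmode/TFmode operand splits into one DImode part plus one SImode
   part.  */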
8438
8439 static int
8440 ix86_split_to_parts (operand, parts, mode)
8441 rtx operand;
8442 rtx *parts;
8443 enum machine_mode mode;
8444 {
8445 int size;
8446
8447 if (!TARGET_64BIT)
8448 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8449 else
8450 size = (GET_MODE_SIZE (mode) + 4) / 8;
8451
8452 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8453 abort ();
8454 if (size < 2 || size > 3)
8455 abort ();
8456
8457 /* Optimize constant pool references into immediates. This is used by fp
8458 moves, which force all constants to memory to allow combining. */
8459
8460 if (GET_CODE (operand) == MEM
8461 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8462 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8463 operand = get_pool_constant (XEXP (operand, 0));
8464
8465 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8466 {
8467 /* The only non-offsettable memories we handle are pushes. */
8468 if (! push_operand (operand, VOIDmode))
8469 abort ();
8470
8471 operand = copy_rtx (operand);
8472 PUT_MODE (operand, Pmode);
8473 parts[0] = parts[1] = parts[2] = operand;
8474 }
8475 else if (!TARGET_64BIT)
8476 {
8477 if (mode == DImode)
8478 split_di (&operand, 1, &parts[0], &parts[1]);
8479 else
8480 {
8481 if (REG_P (operand))
8482 {
8483 if (!reload_completed)
8484 abort ();
8485 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8486 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8487 if (size == 3)
8488 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8489 }
8490 else if (offsettable_memref_p (operand))
8491 {
8492 operand = adjust_address (operand, SImode, 0);
8493 parts[0] = operand;
8494 parts[1] = adjust_address (operand, SImode, 4);
8495 if (size == 3)
8496 parts[2] = adjust_address (operand, SImode, 8);
8497 }
8498 else if (GET_CODE (operand) == CONST_DOUBLE)
8499 {
8500 REAL_VALUE_TYPE r;
8501 long l[4];
8502
8503 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8504 switch (mode)
8505 {
8506 case XFmode:
8507 case TFmode:
8508 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8509 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8510 break;
8511 case DFmode:
8512 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8513 break;
8514 default:
8515 abort ();
8516 }
8517 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8518 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8519 }
8520 else
8521 abort ();
8522 }
8523 }
8524 else
8525 {
8526 if (mode == TImode)
8527 split_ti (&operand, 1, &parts[0], &parts[1]);
8528 if (mode == XFmode || mode == TFmode)
8529 {
8530 if (REG_P (operand))
8531 {
8532 if (!reload_completed)
8533 abort ();
8534 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8535 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8536 }
8537 else if (offsettable_memref_p (operand))
8538 {
8539 operand = adjust_address (operand, DImode, 0);
8540 parts[0] = operand;
8541 parts[1] = adjust_address (operand, SImode, 8);
8542 }
8543 else if (GET_CODE (operand) == CONST_DOUBLE)
8544 {
8545 REAL_VALUE_TYPE r;
8546 long l[3];
8547
8548 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8549 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8550 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8551 if (HOST_BITS_PER_WIDE_INT >= 64)
8552 parts[0]
8553 = GEN_INT (trunc_int_for_mode
8554 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8555 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8556 DImode));
8557 else
8558 parts[0] = immed_double_const (l[0], l[1], DImode);
8559 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8560 }
8561 else
8562 abort ();
8563 }
8564 }
8565
8566 return size;
8567 }
8568
8569 /* Emit insns to perform a move or push of DI, DF, and XF values.
8570 Return false when normal moves are needed; true when all required
8571 insns have been emitted. Operands 2-4 contain the input values
8572 in the correct order; operands 5-7 contain the output values. */
8573
8574 void
8575 ix86_split_long_move (operands)
8576 rtx operands[];
8577 {
8578 rtx part[2][3];
8579 int nparts;
8580 int push = 0;
8581 int collisions = 0;
8582 enum machine_mode mode = GET_MODE (operands[0]);
8583
8584 /* The DFmode expanders may ask us to move a double.
8585 For a 64-bit target this is a single move. By hiding that fact
8586 here we simplify the i386.md splitters. */
8587 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8588 {
8589 /* Optimize constant pool references into immediates. This is used by
8590 fp moves, which force all constants to memory to allow combining. */
8591
8592 if (GET_CODE (operands[1]) == MEM
8593 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8594 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8595 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8596 if (push_operand (operands[0], VOIDmode))
8597 {
8598 operands[0] = copy_rtx (operands[0]);
8599 PUT_MODE (operands[0], Pmode);
8600 }
8601 else
8602 operands[0] = gen_lowpart (DImode, operands[0]);
8603 operands[1] = gen_lowpart (DImode, operands[1]);
8604 emit_move_insn (operands[0], operands[1]);
8605 return;
8606 }
8607
8608 /* The only non-offsettable memory we handle is push. */
8609 if (push_operand (operands[0], VOIDmode))
8610 push = 1;
8611 else if (GET_CODE (operands[0]) == MEM
8612 && ! offsettable_memref_p (operands[0]))
8613 abort ();
8614
8615 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8616 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8617
8618 /* When emitting a push, take care of source operands that are on the stack. */
8619 if (push && GET_CODE (operands[1]) == MEM
8620 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8621 {
8622 if (nparts == 3)
8623 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8624 XEXP (part[1][2], 0));
8625 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8626 XEXP (part[1][1], 0));
8627 }
8628
8629 /* We need to do the copy in the right order in case an address register
8630 of the source overlaps the destination. */
8631 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8632 {
8633 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8634 collisions++;
8635 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8636 collisions++;
8637 if (nparts == 3
8638 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8639 collisions++;
8640
8641 /* Collision in the middle part can be handled by reordering. */
8642 if (collisions == 1 && nparts == 3
8643 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8644 {
8645 rtx tmp;
8646 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8647 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8648 }
8649
8650 /* If there are more collisions, we can't handle them by reordering.
8651 Do an lea into the last part and use only one colliding move. */
8652 else if (collisions > 1)
8653 {
8654 collisions = 1;
8655 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8656 XEXP (part[1][0], 0)));
8657 part[1][0] = change_address (part[1][0],
8658 TARGET_64BIT ? DImode : SImode,
8659 part[0][nparts - 1]);
8660 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8661 if (nparts == 3)
8662 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8663 }
8664 }
8665
8666 if (push)
8667 {
8668 if (!TARGET_64BIT)
8669 {
8670 if (nparts == 3)
8671 {
8672 /* We use only the first 12 bytes of a TFmode value, but for pushing we
8673 are required to adjust the stack as if we were pushing a real 16-byte
8674 value. */
8675 if (mode == TFmode && !TARGET_64BIT)
8676 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8677 GEN_INT (-4)));
8678 emit_move_insn (part[0][2], part[1][2]);
8679 }
8680 }
8681 else
8682 {
8683 /* In 64-bit mode we don't have a 32-bit push available. If the operand
8684 is a register, that is OK - we will just use the larger counterpart.
8685 We also retype memory - these come from an attempt to avoid a REX
8686 prefix when moving the second half of a TFmode value. */
8687 if (GET_MODE (part[1][1]) == SImode)
8688 {
8689 if (GET_CODE (part[1][1]) == MEM)
8690 part[1][1] = adjust_address (part[1][1], DImode, 0);
8691 else if (REG_P (part[1][1]))
8692 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8693 else
8694 abort ();
8695 if (GET_MODE (part[1][0]) == SImode)
8696 part[1][0] = part[1][1];
8697 }
8698 }
8699 emit_move_insn (part[0][1], part[1][1]);
8700 emit_move_insn (part[0][0], part[1][0]);
8701 return;
8702 }
8703
8704 /* Choose the correct order so we do not overwrite the source before it is copied. */
8705 if ((REG_P (part[0][0])
8706 && REG_P (part[1][1])
8707 && (REGNO (part[0][0]) == REGNO (part[1][1])
8708 || (nparts == 3
8709 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8710 || (collisions > 0
8711 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8712 {
8713 if (nparts == 3)
8714 {
8715 operands[2] = part[0][2];
8716 operands[3] = part[0][1];
8717 operands[4] = part[0][0];
8718 operands[5] = part[1][2];
8719 operands[6] = part[1][1];
8720 operands[7] = part[1][0];
8721 }
8722 else
8723 {
8724 operands[2] = part[0][1];
8725 operands[3] = part[0][0];
8726 operands[5] = part[1][1];
8727 operands[6] = part[1][0];
8728 }
8729 }
8730 else
8731 {
8732 if (nparts == 3)
8733 {
8734 operands[2] = part[0][0];
8735 operands[3] = part[0][1];
8736 operands[4] = part[0][2];
8737 operands[5] = part[1][0];
8738 operands[6] = part[1][1];
8739 operands[7] = part[1][2];
8740 }
8741 else
8742 {
8743 operands[2] = part[0][0];
8744 operands[3] = part[0][1];
8745 operands[5] = part[1][0];
8746 operands[6] = part[1][1];
8747 }
8748 }
8749 emit_move_insn (operands[2], operands[5]);
8750 emit_move_insn (operands[3], operands[6]);
8751 if (nparts == 3)
8752 emit_move_insn (operands[4], operands[7]);
8753
8754 return;
8755 }
8756
8757 void
8758 ix86_split_ashldi (operands, scratch)
8759 rtx *operands, scratch;
8760 {
8761 rtx low[2], high[2];
8762 int count;
8763
8764 if (GET_CODE (operands[2]) == CONST_INT)
8765 {
8766 split_di (operands, 2, low, high);
8767 count = INTVAL (operands[2]) & 63;
8768
8769 if (count >= 32)
8770 {
8771 emit_move_insn (high[0], low[1]);
8772 emit_move_insn (low[0], const0_rtx);
8773
8774 if (count > 32)
8775 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8776 }
8777 else
8778 {
8779 if (!rtx_equal_p (operands[0], operands[1]))
8780 emit_move_insn (operands[0], operands[1]);
8781 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8782 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8783 }
8784 }
8785 else
8786 {
8787 if (!rtx_equal_p (operands[0], operands[1]))
8788 emit_move_insn (operands[0], operands[1]);
8789
8790 split_di (operands, 1, low, high);
8791
8792 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8793 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8794
8795 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8796 {
8797 if (! no_new_pseudos)
8798 scratch = force_reg (SImode, const0_rtx);
8799 else
8800 emit_move_insn (scratch, const0_rtx);
8801
8802 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8803 scratch));
8804 }
8805 else
8806 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8807 }
8808 }
8809
8810 void
8811 ix86_split_ashrdi (operands, scratch)
8812 rtx *operands, scratch;
8813 {
8814 rtx low[2], high[2];
8815 int count;
8816
8817 if (GET_CODE (operands[2]) == CONST_INT)
8818 {
8819 split_di (operands, 2, low, high);
8820 count = INTVAL (operands[2]) & 63;
8821
8822 if (count >= 32)
8823 {
8824 emit_move_insn (low[0], high[1]);
8825
8826 if (! reload_completed)
8827 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8828 else
8829 {
8830 emit_move_insn (high[0], low[0]);
8831 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8832 }
8833
8834 if (count > 32)
8835 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8836 }
8837 else
8838 {
8839 if (!rtx_equal_p (operands[0], operands[1]))
8840 emit_move_insn (operands[0], operands[1]);
8841 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8842 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8843 }
8844 }
8845 else
8846 {
8847 if (!rtx_equal_p (operands[0], operands[1]))
8848 emit_move_insn (operands[0], operands[1]);
8849
8850 split_di (operands, 1, low, high);
8851
8852 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8853 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8854
8855 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8856 {
8857 if (! no_new_pseudos)
8858 scratch = gen_reg_rtx (SImode);
8859 emit_move_insn (scratch, high[0]);
8860 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8861 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8862 scratch));
8863 }
8864 else
8865 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8866 }
8867 }
8868
8869 void
8870 ix86_split_lshrdi (operands, scratch)
8871 rtx *operands, scratch;
8872 {
8873 rtx low[2], high[2];
8874 int count;
8875
8876 if (GET_CODE (operands[2]) == CONST_INT)
8877 {
8878 split_di (operands, 2, low, high);
8879 count = INTVAL (operands[2]) & 63;
8880
8881 if (count >= 32)
8882 {
8883 emit_move_insn (low[0], high[1]);
8884 emit_move_insn (high[0], const0_rtx);
8885
8886 if (count > 32)
8887 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8888 }
8889 else
8890 {
8891 if (!rtx_equal_p (operands[0], operands[1]))
8892 emit_move_insn (operands[0], operands[1]);
8893 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8894 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8895 }
8896 }
8897 else
8898 {
8899 if (!rtx_equal_p (operands[0], operands[1]))
8900 emit_move_insn (operands[0], operands[1]);
8901
8902 split_di (operands, 1, low, high);
8903
8904 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8905 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8906
8907 /* Heh. By reversing the arguments, we can reuse this pattern. */
8908 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8909 {
8910 if (! no_new_pseudos)
8911 scratch = force_reg (SImode, const0_rtx);
8912 else
8913 emit_move_insn (scratch, const0_rtx);
8914
8915 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8916 scratch));
8917 }
8918 else
8919 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8920 }
8921 }
8922
8923 /* Helper function for the string operations below. Test whether VARIABLE
8924 has any of the bits in VALUE set; if none are set, jump to the returned label. */
8925 static rtx
8926 ix86_expand_aligntest (variable, value)
8927 rtx variable;
8928 int value;
8929 {
8930 rtx label = gen_label_rtx ();
8931 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8932 if (GET_MODE (variable) == DImode)
8933 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8934 else
8935 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8936 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8937 1, label);
8938 return label;
8939 }
8940
8941 /* Decrement COUNTREG by VALUE. */
8942 static void
8943 ix86_adjust_counter (countreg, value)
8944 rtx countreg;
8945 HOST_WIDE_INT value;
8946 {
8947 if (GET_MODE (countreg) == DImode)
8948 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8949 else
8950 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8951 }
8952
8953 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
8954 rtx
8955 ix86_zero_extend_to_Pmode (exp)
8956 rtx exp;
8957 {
8958 rtx r;
8959 if (GET_MODE (exp) == VOIDmode)
8960 return force_reg (Pmode, exp);
8961 if (GET_MODE (exp) == Pmode)
8962 return copy_to_mode_reg (Pmode, exp);
8963 r = gen_reg_rtx (Pmode);
8964 emit_insn (gen_zero_extendsidi2 (r, exp));
8965 return r;
8966 }
8967
8968 /* Expand string move (memcpy) operation. Use i386 string operations when
8969 profitable. expand_clrstr contains similar code. */
8970 int
8971 ix86_expand_movstr (dst, src, count_exp, align_exp)
8972 rtx dst, src, count_exp, align_exp;
8973 {
8974 rtx srcreg, destreg, countreg;
8975 enum machine_mode counter_mode;
8976 HOST_WIDE_INT align = 0;
8977 unsigned HOST_WIDE_INT count = 0;
8978 rtx insns;
8979
8980 start_sequence ();
8981
8982 if (GET_CODE (align_exp) == CONST_INT)
8983 align = INTVAL (align_exp);
8984
8985 /* This simple hack avoids all inlining code and simplifies code below. */
8986 if (!TARGET_ALIGN_STRINGOPS)
8987 align = 64;
8988
8989 if (GET_CODE (count_exp) == CONST_INT)
8990 count = INTVAL (count_exp);
8991
8992 /* Figure out the proper mode for the counter. For 32 bits it is always
8993 SImode; for 64 bits use SImode when possible, otherwise DImode.
8994 Set count to the number of bytes copied when known at compile time. */
8995 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8996 || x86_64_zero_extended_value (count_exp))
8997 counter_mode = SImode;
8998 else
8999 counter_mode = DImode;
9000
9001 if (counter_mode != SImode && counter_mode != DImode)
9002 abort ();
9003
9004 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9005 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9006
9007 emit_insn (gen_cld ());
9008
9009 /* When optimizing for size emit simple rep ; movsb instruction for
9010 counts not divisible by 4. */
9011
9012 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9013 {
9014 countreg = ix86_zero_extend_to_Pmode (count_exp);
9015 if (TARGET_64BIT)
9016 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9017 destreg, srcreg, countreg));
9018 else
9019 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9020 destreg, srcreg, countreg));
9021 }
9022
9023 /* For constant aligned (or small unaligned) copies use rep movsl
9024 followed by code copying the rest. For PentiumPro ensure 8 byte
9025 alignment to allow rep movsl acceleration. */
9026
9027 else if (count != 0
9028 && (align >= 8
9029 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9030 || optimize_size || count < (unsigned int) 64))
9031 {
9032 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9033 if (count & ~(size - 1))
9034 {
9035 countreg = copy_to_mode_reg (counter_mode,
9036 GEN_INT ((count >> (size == 4 ? 2 : 3))
9037 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9038 countreg = ix86_zero_extend_to_Pmode (countreg);
9039 if (size == 4)
9040 {
9041 if (TARGET_64BIT)
9042 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9043 destreg, srcreg, countreg));
9044 else
9045 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9046 destreg, srcreg, countreg));
9047 }
9048 else
9049 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9050 destreg, srcreg, countreg));
9051 }
9052 if (size == 8 && (count & 0x04))
9053 emit_insn (gen_strmovsi (destreg, srcreg));
9054 if (count & 0x02)
9055 emit_insn (gen_strmovhi (destreg, srcreg));
9056 if (count & 0x01)
9057 emit_insn (gen_strmovqi (destreg, srcreg));
9058 }
9059 /* The generic code based on the glibc implementation:
9060 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9061 allowing accelerated copying there)
9062 - copy the data using rep movsl
9063 - copy the rest. */
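/* Editorial summary (sketch) of the code below: optionally jump straight
   to the tail when the count may be smaller than a word, emit byte/word
   (and, for PentiumPro or 64-bit, dword) moves until the destination is
   aligned, shift the byte count down to a word count for rep movsl /
   rep movsq, and then copy the remaining bytes with strmovsi, strmovhi
   and strmovqi, guarded by tests on the count bits when the count is not
   known at compile time.  */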
9064 else
9065 {
9066 rtx countreg2;
9067 rtx label = NULL;
9068
9069 /* In case we don't know anything about the alignment, default to the
9070 library version, since it is usually equally fast and results in
9071 shorter code. */
9072 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9073 {
9074 end_sequence ();
9075 return 0;
9076 }
9077
9078 if (TARGET_SINGLE_STRINGOP)
9079 emit_insn (gen_cld ());
9080
9081 countreg2 = gen_reg_rtx (Pmode);
9082 countreg = copy_to_mode_reg (counter_mode, count_exp);
9083
9084 /* We don't use loops to align the destination or to copy parts smaller
9085 than 4 bytes, because gcc is able to optimize such code better (when
9086 the destination or the count really is aligned, gcc is often
9087 able to predict the branches) and also it is friendlier to the
9088 hardware branch prediction.
9089
9090 Using loops is beneficial for the generic case, because we can
9091 handle small counts using the loops. Many CPUs (such as Athlon)
9092 have large REP prefix setup costs.
9093
9094 This is quite costly. Maybe we can revisit this decision later or
9095 add some customizability to this code. */
9096
9097 if (count == 0
9098 && align < (TARGET_PENTIUMPRO && (count == 0
9099 || count >= (unsigned int) 260)
9100 ? 8 : UNITS_PER_WORD))
9101 {
9102 label = gen_label_rtx ();
9103 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9104 LEU, 0, counter_mode, 1, label);
9105 }
9106 if (align <= 1)
9107 {
9108 rtx label = ix86_expand_aligntest (destreg, 1);
9109 emit_insn (gen_strmovqi (destreg, srcreg));
9110 ix86_adjust_counter (countreg, 1);
9111 emit_label (label);
9112 LABEL_NUSES (label) = 1;
9113 }
9114 if (align <= 2)
9115 {
9116 rtx label = ix86_expand_aligntest (destreg, 2);
9117 emit_insn (gen_strmovhi (destreg, srcreg));
9118 ix86_adjust_counter (countreg, 2);
9119 emit_label (label);
9120 LABEL_NUSES (label) = 1;
9121 }
9122 if (align <= 4
9123 && ((TARGET_PENTIUMPRO && (count == 0
9124 || count >= (unsigned int) 260))
9125 || TARGET_64BIT))
9126 {
9127 rtx label = ix86_expand_aligntest (destreg, 4);
9128 emit_insn (gen_strmovsi (destreg, srcreg));
9129 ix86_adjust_counter (countreg, 4);
9130 emit_label (label);
9131 LABEL_NUSES (label) = 1;
9132 }
9133
9134 if (!TARGET_SINGLE_STRINGOP)
9135 emit_insn (gen_cld ());
9136 if (TARGET_64BIT)
9137 {
9138 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9139 GEN_INT (3)));
9140 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9141 destreg, srcreg, countreg2));
9142 }
9143 else
9144 {
9145 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9146 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9147 destreg, srcreg, countreg2));
9148 }
9149
9150 if (label)
9151 {
9152 emit_label (label);
9153 LABEL_NUSES (label) = 1;
9154 }
9155 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9156 emit_insn (gen_strmovsi (destreg, srcreg));
9157 if ((align <= 4 || count == 0) && TARGET_64BIT)
9158 {
9159 rtx label = ix86_expand_aligntest (countreg, 4);
9160 emit_insn (gen_strmovsi (destreg, srcreg));
9161 emit_label (label);
9162 LABEL_NUSES (label) = 1;
9163 }
9164 if (align > 2 && count != 0 && (count & 2))
9165 emit_insn (gen_strmovhi (destreg, srcreg));
9166 if (align <= 2 || count == 0)
9167 {
9168 rtx label = ix86_expand_aligntest (countreg, 2);
9169 emit_insn (gen_strmovhi (destreg, srcreg));
9170 emit_label (label);
9171 LABEL_NUSES (label) = 1;
9172 }
9173 if (align > 1 && count != 0 && (count & 1))
9174 emit_insn (gen_strmovqi (destreg, srcreg));
9175 if (align <= 1 || count == 0)
9176 {
9177 rtx label = ix86_expand_aligntest (countreg, 1);
9178 emit_insn (gen_strmovqi (destreg, srcreg));
9179 emit_label (label);
9180 LABEL_NUSES (label) = 1;
9181 }
9182 }
9183
9184 insns = get_insns ();
9185 end_sequence ();
9186
9187 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9188 emit_insns (insns);
9189 return 1;
9190 }
9191
9192 /* Expand string clear operation (bzero). Use i386 string operations when
9193 profitable. expand_movstr contains similar code. */
9194 int
9195 ix86_expand_clrstr (src, count_exp, align_exp)
9196 rtx src, count_exp, align_exp;
9197 {
9198 rtx destreg, zeroreg, countreg;
9199 enum machine_mode counter_mode;
9200 HOST_WIDE_INT align = 0;
9201 unsigned HOST_WIDE_INT count = 0;
9202
9203 if (GET_CODE (align_exp) == CONST_INT)
9204 align = INTVAL (align_exp);
9205
9206 /* This simple hack avoids all inlining code and simplifies code below. */
9207 if (!TARGET_ALIGN_STRINGOPS)
9208 align = 32;
9209
9210 if (GET_CODE (count_exp) == CONST_INT)
9211 count = INTVAL (count_exp);
9212 /* Figure out the proper mode for the counter. For 32 bits it is always
9213 SImode; for 64 bits use SImode when possible, otherwise DImode.
9214 Set count to the number of bytes to clear when known at compile time. */
9215 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9216 || x86_64_zero_extended_value (count_exp))
9217 counter_mode = SImode;
9218 else
9219 counter_mode = DImode;
9220
9221 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9222
9223 emit_insn (gen_cld ());
9224
9225 /* When optimizing for size, emit a simple rep ; stosb instruction for
9226 counts not divisible by 4. */
9227
9228 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9229 {
9230 countreg = ix86_zero_extend_to_Pmode (count_exp);
9231 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9232 if (TARGET_64BIT)
9233 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9234 destreg, countreg));
9235 else
9236 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9237 destreg, countreg));
9238 }
9239 else if (count != 0
9240 && (align >= 8
9241 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9242 || optimize_size || count < (unsigned int) 64))
9243 {
9244 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9245 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9246 if (count & ~(size - 1))
9247 {
9248 countreg = copy_to_mode_reg (counter_mode,
9249 GEN_INT ((count >> (size == 4 ? 2 : 3))
9250 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9251 countreg = ix86_zero_extend_to_Pmode (countreg);
9252 if (size == 4)
9253 {
9254 if (TARGET_64BIT)
9255 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9256 destreg, countreg));
9257 else
9258 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9259 destreg, countreg));
9260 }
9261 else
9262 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9263 destreg, countreg));
9264 }
9265 if (size == 8 && (count & 0x04))
9266 emit_insn (gen_strsetsi (destreg,
9267 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9268 if (count & 0x02)
9269 emit_insn (gen_strsethi (destreg,
9270 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9271 if (count & 0x01)
9272 emit_insn (gen_strsetqi (destreg,
9273 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9274 }
9275 else
9276 {
9277 rtx countreg2;
9278 rtx label = NULL;
9279
9280 /* In case we don't know anything about the alignment, default to the
9281 library version, since it is usually equally fast and results in
9282 shorter code. */
9283 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9284 return 0;
9285
9286 if (TARGET_SINGLE_STRINGOP)
9287 emit_insn (gen_cld ());
9288
9289 countreg2 = gen_reg_rtx (Pmode);
9290 countreg = copy_to_mode_reg (counter_mode, count_exp);
9291 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9292
9293 if (count == 0
9294 && align < (TARGET_PENTIUMPRO && (count == 0
9295 || count >= (unsigned int) 260)
9296 ? 8 : UNITS_PER_WORD))
9297 {
9298 label = gen_label_rtx ();
9299 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9300 LEU, 0, counter_mode, 1, label);
9301 }
9302 if (align <= 1)
9303 {
9304 rtx label = ix86_expand_aligntest (destreg, 1);
9305 emit_insn (gen_strsetqi (destreg,
9306 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9307 ix86_adjust_counter (countreg, 1);
9308 emit_label (label);
9309 LABEL_NUSES (label) = 1;
9310 }
9311 if (align <= 2)
9312 {
9313 rtx label = ix86_expand_aligntest (destreg, 2);
9314 emit_insn (gen_strsethi (destreg,
9315 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9316 ix86_adjust_counter (countreg, 2);
9317 emit_label (label);
9318 LABEL_NUSES (label) = 1;
9319 }
9320 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9321 || count >= (unsigned int) 260))
9322 {
9323 rtx label = ix86_expand_aligntest (destreg, 4);
9324 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9325 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9326 : zeroreg)));
9327 ix86_adjust_counter (countreg, 4);
9328 emit_label (label);
9329 LABEL_NUSES (label) = 1;
9330 }
9331
9332 if (!TARGET_SINGLE_STRINGOP)
9333 emit_insn (gen_cld ());
9334 if (TARGET_64BIT)
9335 {
9336 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9337 GEN_INT (3)));
9338 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9339 destreg, countreg2));
9340 }
9341 else
9342 {
9343 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9344 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9345 destreg, countreg2));
9346 }
9347
9348 if (label)
9349 {
9350 emit_label (label);
9351 LABEL_NUSES (label) = 1;
9352 }
9353 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9354 emit_insn (gen_strsetsi (destreg,
9355 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9356 if (TARGET_64BIT && (align <= 4 || count == 0))
9357 {
9358 rtx label = ix86_expand_aligntest (destreg, 2);
9359 emit_insn (gen_strsetsi (destreg,
9360 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9361 emit_label (label);
9362 LABEL_NUSES (label) = 1;
9363 }
9364 if (align > 2 && count != 0 && (count & 2))
9365 emit_insn (gen_strsethi (destreg,
9366 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9367 if (align <= 2 || count == 0)
9368 {
9369 rtx label = ix86_expand_aligntest (destreg, 2);
9370 emit_insn (gen_strsethi (destreg,
9371 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9372 emit_label (label);
9373 LABEL_NUSES (label) = 1;
9374 }
9375 if (align > 1 && count != 0 && (count & 1))
9376 emit_insn (gen_strsetqi (destreg,
9377 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9378 if (align <= 1 || count == 0)
9379 {
9380 rtx label = ix86_expand_aligntest (destreg, 1);
9381 emit_insn (gen_strsetqi (destreg,
9382 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9383 emit_label (label);
9384 LABEL_NUSES (label) = 1;
9385 }
9386 }
9387 return 1;
9388 }
9389 /* Expand strlen. */
9390 int
9391 ix86_expand_strlen (out, src, eoschar, align)
9392 rtx out, src, eoschar, align;
9393 {
9394 rtx addr, scratch1, scratch2, scratch3, scratch4;
9395
9396 /* The generic case of the strlen expander is long. Avoid expanding
9397 it unless TARGET_INLINE_ALL_STRINGOPS. */
9398
9399 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9400 && !TARGET_INLINE_ALL_STRINGOPS
9401 && !optimize_size
9402 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9403 return 0;
9404
9405 addr = force_reg (Pmode, XEXP (src, 0));
9406 scratch1 = gen_reg_rtx (Pmode);
9407
9408 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9409 && !optimize_size)
9410 {
9411 /* Well, it seems that some optimizer does not combine a call like
9412 foo (strlen (bar), strlen (bar));
9413 when the move and the subtraction are done here. It does calculate
9414 the length just once when these instructions are done inside
9415 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
9416 often used and I use one fewer register for the lifetime of
9417 output_strlen_unroll(), this is better. */
9418
9419 emit_move_insn (out, addr);
9420
9421 ix86_expand_strlensi_unroll_1 (out, align);
9422
9423 /* strlensi_unroll_1 returns the address of the zero at the end of
9424 the string, like memchr(), so compute the length by subtracting
9425 the start address. */
9426 if (TARGET_64BIT)
9427 emit_insn (gen_subdi3 (out, out, addr));
9428 else
9429 emit_insn (gen_subsi3 (out, out, addr));
9430 }
9431 else
9432 {
9433 scratch2 = gen_reg_rtx (Pmode);
9434 scratch3 = gen_reg_rtx (Pmode);
9435 scratch4 = force_reg (Pmode, constm1_rtx);
9436
9437 emit_move_insn (scratch3, addr);
9438 eoschar = force_reg (QImode, eoschar);
9439
9440 emit_insn (gen_cld ());
9441 if (TARGET_64BIT)
9442 {
9443 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9444 align, scratch4, scratch3));
9445 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9446 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9447 }
9448 else
9449 {
9450 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9451 align, scratch4, scratch3));
9452 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9453 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9454 }
9455 }
9456 return 1;
9457 }
9458
9459 /* Expand the appropriate insns for doing strlen if not just doing
9460 repnz; scasb
9461
9462 out = result, initialized with the start address
9463 align_rtx = alignment of the address.
9464 scratch = scratch register, initialized with the start address when
9465 not aligned, otherwise undefined
9466
9467 This is just the body. It needs the initializations mentioned above and
9468 some address computation at the end. These things are done in i386.md. */
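/* Editorial outline of the expansion below: compare up to three leading
   bytes one at a time until OUT is 4-byte aligned, then loop loading four
   bytes at once and applying the (x - 0x01010101) & ~x & 0x80808080 test
   to detect a zero byte, and finally (partly beyond this excerpt) locate
   which of the four bytes was the zero, using cmov when available.  */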
9469
9470 static void
9471 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9472 rtx out, align_rtx;
9473 {
9474 int align;
9475 rtx tmp;
9476 rtx align_2_label = NULL_RTX;
9477 rtx align_3_label = NULL_RTX;
9478 rtx align_4_label = gen_label_rtx ();
9479 rtx end_0_label = gen_label_rtx ();
9480 rtx mem;
9481 rtx tmpreg = gen_reg_rtx (SImode);
9482 rtx scratch = gen_reg_rtx (SImode);
9483
9484 align = 0;
9485 if (GET_CODE (align_rtx) == CONST_INT)
9486 align = INTVAL (align_rtx);
9487
9488 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9489
9490 /* Is there a known alignment and is it less than 4? */
9491 if (align < 4)
9492 {
9493 rtx scratch1 = gen_reg_rtx (Pmode);
9494 emit_move_insn (scratch1, out);
9495 /* Is there a known alignment and is it not 2? */
9496 if (align != 2)
9497 {
9498 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9499 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9500
9501 /* Leave just the 3 lower bits. */
9502 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9503 NULL_RTX, 0, OPTAB_WIDEN);
9504
9505 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9506 Pmode, 1, align_4_label);
9507 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9508 Pmode, 1, align_2_label);
9509 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9510 Pmode, 1, align_3_label);
9511 }
9512 else
9513 {
9514 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9515 check whether it is aligned to a 4-byte boundary. */
9516
9517 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9518 NULL_RTX, 0, OPTAB_WIDEN);
9519
9520 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9521 Pmode, 1, align_4_label);
9522 }
9523
9524 mem = gen_rtx_MEM (QImode, out);
9525
9526 /* Now compare the bytes. */
9527
9528 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9529 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9530 QImode, 1, end_0_label);
9531
9532 /* Increment the address. */
9533 if (TARGET_64BIT)
9534 emit_insn (gen_adddi3 (out, out, const1_rtx));
9535 else
9536 emit_insn (gen_addsi3 (out, out, const1_rtx));
9537
9538 /* Not needed with an alignment of 2. */
9539 if (align != 2)
9540 {
9541 emit_label (align_2_label);
9542
9543 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9544 end_0_label);
9545
9546 if (TARGET_64BIT)
9547 emit_insn (gen_adddi3 (out, out, const1_rtx));
9548 else
9549 emit_insn (gen_addsi3 (out, out, const1_rtx));
9550
9551 emit_label (align_3_label);
9552 }
9553
9554 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9555 end_0_label);
9556
9557 if (TARGET_64BIT)
9558 emit_insn (gen_adddi3 (out, out, const1_rtx));
9559 else
9560 emit_insn (gen_addsi3 (out, out, const1_rtx));
9561 }
9562
9563 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
9564 align this loop; it only makes the program larger and does not help
9565 speed. */
9566 emit_label (align_4_label);
9567
9568 mem = gen_rtx_MEM (SImode, out);
9569 emit_move_insn (scratch, mem);
9570 if (TARGET_64BIT)
9571 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9572 else
9573 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9574
9575 /* This formula yields a nonzero result iff one of the bytes is zero.
9576 This saves three branches inside the loop and many cycles. */
9577
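/* A worked note on the formula below: when x has no zero byte, the
subtraction x - 0x01010101 borrows nowhere, so each result byte is
b - 1; that has bit 7 set only for b >= 0x81, and ~x then clears it.
A zero byte, however, borrows and leaves 0xff, which survives the ~x
mask. So ((x - 0x01010101) & ~x) & 0x80808080 is nonzero exactly
when some byte of x is zero; e.g. x = 0x12003456 yields 0x00800000. */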
9578 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9579 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9580 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9581 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9582 GEN_INT (trunc_int_for_mode
9583 (0x80808080, SImode))));
9584 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9585 align_4_label);
9586
9587 if (TARGET_CMOVE)
9588 {
9589 rtx reg = gen_reg_rtx (SImode);
9590 rtx reg2 = gen_reg_rtx (Pmode);
9591 emit_move_insn (reg, tmpreg);
9592 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9593
9594 /* If zero is not in the first two bytes, move two bytes forward. */
9595 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9596 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9597 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9598 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9599 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9600 reg,
9601 tmpreg)));
9602 /* Emit lea manually to avoid clobbering of flags. */
9603 emit_insn (gen_rtx_SET (SImode, reg2,
9604 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9605
9606 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9607 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9608 emit_insn (gen_rtx_SET (VOIDmode, out,
9609 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9610 reg2,
9611 out)));
9612
9613 }
9614 else
9615 {
9616 rtx end_2_label = gen_label_rtx ();
9617 /* Is zero in the first two bytes? */
9618
9619 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9620 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9621 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9622 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9623 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9624 pc_rtx);
9625 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9626 JUMP_LABEL (tmp) = end_2_label;
9627
9628 /* Not in the first two. Move two bytes forward. */
9629 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9630 if (TARGET_64BIT)
9631 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9632 else
9633 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9634
9635 emit_label (end_2_label);
9636
9637 }
9638
9639 /* Avoid branch in fixing the byte. */
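/* How this works: bit 7 of TMPREG's low byte is set exactly when the
terminating zero is the lower byte of the 16-bit half selected above.
Adding the QImode value to itself copies that bit into the carry flag,
and the sbb below subtracts 4 or 3 (3 plus the carry) from OUT,
leaving it pointing at the zero byte. */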
9640 tmpreg = gen_lowpart (QImode, tmpreg);
9641 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9642 if (TARGET_64BIT)
9643 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9644 else
9645 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9646
9647 emit_label (end_0_label);
9648 }
9649 \f
9650 /* Clear stack slot assignments remembered from previous functions.
9651 This is called from INIT_EXPANDERS once before RTL is emitted for each
9652 function. */
9653
9654 static void
9655 ix86_init_machine_status (p)
9656 struct function *p;
9657 {
9658 p->machine = (struct machine_function *)
9659 xcalloc (1, sizeof (struct machine_function));
9660 }
9661
9662 /* Mark machine specific bits of P for GC. */
9663 static void
9664 ix86_mark_machine_status (p)
9665 struct function *p;
9666 {
9667 struct machine_function *machine = p->machine;
9668 enum machine_mode mode;
9669 int n;
9670
9671 if (! machine)
9672 return;
9673
9674 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9675 mode = (enum machine_mode) ((int) mode + 1))
9676 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9677 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9678 }
9679
9680 static void
9681 ix86_free_machine_status (p)
9682 struct function *p;
9683 {
9684 free (p->machine);
9685 p->machine = NULL;
9686 }
9687
9688 /* Return a MEM corresponding to a stack slot with mode MODE.
9689 Allocate a new slot if necessary.
9690
9691 The RTL for a function can have several slots available: N is
9692 which slot to use. */
9693
9694 rtx
9695 assign_386_stack_local (mode, n)
9696 enum machine_mode mode;
9697 int n;
9698 {
9699 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9700 abort ();
9701
9702 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9703 ix86_stack_locals[(int) mode][n]
9704 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9705
9706 return ix86_stack_locals[(int) mode][n];
9707 }
9708 \f
9709 /* Calculate the length of the memory address in the instruction
9710 encoding. Does not include the one-byte modrm, opcode, or prefix. */
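/* For example, as computed below: a plain register base needs no extra
bytes, a base plus an 8-bit displacement needs 1, a bare displacement
needs 4, and an index register adds 1 for the SIB byte. */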
9711
9712 static int
9713 memory_address_length (addr)
9714 rtx addr;
9715 {
9716 struct ix86_address parts;
9717 rtx base, index, disp;
9718 int len;
9719
9720 if (GET_CODE (addr) == PRE_DEC
9721 || GET_CODE (addr) == POST_INC
9722 || GET_CODE (addr) == PRE_MODIFY
9723 || GET_CODE (addr) == POST_MODIFY)
9724 return 0;
9725
9726 if (! ix86_decompose_address (addr, &parts))
9727 abort ();
9728
9729 base = parts.base;
9730 index = parts.index;
9731 disp = parts.disp;
9732 len = 0;
9733
9734 /* Register Indirect. */
9735 if (base && !index && !disp)
9736 {
9737 /* Special cases: ebp and esp need the two-byte modrm form. */
9738 if (addr == stack_pointer_rtx
9739 || addr == arg_pointer_rtx
9740 || addr == frame_pointer_rtx
9741 || addr == hard_frame_pointer_rtx)
9742 len = 1;
9743 }
9744
9745 /* Direct Addressing. */
9746 else if (disp && !base && !index)
9747 len = 4;
9748
9749 else
9750 {
9751 /* Find the length of the displacement constant. */
9752 if (disp)
9753 {
9754 if (GET_CODE (disp) == CONST_INT
9755 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9756 len = 1;
9757 else
9758 len = 4;
9759 }
9760
9761 /* An index requires the two-byte modrm form. */
9762 if (index)
9763 len += 1;
9764 }
9765
9766 return len;
9767 }
9768
9769 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
9770 is set, expect that the insn has an 8-bit immediate alternative. */
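/* For instance, with SHORTFORM set an immediate operand of 100 counts as
1 byte, while 100000 counts as 4 bytes in SImode (DImode immediates are
likewise limited to the 4-byte sign-extended form). */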
9771 int
9772 ix86_attr_length_immediate_default (insn, shortform)
9773 rtx insn;
9774 int shortform;
9775 {
9776 int len = 0;
9777 int i;
9778 extract_insn_cached (insn);
9779 for (i = recog_data.n_operands - 1; i >= 0; --i)
9780 if (CONSTANT_P (recog_data.operand[i]))
9781 {
9782 if (len)
9783 abort ();
9784 if (shortform
9785 && GET_CODE (recog_data.operand[i]) == CONST_INT
9786 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9787 len = 1;
9788 else
9789 {
9790 switch (get_attr_mode (insn))
9791 {
9792 case MODE_QI:
9793 len+=1;
9794 break;
9795 case MODE_HI:
9796 len+=2;
9797 break;
9798 case MODE_SI:
9799 len+=4;
9800 break;
9801 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9802 case MODE_DI:
9803 len+=4;
9804 break;
9805 default:
9806 fatal_insn ("unknown insn mode", insn);
9807 }
9808 }
9809 }
9810 return len;
9811 }
9812 /* Compute default value for "length_address" attribute. */
9813 int
9814 ix86_attr_length_address_default (insn)
9815 rtx insn;
9816 {
9817 int i;
9818 extract_insn_cached (insn);
9819 for (i = recog_data.n_operands - 1; i >= 0; --i)
9820 if (GET_CODE (recog_data.operand[i]) == MEM)
9821 {
9822 return memory_address_length (XEXP (recog_data.operand[i], 0));
9823 break;
9824 }
9825 return 0;
9826 }
9827 \f
9828 /* Return the maximum number of instructions a cpu can issue. */
9829
9830 static int
9831 ix86_issue_rate ()
9832 {
9833 switch (ix86_cpu)
9834 {
9835 case PROCESSOR_PENTIUM:
9836 case PROCESSOR_K6:
9837 return 2;
9838
9839 case PROCESSOR_PENTIUMPRO:
9840 case PROCESSOR_PENTIUM4:
9841 case PROCESSOR_ATHLON:
9842 return 3;
9843
9844 default:
9845 return 1;
9846 }
9847 }
9848
9849 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9850 by DEP_INSN and nothing else set by DEP_INSN. */
9851
9852 static int
9853 ix86_flags_dependant (insn, dep_insn, insn_type)
9854 rtx insn, dep_insn;
9855 enum attr_type insn_type;
9856 {
9857 rtx set, set2;
9858
9859 /* Simplify the test for uninteresting insns. */
9860 if (insn_type != TYPE_SETCC
9861 && insn_type != TYPE_ICMOV
9862 && insn_type != TYPE_FCMOV
9863 && insn_type != TYPE_IBR)
9864 return 0;
9865
9866 if ((set = single_set (dep_insn)) != 0)
9867 {
9868 set = SET_DEST (set);
9869 set2 = NULL_RTX;
9870 }
9871 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9872 && XVECLEN (PATTERN (dep_insn), 0) == 2
9873 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9874 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9875 {
9876 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9877 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9878 }
9879 else
9880 return 0;
9881
9882 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9883 return 0;
9884
9885 /* This test is true if the dependent insn reads the flags but
9886 not any other potentially set register. */
9887 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9888 return 0;
9889
9890 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9891 return 0;
9892
9893 return 1;
9894 }
9895
9896 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9897 address with operands set by DEP_INSN. */
9898
9899 static int
9900 ix86_agi_dependant (insn, dep_insn, insn_type)
9901 rtx insn, dep_insn;
9902 enum attr_type insn_type;
9903 {
9904 rtx addr;
9905
9906 if (insn_type == TYPE_LEA
9907 && TARGET_PENTIUM)
9908 {
9909 addr = PATTERN (insn);
9910 if (GET_CODE (addr) == SET)
9911 ;
9912 else if (GET_CODE (addr) == PARALLEL
9913 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9914 addr = XVECEXP (addr, 0, 0);
9915 else
9916 abort ();
9917 addr = SET_SRC (addr);
9918 }
9919 else
9920 {
9921 int i;
9922 extract_insn_cached (insn);
9923 for (i = recog_data.n_operands - 1; i >= 0; --i)
9924 if (GET_CODE (recog_data.operand[i]) == MEM)
9925 {
9926 addr = XEXP (recog_data.operand[i], 0);
9927 goto found;
9928 }
9929 return 0;
9930 found:;
9931 }
9932
9933 return modified_in_p (addr, dep_insn);
9934 }
9935
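/* Adjust COST, the scheduling cost of the dependency LINK between
DEP_INSN and INSN, for the processor selected by ix86_cpu. */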
9936 static int
9937 ix86_adjust_cost (insn, link, dep_insn, cost)
9938 rtx insn, link, dep_insn;
9939 int cost;
9940 {
9941 enum attr_type insn_type, dep_insn_type;
9942 enum attr_memory memory, dep_memory;
9943 rtx set, set2;
9944 int dep_insn_code_number;
9945
9946 /* Anti and output dependencies have zero cost on all CPUs. */
9947 if (REG_NOTE_KIND (link) != 0)
9948 return 0;
9949
9950 dep_insn_code_number = recog_memoized (dep_insn);
9951
9952 /* If we can't recognize the insns, we can't really do anything. */
9953 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9954 return cost;
9955
9956 insn_type = get_attr_type (insn);
9957 dep_insn_type = get_attr_type (dep_insn);
9958
9959 switch (ix86_cpu)
9960 {
9961 case PROCESSOR_PENTIUM:
9962 /* Address Generation Interlock adds a cycle of latency. */
9963 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9964 cost += 1;
9965
9966 /* ??? Compares pair with jump/setcc. */
9967 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9968 cost = 0;
9969
9970 /* Floating point stores require the value to be ready one cycle earlier. */
9971 if (insn_type == TYPE_FMOV
9972 && get_attr_memory (insn) == MEMORY_STORE
9973 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9974 cost += 1;
9975 break;
9976
9977 case PROCESSOR_PENTIUMPRO:
9978 memory = get_attr_memory (insn);
9979 dep_memory = get_attr_memory (dep_insn);
9980
9981 /* Since we can't represent delayed latencies of load+operation,
9982 increase the cost here for non-imov insns. */
9983 if (dep_insn_type != TYPE_IMOV
9984 && dep_insn_type != TYPE_FMOV
9985 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9986 cost += 1;
9987
9988 /* INT->FP conversion is expensive. */
9989 if (get_attr_fp_int_src (dep_insn))
9990 cost += 5;
9991
9992 /* There is one cycle extra latency between an FP op and a store. */
9993 if (insn_type == TYPE_FMOV
9994 && (set = single_set (dep_insn)) != NULL_RTX
9995 && (set2 = single_set (insn)) != NULL_RTX
9996 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9997 && GET_CODE (SET_DEST (set2)) == MEM)
9998 cost += 1;
9999
10000 /* Show the ability of the reorder buffer to hide the latency of a load by
10001 executing it in parallel with the previous instruction when the
10002 previous instruction is not needed to compute the address. */
10003 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10004 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10005 {
10006 /* Claim moves to take one cycle, as the core can issue one load
10007 at a time and the next load can start a cycle later. */
10008 if (dep_insn_type == TYPE_IMOV
10009 || dep_insn_type == TYPE_FMOV)
10010 cost = 1;
10011 else if (cost > 1)
10012 cost--;
10013 }
10014 break;
10015
10016 case PROCESSOR_K6:
10017 memory = get_attr_memory (insn);
10018 dep_memory = get_attr_memory (dep_insn);
10019 /* The esp dependency is resolved before the instruction is really
10020 finished. */
10021 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10022 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10023 return 1;
10024
10025 /* Since we can't represent delayed latencies of load+operation,
10026 increase the cost here for non-imov insns. */
10027 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10028 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10029
10030 /* INT->FP conversion is expensive. */
10031 if (get_attr_fp_int_src (dep_insn))
10032 cost += 5;
10033
10034 /* Show the ability of the reorder buffer to hide the latency of a load by
10035 executing it in parallel with the previous instruction when the
10036 previous instruction is not needed to compute the address. */
10037 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10038 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10039 {
10040 /* Claim moves to take one cycle, as the core can issue one load
10041 at a time and the next load can start a cycle later. */
10042 if (dep_insn_type == TYPE_IMOV
10043 || dep_insn_type == TYPE_FMOV)
10044 cost = 1;
10045 else if (cost > 2)
10046 cost -= 2;
10047 else
10048 cost = 1;
10049 }
10050 break;
10051
10052 case PROCESSOR_ATHLON:
10053 memory = get_attr_memory (insn);
10054 dep_memory = get_attr_memory (dep_insn);
10055
10056 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10057 {
10058 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10059 cost += 2;
10060 else
10061 cost += 3;
10062 }
10063 /* Show the ability of the reorder buffer to hide the latency of a load by
10064 executing it in parallel with the previous instruction when the
10065 previous instruction is not needed to compute the address. */
10066 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10067 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10068 {
10069 /* Claim moves to take one cycle, as the core can issue one load
10070 at a time and the next load can start a cycle later. */
10071 if (dep_insn_type == TYPE_IMOV
10072 || dep_insn_type == TYPE_FMOV)
10073 cost = 0;
10074 else if (cost >= 3)
10075 cost -= 3;
10076 else
10077 cost = 0;
10078 }
10079
10080 default:
10081 break;
10082 }
10083
10084 return cost;
10085 }
10086
10087 static union
10088 {
10089 struct ppro_sched_data
10090 {
10091 rtx decode[3];
10092 int issued_this_cycle;
10093 } ppro;
10094 } ix86_sched_data;
10095
10096 static int
10097 ix86_safe_length (insn)
10098 rtx insn;
10099 {
10100 if (recog_memoized (insn) >= 0)
10101 return get_attr_length (insn);
10102 else
10103 return 128;
10104 }
10105
10106 static int
10107 ix86_safe_length_prefix (insn)
10108 rtx insn;
10109 {
10110 if (recog_memoized (insn) >= 0)
10111 return get_attr_length (insn);
10112 else
10113 return 0;
10114 }
10115
10116 static enum attr_memory
10117 ix86_safe_memory (insn)
10118 rtx insn;
10119 {
10120 if (recog_memoized (insn) >= 0)
10121 return get_attr_memory (insn);
10122 else
10123 return MEMORY_UNKNOWN;
10124 }
10125
10126 static enum attr_pent_pair
10127 ix86_safe_pent_pair (insn)
10128 rtx insn;
10129 {
10130 if (recog_memoized (insn) >= 0)
10131 return get_attr_pent_pair (insn);
10132 else
10133 return PENT_PAIR_NP;
10134 }
10135
10136 static enum attr_ppro_uops
10137 ix86_safe_ppro_uops (insn)
10138 rtx insn;
10139 {
10140 if (recog_memoized (insn) >= 0)
10141 return get_attr_ppro_uops (insn);
10142 else
10143 return PPRO_UOPS_MANY;
10144 }
10145
10146 static void
10147 ix86_dump_ppro_packet (dump)
10148 FILE *dump;
10149 {
10150 if (ix86_sched_data.ppro.decode[0])
10151 {
10152 fprintf (dump, "PPRO packet: %d",
10153 INSN_UID (ix86_sched_data.ppro.decode[0]));
10154 if (ix86_sched_data.ppro.decode[1])
10155 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10156 if (ix86_sched_data.ppro.decode[2])
10157 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10158 fputc ('\n', dump);
10159 }
10160 }
10161
10162 /* We're beginning a new block. Initialize data structures as necessary. */
10163
10164 static void
10165 ix86_sched_init (dump, sched_verbose, veclen)
10166 FILE *dump ATTRIBUTE_UNUSED;
10167 int sched_verbose ATTRIBUTE_UNUSED;
10168 int veclen ATTRIBUTE_UNUSED;
10169 {
10170 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10171 }
10172
10173 /* Shift INSN to SLOT, and shift everything else down. */
10174
10175 static void
10176 ix86_reorder_insn (insnp, slot)
10177 rtx *insnp, *slot;
10178 {
10179 if (insnp != slot)
10180 {
10181 rtx insn = *insnp;
10182 do
10183 insnp[0] = insnp[1];
10184 while (++insnp != slot);
10185 *insnp = insn;
10186 }
10187 }
10188
10189 /* Find an instruction with the given pairability that loses the fewest cycles
10190 to the fact that the CPU waits for both pipelines to finish before
10191 reading the next instructions. Also take care that both instructions together
10192 do not exceed 7 bytes. */
10193
10194 static rtx *
10195 ix86_pent_find_pair (e_ready, ready, type, first)
10196 rtx *e_ready;
10197 rtx *ready;
10198 enum attr_pent_pair type;
10199 rtx first;
10200 {
10201 int mincycles, cycles;
10202 enum attr_pent_pair tmp;
10203 enum attr_memory memory;
10204 rtx *insnp, *bestinsnp = NULL;
10205
10206 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10207 return NULL;
10208
10209 memory = ix86_safe_memory (first);
10210 cycles = result_ready_cost (first);
10211 mincycles = INT_MAX;
10212
10213 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10214 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10215 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10216 {
10217 enum attr_memory second_memory;
10218 int secondcycles, currentcycles;
10219
10220 second_memory = ix86_safe_memory (*insnp);
10221 secondcycles = result_ready_cost (*insnp);
10222 currentcycles = abs (cycles - secondcycles);
10223
10224 if (secondcycles >= 1 && cycles >= 1)
10225 {
10226 /* Two read/modify/write instructions together take two
10227 cycles longer. */
10228 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10229 currentcycles += 2;
10230
10231 /* A read/modify/write instruction followed by a read/modify
10232 takes one cycle longer. */
10233 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10234 && tmp != PENT_PAIR_UV
10235 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10236 currentcycles += 1;
10237 }
10238 if (currentcycles < mincycles)
10239 bestinsnp = insnp, mincycles = currentcycles;
10240 }
10241
10242 return bestinsnp;
10243 }
10244
10245 /* Subroutines of ix86_sched_reorder. */
10246
10247 static void
10248 ix86_sched_reorder_pentium (ready, e_ready)
10249 rtx *ready;
10250 rtx *e_ready;
10251 {
10252 enum attr_pent_pair pair1, pair2;
10253 rtx *insnp;
10254
10255 /* This wouldn't be necessary if Haifa knew that static insn ordering
10256 matters for which pipe an insn is issued to. So we have to make
10257 some minor rearrangements. */
10258
10259 pair1 = ix86_safe_pent_pair (*e_ready);
10260
10261 /* If the first insn is non-pairable, let it be. */
10262 if (pair1 == PENT_PAIR_NP)
10263 return;
10264
10265 pair2 = PENT_PAIR_NP;
10266 insnp = 0;
10267
10268 /* If the first insn is UV or PV pairable, search for a PU
10269 insn to go with. */
10270 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10271 {
10272 insnp = ix86_pent_find_pair (e_ready-1, ready,
10273 PENT_PAIR_PU, *e_ready);
10274 if (insnp)
10275 pair2 = PENT_PAIR_PU;
10276 }
10277
10278 /* If the first insn is PU or UV pairable, search for a PV
10279 insn to go with. */
10280 if (pair2 == PENT_PAIR_NP
10281 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10282 {
10283 insnp = ix86_pent_find_pair (e_ready-1, ready,
10284 PENT_PAIR_PV, *e_ready);
10285 if (insnp)
10286 pair2 = PENT_PAIR_PV;
10287 }
10288
10289 /* If the first insn is pairable, search for a UV
10290 insn to go with. */
10291 if (pair2 == PENT_PAIR_NP)
10292 {
10293 insnp = ix86_pent_find_pair (e_ready-1, ready,
10294 PENT_PAIR_UV, *e_ready);
10295 if (insnp)
10296 pair2 = PENT_PAIR_UV;
10297 }
10298
10299 if (pair2 == PENT_PAIR_NP)
10300 return;
10301
10302 /* Found something! Decide if we need to swap the order. */
10303 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10304 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10305 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10306 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10307 ix86_reorder_insn (insnp, e_ready);
10308 else
10309 ix86_reorder_insn (insnp, e_ready - 1);
10310 }
10311
10312 static void
10313 ix86_sched_reorder_ppro (ready, e_ready)
10314 rtx *ready;
10315 rtx *e_ready;
10316 {
10317 rtx decode[3];
10318 enum attr_ppro_uops cur_uops;
10319 int issued_this_cycle;
10320 rtx *insnp;
10321 int i;
10322
10323 /* At this point .ppro.decode contains the state of the three
10324 decoders from last "cycle". That is, those insns that were
10325 actually independent. But here we're scheduling for the
10326 decoder, and we may find things that are decodable in the
10327 same cycle. */
10328
10329 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10330 issued_this_cycle = 0;
10331
10332 insnp = e_ready;
10333 cur_uops = ix86_safe_ppro_uops (*insnp);
10334
10335 /* If the decoders are empty, and we have a complex insn at the
10336 head of the priority queue, let it issue without complaint. */
10337 if (decode[0] == NULL)
10338 {
10339 if (cur_uops == PPRO_UOPS_MANY)
10340 {
10341 decode[0] = *insnp;
10342 goto ppro_done;
10343 }
10344
10345 /* Otherwise, search for a 2-4 uop insn to issue. */
10346 while (cur_uops != PPRO_UOPS_FEW)
10347 {
10348 if (insnp == ready)
10349 break;
10350 cur_uops = ix86_safe_ppro_uops (*--insnp);
10351 }
10352
10353 /* If so, move it to the head of the line. */
10354 if (cur_uops == PPRO_UOPS_FEW)
10355 ix86_reorder_insn (insnp, e_ready);
10356
10357 /* Issue the head of the queue. */
10358 issued_this_cycle = 1;
10359 decode[0] = *e_ready--;
10360 }
10361
10362 /* Look for simple insns to fill in the other two slots. */
10363 for (i = 1; i < 3; ++i)
10364 if (decode[i] == NULL)
10365 {
10366 if (ready >= e_ready)
10367 goto ppro_done;
10368
10369 insnp = e_ready;
10370 cur_uops = ix86_safe_ppro_uops (*insnp);
10371 while (cur_uops != PPRO_UOPS_ONE)
10372 {
10373 if (insnp == ready)
10374 break;
10375 cur_uops = ix86_safe_ppro_uops (*--insnp);
10376 }
10377
10378 /* Found one. Move it to the head of the queue and issue it. */
10379 if (cur_uops == PPRO_UOPS_ONE)
10380 {
10381 ix86_reorder_insn (insnp, e_ready);
10382 decode[i] = *e_ready--;
10383 issued_this_cycle++;
10384 continue;
10385 }
10386
10387 /* ??? Didn't find one. Ideally, here we would do a lazy split
10388 of 2-uop insns, issue one and queue the other. */
10389 }
10390
10391 ppro_done:
10392 if (issued_this_cycle == 0)
10393 issued_this_cycle = 1;
10394 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10395 }
10396
10397 /* We are about to begin issuing insns for this clock cycle.
10398 Override the default sort algorithm to better slot instructions. */
10399 static int
10400 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10401 FILE *dump ATTRIBUTE_UNUSED;
10402 int sched_verbose ATTRIBUTE_UNUSED;
10403 rtx *ready;
10404 int *n_readyp;
10405 int clock_var ATTRIBUTE_UNUSED;
10406 {
10407 int n_ready = *n_readyp;
10408 rtx *e_ready = ready + n_ready - 1;
10409
10410 if (n_ready < 2)
10411 goto out;
10412
10413 switch (ix86_cpu)
10414 {
10415 default:
10416 break;
10417
10418 case PROCESSOR_PENTIUM:
10419 ix86_sched_reorder_pentium (ready, e_ready);
10420 break;
10421
10422 case PROCESSOR_PENTIUMPRO:
10423 ix86_sched_reorder_ppro (ready, e_ready);
10424 break;
10425 }
10426
10427 out:
10428 return ix86_issue_rate ();
10429 }
10430
10431 /* We are about to issue INSN. Return the number of insns left on the
10432 ready queue that can be issued this cycle. */
10433
10434 static int
10435 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10436 FILE *dump;
10437 int sched_verbose;
10438 rtx insn;
10439 int can_issue_more;
10440 {
10441 int i;
10442 switch (ix86_cpu)
10443 {
10444 default:
10445 return can_issue_more - 1;
10446
10447 case PROCESSOR_PENTIUMPRO:
10448 {
10449 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10450
10451 if (uops == PPRO_UOPS_MANY)
10452 {
10453 if (sched_verbose)
10454 ix86_dump_ppro_packet (dump);
10455 ix86_sched_data.ppro.decode[0] = insn;
10456 ix86_sched_data.ppro.decode[1] = NULL;
10457 ix86_sched_data.ppro.decode[2] = NULL;
10458 if (sched_verbose)
10459 ix86_dump_ppro_packet (dump);
10460 ix86_sched_data.ppro.decode[0] = NULL;
10461 }
10462 else if (uops == PPRO_UOPS_FEW)
10463 {
10464 if (sched_verbose)
10465 ix86_dump_ppro_packet (dump);
10466 ix86_sched_data.ppro.decode[0] = insn;
10467 ix86_sched_data.ppro.decode[1] = NULL;
10468 ix86_sched_data.ppro.decode[2] = NULL;
10469 }
10470 else
10471 {
10472 for (i = 0; i < 3; ++i)
10473 if (ix86_sched_data.ppro.decode[i] == NULL)
10474 {
10475 ix86_sched_data.ppro.decode[i] = insn;
10476 break;
10477 }
10478 if (i == 3)
10479 abort ();
10480 if (i == 2)
10481 {
10482 if (sched_verbose)
10483 ix86_dump_ppro_packet (dump);
10484 ix86_sched_data.ppro.decode[0] = NULL;
10485 ix86_sched_data.ppro.decode[1] = NULL;
10486 ix86_sched_data.ppro.decode[2] = NULL;
10487 }
10488 }
10489 }
10490 return --ix86_sched_data.ppro.issued_this_cycle;
10491 }
10492 }
10493 \f
10494 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10495 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10496 appropriate. */
10497
10498 void
10499 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10500 rtx insns;
10501 rtx dstref, srcref, dstreg, srcreg;
10502 {
10503 rtx insn;
10504
10505 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10506 if (INSN_P (insn))
10507 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10508 dstreg, srcreg);
10509 }
10510
10511 /* Subroutine of above to actually do the updating by recursively walking
10512 the rtx. */
10513
10514 static void
10515 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10516 rtx x;
10517 rtx dstref, srcref, dstreg, srcreg;
10518 {
10519 enum rtx_code code = GET_CODE (x);
10520 const char *format_ptr = GET_RTX_FORMAT (code);
10521 int i, j;
10522
10523 if (code == MEM && XEXP (x, 0) == dstreg)
10524 MEM_COPY_ATTRIBUTES (x, dstref);
10525 else if (code == MEM && XEXP (x, 0) == srcreg)
10526 MEM_COPY_ATTRIBUTES (x, srcref);
10527
10528 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10529 {
10530 if (*format_ptr == 'e')
10531 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10532 dstreg, srcreg);
10533 else if (*format_ptr == 'E')
10534 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10535 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10536 dstreg, srcreg);
10537 }
10538 }
10539 \f
10540 /* Compute the alignment given to a constant that is being placed in memory.
10541 EXP is the constant and ALIGN is the alignment that the object would
10542 ordinarily have.
10543 The value of this function is used instead of that alignment to align
10544 the object. */
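/* Note that both ALIGN and the returned alignment are measured in bits,
so the values 64, 128 and 256 below correspond to 8-, 16- and 32-byte
alignment. */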
10545
10546 int
10547 ix86_constant_alignment (exp, align)
10548 tree exp;
10549 int align;
10550 {
10551 if (TREE_CODE (exp) == REAL_CST)
10552 {
10553 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10554 return 64;
10555 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10556 return 128;
10557 }
10558 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10559 && align < 256)
10560 return 256;
10561
10562 return align;
10563 }
10564
10565 /* Compute the alignment for a static variable.
10566 TYPE is the data type, and ALIGN is the alignment that
10567 the object would ordinarily have. The value of this function is used
10568 instead of that alignment to align the object. */
10569
10570 int
10571 ix86_data_alignment (type, align)
10572 tree type;
10573 int align;
10574 {
10575 if (AGGREGATE_TYPE_P (type)
10576 && TYPE_SIZE (type)
10577 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10578 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10579 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10580 return 256;
10581
10582 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10583 to a 16-byte boundary. */
10584 if (TARGET_64BIT)
10585 {
10586 if (AGGREGATE_TYPE_P (type)
10587 && TYPE_SIZE (type)
10588 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10589 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10590 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10591 return 128;
10592 }
10593
10594 if (TREE_CODE (type) == ARRAY_TYPE)
10595 {
10596 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10597 return 64;
10598 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10599 return 128;
10600 }
10601 else if (TREE_CODE (type) == COMPLEX_TYPE)
10602 {
10603
10604 if (TYPE_MODE (type) == DCmode && align < 64)
10605 return 64;
10606 if (TYPE_MODE (type) == XCmode && align < 128)
10607 return 128;
10608 }
10609 else if ((TREE_CODE (type) == RECORD_TYPE
10610 || TREE_CODE (type) == UNION_TYPE
10611 || TREE_CODE (type) == QUAL_UNION_TYPE)
10612 && TYPE_FIELDS (type))
10613 {
10614 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10615 return 64;
10616 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10617 return 128;
10618 }
10619 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10620 || TREE_CODE (type) == INTEGER_TYPE)
10621 {
10622 if (TYPE_MODE (type) == DFmode && align < 64)
10623 return 64;
10624 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10625 return 128;
10626 }
10627
10628 return align;
10629 }
10630
10631 /* Compute the alignment for a local variable.
10632 TYPE is the data type, and ALIGN is the alignment that
10633 the object would ordinarily have. The value of this macro is used
10634 instead of that alignment to align the object. */
10635
10636 int
10637 ix86_local_alignment (type, align)
10638 tree type;
10639 int align;
10640 {
10641 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10642 to a 16-byte boundary. */
10643 if (TARGET_64BIT)
10644 {
10645 if (AGGREGATE_TYPE_P (type)
10646 && TYPE_SIZE (type)
10647 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10648 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10649 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10650 return 128;
10651 }
10652 if (TREE_CODE (type) == ARRAY_TYPE)
10653 {
10654 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10655 return 64;
10656 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10657 return 128;
10658 }
10659 else if (TREE_CODE (type) == COMPLEX_TYPE)
10660 {
10661 if (TYPE_MODE (type) == DCmode && align < 64)
10662 return 64;
10663 if (TYPE_MODE (type) == XCmode && align < 128)
10664 return 128;
10665 }
10666 else if ((TREE_CODE (type) == RECORD_TYPE
10667 || TREE_CODE (type) == UNION_TYPE
10668 || TREE_CODE (type) == QUAL_UNION_TYPE)
10669 && TYPE_FIELDS (type))
10670 {
10671 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10672 return 64;
10673 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10674 return 128;
10675 }
10676 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10677 || TREE_CODE (type) == INTEGER_TYPE)
10678 {
10679
10680 if (TYPE_MODE (type) == DFmode && align < 64)
10681 return 64;
10682 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10683 return 128;
10684 }
10685 return align;
10686 }
10687 \f
10688 /* Emit RTL insns to initialize the variable parts of a trampoline.
10689 FNADDR is an RTX for the address of the function's pure code.
10690 CXT is an RTX for the static chain value for the function. */
10691 void
10692 x86_initialize_trampoline (tramp, fnaddr, cxt)
10693 rtx tramp, fnaddr, cxt;
10694 {
10695 if (!TARGET_64BIT)
10696 {
10697 /* Compute offset from the end of the jmp to the target function. */
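/* The 32-bit trampoline is 10 bytes long: 0xb9 (movl $CXT, %ecx) plus
a 4-byte immediate, then 0xe9 (jmp rel32) plus a 4-byte displacement,
which is why DISP is taken relative to tramp + 10. */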
10698 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10699 plus_constant (tramp, 10),
10700 NULL_RTX, 1, OPTAB_DIRECT);
10701 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10702 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10703 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10704 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10705 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10706 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10707 }
10708 else
10709 {
10710 int offset = 0;
10711 /* Try to load the address using the shorter movl instead of movabs.
10712 We may want to support movq for kernel mode, but the kernel does not
10713 use trampolines at the moment. */
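/* The two-byte values below are stored little-endian, so 0xbb41 becomes
the bytes 41 bb (movl $imm32, %r11d), 0xbb49 becomes 49 bb (movabs
$imm64, %r11), 0xba49 becomes 49 ba (movabs $imm64, %r10), and 0xff49
followed by 0xe3 is 49 ff e3 (jmp *%r11). */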
10714 if (x86_64_zero_extended_value (fnaddr))
10715 {
10716 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10717 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10718 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10719 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10720 gen_lowpart (SImode, fnaddr));
10721 offset += 6;
10722 }
10723 else
10724 {
10725 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10726 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10727 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10728 fnaddr);
10729 offset += 10;
10730 }
10731 /* Load static chain using movabs to r10. */
10732 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10733 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10734 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10735 cxt);
10736 offset += 10;
10737 /* Jump to r11. */
10738 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10739 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10740 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10741 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10742 offset += 3;
10743 if (offset > TRAMPOLINE_SIZE)
10744 abort ();
10745 }
10746 }
10747 \f
10748 #define def_builtin(MASK, NAME, TYPE, CODE) \
10749 do { \
10750 if ((MASK) & target_flags) \
10751 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10752 } while (0)
10753
10754 struct builtin_description
10755 {
10756 const unsigned int mask;
10757 const enum insn_code icode;
10758 const char *const name;
10759 const enum ix86_builtins code;
10760 const enum rtx_code comparison;
10761 const unsigned int flag;
10762 };
10763
10764 static const struct builtin_description bdesc_comi[] =
10765 {
10766 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10767 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10768 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10769 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10770 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10771 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10772 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10773 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10774 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10775 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10776 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10777 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10778 };
10779
10780 static const struct builtin_description bdesc_2arg[] =
10781 {
10782 /* SSE */
10783 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10784 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10785 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10786 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10787 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10788 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10789 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10790 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10791
10792 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10793 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10794 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10795 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10796 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10797 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10798 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10799 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10800 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10801 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10802 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10803 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10804 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10805 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10806 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10807 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10808 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10809 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10810 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10811 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10812 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10813 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10814 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10815 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10816
10817 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10818 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10819 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10820 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10821
10822 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10823 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10824 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10825 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10826 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10827
10828 /* MMX */
10829 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10830 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10831 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10832 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10833 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10834 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10835
10836 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10837 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10838 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10839 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10840 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10841 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10842 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10843 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10844
10845 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10846 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10847 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10848
10849 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10850 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10851 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10852 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10853
10854 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10855 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10856
10857 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10858 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10859 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10860 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10861 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10862 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10863
10864 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10865 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10866 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10867 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10868
10869 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10870 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10871 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10872 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10873 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10874 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10875
10876 /* Special. */
10877 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10878 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10879 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10880
10881 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10882 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10883
10884 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10885 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10886 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10887 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10888 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10889 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10890
10891 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10892 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10893 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10894 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10895 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10896 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10897
10898 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10899 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10900 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10901 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10902
10903 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10904 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10905
10906 };
10907
10908 static const struct builtin_description bdesc_1arg[] =
10909 {
10910 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10911 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10912
10913 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10914 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10915 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10916
10917 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10918 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10919 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10920 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10921
10922 };
10923
10924 void
10925 ix86_init_builtins ()
10926 {
10927 if (TARGET_MMX)
10928 ix86_init_mmx_sse_builtins ();
10929 }
10930
10931 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10932 is zero. Otherwise, if TARGET_SSE is not set, only define the MMX
10933 builtins. */
10934 static void
10935 ix86_init_mmx_sse_builtins ()
10936 {
10937 const struct builtin_description * d;
10938 size_t i;
10939 tree endlink = void_list_node;
10940
10941 tree pchar_type_node = build_pointer_type (char_type_node);
10942 tree pfloat_type_node = build_pointer_type (float_type_node);
10943 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10944 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10945
10946 /* Comparisons. */
10947 tree int_ftype_v4sf_v4sf
10948 = build_function_type (integer_type_node,
10949 tree_cons (NULL_TREE, V4SF_type_node,
10950 tree_cons (NULL_TREE,
10951 V4SF_type_node,
10952 endlink)));
10953 tree v4si_ftype_v4sf_v4sf
10954 = build_function_type (V4SI_type_node,
10955 tree_cons (NULL_TREE, V4SF_type_node,
10956 tree_cons (NULL_TREE,
10957 V4SF_type_node,
10958 endlink)));
10959 /* MMX/SSE/integer conversions. */
10960 tree int_ftype_v4sf
10961 = build_function_type (integer_type_node,
10962 tree_cons (NULL_TREE, V4SF_type_node,
10963 endlink));
10964 tree int_ftype_v8qi
10965 = build_function_type (integer_type_node,
10966 tree_cons (NULL_TREE, V8QI_type_node,
10967 endlink));
10968 tree v4sf_ftype_v4sf_int
10969 = build_function_type (V4SF_type_node,
10970 tree_cons (NULL_TREE, V4SF_type_node,
10971 tree_cons (NULL_TREE, integer_type_node,
10972 endlink)));
10973 tree v4sf_ftype_v4sf_v2si
10974 = build_function_type (V4SF_type_node,
10975 tree_cons (NULL_TREE, V4SF_type_node,
10976 tree_cons (NULL_TREE, V2SI_type_node,
10977 endlink)));
10978 tree int_ftype_v4hi_int
10979 = build_function_type (integer_type_node,
10980 tree_cons (NULL_TREE, V4HI_type_node,
10981 tree_cons (NULL_TREE, integer_type_node,
10982 endlink)));
10983 tree v4hi_ftype_v4hi_int_int
10984 = build_function_type (V4HI_type_node,
10985 tree_cons (NULL_TREE, V4HI_type_node,
10986 tree_cons (NULL_TREE, integer_type_node,
10987 tree_cons (NULL_TREE,
10988 integer_type_node,
10989 endlink))));
10990 /* Miscellaneous. */
10991 tree v8qi_ftype_v4hi_v4hi
10992 = build_function_type (V8QI_type_node,
10993 tree_cons (NULL_TREE, V4HI_type_node,
10994 tree_cons (NULL_TREE, V4HI_type_node,
10995 endlink)));
10996 tree v4hi_ftype_v2si_v2si
10997 = build_function_type (V4HI_type_node,
10998 tree_cons (NULL_TREE, V2SI_type_node,
10999 tree_cons (NULL_TREE, V2SI_type_node,
11000 endlink)));
11001 tree v4sf_ftype_v4sf_v4sf_int
11002 = build_function_type (V4SF_type_node,
11003 tree_cons (NULL_TREE, V4SF_type_node,
11004 tree_cons (NULL_TREE, V4SF_type_node,
11005 tree_cons (NULL_TREE,
11006 integer_type_node,
11007 endlink))));
11008 tree v4hi_ftype_v8qi_v8qi
11009 = build_function_type (V4HI_type_node,
11010 tree_cons (NULL_TREE, V8QI_type_node,
11011 tree_cons (NULL_TREE, V8QI_type_node,
11012 endlink)));
11013 tree v2si_ftype_v4hi_v4hi
11014 = build_function_type (V2SI_type_node,
11015 tree_cons (NULL_TREE, V4HI_type_node,
11016 tree_cons (NULL_TREE, V4HI_type_node,
11017 endlink)));
11018 tree v4hi_ftype_v4hi_int
11019 = build_function_type (V4HI_type_node,
11020 tree_cons (NULL_TREE, V4HI_type_node,
11021 tree_cons (NULL_TREE, integer_type_node,
11022 endlink)));
11023 tree v4hi_ftype_v4hi_di
11024 = build_function_type (V4HI_type_node,
11025 tree_cons (NULL_TREE, V4HI_type_node,
11026 tree_cons (NULL_TREE,
11027 long_long_integer_type_node,
11028 endlink)));
11029 tree v2si_ftype_v2si_di
11030 = build_function_type (V2SI_type_node,
11031 tree_cons (NULL_TREE, V2SI_type_node,
11032 tree_cons (NULL_TREE,
11033 long_long_integer_type_node,
11034 endlink)));
11035 tree void_ftype_void
11036 = build_function_type (void_type_node, endlink);
11037 tree void_ftype_unsigned
11038 = build_function_type (void_type_node,
11039 tree_cons (NULL_TREE, unsigned_type_node,
11040 endlink));
11041 tree unsigned_ftype_void
11042 = build_function_type (unsigned_type_node, endlink);
11043 tree di_ftype_void
11044 = build_function_type (long_long_unsigned_type_node, endlink);
11045 tree v4sf_ftype_void
11046 = build_function_type (V4SF_type_node, endlink);
11047 tree v2si_ftype_v4sf
11048 = build_function_type (V2SI_type_node,
11049 tree_cons (NULL_TREE, V4SF_type_node,
11050 endlink));
11051 /* Loads/stores. */
11052 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11053 tree_cons (NULL_TREE, V8QI_type_node,
11054 tree_cons (NULL_TREE,
11055 pchar_type_node,
11056 endlink)));
11057 tree void_ftype_v8qi_v8qi_pchar
11058 = build_function_type (void_type_node, maskmovq_args);
11059 tree v4sf_ftype_pfloat
11060 = build_function_type (V4SF_type_node,
11061 tree_cons (NULL_TREE, pfloat_type_node,
11062 endlink));
11063 /* @@@ the type is bogus */
11064 tree v4sf_ftype_v4sf_pv2si
11065 = build_function_type (V4SF_type_node,
11066 tree_cons (NULL_TREE, V4SF_type_node,
11067 tree_cons (NULL_TREE, pv2si_type_node,
11068 endlink)));
11069 tree void_ftype_pv2si_v4sf
11070 = build_function_type (void_type_node,
11071 tree_cons (NULL_TREE, pv2si_type_node,
11072 tree_cons (NULL_TREE, V4SF_type_node,
11073 endlink)));
11074 tree void_ftype_pfloat_v4sf
11075 = build_function_type (void_type_node,
11076 tree_cons (NULL_TREE, pfloat_type_node,
11077 tree_cons (NULL_TREE, V4SF_type_node,
11078 endlink)));
11079 tree void_ftype_pdi_di
11080 = build_function_type (void_type_node,
11081 tree_cons (NULL_TREE, pdi_type_node,
11082 tree_cons (NULL_TREE,
11083 long_long_unsigned_type_node,
11084 endlink)));
11085 /* Normal vector unops. */
11086 tree v4sf_ftype_v4sf
11087 = build_function_type (V4SF_type_node,
11088 tree_cons (NULL_TREE, V4SF_type_node,
11089 endlink));
11090
11091 /* Normal vector binops. */
11092 tree v4sf_ftype_v4sf_v4sf
11093 = build_function_type (V4SF_type_node,
11094 tree_cons (NULL_TREE, V4SF_type_node,
11095 tree_cons (NULL_TREE, V4SF_type_node,
11096 endlink)));
11097 tree v8qi_ftype_v8qi_v8qi
11098 = build_function_type (V8QI_type_node,
11099 tree_cons (NULL_TREE, V8QI_type_node,
11100 tree_cons (NULL_TREE, V8QI_type_node,
11101 endlink)));
11102 tree v4hi_ftype_v4hi_v4hi
11103 = build_function_type (V4HI_type_node,
11104 tree_cons (NULL_TREE, V4HI_type_node,
11105 tree_cons (NULL_TREE, V4HI_type_node,
11106 endlink)));
11107 tree v2si_ftype_v2si_v2si
11108 = build_function_type (V2SI_type_node,
11109 tree_cons (NULL_TREE, V2SI_type_node,
11110 tree_cons (NULL_TREE, V2SI_type_node,
11111 endlink)));
11112 tree di_ftype_di_di
11113 = build_function_type (long_long_unsigned_type_node,
11114 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11115 tree_cons (NULL_TREE,
11116 long_long_unsigned_type_node,
11117 endlink)));
11118
11119 tree v2si_ftype_v2sf
11120 = build_function_type (V2SI_type_node,
11121 tree_cons (NULL_TREE, V2SF_type_node,
11122 endlink));
11123 tree v2sf_ftype_v2si
11124 = build_function_type (V2SF_type_node,
11125 tree_cons (NULL_TREE, V2SI_type_node,
11126 endlink));
11127 tree v2si_ftype_v2si
11128 = build_function_type (V2SI_type_node,
11129 tree_cons (NULL_TREE, V2SI_type_node,
11130 endlink));
11131 tree v2sf_ftype_v2sf
11132 = build_function_type (V2SF_type_node,
11133 tree_cons (NULL_TREE, V2SF_type_node,
11134 endlink));
11135 tree v2sf_ftype_v2sf_v2sf
11136 = build_function_type (V2SF_type_node,
11137 tree_cons (NULL_TREE, V2SF_type_node,
11138 tree_cons (NULL_TREE,
11139 V2SF_type_node,
11140 endlink)));
11141 tree v2si_ftype_v2sf_v2sf
11142 = build_function_type (V2SI_type_node,
11143 tree_cons (NULL_TREE, V2SF_type_node,
11144 tree_cons (NULL_TREE,
11145 V2SF_type_node,
11146 endlink)));
11147
11148 /* Add all builtins that are more or less simple operations on two
11149 operands. */
11150 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11151 {
11152 /* Use one of the operands; the target can have a different mode for
11153 mask-generating compares. */
11154 enum machine_mode mode;
11155 tree type;
11156
11157 if (d->name == 0)
11158 continue;
11159 mode = insn_data[d->icode].operand[1].mode;
11160
11161 switch (mode)
11162 {
11163 case V4SFmode:
11164 type = v4sf_ftype_v4sf_v4sf;
11165 break;
11166 case V8QImode:
11167 type = v8qi_ftype_v8qi_v8qi;
11168 break;
11169 case V4HImode:
11170 type = v4hi_ftype_v4hi_v4hi;
11171 break;
11172 case V2SImode:
11173 type = v2si_ftype_v2si_v2si;
11174 break;
11175 case DImode:
11176 type = di_ftype_di_di;
11177 break;
11178
11179 default:
11180 abort ();
11181 }
11182
11183 /* Override for comparisons. */
11184 if (d->icode == CODE_FOR_maskcmpv4sf3
11185 || d->icode == CODE_FOR_maskncmpv4sf3
11186 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11187 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11188 type = v4si_ftype_v4sf_v4sf;
11189
11190 def_builtin (d->mask, d->name, type, d->code);
11191 }
11192
11193 /* Add the remaining MMX insns with somewhat more complicated types. */
11194 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11195 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11196 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11197 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11198 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11199 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11200 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11201
11202 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11203 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11204 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11205
11206 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11207 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11208
11209 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11210 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11211
11212 /* comi/ucomi insns. */
11213 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11214 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11215
11216 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11217 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11218 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11219
11220 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11221 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11222 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11223 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11224 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11225 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11226
11227 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11228 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11229 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11230 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11231
11232 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11233 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11234
11235 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11236
11237 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11238 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11239 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11240 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11241 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11242 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11243
11244 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11245 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11246 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11247 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11248
11249 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11250 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11251 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11252 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11253
11254 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11255
11256 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11257
11258 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11259 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11260 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11261 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11262 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11263 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11264
11265 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11266
11267 /* Original 3DNow! */
11268 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11269 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11270 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11271 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11286 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11287 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11288
11289 /* 3DNow! extension as used in the Athlon CPU. */
11290 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11291 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11292 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11295 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11296
11297 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11298 }
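/* Usage sketch (illustrative, assuming the mode-attribute spelling used by
   the intrinsic headers of this era): once registered above, the builtins
   are directly callable from code compiled with the matching -m flags, e.g.

       typedef float __v4sf __attribute__ ((mode (V4SF)));
       __v4sf f (__v4sf x) { return __builtin_ia32_sqrtps (x); }

   <mmintrin.h> and <xmmintrin.h> are thin wrappers around such calls.  */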
11299
11300 /* Errors in the source file can cause expand_expr to return const0_rtx
11301 where we expect a vector. To avoid crashing, use one of the vector
11302 clear instructions. */
11303 static rtx
11304 safe_vector_operand (x, mode)
11305 rtx x;
11306 enum machine_mode mode;
11307 {
11308 if (x != const0_rtx)
11309 return x;
11310 x = gen_reg_rtx (mode);
11311
11312 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11313 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11314 : gen_rtx_SUBREG (DImode, x, 0)));
11315 else
11316 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11317 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11318 return x;
11319 }
11320
11321 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11322
11323 static rtx
11324 ix86_expand_binop_builtin (icode, arglist, target)
11325 enum insn_code icode;
11326 tree arglist;
11327 rtx target;
11328 {
11329 rtx pat;
11330 tree arg0 = TREE_VALUE (arglist);
11331 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11332 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11333 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11334 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11335 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11336 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11337
11338 if (VECTOR_MODE_P (mode0))
11339 op0 = safe_vector_operand (op0, mode0);
11340 if (VECTOR_MODE_P (mode1))
11341 op1 = safe_vector_operand (op1, mode1);
11342
11343 if (! target
11344 || GET_MODE (target) != tmode
11345 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11346 target = gen_reg_rtx (tmode);
11347
11348 /* In case the insn wants input operands in modes different from
11349 the result, abort. */
11350 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11351 abort ();
11352
11353 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11354 op0 = copy_to_mode_reg (mode0, op0);
11355 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11356 op1 = copy_to_mode_reg (mode1, op1);
11357
11358 pat = GEN_FCN (icode) (target, op0, op1);
11359 if (! pat)
11360 return 0;
11361 emit_insn (pat);
11362 return target;
11363 }
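/* For instance, a two-operand builtin such as __builtin_ia32_addps ends up
   here and emits, roughly,

       (set (reg:V4SF target) (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   after copying any operand the predicates reject into a fresh register.  */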
11364
11365 /* In type_for_mode we restrict the ability to create TImode types
11366 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11367 to have a V4SFmode signature. Convert them in-place to TImode. */
11368
11369 static rtx
11370 ix86_expand_timode_binop_builtin (icode, arglist, target)
11371 enum insn_code icode;
11372 tree arglist;
11373 rtx target;
11374 {
11375 rtx pat;
11376 tree arg0 = TREE_VALUE (arglist);
11377 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11378 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11379 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11380
11381 op0 = gen_lowpart (TImode, op0);
11382 op1 = gen_lowpart (TImode, op1);
11383 target = gen_reg_rtx (TImode);
11384
11385 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11386 op0 = copy_to_mode_reg (TImode, op0);
11387 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11388 op1 = copy_to_mode_reg (TImode, op1);
11389
11390 pat = GEN_FCN (icode) (target, op0, op1);
11391 if (! pat)
11392 return 0;
11393 emit_insn (pat);
11394
11395 return gen_lowpart (V4SFmode, target);
11396 }
11397
11398 /* Subroutine of ix86_expand_builtin to take care of stores. */
11399
11400 static rtx
11401 ix86_expand_store_builtin (icode, arglist)
11402 enum insn_code icode;
11403 tree arglist;
11404 {
11405 rtx pat;
11406 tree arg0 = TREE_VALUE (arglist);
11407 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11408 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11409 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11410 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11411 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11412
11413 if (VECTOR_MODE_P (mode1))
11414 op1 = safe_vector_operand (op1, mode1);
11415
11416 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11417 pat = GEN_FCN (icode) (op0, op1);
11418 if (pat)
11419 emit_insn (pat);
11420 return 0;
11421 }
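/* Here arg0 is the pointer and arg1 the value, so e.g.
   __builtin_ia32_storeups (p, v) becomes a store of the V4SF value V to the
   MEM addressed by P; store builtins produce no value, hence the constant
   0 return.  */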
11422
11423 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11424
11425 static rtx
11426 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11427 enum insn_code icode;
11428 tree arglist;
11429 rtx target;
11430 int do_load;
11431 {
11432 rtx pat;
11433 tree arg0 = TREE_VALUE (arglist);
11434 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11435 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11436 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11437
11438 if (! target
11439 || GET_MODE (target) != tmode
11440 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11441 target = gen_reg_rtx (tmode);
11442 if (do_load)
11443 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11444 else
11445 {
11446 if (VECTOR_MODE_P (mode0))
11447 op0 = safe_vector_operand (op0, mode0);
11448
11449 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11450 op0 = copy_to_mode_reg (mode0, op0);
11451 }
11452
11453 pat = GEN_FCN (icode) (target, op0);
11454 if (! pat)
11455 return 0;
11456 emit_insn (pat);
11457 return target;
11458 }
11459
11460 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11461 sqrtss, rsqrtss, rcpss. */
11462
11463 static rtx
11464 ix86_expand_unop1_builtin (icode, arglist, target)
11465 enum insn_code icode;
11466 tree arglist;
11467 rtx target;
11468 {
11469 rtx pat;
11470 tree arg0 = TREE_VALUE (arglist);
11471 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11472 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11473 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11474
11475 if (! target
11476 || GET_MODE (target) != tmode
11477 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11478 target = gen_reg_rtx (tmode);
11479
11480 if (VECTOR_MODE_P (mode0))
11481 op0 = safe_vector_operand (op0, mode0);
11482
11483 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11484 op0 = copy_to_mode_reg (mode0, op0);
11485
11486 pat = GEN_FCN (icode) (target, op0, op0);
11487 if (! pat)
11488 return 0;
11489 emit_insn (pat);
11490 return target;
11491 }
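/* Note that op0 is passed twice: the vm* patterns behind sqrtss, rsqrtss
   and rcpss take an extra V4SF operand supplying the upper three elements
   of the result, and for these builtins both roles are filled by the same
   input.  */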
11492
11493 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11494
11495 static rtx
11496 ix86_expand_sse_compare (d, arglist, target)
11497 const struct builtin_description *d;
11498 tree arglist;
11499 rtx target;
11500 {
11501 rtx pat;
11502 tree arg0 = TREE_VALUE (arglist);
11503 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11504 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11505 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11506 rtx op2;
11507 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11508 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11509 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11510 enum rtx_code comparison = d->comparison;
11511
11512 if (VECTOR_MODE_P (mode0))
11513 op0 = safe_vector_operand (op0, mode0);
11514 if (VECTOR_MODE_P (mode1))
11515 op1 = safe_vector_operand (op1, mode1);
11516
11517 /* Swap operands if we have a comparison that isn't available in
11518 hardware. */
11519 if (d->flag)
11520 {
11521 rtx tmp = gen_reg_rtx (mode1);
11522 emit_move_insn (tmp, op1);
11523 op1 = op0;
11524 op0 = tmp;
11525 }
11526
11527 if (! target
11528 || GET_MODE (target) != tmode
11529 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11530 target = gen_reg_rtx (tmode);
11531
11532 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11533 op0 = copy_to_mode_reg (mode0, op0);
11534 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11535 op1 = copy_to_mode_reg (mode1, op1);
11536
11537 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11538 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11539 if (! pat)
11540 return 0;
11541 emit_insn (pat);
11542 return target;
11543 }
11544
11545 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11546
11547 static rtx
11548 ix86_expand_sse_comi (d, arglist, target)
11549 const struct builtin_description *d;
11550 tree arglist;
11551 rtx target;
11552 {
11553 rtx pat;
11554 tree arg0 = TREE_VALUE (arglist);
11555 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11556 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11557 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11558 rtx op2;
11559 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11560 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11561 enum rtx_code comparison = d->comparison;
11562
11563 if (VECTOR_MODE_P (mode0))
11564 op0 = safe_vector_operand (op0, mode0);
11565 if (VECTOR_MODE_P (mode1))
11566 op1 = safe_vector_operand (op1, mode1);
11567
11568 /* Swap operands if we have a comparison that isn't available in
11569 hardware. */
11570 if (d->flag)
11571 {
11572 rtx tmp = op1;
11573 op1 = op0;
11574 op0 = tmp;
11575 }
11576
11577 target = gen_reg_rtx (SImode);
11578 emit_move_insn (target, const0_rtx);
11579 target = gen_rtx_SUBREG (QImode, target, 0);
11580
11581 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11582 op0 = copy_to_mode_reg (mode0, op0);
11583 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11584 op1 = copy_to_mode_reg (mode1, op1);
11585
11586 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11587 pat = GEN_FCN (d->icode) (op0, op1, op2);
11588 if (! pat)
11589 return 0;
11590 emit_insn (pat);
11591 emit_insn (gen_rtx_SET (VOIDmode,
11592 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11593 gen_rtx_fmt_ee (comparison, QImode,
11594 gen_rtx_REG (CCmode, FLAGS_REG),
11595 const0_rtx)));
11596
11597 return SUBREG_REG (target);
11598 }
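/* The net effect, e.g. for __builtin_ia32_comieq (a, b), is a (u)comiss
   comparing A and B followed by a setcc of the flags into the low byte of
   a zeroed SImode register, yielding a plain 0/1 int.  */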
11599
11600 /* Expand an expression EXP that calls a built-in function,
11601 with result going to TARGET if that's convenient
11602 (and in mode MODE if that's convenient).
11603 SUBTARGET may be used as the target for computing one of EXP's operands.
11604 IGNORE is nonzero if the value is to be ignored. */
11605
11606 rtx
11607 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11608 tree exp;
11609 rtx target;
11610 rtx subtarget ATTRIBUTE_UNUSED;
11611 enum machine_mode mode ATTRIBUTE_UNUSED;
11612 int ignore ATTRIBUTE_UNUSED;
11613 {
11614 const struct builtin_description *d;
11615 size_t i;
11616 enum insn_code icode;
11617 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11618 tree arglist = TREE_OPERAND (exp, 1);
11619 tree arg0, arg1, arg2;
11620 rtx op0, op1, op2, pat;
11621 enum machine_mode tmode, mode0, mode1, mode2;
11622 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11623
11624 switch (fcode)
11625 {
11626 case IX86_BUILTIN_EMMS:
11627 emit_insn (gen_emms ());
11628 return 0;
11629
11630 case IX86_BUILTIN_SFENCE:
11631 emit_insn (gen_sfence ());
11632 return 0;
11633
11634 case IX86_BUILTIN_PEXTRW:
11635 icode = CODE_FOR_mmx_pextrw;
11636 arg0 = TREE_VALUE (arglist);
11637 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11638 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11639 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11640 tmode = insn_data[icode].operand[0].mode;
11641 mode0 = insn_data[icode].operand[1].mode;
11642 mode1 = insn_data[icode].operand[2].mode;
11643
11644 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11645 op0 = copy_to_mode_reg (mode0, op0);
11646 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11647 {
11648 /* @@@ better error message */
11649 error ("selector must be an immediate");
11650 return gen_reg_rtx (tmode);
11651 }
11652 if (target == 0
11653 || GET_MODE (target) != tmode
11654 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11655 target = gen_reg_rtx (tmode);
11656 pat = GEN_FCN (icode) (target, op0, op1);
11657 if (! pat)
11658 return 0;
11659 emit_insn (pat);
11660 return target;
11661
11662 case IX86_BUILTIN_PINSRW:
11663 icode = CODE_FOR_mmx_pinsrw;
11664 arg0 = TREE_VALUE (arglist);
11665 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11666 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11667 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11668 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11669 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11670 tmode = insn_data[icode].operand[0].mode;
11671 mode0 = insn_data[icode].operand[1].mode;
11672 mode1 = insn_data[icode].operand[2].mode;
11673 mode2 = insn_data[icode].operand[3].mode;
11674
11675 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11676 op0 = copy_to_mode_reg (mode0, op0);
11677 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11678 op1 = copy_to_mode_reg (mode1, op1);
11679 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11680 {
11681 /* @@@ better error message */
11682 error ("selector must be an immediate");
11683 return const0_rtx;
11684 }
11685 if (target == 0
11686 || GET_MODE (target) != tmode
11687 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11688 target = gen_reg_rtx (tmode);
11689 pat = GEN_FCN (icode) (target, op0, op1, op2);
11690 if (! pat)
11691 return 0;
11692 emit_insn (pat);
11693 return target;
11694
11695 case IX86_BUILTIN_MASKMOVQ:
11696 icode = CODE_FOR_mmx_maskmovq;
11697 /* Note the arg order is different from the operand order. */
11698 arg1 = TREE_VALUE (arglist);
11699 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11700 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11701 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11702 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11703 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11704 mode0 = insn_data[icode].operand[0].mode;
11705 mode1 = insn_data[icode].operand[1].mode;
11706 mode2 = insn_data[icode].operand[2].mode;
11707
11708 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11709 op0 = copy_to_mode_reg (mode0, op0);
11710 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11711 op1 = copy_to_mode_reg (mode1, op1);
11712 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11713 op2 = copy_to_mode_reg (mode2, op2);
11714 pat = GEN_FCN (icode) (op0, op1, op2);
11715 if (! pat)
11716 return 0;
11717 emit_insn (pat);
11718 return 0;
11719
11720 case IX86_BUILTIN_SQRTSS:
11721 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11722 case IX86_BUILTIN_RSQRTSS:
11723 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11724 case IX86_BUILTIN_RCPSS:
11725 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11726
11727 case IX86_BUILTIN_ANDPS:
11728 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11729 arglist, target);
11730 case IX86_BUILTIN_ANDNPS:
11731 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11732 arglist, target);
11733 case IX86_BUILTIN_ORPS:
11734 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11735 arglist, target);
11736 case IX86_BUILTIN_XORPS:
11737 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11738 arglist, target);
11739
11740 case IX86_BUILTIN_LOADAPS:
11741 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11742
11743 case IX86_BUILTIN_LOADUPS:
11744 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11745
11746 case IX86_BUILTIN_STOREAPS:
11747 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
11748 case IX86_BUILTIN_STOREUPS:
11749 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
11750
11751 case IX86_BUILTIN_LOADSS:
11752 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11753
11754 case IX86_BUILTIN_STORESS:
11755 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
11756
11757 case IX86_BUILTIN_LOADHPS:
11758 case IX86_BUILTIN_LOADLPS:
11759 icode = (fcode == IX86_BUILTIN_LOADHPS
11760 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11761 arg0 = TREE_VALUE (arglist);
11762 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11763 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11764 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11765 tmode = insn_data[icode].operand[0].mode;
11766 mode0 = insn_data[icode].operand[1].mode;
11767 mode1 = insn_data[icode].operand[2].mode;
11768
11769 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11770 op0 = copy_to_mode_reg (mode0, op0);
11771 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11772 if (target == 0
11773 || GET_MODE (target) != tmode
11774 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11775 target = gen_reg_rtx (tmode);
11776 pat = GEN_FCN (icode) (target, op0, op1);
11777 if (! pat)
11778 return 0;
11779 emit_insn (pat);
11780 return target;
11781
11782 case IX86_BUILTIN_STOREHPS:
11783 case IX86_BUILTIN_STORELPS:
11784 icode = (fcode == IX86_BUILTIN_STOREHPS
11785 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11786 arg0 = TREE_VALUE (arglist);
11787 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11788 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11789 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11790 mode0 = insn_data[icode].operand[1].mode;
11791 mode1 = insn_data[icode].operand[2].mode;
11792
11793 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11794 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11795 op1 = copy_to_mode_reg (mode1, op1);
11796
11797 pat = GEN_FCN (icode) (op0, op0, op1);
11798 if (! pat)
11799 return 0;
11800 emit_insn (pat);
11801 return 0;
11802
11803 case IX86_BUILTIN_MOVNTPS:
11804 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
11805 case IX86_BUILTIN_MOVNTQ:
11806 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
11807
11808 case IX86_BUILTIN_LDMXCSR:
11809 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11810 target = assign_386_stack_local (SImode, 0);
11811 emit_move_insn (target, op0);
11812 emit_insn (gen_ldmxcsr (target));
11813 return 0;
11814
11815 case IX86_BUILTIN_STMXCSR:
11816 target = assign_386_stack_local (SImode, 0);
11817 emit_insn (gen_stmxcsr (target));
11818 return copy_to_mode_reg (SImode, target);
11819
11820 case IX86_BUILTIN_SHUFPS:
11821 icode = CODE_FOR_sse_shufps;
11822 arg0 = TREE_VALUE (arglist);
11823 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11824 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11825 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11826 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11827 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11828 tmode = insn_data[icode].operand[0].mode;
11829 mode0 = insn_data[icode].operand[1].mode;
11830 mode1 = insn_data[icode].operand[2].mode;
11831 mode2 = insn_data[icode].operand[3].mode;
11832
11833 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11834 op0 = copy_to_mode_reg (mode0, op0);
11835 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11836 op1 = copy_to_mode_reg (mode1, op1);
11837 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11838 {
11839 /* @@@ better error message */
11840 error ("mask must be an immediate");
11841 return gen_reg_rtx (tmode);
11842 }
11843 if (target == 0
11844 || GET_MODE (target) != tmode
11845 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11846 target = gen_reg_rtx (tmode);
11847 pat = GEN_FCN (icode) (target, op0, op1, op2);
11848 if (! pat)
11849 return 0;
11850 emit_insn (pat);
11851 return target;
11852
11853 case IX86_BUILTIN_PSHUFW:
11854 icode = CODE_FOR_mmx_pshufw;
11855 arg0 = TREE_VALUE (arglist);
11856 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11857 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11858 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11859 tmode = insn_data[icode].operand[0].mode;
11860 mode1 = insn_data[icode].operand[1].mode;
11861 mode2 = insn_data[icode].operand[2].mode;
11862
11863 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11864 op0 = copy_to_mode_reg (mode1, op0);
11865 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11866 {
11867 /* @@@ better error message */
11868 error ("mask must be an immediate");
11869 return const0_rtx;
11870 }
11871 if (target == 0
11872 || GET_MODE (target) != tmode
11873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11874 target = gen_reg_rtx (tmode);
11875 pat = GEN_FCN (icode) (target, op0, op1);
11876 if (! pat)
11877 return 0;
11878 emit_insn (pat);
11879 return target;
11880
11881 case IX86_BUILTIN_FEMMS:
11882 emit_insn (gen_femms ());
11883 return NULL_RTX;
11884
11885 case IX86_BUILTIN_PAVGUSB:
11886 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11887
11888 case IX86_BUILTIN_PF2ID:
11889 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11890
11891 case IX86_BUILTIN_PFACC:
11892 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11893
11894 case IX86_BUILTIN_PFADD:
11895 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11896
11897 case IX86_BUILTIN_PFCMPEQ:
11898 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11899
11900 case IX86_BUILTIN_PFCMPGE:
11901 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11902
11903 case IX86_BUILTIN_PFCMPGT:
11904 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11905
11906 case IX86_BUILTIN_PFMAX:
11907 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11908
11909 case IX86_BUILTIN_PFMIN:
11910 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11911
11912 case IX86_BUILTIN_PFMUL:
11913 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11914
11915 case IX86_BUILTIN_PFRCP:
11916 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11917
11918 case IX86_BUILTIN_PFRCPIT1:
11919 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11920
11921 case IX86_BUILTIN_PFRCPIT2:
11922 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11923
11924 case IX86_BUILTIN_PFRSQIT1:
11925 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11926
11927 case IX86_BUILTIN_PFRSQRT:
11928 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11929
11930 case IX86_BUILTIN_PFSUB:
11931 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11932
11933 case IX86_BUILTIN_PFSUBR:
11934 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11935
11936 case IX86_BUILTIN_PI2FD:
11937 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11938
11939 case IX86_BUILTIN_PMULHRW:
11940 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11941
11942 case IX86_BUILTIN_PF2IW:
11943 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11944
11945 case IX86_BUILTIN_PFNACC:
11946 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11947
11948 case IX86_BUILTIN_PFPNACC:
11949 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11950
11951 case IX86_BUILTIN_PI2FW:
11952 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11953
11954 case IX86_BUILTIN_PSWAPDSI:
11955 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11956
11957 case IX86_BUILTIN_PSWAPDSF:
11958 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11959
11960 case IX86_BUILTIN_SSE_ZERO:
11961 target = gen_reg_rtx (V4SFmode);
11962 emit_insn (gen_sse_clrv4sf (target));
11963 return target;
11964
11965 case IX86_BUILTIN_MMX_ZERO:
11966 target = gen_reg_rtx (DImode);
11967 emit_insn (gen_mmx_clrdi (target));
11968 return target;
11969
11970 default:
11971 break;
11972 }
11973
11974 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11975 if (d->code == fcode)
11976 {
11977 /* Compares are treated specially. */
11978 if (d->icode == CODE_FOR_maskcmpv4sf3
11979 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11980 || d->icode == CODE_FOR_maskncmpv4sf3
11981 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11982 return ix86_expand_sse_compare (d, arglist, target);
11983
11984 return ix86_expand_binop_builtin (d->icode, arglist, target);
11985 }
11986
11987 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
11988 if (d->code == fcode)
11989 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
11990
11991 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11992 if (d->code == fcode)
11993 return ix86_expand_sse_comi (d, arglist, target);
11994
11995 /* @@@ Should really do something sensible here. */
11996 return 0;
11997 }
11998
11999 /* Store OPERAND to memory after reload is completed. This means
12000 that we can't easily use assign_stack_local. */
12001 rtx
12002 ix86_force_to_memory (mode, operand)
12003 enum machine_mode mode;
12004 rtx operand;
12005 {
12006 rtx result;
12007 if (!reload_completed)
12008 abort ();
12009 if (TARGET_64BIT && TARGET_RED_ZONE)
12010 {
12011 result = gen_rtx_MEM (mode,
12012 gen_rtx_PLUS (Pmode,
12013 stack_pointer_rtx,
12014 GEN_INT (-RED_ZONE_SIZE)));
12015 emit_move_insn (result, operand);
12016 }
12017 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12018 {
12019 switch (mode)
12020 {
12021 case HImode:
12022 case SImode:
12023 operand = gen_lowpart (DImode, operand);
12024 /* FALLTHRU */
12025 case DImode:
12026 emit_insn (
12027 gen_rtx_SET (VOIDmode,
12028 gen_rtx_MEM (DImode,
12029 gen_rtx_PRE_DEC (DImode,
12030 stack_pointer_rtx)),
12031 operand));
12032 break;
12033 default:
12034 abort ();
12035 }
12036 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12037 }
12038 else
12039 {
12040 switch (mode)
12041 {
12042 case DImode:
12043 {
12044 rtx operands[2];
12045 split_di (&operand, 1, operands, operands + 1);
12046 emit_insn (
12047 gen_rtx_SET (VOIDmode,
12048 gen_rtx_MEM (SImode,
12049 gen_rtx_PRE_DEC (Pmode,
12050 stack_pointer_rtx)),
12051 operands[1]));
12052 emit_insn (
12053 gen_rtx_SET (VOIDmode,
12054 gen_rtx_MEM (SImode,
12055 gen_rtx_PRE_DEC (Pmode,
12056 stack_pointer_rtx)),
12057 operands[0]));
12058 }
12059 break;
12060 case HImode:
12061 /* It is better to store HImodes as SImodes. */
12062 if (!TARGET_PARTIAL_REG_STALL)
12063 operand = gen_lowpart (SImode, operand);
12064 /* FALLTHRU */
12065 case SImode:
12066 emit_insn (
12067 gen_rtx_SET (VOIDmode,
12068 gen_rtx_MEM (GET_MODE (operand),
12069 gen_rtx_PRE_DEC (SImode,
12070 stack_pointer_rtx)),
12071 operand));
12072 break;
12073 default:
12074 abort ();
12075 }
12076 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12077 }
12078 return result;
12079 }
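/* For example, on x86-64 with the red zone available the operand is simply
   stored RED_ZONE_SIZE bytes below the stack pointer (storage the 64-bit
   ABI guarantees is safe to use), while the other cases push the value and
   return a MEM at the new stack pointer.  */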
12080
12081 /* Free operand from the memory. */
12082 void
12083 ix86_free_from_memory (mode)
12084 enum machine_mode mode;
12085 {
12086 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12087 {
12088 int size;
12089
12090 if (mode == DImode || TARGET_64BIT)
12091 size = 8;
12092 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12093 size = 2;
12094 else
12095 size = 4;
12096 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12097 to a pop or add instruction if registers are available. */
12098 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12099 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12100 GEN_INT (size))));
12101 }
12102 }
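/* E.g. after an SImode value was pushed by ix86_force_to_memory, this emits
   the equivalent of "lea 4(%esp), %esp"; see the comment above on why LEA
   rather than pop/add is emitted initially.  */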
12103
12104 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12105 QImode must go into class Q_REGS.
12106 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
12107 movdf to do mem-to-mem moves through integer regs. */
12108 enum reg_class
12109 ix86_preferred_reload_class (x, class)
12110 rtx x;
12111 enum reg_class class;
12112 {
12113 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12114 {
12115 /* SSE can't load any constant directly yet. */
12116 if (SSE_CLASS_P (class))
12117 return NO_REGS;
12118 /* Floats can load 0 and 1. */
12119 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12120 {
12121 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12122 if (MAYBE_SSE_CLASS_P (class))
12123 return (reg_class_subset_p (class, GENERAL_REGS)
12124 ? GENERAL_REGS : FLOAT_REGS);
12125 else
12126 return class;
12127 }
12128 /* General regs can load everything. */
12129 if (reg_class_subset_p (class, GENERAL_REGS))
12130 return GENERAL_REGS;
12131 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12132 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12133 return NO_REGS;
12134 }
12135 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12136 return NO_REGS;
12137 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12138 return Q_REGS;
12139 return class;
12140 }
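/* For instance, reloading the FP constant 1.0 into an x87 class is allowed
   (standard_80387_constant_p recognizes it, and fld1 loads it directly),
   whereas an arbitrary double is pushed towards GENERAL_REGS, and any
   constant destined for an MMX or SSE class is refused.  */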
12141
12142 /* If we are copying between general and FP registers, we need a memory
12143 location. The same is true for SSE and MMX registers.
12144
12145 The macro can't work reliably when one of the CLASSES is a class containing
12146 registers from multiple units (SSE, MMX, integer). We avoid this by never
12147 combining those units in a single alternative in the machine description.
12148 Ensure that this constraint holds to avoid unexpected surprises.
12149
12150 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12151 enforce these sanity checks. */
12152 int
12153 ix86_secondary_memory_needed (class1, class2, mode, strict)
12154 enum reg_class class1, class2;
12155 enum machine_mode mode;
12156 int strict;
12157 {
12158 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12159 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12160 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12161 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12162 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12163 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12164 {
12165 if (strict)
12166 abort ();
12167 else
12168 return 1;
12169 }
12170 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12171 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12172 && (mode) != SImode)
12173 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12174 && (mode) != SImode));
12175 }
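/* For example, a DImode copy between an MMX register and a general register
   has no direct instruction, so this returns nonzero and the value travels
   through a stack slot; SImode is exempted above because movd can cross
   between the units.  */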
12176 /* Return the cost of moving data from a register in class CLASS1 to
12177 one in class CLASS2.
12178
12179 It is not required that the cost always equal 2 when FROM is the same as TO;
12180 on some machines it is expensive to move between registers if they are not
12181 general registers. */
12182 int
12183 ix86_register_move_cost (mode, class1, class2)
12184 enum machine_mode mode;
12185 enum reg_class class1, class2;
12186 {
12187 /* In case we require secondary memory, compute the cost of the store
12188 followed by the load. When copying from a general purpose register we may
12189 emit multiple stores followed by a single load, causing a memory size
12190 mismatch stall. Count this as an arbitrarily high cost of 20. */
12191 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12192 {
12193 int add_cost = 0;
12194 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12195 add_cost = 20;
12196 return (MEMORY_MOVE_COST (mode, class1, 0)
12197 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12198 }
12199 /* Moves between the SSE/MMX and integer units are expensive. */
12200 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12201 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12202 return ix86_cost->mmxsse_to_integer;
12203 if (MAYBE_FLOAT_CLASS_P (class1))
12204 return ix86_cost->fp_move;
12205 if (MAYBE_SSE_CLASS_P (class1))
12206 return ix86_cost->sse_move;
12207 if (MAYBE_MMX_CLASS_P (class1))
12208 return ix86_cost->mmx_move;
12209 return 2;
12210 }
12211
12212 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12213 int
12214 ix86_hard_regno_mode_ok (regno, mode)
12215 int regno;
12216 enum machine_mode mode;
12217 {
12218 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
12219 if (CC_REGNO_P (regno))
12220 return GET_MODE_CLASS (mode) == MODE_CC;
12221 if (GET_MODE_CLASS (mode) == MODE_CC
12222 || GET_MODE_CLASS (mode) == MODE_RANDOM
12223 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12224 return 0;
12225 if (FP_REGNO_P (regno))
12226 return VALID_FP_MODE_P (mode);
12227 if (SSE_REGNO_P (regno))
12228 return VALID_SSE_REG_MODE (mode);
12229 if (MMX_REGNO_P (regno))
12230 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12231 /* We handle both integer and float values in the general purpose registers.
12232 In the future we should be able to handle vector modes as well. */
12233 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12234 return 0;
12235 /* Take care with QImode values - they can be in non-QI regs, but then
12236 they do cause partial register stalls. */
12237 if (regno < 4 || mode != QImode || TARGET_64BIT)
12238 return 1;
12239 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12240 }
12241
12242 /* Return the cost of moving data of mode M between a
12243 register and memory. A value of 2 is the default; this cost is
12244 relative to those in `REGISTER_MOVE_COST'.
12245
12246 If moving between registers and memory is more expensive than
12247 between two registers, you should define this macro to express the
12248 relative cost.
12249
12250 Also model the increased cost of moving QImode registers in
12251 non-Q_REGS classes.
12252 */
12253 int
12254 ix86_memory_move_cost (mode, class, in)
12255 enum machine_mode mode;
12256 enum reg_class class;
12257 int in;
12258 {
12259 if (FLOAT_CLASS_P (class))
12260 {
12261 int index;
12262 switch (mode)
12263 {
12264 case SFmode:
12265 index = 0;
12266 break;
12267 case DFmode:
12268 index = 1;
12269 break;
12270 case XFmode:
12271 case TFmode:
12272 index = 2;
12273 break;
12274 default:
12275 return 100;
12276 }
12277 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12278 }
12279 if (SSE_CLASS_P (class))
12280 {
12281 int index;
12282 switch (GET_MODE_SIZE (mode))
12283 {
12284 case 4:
12285 index = 0;
12286 break;
12287 case 8:
12288 index = 1;
12289 break;
12290 case 16:
12291 index = 2;
12292 break;
12293 default:
12294 return 100;
12295 }
12296 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12297 }
12298 if (MMX_CLASS_P (class))
12299 {
12300 int index;
12301 switch (GET_MODE_SIZE (mode))
12302 {
12303 case 4:
12304 index = 0;
12305 break;
12306 case 8:
12307 index = 1;
12308 break;
12309 default:
12310 return 100;
12311 }
12312 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12313 }
12314 switch (GET_MODE_SIZE (mode))
12315 {
12316 case 1:
12317 if (in)
12318 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12319 : ix86_cost->movzbl_load);
12320 else
12321 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12322 : ix86_cost->int_store[0] + 4);
12323 break;
12324 case 2:
12325 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12326 default:
12327 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
12328 if (mode == TFmode)
12329 mode = XFmode;
12330 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12331 * (int) GET_MODE_SIZE (mode) / 4);
12332 }
12333 }
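/* Worked example: a DImode load into GENERAL_REGS takes the default arm
   above and costs int_load[2] * 8 / 4, i.e. two SImode loads.  */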
12334
12335 #ifdef DO_GLOBAL_CTORS_BODY
12336 static void
12337 ix86_svr3_asm_out_constructor (symbol, priority)
12338 rtx symbol;
12339 int priority ATTRIBUTE_UNUSED;
12340 {
12341 init_section ();
12342 fputs ("\tpushl $", asm_out_file);
12343 assemble_name (asm_out_file, XSTR (symbol, 0));
12344 fputc ('\n', asm_out_file);
12345 }
12346 #endif
12347
12348 /* Order the registers for register allocator. */
12349
12350 void
12351 x86_order_regs_for_local_alloc ()
12352 {
12353 int pos = 0;
12354 int i;
12355
12356 /* First allocate the local general purpose registers. */
12357 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12358 if (GENERAL_REGNO_P (i) && call_used_regs[i])
12359 reg_alloc_order [pos++] = i;
12360
12361 /* Global general purpose registers. */
12362 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12363 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
12364 reg_alloc_order [pos++] = i;
12365
12366 /* x87 registers come first in case we are doing FP math
12367 using them. */
12368 if (!TARGET_SSE_MATH)
12369 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12370 reg_alloc_order [pos++] = i;
12371
12372 /* SSE registers. */
12373 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
12374 reg_alloc_order [pos++] = i;
12375 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
12376 reg_alloc_order [pos++] = i;
12377
12378 /* x87 registers. */
12379 if (TARGET_SSE_MATH)
12380 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12381 reg_alloc_order [pos++] = i;
12382
12383 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
12384 reg_alloc_order [pos++] = i;
12385
12386 /* Initialize the rest of the array, as we do not allocate some registers
12387 at all. */
12388 while (pos < FIRST_PSEUDO_REGISTER)
12389 reg_alloc_order [pos++] = 0;
12390 }