1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT (-1)
48 #endif
49
50 /* Processor costs (relative to an add) */
51 static const
52 struct processor_costs size_cost = { /* costs for tuning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
62 0, /* "large" insn */
63 2, /* MOVE_RATIO */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of storing fp registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
84 0, /* size of prefetch block */
85 0, /* number of parallel prefetches */
86 };
87 /* Processor costs (relative to an add) */
88 static const
89 struct processor_costs i386_cost = { /* 386 specific costs */
90 1, /* cost of an add instruction */
91 1, /* cost of a lea instruction */
92 3, /* variable shift costs */
93 2, /* constant shift costs */
94 6, /* cost of starting a multiply */
95 1, /* cost of multiply per each bit set */
96 23, /* cost of a divide/mod */
97 3, /* cost of movsx */
98 2, /* cost of movzx */
99 15, /* "large" insn */
100 3, /* MOVE_RATIO */
101 4, /* cost for loading QImode using movzbl */
102 {2, 4, 2}, /* cost of loading integer registers
103 in QImode, HImode and SImode.
104 Relative to reg-reg move (2). */
105 {2, 4, 2}, /* cost of storing integer registers */
106 2, /* cost of reg,reg fld/fst */
107 {8, 8, 8}, /* cost of loading fp registers
108 in SFmode, DFmode and XFmode */
109 {8, 8, 8}, /* cost of storing fp registers */
110 2, /* cost of moving MMX register */
111 {4, 8}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {4, 8}, /* cost of storing MMX registers
114 in SImode and DImode */
115 2, /* cost of moving SSE register */
116 {4, 8, 16}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {4, 8, 16}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
123 };
124
125 static const
126 struct processor_costs i486_cost = { /* 486 specific costs */
127 1, /* cost of an add instruction */
128 1, /* cost of a lea instruction */
129 3, /* variable shift costs */
130 2, /* constant shift costs */
131 12, /* cost of starting a multiply */
132 1, /* cost of multiply per each bit set */
133 40, /* cost of a divide/mod */
134 3, /* cost of movsx */
135 2, /* cost of movzx */
136 15, /* "large" insn */
137 3, /* MOVE_RATIO */
138 4, /* cost for loading QImode using movzbl */
139 {2, 4, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 4, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {8, 8, 8}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {8, 8, 8}, /* cost of storing fp registers */
147 2, /* cost of moving MMX register */
148 {4, 8}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {4, 8}, /* cost of storing MMX registers
151 in SImode and DImode */
152 2, /* cost of moving SSE register */
153 {4, 8, 16}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {4, 8, 16}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
157 3, /* MMX or SSE register to integer */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 };
161
162 static const
163 struct processor_costs pentium_cost = {
164 1, /* cost of an add instruction */
165 1, /* cost of a lea instruction */
166 4, /* variable shift costs */
167 1, /* constant shift costs */
168 11, /* cost of starting a multiply */
169 0, /* cost of multiply per each bit set */
170 25, /* cost of a divide/mod */
171 3, /* cost of movsx */
172 2, /* cost of movzx */
173 8, /* "large" insn */
174 6, /* MOVE_RATIO */
175 6, /* cost for loading QImode using movzbl */
176 {2, 4, 2}, /* cost of loading integer registers
177 in QImode, HImode and SImode.
178 Relative to reg-reg move (2). */
179 {2, 4, 2}, /* cost of storing integer registers */
180 2, /* cost of reg,reg fld/fst */
181 {2, 2, 6}, /* cost of loading fp registers
182 in SFmode, DFmode and XFmode */
183 {4, 4, 6}, /* cost of storing fp registers */
184 8, /* cost of moving MMX register */
185 {8, 8}, /* cost of loading MMX registers
186 in SImode and DImode */
187 {8, 8}, /* cost of storing MMX registers
188 in SImode and DImode */
189 2, /* cost of moving SSE register */
190 {4, 8, 16}, /* cost of loading SSE registers
191 in SImode, DImode and TImode */
192 {4, 8, 16}, /* cost of storing SSE registers
193 in SImode, DImode and TImode */
194 3, /* MMX or SSE register to integer */
195 0, /* size of prefetch block */
196 0, /* number of parallel prefetches */
197 };
198
199 static const
200 struct processor_costs pentiumpro_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 1, /* variable shift costs */
204 1, /* constant shift costs */
205 4, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 17, /* cost of a divide/mod */
208 1, /* cost of movsx */
209 1, /* cost of movzx */
210 8, /* "large" insn */
211 6, /* MOVE_RATIO */
212 2, /* cost for loading QImode using movzbl */
213 {4, 4, 4}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 2, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of storing fp registers */
221 2, /* cost of moving MMX register */
222 {2, 2}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {2, 2}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {2, 2, 8}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {2, 2, 8}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 32, /* size of prefetch block */
233 6, /* number of parallel prefetches */
234 };
235
236 static const
237 struct processor_costs k6_cost = {
238 1, /* cost of an add instruction */
239 2, /* cost of a lea instruction */
240 1, /* variable shift costs */
241 1, /* constant shift costs */
242 3, /* cost of starting a multiply */
243 0, /* cost of multiply per each bit set */
244 18, /* cost of a divide/mod */
245 2, /* cost of movsx */
246 2, /* cost of movzx */
247 8, /* "large" insn */
248 4, /* MOVE_RATIO */
249 3, /* cost for loading QImode using movzbl */
250 {4, 5, 4}, /* cost of loading integer registers
251 in QImode, HImode and SImode.
252 Relative to reg-reg move (2). */
253 {2, 3, 2}, /* cost of storing integer registers */
254 4, /* cost of reg,reg fld/fst */
255 {6, 6, 6}, /* cost of loading fp registers
256 in SFmode, DFmode and XFmode */
257 {4, 4, 4}, /* cost of storing fp registers */
258 2, /* cost of moving MMX register */
259 {2, 2}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {2, 2}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {2, 2, 8}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {2, 2, 8}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 6, /* MMX or SSE register to integer */
269 32, /* size of prefetch block */
270 1, /* number of parallel prefetches */
271 };
272
273 static const
274 struct processor_costs athlon_cost = {
275 1, /* cost of an add instruction */
276 2, /* cost of a lea instruction */
277 1, /* variable shift costs */
278 1, /* constant shift costs */
279 5, /* cost of starting a multiply */
280 0, /* cost of multiply per each bit set */
281 42, /* cost of a divide/mod */
282 1, /* cost of movsx */
283 1, /* cost of movzx */
284 8, /* "large" insn */
285 9, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {4, 5, 4}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 3, 2}, /* cost of storing integer registers */
291 4, /* cost of reg,reg fld/fst */
292 {6, 6, 20}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {4, 4, 16}, /* cost of storing fp registers */
295 2, /* cost of moving MMX register */
296 {2, 2}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {2, 2}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {2, 2, 8}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {2, 2, 8}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 6, /* MMX or SSE register to integer */
306 64, /* size of prefetch block */
307 6, /* number of parallel prefetches */
308 };
309
310 static const
311 struct processor_costs pentium4_cost = {
312 1, /* cost of an add instruction */
313 1, /* cost of a lea instruction */
314 8, /* variable shift costs */
315 8, /* constant shift costs */
316 30, /* cost of starting a multiply */
317 0, /* cost of multiply per each bit set */
318 112, /* cost of a divide/mod */
319 1, /* cost of movsx */
320 1, /* cost of movzx */
321 16, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 5, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 3, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers */
332 2, /* cost of moving MMX register */
333 {2, 2}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {2, 2}, /* cost of storing MMX registers
336 in SImode and DImode */
337 12, /* cost of moving SSE register */
338 {12, 12, 12}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {2, 2, 8}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 10, /* MMX or SSE register to integer */
343 64, /* size of prefetch block */
344 6, /* number of parallel prefetches */
345 };
346
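/* Selected at run time: override_options below points ix86_cost at the
   table matching the -mcpu= setting, or at size_cost when optimizing
   for size.  */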
347 const struct processor_costs *ix86_cost = &pentium_cost;
348
349 /* Processor feature/optimization bitmasks. */
350 #define m_386 (1<<PROCESSOR_I386)
351 #define m_486 (1<<PROCESSOR_I486)
352 #define m_PENT (1<<PROCESSOR_PENTIUM)
353 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
354 #define m_K6 (1<<PROCESSOR_K6)
355 #define m_ATHLON (1<<PROCESSOR_ATHLON)
356 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
357
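/* Each x86_* tuning flag below is a bitmask over the PROCESSOR_* values
   above; a feature applies to the current CPU when the bit (1 << ix86_cpu)
   (or (1 << ix86_arch) for architecture features) is set, as in the
   x86_3dnow_a and x86_accumulate_outgoing_args tests in override_options.  */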
358 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
359 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
360 const int x86_zero_extend_with_and = m_486 | m_PENT;
361 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
362 const int x86_double_with_add = ~m_386;
363 const int x86_use_bit_test = m_386;
364 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
365 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
366 const int x86_3dnow_a = m_ATHLON;
367 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
368 const int x86_branch_hints = m_PENT4;
369 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
370 const int x86_partial_reg_stall = m_PPRO;
371 const int x86_use_loop = m_K6;
372 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
373 const int x86_use_mov0 = m_K6;
374 const int x86_use_cltd = ~(m_PENT | m_K6);
375 const int x86_read_modify_write = ~m_PENT;
376 const int x86_read_modify = ~(m_PENT | m_PPRO);
377 const int x86_split_long_moves = m_PPRO;
378 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
379 const int x86_single_stringop = m_386 | m_PENT4;
380 const int x86_qimode_math = ~(0);
381 const int x86_promote_qi_regs = 0;
382 const int x86_himode_math = ~(m_PPRO);
383 const int x86_promote_hi_regs = m_PPRO;
384 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
385 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
386 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
387 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
388 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
389 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
390 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
391 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
392 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_decompose_lea = m_PENT4;
395
396 /* In case the average insn count for a single function invocation is
397 lower than this constant, emit fast (but longer) prologue and
398 epilogue code. */
399 #define FAST_PROLOGUE_INSN_COUNT 30
400 /* Set by prologue expander and used by epilogue expander to determine
401 the style used. */
402 static int use_fast_prologue_epilogue;
403
404 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
405
406 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
407 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
408 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
409
410 /* Array of the smallest class containing reg number REGNO, indexed by
411 REGNO. Used by REGNO_REG_CLASS in i386.h. */
412
413 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
414 {
415 /* ax, dx, cx, bx */
416 AREG, DREG, CREG, BREG,
417 /* si, di, bp, sp */
418 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
419 /* FP registers */
420 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
421 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
422 /* arg pointer */
423 NON_Q_REGS,
424 /* flags, fpsr, dirflag, frame */
425 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
426 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
427 SSE_REGS, SSE_REGS,
428 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
429 MMX_REGS, MMX_REGS,
430 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
431 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 };
435
436 /* The "default" register map used in 32bit mode. */
437
438 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
439 {
440 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
441 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
442 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
443 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
444 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
445 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
446 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
447 };
448
449 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
450 1 /*RDX*/, 2 /*RCX*/,
451 FIRST_REX_INT_REG /*R8 */,
452 FIRST_REX_INT_REG + 1 /*R9 */};
453 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
454
455 /* The "default" register map used in 64bit mode. */
456 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
457 {
458 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
459 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
460 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
461 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
462 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
463 8,9,10,11,12,13,14,15, /* extended integer registers */
464 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
465 };
466
467 /* Define the register numbers to be used in Dwarf debugging information.
468 The SVR4 reference port C compiler uses the following register numbers
469 in its Dwarf output code:
470 0 for %eax (gcc regno = 0)
471 1 for %ecx (gcc regno = 2)
472 2 for %edx (gcc regno = 1)
473 3 for %ebx (gcc regno = 3)
474 4 for %esp (gcc regno = 7)
475 5 for %ebp (gcc regno = 6)
476 6 for %esi (gcc regno = 4)
477 7 for %edi (gcc regno = 5)
478 The following three DWARF register numbers are never generated by
479 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
480 believes these numbers have these meanings.
481 8 for %eip (no gcc equivalent)
482 9 for %eflags (gcc regno = 17)
483 10 for %trapno (no gcc equivalent)
484 It is not at all clear how we should number the FP stack registers
485 for the x86 architecture. If the version of SDB on x86/svr4 were
486 a bit less brain dead with respect to floating-point then we would
487 have a precedent to follow with respect to DWARF register numbers
488 for x86 FP registers, but the SDB on x86/svr4 is so completely
489 broken with respect to FP registers that it is hardly worth thinking
490 of it as something to strive for compatibility with.
491 The version of x86/svr4 SDB I have at the moment does (partially)
492 seem to believe that DWARF register number 11 is associated with
493 the x86 register %st(0), but that's about all. Higher DWARF
494 register numbers don't seem to be associated with anything in
495 particular, and even for DWARF regno 11, SDB only seems to under-
496 stand that it should say that a variable lives in %st(0) (when
497 asked via an `=' command) if we said it was in DWARF regno 11,
498 but SDB still prints garbage when asked for the value of the
499 variable in question (via a `/' command).
500 (Also note that the labels SDB prints for various FP stack regs
501 when doing an `x' command are all wrong.)
502 Note that these problems generally don't affect the native SVR4
503 C compiler because it doesn't allow the use of -O with -g and
504 because when it is *not* optimizing, it allocates a memory
505 location for each floating-point variable, and the memory
506 location is what gets described in the DWARF AT_location
507 attribute for the variable in question.
508 Regardless of the severe mental illness of the x86/svr4 SDB, we
509 do something sensible here and we use the following DWARF
510 register numbers. Note that these are all stack-top-relative
511 numbers.
512 11 for %st(0) (gcc regno = 8)
513 12 for %st(1) (gcc regno = 9)
514 13 for %st(2) (gcc regno = 10)
515 14 for %st(3) (gcc regno = 11)
516 15 for %st(4) (gcc regno = 12)
517 16 for %st(5) (gcc regno = 13)
518 17 for %st(6) (gcc regno = 14)
519 18 for %st(7) (gcc regno = 15)
520 */
521 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
522 {
523 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
524 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
525 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
526 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
527 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
528 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
530 };
531
532 /* Test and compare insns in i386.md store the information needed to
533 generate branch and scc insns here. */
534
535 rtx ix86_compare_op0 = NULL_RTX;
536 rtx ix86_compare_op1 = NULL_RTX;
537
538 #define MAX_386_STACK_LOCALS 3
539 /* Size of the register save area. */
540 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
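/* For reference: in 64-bit mode, with REGPARM_MAX of 6 integer registers
   and SSE_REGPARM_MAX of 8 SSE registers, this should come to
   6*8 + 8*16 = 176 bytes, the size of the va_arg register save area.  */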
541
542 /* Define the structure for the machine field in struct function. */
543 struct machine_function
544 {
545 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
546 int save_varrargs_registers;
547 int accesses_prev_frame;
548 };
549
550 #define ix86_stack_locals (cfun->machine->stack_locals)
551 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
552
553 /* Structure describing stack frame layout.
554 Stack grows downward:
555
556 [arguments]
557 <- ARG_POINTER
558 saved pc
559
560 saved frame pointer if frame_pointer_needed
561 <- HARD_FRAME_POINTER
562 [saved regs]
563
564 [padding1] \
565 )
566 [va_arg registers] (
567 > to_allocate <- FRAME_POINTER
568 [frame] (
569 )
570 [padding2] /
571 */
572 struct ix86_frame
573 {
574 int nregs;
575 int padding1;
576 int va_arg_size;
577 HOST_WIDE_INT frame;
578 int padding2;
579 int outgoing_arguments_size;
580 int red_zone_size;
581
582 HOST_WIDE_INT to_allocate;
583 /* The offsets relative to ARG_POINTER. */
584 HOST_WIDE_INT frame_pointer_offset;
585 HOST_WIDE_INT hard_frame_pointer_offset;
586 HOST_WIDE_INT stack_pointer_offset;
587 };
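/* The frame layout above is computed by ix86_compute_frame_layout
   (declared later in this file) and consulted when the prologue and
   epilogue are expanded.  */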
588
589 /* Used to enable/disable debugging features. */
590 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
591 /* Code model option as passed by user. */
592 const char *ix86_cmodel_string;
593 /* Parsed value. */
594 enum cmodel ix86_cmodel;
595 /* Asm dialect. */
596 const char *ix86_asm_string;
597 enum asm_dialect ix86_asm_dialect = ASM_ATT;
598
599 /* which cpu are we scheduling for */
600 enum processor_type ix86_cpu;
601
602 /* which unit we are generating floating point math for */
603 enum fpmath_unit ix86_fpmath;
604
605 /* which instruction set architecture to use. */
606 int ix86_arch;
607
608 /* Strings to hold which cpu and instruction set architecture to use. */
609 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
610 const char *ix86_arch_string; /* for -march=<xxx> */
611 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
612
613 /* # of registers to use to pass arguments. */
614 const char *ix86_regparm_string;
615
616 /* True if the SSE prefetch instruction is not a NOP. */
617 int x86_prefetch_sse;
618
619 /* ix86_regparm_string as a number */
620 int ix86_regparm;
621
622 /* Alignment to use for loops and jumps: */
623
624 /* Power of two alignment for loops. */
625 const char *ix86_align_loops_string;
626
627 /* Power of two alignment for non-loop jumps. */
628 const char *ix86_align_jumps_string;
629
630 /* Power of two alignment for stack boundary in bytes. */
631 const char *ix86_preferred_stack_boundary_string;
632
633 /* Preferred alignment for stack boundary in bits. */
634 int ix86_preferred_stack_boundary;
635
636 /* Values 1-5: see jump.c */
637 int ix86_branch_cost;
638 const char *ix86_branch_cost_string;
639
640 /* Power of two alignment for functions. */
641 const char *ix86_align_funcs_string;
642
643 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
644 static char internal_label_prefix[16];
645 static int internal_label_prefix_len;
646 \f
647 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
648 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
649 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
650 int, int, FILE *));
651 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
652 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
653 rtx *, rtx *));
654 static rtx gen_push PARAMS ((rtx));
655 static int memory_address_length PARAMS ((rtx addr));
656 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
657 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
658 static int ix86_safe_length PARAMS ((rtx));
659 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
660 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
661 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
662 static void ix86_dump_ppro_packet PARAMS ((FILE *));
663 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
664 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
665 rtx));
666 static void ix86_init_machine_status PARAMS ((struct function *));
667 static void ix86_mark_machine_status PARAMS ((struct function *));
668 static void ix86_free_machine_status PARAMS ((struct function *));
669 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
670 static int ix86_safe_length_prefix PARAMS ((rtx));
671 static int ix86_nsaved_regs PARAMS ((void));
672 static void ix86_emit_save_regs PARAMS ((void));
673 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
674 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
675 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
676 static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
677 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
678 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
679 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
680 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
681 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
682 static int ix86_issue_rate PARAMS ((void));
683 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
684 static void ix86_sched_init PARAMS ((FILE *, int, int));
685 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
686 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
687 static void ix86_init_mmx_sse_builtins PARAMS ((void));
688
689 struct ix86_address
690 {
691 rtx base, index, disp;
692 HOST_WIDE_INT scale;
693 };
694
695 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
696
697 struct builtin_description;
698 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
699 tree, rtx));
700 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
701 tree, rtx));
702 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
703 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
704 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
705 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
706 tree, rtx));
707 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
708 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
709 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
710 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
711 enum rtx_code *,
712 enum rtx_code *,
713 enum rtx_code *));
714 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
715 rtx *, rtx *));
716 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
717 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
718 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
719 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
720 static int ix86_save_reg PARAMS ((int, int));
721 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
722 static int ix86_comp_type_attributes PARAMS ((tree, tree));
723 const struct attribute_spec ix86_attribute_table[];
724 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
725 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
726
727 #ifdef DO_GLOBAL_CTORS_BODY
728 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
729 #endif
730
731 /* Register class used for passing a given 64-bit part of the argument.
732 These represent classes as documented by the PS ABI, with the exception
733 of the SSESF and SSEDF classes, which are basically the SSE class, except
734 that gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
735
736 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
737 whenever possible (the upper half then contains only padding).
738 */
739 enum x86_64_reg_class
740 {
741 X86_64_NO_CLASS,
742 X86_64_INTEGER_CLASS,
743 X86_64_INTEGERSI_CLASS,
744 X86_64_SSE_CLASS,
745 X86_64_SSESF_CLASS,
746 X86_64_SSEDF_CLASS,
747 X86_64_SSEUP_CLASS,
748 X86_64_X87_CLASS,
749 X86_64_X87UP_CLASS,
750 X86_64_MEMORY_CLASS
751 };
752 static const char * const x86_64_reg_class_name[] =
753 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
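/* Classification example (see classify_argument below): a
   struct { double d; int i; } occupies two eightbytes; the first is
   classified as X86_64_SSEDF_CLASS and the second as
   X86_64_INTEGERSI_CLASS, so the struct is passed in one SSE register
   and one integer register.  */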
754
755 #define MAX_CLASSES 4
756 static int classify_argument PARAMS ((enum machine_mode, tree,
757 enum x86_64_reg_class [MAX_CLASSES],
758 int));
759 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
760 int *));
761 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
762 const int *, int));
763 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
764 enum x86_64_reg_class));
765 \f
766 /* Initialize the GCC target structure. */
767 #undef TARGET_ATTRIBUTE_TABLE
768 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
769 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
770 # undef TARGET_MERGE_DECL_ATTRIBUTES
771 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
772 #endif
773
774 #undef TARGET_COMP_TYPE_ATTRIBUTES
775 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
776
777 #undef TARGET_INIT_BUILTINS
778 #define TARGET_INIT_BUILTINS ix86_init_builtins
779
780 #undef TARGET_EXPAND_BUILTIN
781 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
782
783 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
784 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
785 HOST_WIDE_INT));
786 # undef TARGET_ASM_FUNCTION_PROLOGUE
787 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
788 #endif
789
790 #undef TARGET_ASM_OPEN_PAREN
791 #define TARGET_ASM_OPEN_PAREN ""
792 #undef TARGET_ASM_CLOSE_PAREN
793 #define TARGET_ASM_CLOSE_PAREN ""
794
795 #undef TARGET_ASM_ALIGNED_HI_OP
796 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
797 #undef TARGET_ASM_ALIGNED_SI_OP
798 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
799 #ifdef ASM_QUAD
800 #undef TARGET_ASM_ALIGNED_DI_OP
801 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
802 #endif
803
804 #undef TARGET_ASM_UNALIGNED_HI_OP
805 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
806 #undef TARGET_ASM_UNALIGNED_SI_OP
807 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
808 #undef TARGET_ASM_UNALIGNED_DI_OP
809 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
810
811 #undef TARGET_SCHED_ADJUST_COST
812 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
813 #undef TARGET_SCHED_ISSUE_RATE
814 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
815 #undef TARGET_SCHED_VARIABLE_ISSUE
816 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
817 #undef TARGET_SCHED_INIT
818 #define TARGET_SCHED_INIT ix86_sched_init
819 #undef TARGET_SCHED_REORDER
820 #define TARGET_SCHED_REORDER ix86_sched_reorder
821
822 struct gcc_target targetm = TARGET_INITIALIZER;
823 \f
824 /* Sometimes certain combinations of command options do not make
825 sense on a particular target machine. You can define a macro
826 `OVERRIDE_OPTIONS' to take account of this. This macro, if
827 defined, is executed once just after all the command options have
828 been parsed.
829
830 Don't use this macro to turn on various extra optimizations for
831 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
832
833 void
834 override_options ()
835 {
836 int i;
837 /* Comes from final.c -- no real reason to change it. */
838 #define MAX_CODE_ALIGN 16
839
840 static struct ptt
841 {
842 const struct processor_costs *cost; /* Processor costs */
843 const int target_enable; /* Target flags to enable. */
844 const int target_disable; /* Target flags to disable. */
845 const int align_loop; /* Default alignments. */
846 const int align_loop_max_skip;
847 const int align_jump;
848 const int align_jump_max_skip;
849 const int align_func;
850 const int branch_cost;
851 }
852 const processor_target_table[PROCESSOR_max] =
853 {
854 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
855 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
856 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
857 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
858 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
859 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
860 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
861 };
862
863 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
864 static struct pta
865 {
866 const char *const name; /* processor name or nickname. */
867 const enum processor_type processor;
868 const enum pta_flags
869 {
870 PTA_SSE = 1,
871 PTA_SSE2 = 2,
872 PTA_MMX = 4,
873 PTA_PREFETCH_SSE = 8,
874 PTA_3DNOW = 16,
875 PTA_3DNOW_A = 64
876 } flags;
877 }
878 const processor_alias_table[] =
879 {
880 {"i386", PROCESSOR_I386, 0},
881 {"i486", PROCESSOR_I486, 0},
882 {"i586", PROCESSOR_PENTIUM, 0},
883 {"pentium", PROCESSOR_PENTIUM, 0},
884 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
885 {"i686", PROCESSOR_PENTIUMPRO, 0},
886 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
887 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
888 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
889 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
890 PTA_MMX | PTA_PREFETCH_SSE},
891 {"k6", PROCESSOR_K6, PTA_MMX},
892 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
893 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
894 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
895 | PTA_3DNOW_A},
896 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
897 | PTA_3DNOW | PTA_3DNOW_A},
898 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
899 | PTA_3DNOW_A | PTA_SSE},
900 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
901 | PTA_3DNOW_A | PTA_SSE},
902 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
903 | PTA_3DNOW_A | PTA_SSE},
904 };
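/* The alias table above is scanned twice below: once against -march= to
   choose ix86_arch and the instruction-set flags, and once against -mcpu=
   to choose the scheduling model (ix86_cpu).  */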
905
906 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
907
908 #ifdef SUBTARGET_OVERRIDE_OPTIONS
909 SUBTARGET_OVERRIDE_OPTIONS;
910 #endif
911
912 if (!ix86_cpu_string && ix86_arch_string)
913 ix86_cpu_string = ix86_arch_string;
914 if (!ix86_cpu_string)
915 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
916 if (!ix86_arch_string)
917 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
918
919 if (ix86_cmodel_string != 0)
920 {
921 if (!strcmp (ix86_cmodel_string, "small"))
922 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
923 else if (flag_pic)
924 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
925 else if (!strcmp (ix86_cmodel_string, "32"))
926 ix86_cmodel = CM_32;
927 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
928 ix86_cmodel = CM_KERNEL;
929 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
930 ix86_cmodel = CM_MEDIUM;
931 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
932 ix86_cmodel = CM_LARGE;
933 else
934 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
935 }
936 else
937 {
938 ix86_cmodel = CM_32;
939 if (TARGET_64BIT)
940 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
941 }
942 if (ix86_asm_string != 0)
943 {
944 if (!strcmp (ix86_asm_string, "intel"))
945 ix86_asm_dialect = ASM_INTEL;
946 else if (!strcmp (ix86_asm_string, "att"))
947 ix86_asm_dialect = ASM_ATT;
948 else
949 error ("bad value (%s) for -masm= switch", ix86_asm_string);
950 }
951 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
952 error ("code model `%s' not supported in the %s bit mode",
953 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
954 if (ix86_cmodel == CM_LARGE)
955 sorry ("code model `large' not supported yet");
956 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
957 sorry ("%i-bit mode not compiled in",
958 (target_flags & MASK_64BIT) ? 64 : 32);
959
960 for (i = 0; i < pta_size; i++)
961 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
962 {
963 ix86_arch = processor_alias_table[i].processor;
964 /* Default cpu tuning to the architecture. */
965 ix86_cpu = ix86_arch;
966 if (processor_alias_table[i].flags & PTA_MMX
967 && !(target_flags & MASK_MMX_SET))
968 target_flags |= MASK_MMX;
969 if (processor_alias_table[i].flags & PTA_3DNOW
970 && !(target_flags & MASK_3DNOW_SET))
971 target_flags |= MASK_3DNOW;
972 if (processor_alias_table[i].flags & PTA_3DNOW_A
973 && !(target_flags & MASK_3DNOW_A_SET))
974 target_flags |= MASK_3DNOW_A;
975 if (processor_alias_table[i].flags & PTA_SSE
976 && !(target_flags & MASK_SSE_SET))
977 target_flags |= MASK_SSE;
978 if (processor_alias_table[i].flags & PTA_SSE2
979 && !(target_flags & MASK_SSE2_SET))
980 target_flags |= MASK_SSE2;
981 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
982 x86_prefetch_sse = true;
983 break;
984 }
985
986 if (i == pta_size)
987 error ("bad value (%s) for -march= switch", ix86_arch_string);
988
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
991 {
992 ix86_cpu = processor_alias_table[i].processor;
993 break;
994 }
995 if (i != pta_size && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
996 x86_prefetch_sse = true;
997 if (i == pta_size)
998 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
999
1000 if (optimize_size)
1001 ix86_cost = &size_cost;
1002 else
1003 ix86_cost = processor_target_table[ix86_cpu].cost;
1004 target_flags |= processor_target_table[ix86_cpu].target_enable;
1005 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1006
1007 /* Arrange to set up i386_stack_locals for all functions. */
1008 init_machine_status = ix86_init_machine_status;
1009 mark_machine_status = ix86_mark_machine_status;
1010 free_machine_status = ix86_free_machine_status;
1011
1012 /* Validate -mregparm= value. */
1013 if (ix86_regparm_string)
1014 {
1015 i = atoi (ix86_regparm_string);
1016 if (i < 0 || i > REGPARM_MAX)
1017 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1018 else
1019 ix86_regparm = i;
1020 }
1021 else
1022 if (TARGET_64BIT)
1023 ix86_regparm = REGPARM_MAX;
1024
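/* For example, -mregparm=3 (or __attribute__((regparm(3)))) makes the
   first three integral arguments travel in registers (conventionally
   EAX, EDX and ECX) instead of on the stack; in 64-bit mode register
   passing is the default, hence REGPARM_MAX.  */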
1025 /* If the user has provided any of the -malign-* options,
1026 warn and use that value only if -falign-* is not set.
1027 Remove this code in GCC 3.2 or later. */
1028 if (ix86_align_loops_string)
1029 {
1030 warning ("-malign-loops is obsolete, use -falign-loops");
1031 if (align_loops == 0)
1032 {
1033 i = atoi (ix86_align_loops_string);
1034 if (i < 0 || i > MAX_CODE_ALIGN)
1035 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1036 else
1037 align_loops = 1 << i;
1038 }
1039 }
1040
1041 if (ix86_align_jumps_string)
1042 {
1043 warning ("-malign-jumps is obsolete, use -falign-jumps");
1044 if (align_jumps == 0)
1045 {
1046 i = atoi (ix86_align_jumps_string);
1047 if (i < 0 || i > MAX_CODE_ALIGN)
1048 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1049 else
1050 align_jumps = 1 << i;
1051 }
1052 }
1053
1054 if (ix86_align_funcs_string)
1055 {
1056 warning ("-malign-functions is obsolete, use -falign-functions");
1057 if (align_functions == 0)
1058 {
1059 i = atoi (ix86_align_funcs_string);
1060 if (i < 0 || i > MAX_CODE_ALIGN)
1061 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1062 else
1063 align_functions = 1 << i;
1064 }
1065 }
1066
1067 /* Default align_* from the processor table. */
1068 if (align_loops == 0)
1069 {
1070 align_loops = processor_target_table[ix86_cpu].align_loop;
1071 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1072 }
1073 if (align_jumps == 0)
1074 {
1075 align_jumps = processor_target_table[ix86_cpu].align_jump;
1076 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1077 }
1078 if (align_functions == 0)
1079 {
1080 align_functions = processor_target_table[ix86_cpu].align_func;
1081 }
1082
1083 /* Validate -mpreferred-stack-boundary= value, or provide default.
1084 The default of 128 bits is for Pentium III's SSE __m128, but we
1085 don't want additional code to keep the stack aligned when
1086 optimizing for code size. */
1087 ix86_preferred_stack_boundary = (optimize_size
1088 ? TARGET_64BIT ? 64 : 32
1089 : 128);
1090 if (ix86_preferred_stack_boundary_string)
1091 {
1092 i = atoi (ix86_preferred_stack_boundary_string);
1093 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1094 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1095 TARGET_64BIT ? 3 : 2);
1096 else
1097 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1098 }
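/* For example, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 128 bits, i.e. the 16-byte alignment needed for SSE __m128 values.  */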
1099
1100 /* Validate -mbranch-cost= value, or provide default. */
1101 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1102 if (ix86_branch_cost_string)
1103 {
1104 i = atoi (ix86_branch_cost_string);
1105 if (i < 0 || i > 5)
1106 error ("-mbranch-cost=%d is not between 0 and 5", i);
1107 else
1108 ix86_branch_cost = i;
1109 }
1110
1111 /* Keep nonleaf frame pointers. */
1112 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1113 flag_omit_frame_pointer = 1;
1114
1115 /* If we're doing fast math, we don't care about comparison order
1116 wrt NaNs. This lets us use a shorter comparison sequence. */
1117 if (flag_unsafe_math_optimizations)
1118 target_flags &= ~MASK_IEEE_FP;
1119
1120 if (TARGET_64BIT)
1121 {
1122 if (TARGET_ALIGN_DOUBLE)
1123 error ("-malign-double makes no sense in the 64bit mode");
1124 if (TARGET_RTD)
1125 error ("-mrtd calling convention not supported in the 64bit mode");
1126 /* Enable by default the SSE and MMX builtins. */
1127 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1128 ix86_fpmath = FPMATH_SSE;
1129 }
1130 else
1131 ix86_fpmath = FPMATH_387;
1132
1133 if (ix86_fpmath_string != 0)
1134 {
1135 if (! strcmp (ix86_fpmath_string, "387"))
1136 ix86_fpmath = FPMATH_387;
1137 else if (! strcmp (ix86_fpmath_string, "sse"))
1138 {
1139 if (!TARGET_SSE)
1140 {
1141 warning ("SSE instruction set disabled, using 387 arithmetics");
1142 ix86_fpmath = FPMATH_387;
1143 }
1144 else
1145 ix86_fpmath = FPMATH_SSE;
1146 }
1147 else if (! strcmp (ix86_fpmath_string, "387,sse")
1148 || ! strcmp (ix86_fpmath_string, "sse,387"))
1149 {
1150 if (!TARGET_SSE)
1151 {
1152 warning ("SSE instruction set disabled, using 387 arithmetics");
1153 ix86_fpmath = FPMATH_387;
1154 }
1155 else if (!TARGET_80387)
1156 {
1157 warning ("387 instruction set disabled, using SSE arithmetics");
1158 ix86_fpmath = FPMATH_SSE;
1159 }
1160 else
1161 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1162 }
1163 else
1164 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1165 }
1166
1167 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1168 on by -msse. */
1169 if (TARGET_SSE)
1170 {
1171 target_flags |= MASK_MMX;
1172 x86_prefetch_sse = true;
1173 }
1174
1175 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1176 if (TARGET_3DNOW)
1177 {
1178 target_flags |= MASK_MMX;
1179 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1180 extensions it adds. */
1181 if (x86_3dnow_a & (1 << ix86_arch))
1182 target_flags |= MASK_3DNOW_A;
1183 }
1184 if ((x86_accumulate_outgoing_args & CPUMASK)
1185 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1186 && !optimize_size)
1187 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1188
1189 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1190 {
1191 char *p;
1192 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1193 p = strchr (internal_label_prefix, 'X');
1194 internal_label_prefix_len = p - internal_label_prefix;
1195 *p = '\0';
1196 }
1197 }
1198 \f
1199 void
1200 optimization_options (level, size)
1201 int level;
1202 int size ATTRIBUTE_UNUSED;
1203 {
1204 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1205 make the problem with not enough registers even worse. */
1206 #ifdef INSN_SCHEDULING
1207 if (level > 1)
1208 flag_schedule_insns = 0;
1209 #endif
1210 if (TARGET_64BIT && optimize >= 1)
1211 flag_omit_frame_pointer = 1;
1212 if (TARGET_64BIT)
1213 {
1214 flag_pcc_struct_return = 0;
1215 flag_asynchronous_unwind_tables = 1;
1216 }
1217 }
1218 \f
1219 /* Table of valid machine attributes. */
1220 const struct attribute_spec ix86_attribute_table[] =
1221 {
1222 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1223 /* Stdcall attribute says callee is responsible for popping arguments
1224 if they are not variable. */
1225 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1226 /* Cdecl attribute says the callee is a normal C declaration */
1227 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1228 /* Regparm attribute specifies how many integer arguments are to be
1229 passed in registers. */
1230 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1231 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1232 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1233 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1234 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1235 #endif
1236 { NULL, 0, 0, false, false, false, NULL }
1237 };
1238
1239 /* Handle a "cdecl" or "stdcall" attribute;
1240 arguments as in struct attribute_spec.handler. */
1241 static tree
1242 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1243 tree *node;
1244 tree name;
1245 tree args ATTRIBUTE_UNUSED;
1246 int flags ATTRIBUTE_UNUSED;
1247 bool *no_add_attrs;
1248 {
1249 if (TREE_CODE (*node) != FUNCTION_TYPE
1250 && TREE_CODE (*node) != METHOD_TYPE
1251 && TREE_CODE (*node) != FIELD_DECL
1252 && TREE_CODE (*node) != TYPE_DECL)
1253 {
1254 warning ("`%s' attribute only applies to functions",
1255 IDENTIFIER_POINTER (name));
1256 *no_add_attrs = true;
1257 }
1258
1259 if (TARGET_64BIT)
1260 {
1261 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1262 *no_add_attrs = true;
1263 }
1264
1265 return NULL_TREE;
1266 }
1267
1268 /* Handle a "regparm" attribute;
1269 arguments as in struct attribute_spec.handler. */
1270 static tree
1271 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1272 tree *node;
1273 tree name;
1274 tree args;
1275 int flags ATTRIBUTE_UNUSED;
1276 bool *no_add_attrs;
1277 {
1278 if (TREE_CODE (*node) != FUNCTION_TYPE
1279 && TREE_CODE (*node) != METHOD_TYPE
1280 && TREE_CODE (*node) != FIELD_DECL
1281 && TREE_CODE (*node) != TYPE_DECL)
1282 {
1283 warning ("`%s' attribute only applies to functions",
1284 IDENTIFIER_POINTER (name));
1285 *no_add_attrs = true;
1286 }
1287 else
1288 {
1289 tree cst;
1290
1291 cst = TREE_VALUE (args);
1292 if (TREE_CODE (cst) != INTEGER_CST)
1293 {
1294 warning ("`%s' attribute requires an integer constant argument",
1295 IDENTIFIER_POINTER (name));
1296 *no_add_attrs = true;
1297 }
1298 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1299 {
1300 warning ("argument to `%s' attribute larger than %d",
1301 IDENTIFIER_POINTER (name), REGPARM_MAX);
1302 *no_add_attrs = true;
1303 }
1304 }
1305
1306 return NULL_TREE;
1307 }
1308
1309 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1310
1311 /* Generate the assembly code for function entry. FILE is a stdio
1312 stream to output the code to. SIZE is an int: how many units of
1313 temporary storage to allocate.
1314
1315 Refer to the array `regs_ever_live' to determine which registers to
1316 save; `regs_ever_live[I]' is nonzero if register number I is ever
1317 used in the function. This function is responsible for knowing
1318 which registers should not be saved even if used.
1319
1320 We override it here to allow for the new profiling code to go before
1321 the prologue and the old mcount code to go after the prologue (and
1322 after %ebx has been set up for ELF shared library support). */
1323
1324 static void
1325 ix86_osf_output_function_prologue (file, size)
1326 FILE *file;
1327 HOST_WIDE_INT size;
1328 {
1329 const char *prefix = "";
1330 const char *const lprefix = LPREFIX;
1331 int labelno = profile_label_no;
1332
1333 #ifdef OSF_OS
1334
1335 if (TARGET_UNDERSCORES)
1336 prefix = "_";
1337
1338 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1339 {
1340 if (!flag_pic && !HALF_PIC_P ())
1341 {
1342 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1343 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1344 }
1345
1346 else if (HALF_PIC_P ())
1347 {
1348 rtx symref;
1349
1350 HALF_PIC_EXTERNAL ("_mcount_ptr");
1351 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1352 "_mcount_ptr"));
1353
1354 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1355 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1356 XSTR (symref, 0));
1357 fprintf (file, "\tcall *(%%eax)\n");
1358 }
1359
1360 else
1361 {
1362 static int call_no = 0;
1363
1364 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1365 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1366 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1367 lprefix, call_no++);
1368 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1369 lprefix, labelno);
1370 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1371 prefix);
1372 fprintf (file, "\tcall *(%%eax)\n");
1373 }
1374 }
1375
1376 #else /* !OSF_OS */
1377
1378 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1379 {
1380 if (!flag_pic)
1381 {
1382 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1383 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1384 }
1385
1386 else
1387 {
1388 static int call_no = 0;
1389
1390 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1391 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1392 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1393 lprefix, call_no++);
1394 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1395 lprefix, labelno);
1396 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1397 prefix);
1398 fprintf (file, "\tcall *(%%eax)\n");
1399 }
1400 }
1401 #endif /* !OSF_OS */
1402
1403 function_prologue (file, size);
1404 }
1405
1406 #endif /* OSF_OS || TARGET_OSF1ELF */
1407
1408 /* Return 0 if the attributes for two types are incompatible, 1 if they
1409 are compatible, and 2 if they are nearly compatible (which causes a
1410 warning to be generated). */
1411
1412 static int
1413 ix86_comp_type_attributes (type1, type2)
1414 tree type1;
1415 tree type2;
1416 {
1417 /* Check for mismatch of non-default calling convention. */
1418 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1419
1420 if (TREE_CODE (type1) != FUNCTION_TYPE)
1421 return 1;
1422
1423 /* Check for mismatched return types (cdecl vs stdcall). */
1424 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1425 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1426 return 0;
1427 return 1;
1428 }
1429 \f
1430 /* Value is the number of bytes of arguments automatically
1431 popped when returning from a subroutine call.
1432 FUNDECL is the declaration node of the function (as a tree),
1433 FUNTYPE is the data type of the function (as a tree),
1434 or for a library call it is an identifier node for the subroutine name.
1435 SIZE is the number of bytes of arguments passed on the stack.
1436
1437 On the 80386, the RTD insn may be used to pop them if the number
1438 of args is fixed, but if the number is variable then the caller
1439 must pop them all. RTD can't be used for library calls now
1440 because the library is compiled with the Unix compiler.
1441 Use of RTD is a selectable option, since it is incompatible with
1442 standard Unix calling sequences. If the option is not selected,
1443 the caller must always pop the args.
1444
1445 The attribute stdcall is equivalent to RTD on a per module basis. */
1446
1447 int
1448 ix86_return_pops_args (fundecl, funtype, size)
1449 tree fundecl;
1450 tree funtype;
1451 int size;
1452 {
1453 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1454
1455 /* Cdecl functions override -mrtd, and never pop the stack. */
1456 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1457
1458 /* Stdcall functions will pop the stack if not variable args. */
1459 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1460 rtd = 1;
1461
1462 if (rtd
1463 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1464 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1465 == void_type_node)))
1466 return size;
1467 }
1468
1469 /* Lose any fake structure return argument. */
1470 if (aggregate_value_p (TREE_TYPE (funtype))
1471 && !TARGET_64BIT)
1472 return GET_MODE_SIZE (Pmode);
1473
1474 return 0;
1475 }
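/* Examples of the above: a stdcall function taking two ints pops 8 bytes
   of arguments on return; a cdecl or varargs function pops nothing; and in
   32-bit mode a function returning an aggregate in memory pops only the
   hidden return-slot pointer (GET_MODE_SIZE (Pmode) bytes).  */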
1476 \f
1477 /* Argument support functions. */
1478
1479 /* Return true when register may be used to pass function parameters. */
1480 bool
1481 ix86_function_arg_regno_p (regno)
1482 int regno;
1483 {
1484 int i;
1485 if (!TARGET_64BIT)
1486 return (regno < REGPARM_MAX
1487 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1488 if (SSE_REGNO_P (regno) && TARGET_SSE)
1489 return true;
1490 /* RAX is used as hidden argument to va_arg functions. */
1491 if (!regno)
1492 return true;
1493 for (i = 0; i < REGPARM_MAX; i++)
1494 if (regno == x86_64_int_parameter_registers[i])
1495 return true;
1496 return false;
1497 }
1498
1499 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1500 for a call to a function whose data type is FNTYPE.
1501 For a library call, FNTYPE is 0. */
1502
1503 void
1504 init_cumulative_args (cum, fntype, libname)
1505 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1506 tree fntype; /* tree ptr for function decl */
1507 rtx libname; /* SYMBOL_REF of library name or 0 */
1508 {
1509 static CUMULATIVE_ARGS zero_cum;
1510 tree param, next_param;
1511
1512 if (TARGET_DEBUG_ARG)
1513 {
1514 fprintf (stderr, "\ninit_cumulative_args (");
1515 if (fntype)
1516 fprintf (stderr, "fntype code = %s, ret code = %s",
1517 tree_code_name[(int) TREE_CODE (fntype)],
1518 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1519 else
1520 fprintf (stderr, "no fntype");
1521
1522 if (libname)
1523 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1524 }
1525
1526 *cum = zero_cum;
1527
1528 /* Set up the number of registers to use for passing arguments. */
1529 cum->nregs = ix86_regparm;
1530 cum->sse_nregs = SSE_REGPARM_MAX;
1531 if (fntype && !TARGET_64BIT)
1532 {
1533 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1534
1535 if (attr)
1536 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1537 }
1538 cum->maybe_vaarg = false;
1539
1540 /* Determine if this function has variable arguments. This is
1541 indicated by the last argument being 'void_type_node' if there
1542 are no variable arguments. If there are variable arguments, then
1543 we won't pass anything in registers */
1544
1545 if (cum->nregs)
1546 {
1547 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1548 param != 0; param = next_param)
1549 {
1550 next_param = TREE_CHAIN (param);
1551 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1552 {
1553 if (!TARGET_64BIT)
1554 cum->nregs = 0;
1555 cum->maybe_vaarg = true;
1556 }
1557 }
1558 }
1559 if ((!fntype && !libname)
1560 || (fntype && !TYPE_ARG_TYPES (fntype)))
1561 cum->maybe_vaarg = 1;
1562
1563 if (TARGET_DEBUG_ARG)
1564 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1565
1566 return;
1567 }
1568
1569 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1570 of this code is to classify each eightbyte of an incoming argument by its register
1571 class and assign registers accordingly. */
1572
1573 /* Return the union class of CLASS1 and CLASS2.
1574 See the x86-64 PS ABI for details. */
1575
1576 static enum x86_64_reg_class
1577 merge_classes (class1, class2)
1578 enum x86_64_reg_class class1, class2;
1579 {
1580 /* Rule #1: If both classes are equal, this is the resulting class. */
1581 if (class1 == class2)
1582 return class1;
1583
1584 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1585 the other class. */
1586 if (class1 == X86_64_NO_CLASS)
1587 return class2;
1588 if (class2 == X86_64_NO_CLASS)
1589 return class1;
1590
1591 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1592 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1593 return X86_64_MEMORY_CLASS;
1594
1595 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1596 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1597 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1598 return X86_64_INTEGERSI_CLASS;
1599 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1600 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1601 return X86_64_INTEGER_CLASS;
1602
1603 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1604 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1605 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1606 return X86_64_MEMORY_CLASS;
1607
1608 /* Rule #6: Otherwise class SSE is used. */
1609 return X86_64_SSE_CLASS;
1610 }
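
/* Two sample invocations, restating the rules above rather than adding
   new behavior:

     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS   (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       == X86_64_MEMORY_CLASS      (rule #5)  */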
1611
1612 /* Classify the argument of type TYPE and mode MODE.
1613 CLASSES will be filled by the register class used to pass each word
1614 of the operand. The number of words is returned. In case the parameter
1615 should be passed in memory, 0 is returned. As a special case for zero
1616 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1617
1618 BIT_OFFSET is used internally for handling records and specifies the
1619 offset in bits modulo 256 to avoid overflow cases.
1620
1621 See the x86-64 PS ABI for details.
1622 */
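
/* A worked example (the structure is hypothetical and serves only as an
   illustration):

     struct s { double d; int i; };   (16 bytes, two eightbytes)

   The first eightbyte holds only the double and classifies as
   X86_64_SSEDF_CLASS; the second holds the int at bit offset 64 and
   classifies as X86_64_INTEGER_CLASS, so classify_argument returns 2
   with classes = { SSEDF, INTEGER } and the structure is passed in one
   SSE register and one integer register.  */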
1623
1624 static int
1625 classify_argument (mode, type, classes, bit_offset)
1626 enum machine_mode mode;
1627 tree type;
1628 enum x86_64_reg_class classes[MAX_CLASSES];
1629 int bit_offset;
1630 {
1631 int bytes =
1632 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1633 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1634
1635 if (type && AGGREGATE_TYPE_P (type))
1636 {
1637 int i;
1638 tree field;
1639 enum x86_64_reg_class subclasses[MAX_CLASSES];
1640
1641 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1642 if (bytes > 16)
1643 return 0;
1644
1645 for (i = 0; i < words; i++)
1646 classes[i] = X86_64_NO_CLASS;
1647
1648 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1649 signal the memory class, so handle this as a special case. */
1650 if (!words)
1651 {
1652 classes[0] = X86_64_NO_CLASS;
1653 return 1;
1654 }
1655
1656 /* Classify each field of record and merge classes. */
1657 if (TREE_CODE (type) == RECORD_TYPE)
1658 {
1659 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1660 {
1661 if (TREE_CODE (field) == FIELD_DECL)
1662 {
1663 int num;
1664
1665 /* Bitfields are always classified as integer. Handle them
1666 early, since later code would consider them to be
1667 misaligned integers. */
1668 if (DECL_BIT_FIELD (field))
1669 {
1670 for (i = int_bit_position (field) / 8 / 8;
1671 i < (int_bit_position (field)
1672 + tree_low_cst (DECL_SIZE (field), 0)
1673 + 63) / 8 / 8; i++)
1674 classes[i] =
1675 merge_classes (X86_64_INTEGER_CLASS,
1676 classes[i]);
1677 }
1678 else
1679 {
1680 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1681 TREE_TYPE (field), subclasses,
1682 (int_bit_position (field)
1683 + bit_offset) % 256);
1684 if (!num)
1685 return 0;
1686 for (i = 0; i < num; i++)
1687 {
1688 int pos =
1689 (int_bit_position (field) + bit_offset) / 8 / 8;
1690 classes[i + pos] =
1691 merge_classes (subclasses[i], classes[i + pos]);
1692 }
1693 }
1694 }
1695 }
1696 }
1697 /* Arrays are handled as small records. */
1698 else if (TREE_CODE (type) == ARRAY_TYPE)
1699 {
1700 int num;
1701 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1702 TREE_TYPE (type), subclasses, bit_offset);
1703 if (!num)
1704 return 0;
1705
1706 /* The partial classes are now full classes. */
1707 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1708 subclasses[0] = X86_64_SSE_CLASS;
1709 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1710 subclasses[0] = X86_64_INTEGER_CLASS;
1711
1712 for (i = 0; i < words; i++)
1713 classes[i] = subclasses[i % num];
1714 }
1715 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1716 else if (TREE_CODE (type) == UNION_TYPE)
1717 {
1718 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1719 {
1720 if (TREE_CODE (field) == FIELD_DECL)
1721 {
1722 int num;
1723 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1724 TREE_TYPE (field), subclasses,
1725 bit_offset);
1726 if (!num)
1727 return 0;
1728 for (i = 0; i < num; i++)
1729 classes[i] = merge_classes (subclasses[i], classes[i]);
1730 }
1731 }
1732 }
1733 else
1734 abort ();
1735
1736 /* Final merger cleanup. */
1737 for (i = 0; i < words; i++)
1738 {
1739 /* If one class is MEMORY, everything should be passed in
1740 memory. */
1741 if (classes[i] == X86_64_MEMORY_CLASS)
1742 return 0;
1743
1744 /* The X86_64_SSEUP_CLASS should always be preceded by
1745 X86_64_SSE_CLASS. */
1746 if (classes[i] == X86_64_SSEUP_CLASS
1747 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1748 classes[i] = X86_64_SSE_CLASS;
1749
1750 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1751 if (classes[i] == X86_64_X87UP_CLASS
1752 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1753 classes[i] = X86_64_SSE_CLASS;
1754 }
1755 return words;
1756 }
1757
1758 /* Compute the alignment needed. We align all types to their natural boundaries,
1759 with the exception of XFmode, which is aligned to 64 bits. */
1760 if (mode != VOIDmode && mode != BLKmode)
1761 {
1762 int mode_alignment = GET_MODE_BITSIZE (mode);
1763
1764 if (mode == XFmode)
1765 mode_alignment = 128;
1766 else if (mode == XCmode)
1767 mode_alignment = 256;
1768 /* Misaligned fields are always returned in memory. */
1769 if (bit_offset % mode_alignment)
1770 return 0;
1771 }
1772
1773 /* Classification of atomic types. */
1774 switch (mode)
1775 {
1776 case DImode:
1777 case SImode:
1778 case HImode:
1779 case QImode:
1780 case CSImode:
1781 case CHImode:
1782 case CQImode:
1783 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1784 classes[0] = X86_64_INTEGERSI_CLASS;
1785 else
1786 classes[0] = X86_64_INTEGER_CLASS;
1787 return 1;
1788 case CDImode:
1789 case TImode:
1790 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1791 return 2;
1792 case CTImode:
1793 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1794 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1795 return 4;
1796 case SFmode:
1797 if (!(bit_offset % 64))
1798 classes[0] = X86_64_SSESF_CLASS;
1799 else
1800 classes[0] = X86_64_SSE_CLASS;
1801 return 1;
1802 case DFmode:
1803 classes[0] = X86_64_SSEDF_CLASS;
1804 return 1;
1805 case TFmode:
1806 classes[0] = X86_64_X87_CLASS;
1807 classes[1] = X86_64_X87UP_CLASS;
1808 return 2;
1809 case TCmode:
1810 classes[0] = X86_64_X87_CLASS;
1811 classes[1] = X86_64_X87UP_CLASS;
1812 classes[2] = X86_64_X87_CLASS;
1813 classes[3] = X86_64_X87UP_CLASS;
1814 return 4;
1815 case DCmode:
1816 classes[0] = X86_64_SSEDF_CLASS;
1817 classes[1] = X86_64_SSEDF_CLASS;
1818 return 2;
1819 case SCmode:
1820 classes[0] = X86_64_SSE_CLASS;
1821 return 1;
1822 case V4SFmode:
1823 case V4SImode:
1824 classes[0] = X86_64_SSE_CLASS;
1825 classes[1] = X86_64_SSEUP_CLASS;
1826 return 2;
1827 case V2SFmode:
1828 case V2SImode:
1829 case V4HImode:
1830 case V8QImode:
1831 classes[0] = X86_64_SSE_CLASS;
1832 return 1;
1833 case BLKmode:
1834 case VOIDmode:
1835 return 0;
1836 default:
1837 abort ();
1838 }
1839 }
1840
1841 /* Examine the argument and set the number of registers required in each
1842 class. Return 0 iff the parameter should be passed in memory. */
1843 static int
1844 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1845 enum machine_mode mode;
1846 tree type;
1847 int *int_nregs, *sse_nregs;
1848 int in_return;
1849 {
1850 enum x86_64_reg_class class[MAX_CLASSES];
1851 int n = classify_argument (mode, type, class, 0);
1852
1853 *int_nregs = 0;
1854 *sse_nregs = 0;
1855 if (!n)
1856 return 0;
1857 for (n--; n >= 0; n--)
1858 switch (class[n])
1859 {
1860 case X86_64_INTEGER_CLASS:
1861 case X86_64_INTEGERSI_CLASS:
1862 (*int_nregs)++;
1863 break;
1864 case X86_64_SSE_CLASS:
1865 case X86_64_SSESF_CLASS:
1866 case X86_64_SSEDF_CLASS:
1867 (*sse_nregs)++;
1868 break;
1869 case X86_64_NO_CLASS:
1870 case X86_64_SSEUP_CLASS:
1871 break;
1872 case X86_64_X87_CLASS:
1873 case X86_64_X87UP_CLASS:
1874 if (!in_return)
1875 return 0;
1876 break;
1877 case X86_64_MEMORY_CLASS:
1878 abort ();
1879 }
1880 return 1;
1881 }
1882 /* Construct container for the argument used by GCC interface. See
1883 FUNCTION_ARG for the detailed description. */
1884 static rtx
1885 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1886 enum machine_mode mode;
1887 tree type;
1888 int in_return;
1889 int nintregs, nsseregs;
1890 const int * intreg;
1891 int sse_regno;
1892 {
1893 enum machine_mode tmpmode;
1894 int bytes =
1895 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1896 enum x86_64_reg_class class[MAX_CLASSES];
1897 int n;
1898 int i;
1899 int nexps = 0;
1900 int needed_sseregs, needed_intregs;
1901 rtx exp[MAX_CLASSES];
1902 rtx ret;
1903
1904 n = classify_argument (mode, type, class, 0);
1905 if (TARGET_DEBUG_ARG)
1906 {
1907 if (!n)
1908 fprintf (stderr, "Memory class\n");
1909 else
1910 {
1911 fprintf (stderr, "Classes:");
1912 for (i = 0; i < n; i++)
1913 {
1914 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1915 }
1916 fprintf (stderr, "\n");
1917 }
1918 }
1919 if (!n)
1920 return NULL;
1921 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1922 return NULL;
1923 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1924 return NULL;
1925
1926 /* First construct the simple cases. Avoid SCmode, since we want to use
1927 a single register to pass this type. */
1928 if (n == 1 && mode != SCmode)
1929 switch (class[0])
1930 {
1931 case X86_64_INTEGER_CLASS:
1932 case X86_64_INTEGERSI_CLASS:
1933 return gen_rtx_REG (mode, intreg[0]);
1934 case X86_64_SSE_CLASS:
1935 case X86_64_SSESF_CLASS:
1936 case X86_64_SSEDF_CLASS:
1937 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1938 case X86_64_X87_CLASS:
1939 return gen_rtx_REG (mode, FIRST_STACK_REG);
1940 case X86_64_NO_CLASS:
1941 /* Zero sized array, struct or class. */
1942 return NULL;
1943 default:
1944 abort ();
1945 }
1946 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1947 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1948 if (n == 2
1949 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1950 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1951 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1952 && class[1] == X86_64_INTEGER_CLASS
1953 && (mode == CDImode || mode == TImode)
1954 && intreg[0] + 1 == intreg[1])
1955 return gen_rtx_REG (mode, intreg[0]);
1956 if (n == 4
1957 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1958 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1959 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1960
1961 /* Otherwise figure out the entries of the PARALLEL. */
1962 for (i = 0; i < n; i++)
1963 {
1964 switch (class[i])
1965 {
1966 case X86_64_NO_CLASS:
1967 break;
1968 case X86_64_INTEGER_CLASS:
1969 case X86_64_INTEGERSI_CLASS:
1970 /* Merge TImodes on aligned occasions here too. */
1971 if (i * 8 + 8 > bytes)
1972 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1973 else if (class[i] == X86_64_INTEGERSI_CLASS)
1974 tmpmode = SImode;
1975 else
1976 tmpmode = DImode;
1977 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
1978 if (tmpmode == BLKmode)
1979 tmpmode = DImode;
1980 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1981 gen_rtx_REG (tmpmode, *intreg),
1982 GEN_INT (i*8));
1983 intreg++;
1984 break;
1985 case X86_64_SSESF_CLASS:
1986 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1987 gen_rtx_REG (SFmode,
1988 SSE_REGNO (sse_regno)),
1989 GEN_INT (i*8));
1990 sse_regno++;
1991 break;
1992 case X86_64_SSEDF_CLASS:
1993 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1994 gen_rtx_REG (DFmode,
1995 SSE_REGNO (sse_regno)),
1996 GEN_INT (i*8));
1997 sse_regno++;
1998 break;
1999 case X86_64_SSE_CLASS:
2000 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2001 tmpmode = TImode, i++;
2002 else
2003 tmpmode = DImode;
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (tmpmode,
2006 SSE_REGNO (sse_regno)),
2007 GEN_INT (i*8));
2008 sse_regno++;
2009 break;
2010 default:
2011 abort ();
2012 }
2013 }
2014 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2015 for (i = 0; i < nexps; i++)
2016 XVECEXP (ret, 0, i) = exp [i];
2017 return ret;
2018 }
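
/* Continuing the hypothetical struct s { double d; int i; } example,
   construct_container builds roughly the following PARALLEL (the hard
   register names are only illustrative; the real ones come from the
   intreg array and sse_regno):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the SSEDF eightbyte lands at offset 0 in an SSE register and the
   INTEGER eightbyte at offset 8 in a general register.  */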
2019
2020 /* Update the data in CUM to advance over an argument
2021 of mode MODE and data type TYPE.
2022 (TYPE is null for libcalls where that information may not be available.) */
2023
2024 void
2025 function_arg_advance (cum, mode, type, named)
2026 CUMULATIVE_ARGS *cum; /* current arg information */
2027 enum machine_mode mode; /* current arg mode */
2028 tree type; /* type of the argument or 0 if lib support */
2029 int named; /* whether or not the argument was named */
2030 {
2031 int bytes =
2032 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2033 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2034
2035 if (TARGET_DEBUG_ARG)
2036 fprintf (stderr,
2037 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2038 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2039 if (TARGET_64BIT)
2040 {
2041 int int_nregs, sse_nregs;
2042 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2043 cum->words += words;
2044 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2045 {
2046 cum->nregs -= int_nregs;
2047 cum->sse_nregs -= sse_nregs;
2048 cum->regno += int_nregs;
2049 cum->sse_regno += sse_nregs;
2050 }
2051 else
2052 cum->words += words;
2053 }
2054 else
2055 {
2056 if (TARGET_SSE && mode == TImode)
2057 {
2058 cum->sse_words += words;
2059 cum->sse_nregs -= 1;
2060 cum->sse_regno += 1;
2061 if (cum->sse_nregs <= 0)
2062 {
2063 cum->sse_nregs = 0;
2064 cum->sse_regno = 0;
2065 }
2066 }
2067 else
2068 {
2069 cum->words += words;
2070 cum->nregs -= words;
2071 cum->regno += words;
2072
2073 if (cum->nregs <= 0)
2074 {
2075 cum->nregs = 0;
2076 cum->regno = 0;
2077 }
2078 }
2079 }
2080 return;
2081 }
2082
2083 /* Define where to put the arguments to a function.
2084 Value is zero to push the argument on the stack,
2085 or a hard register in which to store the argument.
2086
2087 MODE is the argument's machine mode.
2088 TYPE is the data type of the argument (as a tree).
2089 This is null for libcalls where that information may
2090 not be available.
2091 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2092 the preceding args and about the function being called.
2093 NAMED is nonzero if this argument is a named parameter
2094 (otherwise it is an extra parameter matching an ellipsis). */
2095
2096 rtx
2097 function_arg (cum, mode, type, named)
2098 CUMULATIVE_ARGS *cum; /* current arg information */
2099 enum machine_mode mode; /* current arg mode */
2100 tree type; /* type of the argument or 0 if lib support */
2101 int named; /* != 0 for normal args, == 0 for ... args */
2102 {
2103 rtx ret = NULL_RTX;
2104 int bytes =
2105 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2106 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2107
2108 /* Handle a hidden AL argument containing the number of SSE registers used
2109 by varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2110 avoid any AL settings. */
2111 if (mode == VOIDmode)
2112 {
2113 if (TARGET_64BIT)
2114 return GEN_INT (cum->maybe_vaarg
2115 ? (cum->sse_nregs < 0
2116 ? SSE_REGPARM_MAX
2117 : cum->sse_regno)
2118 : -1);
2119 else
2120 return constm1_rtx;
2121 }
2122 if (TARGET_64BIT)
2123 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2124 &x86_64_int_parameter_registers [cum->regno],
2125 cum->sse_regno);
2126 else
2127 switch (mode)
2128 {
2129 /* For now, pass fp/complex values on the stack. */
2130 default:
2131 break;
2132
2133 case BLKmode:
2134 case DImode:
2135 case SImode:
2136 case HImode:
2137 case QImode:
2138 if (words <= cum->nregs)
2139 ret = gen_rtx_REG (mode, cum->regno);
2140 break;
2141 case TImode:
2142 if (cum->sse_nregs)
2143 ret = gen_rtx_REG (mode, cum->sse_regno);
2144 break;
2145 }
2146
2147 if (TARGET_DEBUG_ARG)
2148 {
2149 fprintf (stderr,
2150 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2151 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2152
2153 if (ret)
2154 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2155 else
2156 fprintf (stderr, ", stack");
2157
2158 fprintf (stderr, " )\n");
2159 }
2160
2161 return ret;
2162 }
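
/* A small usage sketch (the attribute and register names are given
   purely for illustration): on ia32 a function declared with
   __attribute__ ((regparm (3))) receives its first three integer words
   in registers 0, 1 and 2 of the i386 numbering (eax, edx, ecx), while
   on x86-64 the container machinery above hands out the registers
   listed in x86_64_int_parameter_registers instead.  */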
2163
2164 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2165 and type. */
2166
2167 int
2168 ix86_function_arg_boundary (mode, type)
2169 enum machine_mode mode;
2170 tree type;
2171 {
2172 int align;
2173 if (!TARGET_64BIT)
2174 return PARM_BOUNDARY;
2175 if (type)
2176 align = TYPE_ALIGN (type);
2177 else
2178 align = GET_MODE_ALIGNMENT (mode);
2179 if (align < PARM_BOUNDARY)
2180 align = PARM_BOUNDARY;
2181 if (align > 128)
2182 align = 128;
2183 return align;
2184 }
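
/* For example, following the code above: a 16 byte vector argument such
   as V4SFmode aligns its slot to 128 bits, a plain SImode argument gets
   PARM_BOUNDARY, and nothing is ever aligned beyond 128 bits.  */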
2185
2186 /* Return true if N is a possible register number of function value. */
2187 bool
2188 ix86_function_value_regno_p (regno)
2189 int regno;
2190 {
2191 if (!TARGET_64BIT)
2192 {
2193 return ((regno) == 0
2194 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2195 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2196 }
2197 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2198 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2199 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2200 }
2201
2202 /* Define how to find the value returned by a function.
2203 VALTYPE is the data type of the value (as a tree).
2204 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2205 otherwise, FUNC is 0. */
2206 rtx
2207 ix86_function_value (valtype)
2208 tree valtype;
2209 {
2210 if (TARGET_64BIT)
2211 {
2212 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2213 REGPARM_MAX, SSE_REGPARM_MAX,
2214 x86_64_int_return_registers, 0);
2215 /* For zero sized structures, construct_container returns NULL, but we need
2216 to keep the rest of the compiler happy by returning a meaningful value. */
2217 if (!ret)
2218 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2219 return ret;
2220 }
2221 else
2222 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2223 }
2224
2225 /* Return nonzero iff type is returned in memory. */
2226 int
2227 ix86_return_in_memory (type)
2228 tree type;
2229 {
2230 int needed_intregs, needed_sseregs;
2231 if (TARGET_64BIT)
2232 {
2233 return !examine_argument (TYPE_MODE (type), type, 1,
2234 &needed_intregs, &needed_sseregs);
2235 }
2236 else
2237 {
2238 if (TYPE_MODE (type) == BLKmode
2239 || (VECTOR_MODE_P (TYPE_MODE (type))
2240 && int_size_in_bytes (type) == 8)
2241 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2242 && TYPE_MODE (type) != TFmode
2243 && !VECTOR_MODE_P (TYPE_MODE (type))))
2244 return 1;
2245 return 0;
2246 }
2247 }
2248
2249 /* Define how to find the value returned by a library function
2250 assuming the value has mode MODE. */
2251 rtx
2252 ix86_libcall_value (mode)
2253 enum machine_mode mode;
2254 {
2255 if (TARGET_64BIT)
2256 {
2257 switch (mode)
2258 {
2259 case SFmode:
2260 case SCmode:
2261 case DFmode:
2262 case DCmode:
2263 return gen_rtx_REG (mode, FIRST_SSE_REG);
2264 case TFmode:
2265 case TCmode:
2266 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2267 default:
2268 return gen_rtx_REG (mode, 0);
2269 }
2270 }
2271 else
2272 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2273 }
2274 \f
2275 /* Create the va_list data type. */
2276
2277 tree
2278 ix86_build_va_list ()
2279 {
2280 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2281
2282 /* For i386 we use a plain pointer to the argument area. */
2283 if (!TARGET_64BIT)
2284 return build_pointer_type (char_type_node);
2285
2286 record = make_lang_type (RECORD_TYPE);
2287 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2288
2289 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2290 unsigned_type_node);
2291 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2292 unsigned_type_node);
2293 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2294 ptr_type_node);
2295 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2296 ptr_type_node);
2297
2298 DECL_FIELD_CONTEXT (f_gpr) = record;
2299 DECL_FIELD_CONTEXT (f_fpr) = record;
2300 DECL_FIELD_CONTEXT (f_ovf) = record;
2301 DECL_FIELD_CONTEXT (f_sav) = record;
2302
2303 TREE_CHAIN (record) = type_decl;
2304 TYPE_NAME (record) = type_decl;
2305 TYPE_FIELDS (record) = f_gpr;
2306 TREE_CHAIN (f_gpr) = f_fpr;
2307 TREE_CHAIN (f_fpr) = f_ovf;
2308 TREE_CHAIN (f_ovf) = f_sav;
2309
2310 layout_type (record);
2311
2312 /* The correct type is an array type of one element. */
2313 return build_array_type (record, build_index_type (size_zero_node));
2314 }
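
/* In C terms the record built above corresponds to the following layout
   (field names as created above; the typedef spelling is only
   illustrative):

     struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];
*/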
2315
2316 /* Perform any actions needed for a function that is receiving a
2317 variable number of arguments.
2318
2319 CUM is as above.
2320
2321 MODE and TYPE are the mode and type of the current parameter.
2322
2323 PRETEND_SIZE is a variable that should be set to the amount of stack
2324 that must be pushed by the prolog to pretend that our caller pushed
2325 it.
2326
2327 Normally, this macro will push all remaining incoming registers on the
2328 stack and set PRETEND_SIZE to the length of the registers pushed. */
2329
2330 void
2331 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2332 CUMULATIVE_ARGS *cum;
2333 enum machine_mode mode;
2334 tree type;
2335 int *pretend_size ATTRIBUTE_UNUSED;
2336 int no_rtl;
2337
2338 {
2339 CUMULATIVE_ARGS next_cum;
2340 rtx save_area = NULL_RTX, mem;
2341 rtx label;
2342 rtx label_ref;
2343 rtx tmp_reg;
2344 rtx nsse_reg;
2345 int set;
2346 tree fntype;
2347 int stdarg_p;
2348 int i;
2349
2350 if (!TARGET_64BIT)
2351 return;
2352
2353 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2354 ix86_save_varrargs_registers = 1;
2355
2356 fntype = TREE_TYPE (current_function_decl);
2357 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2358 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2359 != void_type_node));
2360
2361 /* For varargs, we do not want to skip the dummy va_dcl argument.
2362 For stdargs, we do want to skip the last named argument. */
2363 next_cum = *cum;
2364 if (stdarg_p)
2365 function_arg_advance (&next_cum, mode, type, 1);
2366
2367 if (!no_rtl)
2368 save_area = frame_pointer_rtx;
2369
2370 set = get_varargs_alias_set ();
2371
2372 for (i = next_cum.regno; i < ix86_regparm; i++)
2373 {
2374 mem = gen_rtx_MEM (Pmode,
2375 plus_constant (save_area, i * UNITS_PER_WORD));
2376 set_mem_alias_set (mem, set);
2377 emit_move_insn (mem, gen_rtx_REG (Pmode,
2378 x86_64_int_parameter_registers[i]));
2379 }
2380
2381 if (next_cum.sse_nregs)
2382 {
2383 /* Now emit code to save the SSE registers. The AX parameter contains the
2384 number of SSE parameter registers used to call this function. We use the
2385 sse_prologue_save insn template that produces a computed jump across the
2386 SSE saves. We need some preparation work to get this working. */
2387
2388 label = gen_label_rtx ();
2389 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2390
2391 /* Compute the address to jump to:
2392 label - eax*4 + nnamed_sse_arguments*4 */
2393 tmp_reg = gen_reg_rtx (Pmode);
2394 nsse_reg = gen_reg_rtx (Pmode);
2395 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2396 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2397 gen_rtx_MULT (Pmode, nsse_reg,
2398 GEN_INT (4))));
2399 if (next_cum.sse_regno)
2400 emit_move_insn
2401 (nsse_reg,
2402 gen_rtx_CONST (DImode,
2403 gen_rtx_PLUS (DImode,
2404 label_ref,
2405 GEN_INT (next_cum.sse_regno * 4))));
2406 else
2407 emit_move_insn (nsse_reg, label_ref);
2408 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2409
2410 /* Compute the address of the memory block we save into. We always use a
2411 pointer pointing 127 bytes after the first byte to store - this is needed
2412 to keep the instruction size limited to 4 bytes. */
2413 tmp_reg = gen_reg_rtx (Pmode);
2414 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2415 plus_constant (save_area,
2416 8 * REGPARM_MAX + 127)));
2417 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2418 set_mem_alias_set (mem, set);
2419 set_mem_align (mem, BITS_PER_WORD);
2420
2421 /* And finally do the dirty job! */
2422 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2423 GEN_INT (next_cum.sse_regno), label));
2424 }
2425
2426 }
2427
2428 /* Implement va_start. */
2429
2430 void
2431 ix86_va_start (stdarg_p, valist, nextarg)
2432 int stdarg_p;
2433 tree valist;
2434 rtx nextarg;
2435 {
2436 HOST_WIDE_INT words, n_gpr, n_fpr;
2437 tree f_gpr, f_fpr, f_ovf, f_sav;
2438 tree gpr, fpr, ovf, sav, t;
2439
2440 /* Only the 64-bit target needs something special. */
2441 if (!TARGET_64BIT)
2442 {
2443 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2444 return;
2445 }
2446
2447 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2448 f_fpr = TREE_CHAIN (f_gpr);
2449 f_ovf = TREE_CHAIN (f_fpr);
2450 f_sav = TREE_CHAIN (f_ovf);
2451
2452 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2453 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2454 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2455 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2456 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2457
2458 /* Count number of gp and fp argument registers used. */
2459 words = current_function_args_info.words;
2460 n_gpr = current_function_args_info.regno;
2461 n_fpr = current_function_args_info.sse_regno;
2462
2463 if (TARGET_DEBUG_ARG)
2464 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2465 (int) words, (int) n_gpr, (int) n_fpr);
2466
2467 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2468 build_int_2 (n_gpr * 8, 0));
2469 TREE_SIDE_EFFECTS (t) = 1;
2470 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2471
2472 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2473 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2474 TREE_SIDE_EFFECTS (t) = 1;
2475 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2476
2477 /* Find the overflow area. */
2478 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2479 if (words != 0)
2480 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2481 build_int_2 (words * UNITS_PER_WORD, 0));
2482 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2483 TREE_SIDE_EFFECTS (t) = 1;
2484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2485
2486 /* Find the register save area.
2487 The prologue of the function saves it right above the stack frame. */
2488 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2489 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2490 TREE_SIDE_EFFECTS (t) = 1;
2491 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2492 }
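
/* As a concrete (hypothetical) instance: for int f (int a, ...) one
   named integer argument has been consumed when f calls va_start, so
   the code above stores gp_offset = 1 * 8, fp_offset = REGPARM_MAX * 8,
   points overflow_arg_area at the first stack-passed word and points
   reg_save_area at the block saved by ix86_setup_incoming_varargs.  */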
2493
2494 /* Implement va_arg. */
2495 rtx
2496 ix86_va_arg (valist, type)
2497 tree valist, type;
2498 {
2499 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2500 tree f_gpr, f_fpr, f_ovf, f_sav;
2501 tree gpr, fpr, ovf, sav, t;
2502 int size, rsize;
2503 rtx lab_false, lab_over = NULL_RTX;
2504 rtx addr_rtx, r;
2505 rtx container;
2506
2507 /* Only the 64-bit target needs something special. */
2508 if (!TARGET_64BIT)
2509 {
2510 return std_expand_builtin_va_arg (valist, type);
2511 }
2512
2513 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2514 f_fpr = TREE_CHAIN (f_gpr);
2515 f_ovf = TREE_CHAIN (f_fpr);
2516 f_sav = TREE_CHAIN (f_ovf);
2517
2518 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2519 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2520 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2521 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2522 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2523
2524 size = int_size_in_bytes (type);
2525 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2526
2527 container = construct_container (TYPE_MODE (type), type, 0,
2528 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2529 /*
2530 * Pull the value out of the saved registers ...
2531 */
2532
2533 addr_rtx = gen_reg_rtx (Pmode);
2534
2535 if (container)
2536 {
2537 rtx int_addr_rtx, sse_addr_rtx;
2538 int needed_intregs, needed_sseregs;
2539 int need_temp;
2540
2541 lab_over = gen_label_rtx ();
2542 lab_false = gen_label_rtx ();
2543
2544 examine_argument (TYPE_MODE (type), type, 0,
2545 &needed_intregs, &needed_sseregs);
2546
2547
2548 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2549 || TYPE_ALIGN (type) > 128);
2550
2551 /* In case we are passing a structure, verify that it is a consecutive block
2552 in the register save area. If not we need to do moves. */
2553 if (!need_temp && !REG_P (container))
2554 {
2555 /* Verify that all registers are strictly consecutive. */
2556 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2557 {
2558 int i;
2559
2560 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2561 {
2562 rtx slot = XVECEXP (container, 0, i);
2563 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2564 || INTVAL (XEXP (slot, 1)) != i * 16)
2565 need_temp = 1;
2566 }
2567 }
2568 else
2569 {
2570 int i;
2571
2572 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2573 {
2574 rtx slot = XVECEXP (container, 0, i);
2575 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2576 || INTVAL (XEXP (slot, 1)) != i * 8)
2577 need_temp = 1;
2578 }
2579 }
2580 }
2581 if (!need_temp)
2582 {
2583 int_addr_rtx = addr_rtx;
2584 sse_addr_rtx = addr_rtx;
2585 }
2586 else
2587 {
2588 int_addr_rtx = gen_reg_rtx (Pmode);
2589 sse_addr_rtx = gen_reg_rtx (Pmode);
2590 }
2591 /* First ensure that we fit completely in registers. */
2592 if (needed_intregs)
2593 {
2594 emit_cmp_and_jump_insns (expand_expr
2595 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2596 GEN_INT ((REGPARM_MAX - needed_intregs +
2597 1) * 8), GE, const1_rtx, SImode,
2598 1, lab_false);
2599 }
2600 if (needed_sseregs)
2601 {
2602 emit_cmp_and_jump_insns (expand_expr
2603 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2604 GEN_INT ((SSE_REGPARM_MAX -
2605 needed_sseregs + 1) * 16 +
2606 REGPARM_MAX * 8), GE, const1_rtx,
2607 SImode, 1, lab_false);
2608 }
2609
2610 /* Compute index to start of area used for integer regs. */
2611 if (needed_intregs)
2612 {
2613 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2614 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2615 if (r != int_addr_rtx)
2616 emit_move_insn (int_addr_rtx, r);
2617 }
2618 if (needed_sseregs)
2619 {
2620 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2621 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2622 if (r != sse_addr_rtx)
2623 emit_move_insn (sse_addr_rtx, r);
2624 }
2625 if (need_temp)
2626 {
2627 int i;
2628 rtx mem;
2629
2630 /* Never use the memory itself, as it has the alias set. */
2631 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2632 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2633 set_mem_alias_set (mem, get_varargs_alias_set ());
2634 set_mem_align (mem, BITS_PER_UNIT);
2635
2636 for (i = 0; i < XVECLEN (container, 0); i++)
2637 {
2638 rtx slot = XVECEXP (container, 0, i);
2639 rtx reg = XEXP (slot, 0);
2640 enum machine_mode mode = GET_MODE (reg);
2641 rtx src_addr;
2642 rtx src_mem;
2643 int src_offset;
2644 rtx dest_mem;
2645
2646 if (SSE_REGNO_P (REGNO (reg)))
2647 {
2648 src_addr = sse_addr_rtx;
2649 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2650 }
2651 else
2652 {
2653 src_addr = int_addr_rtx;
2654 src_offset = REGNO (reg) * 8;
2655 }
2656 src_mem = gen_rtx_MEM (mode, src_addr);
2657 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2658 src_mem = adjust_address (src_mem, mode, src_offset);
2659 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2660 emit_move_insn (dest_mem, src_mem);
2661 }
2662 }
2663
2664 if (needed_intregs)
2665 {
2666 t =
2667 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2668 build_int_2 (needed_intregs * 8, 0));
2669 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2670 TREE_SIDE_EFFECTS (t) = 1;
2671 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2672 }
2673 if (needed_sseregs)
2674 {
2675 t =
2676 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2677 build_int_2 (needed_sseregs * 16, 0));
2678 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2679 TREE_SIDE_EFFECTS (t) = 1;
2680 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2681 }
2682
2683 emit_jump_insn (gen_jump (lab_over));
2684 emit_barrier ();
2685 emit_label (lab_false);
2686 }
2687
2688 /* ... otherwise out of the overflow area. */
2689
2690 /* Care for on-stack alignment if needed. */
2691 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2692 t = ovf;
2693 else
2694 {
2695 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2696 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2697 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2698 }
2699 t = save_expr (t);
2700
2701 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2702 if (r != addr_rtx)
2703 emit_move_insn (addr_rtx, r);
2704
2705 t =
2706 build (PLUS_EXPR, TREE_TYPE (t), t,
2707 build_int_2 (rsize * UNITS_PER_WORD, 0));
2708 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2709 TREE_SIDE_EFFECTS (t) = 1;
2710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2711
2712 if (container)
2713 emit_label (lab_over);
2714
2715 return addr_rtx;
2716 }
2717 \f
2718 /* Return nonzero if OP is general operand representable on x86_64. */
2719
2720 int
2721 x86_64_general_operand (op, mode)
2722 rtx op;
2723 enum machine_mode mode;
2724 {
2725 if (!TARGET_64BIT)
2726 return general_operand (op, mode);
2727 if (nonimmediate_operand (op, mode))
2728 return 1;
2729 return x86_64_sign_extended_value (op);
2730 }
2731
2732 /* Return nonzero if OP is general operand representable on x86_64
2733 as either sign extended or zero extended constant. */
2734
2735 int
2736 x86_64_szext_general_operand (op, mode)
2737 rtx op;
2738 enum machine_mode mode;
2739 {
2740 if (!TARGET_64BIT)
2741 return general_operand (op, mode);
2742 if (nonimmediate_operand (op, mode))
2743 return 1;
2744 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2745 }
2746
2747 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2748
2749 int
2750 x86_64_nonmemory_operand (op, mode)
2751 rtx op;
2752 enum machine_mode mode;
2753 {
2754 if (!TARGET_64BIT)
2755 return nonmemory_operand (op, mode);
2756 if (register_operand (op, mode))
2757 return 1;
2758 return x86_64_sign_extended_value (op);
2759 }
2760
2761 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2762
2763 int
2764 x86_64_movabs_operand (op, mode)
2765 rtx op;
2766 enum machine_mode mode;
2767 {
2768 if (!TARGET_64BIT || !flag_pic)
2769 return nonmemory_operand (op, mode);
2770 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2771 return 1;
2772 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2773 return 1;
2774 return 0;
2775 }
2776
2777 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2778
2779 int
2780 x86_64_szext_nonmemory_operand (op, mode)
2781 rtx op;
2782 enum machine_mode mode;
2783 {
2784 if (!TARGET_64BIT)
2785 return nonmemory_operand (op, mode);
2786 if (register_operand (op, mode))
2787 return 1;
2788 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2789 }
2790
2791 /* Return nonzero if OP is immediate operand representable on x86_64. */
2792
2793 int
2794 x86_64_immediate_operand (op, mode)
2795 rtx op;
2796 enum machine_mode mode;
2797 {
2798 if (!TARGET_64BIT)
2799 return immediate_operand (op, mode);
2800 return x86_64_sign_extended_value (op);
2801 }
2802
2803 /* Return nonzero if OP is immediate operand representable on x86_64. */
2804
2805 int
2806 x86_64_zext_immediate_operand (op, mode)
2807 rtx op;
2808 enum machine_mode mode ATTRIBUTE_UNUSED;
2809 {
2810 return x86_64_zero_extended_value (op);
2811 }
2812
2813 /* Return nonzero if OP is (const_int 1), else return zero. */
2814
2815 int
2816 const_int_1_operand (op, mode)
2817 rtx op;
2818 enum machine_mode mode ATTRIBUTE_UNUSED;
2819 {
2820 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2821 }
2822
2823 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2824 reference and a constant. */
2825
2826 int
2827 symbolic_operand (op, mode)
2828 register rtx op;
2829 enum machine_mode mode ATTRIBUTE_UNUSED;
2830 {
2831 switch (GET_CODE (op))
2832 {
2833 case SYMBOL_REF:
2834 case LABEL_REF:
2835 return 1;
2836
2837 case CONST:
2838 op = XEXP (op, 0);
2839 if (GET_CODE (op) == SYMBOL_REF
2840 || GET_CODE (op) == LABEL_REF
2841 || (GET_CODE (op) == UNSPEC
2842 && (XINT (op, 1) == 6
2843 || XINT (op, 1) == 7
2844 || XINT (op, 1) == 15)))
2845 return 1;
2846 if (GET_CODE (op) != PLUS
2847 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2848 return 0;
2849
2850 op = XEXP (op, 0);
2851 if (GET_CODE (op) == SYMBOL_REF
2852 || GET_CODE (op) == LABEL_REF)
2853 return 1;
2854 /* Only @GOTOFF gets offsets. */
2855 if (GET_CODE (op) != UNSPEC
2856 || XINT (op, 1) != 7)
2857 return 0;
2858
2859 op = XVECEXP (op, 0, 0);
2860 if (GET_CODE (op) == SYMBOL_REF
2861 || GET_CODE (op) == LABEL_REF)
2862 return 1;
2863 return 0;
2864
2865 default:
2866 return 0;
2867 }
2868 }
2869
2870 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2871
2872 int
2873 pic_symbolic_operand (op, mode)
2874 register rtx op;
2875 enum machine_mode mode ATTRIBUTE_UNUSED;
2876 {
2877 if (GET_CODE (op) != CONST)
2878 return 0;
2879 op = XEXP (op, 0);
2880 if (TARGET_64BIT)
2881 {
2882 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2883 return 1;
2884 }
2885 else
2886 {
2887 if (GET_CODE (op) == UNSPEC)
2888 return 1;
2889 if (GET_CODE (op) != PLUS
2890 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2891 return 0;
2892 op = XEXP (op, 0);
2893 if (GET_CODE (op) == UNSPEC)
2894 return 1;
2895 }
2896 return 0;
2897 }
2898
2899 /* Return true if OP is a symbolic operand that resolves locally. */
2900
2901 static int
2902 local_symbolic_operand (op, mode)
2903 rtx op;
2904 enum machine_mode mode ATTRIBUTE_UNUSED;
2905 {
2906 if (GET_CODE (op) == LABEL_REF)
2907 return 1;
2908
2909 if (GET_CODE (op) == CONST
2910 && GET_CODE (XEXP (op, 0)) == PLUS
2911 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2912 op = XEXP (XEXP (op, 0), 0);
2913
2914 if (GET_CODE (op) != SYMBOL_REF)
2915 return 0;
2916
2917 /* These we've been told are local by varasm and encode_section_info
2918 respectively. */
2919 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2920 return 1;
2921
2922 /* There is, however, a not insubstantial body of code in the rest of
2923 the compiler that assumes it can just stick the results of
2924 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2925 /* ??? This is a hack. Should update the body of the compiler to
2926 always create a DECL and invoke ENCODE_SECTION_INFO. */
2927 if (strncmp (XSTR (op, 0), internal_label_prefix,
2928 internal_label_prefix_len) == 0)
2929 return 1;
2930
2931 return 0;
2932 }
2933
2934 /* Test for a valid operand for a call instruction. Don't allow the
2935 arg pointer register or virtual regs since they may decay into
2936 reg + const, which the patterns can't handle. */
2937
2938 int
2939 call_insn_operand (op, mode)
2940 rtx op;
2941 enum machine_mode mode ATTRIBUTE_UNUSED;
2942 {
2943 /* Disallow indirect through a virtual register. This leads to
2944 compiler aborts when trying to eliminate them. */
2945 if (GET_CODE (op) == REG
2946 && (op == arg_pointer_rtx
2947 || op == frame_pointer_rtx
2948 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2949 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2950 return 0;
2951
2952 /* Disallow `call 1234'. Due to varying assembler lameness this
2953 gets either rejected or translated to `call .+1234'. */
2954 if (GET_CODE (op) == CONST_INT)
2955 return 0;
2956
2957 /* Explicitly allow SYMBOL_REF even if pic. */
2958 if (GET_CODE (op) == SYMBOL_REF)
2959 return 1;
2960
2961 /* Half-pic doesn't allow anything but registers and constants.
2962 We've just taken care of the latter. */
2963 if (HALF_PIC_P ())
2964 return register_operand (op, Pmode);
2965
2966 /* Otherwise we can allow any general_operand in the address. */
2967 return general_operand (op, Pmode);
2968 }
2969
2970 int
2971 constant_call_address_operand (op, mode)
2972 rtx op;
2973 enum machine_mode mode ATTRIBUTE_UNUSED;
2974 {
2975 if (GET_CODE (op) == CONST
2976 && GET_CODE (XEXP (op, 0)) == PLUS
2977 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2978 op = XEXP (XEXP (op, 0), 0);
2979 return GET_CODE (op) == SYMBOL_REF;
2980 }
2981
2982 /* Match exactly zero and one. */
2983
2984 int
2985 const0_operand (op, mode)
2986 register rtx op;
2987 enum machine_mode mode;
2988 {
2989 return op == CONST0_RTX (mode);
2990 }
2991
2992 int
2993 const1_operand (op, mode)
2994 register rtx op;
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2996 {
2997 return op == const1_rtx;
2998 }
2999
3000 /* Match 2, 4, or 8. Used for leal multiplicands. */
3001
3002 int
3003 const248_operand (op, mode)
3004 register rtx op;
3005 enum machine_mode mode ATTRIBUTE_UNUSED;
3006 {
3007 return (GET_CODE (op) == CONST_INT
3008 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3009 }
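
/* These are exactly the scale factors that fit the SIB byte; e.g. the
   (hypothetical) insn leal (%eax,%ebx,4), %ecx uses the constant 4
   accepted here.  */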
3010
3011 /* True if this is a constant appropriate for an increment or decrement. */
3012
3013 int
3014 incdec_operand (op, mode)
3015 register rtx op;
3016 enum machine_mode mode ATTRIBUTE_UNUSED;
3017 {
3018 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3019 flags register, since the carry flag is not set. */
3020 if (TARGET_PENTIUM4 && !optimize_size)
3021 return 0;
3022 return op == const1_rtx || op == constm1_rtx;
3023 }
3024
3025 /* Return nonzero if OP is acceptable as operand of DImode shift
3026 expander. */
3027
3028 int
3029 shiftdi_operand (op, mode)
3030 rtx op;
3031 enum machine_mode mode ATTRIBUTE_UNUSED;
3032 {
3033 if (TARGET_64BIT)
3034 return nonimmediate_operand (op, mode);
3035 else
3036 return register_operand (op, mode);
3037 }
3038
3039 /* Return false if this is the stack pointer, or any other fake
3040 register eliminable to the stack pointer. Otherwise, this is
3041 a register operand.
3042
3043 This is used to prevent esp from being used as an index reg,
3044 which would only happen in pathological cases. */
3045
3046 int
3047 reg_no_sp_operand (op, mode)
3048 register rtx op;
3049 enum machine_mode mode;
3050 {
3051 rtx t = op;
3052 if (GET_CODE (t) == SUBREG)
3053 t = SUBREG_REG (t);
3054 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3055 return 0;
3056
3057 return register_operand (op, mode);
3058 }
3059
3060 int
3061 mmx_reg_operand (op, mode)
3062 register rtx op;
3063 enum machine_mode mode ATTRIBUTE_UNUSED;
3064 {
3065 return MMX_REG_P (op);
3066 }
3067
3068 /* Return false if this is any eliminable register. Otherwise
3069 general_operand. */
3070
3071 int
3072 general_no_elim_operand (op, mode)
3073 register rtx op;
3074 enum machine_mode mode;
3075 {
3076 rtx t = op;
3077 if (GET_CODE (t) == SUBREG)
3078 t = SUBREG_REG (t);
3079 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3080 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3081 || t == virtual_stack_dynamic_rtx)
3082 return 0;
3083 if (REG_P (t)
3084 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3085 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3086 return 0;
3087
3088 return general_operand (op, mode);
3089 }
3090
3091 /* Return false if this is any eliminable register. Otherwise
3092 register_operand or const_int. */
3093
3094 int
3095 nonmemory_no_elim_operand (op, mode)
3096 register rtx op;
3097 enum machine_mode mode;
3098 {
3099 rtx t = op;
3100 if (GET_CODE (t) == SUBREG)
3101 t = SUBREG_REG (t);
3102 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3103 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3104 || t == virtual_stack_dynamic_rtx)
3105 return 0;
3106
3107 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3108 }
3109
3110 /* Return true if op is a Q_REGS class register. */
3111
3112 int
3113 q_regs_operand (op, mode)
3114 register rtx op;
3115 enum machine_mode mode;
3116 {
3117 if (mode != VOIDmode && GET_MODE (op) != mode)
3118 return 0;
3119 if (GET_CODE (op) == SUBREG)
3120 op = SUBREG_REG (op);
3121 return QI_REG_P (op);
3122 }
3123
3124 /* Return true if op is a NON_Q_REGS class register. */
3125
3126 int
3127 non_q_regs_operand (op, mode)
3128 register rtx op;
3129 enum machine_mode mode;
3130 {
3131 if (mode != VOIDmode && GET_MODE (op) != mode)
3132 return 0;
3133 if (GET_CODE (op) == SUBREG)
3134 op = SUBREG_REG (op);
3135 return NON_QI_REG_P (op);
3136 }
3137
3138 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3139 insns. */
3140 int
3141 sse_comparison_operator (op, mode)
3142 rtx op;
3143 enum machine_mode mode ATTRIBUTE_UNUSED;
3144 {
3145 enum rtx_code code = GET_CODE (op);
3146 switch (code)
3147 {
3148 /* Operations supported directly. */
3149 case EQ:
3150 case LT:
3151 case LE:
3152 case UNORDERED:
3153 case NE:
3154 case UNGE:
3155 case UNGT:
3156 case ORDERED:
3157 return 1;
3158 /* These are equivalent to ones above in non-IEEE comparisons. */
3159 case UNEQ:
3160 case UNLT:
3161 case UNLE:
3162 case LTGT:
3163 case GE:
3164 case GT:
3165 return !TARGET_IEEE_FP;
3166 default:
3167 return 0;
3168 }
3169 }
3170 /* Return 1 if OP is a valid comparison operator in valid mode. */
3171 int
3172 ix86_comparison_operator (op, mode)
3173 register rtx op;
3174 enum machine_mode mode;
3175 {
3176 enum machine_mode inmode;
3177 enum rtx_code code = GET_CODE (op);
3178 if (mode != VOIDmode && GET_MODE (op) != mode)
3179 return 0;
3180 if (GET_RTX_CLASS (code) != '<')
3181 return 0;
3182 inmode = GET_MODE (XEXP (op, 0));
3183
3184 if (inmode == CCFPmode || inmode == CCFPUmode)
3185 {
3186 enum rtx_code second_code, bypass_code;
3187 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3188 return (bypass_code == NIL && second_code == NIL);
3189 }
3190 switch (code)
3191 {
3192 case EQ: case NE:
3193 return 1;
3194 case LT: case GE:
3195 if (inmode == CCmode || inmode == CCGCmode
3196 || inmode == CCGOCmode || inmode == CCNOmode)
3197 return 1;
3198 return 0;
3199 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3200 if (inmode == CCmode)
3201 return 1;
3202 return 0;
3203 case GT: case LE:
3204 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3205 return 1;
3206 return 0;
3207 default:
3208 return 0;
3209 }
3210 }
3211
3212 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3213
3214 int
3215 fcmov_comparison_operator (op, mode)
3216 register rtx op;
3217 enum machine_mode mode;
3218 {
3219 enum machine_mode inmode;
3220 enum rtx_code code = GET_CODE (op);
3221 if (mode != VOIDmode && GET_MODE (op) != mode)
3222 return 0;
3223 if (GET_RTX_CLASS (code) != '<')
3224 return 0;
3225 inmode = GET_MODE (XEXP (op, 0));
3226 if (inmode == CCFPmode || inmode == CCFPUmode)
3227 {
3228 enum rtx_code second_code, bypass_code;
3229 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3230 if (bypass_code != NIL || second_code != NIL)
3231 return 0;
3232 code = ix86_fp_compare_code_to_integer (code);
3233 }
3234 /* The i387 supports just a limited set of condition codes. */
3235 switch (code)
3236 {
3237 case LTU: case GTU: case LEU: case GEU:
3238 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3239 return 1;
3240 return 0;
3241 case ORDERED: case UNORDERED:
3242 case EQ: case NE:
3243 return 1;
3244 default:
3245 return 0;
3246 }
3247 }
3248
3249 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3250
3251 int
3252 promotable_binary_operator (op, mode)
3253 register rtx op;
3254 enum machine_mode mode ATTRIBUTE_UNUSED;
3255 {
3256 switch (GET_CODE (op))
3257 {
3258 case MULT:
3259 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3260 but the 386 and 486 do HImode multiplies faster. */
3261 return ix86_cpu > PROCESSOR_I486;
3262 case PLUS:
3263 case AND:
3264 case IOR:
3265 case XOR:
3266 case ASHIFT:
3267 return 1;
3268 default:
3269 return 0;
3270 }
3271 }
3272
3273 /* Nearly general operand, but accept any const_double, since we wish
3274 to be able to drop them into memory rather than have them get pulled
3275 into registers. */
3276
3277 int
3278 cmp_fp_expander_operand (op, mode)
3279 register rtx op;
3280 enum machine_mode mode;
3281 {
3282 if (mode != VOIDmode && mode != GET_MODE (op))
3283 return 0;
3284 if (GET_CODE (op) == CONST_DOUBLE)
3285 return 1;
3286 return general_operand (op, mode);
3287 }
3288
3289 /* Match an SI or HImode register for a zero_extract. */
3290
3291 int
3292 ext_register_operand (op, mode)
3293 register rtx op;
3294 enum machine_mode mode ATTRIBUTE_UNUSED;
3295 {
3296 int regno;
3297 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3298 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3299 return 0;
3300
3301 if (!register_operand (op, VOIDmode))
3302 return 0;
3303
3304 /* Be careful to accept only registers having upper parts. */
3305 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3306 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3307 }
3308
3309 /* Return 1 if this is a valid binary floating-point operation.
3310 OP is the expression matched, and MODE is its mode. */
3311
3312 int
3313 binary_fp_operator (op, mode)
3314 register rtx op;
3315 enum machine_mode mode;
3316 {
3317 if (mode != VOIDmode && mode != GET_MODE (op))
3318 return 0;
3319
3320 switch (GET_CODE (op))
3321 {
3322 case PLUS:
3323 case MINUS:
3324 case MULT:
3325 case DIV:
3326 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3327
3328 default:
3329 return 0;
3330 }
3331 }
3332
3333 int
3334 mult_operator (op, mode)
3335 register rtx op;
3336 enum machine_mode mode ATTRIBUTE_UNUSED;
3337 {
3338 return GET_CODE (op) == MULT;
3339 }
3340
3341 int
3342 div_operator (op, mode)
3343 register rtx op;
3344 enum machine_mode mode ATTRIBUTE_UNUSED;
3345 {
3346 return GET_CODE (op) == DIV;
3347 }
3348
3349 int
3350 arith_or_logical_operator (op, mode)
3351 rtx op;
3352 enum machine_mode mode;
3353 {
3354 return ((mode == VOIDmode || GET_MODE (op) == mode)
3355 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3356 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3357 }
3358
3359 /* Returns 1 if OP is memory operand with a displacement. */
3360
3361 int
3362 memory_displacement_operand (op, mode)
3363 register rtx op;
3364 enum machine_mode mode;
3365 {
3366 struct ix86_address parts;
3367
3368 if (! memory_operand (op, mode))
3369 return 0;
3370
3371 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3372 abort ();
3373
3374 return parts.disp != NULL_RTX;
3375 }
3376
3377 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3378 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3379
3380 ??? It seems likely that this will only work because cmpsi is an
3381 expander, and no actual insns use this. */
3382
3383 int
3384 cmpsi_operand (op, mode)
3385 rtx op;
3386 enum machine_mode mode;
3387 {
3388 if (nonimmediate_operand (op, mode))
3389 return 1;
3390
3391 if (GET_CODE (op) == AND
3392 && GET_MODE (op) == SImode
3393 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3394 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3395 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3396 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3397 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3398 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3399 return 1;
3400
3401 return 0;
3402 }
3403
3404 /* Returns 1 if OP is a memory operand that cannot be represented by the
3405 modRM array. */
3406
3407 int
3408 long_memory_operand (op, mode)
3409 register rtx op;
3410 enum machine_mode mode;
3411 {
3412 if (! memory_operand (op, mode))
3413 return 0;
3414
3415 return memory_address_length (op) != 0;
3416 }
3417
3418 /* Return nonzero if the rtx is known aligned. */
3419
3420 int
3421 aligned_operand (op, mode)
3422 rtx op;
3423 enum machine_mode mode;
3424 {
3425 struct ix86_address parts;
3426
3427 if (!general_operand (op, mode))
3428 return 0;
3429
3430 /* Registers and immediate operands are always "aligned". */
3431 if (GET_CODE (op) != MEM)
3432 return 1;
3433
3434 /* Don't even try to do any aligned optimizations with volatiles. */
3435 if (MEM_VOLATILE_P (op))
3436 return 0;
3437
3438 op = XEXP (op, 0);
3439
3440 /* Pushes and pops are only valid on the stack pointer. */
3441 if (GET_CODE (op) == PRE_DEC
3442 || GET_CODE (op) == POST_INC)
3443 return 1;
3444
3445 /* Decode the address. */
3446 if (! ix86_decompose_address (op, &parts))
3447 abort ();
3448
3449 /* Look for some component that isn't known to be aligned. */
3450 if (parts.index)
3451 {
3452 if (parts.scale < 4
3453 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3454 return 0;
3455 }
3456 if (parts.base)
3457 {
3458 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3459 return 0;
3460 }
3461 if (parts.disp)
3462 {
3463 if (GET_CODE (parts.disp) != CONST_INT
3464 || (INTVAL (parts.disp) & 3) != 0)
3465 return 0;
3466 }
3467
3468 /* Didn't find one -- this must be an aligned address. */
3469 return 1;
3470 }
3471 \f
3472 /* Return true if the constant is something that can be loaded with
3473 a special instruction. Only handle 0.0 and 1.0; others are less
3474 worthwhile. */
3475
3476 int
3477 standard_80387_constant_p (x)
3478 rtx x;
3479 {
3480 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3481 return -1;
3482 /* Note that the 80387 has other constants, such as pi, that we should support
3483 too. On some machines, these are much slower to load as a standard constant
3484 than to load from doubles in memory. */
3485 if (x == CONST0_RTX (GET_MODE (x)))
3486 return 1;
3487 if (x == CONST1_RTX (GET_MODE (x)))
3488 return 2;
3489 return 0;
3490 }
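
/* The return values are keyed to the special i387 load instructions:
   a result of 1 lets the move patterns emit fldz for 0.0 and a result
   of 2 lets them emit fld1 for 1.0; anything else has to be loaded
   from memory.  */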
3491
3492 /* Return 1 if X is an FP constant we can load into an SSE register without
3493 using memory. */
3494 int
3495 standard_sse_constant_p (x)
3496 rtx x;
3497 {
3498 if (GET_CODE (x) != CONST_DOUBLE)
3499 return -1;
3500 return (x == CONST0_RTX (GET_MODE (x)));
3501 }
3502
3503 /* Returns 1 if OP contains a symbol reference. */
3504
3505 int
3506 symbolic_reference_mentioned_p (op)
3507 rtx op;
3508 {
3509 register const char *fmt;
3510 register int i;
3511
3512 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3513 return 1;
3514
3515 fmt = GET_RTX_FORMAT (GET_CODE (op));
3516 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3517 {
3518 if (fmt[i] == 'E')
3519 {
3520 register int j;
3521
3522 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3523 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3524 return 1;
3525 }
3526
3527 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3528 return 1;
3529 }
3530
3531 return 0;
3532 }
3533
3534 /* Return 1 if it is appropriate to emit `ret' instructions in the
3535 body of a function. Do this only if the epilogue is simple, needing a
3536 couple of insns. Prior to reloading, we can't tell how many registers
3537 must be saved, so return 0 then. Return 0 if there is no frame
3538 marker to de-allocate.
3539
3540 If NON_SAVING_SETJMP is defined and true, then it is not possible
3541 for the epilogue to be simple, so return 0. This is a special case
3542 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3543 until final, but jump_optimize may need to know sooner if a
3544 `return' is OK. */
3545
3546 int
3547 ix86_can_use_return_insn_p ()
3548 {
3549 struct ix86_frame frame;
3550
3551 #ifdef NON_SAVING_SETJMP
3552 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3553 return 0;
3554 #endif
3555
3556 if (! reload_completed || frame_pointer_needed)
3557 return 0;
3558
3559 /* Don't allow more than 32K bytes of pops, since that's all we can
3560 handle with one instruction. */
3561 if (current_function_pops_args
3562 && current_function_args_size >= 32768)
3563 return 0;
3564
3565 ix86_compute_frame_layout (&frame);
3566 return frame.to_allocate == 0 && frame.nregs == 0;
3567 }
3568 \f
3569 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
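/* For illustration: a constant fits this field when sign-extending its
   low 32 bits reproduces the full 64-bit value.  So -1 and 0x7fffffff
   fit, while 0x80000000 and 0x100000000 do not; 0x80000000 does satisfy
   x86_64_zero_extended_value below.  */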
3570 int
3571 x86_64_sign_extended_value (value)
3572 rtx value;
3573 {
3574 switch (GET_CODE (value))
3575 {
3576 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3577 to be at least 32 and thus all acceptable constants are
3578 represented as CONST_INTs. */
3579 case CONST_INT:
3580 if (HOST_BITS_PER_WIDE_INT == 32)
3581 return 1;
3582 else
3583 {
3584 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3585 return trunc_int_for_mode (val, SImode) == val;
3586 }
3587 break;
3588
3589 /* For certain code models, the symbolic references are known to fit. */
3590 case SYMBOL_REF:
3591 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3592
3593 /* For certain code models, the code is near as well. */
3594 case LABEL_REF:
3595 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3596
3597 /* We may also accept offsetted memory references in certain special
3598 cases. */
3599 case CONST:
3600 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3601 && XVECLEN (XEXP (value, 0), 0) == 1
3602 && XINT (XEXP (value, 0), 1) == 15)
3603 return 1;
3604 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3605 {
3606 rtx op1 = XEXP (XEXP (value, 0), 0);
3607 rtx op2 = XEXP (XEXP (value, 0), 1);
3608 HOST_WIDE_INT offset;
3609
3610 if (ix86_cmodel == CM_LARGE)
3611 return 0;
3612 if (GET_CODE (op2) != CONST_INT)
3613 return 0;
3614 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3615 switch (GET_CODE (op1))
3616 {
3617 case SYMBOL_REF:
3618 /* For CM_SMALL assume that the latest object is 1MB before the
3619 end of the 31-bit boundary. We may also accept pretty
3620 large negative constants, knowing that all objects are
3621 in the positive half of the address space. */
3622 if (ix86_cmodel == CM_SMALL
3623 && offset < 1024*1024*1024
3624 && trunc_int_for_mode (offset, SImode) == offset)
3625 return 1;
3626 /* For CM_KERNEL we know that all objects reside in the
3627 negative half of the 32-bit address space. We may not
3628 accept negative offsets, since they may fall just outside
3629 the range, but we may accept pretty large positive ones. */
3630 if (ix86_cmodel == CM_KERNEL
3631 && offset > 0
3632 && trunc_int_for_mode (offset, SImode) == offset)
3633 return 1;
3634 break;
3635 case LABEL_REF:
3636 /* These conditions are similar to SYMBOL_REF ones, just the
3637 constraints for code models differ. */
3638 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3639 && offset < 1024*1024*1024
3640 && trunc_int_for_mode (offset, SImode) == offset)
3641 return 1;
3642 if (ix86_cmodel == CM_KERNEL
3643 && offset > 0
3644 && trunc_int_for_mode (offset, SImode) == offset)
3645 return 1;
3646 break;
3647 default:
3648 return 0;
3649 }
3650 }
3651 return 0;
3652 default:
3653 return 0;
3654 }
3655 }
3656
3657 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
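/* For illustration: this accepts any constant whose upper 32 bits are
   zero, e.g. 0x80000000 and 0xffffffff, matching what a 32-bit
   "movl $imm32, %reg" zero-extends into a 64-bit register.  Negative
   constants such as -1 are rejected here but may be accepted by
   x86_64_sign_extended_value above.  */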
3658 int
3659 x86_64_zero_extended_value (value)
3660 rtx value;
3661 {
3662 switch (GET_CODE (value))
3663 {
3664 case CONST_DOUBLE:
3665 if (HOST_BITS_PER_WIDE_INT == 32)
3666 return (GET_MODE (value) == VOIDmode
3667 && !CONST_DOUBLE_HIGH (value));
3668 else
3669 return 0;
3670 case CONST_INT:
3671 if (HOST_BITS_PER_WIDE_INT == 32)
3672 return INTVAL (value) >= 0;
3673 else
3674 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3675 break;
3676
3677 /* For certain code models, the symbolic references are known to fit. */
3678 case SYMBOL_REF:
3679 return ix86_cmodel == CM_SMALL;
3680
3681 /* For certain code models, the code is near as well. */
3682 case LABEL_REF:
3683 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3684
3685 /* We may also accept offsetted memory references in certain special
3686 cases. */
3687 case CONST:
3688 if (GET_CODE (XEXP (value, 0)) == PLUS)
3689 {
3690 rtx op1 = XEXP (XEXP (value, 0), 0);
3691 rtx op2 = XEXP (XEXP (value, 0), 1);
3692
3693 if (ix86_cmodel == CM_LARGE)
3694 return 0;
3695 switch (GET_CODE (op1))
3696 {
3697 case SYMBOL_REF:
3698 return 0;
3699 /* For small code model we may accept pretty large positive
3700 offsets, since one bit is available for free. Negative
3701 offsets are limited by the size of NULL pointer area
3702 specified by the ABI. */
3703 if (ix86_cmodel == CM_SMALL
3704 && GET_CODE (op2) == CONST_INT
3705 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3706 && (trunc_int_for_mode (INTVAL (op2), SImode)
3707 == INTVAL (op2)))
3708 return 1;
3709 /* ??? For the kernel, we may accept adjustment of
3710 -0x10000000, since we know that it will just convert
3711 negative address space to positive, but perhaps this
3712 is not worthwhile. */
3713 break;
3714 case LABEL_REF:
3715 /* These conditions are similar to SYMBOL_REF ones, just the
3716 constraints for code models differ. */
3717 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3718 && GET_CODE (op2) == CONST_INT
3719 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3720 && (trunc_int_for_mode (INTVAL (op2), SImode)
3721 == INTVAL (op2)))
3722 return 1;
3723 break;
3724 default:
3725 return 0;
3726 }
3727 }
3728 return 0;
3729 default:
3730 return 0;
3731 }
3732 }
3733
3734 /* Value should be nonzero if functions must have frame pointers.
3735 Zero means the frame pointer need not be set up (and parms may
3736 be accessed via the stack pointer) in functions that seem suitable. */
3737
3738 int
3739 ix86_frame_pointer_required ()
3740 {
3741 /* If we accessed previous frames, then the generated code expects
3742 to be able to access the saved ebp value in our frame. */
3743 if (cfun->machine->accesses_prev_frame)
3744 return 1;
3745
3746 /* Several x86 OSes need a frame pointer for other reasons,
3747 usually pertaining to setjmp. */
3748 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3749 return 1;
3750
3751 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3752 the frame pointer by default. Turn it back on now if we've not
3753 got a leaf function. */
3754 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3755 return 1;
3756
3757 return 0;
3758 }
3759
3760 /* Record that the current function accesses previous call frames. */
3761
3762 void
3763 ix86_setup_frame_addresses ()
3764 {
3765 cfun->machine->accesses_prev_frame = 1;
3766 }
3767 \f
3768 static char pic_label_name[32];
3769
3770 /* This function generates code for -fpic that loads %ebx with
3771 the return address of the caller and then returns. */
3772
3773 void
3774 ix86_asm_file_end (file)
3775 FILE *file;
3776 {
3777 rtx xops[2];
3778
3779 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3780 return;
3781
3782 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3783 to updating relocations against a section being discarded, such that this
3784 doesn't work. Ought to detect this at configure time. */
3785 #if 0
3786 /* The trick here is to create a linkonce section containing the
3787 pic label thunk, but to refer to it with an internal label.
3788 Because the label is internal, we don't have inter-dso name
3789 binding issues on hosts that don't support ".hidden".
3790
3791 In order to use these macros, however, we must create a fake
3792 function decl. */
3793 if (targetm.have_named_sections)
3794 {
3795 tree decl = build_decl (FUNCTION_DECL,
3796 get_identifier ("i686.get_pc_thunk"),
3797 error_mark_node);
3798 DECL_ONE_ONLY (decl) = 1;
3799 UNIQUE_SECTION (decl, 0);
3800 named_section (decl, NULL);
3801 }
3802 else
3803 #else
3804 text_section ();
3805 #endif
3806
3807 /* This used to call ASM_DECLARE_FUNCTION_NAME(), but since it is an
3808 internal (non-global) label that's being emitted, it didn't make
3809 sense to have .type information for local labels. This caused
3810 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3811 me debug info for a label that you're declaring non-global?), so this
3812 was changed to call ASM_OUTPUT_LABEL() instead. */
3813
3814 ASM_OUTPUT_LABEL (file, pic_label_name);
3815
3816 xops[0] = pic_offset_table_rtx;
3817 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3818 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3819 output_asm_insn ("ret", xops);
3820 }
3821
3822 void
3823 load_pic_register ()
3824 {
3825 rtx gotsym, pclab;
3826
3827 if (TARGET_64BIT)
3828 abort ();
3829
3830 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3831
3832 if (TARGET_DEEP_BRANCH_PREDICTION)
3833 {
3834 if (! pic_label_name[0])
3835 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3836 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3837 }
3838 else
3839 {
3840 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3841 }
3842
3843 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3844
3845 if (! TARGET_DEEP_BRANCH_PREDICTION)
3846 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3847
3848 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3849 }
3850
3851 /* Generate an "push" pattern for input ARG. */
3852
3853 static rtx
3854 gen_push (arg)
3855 rtx arg;
3856 {
3857 return gen_rtx_SET (VOIDmode,
3858 gen_rtx_MEM (Pmode,
3859 gen_rtx_PRE_DEC (Pmode,
3860 stack_pointer_rtx)),
3861 arg);
3862 }
3863
3864 /* Return 1 if we need to save REGNO. */
3865 static int
3866 ix86_save_reg (regno, maybe_eh_return)
3867 int regno;
3868 int maybe_eh_return;
3869 {
3870 if (flag_pic
3871 && ! TARGET_64BIT
3872 && regno == PIC_OFFSET_TABLE_REGNUM
3873 && (current_function_uses_pic_offset_table
3874 || current_function_uses_const_pool
3875 || current_function_calls_eh_return))
3876 return 1;
3877
3878 if (current_function_calls_eh_return && maybe_eh_return)
3879 {
3880 unsigned i;
3881 for (i = 0; ; i++)
3882 {
3883 unsigned test = EH_RETURN_DATA_REGNO (i);
3884 if (test == INVALID_REGNUM)
3885 break;
3886 if (test == (unsigned) regno)
3887 return 1;
3888 }
3889 }
3890
3891 return (regs_ever_live[regno]
3892 && !call_used_regs[regno]
3893 && !fixed_regs[regno]
3894 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3895 }
3896
3897 /* Return number of registers to be saved on the stack. */
3898
3899 static int
3900 ix86_nsaved_regs ()
3901 {
3902 int nregs = 0;
3903 int regno;
3904
3905 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3906 if (ix86_save_reg (regno, true))
3907 nregs++;
3908 return nregs;
3909 }
3910
3911 /* Return the offset between two registers, one to be eliminated, and the other
3912 its replacement, at the start of a routine. */
3913
3914 HOST_WIDE_INT
3915 ix86_initial_elimination_offset (from, to)
3916 int from;
3917 int to;
3918 {
3919 struct ix86_frame frame;
3920 ix86_compute_frame_layout (&frame);
3921
3922 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3923 return frame.hard_frame_pointer_offset;
3924 else if (from == FRAME_POINTER_REGNUM
3925 && to == HARD_FRAME_POINTER_REGNUM)
3926 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3927 else
3928 {
3929 if (to != STACK_POINTER_REGNUM)
3930 abort ();
3931 else if (from == ARG_POINTER_REGNUM)
3932 return frame.stack_pointer_offset;
3933 else if (from != FRAME_POINTER_REGNUM)
3934 abort ();
3935 else
3936 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3937 }
3938 }
3939
3940 /* Fill the ix86_frame structure *FRAME describing the frame of the currently compiled function. */
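/* A sketch of the areas accounted for below, in the order they are
   laid out (offsets are measured from the incoming argument area;
   derived from the code that follows):

	the return address, plus the saved %ebp when frame_pointer_needed
	  (together giving hard_frame_pointer_offset),
	the register save area (nregs words),
	the va-arg register save area (va_arg_size),
	padding1 up to stack_alignment_needed (frame_pointer_offset),
	the local variables (get_frame_size ()),
	the outgoing arguments area,
	padding2 up to preferred_alignment (stack_pointer_offset).  */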
3941
3942 static void
3943 ix86_compute_frame_layout (frame)
3944 struct ix86_frame *frame;
3945 {
3946 HOST_WIDE_INT total_size;
3947 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3948 int offset;
3949 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3950 HOST_WIDE_INT size = get_frame_size ();
3951
3952 frame->nregs = ix86_nsaved_regs ();
3953 total_size = size;
3954
3955 /* Skip the return address and the saved base pointer. */
3956 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3957
3958 frame->hard_frame_pointer_offset = offset;
3959
3960 /* Do some sanity checking of stack_alignment_needed and
3961 preferred_alignment, since the i386 port is the only one using these
3962 features, and they may break easily. */
3963
3964 if (size && !stack_alignment_needed)
3965 abort ();
3966 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3967 abort ();
3968 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3969 abort ();
3970 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3971 abort ();
3972
3973 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3974 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3975
3976 /* Register save area */
3977 offset += frame->nregs * UNITS_PER_WORD;
3978
3979 /* Va-arg area */
3980 if (ix86_save_varrargs_registers)
3981 {
3982 offset += X86_64_VARARGS_SIZE;
3983 frame->va_arg_size = X86_64_VARARGS_SIZE;
3984 }
3985 else
3986 frame->va_arg_size = 0;
3987
3988 /* Align the start of the frame for the local data. */
3989 frame->padding1 = ((offset + stack_alignment_needed - 1)
3990 & -stack_alignment_needed) - offset;
3991
3992 offset += frame->padding1;
3993
3994 /* Frame pointer points here. */
3995 frame->frame_pointer_offset = offset;
3996
3997 offset += size;
3998
3999 /* Add outgoing arguments area. */
4000 if (ACCUMULATE_OUTGOING_ARGS)
4001 {
4002 offset += current_function_outgoing_args_size;
4003 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4004 }
4005 else
4006 frame->outgoing_arguments_size = 0;
4007
4008 /* Align stack boundary. */
4009 frame->padding2 = ((offset + preferred_alignment - 1)
4010 & -preferred_alignment) - offset;
4011
4012 offset += frame->padding2;
4013
4014 /* We've reached end of stack frame. */
4015 frame->stack_pointer_offset = offset;
4016
4017 /* Size prologue needs to allocate. */
4018 frame->to_allocate =
4019 (size + frame->padding1 + frame->padding2
4020 + frame->outgoing_arguments_size + frame->va_arg_size);
4021
4022 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4023 && current_function_is_leaf)
4024 {
4025 frame->red_zone_size = frame->to_allocate;
4026 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4027 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4028 }
4029 else
4030 frame->red_zone_size = 0;
4031 frame->to_allocate -= frame->red_zone_size;
4032 frame->stack_pointer_offset -= frame->red_zone_size;
4033 #if 0
4034 fprintf (stderr, "nregs: %i\n", frame->nregs);
4035 fprintf (stderr, "size: %i\n", size);
4036 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4037 fprintf (stderr, "padding1: %i\n", frame->padding1);
4038 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4039 fprintf (stderr, "padding2: %i\n", frame->padding2);
4040 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4041 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4042 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4043 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4044 frame->hard_frame_pointer_offset);
4045 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4046 #endif
4047 }
4048
4049 /* Emit code to save registers in the prologue. */
4050
4051 static void
4052 ix86_emit_save_regs ()
4053 {
4054 register int regno;
4055 rtx insn;
4056
4057 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4058 if (ix86_save_reg (regno, true))
4059 {
4060 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4061 RTX_FRAME_RELATED_P (insn) = 1;
4062 }
4063 }
4064
4065 /* Emit code to save registers using MOV insns. The first register
4066 is saved at POINTER + OFFSET. */
4067 static void
4068 ix86_emit_save_regs_using_mov (pointer, offset)
4069 rtx pointer;
4070 HOST_WIDE_INT offset;
4071 {
4072 int regno;
4073 rtx insn;
4074
4075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4076 if (ix86_save_reg (regno, true))
4077 {
4078 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4079 Pmode, offset),
4080 gen_rtx_REG (Pmode, regno));
4081 RTX_FRAME_RELATED_P (insn) = 1;
4082 offset += UNITS_PER_WORD;
4083 }
4084 }
4085
4086 /* Expand the prologue into a bunch of separate insns. */
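/* In outline (a summary of the code below, not an exhaustive list of
   cases): when a frame pointer is needed we emit the equivalent of
   "pushl %ebp; movl %esp, %ebp"; call-saved registers are then either
   pushed individually or, with TARGET_PROLOGUE_USING_MOVE, stored with
   mov insns after a single stack adjustment; large allocations with
   TARGET_STACK_PROBE go through a call to _alloca; finally the PIC
   register is loaded when -fpic requires it.  */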
4087
4088 void
4089 ix86_expand_prologue ()
4090 {
4091 rtx insn;
4092 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4093 || current_function_uses_const_pool)
4094 && !TARGET_64BIT);
4095 struct ix86_frame frame;
4096 int use_mov = 0;
4097 HOST_WIDE_INT allocate;
4098
4099 if (!optimize_size)
4100 {
4101 use_fast_prologue_epilogue
4102 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4103 if (TARGET_PROLOGUE_USING_MOVE)
4104 use_mov = use_fast_prologue_epilogue;
4105 }
4106 ix86_compute_frame_layout (&frame);
4107
4108 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4109 slower on all targets. Also sdb doesn't like it. */
4110
4111 if (frame_pointer_needed)
4112 {
4113 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4114 RTX_FRAME_RELATED_P (insn) = 1;
4115
4116 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4117 RTX_FRAME_RELATED_P (insn) = 1;
4118 }
4119
4120 allocate = frame.to_allocate;
4121 /* In case we are dealing only with a single register and an empty frame,
4122 push is equivalent to the mov+add sequence. */
4123 if (allocate == 0 && frame.nregs <= 1)
4124 use_mov = 0;
4125
4126 if (!use_mov)
4127 ix86_emit_save_regs ();
4128 else
4129 allocate += frame.nregs * UNITS_PER_WORD;
4130
4131 if (allocate == 0)
4132 ;
4133 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4134 {
4135 insn = emit_insn (gen_pro_epilogue_adjust_stack
4136 (stack_pointer_rtx, stack_pointer_rtx,
4137 GEN_INT (-allocate)));
4138 RTX_FRAME_RELATED_P (insn) = 1;
4139 }
4140 else
4141 {
4142 /* ??? Is this only valid for Win32? */
4143
4144 rtx arg0, sym;
4145
4146 if (TARGET_64BIT)
4147 abort ();
4148
4149 arg0 = gen_rtx_REG (SImode, 0);
4150 emit_move_insn (arg0, GEN_INT (allocate));
4151
4152 sym = gen_rtx_MEM (FUNCTION_MODE,
4153 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4154 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4155
4156 CALL_INSN_FUNCTION_USAGE (insn)
4157 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4158 CALL_INSN_FUNCTION_USAGE (insn));
4159 }
4160 if (use_mov)
4161 {
4162 if (!frame_pointer_needed || !frame.to_allocate)
4163 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4164 else
4165 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4166 -frame.nregs * UNITS_PER_WORD);
4167 }
4168
4169 #ifdef SUBTARGET_PROLOGUE
4170 SUBTARGET_PROLOGUE;
4171 #endif
4172
4173 if (pic_reg_used)
4174 load_pic_register ();
4175
4176 /* If we are profiling, make sure no instructions are scheduled before
4177 the call to mcount. However, if -fpic, the above call will have
4178 done that. */
4179 if (current_function_profile && ! pic_reg_used)
4180 emit_insn (gen_blockage ());
4181 }
4182
4183 /* Emit code to restore saved registers using MOV insns. First register
4184 is restored from POINTER + OFFSET. */
4185 static void
4186 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4187 rtx pointer;
4188 int offset;
4189 int maybe_eh_return;
4190 {
4191 int regno;
4192
4193 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4194 if (ix86_save_reg (regno, maybe_eh_return))
4195 {
4196 emit_move_insn (gen_rtx_REG (Pmode, regno),
4197 adjust_address (gen_rtx_MEM (Pmode, pointer),
4198 Pmode, offset));
4199 offset += UNITS_PER_WORD;
4200 }
4201 }
4202
4203 /* Restore function stack, frame, and registers. */
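/* In outline (a summary of the code below): registers are restored
   either with mov insns followed by a single stack adjustment or a
   "leave", or by adjusting the stack and popping them one by one;
   eh_return epilogues (STYLE == 2) additionally add the value in
   EH_RETURN_STACKADJ_RTX to the stack pointer; sibcall epilogues
   (STYLE == 0) emit no return insn; and "ret $n" or an indirect jump
   is used when the function pops its own arguments.  */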
4204
4205 void
4206 ix86_expand_epilogue (style)
4207 int style;
4208 {
4209 int regno;
4210 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4211 struct ix86_frame frame;
4212 HOST_WIDE_INT offset;
4213
4214 ix86_compute_frame_layout (&frame);
4215
4216 /* Calculate start of saved registers relative to ebp. Special care
4217 must be taken for the normal return case of a function using
4218 eh_return: the eax and edx registers are marked as saved, but not
4219 restored along this path. */
4220 offset = frame.nregs;
4221 if (current_function_calls_eh_return && style != 2)
4222 offset -= 2;
4223 offset *= -UNITS_PER_WORD;
4224
4225 /* If we're only restoring one register and sp is not valid, then
4226 use a move instruction to restore the register, since it's
4227 less work than reloading sp and popping the register.
4228 
4229 The default code results in a stack adjustment using an add/lea instruction,
4230 while this code results in a LEAVE instruction (or discrete equivalent),
4231 so it is profitable in some other cases as well, especially when there
4232 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4233 and there is exactly one register to pop. This heuristic may need some
4234 tuning in the future. */
4235 if ((!sp_valid && frame.nregs <= 1)
4236 || (TARGET_EPILOGUE_USING_MOVE
4237 && use_fast_prologue_epilogue
4238 && (frame.nregs > 1 || frame.to_allocate))
4239 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4240 || (frame_pointer_needed && TARGET_USE_LEAVE
4241 && use_fast_prologue_epilogue && frame.nregs == 1)
4242 || current_function_calls_eh_return)
4243 {
4244 /* Restore registers. We can use ebp or esp to address the memory
4245 locations. If both are available, default to ebp, since offsets
4246 are known to be small. The only exception is esp pointing directly to the
4247 end of the block of saved registers, where we may simplify the addressing
4248 mode. */
4249
4250 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4251 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4252 frame.to_allocate, style == 2);
4253 else
4254 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4255 offset, style == 2);
4256
4257 /* eh_return epilogues need %ecx added to the stack pointer. */
4258 if (style == 2)
4259 {
4260 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4261
4262 if (frame_pointer_needed)
4263 {
4264 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4265 tmp = plus_constant (tmp, UNITS_PER_WORD);
4266 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4267
4268 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4269 emit_move_insn (hard_frame_pointer_rtx, tmp);
4270
4271 emit_insn (gen_pro_epilogue_adjust_stack
4272 (stack_pointer_rtx, sa, const0_rtx));
4273 }
4274 else
4275 {
4276 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4277 tmp = plus_constant (tmp, (frame.to_allocate
4278 + frame.nregs * UNITS_PER_WORD));
4279 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4280 }
4281 }
4282 else if (!frame_pointer_needed)
4283 emit_insn (gen_pro_epilogue_adjust_stack
4284 (stack_pointer_rtx, stack_pointer_rtx,
4285 GEN_INT (frame.to_allocate
4286 + frame.nregs * UNITS_PER_WORD)));
4287 /* If not an i386, mov & pop is faster than "leave". */
4288 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4289 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4290 else
4291 {
4292 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4293 hard_frame_pointer_rtx,
4294 const0_rtx));
4295 if (TARGET_64BIT)
4296 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4297 else
4298 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4299 }
4300 }
4301 else
4302 {
4303 /* First step is to deallocate the stack frame so that we can
4304 pop the registers. */
4305 if (!sp_valid)
4306 {
4307 if (!frame_pointer_needed)
4308 abort ();
4309 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4310 hard_frame_pointer_rtx,
4311 GEN_INT (offset)));
4312 }
4313 else if (frame.to_allocate)
4314 emit_insn (gen_pro_epilogue_adjust_stack
4315 (stack_pointer_rtx, stack_pointer_rtx,
4316 GEN_INT (frame.to_allocate)));
4317
4318 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4319 if (ix86_save_reg (regno, false))
4320 {
4321 if (TARGET_64BIT)
4322 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4323 else
4324 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4325 }
4326 if (frame_pointer_needed)
4327 {
4328 /* Leave results in shorter dependency chains on CPUs that are
4329 able to grok it fast. */
4330 if (TARGET_USE_LEAVE)
4331 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4332 else if (TARGET_64BIT)
4333 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4334 else
4335 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4336 }
4337 }
4338
4339 /* Sibcall epilogues don't want a return instruction. */
4340 if (style == 0)
4341 return;
4342
4343 if (current_function_pops_args && current_function_args_size)
4344 {
4345 rtx popc = GEN_INT (current_function_pops_args);
4346
4347 /* i386 can only pop 64K bytes. If asked to pop more, pop
4348 return address, do explicit add, and jump indirectly to the
4349 caller. */
4350
4351 if (current_function_pops_args >= 65536)
4352 {
4353 rtx ecx = gen_rtx_REG (SImode, 2);
4354
4355 /* There is no "pascal" calling convention in the 64-bit ABI. */
4356 if (TARGET_64BIT)
4357 abort ();
4358
4359 emit_insn (gen_popsi1 (ecx));
4360 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4361 emit_jump_insn (gen_return_indirect_internal (ecx));
4362 }
4363 else
4364 emit_jump_insn (gen_return_pop_internal (popc));
4365 }
4366 else
4367 emit_jump_insn (gen_return_internal ());
4368 }
4369 \f
4370 /* Extract the parts of an RTL expression that is a valid memory address
4371 for an instruction. Return 0 if the structure of the address is
4372 grossly off. Return -1 if the address contains ASHIFT, so it is not
4373 strictly valid, but is still used for computing the length of an lea
4374 instruction. */
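/* For example, the address (plus (plus (mult (reg A) (const_int 4))
   (reg B)) (const_int 12)) - i.e. 12(%B,%A,4) in AT&T syntax - is
   decomposed into base = B, index = A, scale = 4, disp = 12.  */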
4375
4376 static int
4377 ix86_decompose_address (addr, out)
4378 register rtx addr;
4379 struct ix86_address *out;
4380 {
4381 rtx base = NULL_RTX;
4382 rtx index = NULL_RTX;
4383 rtx disp = NULL_RTX;
4384 HOST_WIDE_INT scale = 1;
4385 rtx scale_rtx = NULL_RTX;
4386 int retval = 1;
4387
4388 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4389 base = addr;
4390 else if (GET_CODE (addr) == PLUS)
4391 {
4392 rtx op0 = XEXP (addr, 0);
4393 rtx op1 = XEXP (addr, 1);
4394 enum rtx_code code0 = GET_CODE (op0);
4395 enum rtx_code code1 = GET_CODE (op1);
4396
4397 if (code0 == REG || code0 == SUBREG)
4398 {
4399 if (code1 == REG || code1 == SUBREG)
4400 index = op0, base = op1; /* index + base */
4401 else
4402 base = op0, disp = op1; /* base + displacement */
4403 }
4404 else if (code0 == MULT)
4405 {
4406 index = XEXP (op0, 0);
4407 scale_rtx = XEXP (op0, 1);
4408 if (code1 == REG || code1 == SUBREG)
4409 base = op1; /* index*scale + base */
4410 else
4411 disp = op1; /* index*scale + disp */
4412 }
4413 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4414 {
4415 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4416 scale_rtx = XEXP (XEXP (op0, 0), 1);
4417 base = XEXP (op0, 1);
4418 disp = op1;
4419 }
4420 else if (code0 == PLUS)
4421 {
4422 index = XEXP (op0, 0); /* index + base + disp */
4423 base = XEXP (op0, 1);
4424 disp = op1;
4425 }
4426 else
4427 return 0;
4428 }
4429 else if (GET_CODE (addr) == MULT)
4430 {
4431 index = XEXP (addr, 0); /* index*scale */
4432 scale_rtx = XEXP (addr, 1);
4433 }
4434 else if (GET_CODE (addr) == ASHIFT)
4435 {
4436 rtx tmp;
4437
4438 /* We're called for lea too, which implements ashift on occasion. */
4439 index = XEXP (addr, 0);
4440 tmp = XEXP (addr, 1);
4441 if (GET_CODE (tmp) != CONST_INT)
4442 return 0;
4443 scale = INTVAL (tmp);
4444 if ((unsigned HOST_WIDE_INT) scale > 3)
4445 return 0;
4446 scale = 1 << scale;
4447 retval = -1;
4448 }
4449 else
4450 disp = addr; /* displacement */
4451
4452 /* Extract the integral value of scale. */
4453 if (scale_rtx)
4454 {
4455 if (GET_CODE (scale_rtx) != CONST_INT)
4456 return 0;
4457 scale = INTVAL (scale_rtx);
4458 }
4459
4460 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4461 if (base && index && scale == 1
4462 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4463 || index == stack_pointer_rtx))
4464 {
4465 rtx tmp = base;
4466 base = index;
4467 index = tmp;
4468 }
4469
4470 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4471 if ((base == hard_frame_pointer_rtx
4472 || base == frame_pointer_rtx
4473 || base == arg_pointer_rtx) && !disp)
4474 disp = const0_rtx;
4475
4476 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4477 Avoid this by transforming to [%esi+0]. */
4478 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4479 && base && !index && !disp
4480 && REG_P (base)
4481 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4482 disp = const0_rtx;
4483
4484 /* Special case: encode reg+reg instead of reg*2. */
4485 if (!base && index && scale && scale == 2)
4486 base = index, scale = 1;
4487
4488 /* Special case: scaling cannot be encoded without base or displacement. */
4489 if (!base && !disp && index && scale != 1)
4490 disp = const0_rtx;
4491
4492 out->base = base;
4493 out->index = index;
4494 out->disp = disp;
4495 out->scale = scale;
4496
4497 return retval;
4498 }
4499 \f
4500 /* Return the cost of the memory address X.
4501 For i386, it is better to use a complex address than let gcc copy
4502 the address into a reg and make a new pseudo. But not if the address
4503 requires two regs - that would mean more pseudos with longer
4504 lifetimes. */
4505 int
4506 ix86_address_cost (x)
4507 rtx x;
4508 {
4509 struct ix86_address parts;
4510 int cost = 1;
4511
4512 if (!ix86_decompose_address (x, &parts))
4513 abort ();
4514
4515 /* More complex memory references are better. */
4516 if (parts.disp && parts.disp != const0_rtx)
4517 cost--;
4518
4519 /* Attempt to minimize number of registers in the address. */
4520 if ((parts.base
4521 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4522 || (parts.index
4523 && (!REG_P (parts.index)
4524 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4525 cost++;
4526
4527 if (parts.base
4528 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4529 && parts.index
4530 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4531 && parts.base != parts.index)
4532 cost++;
4533
4534 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4535 since its predecode logic can't detect the length of such instructions
4536 and decoding degenerates to the vector decoder. Increase the cost of such
4537 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4538 to split such addresses or even refuse them altogether.
4539 
4540 The following addressing modes are affected:
4541 [base+scale*index]
4542 [scale*index+disp]
4543 [base+index]
4544 
4545 The first and last case may be avoidable by explicitly coding the zero in
4546 the memory address, but I don't have an AMD-K6 machine handy to check this
4547 theory. */
4548
4549 if (TARGET_K6
4550 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4551 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4552 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4553 cost += 10;
4554
4555 return cost;
4556 }
4557 \f
4558 /* If X is a machine specific address (i.e. a symbol or label being
4559 referenced as a displacement from the GOT implemented using an
4560 UNSPEC), then return the base term. Otherwise return X. */
4561
4562 rtx
4563 ix86_find_base_term (x)
4564 rtx x;
4565 {
4566 rtx term;
4567
4568 if (TARGET_64BIT)
4569 {
4570 if (GET_CODE (x) != CONST)
4571 return x;
4572 term = XEXP (x, 0);
4573 if (GET_CODE (term) == PLUS
4574 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4575 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4576 term = XEXP (term, 0);
4577 if (GET_CODE (term) != UNSPEC
4578 || XVECLEN (term, 0) != 1
4579 || XINT (term, 1) != 15)
4580 return x;
4581
4582 term = XVECEXP (term, 0, 0);
4583
4584 if (GET_CODE (term) != SYMBOL_REF
4585 && GET_CODE (term) != LABEL_REF)
4586 return x;
4587
4588 return term;
4589 }
4590
4591 if (GET_CODE (x) != PLUS
4592 || XEXP (x, 0) != pic_offset_table_rtx
4593 || GET_CODE (XEXP (x, 1)) != CONST)
4594 return x;
4595
4596 term = XEXP (XEXP (x, 1), 0);
4597
4598 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4599 term = XEXP (term, 0);
4600
4601 if (GET_CODE (term) != UNSPEC
4602 || XVECLEN (term, 0) != 1
4603 || XINT (term, 1) != 7)
4604 return x;
4605
4606 term = XVECEXP (term, 0, 0);
4607
4608 if (GET_CODE (term) != SYMBOL_REF
4609 && GET_CODE (term) != LABEL_REF)
4610 return x;
4611
4612 return term;
4613 }
4614 \f
4615 /* Determine if a given CONST RTX is a valid memory displacement
4616 in PIC mode. */
4617
4618 int
4619 legitimate_pic_address_disp_p (disp)
4620 register rtx disp;
4621 {
4622 /* In 64bit mode we can allow direct addresses of symbols and labels
4623 when they are not dynamic symbols. */
4624 if (TARGET_64BIT)
4625 {
4626 rtx x = disp;
4627 if (GET_CODE (disp) == CONST)
4628 x = XEXP (disp, 0);
4629 /* ??? Handle PIC code models */
4630 if (GET_CODE (x) == PLUS
4631 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4632 && ix86_cmodel == CM_SMALL_PIC
4633 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4634 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4635 x = XEXP (x, 0);
4636 if (local_symbolic_operand (x, Pmode))
4637 return 1;
4638 }
4639 if (GET_CODE (disp) != CONST)
4640 return 0;
4641 disp = XEXP (disp, 0);
4642
4643 if (TARGET_64BIT)
4644 {
4645 /* It is unsafe to allow PLUS expressions. This limits the allowed
4646 distance of GOT tables. We should not need these anyway. */
4647 if (GET_CODE (disp) != UNSPEC
4648 || XVECLEN (disp, 0) != 1
4649 || XINT (disp, 1) != 15)
4650 return 0;
4651
4652 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4653 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4654 return 0;
4655 return 1;
4656 }
4657
4658 if (GET_CODE (disp) == PLUS)
4659 {
4660 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4661 return 0;
4662 disp = XEXP (disp, 0);
4663 }
4664
4665 if (GET_CODE (disp) != UNSPEC
4666 || XVECLEN (disp, 0) != 1)
4667 return 0;
4668
4669 /* Must be @GOT or @GOTOFF. */
4670 switch (XINT (disp, 1))
4671 {
4672 case 6: /* @GOT */
4673 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4674
4675 case 7: /* @GOTOFF */
4676 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4677 }
4678
4679 return 0;
4680 }
4681
4682 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4683 memory address for an instruction. The MODE argument is the machine mode
4684 for the MEM expression that wants to use this address.
4685
4686 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4687 convert common non-canonical forms to canonical form so that they will
4688 be recognized. */
4689
4690 int
4691 legitimate_address_p (mode, addr, strict)
4692 enum machine_mode mode;
4693 register rtx addr;
4694 int strict;
4695 {
4696 struct ix86_address parts;
4697 rtx base, index, disp;
4698 HOST_WIDE_INT scale;
4699 const char *reason = NULL;
4700 rtx reason_rtx = NULL_RTX;
4701
4702 if (TARGET_DEBUG_ADDR)
4703 {
4704 fprintf (stderr,
4705 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4706 GET_MODE_NAME (mode), strict);
4707 debug_rtx (addr);
4708 }
4709
4710 if (ix86_decompose_address (addr, &parts) <= 0)
4711 {
4712 reason = "decomposition failed";
4713 goto report_error;
4714 }
4715
4716 base = parts.base;
4717 index = parts.index;
4718 disp = parts.disp;
4719 scale = parts.scale;
4720
4721 /* Validate base register.
4722
4723 Don't allow SUBREGs here; they can lead to spill failures when the base
4724 is one word out of a two-word structure, which is represented internally
4725 as a DImode int. */
4726
4727 if (base)
4728 {
4729 reason_rtx = base;
4730
4731 if (GET_CODE (base) != REG)
4732 {
4733 reason = "base is not a register";
4734 goto report_error;
4735 }
4736
4737 if (GET_MODE (base) != Pmode)
4738 {
4739 reason = "base is not in Pmode";
4740 goto report_error;
4741 }
4742
4743 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4744 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4745 {
4746 reason = "base is not valid";
4747 goto report_error;
4748 }
4749 }
4750
4751 /* Validate index register.
4752
4753 Don't allow SUBREGs here; they can lead to spill failures when the index
4754 is one word out of a two-word structure, which is represented internally
4755 as a DImode int. */
4756
4757 if (index)
4758 {
4759 reason_rtx = index;
4760
4761 if (GET_CODE (index) != REG)
4762 {
4763 reason = "index is not a register";
4764 goto report_error;
4765 }
4766
4767 if (GET_MODE (index) != Pmode)
4768 {
4769 reason = "index is not in Pmode";
4770 goto report_error;
4771 }
4772
4773 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4774 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4775 {
4776 reason = "index is not valid";
4777 goto report_error;
4778 }
4779 }
4780
4781 /* Validate scale factor. */
4782 if (scale != 1)
4783 {
4784 reason_rtx = GEN_INT (scale);
4785 if (!index)
4786 {
4787 reason = "scale without index";
4788 goto report_error;
4789 }
4790
4791 if (scale != 2 && scale != 4 && scale != 8)
4792 {
4793 reason = "scale is not a valid multiplier";
4794 goto report_error;
4795 }
4796 }
4797
4798 /* Validate displacement. */
4799 if (disp)
4800 {
4801 reason_rtx = disp;
4802
4803 if (!CONSTANT_ADDRESS_P (disp))
4804 {
4805 reason = "displacement is not constant";
4806 goto report_error;
4807 }
4808
4809 if (TARGET_64BIT)
4810 {
4811 if (!x86_64_sign_extended_value (disp))
4812 {
4813 reason = "displacement is out of range";
4814 goto report_error;
4815 }
4816 }
4817 else
4818 {
4819 if (GET_CODE (disp) == CONST_DOUBLE)
4820 {
4821 reason = "displacement is a const_double";
4822 goto report_error;
4823 }
4824 }
4825
4826 if (flag_pic && SYMBOLIC_CONST (disp))
4827 {
4828 if (TARGET_64BIT && (index || base))
4829 {
4830 reason = "non-constant pic memory reference";
4831 goto report_error;
4832 }
4833 if (! legitimate_pic_address_disp_p (disp))
4834 {
4835 reason = "displacement is an invalid pic construct";
4836 goto report_error;
4837 }
4838
4839 /* This code used to verify that a symbolic pic displacement
4840 includes the pic_offset_table_rtx register.
4841
4842 While this is a good idea, unfortunately these constructs may
4843 be created by the "adds using lea" optimization for incorrect
4844 code like:
4845
4846 int a;
4847 int foo(int i)
4848 {
4849 return *(&a+i);
4850 }
4851
4852 This code is nonsensical, but results in addressing the
4853 GOT table with a pic_offset_table_rtx base. We can't
4854 just refuse it easily, since it gets matched by the
4855 "addsi3" pattern, which later gets split to an lea in case
4856 the output register differs from the input. While this
4857 could be handled by a separate addsi pattern for this case
4858 that never results in an lea, disabling this test seems to be
4859 the easier and correct fix for the crash. */
4860 }
4861 else if (HALF_PIC_P ())
4862 {
4863 if (! HALF_PIC_ADDRESS_P (disp)
4864 || (base != NULL_RTX || index != NULL_RTX))
4865 {
4866 reason = "displacement is an invalid half-pic reference";
4867 goto report_error;
4868 }
4869 }
4870 }
4871
4872 /* Everything looks valid. */
4873 if (TARGET_DEBUG_ADDR)
4874 fprintf (stderr, "Success.\n");
4875 return TRUE;
4876
4877 report_error:
4878 if (TARGET_DEBUG_ADDR)
4879 {
4880 fprintf (stderr, "Error: %s\n", reason);
4881 debug_rtx (reason_rtx);
4882 }
4883 return FALSE;
4884 }
4885 \f
4886 /* Return a unique alias set for the GOT. */
4887
4888 static HOST_WIDE_INT
4889 ix86_GOT_alias_set ()
4890 {
4891 static HOST_WIDE_INT set = -1;
4892 if (set == -1)
4893 set = new_alias_set ();
4894 return set;
4895 }
4896
4897 /* Return a legitimate reference for ORIG (an address) using the
4898 register REG. If REG is 0, a new pseudo is generated.
4899
4900 There are two types of references that must be handled:
4901
4902 1. Global data references must load the address from the GOT, via
4903 the PIC reg. An insn is emitted to do this load, and the reg is
4904 returned.
4905
4906 2. Static data references, constant pool addresses, and code labels
4907 compute the address as an offset from the GOT, whose base is in
4908 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4909 differentiate them from global data objects. The returned
4910 address is the PIC reg + an unspec constant.
4911
4912 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4913 reg also appears in the address. */
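/* As a concrete illustration of the two cases (standard ia32 PIC
   sequences, not specific to this function's exact output): a global
   symbol is reached through its GOT slot, e.g.
   "movl foo@GOT(%ebx), %reg", while a local symbol or label is
   addressed relative to the GOT base, e.g.
   "leal bar@GOTOFF(%ebx), %reg".  */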
4914
4915 rtx
4916 legitimize_pic_address (orig, reg)
4917 rtx orig;
4918 rtx reg;
4919 {
4920 rtx addr = orig;
4921 rtx new = orig;
4922 rtx base;
4923
4924 if (local_symbolic_operand (addr, Pmode))
4925 {
4926 /* In 64bit mode we can address such objects directly. */
4927 if (TARGET_64BIT)
4928 new = addr;
4929 else
4930 {
4931 /* This symbol may be referenced via a displacement from the PIC
4932 base address (@GOTOFF). */
4933
4934 current_function_uses_pic_offset_table = 1;
4935 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4936 new = gen_rtx_CONST (Pmode, new);
4937 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4938
4939 if (reg != 0)
4940 {
4941 emit_move_insn (reg, new);
4942 new = reg;
4943 }
4944 }
4945 }
4946 else if (GET_CODE (addr) == SYMBOL_REF)
4947 {
4948 if (TARGET_64BIT)
4949 {
4950 current_function_uses_pic_offset_table = 1;
4951 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4952 new = gen_rtx_CONST (Pmode, new);
4953 new = gen_rtx_MEM (Pmode, new);
4954 RTX_UNCHANGING_P (new) = 1;
4955 set_mem_alias_set (new, ix86_GOT_alias_set ());
4956
4957 if (reg == 0)
4958 reg = gen_reg_rtx (Pmode);
4959 /* Use gen_movsi directly; otherwise the address is loaded
4960 into a register for CSE. We don't want to CSE these addresses;
4961 instead we CSE addresses from the GOT table, so skip this. */
4962 emit_insn (gen_movsi (reg, new));
4963 new = reg;
4964 }
4965 else
4966 {
4967 /* This symbol must be referenced via a load from the
4968 Global Offset Table (@GOT). */
4969
4970 current_function_uses_pic_offset_table = 1;
4971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4972 new = gen_rtx_CONST (Pmode, new);
4973 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4974 new = gen_rtx_MEM (Pmode, new);
4975 RTX_UNCHANGING_P (new) = 1;
4976 set_mem_alias_set (new, ix86_GOT_alias_set ());
4977
4978 if (reg == 0)
4979 reg = gen_reg_rtx (Pmode);
4980 emit_move_insn (reg, new);
4981 new = reg;
4982 }
4983 }
4984 else
4985 {
4986 if (GET_CODE (addr) == CONST)
4987 {
4988 addr = XEXP (addr, 0);
4989 if (GET_CODE (addr) == UNSPEC)
4990 {
4991 /* Check that the unspec is one of the ones we generate? */
4992 }
4993 else if (GET_CODE (addr) != PLUS)
4994 abort ();
4995 }
4996 if (GET_CODE (addr) == PLUS)
4997 {
4998 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4999
5000 /* Check first to see if this is a constant offset from a @GOTOFF
5001 symbol reference. */
5002 if (local_symbolic_operand (op0, Pmode)
5003 && GET_CODE (op1) == CONST_INT)
5004 {
5005 if (!TARGET_64BIT)
5006 {
5007 current_function_uses_pic_offset_table = 1;
5008 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5009 new = gen_rtx_PLUS (Pmode, new, op1);
5010 new = gen_rtx_CONST (Pmode, new);
5011 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5012
5013 if (reg != 0)
5014 {
5015 emit_move_insn (reg, new);
5016 new = reg;
5017 }
5018 }
5019 else
5020 {
5021 /* ??? We need to limit offsets here. */
5022 }
5023 }
5024 else
5025 {
5026 base = legitimize_pic_address (XEXP (addr, 0), reg);
5027 new = legitimize_pic_address (XEXP (addr, 1),
5028 base == reg ? NULL_RTX : reg);
5029
5030 if (GET_CODE (new) == CONST_INT)
5031 new = plus_constant (base, INTVAL (new));
5032 else
5033 {
5034 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5035 {
5036 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5037 new = XEXP (new, 1);
5038 }
5039 new = gen_rtx_PLUS (Pmode, base, new);
5040 }
5041 }
5042 }
5043 }
5044 return new;
5045 }
5046 \f
5047 /* Try machine-dependent ways of modifying an illegitimate address
5048 to be legitimate. If we find one, return the new, valid address.
5049 This macro is used in only one place: `memory_address' in explow.c.
5050
5051 OLDX is the address as it was before break_out_memory_refs was called.
5052 In some cases it is useful to look at this to decide what needs to be done.
5053
5054 MODE and WIN are passed so that this macro can use
5055 GO_IF_LEGITIMATE_ADDRESS.
5056
5057 It is always safe for this macro to do nothing. It exists to recognize
5058 opportunities to optimize the output.
5059
5060 For the 80386, we handle X+REG by loading X into a register R and
5061 using R+REG. R will go in a general reg and indexing will be used.
5062 However, if REG is a broken-out memory address or multiplication,
5063 nothing needs to be done because REG can certainly go in a general reg.
5064
5065 When -fpic is used, special handling is needed for symbolic references.
5066 See comments by legitimize_pic_address in i386.c for details. */
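/* For example, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   below as (plus (mult (reg) (const_int 4)) (reg)), which matches the
   scaled-index form that ix86_decompose_address understands.  */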
5067
5068 rtx
5069 legitimize_address (x, oldx, mode)
5070 register rtx x;
5071 register rtx oldx ATTRIBUTE_UNUSED;
5072 enum machine_mode mode;
5073 {
5074 int changed = 0;
5075 unsigned log;
5076
5077 if (TARGET_DEBUG_ADDR)
5078 {
5079 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5080 GET_MODE_NAME (mode));
5081 debug_rtx (x);
5082 }
5083
5084 if (flag_pic && SYMBOLIC_CONST (x))
5085 return legitimize_pic_address (x, 0);
5086
5087 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
5088 if (GET_CODE (x) == ASHIFT
5089 && GET_CODE (XEXP (x, 1)) == CONST_INT
5090 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5091 {
5092 changed = 1;
5093 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5094 GEN_INT (1 << log));
5095 }
5096
5097 if (GET_CODE (x) == PLUS)
5098 {
5099 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5100
5101 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5102 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5103 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5104 {
5105 changed = 1;
5106 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5107 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5108 GEN_INT (1 << log));
5109 }
5110
5111 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5112 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5113 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5114 {
5115 changed = 1;
5116 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5117 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5118 GEN_INT (1 << log));
5119 }
5120
5121 /* Put multiply first if it isn't already. */
5122 if (GET_CODE (XEXP (x, 1)) == MULT)
5123 {
5124 rtx tmp = XEXP (x, 0);
5125 XEXP (x, 0) = XEXP (x, 1);
5126 XEXP (x, 1) = tmp;
5127 changed = 1;
5128 }
5129
5130 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5131 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5132 created by virtual register instantiation, register elimination, and
5133 similar optimizations. */
5134 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5135 {
5136 changed = 1;
5137 x = gen_rtx_PLUS (Pmode,
5138 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5139 XEXP (XEXP (x, 1), 0)),
5140 XEXP (XEXP (x, 1), 1));
5141 }
5142
5143 /* Canonicalize
5144 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5145 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5146 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5148 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5149 && CONSTANT_P (XEXP (x, 1)))
5150 {
5151 rtx constant;
5152 rtx other = NULL_RTX;
5153
5154 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5155 {
5156 constant = XEXP (x, 1);
5157 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5158 }
5159 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5160 {
5161 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5162 other = XEXP (x, 1);
5163 }
5164 else
5165 constant = 0;
5166
5167 if (constant)
5168 {
5169 changed = 1;
5170 x = gen_rtx_PLUS (Pmode,
5171 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5172 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5173 plus_constant (other, INTVAL (constant)));
5174 }
5175 }
5176
5177 if (changed && legitimate_address_p (mode, x, FALSE))
5178 return x;
5179
5180 if (GET_CODE (XEXP (x, 0)) == MULT)
5181 {
5182 changed = 1;
5183 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5184 }
5185
5186 if (GET_CODE (XEXP (x, 1)) == MULT)
5187 {
5188 changed = 1;
5189 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5190 }
5191
5192 if (changed
5193 && GET_CODE (XEXP (x, 1)) == REG
5194 && GET_CODE (XEXP (x, 0)) == REG)
5195 return x;
5196
5197 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5198 {
5199 changed = 1;
5200 x = legitimize_pic_address (x, 0);
5201 }
5202
5203 if (changed && legitimate_address_p (mode, x, FALSE))
5204 return x;
5205
5206 if (GET_CODE (XEXP (x, 0)) == REG)
5207 {
5208 register rtx temp = gen_reg_rtx (Pmode);
5209 register rtx val = force_operand (XEXP (x, 1), temp);
5210 if (val != temp)
5211 emit_move_insn (temp, val);
5212
5213 XEXP (x, 1) = temp;
5214 return x;
5215 }
5216
5217 else if (GET_CODE (XEXP (x, 1)) == REG)
5218 {
5219 register rtx temp = gen_reg_rtx (Pmode);
5220 register rtx val = force_operand (XEXP (x, 0), temp);
5221 if (val != temp)
5222 emit_move_insn (temp, val);
5223
5224 XEXP (x, 0) = temp;
5225 return x;
5226 }
5227 }
5228
5229 return x;
5230 }
5231 \f
5232 /* Print an integer constant expression in assembler syntax. Addition
5233 and subtraction are the only arithmetic that may appear in these
5234 expressions. FILE is the stdio stream to write to, X is the rtx, and
5235 CODE is the operand print code from the output string. */
5236
5237 static void
5238 output_pic_addr_const (file, x, code)
5239 FILE *file;
5240 rtx x;
5241 int code;
5242 {
5243 char buf[256];
5244
5245 switch (GET_CODE (x))
5246 {
5247 case PC:
5248 if (flag_pic)
5249 putc ('.', file);
5250 else
5251 abort ();
5252 break;
5253
5254 case SYMBOL_REF:
5255 assemble_name (file, XSTR (x, 0));
5256 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5257 fputs ("@PLT", file);
5258 break;
5259
5260 case LABEL_REF:
5261 x = XEXP (x, 0);
5262 /* FALLTHRU */
5263 case CODE_LABEL:
5264 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5265 assemble_name (asm_out_file, buf);
5266 break;
5267
5268 case CONST_INT:
5269 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5270 break;
5271
5272 case CONST:
5273 /* This used to output parentheses around the expression,
5274 but that does not work on the 386 (either ATT or BSD assembler). */
5275 output_pic_addr_const (file, XEXP (x, 0), code);
5276 break;
5277
5278 case CONST_DOUBLE:
5279 if (GET_MODE (x) == VOIDmode)
5280 {
5281 /* We can use %d if the number is <32 bits and positive. */
5282 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5283 fprintf (file, "0x%lx%08lx",
5284 (unsigned long) CONST_DOUBLE_HIGH (x),
5285 (unsigned long) CONST_DOUBLE_LOW (x));
5286 else
5287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5288 }
5289 else
5290 /* We can't handle floating point constants;
5291 PRINT_OPERAND must handle them. */
5292 output_operand_lossage ("floating constant misused");
5293 break;
5294
5295 case PLUS:
5296 /* Some assemblers need integer constants to appear first. */
5297 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5298 {
5299 output_pic_addr_const (file, XEXP (x, 0), code);
5300 putc ('+', file);
5301 output_pic_addr_const (file, XEXP (x, 1), code);
5302 }
5303 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5304 {
5305 output_pic_addr_const (file, XEXP (x, 1), code);
5306 putc ('+', file);
5307 output_pic_addr_const (file, XEXP (x, 0), code);
5308 }
5309 else
5310 abort ();
5311 break;
5312
5313 case MINUS:
5314 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5315 output_pic_addr_const (file, XEXP (x, 0), code);
5316 putc ('-', file);
5317 output_pic_addr_const (file, XEXP (x, 1), code);
5318 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5319 break;
5320
5321 case UNSPEC:
5322 if (XVECLEN (x, 0) != 1)
5323 abort ();
5324 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5325 switch (XINT (x, 1))
5326 {
5327 case 6:
5328 fputs ("@GOT", file);
5329 break;
5330 case 7:
5331 fputs ("@GOTOFF", file);
5332 break;
5333 case 8:
5334 fputs ("@PLT", file);
5335 break;
5336 case 15:
5337 fputs ("@GOTPCREL(%RIP)", file);
5338 break;
5339 default:
5340 output_operand_lossage ("invalid UNSPEC as operand");
5341 break;
5342 }
5343 break;
5344
5345 default:
5346 output_operand_lossage ("invalid expression as operand");
5347 }
5348 }
5349
5350 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5351 We need to handle our special PIC relocations. */
5352
5353 void
5354 i386_dwarf_output_addr_const (file, x)
5355 FILE *file;
5356 rtx x;
5357 {
5358 #ifdef ASM_QUAD
5359 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5360 #else
5361 if (TARGET_64BIT)
5362 abort ();
5363 fprintf (file, "%s", ASM_LONG);
5364 #endif
5365 if (flag_pic)
5366 output_pic_addr_const (file, x, '\0');
5367 else
5368 output_addr_const (file, x);
5369 fputc ('\n', file);
5370 }
5371
5372 /* In the name of slightly smaller debug output, and to cater to
5373 general assembler lossage, recognize PIC+GOTOFF and turn it back
5374 into a direct symbol reference. */
5375
5376 rtx
5377 i386_simplify_dwarf_addr (orig_x)
5378 rtx orig_x;
5379 {
5380 rtx x = orig_x, y;
5381
5382 if (TARGET_64BIT)
5383 {
5384 if (GET_CODE (x) != CONST
5385 || GET_CODE (XEXP (x, 0)) != UNSPEC
5386 || XINT (XEXP (x, 0), 1) != 15)
5387 return orig_x;
5388 return XVECEXP (XEXP (x, 0), 0, 0);
5389 }
5390
5391 if (GET_CODE (x) != PLUS
5392 || GET_CODE (XEXP (x, 1)) != CONST)
5393 return orig_x;
5394
5395 if (GET_CODE (XEXP (x, 0)) == REG
5396 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5397 /* %ebx + GOT/GOTOFF */
5398 y = NULL;
5399 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5400 {
5401 /* %ebx + %reg * scale + GOT/GOTOFF */
5402 y = XEXP (x, 0);
5403 if (GET_CODE (XEXP (y, 0)) == REG
5404 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5405 y = XEXP (y, 1);
5406 else if (GET_CODE (XEXP (y, 1)) == REG
5407 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5408 y = XEXP (y, 0);
5409 else
5410 return orig_x;
5411 if (GET_CODE (y) != REG
5412 && GET_CODE (y) != MULT
5413 && GET_CODE (y) != ASHIFT)
5414 return orig_x;
5415 }
5416 else
5417 return orig_x;
5418
5419 x = XEXP (XEXP (x, 1), 0);
5420 if (GET_CODE (x) == UNSPEC
5421 && (XINT (x, 1) == 6
5422 || XINT (x, 1) == 7))
5423 {
5424 if (y)
5425 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5426 return XVECEXP (x, 0, 0);
5427 }
5428
5429 if (GET_CODE (x) == PLUS
5430 && GET_CODE (XEXP (x, 0)) == UNSPEC
5431 && GET_CODE (XEXP (x, 1)) == CONST_INT
5432 && (XINT (XEXP (x, 0), 1) == 6
5433 || XINT (XEXP (x, 0), 1) == 7))
5434 {
5435 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5436 if (y)
5437 return gen_rtx_PLUS (Pmode, y, x);
5438 return x;
5439 }
5440
5441 return orig_x;
5442 }
5443 \f
5444 static void
5445 put_condition_code (code, mode, reverse, fp, file)
5446 enum rtx_code code;
5447 enum machine_mode mode;
5448 int reverse, fp;
5449 FILE *file;
5450 {
5451 const char *suffix;
5452
5453 if (mode == CCFPmode || mode == CCFPUmode)
5454 {
5455 enum rtx_code second_code, bypass_code;
5456 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5457 if (bypass_code != NIL || second_code != NIL)
5458 abort ();
5459 code = ix86_fp_compare_code_to_integer (code);
5460 mode = CCmode;
5461 }
5462 if (reverse)
5463 code = reverse_condition (code);
5464
5465 switch (code)
5466 {
5467 case EQ:
5468 suffix = "e";
5469 break;
5470 case NE:
5471 suffix = "ne";
5472 break;
5473 case GT:
5474 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5475 abort ();
5476 suffix = "g";
5477 break;
5478 case GTU:
5479 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5480 Those same assemblers have the same but opposite losage on cmov. */
5481 if (mode != CCmode)
5482 abort ();
5483 suffix = fp ? "nbe" : "a";
5484 break;
5485 case LT:
5486 if (mode == CCNOmode || mode == CCGOCmode)
5487 suffix = "s";
5488 else if (mode == CCmode || mode == CCGCmode)
5489 suffix = "l";
5490 else
5491 abort ();
5492 break;
5493 case LTU:
5494 if (mode != CCmode)
5495 abort ();
5496 suffix = "b";
5497 break;
5498 case GE:
5499 if (mode == CCNOmode || mode == CCGOCmode)
5500 suffix = "ns";
5501 else if (mode == CCmode || mode == CCGCmode)
5502 suffix = "ge";
5503 else
5504 abort ();
5505 break;
5506 case GEU:
5507 /* ??? As above. */
5508 if (mode != CCmode)
5509 abort ();
5510 suffix = fp ? "nb" : "ae";
5511 break;
5512 case LE:
5513 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5514 abort ();
5515 suffix = "le";
5516 break;
5517 case LEU:
5518 if (mode != CCmode)
5519 abort ();
5520 suffix = "be";
5521 break;
5522 case UNORDERED:
5523 suffix = fp ? "u" : "p";
5524 break;
5525 case ORDERED:
5526 suffix = fp ? "nu" : "np";
5527 break;
5528 default:
5529 abort ();
5530 }
5531 fputs (suffix, file);
5532 }
5533
5534 void
5535 print_reg (x, code, file)
5536 rtx x;
5537 int code;
5538 FILE *file;
5539 {
5540 if (REGNO (x) == ARG_POINTER_REGNUM
5541 || REGNO (x) == FRAME_POINTER_REGNUM
5542 || REGNO (x) == FLAGS_REG
5543 || REGNO (x) == FPSR_REG)
5544 abort ();
5545
5546 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5547 putc ('%', file);
5548
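  /* Translate the operand modifier into a size code: 1, 2, 4 and 8 are
     operand sizes in bytes, 0 selects the QImode high-half register names
     and 3 requests `st(0)' naming for the FP stack top.  */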
5549 if (code == 'w' || MMX_REG_P (x))
5550 code = 2;
5551 else if (code == 'b')
5552 code = 1;
5553 else if (code == 'k')
5554 code = 4;
5555 else if (code == 'q')
5556 code = 8;
5557 else if (code == 'y')
5558 code = 3;
5559 else if (code == 'h')
5560 code = 0;
5561 else
5562 code = GET_MODE_SIZE (GET_MODE (x));
5563
5564   /* Irritatingly, AMD extended registers use a different naming convention
5565 from the normal registers. */
5566 if (REX_INT_REG_P (x))
5567 {
5568 if (!TARGET_64BIT)
5569 abort ();
5570 switch (code)
5571 {
5572 case 0:
5573 error ("extended registers have no high halves");
5574 break;
5575 case 1:
5576 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5577 break;
5578 case 2:
5579 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5580 break;
5581 case 4:
5582 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5583 break;
5584 case 8:
5585 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5586 break;
5587 default:
5588 error ("unsupported operand size for extended register");
5589 break;
5590 }
5591 return;
5592 }
5593 switch (code)
5594 {
5595 case 3:
5596 if (STACK_TOP_P (x))
5597 {
5598 fputs ("st(0)", file);
5599 break;
5600 }
5601 /* FALLTHRU */
5602 case 8:
5603 case 4:
5604 case 12:
5605 if (! ANY_FP_REG_P (x))
5606 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5607 /* FALLTHRU */
5608 case 16:
5609 case 2:
5610 fputs (hi_reg_name[REGNO (x)], file);
5611 break;
5612 case 1:
5613 fputs (qi_reg_name[REGNO (x)], file);
5614 break;
5615 case 0:
5616 fputs (qi_high_reg_name[REGNO (x)], file);
5617 break;
5618 default:
5619 abort ();
5620 }
5621 }
5622
5623 /* Meaning of CODE:
5624 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5625 C -- print opcode suffix for set/cmov insn.
5626 c -- like C, but print reversed condition
5627 F,f -- likewise, but for floating-point.
5628 R -- print the prefix for register names.
5629 z -- print the opcode suffix for the size of the current operand.
5630 * -- print a star (in certain assembler syntax)
5631 A -- print an absolute memory reference.
5632 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5633    s -- print a shift double count, followed by the assembler's argument
5634 delimiter.
5635 b -- print the QImode name of the register for the indicated operand.
5636 %b0 would print %al if operands[0] is reg 0.
5637 w -- likewise, print the HImode name of the register.
5638 k -- likewise, print the SImode name of the register.
5639 q -- likewise, print the DImode name of the register.
5640 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5641 y -- print "st(0)" instead of "st" as a register.
5642 D -- print condition for SSE cmp instruction.
5643 P -- if PIC, print an @PLT suffix.
5644    X -- don't print any sort of PIC '@' suffix for a symbol.
   + -- print a branch prediction hint prefix for a conditional jump when profitable.
5645  */
5646
5647 void
5648 print_operand (file, x, code)
5649 FILE *file;
5650 rtx x;
5651 int code;
5652 {
5653 if (code)
5654 {
5655 switch (code)
5656 {
5657 case '*':
5658 if (ASSEMBLER_DIALECT == ASM_ATT)
5659 putc ('*', file);
5660 return;
5661
5662 case 'A':
5663 if (ASSEMBLER_DIALECT == ASM_ATT)
5664 putc ('*', file);
5665 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5666 {
5667 /* Intel syntax. For absolute addresses, registers should not
5668 	     be surrounded by brackets.  */
5669 if (GET_CODE (x) != REG)
5670 {
5671 putc ('[', file);
5672 PRINT_OPERAND (file, x, 0);
5673 putc (']', file);
5674 return;
5675 }
5676 }
5677 else
5678 abort ();
5679
5680 PRINT_OPERAND (file, x, 0);
5681 return;
5682
5683
5684 case 'L':
5685 if (ASSEMBLER_DIALECT == ASM_ATT)
5686 putc ('l', file);
5687 return;
5688
5689 case 'W':
5690 if (ASSEMBLER_DIALECT == ASM_ATT)
5691 putc ('w', file);
5692 return;
5693
5694 case 'B':
5695 if (ASSEMBLER_DIALECT == ASM_ATT)
5696 putc ('b', file);
5697 return;
5698
5699 case 'Q':
5700 if (ASSEMBLER_DIALECT == ASM_ATT)
5701 putc ('l', file);
5702 return;
5703
5704 case 'S':
5705 if (ASSEMBLER_DIALECT == ASM_ATT)
5706 putc ('s', file);
5707 return;
5708
5709 case 'T':
5710 if (ASSEMBLER_DIALECT == ASM_ATT)
5711 putc ('t', file);
5712 return;
5713
5714 case 'z':
5715 /* 387 opcodes don't get size suffixes if the operands are
5716 registers. */
5717
5718 if (STACK_REG_P (x))
5719 return;
5720
5721 	  /* Derive the opcode suffix from the size of the operand.  */
5722 switch (GET_MODE_SIZE (GET_MODE (x)))
5723 {
5724 case 2:
5725 #ifdef HAVE_GAS_FILDS_FISTS
5726 putc ('s', file);
5727 #endif
5728 return;
5729
5730 case 4:
5731 if (GET_MODE (x) == SFmode)
5732 {
5733 putc ('s', file);
5734 return;
5735 }
5736 else
5737 putc ('l', file);
5738 return;
5739
5740 case 12:
5741 case 16:
5742 putc ('t', file);
5743 return;
5744
5745 case 8:
5746 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5747 {
5748 #ifdef GAS_MNEMONICS
5749 putc ('q', file);
5750 #else
5751 putc ('l', file);
5752 putc ('l', file);
5753 #endif
5754 }
5755 else
5756 putc ('l', file);
5757 return;
5758
5759 default:
5760 abort ();
5761 }
5762
5763 case 'b':
5764 case 'w':
5765 case 'k':
5766 case 'q':
5767 case 'h':
5768 case 'y':
5769 case 'X':
5770 case 'P':
5771 break;
5772
5773 case 's':
5774 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5775 {
5776 PRINT_OPERAND (file, x, 0);
5777 putc (',', file);
5778 }
5779 return;
5780
5781 case 'D':
5782 /* Little bit of braindamage here. The SSE compare instructions
5783 	     use completely different names for the comparisons than the
5784 fp conditional moves. */
5785 switch (GET_CODE (x))
5786 {
5787 case EQ:
5788 case UNEQ:
5789 fputs ("eq", file);
5790 break;
5791 case LT:
5792 case UNLT:
5793 fputs ("lt", file);
5794 break;
5795 case LE:
5796 case UNLE:
5797 fputs ("le", file);
5798 break;
5799 case UNORDERED:
5800 fputs ("unord", file);
5801 break;
5802 case NE:
5803 case LTGT:
5804 fputs ("neq", file);
5805 break;
5806 case UNGE:
5807 case GE:
5808 fputs ("nlt", file);
5809 break;
5810 case UNGT:
5811 case GT:
5812 fputs ("nle", file);
5813 break;
5814 case ORDERED:
5815 fputs ("ord", file);
5816 break;
5817 default:
5818 abort ();
5819 break;
5820 }
5821 return;
5822 case 'C':
5823 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5824 return;
5825 case 'F':
5826 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5827 return;
5828
5829 /* Like above, but reverse condition */
5830 case 'c':
5831 /* Check to see if argument to %c is really a constant
5832 and not a condition code which needs to be reversed. */
5833 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5834 {
5835 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5836 return;
5837 }
5838 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5839 return;
5840 case 'f':
5841 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5842 return;
5843 case '+':
5844 {
5845 rtx x;
5846
5847 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5848 return;
5849
5850 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5851 if (x)
5852 {
5853 int pred_val = INTVAL (XEXP (x, 0));
5854
5855 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5856 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5857 {
5858 int taken = pred_val > REG_BR_PROB_BASE / 2;
5859 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5860
5861 		  /* Emit hints only in the case the default branch prediction
5862 		     heuristics would fail.  */
5863 if (taken != cputaken)
5864 {
5865 /* We use 3e (DS) prefix for taken branches and
5866 2e (CS) prefix for not taken branches. */
5867 if (taken)
5868 fputs ("ds ; ", file);
5869 else
5870 fputs ("cs ; ", file);
5871 }
5872 }
5873 }
5874 return;
5875 }
5876 default:
5877 output_operand_lossage ("invalid operand code `%c'", code);
5878 }
5879 }
5880
5881 if (GET_CODE (x) == REG)
5882 {
5883 PRINT_REG (x, code, file);
5884 }
5885
5886 else if (GET_CODE (x) == MEM)
5887 {
5888 /* No `byte ptr' prefix for call instructions. */
5889 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5890 {
5891 const char * size;
5892 switch (GET_MODE_SIZE (GET_MODE (x)))
5893 {
5894 case 1: size = "BYTE"; break;
5895 case 2: size = "WORD"; break;
5896 case 4: size = "DWORD"; break;
5897 case 8: size = "QWORD"; break;
5898 case 12: size = "XWORD"; break;
5899 case 16: size = "XMMWORD"; break;
5900 default:
5901 abort ();
5902 }
5903
5904 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5905 if (code == 'b')
5906 size = "BYTE";
5907 else if (code == 'w')
5908 size = "WORD";
5909 else if (code == 'k')
5910 size = "DWORD";
5911
5912 fputs (size, file);
5913 fputs (" PTR ", file);
5914 }
5915
5916 x = XEXP (x, 0);
5917 if (flag_pic && CONSTANT_ADDRESS_P (x))
5918 output_pic_addr_const (file, x, code);
5919 /* Avoid (%rip) for call operands. */
5920       else if (CONSTANT_ADDRESS_P (x) && code == 'P'
5921 && GET_CODE (x) != CONST_INT)
5922 output_addr_const (file, x);
5923 else
5924 output_address (x);
5925 }
5926
5927 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5928 {
5929 REAL_VALUE_TYPE r;
5930 long l;
5931
5932 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5933 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5934
5935 if (ASSEMBLER_DIALECT == ASM_ATT)
5936 putc ('$', file);
5937 fprintf (file, "0x%lx", l);
5938 }
5939
5940 /* These float cases don't actually occur as immediate operands. */
5941 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5942 {
5943 REAL_VALUE_TYPE r;
5944 char dstr[30];
5945
5946 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5947 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5948 fprintf (file, "%s", dstr);
5949 }
5950
5951 else if (GET_CODE (x) == CONST_DOUBLE
5952 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5953 {
5954 REAL_VALUE_TYPE r;
5955 char dstr[30];
5956
5957 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5958 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5959 fprintf (file, "%s", dstr);
5960 }
5961 else
5962 {
5963 if (code != 'P')
5964 {
5965 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5966 {
5967 if (ASSEMBLER_DIALECT == ASM_ATT)
5968 putc ('$', file);
5969 }
5970 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5971 || GET_CODE (x) == LABEL_REF)
5972 {
5973 if (ASSEMBLER_DIALECT == ASM_ATT)
5974 putc ('$', file);
5975 else
5976 fputs ("OFFSET FLAT:", file);
5977 }
5978 }
5979 if (GET_CODE (x) == CONST_INT)
5980 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5981 else if (flag_pic)
5982 output_pic_addr_const (file, x, code);
5983 else
5984 output_addr_const (file, x);
5985 }
5986 }
5987 \f
5988 /* Print a memory operand whose address is ADDR. */
5989
5990 void
5991 print_operand_address (file, addr)
5992 FILE *file;
5993 register rtx addr;
5994 {
5995 struct ix86_address parts;
5996 rtx base, index, disp;
5997 int scale;
5998
5999 if (! ix86_decompose_address (addr, &parts))
6000 abort ();
6001
6002 base = parts.base;
6003 index = parts.index;
6004 disp = parts.disp;
6005 scale = parts.scale;
6006
6007 if (!base && !index)
6008 {
6009       /* A displacement-only address requires special attention.  */
6010
6011 if (GET_CODE (disp) == CONST_INT)
6012 {
6013 if (ASSEMBLER_DIALECT == ASM_INTEL)
6014 {
6015 if (USER_LABEL_PREFIX[0] == 0)
6016 putc ('%', file);
6017 fputs ("ds:", file);
6018 }
6019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6020 }
6021 else if (flag_pic)
6022 output_pic_addr_const (file, addr, 0);
6023 else
6024 output_addr_const (file, addr);
6025
6026       /* Use the one byte shorter RIP-relative addressing in 64-bit mode.  */
6027 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6028 fputs ("(%rip)", file);
6029 }
6030 else
6031 {
6032 if (ASSEMBLER_DIALECT == ASM_ATT)
6033 {
6034 if (disp)
6035 {
6036 if (flag_pic)
6037 output_pic_addr_const (file, disp, 0);
6038 else if (GET_CODE (disp) == LABEL_REF)
6039 output_asm_label (disp);
6040 else
6041 output_addr_const (file, disp);
6042 }
6043
6044 putc ('(', file);
6045 if (base)
6046 PRINT_REG (base, 0, file);
6047 if (index)
6048 {
6049 putc (',', file);
6050 PRINT_REG (index, 0, file);
6051 if (scale != 1)
6052 fprintf (file, ",%d", scale);
6053 }
6054 putc (')', file);
6055 }
6056 else
6057 {
6058 rtx offset = NULL_RTX;
6059
6060 if (disp)
6061 {
6062 /* Pull out the offset of a symbol; print any symbol itself. */
6063 if (GET_CODE (disp) == CONST
6064 && GET_CODE (XEXP (disp, 0)) == PLUS
6065 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6066 {
6067 offset = XEXP (XEXP (disp, 0), 1);
6068 disp = gen_rtx_CONST (VOIDmode,
6069 XEXP (XEXP (disp, 0), 0));
6070 }
6071
6072 if (flag_pic)
6073 output_pic_addr_const (file, disp, 0);
6074 else if (GET_CODE (disp) == LABEL_REF)
6075 output_asm_label (disp);
6076 else if (GET_CODE (disp) == CONST_INT)
6077 offset = disp;
6078 else
6079 output_addr_const (file, disp);
6080 }
6081
6082 putc ('[', file);
6083 if (base)
6084 {
6085 PRINT_REG (base, 0, file);
6086 if (offset)
6087 {
6088 if (INTVAL (offset) >= 0)
6089 putc ('+', file);
6090 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6091 }
6092 }
6093 else if (offset)
6094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6095 else
6096 putc ('0', file);
6097
6098 if (index)
6099 {
6100 putc ('+', file);
6101 PRINT_REG (index, 0, file);
6102 if (scale != 1)
6103 fprintf (file, "*%d", scale);
6104 }
6105 putc (']', file);
6106 }
6107 }
6108 }
6109 \f
6110 /* Split one or more DImode RTL references into pairs of SImode
6111 references. The RTL can be REG, offsettable MEM, integer constant, or
6112 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6113 split and "num" is its length. lo_half and hi_half are output arrays
6114 that parallel "operands". */
6115
6116 void
6117 split_di (operands, num, lo_half, hi_half)
6118 rtx operands[];
6119 int num;
6120 rtx lo_half[], hi_half[];
6121 {
6122 while (num--)
6123 {
6124 rtx op = operands[num];
6125
6126       /* simplify_subreg refuses to split volatile memory addresses,
6127          but we still have to handle them.  */
6128 if (GET_CODE (op) == MEM)
6129 {
6130 lo_half[num] = adjust_address (op, SImode, 0);
6131 hi_half[num] = adjust_address (op, SImode, 4);
6132 }
6133 else
6134 {
6135 lo_half[num] = simplify_gen_subreg (SImode, op,
6136 GET_MODE (op) == VOIDmode
6137 ? DImode : GET_MODE (op), 0);
6138 hi_half[num] = simplify_gen_subreg (SImode, op,
6139 GET_MODE (op) == VOIDmode
6140 ? DImode : GET_MODE (op), 4);
6141 }
6142 }
6143 }
6144 /* Split one or more TImode RTL references into pairs of DImode
6145    references.  The RTL can be REG, offsettable MEM, integer constant, or
6146    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6147 split and "num" is its length. lo_half and hi_half are output arrays
6148 that parallel "operands". */
6149
6150 void
6151 split_ti (operands, num, lo_half, hi_half)
6152 rtx operands[];
6153 int num;
6154 rtx lo_half[], hi_half[];
6155 {
6156 while (num--)
6157 {
6158 rtx op = operands[num];
6159
6160       /* simplify_subreg refuses to split volatile memory addresses, but we
6161          still have to handle them.  */
6162 if (GET_CODE (op) == MEM)
6163 {
6164 lo_half[num] = adjust_address (op, DImode, 0);
6165 hi_half[num] = adjust_address (op, DImode, 8);
6166 }
6167 else
6168 {
6169 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6170 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6171 }
6172 }
6173 }
6174 \f
6175 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6176 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6177 is the expression of the binary operation. The output may either be
6178 emitted here, or returned to the caller, like all output_* functions.
6179
6180 There is no guarantee that the operands are the same mode, as they
6181 might be within FLOAT or FLOAT_EXTEND expressions. */
6182
6183 #ifndef SYSV386_COMPAT
6184 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6185 wants to fix the assemblers because that causes incompatibility
6186 with gcc. No-one wants to fix gcc because that causes
6187 incompatibility with assemblers... You can use the option of
6188 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6189 #define SYSV386_COMPAT 1
6190 #endif
6191
6192 const char *
6193 output_387_binary_op (insn, operands)
6194 rtx insn;
6195 rtx *operands;
6196 {
6197 static char buf[30];
6198 const char *p;
6199 const char *ssep;
6200 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6201
6202 #ifdef ENABLE_CHECKING
6203 /* Even if we do not want to check the inputs, this documents input
6204      constraints, which helps in understanding the following code.  */
6205 if (STACK_REG_P (operands[0])
6206 && ((REG_P (operands[1])
6207 && REGNO (operands[0]) == REGNO (operands[1])
6208 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6209 || (REG_P (operands[2])
6210 && REGNO (operands[0]) == REGNO (operands[2])
6211 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6212 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6213 ; /* ok */
6214 else if (!is_sse)
6215 abort ();
6216 #endif
6217
6218 switch (GET_CODE (operands[3]))
6219 {
6220 case PLUS:
6221 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6222 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6223 p = "fiadd";
6224 else
6225 p = "fadd";
6226 ssep = "add";
6227 break;
6228
6229 case MINUS:
6230 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6231 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6232 p = "fisub";
6233 else
6234 p = "fsub";
6235 ssep = "sub";
6236 break;
6237
6238 case MULT:
6239 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6240 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6241 p = "fimul";
6242 else
6243 p = "fmul";
6244 ssep = "mul";
6245 break;
6246
6247 case DIV:
6248 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6249 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6250 p = "fidiv";
6251 else
6252 p = "fdiv";
6253 ssep = "div";
6254 break;
6255
6256 default:
6257 abort ();
6258 }
6259
6260 if (is_sse)
6261 {
6262 strcpy (buf, ssep);
6263 if (GET_MODE (operands[0]) == SFmode)
6264 strcat (buf, "ss\t{%2, %0|%0, %2}");
6265 else
6266 strcat (buf, "sd\t{%2, %0|%0, %2}");
6267 return buf;
6268 }
6269 strcpy (buf, p);
6270
6271 switch (GET_CODE (operands[3]))
6272 {
6273 case MULT:
6274 case PLUS:
6275 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6276 {
6277 rtx temp = operands[2];
6278 operands[2] = operands[1];
6279 operands[1] = temp;
6280 }
6281
6282       /* We know operands[0] == operands[1].  */
6283
6284 if (GET_CODE (operands[2]) == MEM)
6285 {
6286 p = "%z2\t%2";
6287 break;
6288 }
6289
6290 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6291 {
6292 if (STACK_TOP_P (operands[0]))
6293 /* How is it that we are storing to a dead operand[2]?
6294 Well, presumably operands[1] is dead too. We can't
6295 store the result to st(0) as st(0) gets popped on this
6296 instruction. Instead store to operands[2] (which I
6297 think has to be st(1)). st(1) will be popped later.
6298 gcc <= 2.8.1 didn't have this check and generated
6299 assembly code that the Unixware assembler rejected. */
6300 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6301 else
6302 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6303 break;
6304 }
6305
6306 if (STACK_TOP_P (operands[0]))
6307 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6308 else
6309 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6310 break;
6311
6312 case MINUS:
6313 case DIV:
6314 if (GET_CODE (operands[1]) == MEM)
6315 {
6316 p = "r%z1\t%1";
6317 break;
6318 }
6319
6320 if (GET_CODE (operands[2]) == MEM)
6321 {
6322 p = "%z2\t%2";
6323 break;
6324 }
6325
6326 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6327 {
6328 #if SYSV386_COMPAT
6329 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6330 derived assemblers, confusingly reverse the direction of
6331 the operation for fsub{r} and fdiv{r} when the
6332 destination register is not st(0). The Intel assembler
6333 doesn't have this brain damage. Read !SYSV386_COMPAT to
6334 figure out what the hardware really does. */
6335 if (STACK_TOP_P (operands[0]))
6336 p = "{p\t%0, %2|rp\t%2, %0}";
6337 else
6338 p = "{rp\t%2, %0|p\t%0, %2}";
6339 #else
6340 if (STACK_TOP_P (operands[0]))
6341 /* As above for fmul/fadd, we can't store to st(0). */
6342 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6343 else
6344 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6345 #endif
6346 break;
6347 }
6348
6349 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6350 {
6351 #if SYSV386_COMPAT
6352 if (STACK_TOP_P (operands[0]))
6353 p = "{rp\t%0, %1|p\t%1, %0}";
6354 else
6355 p = "{p\t%1, %0|rp\t%0, %1}";
6356 #else
6357 if (STACK_TOP_P (operands[0]))
6358 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6359 else
6360 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6361 #endif
6362 break;
6363 }
6364
6365 if (STACK_TOP_P (operands[0]))
6366 {
6367 if (STACK_TOP_P (operands[1]))
6368 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6369 else
6370 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6371 break;
6372 }
6373 else if (STACK_TOP_P (operands[1]))
6374 {
6375 #if SYSV386_COMPAT
6376 p = "{\t%1, %0|r\t%0, %1}";
6377 #else
6378 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6379 #endif
6380 }
6381 else
6382 {
6383 #if SYSV386_COMPAT
6384 p = "{r\t%2, %0|\t%0, %2}";
6385 #else
6386 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6387 #endif
6388 }
6389 break;
6390
6391 default:
6392 abort ();
6393 }
6394
6395 strcat (buf, p);
6396 return buf;
6397 }
6398
6399 /* Output code to initialize control word copies used by
6400    trunc?f?i patterns.  NORMAL is set to the current control word, while
6401    ROUND_DOWN is set to a control word that rounds toward zero (truncates).  */
6402 void
6403 emit_i387_cw_initialization (normal, round_down)
6404 rtx normal, round_down;
6405 {
6406 rtx reg = gen_reg_rtx (HImode);
6407
6408 emit_insn (gen_x86_fnstcw_1 (normal));
6409 emit_move_insn (reg, normal);
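  /* Set both rounding-control bits (bits 10 and 11 of the i387 control
     word) so that stores truncate; use a partial-register insert when it
     is cheap, otherwise a 16-bit OR.  */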
6410 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6411 && !TARGET_64BIT)
6412 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6413 else
6414 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6415 emit_move_insn (round_down, reg);
6416 }
6417
6418 /* Output code for INSN to convert a float to a signed int. OPERANDS
6419 are the insn operands. The output may be [HSD]Imode and the input
6420 operand may be [SDX]Fmode. */
6421
6422 const char *
6423 output_fix_trunc (insn, operands)
6424 rtx insn;
6425 rtx *operands;
6426 {
6427 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6428 int dimode_p = GET_MODE (operands[0]) == DImode;
6429
6430 /* Jump through a hoop or two for DImode, since the hardware has no
6431 non-popping instruction. We used to do this a different way, but
6432 that was somewhat fragile and broke with post-reload splitters. */
6433 if (dimode_p && !stack_top_dies)
6434 output_asm_insn ("fld\t%y1", operands);
6435
6436 if (!STACK_TOP_P (operands[1]))
6437 abort ();
6438
6439 if (GET_CODE (operands[0]) != MEM)
6440 abort ();
6441
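  /* Temporarily switch to the alternate control word in operand 3, emit
     the store, then restore the original control word from operand 2.  */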
6442 output_asm_insn ("fldcw\t%3", operands);
6443 if (stack_top_dies || dimode_p)
6444 output_asm_insn ("fistp%z0\t%0", operands);
6445 else
6446 output_asm_insn ("fist%z0\t%0", operands);
6447 output_asm_insn ("fldcw\t%2", operands);
6448
6449 return "";
6450 }
6451
6452 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6453 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6454 when fucom should be used. */
6455
6456 const char *
6457 output_fp_compare (insn, operands, eflags_p, unordered_p)
6458 rtx insn;
6459 rtx *operands;
6460 int eflags_p, unordered_p;
6461 {
6462 int stack_top_dies;
6463 rtx cmp_op0 = operands[0];
6464 rtx cmp_op1 = operands[1];
6465 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6466
6467 if (eflags_p == 2)
6468 {
6469 cmp_op0 = cmp_op1;
6470 cmp_op1 = operands[2];
6471 }
6472 if (is_sse)
6473 {
6474 if (GET_MODE (operands[0]) == SFmode)
6475 if (unordered_p)
6476 return "ucomiss\t{%1, %0|%0, %1}";
6477 else
6478 	      return "comiss\t{%1, %0|%0, %1}";
6479 else
6480 if (unordered_p)
6481 return "ucomisd\t{%1, %0|%0, %1}";
6482 else
6483 	      return "comisd\t{%1, %0|%0, %1}";
6484 }
6485
6486 if (! STACK_TOP_P (cmp_op0))
6487 abort ();
6488
6489 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6490
6491 if (STACK_REG_P (cmp_op1)
6492 && stack_top_dies
6493 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6494 && REGNO (cmp_op1) != FIRST_STACK_REG)
6495 {
6496       /* If the top of the 387 stack dies, and the other operand
6497 	 is also a stack register that dies, then this must be a
6498 	 `fcompp' float compare.  */
6499
6500 if (eflags_p == 1)
6501 {
6502 /* There is no double popping fcomi variant. Fortunately,
6503 eflags is immune from the fstp's cc clobbering. */
6504 if (unordered_p)
6505 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6506 else
6507 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6508 return "fstp\t%y0";
6509 }
6510 else
6511 {
6512 if (eflags_p == 2)
6513 {
6514 if (unordered_p)
6515 return "fucompp\n\tfnstsw\t%0";
6516 else
6517 return "fcompp\n\tfnstsw\t%0";
6518 }
6519 else
6520 {
6521 if (unordered_p)
6522 return "fucompp";
6523 else
6524 return "fcompp";
6525 }
6526 }
6527 }
6528 else
6529 {
6530 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6531
6532 static const char * const alt[24] =
6533 {
6534 "fcom%z1\t%y1",
6535 "fcomp%z1\t%y1",
6536 "fucom%z1\t%y1",
6537 "fucomp%z1\t%y1",
6538
6539 "ficom%z1\t%y1",
6540 "ficomp%z1\t%y1",
6541 NULL,
6542 NULL,
6543
6544 "fcomi\t{%y1, %0|%0, %y1}",
6545 "fcomip\t{%y1, %0|%0, %y1}",
6546 "fucomi\t{%y1, %0|%0, %y1}",
6547 "fucomip\t{%y1, %0|%0, %y1}",
6548
6549 NULL,
6550 NULL,
6551 NULL,
6552 NULL,
6553
6554 "fcom%z2\t%y2\n\tfnstsw\t%0",
6555 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6556 "fucom%z2\t%y2\n\tfnstsw\t%0",
6557 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6558
6559 "ficom%z2\t%y2\n\tfnstsw\t%0",
6560 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6561 NULL,
6562 NULL
6563 };
6564
6565 int mask;
6566 const char *ret;
6567
6568 mask = eflags_p << 3;
6569 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6570 mask |= unordered_p << 1;
6571 mask |= stack_top_dies;
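      /* For example, an ordered fcomi compare (eflags_p == 1) of FP operands
	 where the stack top stays live gives mask == 8 and selects
	 "fcomi\t{%y1, %0|%0, %y1}" above.  */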
6572
6573 if (mask >= 24)
6574 abort ();
6575 ret = alt[mask];
6576 if (ret == NULL)
6577 abort ();
6578
6579 return ret;
6580 }
6581 }
6582
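/* Output one entry of an absolute jump table: a pointer-sized reference to
   the code label numbered VALUE.  */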
6583 void
6584 ix86_output_addr_vec_elt (file, value)
6585 FILE *file;
6586 int value;
6587 {
6588 const char *directive = ASM_LONG;
6589
6590 if (TARGET_64BIT)
6591 {
6592 #ifdef ASM_QUAD
6593 directive = ASM_QUAD;
6594 #else
6595 abort ();
6596 #endif
6597 }
6598
6599 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6600 }
6601
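/* Output one entry of a relative (PIC) jump table for the code label
   numbered VALUE, expressed relative to the GOT or to the label numbered
   REL, depending on the target.  */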
6602 void
6603 ix86_output_addr_diff_elt (file, value, rel)
6604 FILE *file;
6605 int value, rel;
6606 {
6607 if (TARGET_64BIT)
6608 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6609 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6610 else if (HAVE_AS_GOTOFF_IN_DATA)
6611 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6612 else
6613 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6614 ASM_LONG, LPREFIX, value);
6615 }
6616 \f
6617 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6618 for the target. */
6619
6620 void
6621 ix86_expand_clear (dest)
6622 rtx dest;
6623 {
6624 rtx tmp;
6625
6626 /* We play register width games, which are only valid after reload. */
6627 if (!reload_completed)
6628 abort ();
6629
6630 /* Avoid HImode and its attendant prefix byte. */
6631 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6632 dest = gen_rtx_REG (SImode, REGNO (dest));
6633
6634 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6635
6636 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6637 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6638 {
6639 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6640 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6641 }
6642
6643 emit_insn (tmp);
6644 }
6645
6646 void
6647 ix86_expand_move (mode, operands)
6648 enum machine_mode mode;
6649 rtx operands[];
6650 {
6651 int strict = (reload_in_progress || reload_completed);
6652 rtx insn;
6653
6654 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6655 {
6656 /* Emit insns to move operands[1] into operands[0]. */
6657
6658 if (GET_CODE (operands[0]) == MEM)
6659 operands[1] = force_reg (Pmode, operands[1]);
6660 else
6661 {
6662 rtx temp = operands[0];
6663 if (GET_CODE (temp) != REG)
6664 temp = gen_reg_rtx (Pmode);
6665 temp = legitimize_pic_address (operands[1], temp);
6666 if (temp == operands[0])
6667 return;
6668 operands[1] = temp;
6669 }
6670 }
6671 else
6672 {
6673 if (GET_CODE (operands[0]) == MEM
6674 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6675 || !push_operand (operands[0], mode))
6676 && GET_CODE (operands[1]) == MEM)
6677 operands[1] = force_reg (mode, operands[1]);
6678
6679 if (push_operand (operands[0], mode)
6680 && ! general_no_elim_operand (operands[1], mode))
6681 operands[1] = copy_to_mode_reg (mode, operands[1]);
6682
6683       /* Force large constants in 64-bit compilation into a register
6684 to get them CSEed. */
6685 if (TARGET_64BIT && mode == DImode
6686 && immediate_operand (operands[1], mode)
6687 && !x86_64_zero_extended_value (operands[1])
6688 && !register_operand (operands[0], mode)
6689 && optimize && !reload_completed && !reload_in_progress)
6690 operands[1] = copy_to_mode_reg (mode, operands[1]);
6691
6692 if (FLOAT_MODE_P (mode))
6693 {
6694 /* If we are loading a floating point constant to a register,
6695 force the value to memory now, since we'll get better code
6696 	     out of the back end.  */
6697
6698 if (strict)
6699 ;
6700 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6701 && register_operand (operands[0], mode))
6702 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6703 }
6704 }
6705
6706 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6707
6708 emit_insn (insn);
6709 }
6710
6711 void
6712 ix86_expand_vector_move (mode, operands)
6713 enum machine_mode mode;
6714 rtx operands[];
6715 {
6716 /* Force constants other than zero into memory. We do not know how
6717 the instructions used to build constants modify the upper 64 bits
6718      of the register; once we have that information we may be able
6719 to handle some of them more efficiently. */
6720 if ((reload_in_progress | reload_completed) == 0
6721 && register_operand (operands[0], mode)
6722 && CONSTANT_P (operands[1]))
6723 {
6724 rtx addr = gen_reg_rtx (Pmode);
6725 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6726 operands[1] = gen_rtx_MEM (mode, addr);
6727 }
6728
6729 /* Make operand1 a register if it isn't already. */
6730 if ((reload_in_progress | reload_completed) == 0
6731 && !register_operand (operands[0], mode)
6732 && !register_operand (operands[1], mode)
6733 && operands[1] != CONST0_RTX (mode))
6734 {
6735 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6736 emit_move_insn (operands[0], temp);
6737 return;
6738 }
6739
6740 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6741 }
6742
6743 /* Attempt to expand a binary operator. Make the expansion closer to the
6744    actual machine, than just general_operand, which will allow 3 separate
6745 memory references (one output, two input) in a single insn. */
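/* For example, for `a = b + c' with all three operands in memory, we load
   one source into a register and compute into a register first, since x86
   arithmetic instructions accept at most one memory operand.  */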
6746
6747 void
6748 ix86_expand_binary_operator (code, mode, operands)
6749 enum rtx_code code;
6750 enum machine_mode mode;
6751 rtx operands[];
6752 {
6753 int matching_memory;
6754 rtx src1, src2, dst, op, clob;
6755
6756 dst = operands[0];
6757 src1 = operands[1];
6758 src2 = operands[2];
6759
6760 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6761 if (GET_RTX_CLASS (code) == 'c'
6762 && (rtx_equal_p (dst, src2)
6763 || immediate_operand (src1, mode)))
6764 {
6765 rtx temp = src1;
6766 src1 = src2;
6767 src2 = temp;
6768 }
6769
6770 /* If the destination is memory, and we do not have matching source
6771 operands, do things in registers. */
6772 matching_memory = 0;
6773 if (GET_CODE (dst) == MEM)
6774 {
6775 if (rtx_equal_p (dst, src1))
6776 matching_memory = 1;
6777 else if (GET_RTX_CLASS (code) == 'c'
6778 && rtx_equal_p (dst, src2))
6779 matching_memory = 2;
6780 else
6781 dst = gen_reg_rtx (mode);
6782 }
6783
6784 /* Both source operands cannot be in memory. */
6785 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6786 {
6787 if (matching_memory != 2)
6788 src2 = force_reg (mode, src2);
6789 else
6790 src1 = force_reg (mode, src1);
6791 }
6792
6793 /* If the operation is not commutable, source 1 cannot be a constant
6794 or non-matching memory. */
6795 if ((CONSTANT_P (src1)
6796 || (!matching_memory && GET_CODE (src1) == MEM))
6797 && GET_RTX_CLASS (code) != 'c')
6798 src1 = force_reg (mode, src1);
6799
6800 /* If optimizing, copy to regs to improve CSE */
6801 if (optimize && ! no_new_pseudos)
6802 {
6803 if (GET_CODE (dst) == MEM)
6804 dst = gen_reg_rtx (mode);
6805 if (GET_CODE (src1) == MEM)
6806 src1 = force_reg (mode, src1);
6807 if (GET_CODE (src2) == MEM)
6808 src2 = force_reg (mode, src2);
6809 }
6810
6811 /* Emit the instruction. */
6812
6813 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6814 if (reload_in_progress)
6815 {
6816 /* Reload doesn't know about the flags register, and doesn't know that
6817 it doesn't want to clobber it. We can only do this with PLUS. */
6818 if (code != PLUS)
6819 abort ();
6820 emit_insn (op);
6821 }
6822 else
6823 {
6824 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6825 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6826 }
6827
6828 /* Fix up the destination if needed. */
6829 if (dst != operands[0])
6830 emit_move_insn (operands[0], dst);
6831 }
6832
6833 /* Return TRUE or FALSE depending on whether the binary operator meets the
6834 appropriate constraints. */
6835
6836 int
6837 ix86_binary_operator_ok (code, mode, operands)
6838 enum rtx_code code;
6839 enum machine_mode mode ATTRIBUTE_UNUSED;
6840 rtx operands[3];
6841 {
6842 /* Both source operands cannot be in memory. */
6843 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6844 return 0;
6845 /* If the operation is not commutable, source 1 cannot be a constant. */
6846 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6847 return 0;
6848 /* If the destination is memory, we must have a matching source operand. */
6849 if (GET_CODE (operands[0]) == MEM
6850 && ! (rtx_equal_p (operands[0], operands[1])
6851 || (GET_RTX_CLASS (code) == 'c'
6852 && rtx_equal_p (operands[0], operands[2]))))
6853 return 0;
6854 /* If the operation is not commutable and the source 1 is memory, we must
6855 have a matching destination. */
6856 if (GET_CODE (operands[1]) == MEM
6857 && GET_RTX_CLASS (code) != 'c'
6858 && ! rtx_equal_p (operands[0], operands[1]))
6859 return 0;
6860 return 1;
6861 }
6862
6863 /* Attempt to expand a unary operator. Make the expansion closer to the
6864    actual machine, than just general_operand, which will allow 2 separate
6865 memory references (one output, one input) in a single insn. */
6866
6867 void
6868 ix86_expand_unary_operator (code, mode, operands)
6869 enum rtx_code code;
6870 enum machine_mode mode;
6871 rtx operands[];
6872 {
6873 int matching_memory;
6874 rtx src, dst, op, clob;
6875
6876 dst = operands[0];
6877 src = operands[1];
6878
6879 /* If the destination is memory, and we do not have matching source
6880 operands, do things in registers. */
6881 matching_memory = 0;
6882 if (GET_CODE (dst) == MEM)
6883 {
6884 if (rtx_equal_p (dst, src))
6885 matching_memory = 1;
6886 else
6887 dst = gen_reg_rtx (mode);
6888 }
6889
6890 /* When source operand is memory, destination must match. */
6891 if (!matching_memory && GET_CODE (src) == MEM)
6892 src = force_reg (mode, src);
6893
6894 /* If optimizing, copy to regs to improve CSE */
6895 if (optimize && ! no_new_pseudos)
6896 {
6897 if (GET_CODE (dst) == MEM)
6898 dst = gen_reg_rtx (mode);
6899 if (GET_CODE (src) == MEM)
6900 src = force_reg (mode, src);
6901 }
6902
6903 /* Emit the instruction. */
6904
6905 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6906 if (reload_in_progress || code == NOT)
6907 {
6908 /* Reload doesn't know about the flags register, and doesn't know that
6909 it doesn't want to clobber it. */
6910 if (code != NOT)
6911 abort ();
6912 emit_insn (op);
6913 }
6914 else
6915 {
6916 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6917 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6918 }
6919
6920 /* Fix up the destination if needed. */
6921 if (dst != operands[0])
6922 emit_move_insn (operands[0], dst);
6923 }
6924
6925 /* Return TRUE or FALSE depending on whether the unary operator meets the
6926 appropriate constraints. */
6927
6928 int
6929 ix86_unary_operator_ok (code, mode, operands)
6930 enum rtx_code code ATTRIBUTE_UNUSED;
6931 enum machine_mode mode ATTRIBUTE_UNUSED;
6932 rtx operands[2] ATTRIBUTE_UNUSED;
6933 {
6934   /* If one of the operands is memory, the source and destination must match.  */
6935 if ((GET_CODE (operands[0]) == MEM
6936 || GET_CODE (operands[1]) == MEM)
6937 && ! rtx_equal_p (operands[0], operands[1]))
6938 return FALSE;
6939 return TRUE;
6940 }
6941
6942 /* Return TRUE or FALSE depending on whether the first SET in INSN
6943    has source and destination with matching CC modes, and whether the
6944 CC mode is at least as constrained as REQ_MODE. */
6945
6946 int
6947 ix86_match_ccmode (insn, req_mode)
6948 rtx insn;
6949 enum machine_mode req_mode;
6950 {
6951 rtx set;
6952 enum machine_mode set_mode;
6953
6954 set = PATTERN (insn);
6955 if (GET_CODE (set) == PARALLEL)
6956 set = XVECEXP (set, 0, 0);
6957 if (GET_CODE (set) != SET)
6958 abort ();
6959 if (GET_CODE (SET_SRC (set)) != COMPARE)
6960 abort ();
6961
6962 set_mode = GET_MODE (SET_DEST (set));
6963 switch (set_mode)
6964 {
6965 case CCNOmode:
6966 if (req_mode != CCNOmode
6967 && (req_mode != CCmode
6968 || XEXP (SET_SRC (set), 1) != const0_rtx))
6969 return 0;
6970 break;
6971 case CCmode:
6972 if (req_mode == CCGCmode)
6973 return 0;
6974 /* FALLTHRU */
6975 case CCGCmode:
6976 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6977 return 0;
6978 /* FALLTHRU */
6979 case CCGOCmode:
6980 if (req_mode == CCZmode)
6981 return 0;
6982 /* FALLTHRU */
6983 case CCZmode:
6984 break;
6985
6986 default:
6987 abort ();
6988 }
6989
6990 return (GET_MODE (SET_SRC (set)) == set_mode);
6991 }
6992
6993 /* Generate insn patterns to do an integer compare of OPERANDS. */
6994
6995 static rtx
6996 ix86_expand_int_compare (code, op0, op1)
6997 enum rtx_code code;
6998 rtx op0, op1;
6999 {
7000 enum machine_mode cmpmode;
7001 rtx tmp, flags;
7002
7003 cmpmode = SELECT_CC_MODE (code, op0, op1);
7004 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7005
7006 /* This is very simple, but making the interface the same as in the
7007 FP case makes the rest of the code easier. */
7008 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7009 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7010
7011 /* Return the test that should be put into the flags user, i.e.
7012 the bcc, scc, or cmov instruction. */
7013 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7014 }
7015
7016 /* Figure out whether to use ordered or unordered fp comparisons.
7017 Return the appropriate mode to use. */
7018
7019 enum machine_mode
7020 ix86_fp_compare_mode (code)
7021 enum rtx_code code ATTRIBUTE_UNUSED;
7022 {
7023 /* ??? In order to make all comparisons reversible, we do all comparisons
7024 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7025      all forms of trapping and nontrapping comparisons, we can make inequality
7026 comparisons trapping again, since it results in better code when using
7027 FCOM based compares. */
7028 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7029 }
7030
7031 enum machine_mode
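/* Return the flags (CC) mode in which a comparison CODE of OP0 and OP1
   should be done, i.e. the mode describing which flag bits the comparison
   actually needs.  */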
7032 ix86_cc_mode (code, op0, op1)
7033 enum rtx_code code;
7034 rtx op0, op1;
7035 {
7036 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7037 return ix86_fp_compare_mode (code);
7038 switch (code)
7039 {
7040 /* Only zero flag is needed. */
7041 case EQ: /* ZF=0 */
7042 case NE: /* ZF!=0 */
7043 return CCZmode;
7044 /* Codes needing carry flag. */
7045 case GEU: /* CF=0 */
7046 case GTU: /* CF=0 & ZF=0 */
7047 case LTU: /* CF=1 */
7048 case LEU: /* CF=1 | ZF=1 */
7049 return CCmode;
7050 /* Codes possibly doable only with sign flag when
7051 comparing against zero. */
7052 case GE: /* SF=OF or SF=0 */
7053 case LT: /* SF<>OF or SF=1 */
7054 if (op1 == const0_rtx)
7055 return CCGOCmode;
7056 else
7057 /* For other cases Carry flag is not required. */
7058 return CCGCmode;
7059 /* Codes doable only with sign flag when comparing
7060        against zero, but for which there is no jump instruction;
7061        we therefore use relational tests that rely on the
7062        overflow flag being zero.  */
7063 case GT: /* ZF=0 & SF=OF */
7064 case LE: /* ZF=1 | SF<>OF */
7065 if (op1 == const0_rtx)
7066 return CCNOmode;
7067 else
7068 return CCGCmode;
7069       /* The strcmp pattern does a (use flags), and combine may ask us
7070 	 for the proper mode.  */
7071 case USE:
7072 return CCmode;
7073 default:
7074 abort ();
7075 }
7076 }
7077
7078 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7079
7080 int
7081 ix86_use_fcomi_compare (code)
7082 enum rtx_code code ATTRIBUTE_UNUSED;
7083 {
7084 enum rtx_code swapped_code = swap_condition (code);
7085 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7086 || (ix86_fp_comparison_cost (swapped_code)
7087 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7088 }
7089
7090 /* Swap, force into registers, or otherwise massage the two operands
7091 to a fp comparison. The operands are updated in place; the new
7092    comparison code is returned.  */
7093
7094 static enum rtx_code
7095 ix86_prepare_fp_compare_args (code, pop0, pop1)
7096 enum rtx_code code;
7097 rtx *pop0, *pop1;
7098 {
7099 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7100 rtx op0 = *pop0, op1 = *pop1;
7101 enum machine_mode op_mode = GET_MODE (op0);
7102 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7103
7104 /* All of the unordered compare instructions only work on registers.
7105 The same is true of the XFmode compare instructions. The same is
7106 true of the fcomi compare instructions. */
7107
7108 if (!is_sse
7109 && (fpcmp_mode == CCFPUmode
7110 || op_mode == XFmode
7111 || op_mode == TFmode
7112 || ix86_use_fcomi_compare (code)))
7113 {
7114 op0 = force_reg (op_mode, op0);
7115 op1 = force_reg (op_mode, op1);
7116 }
7117 else
7118 {
7119 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7120 things around if they appear profitable, otherwise force op0
7121 into a register. */
7122
7123 if (standard_80387_constant_p (op0) == 0
7124 || (GET_CODE (op0) == MEM
7125 && ! (standard_80387_constant_p (op1) == 0
7126 || GET_CODE (op1) == MEM)))
7127 {
7128 rtx tmp;
7129 tmp = op0, op0 = op1, op1 = tmp;
7130 code = swap_condition (code);
7131 }
7132
7133 if (GET_CODE (op0) != REG)
7134 op0 = force_reg (op_mode, op0);
7135
7136 if (CONSTANT_P (op1))
7137 {
7138 if (standard_80387_constant_p (op1))
7139 op1 = force_reg (op_mode, op1);
7140 else
7141 op1 = validize_mem (force_const_mem (op_mode, op1));
7142 }
7143 }
7144
7145 /* Try to rearrange the comparison to make it cheaper. */
7146 if (ix86_fp_comparison_cost (code)
7147 > ix86_fp_comparison_cost (swap_condition (code))
7148 && (GET_CODE (op1) == REG || !no_new_pseudos))
7149 {
7150 rtx tmp;
7151 tmp = op0, op0 = op1, op1 = tmp;
7152 code = swap_condition (code);
7153 if (GET_CODE (op0) != REG)
7154 op0 = force_reg (op_mode, op0);
7155 }
7156
7157 *pop0 = op0;
7158 *pop1 = op1;
7159 return code;
7160 }
7161
7162 /* Convert the comparison code we use to represent an FP comparison to the
7163    integer code that will result in a proper branch.  Return UNKNOWN if no such code
7164 is available. */
7165 static enum rtx_code
7166 ix86_fp_compare_code_to_integer (code)
7167 enum rtx_code code;
7168 {
7169 switch (code)
7170 {
7171 case GT:
7172 return GTU;
7173 case GE:
7174 return GEU;
7175 case ORDERED:
7176 case UNORDERED:
7177 return code;
7178 break;
7179 case UNEQ:
7180 return EQ;
7181 break;
7182 case UNLT:
7183 return LTU;
7184 break;
7185 case UNLE:
7186 return LEU;
7187 break;
7188 case LTGT:
7189 return NE;
7190 break;
7191 default:
7192 return UNKNOWN;
7193 }
7194 }
7195
7196 /* Split comparison code CODE into comparisons we can do using branch
7197    instructions.  BYPASS_CODE is the comparison code for a branch that will
7198    branch around FIRST_CODE and SECOND_CODE.  If one of the branches
7199    is not required, its code is set to NIL.
7200 We never require more than two branches. */
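/* For example, with TARGET_IEEE_FP an EQ comparison becomes FIRST_CODE == UNEQ
   with BYPASS_CODE == UNORDERED: the bypass branch skips the equality test
   when the operands compare unordered.  */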
7201 static void
7202 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7203 enum rtx_code code, *bypass_code, *first_code, *second_code;
7204 {
7205 *first_code = code;
7206 *bypass_code = NIL;
7207 *second_code = NIL;
7208
7209 /* The fcomi comparison sets flags as follows:
7210
7211 cmp ZF PF CF
7212 > 0 0 0
7213 < 0 0 1
7214 = 1 0 0
7215 un 1 1 1 */
7216
7217 switch (code)
7218 {
7219 case GT: /* GTU - CF=0 & ZF=0 */
7220 case GE: /* GEU - CF=0 */
7221 case ORDERED: /* PF=0 */
7222 case UNORDERED: /* PF=1 */
7223 case UNEQ: /* EQ - ZF=1 */
7224 case UNLT: /* LTU - CF=1 */
7225 case UNLE: /* LEU - CF=1 | ZF=1 */
7226 case LTGT: /* EQ - ZF=0 */
7227 break;
7228 case LT: /* LTU - CF=1 - fails on unordered */
7229 *first_code = UNLT;
7230 *bypass_code = UNORDERED;
7231 break;
7232 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7233 *first_code = UNLE;
7234 *bypass_code = UNORDERED;
7235 break;
7236 case EQ: /* EQ - ZF=1 - fails on unordered */
7237 *first_code = UNEQ;
7238 *bypass_code = UNORDERED;
7239 break;
7240 case NE: /* NE - ZF=0 - fails on unordered */
7241 *first_code = LTGT;
7242 *second_code = UNORDERED;
7243 break;
7244 case UNGE: /* GEU - CF=0 - fails on unordered */
7245 *first_code = GE;
7246 *second_code = UNORDERED;
7247 break;
7248 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7249 *first_code = GT;
7250 *second_code = UNORDERED;
7251 break;
7252 default:
7253 abort ();
7254 }
7255 if (!TARGET_IEEE_FP)
7256 {
7257 *second_code = NIL;
7258 *bypass_code = NIL;
7259 }
7260 }
7261
7262 /* Return the cost of a comparison done with fcom plus arithmetic operations on AX.
7263    All of the following functions use the number of instructions as the cost metric.
7264    In the future this should be tweaked to compute bytes for optimize_size and
7265    to take into account the performance of various instructions on various CPUs.  */
7266 static int
7267 ix86_fp_comparison_arithmetics_cost (code)
7268 enum rtx_code code;
7269 {
7270 if (!TARGET_IEEE_FP)
7271 return 4;
7272 /* The cost of code output by ix86_expand_fp_compare. */
7273 switch (code)
7274 {
7275 case UNLE:
7276 case UNLT:
7277 case LTGT:
7278 case GT:
7279 case GE:
7280 case UNORDERED:
7281 case ORDERED:
7282 case UNEQ:
7283 return 4;
7284 break;
7285 case LT:
7286 case NE:
7287 case EQ:
7288 case UNGE:
7289 return 5;
7290 break;
7291 case LE:
7292 case UNGT:
7293 return 6;
7294 break;
7295 default:
7296 abort ();
7297 }
7298 }
7299
7300 /* Return cost of comparison done using fcomi operation.
7301 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7302 static int
7303 ix86_fp_comparison_fcomi_cost (code)
7304 enum rtx_code code;
7305 {
7306 enum rtx_code bypass_code, first_code, second_code;
7307   /* Return an arbitrarily high cost when the instruction is not
7308      supported - this prevents gcc from using it.  */
7309 if (!TARGET_CMOVE)
7310 return 1024;
7311 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7312 return (bypass_code != NIL || second_code != NIL) + 2;
7313 }
7314
7315 /* Return cost of comparison done using sahf operation.
7316 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7317 static int
7318 ix86_fp_comparison_sahf_cost (code)
7319 enum rtx_code code;
7320 {
7321 enum rtx_code bypass_code, first_code, second_code;
7322   /* Return an arbitrarily high cost when the instruction is not
7323      preferred - this prevents gcc from using it.  */
7324 if (!TARGET_USE_SAHF && !optimize_size)
7325 return 1024;
7326 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7327 return (bypass_code != NIL || second_code != NIL) + 3;
7328 }
7329
7330 /* Compute cost of the comparison done using any method.
7331 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7332 static int
7333 ix86_fp_comparison_cost (code)
7334 enum rtx_code code;
7335 {
7336 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7337 int min;
7338
7339 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7340 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7341
7342 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7343 if (min > sahf_cost)
7344 min = sahf_cost;
7345 if (min > fcomi_cost)
7346 min = fcomi_cost;
7347 return min;
7348 }
7349
7350 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7351
7352 static rtx
7353 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7354 enum rtx_code code;
7355 rtx op0, op1, scratch;
7356 rtx *second_test;
7357 rtx *bypass_test;
7358 {
7359 enum machine_mode fpcmp_mode, intcmp_mode;
7360 rtx tmp, tmp2;
7361 int cost = ix86_fp_comparison_cost (code);
7362 enum rtx_code bypass_code, first_code, second_code;
7363
7364 fpcmp_mode = ix86_fp_compare_mode (code);
7365 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7366
7367 if (second_test)
7368 *second_test = NULL_RTX;
7369 if (bypass_test)
7370 *bypass_test = NULL_RTX;
7371
7372 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7373
7374 /* Do fcomi/sahf based test when profitable. */
7375 if ((bypass_code == NIL || bypass_test)
7376 && (second_code == NIL || second_test)
7377 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7378 {
7379 if (TARGET_CMOVE)
7380 {
7381 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7382 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7383 tmp);
7384 emit_insn (tmp);
7385 }
7386 else
7387 {
7388 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7389 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7390 if (!scratch)
7391 scratch = gen_reg_rtx (HImode);
7392 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7393 emit_insn (gen_x86_sahf_1 (scratch));
7394 }
7395
7396 /* The FP codes work out to act like unsigned. */
7397 intcmp_mode = fpcmp_mode;
7398 code = first_code;
7399 if (bypass_code != NIL)
7400 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7401 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7402 const0_rtx);
7403 if (second_code != NIL)
7404 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7405 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7406 const0_rtx);
7407 }
7408 else
7409 {
7410 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7411 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7412 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7413 if (!scratch)
7414 scratch = gen_reg_rtx (HImode);
7415 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7416
7417 /* In the unordered case, we have to check C2 for NaN's, which
7418 doesn't happen to work out to anything nice combination-wise.
7419 So do some bit twiddling on the value we've got in AH to come
7420 up with an appropriate set of condition codes. */
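      /* After fnstsw, AH holds status-word bits 8..15, so in the masks
	 below 0x01 is C0, 0x04 is C2, 0x40 is C3 and 0x45 is C0|C2|C3.  */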
7421
7422 intcmp_mode = CCNOmode;
7423 switch (code)
7424 {
7425 case GT:
7426 case UNGT:
7427 if (code == GT || !TARGET_IEEE_FP)
7428 {
7429 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7430 code = EQ;
7431 }
7432 else
7433 {
7434 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7435 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7436 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7437 intcmp_mode = CCmode;
7438 code = GEU;
7439 }
7440 break;
7441 case LT:
7442 case UNLT:
7443 if (code == LT && TARGET_IEEE_FP)
7444 {
7445 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7446 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7447 intcmp_mode = CCmode;
7448 code = EQ;
7449 }
7450 else
7451 {
7452 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7453 code = NE;
7454 }
7455 break;
7456 case GE:
7457 case UNGE:
7458 if (code == GE || !TARGET_IEEE_FP)
7459 {
7460 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7461 code = EQ;
7462 }
7463 else
7464 {
7465 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7466 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7467 GEN_INT (0x01)));
7468 code = NE;
7469 }
7470 break;
7471 case LE:
7472 case UNLE:
7473 if (code == LE && TARGET_IEEE_FP)
7474 {
7475 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7476 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7477 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7478 intcmp_mode = CCmode;
7479 code = LTU;
7480 }
7481 else
7482 {
7483 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7484 code = NE;
7485 }
7486 break;
7487 case EQ:
7488 case UNEQ:
7489 if (code == EQ && TARGET_IEEE_FP)
7490 {
7491 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7492 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7493 intcmp_mode = CCmode;
7494 code = EQ;
7495 }
7496 else
7497 {
7498 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7499 code = NE;
7500 break;
7501 }
7502 break;
7503 case NE:
7504 case LTGT:
7505 if (code == NE && TARGET_IEEE_FP)
7506 {
7507 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7508 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7509 GEN_INT (0x40)));
7510 code = NE;
7511 }
7512 else
7513 {
7514 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7515 code = EQ;
7516 }
7517 break;
7518
7519 case UNORDERED:
7520 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7521 code = NE;
7522 break;
7523 case ORDERED:
7524 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7525 code = EQ;
7526 break;
7527
7528 default:
7529 abort ();
7530 }
7531 }
7532
7533 /* Return the test that should be put into the flags user, i.e.
7534 the bcc, scc, or cmov instruction. */
7535 return gen_rtx_fmt_ee (code, VOIDmode,
7536 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7537 const0_rtx);
7538 }
7539
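/* Expand a comparison of ix86_compare_op0 and ix86_compare_op1 using CODE,
   returning the rtx to give to the flags user (the bcc, scc or cmov insn).
   *SECOND_TEST and *BYPASS_TEST, when non-null, receive any additional
   tests required by the FP comparison.  */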
7540 rtx
7541 ix86_expand_compare (code, second_test, bypass_test)
7542 enum rtx_code code;
7543 rtx *second_test, *bypass_test;
7544 {
7545 rtx op0, op1, ret;
7546 op0 = ix86_compare_op0;
7547 op1 = ix86_compare_op1;
7548
7549 if (second_test)
7550 *second_test = NULL_RTX;
7551 if (bypass_test)
7552 *bypass_test = NULL_RTX;
7553
7554 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7555 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7556 second_test, bypass_test);
7557 else
7558 ret = ix86_expand_int_compare (code, op0, op1);
7559
7560 return ret;
7561 }
7562
7563 /* Return true if the CODE will result in a nontrivial jump sequence.  */
7564 bool
7565 ix86_fp_jump_nontrivial_p (code)
7566 enum rtx_code code;
7567 {
7568 enum rtx_code bypass_code, first_code, second_code;
7569 if (!TARGET_CMOVE)
7570 return true;
7571 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7572 return bypass_code != NIL || second_code != NIL;
7573 }
7574
7575 void
7576 ix86_expand_branch (code, label)
7577 enum rtx_code code;
7578 rtx label;
7579 {
7580 rtx tmp;
7581
7582 switch (GET_MODE (ix86_compare_op0))
7583 {
7584 case QImode:
7585 case HImode:
7586 case SImode:
7587 simple:
7588 tmp = ix86_expand_compare (code, NULL, NULL);
7589 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7590 gen_rtx_LABEL_REF (VOIDmode, label),
7591 pc_rtx);
7592 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7593 return;
7594
7595 case SFmode:
7596 case DFmode:
7597 case XFmode:
7598 case TFmode:
7599 {
7600 rtvec vec;
7601 int use_fcomi;
7602 enum rtx_code bypass_code, first_code, second_code;
7603
7604 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7605 &ix86_compare_op1);
7606
7607 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7608
7609 /* Check whether we will use the natural sequence with one jump. If
7610 so, we can expand the jump early. Otherwise, delay expansion by
7611 creating a compound insn so as not to confuse the optimizers. */
7612 if (bypass_code == NIL && second_code == NIL
7613 && TARGET_CMOVE)
7614 {
7615 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7616 gen_rtx_LABEL_REF (VOIDmode, label),
7617 pc_rtx, NULL_RTX);
7618 }
7619 else
7620 {
7621 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7622 ix86_compare_op0, ix86_compare_op1);
7623 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7624 gen_rtx_LABEL_REF (VOIDmode, label),
7625 pc_rtx);
7626 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7627
7628 use_fcomi = ix86_use_fcomi_compare (code);
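/* When fcomi is not available the splitters need an extra HImode
   scratch (clobbered below) to hold the fnstsw result, hence the
   fourth element of the parallel. */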
7629 vec = rtvec_alloc (3 + !use_fcomi);
7630 RTVEC_ELT (vec, 0) = tmp;
7631 RTVEC_ELT (vec, 1)
7632 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7633 RTVEC_ELT (vec, 2)
7634 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7635 if (! use_fcomi)
7636 RTVEC_ELT (vec, 3)
7637 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7638
7639 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7640 }
7641 return;
7642 }
7643
7644 case DImode:
7645 if (TARGET_64BIT)
7646 goto simple;
7647 /* Expand DImode branch into multiple compare+branch. */
7648 {
7649 rtx lo[2], hi[2], label2;
7650 enum rtx_code code1, code2, code3;
7651
7652 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7653 {
7654 tmp = ix86_compare_op0;
7655 ix86_compare_op0 = ix86_compare_op1;
7656 ix86_compare_op1 = tmp;
7657 code = swap_condition (code);
7658 }
7659 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7660 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7661
7662 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7663 avoid two branches. This costs one extra insn, so disable when
7664 optimizing for size. */
7665
7666 if ((code == EQ || code == NE)
7667 && (!optimize_size
7668 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7669 {
7670 rtx xor0, xor1;
7671
7672 xor1 = hi[0];
7673 if (hi[1] != const0_rtx)
7674 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7675 NULL_RTX, 0, OPTAB_WIDEN);
7676
7677 xor0 = lo[0];
7678 if (lo[1] != const0_rtx)
7679 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7680 NULL_RTX, 0, OPTAB_WIDEN);
7681
7682 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7683 NULL_RTX, 0, OPTAB_WIDEN);
7684
7685 ix86_compare_op0 = tmp;
7686 ix86_compare_op1 = const0_rtx;
7687 ix86_expand_branch (code, label);
7688 return;
7689 }
7690
7691 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7692 op1 is a constant and the low word is zero, then we can just
7693 examine the high word. */
7694
7695 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7696 switch (code)
7697 {
7698 case LT: case LTU: case GE: case GEU:
7699 ix86_compare_op0 = hi[0];
7700 ix86_compare_op1 = hi[1];
7701 ix86_expand_branch (code, label);
7702 return;
7703 default:
7704 break;
7705 }
7706
7707 /* Otherwise, we need two or three jumps. */
7708
7709 label2 = gen_label_rtx ();
7710
7711 code1 = code;
7712 code2 = swap_condition (code);
7713 code3 = unsigned_condition (code);
7714
7715 switch (code)
7716 {
7717 case LT: case GT: case LTU: case GTU:
7718 break;
7719
7720 case LE: code1 = LT; code2 = GT; break;
7721 case GE: code1 = GT; code2 = LT; break;
7722 case LEU: code1 = LTU; code2 = GTU; break;
7723 case GEU: code1 = GTU; code2 = LTU; break;
7724
7725 case EQ: code1 = NIL; code2 = NE; break;
7726 case NE: code2 = NIL; break;
7727
7728 default:
7729 abort ();
7730 }
7731
7732 /*
7733 * a < b =>
7734 * if (hi(a) < hi(b)) goto true;
7735 * if (hi(a) > hi(b)) goto false;
7736 * if (lo(a) < lo(b)) goto true;
7737 * false:
7738 */
7739
7740 ix86_compare_op0 = hi[0];
7741 ix86_compare_op1 = hi[1];
7742
7743 if (code1 != NIL)
7744 ix86_expand_branch (code1, label);
7745 if (code2 != NIL)
7746 ix86_expand_branch (code2, label2);
7747
7748 ix86_compare_op0 = lo[0];
7749 ix86_compare_op1 = lo[1];
7750 ix86_expand_branch (code3, label);
7751
7752 if (code2 != NIL)
7753 emit_label (label2);
7754 return;
7755 }
7756
7757 default:
7758 abort ();
7759 }
7760 }
7761
7762 /* Split branch based on floating point condition. */
7763 void
7764 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7765 enum rtx_code code;
7766 rtx op1, op2, target1, target2, tmp;
7767 {
7768 rtx second, bypass;
7769 rtx label = NULL_RTX;
7770 rtx condition;
7771 int bypass_probability = -1, second_probability = -1, probability = -1;
7772 rtx i;
7773
7774 if (target2 != pc_rtx)
7775 {
7776 rtx tmp = target2;
7777 code = reverse_condition_maybe_unordered (code);
7778 target2 = target1;
7779 target1 = tmp;
7780 }
7781
7782 condition = ix86_expand_fp_compare (code, op1, op2,
7783 tmp, &second, &bypass);
7784
7785 if (split_branch_probability >= 0)
7786 {
7787 /* Distribute the probabilities across the jumps.
7788 Assume that BYPASS and SECOND always test
7789 for UNORDERED. */
7790 probability = split_branch_probability;
7791
7792 /* A value of 1 is low enough that there is no need for the probability
7793 to be updated. Later we may run some experiments and see
7794 whether unordered values are more frequent in practice. */
7795 if (bypass)
7796 bypass_probability = 1;
7797 if (second)
7798 second_probability = 1;
7799 }
7800 if (bypass != NULL_RTX)
7801 {
7802 label = gen_label_rtx ();
7803 i = emit_jump_insn (gen_rtx_SET
7804 (VOIDmode, pc_rtx,
7805 gen_rtx_IF_THEN_ELSE (VOIDmode,
7806 bypass,
7807 gen_rtx_LABEL_REF (VOIDmode,
7808 label),
7809 pc_rtx)));
7810 if (bypass_probability >= 0)
7811 REG_NOTES (i)
7812 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7813 GEN_INT (bypass_probability),
7814 REG_NOTES (i));
7815 }
7816 i = emit_jump_insn (gen_rtx_SET
7817 (VOIDmode, pc_rtx,
7818 gen_rtx_IF_THEN_ELSE (VOIDmode,
7819 condition, target1, target2)));
7820 if (probability >= 0)
7821 REG_NOTES (i)
7822 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7823 GEN_INT (probability),
7824 REG_NOTES (i));
7825 if (second != NULL_RTX)
7826 {
7827 i = emit_jump_insn (gen_rtx_SET
7828 (VOIDmode, pc_rtx,
7829 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7830 target2)));
7831 if (second_probability >= 0)
7832 REG_NOTES (i)
7833 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7834 GEN_INT (second_probability),
7835 REG_NOTES (i));
7836 }
7837 if (label != NULL_RTX)
7838 emit_label (label);
7839 }
7840
7841 int
7842 ix86_expand_setcc (code, dest)
7843 enum rtx_code code;
7844 rtx dest;
7845 {
7846 rtx ret, tmp, tmpreg;
7847 rtx second_test, bypass_test;
7848
7849 if (GET_MODE (ix86_compare_op0) == DImode
7850 && !TARGET_64BIT)
7851 return 0; /* FAIL */
7852
7853 if (GET_MODE (dest) != QImode)
7854 abort ();
7855
7856 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7857 PUT_MODE (ret, QImode);
7858
7859 tmp = dest;
7860 tmpreg = dest;
7861
7862 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7863 if (bypass_test || second_test)
7864 {
7865 rtx test = second_test;
7866 int bypass = 0;
7867 rtx tmp2 = gen_reg_rtx (QImode);
7868 if (bypass_test)
7869 {
7870 if (second_test)
7871 abort ();
7872 test = bypass_test;
7873 bypass = 1;
7874 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7875 }
7876 PUT_MODE (test, QImode);
7877 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7878
7879 if (bypass)
7880 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7881 else
7882 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7883 }
7884
7885 return 1; /* DONE */
7886 }
7887
7888 int
7889 ix86_expand_int_movcc (operands)
7890 rtx operands[];
7891 {
7892 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7893 rtx compare_seq, compare_op;
7894 rtx second_test, bypass_test;
7895 enum machine_mode mode = GET_MODE (operands[0]);
7896
7897 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
7898 If the comparison is done with an immediate, we can convert it to LTU or
7899 GEU by altering the integer. */
7900
7901 if ((code == LEU || code == GTU)
7902 && GET_CODE (ix86_compare_op1) == CONST_INT
7903 && mode != HImode
7904 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7905 && GET_CODE (operands[2]) == CONST_INT
7906 && GET_CODE (operands[3]) == CONST_INT)
7907 {
7908 if (code == LEU)
7909 code = LTU;
7910 else
7911 code = GEU;
7912 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
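/* For unsigned comparisons, x <= C is equivalent to x < C + 1 and
   x > C to x >= C + 1; the 0xffffffff check above guarantees that the
   increment cannot wrap around. */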
7913 }
7914
7915 start_sequence ();
7916 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7917 compare_seq = gen_sequence ();
7918 end_sequence ();
7919
7920 compare_code = GET_CODE (compare_op);
7921
7922 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7923 HImode insns, we'd be swallowed in word prefix ops. */
7924
7925 if (mode != HImode
7926 && (mode != DImode || TARGET_64BIT)
7927 && GET_CODE (operands[2]) == CONST_INT
7928 && GET_CODE (operands[3]) == CONST_INT)
7929 {
7930 rtx out = operands[0];
7931 HOST_WIDE_INT ct = INTVAL (operands[2]);
7932 HOST_WIDE_INT cf = INTVAL (operands[3]);
7933 HOST_WIDE_INT diff;
7934
7935 if ((compare_code == LTU || compare_code == GEU)
7936 && !second_test && !bypass_test)
7937 {
7938
7939 /* Detect overlap between destination and compare sources. */
7940 rtx tmp = out;
7941
7942 /* To simplify rest of code, restrict to the GEU case. */
7943 if (compare_code == LTU)
7944 {
7945 int tmp = ct;
7946 ct = cf;
7947 cf = tmp;
7948 compare_code = reverse_condition (compare_code);
7949 code = reverse_condition (code);
7950 }
7951 diff = ct - cf;
7952
7953 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7954 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7955 tmp = gen_reg_rtx (mode);
7956
7957 emit_insn (compare_seq);
7958 if (mode == DImode)
7959 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7960 else
7961 emit_insn (gen_x86_movsicc_0_m1 (tmp));
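/* The movsicc_0_m1/movdicc_0_m1_rex64 patterns emit "sbb tmp,tmp",
   setting TMP to all ones when the carry flag is set and to zero
   otherwise. */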
7962
7963 if (diff == 1)
7964 {
7965 /*
7966 * cmpl op0,op1
7967 * sbbl dest,dest
7968 * [addl dest, ct]
7969 *
7970 * Size 5 - 8.
7971 */
7972 if (ct)
7973 tmp = expand_simple_binop (mode, PLUS,
7974 tmp, GEN_INT (ct),
7975 tmp, 1, OPTAB_DIRECT);
7976 }
7977 else if (cf == -1)
7978 {
7979 /*
7980 * cmpl op0,op1
7981 * sbbl dest,dest
7982 * orl $ct, dest
7983 *
7984 * Size 8.
7985 */
7986 tmp = expand_simple_binop (mode, IOR,
7987 tmp, GEN_INT (ct),
7988 tmp, 1, OPTAB_DIRECT);
7989 }
7990 else if (diff == -1 && ct)
7991 {
7992 /*
7993 * cmpl op0,op1
7994 * sbbl dest,dest
7995 * xorl $-1, dest
7996 * [addl dest, cf]
7997 *
7998 * Size 8 - 11.
7999 */
8000 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8001 if (cf)
8002 tmp = expand_simple_binop (mode, PLUS,
8003 tmp, GEN_INT (cf),
8004 tmp, 1, OPTAB_DIRECT);
8005 }
8006 else
8007 {
8008 /*
8009 * cmpl op0,op1
8010 * sbbl dest,dest
8011 * andl cf - ct, dest
8012 * [addl dest, ct]
8013 *
8014 * Size 8 - 11.
8015 */
8016 tmp = expand_simple_binop (mode, AND,
8017 tmp,
8018 GEN_INT (trunc_int_for_mode
8019 (cf - ct, mode)),
8020 tmp, 1, OPTAB_DIRECT);
8021 if (ct)
8022 tmp = expand_simple_binop (mode, PLUS,
8023 tmp, GEN_INT (ct),
8024 tmp, 1, OPTAB_DIRECT);
8025 }
8026
8027 if (tmp != out)
8028 emit_move_insn (out, tmp);
8029
8030 return 1; /* DONE */
8031 }
8032
8033 diff = ct - cf;
8034 if (diff < 0)
8035 {
8036 HOST_WIDE_INT tmp;
8037 tmp = ct, ct = cf, cf = tmp;
8038 diff = -diff;
8039 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8040 {
8041 /* We may be reversing an unordered compare to a normal compare, which
8042 is not valid in general (we may convert a non-trapping condition
8043 to a trapping one); however, on i386 we currently emit all
8044 comparisons unordered. */
8045 compare_code = reverse_condition_maybe_unordered (compare_code);
8046 code = reverse_condition_maybe_unordered (code);
8047 }
8048 else
8049 {
8050 compare_code = reverse_condition (compare_code);
8051 code = reverse_condition (code);
8052 }
8053 }
8054 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8055 || diff == 3 || diff == 5 || diff == 9)
8056 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8057 {
8058 /*
8059 * xorl dest,dest
8060 * cmpl op1,op2
8061 * setcc dest
8062 * lea cf(dest*(ct-cf)),dest
8063 *
8064 * Size 14.
8065 *
8066 * This also catches the degenerate setcc-only case.
8067 */
8068
8069 rtx tmp;
8070 int nops;
8071
8072 out = emit_store_flag (out, code, ix86_compare_op0,
8073 ix86_compare_op1, VOIDmode, 0, 1);
8074
8075 nops = 0;
8076 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8077 arithmetic in the proper mode to match. */
8078 if (diff == 1)
8079 tmp = out;
8080 else
8081 {
8082 rtx out1;
8083 out1 = out;
8084 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8085 nops++;
8086 if (diff & 1)
8087 {
8088 tmp = gen_rtx_PLUS (mode, tmp, out1);
8089 nops++;
8090 }
8091 }
8092 if (cf != 0)
8093 {
8094 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8095 nops++;
8096 }
8097 if (tmp != out
8098 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8099 {
8100 if (nops == 1)
8101 {
8102 rtx clob;
8103
8104 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8105 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8106
8107 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8108 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8109 emit_insn (tmp);
8110 }
8111 else
8112 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8113 }
8114 if (out != operands[0])
8115 emit_move_insn (operands[0], out);
8116
8117 return 1; /* DONE */
8118 }
8119
8120 /*
8121 * General case: Jumpful:
8122 * xorl dest,dest cmpl op1, op2
8123 * cmpl op1, op2 movl ct, dest
8124 * setcc dest jcc 1f
8125 * decl dest movl cf, dest
8126 * andl (cf-ct),dest 1:
8127 * addl ct,dest
8128 *
8129 * Size 20. Size 14.
8130 *
8131 * This is reasonably steep, but branch mispredict costs are
8132 * high on modern CPUs, so consider failing only if optimizing
8133 * for space.
8134 *
8135 * %%% Parameterize branch_cost on the tuning architecture, then
8136 * use that. The 80386 couldn't care less about mispredicts.
8137 */
8138
8139 if (!optimize_size && !TARGET_CMOVE)
8140 {
8141 if (ct == 0)
8142 {
8143 ct = cf;
8144 cf = 0;
8145 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8146 {
8147 /* We may be reversing an unordered compare to a normal compare,
8148 which is not valid in general (we may convert a non-trapping
8149 condition to a trapping one); however, on i386 we currently
8150 emit all comparisons unordered. */
8151 compare_code = reverse_condition_maybe_unordered (compare_code);
8152 code = reverse_condition_maybe_unordered (code);
8153 }
8154 else
8155 {
8156 compare_code = reverse_condition (compare_code);
8157 code = reverse_condition (code);
8158 }
8159 }
8160
8161 out = emit_store_flag (out, code, ix86_compare_op0,
8162 ix86_compare_op1, VOIDmode, 0, 1);
8163
8164 out = expand_simple_binop (mode, PLUS,
8165 out, constm1_rtx,
8166 out, 1, OPTAB_DIRECT);
8167 out = expand_simple_binop (mode, AND,
8168 out,
8169 GEN_INT (trunc_int_for_mode
8170 (cf - ct, mode)),
8171 out, 1, OPTAB_DIRECT);
8172 out = expand_simple_binop (mode, PLUS,
8173 out, GEN_INT (ct),
8174 out, 1, OPTAB_DIRECT);
8175 if (out != operands[0])
8176 emit_move_insn (operands[0], out);
8177
8178 return 1; /* DONE */
8179 }
8180 }
8181
8182 if (!TARGET_CMOVE)
8183 {
8184 /* Try a few things more with specific constants and a variable. */
8185
8186 optab op;
8187 rtx var, orig_out, out, tmp;
8188
8189 if (optimize_size)
8190 return 0; /* FAIL */
8191
8192 /* If one of the two operands is an interesting constant, load a
8193 constant with the above and mask it in with a logical operation. */
8194
8195 if (GET_CODE (operands[2]) == CONST_INT)
8196 {
8197 var = operands[3];
8198 if (INTVAL (operands[2]) == 0)
8199 operands[3] = constm1_rtx, op = and_optab;
8200 else if (INTVAL (operands[2]) == -1)
8201 operands[3] = const0_rtx, op = ior_optab;
8202 else
8203 return 0; /* FAIL */
8204 }
8205 else if (GET_CODE (operands[3]) == CONST_INT)
8206 {
8207 var = operands[2];
8208 if (INTVAL (operands[3]) == 0)
8209 operands[2] = constm1_rtx, op = and_optab;
8210 else if (INTVAL (operands[3]) == -1)
8211 operands[2] = const0_rtx, op = ior_optab;
8212 else
8213 return 0; /* FAIL */
8214 }
8215 else
8216 return 0; /* FAIL */
8217
8218 orig_out = operands[0];
8219 tmp = gen_reg_rtx (mode);
8220 operands[0] = tmp;
8221
8222 /* Recurse to get the constant loaded. */
8223 if (ix86_expand_int_movcc (operands) == 0)
8224 return 0; /* FAIL */
8225
8226 /* Mask in the interesting variable. */
8227 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8228 OPTAB_WIDEN);
8229 if (out != orig_out)
8230 emit_move_insn (orig_out, out);
8231
8232 return 1; /* DONE */
8233 }
8234
8235 /*
8236 * For comparison with above,
8237 *
8238 * movl cf,dest
8239 * movl ct,tmp
8240 * cmpl op1,op2
8241 * cmovcc tmp,dest
8242 *
8243 * Size 15.
8244 */
8245
8246 if (! nonimmediate_operand (operands[2], mode))
8247 operands[2] = force_reg (mode, operands[2]);
8248 if (! nonimmediate_operand (operands[3], mode))
8249 operands[3] = force_reg (mode, operands[3]);
8250
8251 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8252 {
8253 rtx tmp = gen_reg_rtx (mode);
8254 emit_move_insn (tmp, operands[3]);
8255 operands[3] = tmp;
8256 }
8257 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8258 {
8259 rtx tmp = gen_reg_rtx (mode);
8260 emit_move_insn (tmp, operands[2]);
8261 operands[2] = tmp;
8262 }
8263 if (! register_operand (operands[2], VOIDmode)
8264 && ! register_operand (operands[3], VOIDmode))
8265 operands[2] = force_reg (mode, operands[2]);
8266
8267 emit_insn (compare_seq);
8268 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8269 gen_rtx_IF_THEN_ELSE (mode,
8270 compare_op, operands[2],
8271 operands[3])));
8272 if (bypass_test)
8273 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8274 gen_rtx_IF_THEN_ELSE (mode,
8275 bypass_test,
8276 operands[3],
8277 operands[0])));
8278 if (second_test)
8279 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8280 gen_rtx_IF_THEN_ELSE (mode,
8281 second_test,
8282 operands[2],
8283 operands[0])));
8284
8285 return 1; /* DONE */
8286 }
8287
8288 int
8289 ix86_expand_fp_movcc (operands)
8290 rtx operands[];
8291 {
8292 enum rtx_code code;
8293 rtx tmp;
8294 rtx compare_op, second_test, bypass_test;
8295
8296 /* For SF/DFmode conditional moves based on comparisons
8297 in the same mode, we may want to use SSE min/max instructions. */
8298 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8299 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8300 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8301 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8302 && (!TARGET_IEEE_FP
8303 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8304 /* We may be called from the post-reload splitter. */
8305 && (!REG_P (operands[0])
8306 || SSE_REG_P (operands[0])
8307 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8308 {
8309 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8310 code = GET_CODE (operands[1]);
8311
8312 /* See if we have a (cross) match between the comparison operands and
8313 the conditional move operands. */
8314 if (rtx_equal_p (operands[2], op1))
8315 {
8316 rtx tmp = op0;
8317 op0 = op1;
8318 op1 = tmp;
8319 code = reverse_condition_maybe_unordered (code);
8320 }
8321 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8322 {
8323 /* Check for min operation. */
8324 if (code == LT)
8325 {
8326 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8327 if (memory_operand (op0, VOIDmode))
8328 op0 = force_reg (GET_MODE (operands[0]), op0);
8329 if (GET_MODE (operands[0]) == SFmode)
8330 emit_insn (gen_minsf3 (operands[0], op0, op1));
8331 else
8332 emit_insn (gen_mindf3 (operands[0], op0, op1));
8333 return 1;
8334 }
8335 /* Check for max operation. */
8336 if (code == GT)
8337 {
8338 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8339 if (memory_operand (op0, VOIDmode))
8340 op0 = force_reg (GET_MODE (operands[0]), op0);
8341 if (GET_MODE (operands[0]) == SFmode)
8342 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8343 else
8344 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8345 return 1;
8346 }
8347 }
8348 /* Massage the condition into an sse_comparison_operator. In case we are
8349 in non-IEEE mode, try to canonicalize the destination operand
8350 to be first in the comparison; this helps reload avoid extra
8351 moves. */
8352 if (!sse_comparison_operator (operands[1], VOIDmode)
8353 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8354 {
8355 rtx tmp = ix86_compare_op0;
8356 ix86_compare_op0 = ix86_compare_op1;
8357 ix86_compare_op1 = tmp;
8358 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8359 VOIDmode, ix86_compare_op0,
8360 ix86_compare_op1);
8361 }
8362 /* Similarly, try to make the result the first operand of the conditional
8363 move. We also don't support the NE comparison on SSE, so try to
8364 avoid it. */
8365 if ((rtx_equal_p (operands[0], operands[3])
8366 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8367 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8368 {
8369 rtx tmp = operands[2];
8370 operands[2] = operands[3];
8371 operands[3] = tmp;
8372 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8373 (GET_CODE (operands[1])),
8374 VOIDmode, ix86_compare_op0,
8375 ix86_compare_op1);
8376 }
8377 if (GET_MODE (operands[0]) == SFmode)
8378 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8379 operands[2], operands[3],
8380 ix86_compare_op0, ix86_compare_op1));
8381 else
8382 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8383 operands[2], operands[3],
8384 ix86_compare_op0, ix86_compare_op1));
8385 return 1;
8386 }
8387
8388 /* The floating point conditional move instructions don't directly
8389 support conditions resulting from a signed integer comparison. */
8390
8391 code = GET_CODE (operands[1]);
8392 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8393
8394 /* The floating point conditional move instructions don't directly
8395 support signed integer comparisons. */
8396
8397 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8398 {
8399 if (second_test != NULL || bypass_test != NULL)
8400 abort ();
8401 tmp = gen_reg_rtx (QImode);
8402 ix86_expand_setcc (code, tmp);
8403 code = NE;
8404 ix86_compare_op0 = tmp;
8405 ix86_compare_op1 = const0_rtx;
8406 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8407 }
8408 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8409 {
8410 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8411 emit_move_insn (tmp, operands[3]);
8412 operands[3] = tmp;
8413 }
8414 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8415 {
8416 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8417 emit_move_insn (tmp, operands[2]);
8418 operands[2] = tmp;
8419 }
8420
8421 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8422 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8423 compare_op,
8424 operands[2],
8425 operands[3])));
8426 if (bypass_test)
8427 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8428 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8429 bypass_test,
8430 operands[3],
8431 operands[0])));
8432 if (second_test)
8433 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8434 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8435 second_test,
8436 operands[2],
8437 operands[0])));
8438
8439 return 1;
8440 }
8441
8442 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8443 works for floating point parameters and non-offsettable memories.
8444 For pushes, it returns just stack offsets; the values will be saved
8445 in the right order. At most three parts are generated. */
8446
8447 static int
8448 ix86_split_to_parts (operand, parts, mode)
8449 rtx operand;
8450 rtx *parts;
8451 enum machine_mode mode;
8452 {
8453 int size;
8454
8455 if (!TARGET_64BIT)
8456 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8457 else
8458 size = (GET_MODE_SIZE (mode) + 4) / 8;
8459
8460 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8461 abort ();
8462 if (size < 2 || size > 3)
8463 abort ();
8464
8465 /* Optimize constant pool references into immediates. This is used by fp moves,
8466 which force all constants to memory to allow combining. */
8467
8468 if (GET_CODE (operand) == MEM
8469 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8470 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8471 operand = get_pool_constant (XEXP (operand, 0));
8472
8473 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8474 {
8475 /* The only non-offsettable memories we handle are pushes. */
8476 if (! push_operand (operand, VOIDmode))
8477 abort ();
8478
8479 operand = copy_rtx (operand);
8480 PUT_MODE (operand, Pmode);
8481 parts[0] = parts[1] = parts[2] = operand;
8482 }
8483 else if (!TARGET_64BIT)
8484 {
8485 if (mode == DImode)
8486 split_di (&operand, 1, &parts[0], &parts[1]);
8487 else
8488 {
8489 if (REG_P (operand))
8490 {
8491 if (!reload_completed)
8492 abort ();
8493 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8494 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8495 if (size == 3)
8496 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8497 }
8498 else if (offsettable_memref_p (operand))
8499 {
8500 operand = adjust_address (operand, SImode, 0);
8501 parts[0] = operand;
8502 parts[1] = adjust_address (operand, SImode, 4);
8503 if (size == 3)
8504 parts[2] = adjust_address (operand, SImode, 8);
8505 }
8506 else if (GET_CODE (operand) == CONST_DOUBLE)
8507 {
8508 REAL_VALUE_TYPE r;
8509 long l[4];
8510
8511 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8512 switch (mode)
8513 {
8514 case XFmode:
8515 case TFmode:
8516 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8517 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8518 break;
8519 case DFmode:
8520 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8521 break;
8522 default:
8523 abort ();
8524 }
8525 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8526 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8527 }
8528 else
8529 abort ();
8530 }
8531 }
8532 else
8533 {
8534 if (mode == TImode)
8535 split_ti (&operand, 1, &parts[0], &parts[1]);
8536 if (mode == XFmode || mode == TFmode)
8537 {
8538 if (REG_P (operand))
8539 {
8540 if (!reload_completed)
8541 abort ();
8542 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8543 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8544 }
8545 else if (offsettable_memref_p (operand))
8546 {
8547 operand = adjust_address (operand, DImode, 0);
8548 parts[0] = operand;
8549 parts[1] = adjust_address (operand, SImode, 8);
8550 }
8551 else if (GET_CODE (operand) == CONST_DOUBLE)
8552 {
8553 REAL_VALUE_TYPE r;
8554 long l[3];
8555
8556 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8557 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8558 /* Do not use a shift by 32 to avoid a warning on 32-bit systems. */
8559 if (HOST_BITS_PER_WIDE_INT >= 64)
8560 parts[0]
8561 = GEN_INT (trunc_int_for_mode
8562 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8563 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8564 DImode));
8565 else
8566 parts[0] = immed_double_const (l[0], l[1], DImode);
8567 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8568 }
8569 else
8570 abort ();
8571 }
8572 }
8573
8574 return size;
8575 }
8576
8577 /* Emit insns to perform a move or push of DI, DF, and XF values.
8578 Return false when normal moves are needed; true when all required
8579 insns have been emitted. Operands 2-4 contain the input values
8580 in the correct order; operands 5-7 contain the output values. */
8581
8582 void
8583 ix86_split_long_move (operands)
8584 rtx operands[];
8585 {
8586 rtx part[2][3];
8587 int nparts;
8588 int push = 0;
8589 int collisions = 0;
8590 enum machine_mode mode = GET_MODE (operands[0]);
8591
8592 /* The DFmode expanders may ask us to move a double.
8593 For a 64-bit target this is a single move. By hiding that fact
8594 here we simplify the i386.md splitters. */
8595 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8596 {
8597 /* Optimize constant pool references into immediates. This is used by
8598 fp moves, which force all constants to memory to allow combining. */
8599
8600 if (GET_CODE (operands[1]) == MEM
8601 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8602 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8603 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8604 if (push_operand (operands[0], VOIDmode))
8605 {
8606 operands[0] = copy_rtx (operands[0]);
8607 PUT_MODE (operands[0], Pmode);
8608 }
8609 else
8610 operands[0] = gen_lowpart (DImode, operands[0]);
8611 operands[1] = gen_lowpart (DImode, operands[1]);
8612 emit_move_insn (operands[0], operands[1]);
8613 return;
8614 }
8615
8616 /* The only non-offsettable memory we handle is a push. */
8617 if (push_operand (operands[0], VOIDmode))
8618 push = 1;
8619 else if (GET_CODE (operands[0]) == MEM
8620 && ! offsettable_memref_p (operands[0]))
8621 abort ();
8622
8623 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8624 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8625
8626 /* When emitting push, take care for source operands on the stack. */
8627 if (push && GET_CODE (operands[1]) == MEM
8628 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8629 {
8630 if (nparts == 3)
8631 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8632 XEXP (part[1][2], 0));
8633 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8634 XEXP (part[1][1], 0));
8635 }
8636
8637 /* We need to do the copy in the right order in case an address register
8638 of the source overlaps the destination. */
8639 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8640 {
8641 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8642 collisions++;
8643 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8644 collisions++;
8645 if (nparts == 3
8646 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8647 collisions++;
8648
8649 /* Collision in the middle part can be handled by reordering. */
8650 if (collisions == 1 && nparts == 3
8651 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8652 {
8653 rtx tmp;
8654 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8655 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8656 }
8657
8658 /* If there are more collisions, we can't handle them by reordering.
8659 Do an lea to the last part and use only one colliding move. */
8660 else if (collisions > 1)
8661 {
8662 collisions = 1;
8663 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8664 XEXP (part[1][0], 0)));
8665 part[1][0] = change_address (part[1][0],
8666 TARGET_64BIT ? DImode : SImode,
8667 part[0][nparts - 1]);
8668 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8669 if (nparts == 3)
8670 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8671 }
8672 }
8673
8674 if (push)
8675 {
8676 if (!TARGET_64BIT)
8677 {
8678 if (nparts == 3)
8679 {
8680 /* We use only the first 12 bytes of the TFmode value, but for pushing
8681 we are required to adjust the stack as if we were pushing a real
8682 16-byte value. */
8683 if (mode == TFmode && !TARGET_64BIT)
8684 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8685 GEN_INT (-4)));
8686 emit_move_insn (part[0][2], part[1][2]);
8687 }
8688 }
8689 else
8690 {
8691 /* In 64-bit mode we don't have a 32-bit push available. In case this is
8692 a register, that is OK - we will just use the larger counterpart. We also
8693 retype memory - this comes from an attempt to avoid the REX prefix when
8694 moving the second half of a TFmode value. */
8695 if (GET_MODE (part[1][1]) == SImode)
8696 {
8697 if (GET_CODE (part[1][1]) == MEM)
8698 part[1][1] = adjust_address (part[1][1], DImode, 0);
8699 else if (REG_P (part[1][1]))
8700 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8701 else
8702 abort ();
8703 if (GET_MODE (part[1][0]) == SImode)
8704 part[1][0] = part[1][1];
8705 }
8706 }
8707 emit_move_insn (part[0][1], part[1][1]);
8708 emit_move_insn (part[0][0], part[1][0]);
8709 return;
8710 }
8711
8712 /* Choose the correct order so as not to overwrite the source before it is copied. */
8713 if ((REG_P (part[0][0])
8714 && REG_P (part[1][1])
8715 && (REGNO (part[0][0]) == REGNO (part[1][1])
8716 || (nparts == 3
8717 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8718 || (collisions > 0
8719 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8720 {
8721 if (nparts == 3)
8722 {
8723 operands[2] = part[0][2];
8724 operands[3] = part[0][1];
8725 operands[4] = part[0][0];
8726 operands[5] = part[1][2];
8727 operands[6] = part[1][1];
8728 operands[7] = part[1][0];
8729 }
8730 else
8731 {
8732 operands[2] = part[0][1];
8733 operands[3] = part[0][0];
8734 operands[5] = part[1][1];
8735 operands[6] = part[1][0];
8736 }
8737 }
8738 else
8739 {
8740 if (nparts == 3)
8741 {
8742 operands[2] = part[0][0];
8743 operands[3] = part[0][1];
8744 operands[4] = part[0][2];
8745 operands[5] = part[1][0];
8746 operands[6] = part[1][1];
8747 operands[7] = part[1][2];
8748 }
8749 else
8750 {
8751 operands[2] = part[0][0];
8752 operands[3] = part[0][1];
8753 operands[5] = part[1][0];
8754 operands[6] = part[1][1];
8755 }
8756 }
8757 emit_move_insn (operands[2], operands[5]);
8758 emit_move_insn (operands[3], operands[6]);
8759 if (nparts == 3)
8760 emit_move_insn (operands[4], operands[7]);
8761
8762 return;
8763 }
8764
8765 void
8766 ix86_split_ashldi (operands, scratch)
8767 rtx *operands, scratch;
8768 {
8769 rtx low[2], high[2];
8770 int count;
8771
8772 if (GET_CODE (operands[2]) == CONST_INT)
8773 {
8774 split_di (operands, 2, low, high);
8775 count = INTVAL (operands[2]) & 63;
8776
8777 if (count >= 32)
8778 {
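/* A left shift by 32 or more moves the low word into the high word
   (shifted further by count - 32) and clears the low word. */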
8779 emit_move_insn (high[0], low[1]);
8780 emit_move_insn (low[0], const0_rtx);
8781
8782 if (count > 32)
8783 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8784 }
8785 else
8786 {
8787 if (!rtx_equal_p (operands[0], operands[1]))
8788 emit_move_insn (operands[0], operands[1]);
8789 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8790 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8791 }
8792 }
8793 else
8794 {
8795 if (!rtx_equal_p (operands[0], operands[1]))
8796 emit_move_insn (operands[0], operands[1]);
8797
8798 split_di (operands, 1, low, high);
8799
8800 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8801 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8802
8803 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8804 {
8805 if (! no_new_pseudos)
8806 scratch = force_reg (SImode, const0_rtx);
8807 else
8808 emit_move_insn (scratch, const0_rtx);
8809
8810 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8811 scratch));
8812 }
8813 else
8814 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8815 }
8816 }
8817
8818 void
8819 ix86_split_ashrdi (operands, scratch)
8820 rtx *operands, scratch;
8821 {
8822 rtx low[2], high[2];
8823 int count;
8824
8825 if (GET_CODE (operands[2]) == CONST_INT)
8826 {
8827 split_di (operands, 2, low, high);
8828 count = INTVAL (operands[2]) & 63;
8829
8830 if (count >= 32)
8831 {
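/* An arithmetic right shift by 32 or more moves the high word into
   the low word and fills the high word with copies of the sign bit. */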
8832 emit_move_insn (low[0], high[1]);
8833
8834 if (! reload_completed)
8835 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8836 else
8837 {
8838 emit_move_insn (high[0], low[0]);
8839 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8840 }
8841
8842 if (count > 32)
8843 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8844 }
8845 else
8846 {
8847 if (!rtx_equal_p (operands[0], operands[1]))
8848 emit_move_insn (operands[0], operands[1]);
8849 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8850 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8851 }
8852 }
8853 else
8854 {
8855 if (!rtx_equal_p (operands[0], operands[1]))
8856 emit_move_insn (operands[0], operands[1]);
8857
8858 split_di (operands, 1, low, high);
8859
8860 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8861 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8862
8863 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8864 {
8865 if (! no_new_pseudos)
8866 scratch = gen_reg_rtx (SImode);
8867 emit_move_insn (scratch, high[0]);
8868 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8869 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8870 scratch));
8871 }
8872 else
8873 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8874 }
8875 }
8876
8877 void
8878 ix86_split_lshrdi (operands, scratch)
8879 rtx *operands, scratch;
8880 {
8881 rtx low[2], high[2];
8882 int count;
8883
8884 if (GET_CODE (operands[2]) == CONST_INT)
8885 {
8886 split_di (operands, 2, low, high);
8887 count = INTVAL (operands[2]) & 63;
8888
8889 if (count >= 32)
8890 {
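/* A logical right shift by 32 or more moves the high word into the
   low word and clears the high word. */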
8891 emit_move_insn (low[0], high[1]);
8892 emit_move_insn (high[0], const0_rtx);
8893
8894 if (count > 32)
8895 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8896 }
8897 else
8898 {
8899 if (!rtx_equal_p (operands[0], operands[1]))
8900 emit_move_insn (operands[0], operands[1]);
8901 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8902 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8903 }
8904 }
8905 else
8906 {
8907 if (!rtx_equal_p (operands[0], operands[1]))
8908 emit_move_insn (operands[0], operands[1]);
8909
8910 split_di (operands, 1, low, high);
8911
8912 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8913 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8914
8915 /* Heh. By reversing the arguments, we can reuse this pattern. */
8916 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8917 {
8918 if (! no_new_pseudos)
8919 scratch = force_reg (SImode, const0_rtx);
8920 else
8921 emit_move_insn (scratch, const0_rtx);
8922
8923 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8924 scratch));
8925 }
8926 else
8927 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8928 }
8929 }
8930
8931 /* Helper function for the string operations below. Test whether VARIABLE
8932 is aligned to VALUE bytes (i.e. VARIABLE & VALUE is zero); if so, jump to the returned label. */
8933 static rtx
8934 ix86_expand_aligntest (variable, value)
8935 rtx variable;
8936 int value;
8937 {
8938 rtx label = gen_label_rtx ();
8939 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8940 if (GET_MODE (variable) == DImode)
8941 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8942 else
8943 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8944 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8945 1, label);
8946 return label;
8947 }
8948
8949 /* Adjust COUNTREG downward by VALUE. */
8950 static void
8951 ix86_adjust_counter (countreg, value)
8952 rtx countreg;
8953 HOST_WIDE_INT value;
8954 {
8955 if (GET_MODE (countreg) == DImode)
8956 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8957 else
8958 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8959 }
8960
8961 /* Zero extend the possibly-SImode EXP into a Pmode register. */
8962 rtx
8963 ix86_zero_extend_to_Pmode (exp)
8964 rtx exp;
8965 {
8966 rtx r;
8967 if (GET_MODE (exp) == VOIDmode)
8968 return force_reg (Pmode, exp);
8969 if (GET_MODE (exp) == Pmode)
8970 return copy_to_mode_reg (Pmode, exp);
8971 r = gen_reg_rtx (Pmode);
8972 emit_insn (gen_zero_extendsidi2 (r, exp));
8973 return r;
8974 }
8975
8976 /* Expand string move (memcpy) operation. Use i386 string operations when
8977 profitable. expand_clrstr contains similar code. */
8978 int
8979 ix86_expand_movstr (dst, src, count_exp, align_exp)
8980 rtx dst, src, count_exp, align_exp;
8981 {
8982 rtx srcreg, destreg, countreg;
8983 enum machine_mode counter_mode;
8984 HOST_WIDE_INT align = 0;
8985 unsigned HOST_WIDE_INT count = 0;
8986 rtx insns;
8987
8988 start_sequence ();
8989
8990 if (GET_CODE (align_exp) == CONST_INT)
8991 align = INTVAL (align_exp);
8992
8993 /* This simple hack avoids all inlining code and simplifies code below. */
8994 if (!TARGET_ALIGN_STRINGOPS)
8995 align = 64;
8996
8997 if (GET_CODE (count_exp) == CONST_INT)
8998 count = INTVAL (count_exp);
8999
9000 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9001 for 64 bits use SImode when possible, otherwise DImode.
9002 Set count to the number of bytes copied when known at compile time. */
9003 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9004 || x86_64_zero_extended_value (count_exp))
9005 counter_mode = SImode;
9006 else
9007 counter_mode = DImode;
9008
9009 if (counter_mode != SImode && counter_mode != DImode)
9010 abort ();
9011
9012 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9013 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9014
9015 emit_insn (gen_cld ());
9016
9017 /* When optimizing for size, emit a simple rep ; movsb instruction for
9018 counts not divisible by 4. */
9019
9020 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9021 {
9022 countreg = ix86_zero_extend_to_Pmode (count_exp);
9023 if (TARGET_64BIT)
9024 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9025 destreg, srcreg, countreg));
9026 else
9027 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9028 destreg, srcreg, countreg));
9029 }
9030
9031 /* For constant aligned (or small unaligned) copies use rep movsl
9032 followed by code copying the rest. For PentiumPro ensure 8 byte
9033 alignment to allow rep movsl acceleration. */
9034
9035 else if (count != 0
9036 && (align >= 8
9037 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9038 || optimize_size || count < (unsigned int) 64))
9039 {
9040 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9041 if (count & ~(size - 1))
9042 {
9043 countreg = copy_to_mode_reg (counter_mode,
9044 GEN_INT ((count >> (size == 4 ? 2 : 3))
9045 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9046 countreg = ix86_zero_extend_to_Pmode (countreg);
9047 if (size == 4)
9048 {
9049 if (TARGET_64BIT)
9050 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9051 destreg, srcreg, countreg));
9052 else
9053 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9054 destreg, srcreg, countreg));
9055 }
9056 else
9057 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9058 destreg, srcreg, countreg));
9059 }
9060 if (size == 8 && (count & 0x04))
9061 emit_insn (gen_strmovsi (destreg, srcreg));
9062 if (count & 0x02)
9063 emit_insn (gen_strmovhi (destreg, srcreg));
9064 if (count & 0x01)
9065 emit_insn (gen_strmovqi (destreg, srcreg));
9066 }
9067 /* The generic code based on the glibc implementation:
9068 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9069 allowing accelerated copying there)
9070 - copy the data using rep movsl
9071 - copy the rest. */
9072 else
9073 {
9074 rtx countreg2;
9075 rtx label = NULL;
9076
9077 /* In case we don't know anything about the alignment, default to the
9078 library version, since it is usually equally fast and results in
9079 shorter code. */
9080 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9081 {
9082 end_sequence ();
9083 return 0;
9084 }
9085
9086 if (TARGET_SINGLE_STRINGOP)
9087 emit_insn (gen_cld ());
9088
9089 countreg2 = gen_reg_rtx (Pmode);
9090 countreg = copy_to_mode_reg (counter_mode, count_exp);
9091
9092 /* We don't use loops to align the destination and to copy parts smaller
9093 than 4 bytes, because gcc is able to optimize such code better (in
9094 the case that the destination or the count really is aligned, gcc is often
9095 able to predict the branches) and also it is friendlier to the
9096 hardware branch prediction.
9097
9098 Using loops is beneficial for the generic case, because we can
9099 handle small counts using the loops. Many CPUs (such as Athlon)
9100 have large REP prefix setup costs.
9101
9102 This is quite costly. Maybe we can revisit this decision later or
9103 add some customizability to this code. */
9104
9105 if (count == 0
9106 && align < (TARGET_PENTIUMPRO && (count == 0
9107 || count >= (unsigned int) 260)
9108 ? 8 : UNITS_PER_WORD))
9109 {
9110 label = gen_label_rtx ();
9111 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9112 LEU, 0, counter_mode, 1, label);
9113 }
9114 if (align <= 1)
9115 {
9116 rtx label = ix86_expand_aligntest (destreg, 1);
9117 emit_insn (gen_strmovqi (destreg, srcreg));
9118 ix86_adjust_counter (countreg, 1);
9119 emit_label (label);
9120 LABEL_NUSES (label) = 1;
9121 }
9122 if (align <= 2)
9123 {
9124 rtx label = ix86_expand_aligntest (destreg, 2);
9125 emit_insn (gen_strmovhi (destreg, srcreg));
9126 ix86_adjust_counter (countreg, 2);
9127 emit_label (label);
9128 LABEL_NUSES (label) = 1;
9129 }
9130 if (align <= 4
9131 && ((TARGET_PENTIUMPRO && (count == 0
9132 || count >= (unsigned int) 260))
9133 || TARGET_64BIT))
9134 {
9135 rtx label = ix86_expand_aligntest (destreg, 4);
9136 emit_insn (gen_strmovsi (destreg, srcreg));
9137 ix86_adjust_counter (countreg, 4);
9138 emit_label (label);
9139 LABEL_NUSES (label) = 1;
9140 }
9141
9142 if (!TARGET_SINGLE_STRINGOP)
9143 emit_insn (gen_cld ());
9144 if (TARGET_64BIT)
9145 {
9146 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9147 GEN_INT (3)));
9148 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9149 destreg, srcreg, countreg2));
9150 }
9151 else
9152 {
9153 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9154 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9155 destreg, srcreg, countreg2));
9156 }
9157
9158 if (label)
9159 {
9160 emit_label (label);
9161 LABEL_NUSES (label) = 1;
9162 }
9163 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9164 emit_insn (gen_strmovsi (destreg, srcreg));
9165 if ((align <= 4 || count == 0) && TARGET_64BIT)
9166 {
9167 rtx label = ix86_expand_aligntest (countreg, 4);
9168 emit_insn (gen_strmovsi (destreg, srcreg));
9169 emit_label (label);
9170 LABEL_NUSES (label) = 1;
9171 }
9172 if (align > 2 && count != 0 && (count & 2))
9173 emit_insn (gen_strmovhi (destreg, srcreg));
9174 if (align <= 2 || count == 0)
9175 {
9176 rtx label = ix86_expand_aligntest (countreg, 2);
9177 emit_insn (gen_strmovhi (destreg, srcreg));
9178 emit_label (label);
9179 LABEL_NUSES (label) = 1;
9180 }
9181 if (align > 1 && count != 0 && (count & 1))
9182 emit_insn (gen_strmovqi (destreg, srcreg));
9183 if (align <= 1 || count == 0)
9184 {
9185 rtx label = ix86_expand_aligntest (countreg, 1);
9186 emit_insn (gen_strmovqi (destreg, srcreg));
9187 emit_label (label);
9188 LABEL_NUSES (label) = 1;
9189 }
9190 }
9191
9192 insns = get_insns ();
9193 end_sequence ();
9194
9195 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9196 emit_insns (insns);
9197 return 1;
9198 }
9199
9200 /* Expand string clear operation (bzero). Use i386 string operations when
9201 profitable. expand_movstr contains similar code. */
9202 int
9203 ix86_expand_clrstr (src, count_exp, align_exp)
9204 rtx src, count_exp, align_exp;
9205 {
9206 rtx destreg, zeroreg, countreg;
9207 enum machine_mode counter_mode;
9208 HOST_WIDE_INT align = 0;
9209 unsigned HOST_WIDE_INT count = 0;
9210
9211 if (GET_CODE (align_exp) == CONST_INT)
9212 align = INTVAL (align_exp);
9213
9214 /* This simple hack avoids all inlining code and simplifies code below. */
9215 if (!TARGET_ALIGN_STRINGOPS)
9216 align = 32;
9217
9218 if (GET_CODE (count_exp) == CONST_INT)
9219 count = INTVAL (count_exp);
9220 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9221 for 64 bits use SImode when possible, otherwise DImode.
9222 Set count to the number of bytes copied when known at compile time. */
9223 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9224 || x86_64_zero_extended_value (count_exp))
9225 counter_mode = SImode;
9226 else
9227 counter_mode = DImode;
9228
9229 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9230
9231 emit_insn (gen_cld ());
9232
9233 /* When optimizing for size, emit a simple rep ; stosb instruction for
9234 counts not divisible by 4. */
9235
9236 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9237 {
9238 countreg = ix86_zero_extend_to_Pmode (count_exp);
9239 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9240 if (TARGET_64BIT)
9241 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9242 destreg, countreg));
9243 else
9244 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9245 destreg, countreg));
9246 }
9247 else if (count != 0
9248 && (align >= 8
9249 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9250 || optimize_size || count < (unsigned int) 64))
9251 {
9252 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9253 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9254 if (count & ~(size - 1))
9255 {
9256 countreg = copy_to_mode_reg (counter_mode,
9257 GEN_INT ((count >> (size == 4 ? 2 : 3))
9258 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9259 countreg = ix86_zero_extend_to_Pmode (countreg);
9260 if (size == 4)
9261 {
9262 if (TARGET_64BIT)
9263 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9264 destreg, countreg));
9265 else
9266 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9267 destreg, countreg));
9268 }
9269 else
9270 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9271 destreg, countreg));
9272 }
9273 if (size == 8 && (count & 0x04))
9274 emit_insn (gen_strsetsi (destreg,
9275 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9276 if (count & 0x02)
9277 emit_insn (gen_strsethi (destreg,
9278 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9279 if (count & 0x01)
9280 emit_insn (gen_strsetqi (destreg,
9281 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9282 }
9283 else
9284 {
9285 rtx countreg2;
9286 rtx label = NULL;
9287
9288 /* In case we don't know anything about the alignment, default to the
9289 library version, since it is usually equally fast and results in
9290 shorter code. */
9291 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9292 return 0;
9293
9294 if (TARGET_SINGLE_STRINGOP)
9295 emit_insn (gen_cld ());
9296
9297 countreg2 = gen_reg_rtx (Pmode);
9298 countreg = copy_to_mode_reg (counter_mode, count_exp);
9299 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9300
9301 if (count == 0
9302 && align < (TARGET_PENTIUMPRO && (count == 0
9303 || count >= (unsigned int) 260)
9304 ? 8 : UNITS_PER_WORD))
9305 {
9306 label = gen_label_rtx ();
9307 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9308 LEU, 0, counter_mode, 1, label);
9309 }
9310 if (align <= 1)
9311 {
9312 rtx label = ix86_expand_aligntest (destreg, 1);
9313 emit_insn (gen_strsetqi (destreg,
9314 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9315 ix86_adjust_counter (countreg, 1);
9316 emit_label (label);
9317 LABEL_NUSES (label) = 1;
9318 }
9319 if (align <= 2)
9320 {
9321 rtx label = ix86_expand_aligntest (destreg, 2);
9322 emit_insn (gen_strsethi (destreg,
9323 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9324 ix86_adjust_counter (countreg, 2);
9325 emit_label (label);
9326 LABEL_NUSES (label) = 1;
9327 }
9328 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9329 || count >= (unsigned int) 260))
9330 {
9331 rtx label = ix86_expand_aligntest (destreg, 4);
9332 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9333 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9334 : zeroreg)));
9335 ix86_adjust_counter (countreg, 4);
9336 emit_label (label);
9337 LABEL_NUSES (label) = 1;
9338 }
9339
9340 if (!TARGET_SINGLE_STRINGOP)
9341 emit_insn (gen_cld ());
9342 if (TARGET_64BIT)
9343 {
9344 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9345 GEN_INT (3)));
9346 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9347 destreg, countreg2));
9348 }
9349 else
9350 {
9351 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9352 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9353 destreg, countreg2));
9354 }
9355
9356 if (label)
9357 {
9358 emit_label (label);
9359 LABEL_NUSES (label) = 1;
9360 }
9361 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9362 emit_insn (gen_strsetsi (destreg,
9363 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9364 if (TARGET_64BIT && (align <= 4 || count == 0))
9365 {
9366 rtx label = ix86_expand_aligntest (destreg, 2);
9367 emit_insn (gen_strsetsi (destreg,
9368 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9369 emit_label (label);
9370 LABEL_NUSES (label) = 1;
9371 }
9372 if (align > 2 && count != 0 && (count & 2))
9373 emit_insn (gen_strsethi (destreg,
9374 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9375 if (align <= 2 || count == 0)
9376 {
9377 rtx label = ix86_expand_aligntest (destreg, 2);
9378 emit_insn (gen_strsethi (destreg,
9379 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9380 emit_label (label);
9381 LABEL_NUSES (label) = 1;
9382 }
9383 if (align > 1 && count != 0 && (count & 1))
9384 emit_insn (gen_strsetqi (destreg,
9385 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9386 if (align <= 1 || count == 0)
9387 {
9388 rtx label = ix86_expand_aligntest (destreg, 1);
9389 emit_insn (gen_strsetqi (destreg,
9390 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9391 emit_label (label);
9392 LABEL_NUSES (label) = 1;
9393 }
9394 }
9395 return 1;
9396 }
9397 /* Expand strlen. */
9398 int
9399 ix86_expand_strlen (out, src, eoschar, align)
9400 rtx out, src, eoschar, align;
9401 {
9402 rtx addr, scratch1, scratch2, scratch3, scratch4;
9403
9404 /* The generic case of the strlen expander is long. Avoid expanding
9405 it unless TARGET_INLINE_ALL_STRINGOPS. */
9406
9407 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9408 && !TARGET_INLINE_ALL_STRINGOPS
9409 && !optimize_size
9410 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9411 return 0;
9412
9413 addr = force_reg (Pmode, XEXP (src, 0));
9414 scratch1 = gen_reg_rtx (Pmode);
9415
9416 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9417 && !optimize_size)
9418 {
9419 /* Well it seems that some optimizer does not combine a call like
9420 foo(strlen(bar), strlen(bar));
9421 when the move and the subtraction are done here. It does calculate
9422 the length just once when these instructions are done inside of
9423 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9424 often used and I use one fewer register for the lifetime of
9425 output_strlen_unroll() this is better. */
9426
9427 emit_move_insn (out, addr);
9428
9429 ix86_expand_strlensi_unroll_1 (out, align);
9430
9431 /* strlensi_unroll_1 returns the address of the zero at the end of
9432 the string, like memchr(), so compute the length by subtracting
9433 the start address. */
9434 if (TARGET_64BIT)
9435 emit_insn (gen_subdi3 (out, out, addr));
9436 else
9437 emit_insn (gen_subsi3 (out, out, addr));
9438 }
9439 else
9440 {
9441 scratch2 = gen_reg_rtx (Pmode);
9442 scratch3 = gen_reg_rtx (Pmode);
9443 scratch4 = force_reg (Pmode, constm1_rtx);
9444
9445 emit_move_insn (scratch3, addr);
9446 eoschar = force_reg (QImode, eoschar);
9447
9448 emit_insn (gen_cld ());
9449 if (TARGET_64BIT)
9450 {
9451 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9452 align, scratch4, scratch3));
9453 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9454 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9455 }
9456 else
9457 {
9458 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9459 align, scratch4, scratch3));
9460 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9461 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9462 }
9463 }
9464 return 1;
9465 }
9466
9467 /* Expand the appropriate insns for doing strlen if not just doing
9468 repnz; scasb
9469
9470 out = result, initialized with the start address
9471 align_rtx = alignment of the address.
9472 scratch = scratch register, initialized with the start address when
9473 not aligned, otherwise undefined
9474
9475 This is just the body. It needs the initializations mentioned above and
9476 some address computation at the end. These things are done in i386.md. */
9477
9478 static void
9479 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9480 rtx out, align_rtx;
9481 {
9482 int align;
9483 rtx tmp;
9484 rtx align_2_label = NULL_RTX;
9485 rtx align_3_label = NULL_RTX;
9486 rtx align_4_label = gen_label_rtx ();
9487 rtx end_0_label = gen_label_rtx ();
9488 rtx mem;
9489 rtx tmpreg = gen_reg_rtx (SImode);
9490 rtx scratch = gen_reg_rtx (SImode);
9491
9492 align = 0;
9493 if (GET_CODE (align_rtx) == CONST_INT)
9494 align = INTVAL (align_rtx);
9495
9496 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9497
9498 /* Is there a known alignment and is it less than 4? */
9499 if (align < 4)
9500 {
9501 rtx scratch1 = gen_reg_rtx (Pmode);
9502 emit_move_insn (scratch1, out);
9503 /* Is there a known alignment and is it not 2? */
9504 if (align != 2)
9505 {
9506 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9507 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9508
9509 /* Leave just the 3 lower bits. */
9510 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9511 NULL_RTX, 0, OPTAB_WIDEN);
9512
9513 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9514 Pmode, 1, align_4_label);
9515 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9516 Pmode, 1, align_2_label);
9517 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9518 Pmode, 1, align_3_label);
9519 }
9520 else
9521 {
9522 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9523 check whether the pointer is already aligned to 4 bytes. */
9524
9525 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9526 NULL_RTX, 0, OPTAB_WIDEN);
9527
9528 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9529 Pmode, 1, align_4_label);
9530 }
9531
9532 mem = gen_rtx_MEM (QImode, out);
9533
9534 /* Now compare the bytes. */
9535
9536 /* Compare the first 1 to 3 unaligned bytes, one byte at a time. */
9537 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9538 QImode, 1, end_0_label);
9539
9540 /* Increment the address. */
9541 if (TARGET_64BIT)
9542 emit_insn (gen_adddi3 (out, out, const1_rtx));
9543 else
9544 emit_insn (gen_addsi3 (out, out, const1_rtx));
9545
9546 /* Not needed with an alignment of 2. */
9547 if (align != 2)
9548 {
9549 emit_label (align_2_label);
9550
9551 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9552 end_0_label);
9553
9554 if (TARGET_64BIT)
9555 emit_insn (gen_adddi3 (out, out, const1_rtx));
9556 else
9557 emit_insn (gen_addsi3 (out, out, const1_rtx));
9558
9559 emit_label (align_3_label);
9560 }
9561
9562 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9563 end_0_label);
9564
9565 if (TARGET_64BIT)
9566 emit_insn (gen_adddi3 (out, out, const1_rtx));
9567 else
9568 emit_insn (gen_addsi3 (out, out, const1_rtx));
9569 }
9570
9571 /* Generate a loop to check 4 bytes at a time. It is not a good idea
9572 to align this loop; it only makes the code larger and does not
9573 speed it up. */
9574 emit_label (align_4_label);
9575
9576 mem = gen_rtx_MEM (SImode, out);
9577 emit_move_insn (scratch, mem);
9578 if (TARGET_64BIT)
9579 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9580 else
9581 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9582
9583 /* This formula yields a nonzero result iff one of the bytes is zero.
9584 This saves three branches inside the loop and many cycles. */
9585
9586 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9587 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9588 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9589 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9590 GEN_INT (trunc_int_for_mode
9591 (0x80808080, SImode))));
9592 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9593 align_4_label);
9594
9595 if (TARGET_CMOVE)
9596 {
9597 rtx reg = gen_reg_rtx (SImode);
9598 rtx reg2 = gen_reg_rtx (Pmode);
9599 emit_move_insn (reg, tmpreg);
9600 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9601
9602 /* If zero is not in the first two bytes, move two bytes forward. */
9603 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9604 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9605 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9606 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9607 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9608 reg,
9609 tmpreg)));
9610 /* Emit the lea manually to avoid clobbering the flags. */
9611 emit_insn (gen_rtx_SET (SImode, reg2,
9612 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9613
9614 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9615 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9616 emit_insn (gen_rtx_SET (VOIDmode, out,
9617 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9618 reg2,
9619 out)));
9620
9621 }
9622 else
9623 {
9624 rtx end_2_label = gen_label_rtx ();
9625 /* Is zero in the first two bytes? */
9626
9627 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9628 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9629 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9630 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9631 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9632 pc_rtx);
9633 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9634 JUMP_LABEL (tmp) = end_2_label;
9635
9636 /* Not in the first two. Move two bytes forward. */
9637 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9638 if (TARGET_64BIT)
9639 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9640 else
9641 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9642
9643 emit_label (end_2_label);
9644
9645 }
9646
9647 /* Avoid a branch in fixing up the final byte offset. */
9648 tmpreg = gen_lowpart (QImode, tmpreg);
9649 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9650 if (TARGET_64BIT)
9651 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9652 else
9653 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9654
9655 emit_label (end_0_label);
9656 }
9657 \f
9658 /* Clear stack slot assignments remembered from previous functions.
9659 This is called from INIT_EXPANDERS once before RTL is emitted for each
9660 function. */
9661
9662 static void
9663 ix86_init_machine_status (p)
9664 struct function *p;
9665 {
9666 p->machine = (struct machine_function *)
9667 xcalloc (1, sizeof (struct machine_function));
9668 }
9669
9670 /* Mark machine specific bits of P for GC. */
9671 static void
9672 ix86_mark_machine_status (p)
9673 struct function *p;
9674 {
9675 struct machine_function *machine = p->machine;
9676 enum machine_mode mode;
9677 int n;
9678
9679 if (! machine)
9680 return;
9681
9682 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9683 mode = (enum machine_mode) ((int) mode + 1))
9684 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9685 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9686 }
9687
9688 static void
9689 ix86_free_machine_status (p)
9690 struct function *p;
9691 {
9692 free (p->machine);
9693 p->machine = NULL;
9694 }
9695
9696 /* Return a MEM corresponding to a stack slot with mode MODE.
9697 Allocate a new slot if necessary.
9698
9699 The RTL for a function can have several slots available: N is
9700 which slot to use. */
9701
9702 rtx
9703 assign_386_stack_local (mode, n)
9704 enum machine_mode mode;
9705 int n;
9706 {
9707 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9708 abort ();
9709
9710 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9711 ix86_stack_locals[(int) mode][n]
9712 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9713
9714 return ix86_stack_locals[(int) mode][n];
9715 }
9716 \f
9717 /* Calculate the length of the memory address in the instruction
9718 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9719
9720 static int
9721 memory_address_length (addr)
9722 rtx addr;
9723 {
9724 struct ix86_address parts;
9725 rtx base, index, disp;
9726 int len;
9727
9728 if (GET_CODE (addr) == PRE_DEC
9729 || GET_CODE (addr) == POST_INC
9730 || GET_CODE (addr) == PRE_MODIFY
9731 || GET_CODE (addr) == POST_MODIFY)
9732 return 0;
9733
9734 if (! ix86_decompose_address (addr, &parts))
9735 abort ();
9736
9737 base = parts.base;
9738 index = parts.index;
9739 disp = parts.disp;
9740 len = 0;
9741
9742 /* Register Indirect. */
9743 if (base && !index && !disp)
9744 {
9745 /* Special cases: ebp and esp need the two-byte modrm form. */
9746 if (addr == stack_pointer_rtx
9747 || addr == arg_pointer_rtx
9748 || addr == frame_pointer_rtx
9749 || addr == hard_frame_pointer_rtx)
9750 len = 1;
9751 }
9752
9753 /* Direct Addressing. */
9754 else if (disp && !base && !index)
9755 len = 4;
9756
9757 else
9758 {
9759 /* Find the length of the displacement constant. */
9760 if (disp)
9761 {
9762 if (GET_CODE (disp) == CONST_INT
9763 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9764 len = 1;
9765 else
9766 len = 4;
9767 }
9768
9769 /* An index requires the two-byte modrm form. */
9770 if (index)
9771 len += 1;
9772 }
9773
9774 return len;
9775 }
9776
9777 /* Compute default value for "length_immediate" attribute. When SHORTFORM
9778 is set, expect that the insn has an 8-bit immediate alternative. */
9779 int
9780 ix86_attr_length_immediate_default (insn, shortform)
9781 rtx insn;
9782 int shortform;
9783 {
9784 int len = 0;
9785 int i;
9786 extract_insn_cached (insn);
9787 for (i = recog_data.n_operands - 1; i >= 0; --i)
9788 if (CONSTANT_P (recog_data.operand[i]))
9789 {
9790 if (len)
9791 abort ();
9792 if (shortform
9793 && GET_CODE (recog_data.operand[i]) == CONST_INT
9794 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9795 len = 1;
9796 else
9797 {
9798 switch (get_attr_mode (insn))
9799 {
9800 case MODE_QI:
9801 len+=1;
9802 break;
9803 case MODE_HI:
9804 len+=2;
9805 break;
9806 case MODE_SI:
9807 len+=4;
9808 break;
9809 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9810 case MODE_DI:
9811 len+=4;
9812 break;
9813 default:
9814 fatal_insn ("unknown insn mode", insn);
9815 }
9816 }
9817 }
9818 return len;
9819 }
9820 /* Compute default value for "length_address" attribute. */
9821 int
9822 ix86_attr_length_address_default (insn)
9823 rtx insn;
9824 {
9825 int i;
9826 extract_insn_cached (insn);
9827 for (i = recog_data.n_operands - 1; i >= 0; --i)
9828 if (GET_CODE (recog_data.operand[i]) == MEM)
9829 {
9830 return memory_address_length (XEXP (recog_data.operand[i], 0));
9831 break;
9832 }
9833 return 0;
9834 }
9835 \f
9836 /* Return the maximum number of instructions a cpu can issue. */
9837
9838 static int
9839 ix86_issue_rate ()
9840 {
9841 switch (ix86_cpu)
9842 {
9843 case PROCESSOR_PENTIUM:
9844 case PROCESSOR_K6:
9845 return 2;
9846
9847 case PROCESSOR_PENTIUMPRO:
9848 case PROCESSOR_PENTIUM4:
9849 case PROCESSOR_ATHLON:
9850 return 3;
9851
9852 default:
9853 return 1;
9854 }
9855 }
9856
9857 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9858 by DEP_INSN and nothing else set by DEP_INSN. */
9859
9860 static int
9861 ix86_flags_dependant (insn, dep_insn, insn_type)
9862 rtx insn, dep_insn;
9863 enum attr_type insn_type;
9864 {
9865 rtx set, set2;
9866
9867 /* Simplify the test for uninteresting insns. */
9868 if (insn_type != TYPE_SETCC
9869 && insn_type != TYPE_ICMOV
9870 && insn_type != TYPE_FCMOV
9871 && insn_type != TYPE_IBR)
9872 return 0;
9873
9874 if ((set = single_set (dep_insn)) != 0)
9875 {
9876 set = SET_DEST (set);
9877 set2 = NULL_RTX;
9878 }
9879 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9880 && XVECLEN (PATTERN (dep_insn), 0) == 2
9881 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9882 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9883 {
9884 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9885 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9886 }
9887 else
9888 return 0;
9889
9890 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9891 return 0;
9892
9893 /* This test is true if the dependent insn reads the flags but
9894 not any other potentially set register. */
9895 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9896 return 0;
9897
9898 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9899 return 0;
9900
9901 return 1;
9902 }
9903
9904 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9905 address with operands set by DEP_INSN. */
9906
9907 static int
9908 ix86_agi_dependant (insn, dep_insn, insn_type)
9909 rtx insn, dep_insn;
9910 enum attr_type insn_type;
9911 {
9912 rtx addr;
9913
9914 if (insn_type == TYPE_LEA
9915 && TARGET_PENTIUM)
9916 {
9917 addr = PATTERN (insn);
9918 if (GET_CODE (addr) == SET)
9919 ;
9920 else if (GET_CODE (addr) == PARALLEL
9921 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9922 addr = XVECEXP (addr, 0, 0);
9923 else
9924 abort ();
9925 addr = SET_SRC (addr);
9926 }
9927 else
9928 {
9929 int i;
9930 extract_insn_cached (insn);
9931 for (i = recog_data.n_operands - 1; i >= 0; --i)
9932 if (GET_CODE (recog_data.operand[i]) == MEM)
9933 {
9934 addr = XEXP (recog_data.operand[i], 0);
9935 goto found;
9936 }
9937 return 0;
9938 found:;
9939 }
9940
9941 return modified_in_p (addr, dep_insn);
9942 }
9943
9944 static int
9945 ix86_adjust_cost (insn, link, dep_insn, cost)
9946 rtx insn, link, dep_insn;
9947 int cost;
9948 {
9949 enum attr_type insn_type, dep_insn_type;
9950 enum attr_memory memory, dep_memory;
9951 rtx set, set2;
9952 int dep_insn_code_number;
9953
9954 /* Anti and output dependencies have zero cost on all CPUs. */
9955 if (REG_NOTE_KIND (link) != 0)
9956 return 0;
9957
9958 dep_insn_code_number = recog_memoized (dep_insn);
9959
9960 /* If we can't recognize the insns, we can't really do anything. */
9961 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9962 return cost;
9963
9964 insn_type = get_attr_type (insn);
9965 dep_insn_type = get_attr_type (dep_insn);
9966
9967 switch (ix86_cpu)
9968 {
9969 case PROCESSOR_PENTIUM:
9970 /* Address Generation Interlock adds a cycle of latency. */
9971 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9972 cost += 1;
9973
9974 /* ??? Compares pair with jump/setcc. */
9975 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9976 cost = 0;
9977
9978 /* Floating point stores require the value to be ready one cycle earlier. */
9979 if (insn_type == TYPE_FMOV
9980 && get_attr_memory (insn) == MEMORY_STORE
9981 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9982 cost += 1;
9983 break;
9984
9985 case PROCESSOR_PENTIUMPRO:
9986 memory = get_attr_memory (insn);
9987 dep_memory = get_attr_memory (dep_insn);
9988
9989 /* Since we can't represent delayed latencies of load+operation,
9990 increase the cost here for non-imov insns. */
9991 if (dep_insn_type != TYPE_IMOV
9992 && dep_insn_type != TYPE_FMOV
9993 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9994 cost += 1;
9995
9996 /* INT->FP conversion is expensive. */
9997 if (get_attr_fp_int_src (dep_insn))
9998 cost += 5;
9999
10000 /* There is one cycle extra latency between an FP op and a store. */
10001 if (insn_type == TYPE_FMOV
10002 && (set = single_set (dep_insn)) != NULL_RTX
10003 && (set2 = single_set (insn)) != NULL_RTX
10004 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10005 && GET_CODE (SET_DEST (set2)) == MEM)
10006 cost += 1;
10007
10008 /* Show the ability of the reorder buffer to hide the latency of a load
10009 by executing it in parallel with the previous instruction, when the
10010 previous instruction is not needed to compute the address. */
10011 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10012 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10013 {
10014 /* Claim that moves take one cycle, as the core can issue one load
10015 at a time and the next load can start a cycle later. */
10016 if (dep_insn_type == TYPE_IMOV
10017 || dep_insn_type == TYPE_FMOV)
10018 cost = 1;
10019 else if (cost > 1)
10020 cost--;
10021 }
10022 break;
10023
10024 case PROCESSOR_K6:
10025 memory = get_attr_memory (insn);
10026 dep_memory = get_attr_memory (dep_insn);
10027 /* The esp dependency is resolved before the instruction is really
10028 finished. */
10029 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10030 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10031 return 1;
10032
10033 /* Since we can't represent delayed latencies of load+operation,
10034 increase the cost here for non-imov insns. */
10035 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10036 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10037
10038 /* INT->FP conversion is expensive. */
10039 if (get_attr_fp_int_src (dep_insn))
10040 cost += 5;
10041
10042 /* Show the ability of the reorder buffer to hide the latency of a load
10043 by executing it in parallel with the previous instruction, when the
10044 previous instruction is not needed to compute the address. */
10045 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10046 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10047 {
10048 /* Claim that moves take one cycle, as the core can issue one load
10049 at a time and the next load can start a cycle later. */
10050 if (dep_insn_type == TYPE_IMOV
10051 || dep_insn_type == TYPE_FMOV)
10052 cost = 1;
10053 else if (cost > 2)
10054 cost -= 2;
10055 else
10056 cost = 1;
10057 }
10058 break;
10059
10060 case PROCESSOR_ATHLON:
10061 memory = get_attr_memory (insn);
10062 dep_memory = get_attr_memory (dep_insn);
10063
10064 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10065 {
10066 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10067 cost += 2;
10068 else
10069 cost += 3;
10070 }
10071 /* Show the ability of the reorder buffer to hide the latency of a load
10072 by executing it in parallel with the previous instruction, when the
10073 previous instruction is not needed to compute the address. */
10074 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10075 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10076 {
10077 /* Claim that moves take one cycle, as the core can issue one load
10078 at a time and the next load can start a cycle later. */
10079 if (dep_insn_type == TYPE_IMOV
10080 || dep_insn_type == TYPE_FMOV)
10081 cost = 0;
10082 else if (cost >= 3)
10083 cost -= 3;
10084 else
10085 cost = 0;
10086 }
10087
10088 default:
10089 break;
10090 }
10091
10092 return cost;
10093 }
10094
10095 static union
10096 {
10097 struct ppro_sched_data
10098 {
10099 rtx decode[3];
10100 int issued_this_cycle;
10101 } ppro;
10102 } ix86_sched_data;
10103
10104 static int
10105 ix86_safe_length (insn)
10106 rtx insn;
10107 {
10108 if (recog_memoized (insn) >= 0)
10109 return get_attr_length (insn);
10110 else
10111 return 128;
10112 }
10113
10114 static int
10115 ix86_safe_length_prefix (insn)
10116 rtx insn;
10117 {
10118 if (recog_memoized (insn) >= 0)
10119 return get_attr_length (insn);
10120 else
10121 return 0;
10122 }
10123
10124 static enum attr_memory
10125 ix86_safe_memory (insn)
10126 rtx insn;
10127 {
10128 if (recog_memoized (insn) >= 0)
10129 return get_attr_memory (insn);
10130 else
10131 return MEMORY_UNKNOWN;
10132 }
10133
10134 static enum attr_pent_pair
10135 ix86_safe_pent_pair (insn)
10136 rtx insn;
10137 {
10138 if (recog_memoized (insn) >= 0)
10139 return get_attr_pent_pair (insn);
10140 else
10141 return PENT_PAIR_NP;
10142 }
10143
10144 static enum attr_ppro_uops
10145 ix86_safe_ppro_uops (insn)
10146 rtx insn;
10147 {
10148 if (recog_memoized (insn) >= 0)
10149 return get_attr_ppro_uops (insn);
10150 else
10151 return PPRO_UOPS_MANY;
10152 }
10153
10154 static void
10155 ix86_dump_ppro_packet (dump)
10156 FILE *dump;
10157 {
10158 if (ix86_sched_data.ppro.decode[0])
10159 {
10160 fprintf (dump, "PPRO packet: %d",
10161 INSN_UID (ix86_sched_data.ppro.decode[0]));
10162 if (ix86_sched_data.ppro.decode[1])
10163 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10164 if (ix86_sched_data.ppro.decode[2])
10165 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10166 fputc ('\n', dump);
10167 }
10168 }
10169
10170 /* We're beginning a new block. Initialize data structures as necessary. */
10171
10172 static void
10173 ix86_sched_init (dump, sched_verbose, veclen)
10174 FILE *dump ATTRIBUTE_UNUSED;
10175 int sched_verbose ATTRIBUTE_UNUSED;
10176 int veclen ATTRIBUTE_UNUSED;
10177 {
10178 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10179 }
10180
10181 /* Shift INSN to SLOT, and shift everything else down. */
10182
10183 static void
10184 ix86_reorder_insn (insnp, slot)
10185 rtx *insnp, *slot;
10186 {
10187 if (insnp != slot)
10188 {
10189 rtx insn = *insnp;
10190 do
10191 insnp[0] = insnp[1];
10192 while (++insnp != slot);
10193 *insnp = insn;
10194 }
10195 }
10196
10197 /* Find an instruction with the given pairability that loses the fewest
10198 cycles to the fact that the CPU waits for both pipelines to finish
10199 before reading the next instructions. Also take care that the two
10200 instructions together do not exceed 7 bytes. */
10201
10202 static rtx *
10203 ix86_pent_find_pair (e_ready, ready, type, first)
10204 rtx *e_ready;
10205 rtx *ready;
10206 enum attr_pent_pair type;
10207 rtx first;
10208 {
10209 int mincycles, cycles;
10210 enum attr_pent_pair tmp;
10211 enum attr_memory memory;
10212 rtx *insnp, *bestinsnp = NULL;
10213
10214 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10215 return NULL;
10216
10217 memory = ix86_safe_memory (first);
10218 cycles = result_ready_cost (first);
10219 mincycles = INT_MAX;
10220
10221 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10222 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10223 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10224 {
10225 enum attr_memory second_memory;
10226 int secondcycles, currentcycles;
10227
10228 second_memory = ix86_safe_memory (*insnp);
10229 secondcycles = result_ready_cost (*insnp);
10230 currentcycles = abs (cycles - secondcycles);
10231
10232 if (secondcycles >= 1 && cycles >= 1)
10233 {
10234 /* Two read/modify/write instructions together take two
10235 cycles longer. */
10236 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10237 currentcycles += 2;
10238
10239 /* A read/modify/write instruction followed by a read/modify
10240 instruction takes one cycle longer. */
10241 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10242 && tmp != PENT_PAIR_UV
10243 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10244 currentcycles += 1;
10245 }
10246 if (currentcycles < mincycles)
10247 bestinsnp = insnp, mincycles = currentcycles;
10248 }
10249
10250 return bestinsnp;
10251 }
10252
10253 /* Subroutines of ix86_sched_reorder. */
10254
10255 static void
10256 ix86_sched_reorder_pentium (ready, e_ready)
10257 rtx *ready;
10258 rtx *e_ready;
10259 {
10260 enum attr_pent_pair pair1, pair2;
10261 rtx *insnp;
10262
10263 /* This wouldn't be necessary if Haifa knew that static insn ordering
10264 matters for which pipe an insn is issued to. So we have to make
10265 some minor rearrangements. */
10266
10267 pair1 = ix86_safe_pent_pair (*e_ready);
10268
10269 /* If the first insn is non-pairable, let it be. */
10270 if (pair1 == PENT_PAIR_NP)
10271 return;
10272
10273 pair2 = PENT_PAIR_NP;
10274 insnp = 0;
10275
10276 /* If the first insn is UV or PV pairable, search for a PU
10277 insn to go with. */
10278 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10279 {
10280 insnp = ix86_pent_find_pair (e_ready-1, ready,
10281 PENT_PAIR_PU, *e_ready);
10282 if (insnp)
10283 pair2 = PENT_PAIR_PU;
10284 }
10285
10286 /* If the first insn is PU or UV pairable, search for a PV
10287 insn to go with. */
10288 if (pair2 == PENT_PAIR_NP
10289 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10290 {
10291 insnp = ix86_pent_find_pair (e_ready-1, ready,
10292 PENT_PAIR_PV, *e_ready);
10293 if (insnp)
10294 pair2 = PENT_PAIR_PV;
10295 }
10296
10297 /* If the first insn is pairable, search for a UV
10298 insn to go with. */
10299 if (pair2 == PENT_PAIR_NP)
10300 {
10301 insnp = ix86_pent_find_pair (e_ready-1, ready,
10302 PENT_PAIR_UV, *e_ready);
10303 if (insnp)
10304 pair2 = PENT_PAIR_UV;
10305 }
10306
10307 if (pair2 == PENT_PAIR_NP)
10308 return;
10309
10310 /* Found something! Decide if we need to swap the order. */
10311 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10312 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10313 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10314 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10315 ix86_reorder_insn (insnp, e_ready);
10316 else
10317 ix86_reorder_insn (insnp, e_ready - 1);
10318 }
10319
10320 static void
10321 ix86_sched_reorder_ppro (ready, e_ready)
10322 rtx *ready;
10323 rtx *e_ready;
10324 {
10325 rtx decode[3];
10326 enum attr_ppro_uops cur_uops;
10327 int issued_this_cycle;
10328 rtx *insnp;
10329 int i;
10330
10331 /* At this point .ppro.decode contains the state of the three
10332 decoders from last "cycle". That is, those insns that were
10333 actually independent. But here we're scheduling for the
10334 decoder, and we may find things that are decodable in the
10335 same cycle. */
10336
10337 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10338 issued_this_cycle = 0;
10339
10340 insnp = e_ready;
10341 cur_uops = ix86_safe_ppro_uops (*insnp);
10342
10343 /* If the decoders are empty, and we have a complex insn at the
10344 head of the priority queue, let it issue without complaint. */
10345 if (decode[0] == NULL)
10346 {
10347 if (cur_uops == PPRO_UOPS_MANY)
10348 {
10349 decode[0] = *insnp;
10350 goto ppro_done;
10351 }
10352
10353 /* Otherwise, search for a 2-4 uop insn to issue. */
10354 while (cur_uops != PPRO_UOPS_FEW)
10355 {
10356 if (insnp == ready)
10357 break;
10358 cur_uops = ix86_safe_ppro_uops (*--insnp);
10359 }
10360
10361 /* If so, move it to the head of the line. */
10362 if (cur_uops == PPRO_UOPS_FEW)
10363 ix86_reorder_insn (insnp, e_ready);
10364
10365 /* Issue the head of the queue. */
10366 issued_this_cycle = 1;
10367 decode[0] = *e_ready--;
10368 }
10369
10370 /* Look for simple insns to fill in the other two slots. */
10371 for (i = 1; i < 3; ++i)
10372 if (decode[i] == NULL)
10373 {
10374 if (ready >= e_ready)
10375 goto ppro_done;
10376
10377 insnp = e_ready;
10378 cur_uops = ix86_safe_ppro_uops (*insnp);
10379 while (cur_uops != PPRO_UOPS_ONE)
10380 {
10381 if (insnp == ready)
10382 break;
10383 cur_uops = ix86_safe_ppro_uops (*--insnp);
10384 }
10385
10386 /* Found one. Move it to the head of the queue and issue it. */
10387 if (cur_uops == PPRO_UOPS_ONE)
10388 {
10389 ix86_reorder_insn (insnp, e_ready);
10390 decode[i] = *e_ready--;
10391 issued_this_cycle++;
10392 continue;
10393 }
10394
10395 /* ??? Didn't find one. Ideally, here we would do a lazy split
10396 of 2-uop insns, issue one and queue the other. */
10397 }
10398
10399 ppro_done:
10400 if (issued_this_cycle == 0)
10401 issued_this_cycle = 1;
10402 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10403 }
10404
10405 /* We are about to begin issuing insns for this clock cycle.
10406 Override the default sort algorithm to better slot instructions. */
10407 static int
10408 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10409 FILE *dump ATTRIBUTE_UNUSED;
10410 int sched_verbose ATTRIBUTE_UNUSED;
10411 rtx *ready;
10412 int *n_readyp;
10413 int clock_var ATTRIBUTE_UNUSED;
10414 {
10415 int n_ready = *n_readyp;
10416 rtx *e_ready = ready + n_ready - 1;
10417
10418 if (n_ready < 2)
10419 goto out;
10420
10421 switch (ix86_cpu)
10422 {
10423 default:
10424 break;
10425
10426 case PROCESSOR_PENTIUM:
10427 ix86_sched_reorder_pentium (ready, e_ready);
10428 break;
10429
10430 case PROCESSOR_PENTIUMPRO:
10431 ix86_sched_reorder_ppro (ready, e_ready);
10432 break;
10433 }
10434
10435 out:
10436 return ix86_issue_rate ();
10437 }
10438
10439 /* We are about to issue INSN. Return the number of insns left on the
10440 ready queue that can be issued this cycle. */
10441
10442 static int
10443 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10444 FILE *dump;
10445 int sched_verbose;
10446 rtx insn;
10447 int can_issue_more;
10448 {
10449 int i;
10450 switch (ix86_cpu)
10451 {
10452 default:
10453 return can_issue_more - 1;
10454
10455 case PROCESSOR_PENTIUMPRO:
10456 {
10457 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10458
10459 if (uops == PPRO_UOPS_MANY)
10460 {
10461 if (sched_verbose)
10462 ix86_dump_ppro_packet (dump);
10463 ix86_sched_data.ppro.decode[0] = insn;
10464 ix86_sched_data.ppro.decode[1] = NULL;
10465 ix86_sched_data.ppro.decode[2] = NULL;
10466 if (sched_verbose)
10467 ix86_dump_ppro_packet (dump);
10468 ix86_sched_data.ppro.decode[0] = NULL;
10469 }
10470 else if (uops == PPRO_UOPS_FEW)
10471 {
10472 if (sched_verbose)
10473 ix86_dump_ppro_packet (dump);
10474 ix86_sched_data.ppro.decode[0] = insn;
10475 ix86_sched_data.ppro.decode[1] = NULL;
10476 ix86_sched_data.ppro.decode[2] = NULL;
10477 }
10478 else
10479 {
10480 for (i = 0; i < 3; ++i)
10481 if (ix86_sched_data.ppro.decode[i] == NULL)
10482 {
10483 ix86_sched_data.ppro.decode[i] = insn;
10484 break;
10485 }
10486 if (i == 3)
10487 abort ();
10488 if (i == 2)
10489 {
10490 if (sched_verbose)
10491 ix86_dump_ppro_packet (dump);
10492 ix86_sched_data.ppro.decode[0] = NULL;
10493 ix86_sched_data.ppro.decode[1] = NULL;
10494 ix86_sched_data.ppro.decode[2] = NULL;
10495 }
10496 }
10497 }
10498 return --ix86_sched_data.ppro.issued_this_cycle;
10499 }
10500 }
10501 \f
10502 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10503 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10504 appropriate. */
10505
10506 void
10507 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10508 rtx insns;
10509 rtx dstref, srcref, dstreg, srcreg;
10510 {
10511 rtx insn;
10512
10513 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10514 if (INSN_P (insn))
10515 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10516 dstreg, srcreg);
10517 }
10518
10519 /* Subroutine of above to actually do the updating by recursively walking
10520 the rtx. */
10521
10522 static void
10523 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10524 rtx x;
10525 rtx dstref, srcref, dstreg, srcreg;
10526 {
10527 enum rtx_code code = GET_CODE (x);
10528 const char *format_ptr = GET_RTX_FORMAT (code);
10529 int i, j;
10530
10531 if (code == MEM && XEXP (x, 0) == dstreg)
10532 MEM_COPY_ATTRIBUTES (x, dstref);
10533 else if (code == MEM && XEXP (x, 0) == srcreg)
10534 MEM_COPY_ATTRIBUTES (x, srcref);
10535
10536 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10537 {
10538 if (*format_ptr == 'e')
10539 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10540 dstreg, srcreg);
10541 else if (*format_ptr == 'E')
10542 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10543 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10544 dstreg, srcreg);
10545 }
10546 }
10547 \f
10548 /* Compute the alignment given to a constant that is being placed in memory.
10549 EXP is the constant and ALIGN is the alignment that the object would
10550 ordinarily have.
10551 The value of this function is used instead of that alignment to align
10552 the object. */
10553
10554 int
10555 ix86_constant_alignment (exp, align)
10556 tree exp;
10557 int align;
10558 {
10559 if (TREE_CODE (exp) == REAL_CST)
10560 {
10561 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10562 return 64;
10563 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10564 return 128;
10565 }
10566 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10567 && align < 256)
10568 return 256;
10569
10570 return align;
10571 }
10572
10573 /* Compute the alignment for a static variable.
10574 TYPE is the data type, and ALIGN is the alignment that
10575 the object would ordinarily have. The value of this function is used
10576 instead of that alignment to align the object. */
10577
10578 int
10579 ix86_data_alignment (type, align)
10580 tree type;
10581 int align;
10582 {
10583 if (AGGREGATE_TYPE_P (type)
10584 && TYPE_SIZE (type)
10585 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10586 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10587 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10588 return 256;
10589
10590 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10591 to a 16-byte boundary. */
10592 if (TARGET_64BIT)
10593 {
10594 if (AGGREGATE_TYPE_P (type)
10595 && TYPE_SIZE (type)
10596 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10597 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10598 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10599 return 128;
10600 }
10601
10602 if (TREE_CODE (type) == ARRAY_TYPE)
10603 {
10604 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10605 return 64;
10606 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10607 return 128;
10608 }
10609 else if (TREE_CODE (type) == COMPLEX_TYPE)
10610 {
10611
10612 if (TYPE_MODE (type) == DCmode && align < 64)
10613 return 64;
10614 if (TYPE_MODE (type) == XCmode && align < 128)
10615 return 128;
10616 }
10617 else if ((TREE_CODE (type) == RECORD_TYPE
10618 || TREE_CODE (type) == UNION_TYPE
10619 || TREE_CODE (type) == QUAL_UNION_TYPE)
10620 && TYPE_FIELDS (type))
10621 {
10622 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10623 return 64;
10624 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10625 return 128;
10626 }
10627 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10628 || TREE_CODE (type) == INTEGER_TYPE)
10629 {
10630 if (TYPE_MODE (type) == DFmode && align < 64)
10631 return 64;
10632 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10633 return 128;
10634 }
10635
10636 return align;
10637 }
10638
10639 /* Compute the alignment for a local variable.
10640 TYPE is the data type, and ALIGN is the alignment that
10641 the object would ordinarily have. The value of this macro is used
10642 instead of that alignment to align the object. */
10643
10644 int
10645 ix86_local_alignment (type, align)
10646 tree type;
10647 int align;
10648 {
10649 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10650 to a 16-byte boundary. */
10651 if (TARGET_64BIT)
10652 {
10653 if (AGGREGATE_TYPE_P (type)
10654 && TYPE_SIZE (type)
10655 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10656 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10657 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10658 return 128;
10659 }
10660 if (TREE_CODE (type) == ARRAY_TYPE)
10661 {
10662 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10663 return 64;
10664 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10665 return 128;
10666 }
10667 else if (TREE_CODE (type) == COMPLEX_TYPE)
10668 {
10669 if (TYPE_MODE (type) == DCmode && align < 64)
10670 return 64;
10671 if (TYPE_MODE (type) == XCmode && align < 128)
10672 return 128;
10673 }
10674 else if ((TREE_CODE (type) == RECORD_TYPE
10675 || TREE_CODE (type) == UNION_TYPE
10676 || TREE_CODE (type) == QUAL_UNION_TYPE)
10677 && TYPE_FIELDS (type))
10678 {
10679 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10680 return 64;
10681 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10682 return 128;
10683 }
10684 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10685 || TREE_CODE (type) == INTEGER_TYPE)
10686 {
10687
10688 if (TYPE_MODE (type) == DFmode && align < 64)
10689 return 64;
10690 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10691 return 128;
10692 }
10693 return align;
10694 }
10695 \f
10696 /* Emit RTL insns to initialize the variable parts of a trampoline.
10697 FNADDR is an RTX for the address of the function's pure code.
10698 CXT is an RTX for the static chain value for the function. */
10699 void
10700 x86_initialize_trampoline (tramp, fnaddr, cxt)
10701 rtx tramp, fnaddr, cxt;
10702 {
10703 if (!TARGET_64BIT)
10704 {
10705 /* Compute offset from the end of the jmp to the target function. */
10706 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10707 plus_constant (tramp, 10),
10708 NULL_RTX, 1, OPTAB_DIRECT);
10709 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10710 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10711 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10712 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10713 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10714 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10715 }
10716 else
10717 {
10718 int offset = 0;
10719 /* Try to load the address using the shorter movl instead of movabs.
10720 We may want to support movq for kernel mode, but the kernel does not
10721 use trampolines at the moment. */
10722 if (x86_64_zero_extended_value (fnaddr))
10723 {
10724 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10725 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10726 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10727 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10728 gen_lowpart (SImode, fnaddr));
10729 offset += 6;
10730 }
10731 else
10732 {
10733 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10734 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10735 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10736 fnaddr);
10737 offset += 10;
10738 }
10739 /* Load static chain using movabs to r10. */
10740 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10741 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10742 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10743 cxt);
10744 offset += 10;
10745 /* Jump to r11. */
10746 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10747 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10748 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10749 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10750 offset += 3;
10751 if (offset > TRAMPOLINE_SIZE)
10752 abort ();
10753 }
10754 }
10755 \f
10756 #define def_builtin(MASK, NAME, TYPE, CODE) \
10757 do { \
10758 if ((MASK) & target_flags) \
10759 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10760 } while (0)
10761
10762 struct builtin_description
10763 {
10764 const unsigned int mask;
10765 const enum insn_code icode;
10766 const char *const name;
10767 const enum ix86_builtins code;
10768 const enum rtx_code comparison;
10769 const unsigned int flag;
10770 };
10771
10772 static const struct builtin_description bdesc_comi[] =
10773 {
10774 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10775 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10776 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10777 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10778 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10779 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10780 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10781 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10782 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10783 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10784 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10785 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10786 };
10787
10788 static const struct builtin_description bdesc_2arg[] =
10789 {
10790 /* SSE */
10791 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10792 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10793 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10794 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10795 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10796 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10797 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10798 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10799
10800 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10801 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10802 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10803 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10804 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10805 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10806 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10807 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10808 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10809 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10810 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10811 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10812 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10813 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10814 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10815 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10816 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10817 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10818 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10819 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10820 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10821 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10822 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10823 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10824
10825 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10826 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10827 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10828 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10829
10830 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10831 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10832 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10833 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10834 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10835
10836 /* MMX */
10837 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10838 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10839 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10840 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10841 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10842 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10843
10844 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10845 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10846 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10847 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10848 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10849 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10850 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10851 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10852
10853 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10854 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10855 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10856
10857 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10858 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10859 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10860 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10861
10862 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10863 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10864
10865 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10866 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10867 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10868 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10869 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10870 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10871
10872 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10873 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10874 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10875 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10876
10877 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10878 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10879 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10880 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10881 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10882 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10883
10884 /* Special. */
10885 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10886 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10887 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10888
10889 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10890 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10891
10892 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10893 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10894 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10895 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10896 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10897 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10898
10899 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10900 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10901 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10902 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10903 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10904 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10905
10906 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10907 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10908 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10909 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10910
10911 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10912 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10913
10914 };
10915
10916 static const struct builtin_description bdesc_1arg[] =
10917 {
10918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10919 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10920
10921 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10922 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10923 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10924
10925 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10926 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10927 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10928 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10929
10930 };
10931
10932 void
10933 ix86_init_builtins ()
10934 {
10935 if (TARGET_MMX)
10936 ix86_init_mmx_sse_builtins ();
10937 }
10938
10939 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10940 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10941 builtins. */
10942 static void
10943 ix86_init_mmx_sse_builtins ()
10944 {
10945 const struct builtin_description * d;
10946 size_t i;
10947 tree endlink = void_list_node;
10948
10949 tree pchar_type_node = build_pointer_type (char_type_node);
10950 tree pfloat_type_node = build_pointer_type (float_type_node);
10951 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10952 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10953
10954 /* Comparisons. */
10955 tree int_ftype_v4sf_v4sf
10956 = build_function_type (integer_type_node,
10957 tree_cons (NULL_TREE, V4SF_type_node,
10958 tree_cons (NULL_TREE,
10959 V4SF_type_node,
10960 endlink)));
10961 tree v4si_ftype_v4sf_v4sf
10962 = build_function_type (V4SI_type_node,
10963 tree_cons (NULL_TREE, V4SF_type_node,
10964 tree_cons (NULL_TREE,
10965 V4SF_type_node,
10966 endlink)));
10967 /* MMX/SSE/integer conversions. */
10968 tree int_ftype_v4sf
10969 = build_function_type (integer_type_node,
10970 tree_cons (NULL_TREE, V4SF_type_node,
10971 endlink));
10972 tree int_ftype_v8qi
10973 = build_function_type (integer_type_node,
10974 tree_cons (NULL_TREE, V8QI_type_node,
10975 endlink));
10976 tree v4sf_ftype_v4sf_int
10977 = build_function_type (V4SF_type_node,
10978 tree_cons (NULL_TREE, V4SF_type_node,
10979 tree_cons (NULL_TREE, integer_type_node,
10980 endlink)));
10981 tree v4sf_ftype_v4sf_v2si
10982 = build_function_type (V4SF_type_node,
10983 tree_cons (NULL_TREE, V4SF_type_node,
10984 tree_cons (NULL_TREE, V2SI_type_node,
10985 endlink)));
10986 tree int_ftype_v4hi_int
10987 = build_function_type (integer_type_node,
10988 tree_cons (NULL_TREE, V4HI_type_node,
10989 tree_cons (NULL_TREE, integer_type_node,
10990 endlink)));
10991 tree v4hi_ftype_v4hi_int_int
10992 = build_function_type (V4HI_type_node,
10993 tree_cons (NULL_TREE, V4HI_type_node,
10994 tree_cons (NULL_TREE, integer_type_node,
10995 tree_cons (NULL_TREE,
10996 integer_type_node,
10997 endlink))));
10998 /* Miscellaneous. */
10999 tree v8qi_ftype_v4hi_v4hi
11000 = build_function_type (V8QI_type_node,
11001 tree_cons (NULL_TREE, V4HI_type_node,
11002 tree_cons (NULL_TREE, V4HI_type_node,
11003 endlink)));
11004 tree v4hi_ftype_v2si_v2si
11005 = build_function_type (V4HI_type_node,
11006 tree_cons (NULL_TREE, V2SI_type_node,
11007 tree_cons (NULL_TREE, V2SI_type_node,
11008 endlink)));
11009 tree v4sf_ftype_v4sf_v4sf_int
11010 = build_function_type (V4SF_type_node,
11011 tree_cons (NULL_TREE, V4SF_type_node,
11012 tree_cons (NULL_TREE, V4SF_type_node,
11013 tree_cons (NULL_TREE,
11014 integer_type_node,
11015 endlink))));
11016 tree v4hi_ftype_v8qi_v8qi
11017 = build_function_type (V4HI_type_node,
11018 tree_cons (NULL_TREE, V8QI_type_node,
11019 tree_cons (NULL_TREE, V8QI_type_node,
11020 endlink)));
11021 tree v2si_ftype_v4hi_v4hi
11022 = build_function_type (V2SI_type_node,
11023 tree_cons (NULL_TREE, V4HI_type_node,
11024 tree_cons (NULL_TREE, V4HI_type_node,
11025 endlink)));
11026 tree v4hi_ftype_v4hi_int
11027 = build_function_type (V4HI_type_node,
11028 tree_cons (NULL_TREE, V4HI_type_node,
11029 tree_cons (NULL_TREE, integer_type_node,
11030 endlink)));
11031 tree v4hi_ftype_v4hi_di
11032 = build_function_type (V4HI_type_node,
11033 tree_cons (NULL_TREE, V4HI_type_node,
11034 tree_cons (NULL_TREE,
11035 long_long_integer_type_node,
11036 endlink)));
11037 tree v2si_ftype_v2si_di
11038 = build_function_type (V2SI_type_node,
11039 tree_cons (NULL_TREE, V2SI_type_node,
11040 tree_cons (NULL_TREE,
11041 long_long_integer_type_node,
11042 endlink)));
11043 tree void_ftype_void
11044 = build_function_type (void_type_node, endlink);
11045 tree void_ftype_unsigned
11046 = build_function_type (void_type_node,
11047 tree_cons (NULL_TREE, unsigned_type_node,
11048 endlink));
11049 tree unsigned_ftype_void
11050 = build_function_type (unsigned_type_node, endlink);
11051 tree di_ftype_void
11052 = build_function_type (long_long_unsigned_type_node, endlink);
11053 tree v4sf_ftype_void
11054 = build_function_type (V4SF_type_node, endlink);
11055 tree v2si_ftype_v4sf
11056 = build_function_type (V2SI_type_node,
11057 tree_cons (NULL_TREE, V4SF_type_node,
11058 endlink));
11059 /* Loads/stores. */
11060 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11061 tree_cons (NULL_TREE, V8QI_type_node,
11062 tree_cons (NULL_TREE,
11063 pchar_type_node,
11064 endlink)));
11065 tree void_ftype_v8qi_v8qi_pchar
11066 = build_function_type (void_type_node, maskmovq_args);
11067 tree v4sf_ftype_pfloat
11068 = build_function_type (V4SF_type_node,
11069 tree_cons (NULL_TREE, pfloat_type_node,
11070 endlink));
11071 /* @@@ the type is bogus */
11072 tree v4sf_ftype_v4sf_pv2si
11073 = build_function_type (V4SF_type_node,
11074 tree_cons (NULL_TREE, V4SF_type_node,
11075 tree_cons (NULL_TREE, pv2si_type_node,
11076 endlink)));
11077 tree void_ftype_pv2si_v4sf
11078 = build_function_type (void_type_node,
11079 tree_cons (NULL_TREE, pv2si_type_node,
11080 tree_cons (NULL_TREE, V4SF_type_node,
11081 endlink)));
11082 tree void_ftype_pfloat_v4sf
11083 = build_function_type (void_type_node,
11084 tree_cons (NULL_TREE, pfloat_type_node,
11085 tree_cons (NULL_TREE, V4SF_type_node,
11086 endlink)));
11087 tree void_ftype_pdi_di
11088 = build_function_type (void_type_node,
11089 tree_cons (NULL_TREE, pdi_type_node,
11090 tree_cons (NULL_TREE,
11091 long_long_unsigned_type_node,
11092 endlink)));
11093 /* Normal vector unops. */
11094 tree v4sf_ftype_v4sf
11095 = build_function_type (V4SF_type_node,
11096 tree_cons (NULL_TREE, V4SF_type_node,
11097 endlink));
11098
11099 /* Normal vector binops. */
11100 tree v4sf_ftype_v4sf_v4sf
11101 = build_function_type (V4SF_type_node,
11102 tree_cons (NULL_TREE, V4SF_type_node,
11103 tree_cons (NULL_TREE, V4SF_type_node,
11104 endlink)));
11105 tree v8qi_ftype_v8qi_v8qi
11106 = build_function_type (V8QI_type_node,
11107 tree_cons (NULL_TREE, V8QI_type_node,
11108 tree_cons (NULL_TREE, V8QI_type_node,
11109 endlink)));
11110 tree v4hi_ftype_v4hi_v4hi
11111 = build_function_type (V4HI_type_node,
11112 tree_cons (NULL_TREE, V4HI_type_node,
11113 tree_cons (NULL_TREE, V4HI_type_node,
11114 endlink)));
11115 tree v2si_ftype_v2si_v2si
11116 = build_function_type (V2SI_type_node,
11117 tree_cons (NULL_TREE, V2SI_type_node,
11118 tree_cons (NULL_TREE, V2SI_type_node,
11119 endlink)));
11120 tree di_ftype_di_di
11121 = build_function_type (long_long_unsigned_type_node,
11122 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11123 tree_cons (NULL_TREE,
11124 long_long_unsigned_type_node,
11125 endlink)));
11126
11127 tree v2si_ftype_v2sf
11128 = build_function_type (V2SI_type_node,
11129 tree_cons (NULL_TREE, V2SF_type_node,
11130 endlink));
11131 tree v2sf_ftype_v2si
11132 = build_function_type (V2SF_type_node,
11133 tree_cons (NULL_TREE, V2SI_type_node,
11134 endlink));
11135 tree v2si_ftype_v2si
11136 = build_function_type (V2SI_type_node,
11137 tree_cons (NULL_TREE, V2SI_type_node,
11138 endlink));
11139 tree v2sf_ftype_v2sf
11140 = build_function_type (V2SF_type_node,
11141 tree_cons (NULL_TREE, V2SF_type_node,
11142 endlink));
11143 tree v2sf_ftype_v2sf_v2sf
11144 = build_function_type (V2SF_type_node,
11145 tree_cons (NULL_TREE, V2SF_type_node,
11146 tree_cons (NULL_TREE,
11147 V2SF_type_node,
11148 endlink)));
11149 tree v2si_ftype_v2sf_v2sf
11150 = build_function_type (V2SI_type_node,
11151 tree_cons (NULL_TREE, V2SF_type_node,
11152 tree_cons (NULL_TREE,
11153 V2SF_type_node,
11154 endlink)));
11155
11156 /* Add all builtins that are more or less simple operations on two
11157 operands. */
11158 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11159 {
11160 /* Use one of the operands; the target can have a different mode for
11161 mask-generating compares. */
11162 enum machine_mode mode;
11163 tree type;
11164
11165 if (d->name == 0)
11166 continue;
11167 mode = insn_data[d->icode].operand[1].mode;
11168
11169 switch (mode)
11170 {
11171 case V4SFmode:
11172 type = v4sf_ftype_v4sf_v4sf;
11173 break;
11174 case V8QImode:
11175 type = v8qi_ftype_v8qi_v8qi;
11176 break;
11177 case V4HImode:
11178 type = v4hi_ftype_v4hi_v4hi;
11179 break;
11180 case V2SImode:
11181 type = v2si_ftype_v2si_v2si;
11182 break;
11183 case DImode:
11184 type = di_ftype_di_di;
11185 break;
11186
11187 default:
11188 abort ();
11189 }
11190
11191 /* Override for comparisons. */
11192 if (d->icode == CODE_FOR_maskcmpv4sf3
11193 || d->icode == CODE_FOR_maskncmpv4sf3
11194 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11195 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11196 type = v4si_ftype_v4sf_v4sf;
11197
11198 def_builtin (d->mask, d->name, type, d->code);
11199 }
11200
11201 /* Add the remaining MMX insns with somewhat more complicated types. */
11202 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11203 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11204 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11205 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11206 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11207 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11208 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11209
11210 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11211 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11212 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11213
11214 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11215 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11216
11217 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11218 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11219
11220 /* comi/ucomi insns. */
11221 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11222 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11223
11224 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11225 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11226 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11227
11228 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11229 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11230 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11231 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11232 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11233 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11234
11235 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11236 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11237 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11238 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11239
11240 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11241 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11242
11243 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11244
11245 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11246 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11247 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11248 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11249 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11250 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11251
11252 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11253 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11254 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11255 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11256
11257 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11258 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11259 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11260 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11261
11262 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11263
11264 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11265
11266 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11267 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11268 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11269 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11270 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11271 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11272
11273 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11274
11275 /* Original 3DNow! */
11276 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11277 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11278 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11289 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11290 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11291 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11292 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11293 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11294 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11295 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11296
11297 /* 3DNow! extension as used in the Athlon CPU. */
11298 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11299 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11300 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11301 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11302 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11303 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11304
11305 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11306 }
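/* Editorial sketch, not part of the original sources: once registered
   above, these builtins are callable from user code, normally through the
   <mmintrin.h> / <xmmintrin.h> wrappers.  Assuming a vector type declared
   with the mode attribute (the idiom of this era), a direct call might
   look like:

       typedef float v4sf __attribute__ ((mode (V4SF)));

       v4sf
       my_sqrt (v4sf x)
       {
         return __builtin_ia32_sqrtps (x);
       }

   Such a call reaches ix86_expand_builtin below, which dispatches on
   DECL_FUNCTION_CODE and expands the corresponding named insn pattern.  */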
11307
11308 /* Errors in the source file can cause expand_expr to return const0_rtx
11309 where we expect a vector. To avoid crashing, use one of the vector
11310 clear instructions. */
11311 static rtx
11312 safe_vector_operand (x, mode)
11313 rtx x;
11314 enum machine_mode mode;
11315 {
11316 if (x != const0_rtx)
11317 return x;
11318 x = gen_reg_rtx (mode);
11319
11320 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11321 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11322 : gen_rtx_SUBREG (DImode, x, 0)));
11323 else
11324 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11325 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11326 return x;
11327 }
11328
11329 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11330
11331 static rtx
11332 ix86_expand_binop_builtin (icode, arglist, target)
11333 enum insn_code icode;
11334 tree arglist;
11335 rtx target;
11336 {
11337 rtx pat;
11338 tree arg0 = TREE_VALUE (arglist);
11339 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11340 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11341 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11342 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11343 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11344 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11345
11346 if (VECTOR_MODE_P (mode0))
11347 op0 = safe_vector_operand (op0, mode0);
11348 if (VECTOR_MODE_P (mode1))
11349 op1 = safe_vector_operand (op1, mode1);
11350
11351 if (! target
11352 || GET_MODE (target) != tmode
11353 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11354 target = gen_reg_rtx (tmode);
11355
11356 /* In case the insn wants input operands in modes different from
11357 the result, abort. */
11358 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11359 abort ();
11360
11361 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11362 op0 = copy_to_mode_reg (mode0, op0);
11363 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11364 op1 = copy_to_mode_reg (mode1, op1);
11365
11366 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11367 yet one of the two must not be a memory. This is normally enforced
11368 by expanders, but we didn't bother to create one here. */
11369 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11370 op0 = copy_to_mode_reg (mode0, op0);
11371
11372 pat = GEN_FCN (icode) (target, op0, op1);
11373 if (! pat)
11374 return 0;
11375 emit_insn (pat);
11376 return target;
11377 }
11378
11379 /* In type_for_mode we restrict the ability to create TImode types
11380 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11381 to have a V4SFmode signature. Convert them in-place to TImode. */
11382
11383 static rtx
11384 ix86_expand_timode_binop_builtin (icode, arglist, target)
11385 enum insn_code icode;
11386 tree arglist;
11387 rtx target;
11388 {
11389 rtx pat;
11390 tree arg0 = TREE_VALUE (arglist);
11391 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11392 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11393 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11394
11395 op0 = gen_lowpart (TImode, op0);
11396 op1 = gen_lowpart (TImode, op1);
11397 target = gen_reg_rtx (TImode);
11398
11399 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11400 op0 = copy_to_mode_reg (TImode, op0);
11401 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11402 op1 = copy_to_mode_reg (TImode, op1);
11403
11404 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11405 yet one of the two must not be a memory. This is normally enforced
11406 by expanders, but we didn't bother to create one here. */
11407 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11408 op0 = copy_to_mode_reg (TImode, op0);
11409
11410 pat = GEN_FCN (icode) (target, op0, op1);
11411 if (! pat)
11412 return 0;
11413 emit_insn (pat);
11414
11415 return gen_lowpart (V4SFmode, target);
11416 }
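/* Editorial sketch (an assumption-labeled illustration, not original text):
   for example, __builtin_ia32_andps is registered above with a
   v4sf (v4sf, v4sf) signature, while sse_andti3 operates on TImode.  The
   expansion above therefore just reinterprets the bits:

       op0 = gen_lowpart (TImode, op0);
       op1 = gen_lowpart (TImode, op1);
       ... emit the TImode logical insn into a TImode temporary ...
       return gen_lowpart (V4SFmode, target);

   No conversion is performed; only the mode in which the 128 bits are
   viewed changes.  */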
11417
11418 /* Subroutine of ix86_expand_builtin to take care of stores. */
11419
11420 static rtx
11421 ix86_expand_store_builtin (icode, arglist)
11422 enum insn_code icode;
11423 tree arglist;
11424 {
11425 rtx pat;
11426 tree arg0 = TREE_VALUE (arglist);
11427 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11428 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11429 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11430 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11431 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11432
11433 if (VECTOR_MODE_P (mode1))
11434 op1 = safe_vector_operand (op1, mode1);
11435
11436 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11437
11438 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11439 op1 = copy_to_mode_reg (mode1, op1);
11440
11441 pat = GEN_FCN (icode) (op0, op1);
11442 if (pat)
11443 emit_insn (pat);
11444 return 0;
11445 }
11446
11447 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11448
11449 static rtx
11450 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11451 enum insn_code icode;
11452 tree arglist;
11453 rtx target;
11454 int do_load;
11455 {
11456 rtx pat;
11457 tree arg0 = TREE_VALUE (arglist);
11458 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11459 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11460 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11461
11462 if (! target
11463 || GET_MODE (target) != tmode
11464 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11465 target = gen_reg_rtx (tmode);
11466 if (do_load)
11467 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11468 else
11469 {
11470 if (VECTOR_MODE_P (mode0))
11471 op0 = safe_vector_operand (op0, mode0);
11472
11473 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11474 op0 = copy_to_mode_reg (mode0, op0);
11475 }
11476
11477 pat = GEN_FCN (icode) (target, op0);
11478 if (! pat)
11479 return 0;
11480 emit_insn (pat);
11481 return target;
11482 }
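/* Editorial note (a hedged summary, not original text): DO_LOAD selects
   between the two kinds of one-argument builtins.  Load-style builtins
   such as IX86_BUILTIN_LOADAPS pass do_load == 1, so OP0 is treated as a
   pointer and wrapped in a MEM of mode0 before the insn is generated.
   The generic one-operand builtins dispatched through bdesc_1arg at the
   bottom of ix86_expand_builtin pass do_load == 0 and use OP0 as a value
   directly.  */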
11483
11484 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11485 sqrtss, rsqrtss, rcpss. */
11486
11487 static rtx
11488 ix86_expand_unop1_builtin (icode, arglist, target)
11489 enum insn_code icode;
11490 tree arglist;
11491 rtx target;
11492 {
11493 rtx pat;
11494 tree arg0 = TREE_VALUE (arglist);
11495 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11496 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11497 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11498
11499 if (! target
11500 || GET_MODE (target) != tmode
11501 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11502 target = gen_reg_rtx (tmode);
11503
11504 if (VECTOR_MODE_P (mode0))
11505 op0 = safe_vector_operand (op0, mode0);
11506
11507 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11508 op0 = copy_to_mode_reg (mode0, op0);
11509
11510 op1 = op0;
11511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
11512 op1 = copy_to_mode_reg (mode0, op1);
11513
11514 pat = GEN_FCN (icode) (target, op0, op1);
11515 if (! pat)
11516 return 0;
11517 emit_insn (pat);
11518 return target;
11519 }
11520
11521 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11522
11523 static rtx
11524 ix86_expand_sse_compare (d, arglist, target)
11525 const struct builtin_description *d;
11526 tree arglist;
11527 rtx target;
11528 {
11529 rtx pat;
11530 tree arg0 = TREE_VALUE (arglist);
11531 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11532 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11533 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11534 rtx op2;
11535 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11536 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11537 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11538 enum rtx_code comparison = d->comparison;
11539
11540 if (VECTOR_MODE_P (mode0))
11541 op0 = safe_vector_operand (op0, mode0);
11542 if (VECTOR_MODE_P (mode1))
11543 op1 = safe_vector_operand (op1, mode1);
11544
11545 /* Swap operands if we have a comparison that isn't available in
11546 hardware. */
11547 if (d->flag)
11548 {
11549 rtx tmp = gen_reg_rtx (mode1);
11550 emit_move_insn (tmp, op1);
11551 op1 = op0;
11552 op0 = tmp;
11553 }
11554
11555 if (! target
11556 || GET_MODE (target) != tmode
11557 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11558 target = gen_reg_rtx (tmode);
11559
11560 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11561 op0 = copy_to_mode_reg (mode0, op0);
11562 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11563 op1 = copy_to_mode_reg (mode1, op1);
11564
11565 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11566 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11567 if (! pat)
11568 return 0;
11569 emit_insn (pat);
11570 return target;
11571 }
11572
11573 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11574
11575 static rtx
11576 ix86_expand_sse_comi (d, arglist, target)
11577 const struct builtin_description *d;
11578 tree arglist;
11579 rtx target;
11580 {
11581 rtx pat;
11582 tree arg0 = TREE_VALUE (arglist);
11583 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11584 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11585 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11586 rtx op2;
11587 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11588 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11589 enum rtx_code comparison = d->comparison;
11590
11591 if (VECTOR_MODE_P (mode0))
11592 op0 = safe_vector_operand (op0, mode0);
11593 if (VECTOR_MODE_P (mode1))
11594 op1 = safe_vector_operand (op1, mode1);
11595
11596 /* Swap operands if we have a comparison that isn't available in
11597 hardware. */
11598 if (d->flag)
11599 {
11600 rtx tmp = op1;
11601 op1 = op0;
11602 op0 = tmp;
11603 }
11604
11605 target = gen_reg_rtx (SImode);
11606 emit_move_insn (target, const0_rtx);
11607 target = gen_rtx_SUBREG (QImode, target, 0);
11608
11609 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11610 op0 = copy_to_mode_reg (mode0, op0);
11611 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11612 op1 = copy_to_mode_reg (mode1, op1);
11613
11614 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11615 pat = GEN_FCN (d->icode) (op0, op1, op2);
11616 if (! pat)
11617 return 0;
11618 emit_insn (pat);
11619 emit_insn (gen_rtx_SET (VOIDmode,
11620 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11621 gen_rtx_fmt_ee (comparison, QImode,
11622 gen_rtx_REG (CCmode, FLAGS_REG),
11623 const0_rtx)));
11624
11625 return SUBREG_REG (target);
11626 }
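/* Editorial note (hedged sketch, not original text): a comi/ucomi builtin
   yields an int computed from the flags.  Conceptually the expansion is

       target = 0;                        (SImode pseudo)
       emit the comi compare of op0, op1;
       set the low byte of target from the requested flag condition;

   so user code such as  int r = __builtin_ia32_comieq (a, b);  receives
   0 or 1 in R.  The builtin name is taken from bdesc_comi and is shown
   only for illustration.  */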
11627
11628 /* Expand an expression EXP that calls a built-in function,
11629 with result going to TARGET if that's convenient
11630 (and in mode MODE if that's convenient).
11631 SUBTARGET may be used as the target for computing one of EXP's operands.
11632 IGNORE is nonzero if the value is to be ignored. */
11633
11634 rtx
11635 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11636 tree exp;
11637 rtx target;
11638 rtx subtarget ATTRIBUTE_UNUSED;
11639 enum machine_mode mode ATTRIBUTE_UNUSED;
11640 int ignore ATTRIBUTE_UNUSED;
11641 {
11642 const struct builtin_description *d;
11643 size_t i;
11644 enum insn_code icode;
11645 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11646 tree arglist = TREE_OPERAND (exp, 1);
11647 tree arg0, arg1, arg2;
11648 rtx op0, op1, op2, pat;
11649 enum machine_mode tmode, mode0, mode1, mode2;
11650 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11651
11652 switch (fcode)
11653 {
11654 case IX86_BUILTIN_EMMS:
11655 emit_insn (gen_emms ());
11656 return 0;
11657
11658 case IX86_BUILTIN_SFENCE:
11659 emit_insn (gen_sfence ());
11660 return 0;
11661
11662 case IX86_BUILTIN_PEXTRW:
11663 icode = CODE_FOR_mmx_pextrw;
11664 arg0 = TREE_VALUE (arglist);
11665 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11666 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11667 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11668 tmode = insn_data[icode].operand[0].mode;
11669 mode0 = insn_data[icode].operand[1].mode;
11670 mode1 = insn_data[icode].operand[2].mode;
11671
11672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11673 op0 = copy_to_mode_reg (mode0, op0);
11674 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11675 {
11676 /* @@@ better error message */
11677 error ("selector must be an immediate");
11678 return gen_reg_rtx (tmode);
11679 }
11680 if (target == 0
11681 || GET_MODE (target) != tmode
11682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11683 target = gen_reg_rtx (tmode);
11684 pat = GEN_FCN (icode) (target, op0, op1);
11685 if (! pat)
11686 return 0;
11687 emit_insn (pat);
11688 return target;
11689
11690 case IX86_BUILTIN_PINSRW:
11691 icode = CODE_FOR_mmx_pinsrw;
11692 arg0 = TREE_VALUE (arglist);
11693 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11694 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11695 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11696 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11697 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11698 tmode = insn_data[icode].operand[0].mode;
11699 mode0 = insn_data[icode].operand[1].mode;
11700 mode1 = insn_data[icode].operand[2].mode;
11701 mode2 = insn_data[icode].operand[3].mode;
11702
11703 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11704 op0 = copy_to_mode_reg (mode0, op0);
11705 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11706 op1 = copy_to_mode_reg (mode1, op1);
11707 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11708 {
11709 /* @@@ better error message */
11710 error ("selector must be an immediate");
11711 return const0_rtx;
11712 }
11713 if (target == 0
11714 || GET_MODE (target) != tmode
11715 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11716 target = gen_reg_rtx (tmode);
11717 pat = GEN_FCN (icode) (target, op0, op1, op2);
11718 if (! pat)
11719 return 0;
11720 emit_insn (pat);
11721 return target;
11722
11723 case IX86_BUILTIN_MASKMOVQ:
11724 icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
11725 /* Note the arg order is different from the operand order. */
11726 arg1 = TREE_VALUE (arglist);
11727 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11728 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11729 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11730 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11731 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11732 mode0 = insn_data[icode].operand[0].mode;
11733 mode1 = insn_data[icode].operand[1].mode;
11734 mode2 = insn_data[icode].operand[2].mode;
11735
11736 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11737 op0 = copy_to_mode_reg (mode0, op0);
11738 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11739 op1 = copy_to_mode_reg (mode1, op1);
11740 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11741 op2 = copy_to_mode_reg (mode2, op2);
11742 pat = GEN_FCN (icode) (op0, op1, op2);
11743 if (! pat)
11744 return 0;
11745 emit_insn (pat);
11746 return 0;
11747
11748 case IX86_BUILTIN_SQRTSS:
11749 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11750 case IX86_BUILTIN_RSQRTSS:
11751 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11752 case IX86_BUILTIN_RCPSS:
11753 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11754
11755 case IX86_BUILTIN_ANDPS:
11756 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11757 arglist, target);
11758 case IX86_BUILTIN_ANDNPS:
11759 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11760 arglist, target);
11761 case IX86_BUILTIN_ORPS:
11762 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11763 arglist, target);
11764 case IX86_BUILTIN_XORPS:
11765 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11766 arglist, target);
11767
11768 case IX86_BUILTIN_LOADAPS:
11769 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11770
11771 case IX86_BUILTIN_LOADUPS:
11772 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11773
11774 case IX86_BUILTIN_STOREAPS:
11775 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
11776 case IX86_BUILTIN_STOREUPS:
11777 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
11778
11779 case IX86_BUILTIN_LOADSS:
11780 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11781
11782 case IX86_BUILTIN_STORESS:
11783 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
11784
11785 case IX86_BUILTIN_LOADHPS:
11786 case IX86_BUILTIN_LOADLPS:
11787 icode = (fcode == IX86_BUILTIN_LOADHPS
11788 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11789 arg0 = TREE_VALUE (arglist);
11790 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11791 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11792 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11793 tmode = insn_data[icode].operand[0].mode;
11794 mode0 = insn_data[icode].operand[1].mode;
11795 mode1 = insn_data[icode].operand[2].mode;
11796
11797 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11798 op0 = copy_to_mode_reg (mode0, op0);
11799 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11800 if (target == 0
11801 || GET_MODE (target) != tmode
11802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11803 target = gen_reg_rtx (tmode);
11804 pat = GEN_FCN (icode) (target, op0, op1);
11805 if (! pat)
11806 return 0;
11807 emit_insn (pat);
11808 return target;
11809
11810 case IX86_BUILTIN_STOREHPS:
11811 case IX86_BUILTIN_STORELPS:
11812 icode = (fcode == IX86_BUILTIN_STOREHPS
11813 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11814 arg0 = TREE_VALUE (arglist);
11815 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11816 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11817 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11818 mode0 = insn_data[icode].operand[1].mode;
11819 mode1 = insn_data[icode].operand[2].mode;
11820
11821 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11822 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11823 op1 = copy_to_mode_reg (mode1, op1);
11824
11825 pat = GEN_FCN (icode) (op0, op0, op1);
11826 if (! pat)
11827 return 0;
11828 emit_insn (pat);
11829 return 0;
11830
11831 case IX86_BUILTIN_MOVNTPS:
11832 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
11833 case IX86_BUILTIN_MOVNTQ:
11834 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
11835
11836 case IX86_BUILTIN_LDMXCSR:
11837 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11838 target = assign_386_stack_local (SImode, 0);
11839 emit_move_insn (target, op0);
11840 emit_insn (gen_ldmxcsr (target));
11841 return 0;
11842
11843 case IX86_BUILTIN_STMXCSR:
11844 target = assign_386_stack_local (SImode, 0);
11845 emit_insn (gen_stmxcsr (target));
11846 return copy_to_mode_reg (SImode, target);
11847
11848 case IX86_BUILTIN_SHUFPS:
11849 icode = CODE_FOR_sse_shufps;
11850 arg0 = TREE_VALUE (arglist);
11851 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11852 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11853 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11854 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11855 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11856 tmode = insn_data[icode].operand[0].mode;
11857 mode0 = insn_data[icode].operand[1].mode;
11858 mode1 = insn_data[icode].operand[2].mode;
11859 mode2 = insn_data[icode].operand[3].mode;
11860
11861 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11862 op0 = copy_to_mode_reg (mode0, op0);
11863 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11864 op1 = copy_to_mode_reg (mode1, op1);
11865 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11866 {
11867 /* @@@ better error message */
11868 error ("mask must be an immediate");
11869 return gen_reg_rtx (tmode);
11870 }
11871 if (target == 0
11872 || GET_MODE (target) != tmode
11873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11874 target = gen_reg_rtx (tmode);
11875 pat = GEN_FCN (icode) (target, op0, op1, op2);
11876 if (! pat)
11877 return 0;
11878 emit_insn (pat);
11879 return target;
11880
11881 case IX86_BUILTIN_PSHUFW:
11882 icode = CODE_FOR_mmx_pshufw;
11883 arg0 = TREE_VALUE (arglist);
11884 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11885 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11886 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11887 tmode = insn_data[icode].operand[0].mode;
11888 mode1 = insn_data[icode].operand[1].mode;
11889 mode2 = insn_data[icode].operand[2].mode;
11890
11891 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11892 op0 = copy_to_mode_reg (mode1, op0);
11893 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11894 {
11895 /* @@@ better error message */
11896 error ("mask must be an immediate");
11897 return const0_rtx;
11898 }
11899 if (target == 0
11900 || GET_MODE (target) != tmode
11901 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11902 target = gen_reg_rtx (tmode);
11903 pat = GEN_FCN (icode) (target, op0, op1);
11904 if (! pat)
11905 return 0;
11906 emit_insn (pat);
11907 return target;
11908
11909 case IX86_BUILTIN_FEMMS:
11910 emit_insn (gen_femms ());
11911 return NULL_RTX;
11912
11913 case IX86_BUILTIN_PAVGUSB:
11914 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11915
11916 case IX86_BUILTIN_PF2ID:
11917 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11918
11919 case IX86_BUILTIN_PFACC:
11920 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11921
11922 case IX86_BUILTIN_PFADD:
11923 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11924
11925 case IX86_BUILTIN_PFCMPEQ:
11926 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11927
11928 case IX86_BUILTIN_PFCMPGE:
11929 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11930
11931 case IX86_BUILTIN_PFCMPGT:
11932 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11933
11934 case IX86_BUILTIN_PFMAX:
11935 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11936
11937 case IX86_BUILTIN_PFMIN:
11938 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11939
11940 case IX86_BUILTIN_PFMUL:
11941 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11942
11943 case IX86_BUILTIN_PFRCP:
11944 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11945
11946 case IX86_BUILTIN_PFRCPIT1:
11947 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11948
11949 case IX86_BUILTIN_PFRCPIT2:
11950 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11951
11952 case IX86_BUILTIN_PFRSQIT1:
11953 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11954
11955 case IX86_BUILTIN_PFRSQRT:
11956 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11957
11958 case IX86_BUILTIN_PFSUB:
11959 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11960
11961 case IX86_BUILTIN_PFSUBR:
11962 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11963
11964 case IX86_BUILTIN_PI2FD:
11965 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11966
11967 case IX86_BUILTIN_PMULHRW:
11968 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11969
11970 case IX86_BUILTIN_PF2IW:
11971 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11972
11973 case IX86_BUILTIN_PFNACC:
11974 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11975
11976 case IX86_BUILTIN_PFPNACC:
11977 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11978
11979 case IX86_BUILTIN_PI2FW:
11980 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11981
11982 case IX86_BUILTIN_PSWAPDSI:
11983 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11984
11985 case IX86_BUILTIN_PSWAPDSF:
11986 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11987
11988 case IX86_BUILTIN_SSE_ZERO:
11989 target = gen_reg_rtx (V4SFmode);
11990 emit_insn (gen_sse_clrv4sf (target));
11991 return target;
11992
11993 case IX86_BUILTIN_MMX_ZERO:
11994 target = gen_reg_rtx (DImode);
11995 emit_insn (gen_mmx_clrdi (target));
11996 return target;
11997
11998 default:
11999 break;
12000 }
12001
12002 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
12003 if (d->code == fcode)
12004 {
12005 /* Compares are treated specially. */
12006 if (d->icode == CODE_FOR_maskcmpv4sf3
12007 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12008 || d->icode == CODE_FOR_maskncmpv4sf3
12009 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12010 return ix86_expand_sse_compare (d, arglist, target);
12011
12012 return ix86_expand_binop_builtin (d->icode, arglist, target);
12013 }
12014
12015 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12016 if (d->code == fcode)
12017 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12018
12019 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12020 if (d->code == fcode)
12021 return ix86_expand_sse_comi (d, arglist, target);
12022
12023 /* @@@ Should really do something sensible here. */
12024 return 0;
12025 }
12026
12027 /* Store OPERAND to memory after reload has completed.  This means
12028 that we can't easily use assign_stack_local.  */
12029 rtx
12030 ix86_force_to_memory (mode, operand)
12031 enum machine_mode mode;
12032 rtx operand;
12033 {
12034 rtx result;
12035 if (!reload_completed)
12036 abort ();
12037 if (TARGET_64BIT && TARGET_RED_ZONE)
12038 {
12039 result = gen_rtx_MEM (mode,
12040 gen_rtx_PLUS (Pmode,
12041 stack_pointer_rtx,
12042 GEN_INT (-RED_ZONE_SIZE)));
12043 emit_move_insn (result, operand);
12044 }
12045 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12046 {
12047 switch (mode)
12048 {
12049 case HImode:
12050 case SImode:
12051 operand = gen_lowpart (DImode, operand);
12052 /* FALLTHRU */
12053 case DImode:
12054 emit_insn (
12055 gen_rtx_SET (VOIDmode,
12056 gen_rtx_MEM (DImode,
12057 gen_rtx_PRE_DEC (DImode,
12058 stack_pointer_rtx)),
12059 operand));
12060 break;
12061 default:
12062 abort ();
12063 }
12064 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12065 }
12066 else
12067 {
12068 switch (mode)
12069 {
12070 case DImode:
12071 {
12072 rtx operands[2];
12073 split_di (&operand, 1, operands, operands + 1);
12074 emit_insn (
12075 gen_rtx_SET (VOIDmode,
12076 gen_rtx_MEM (SImode,
12077 gen_rtx_PRE_DEC (Pmode,
12078 stack_pointer_rtx)),
12079 operands[1]));
12080 emit_insn (
12081 gen_rtx_SET (VOIDmode,
12082 gen_rtx_MEM (SImode,
12083 gen_rtx_PRE_DEC (Pmode,
12084 stack_pointer_rtx)),
12085 operands[0]));
12086 }
12087 break;
12088 case HImode:
12089 /* It is better to store HImodes as SImodes. */
12090 if (!TARGET_PARTIAL_REG_STALL)
12091 operand = gen_lowpart (SImode, operand);
12092 /* FALLTHRU */
12093 case SImode:
12094 emit_insn (
12095 gen_rtx_SET (VOIDmode,
12096 gen_rtx_MEM (GET_MODE (operand),
12097 gen_rtx_PRE_DEC (SImode,
12098 stack_pointer_rtx)),
12099 operand));
12100 break;
12101 default:
12102 abort ();
12103 }
12104 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12105 }
12106 return result;
12107 }
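/* Editorial note (hedged example, not original text): on a 32-bit target
   an SImode operand is simply pushed, and the returned MEM addresses the
   new top of stack, roughly

       pushl  %eax
       ...  refer to (%esp)  ...

   ix86_free_from_memory below then releases the slot again by adjusting
   the stack pointer.  */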
12108
12109 /* Free the operand from memory.  */
12110 void
12111 ix86_free_from_memory (mode)
12112 enum machine_mode mode;
12113 {
12114 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12115 {
12116 int size;
12117
12118 if (mode == DImode || TARGET_64BIT)
12119 size = 8;
12120 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12121 size = 2;
12122 else
12123 size = 4;
12124 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
12125 to a pop or add instruction if registers are available.  */
12126 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12127 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12128 GEN_INT (size))));
12129 }
12130 }
12131
12132 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12133 QImode must go into class Q_REGS.
12134 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12135 movdf to do mem-to-mem moves through integer regs. */
12136 enum reg_class
12137 ix86_preferred_reload_class (x, class)
12138 rtx x;
12139 enum reg_class class;
12140 {
12141 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12142 {
12143 /* SSE can't load any constant directly yet. */
12144 if (SSE_CLASS_P (class))
12145 return NO_REGS;
12146 /* Floats can load 0 and 1. */
12147 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12148 {
12149 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12150 if (MAYBE_SSE_CLASS_P (class))
12151 return (reg_class_subset_p (class, GENERAL_REGS)
12152 ? GENERAL_REGS : FLOAT_REGS);
12153 else
12154 return class;
12155 }
12156 /* General regs can load everything. */
12157 if (reg_class_subset_p (class, GENERAL_REGS))
12158 return GENERAL_REGS;
12159 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12160 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12161 return NO_REGS;
12162 }
12163 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12164 return NO_REGS;
12165 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12166 return Q_REGS;
12167 return class;
12168 }
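/* Editorial note (hedged example, not original text): if reload considers
   placing an arbitrary float constant into an SSE register, the function
   above returns NO_REGS, so the constant ends up in the constant pool and
   is loaded from memory.  For the x87, standard_80387_constant_p accepts
   0.0 and 1.0, so a FLOAT_REGS class is kept and the constant can be
   materialized directly.  */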
12169
12170 /* If we are copying between general and FP registers, we need a memory
12171 location. The same is true for SSE and MMX registers.
12172
12173 The macro can't work reliably when one of the CLASSES is a class containing
12174 registers from multiple units (SSE, MMX, integer).  We avoid this by never
12175 combining those units in a single alternative in the machine description.
12176 Ensure that this constraint holds to avoid unexpected surprises.
12177
12178 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12179 enforce these sanity checks. */
12180 int
12181 ix86_secondary_memory_needed (class1, class2, mode, strict)
12182 enum reg_class class1, class2;
12183 enum machine_mode mode;
12184 int strict;
12185 {
12186 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12187 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12188 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12189 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12190 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12191 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12192 {
12193 if (strict)
12194 abort ();
12195 else
12196 return 1;
12197 }
12198 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12199 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12200 && (mode) != SImode)
12201 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12202 && (mode) != SImode));
12203 }
12204 /* Return the cost of moving data from a register in class CLASS1 to
12205 one in class CLASS2.
12206
12207 It is not required that the cost always equal 2 when FROM is the same as TO;
12208 on some machines it is expensive to move between registers if they are not
12209 general registers. */
12210 int
12211 ix86_register_move_cost (mode, class1, class2)
12212 enum machine_mode mode;
12213 enum reg_class class1, class2;
12214 {
12215 /* If secondary memory is required, compute the cost of the store followed
12216 by the load.  When copying from a general purpose register we may emit
12217 multiple stores followed by a single load, causing a memory size mismatch
12218 stall.  Count this as an arbitrarily high cost of 20.  */
12219 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12220 {
12221 int add_cost = 0;
12222 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12223 add_cost = 20;
12224 return (MEMORY_MOVE_COST (mode, class1, 0)
12225 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12226 }
12227 /* Moves between SSE/MMX and integer unit are expensive. */
12228 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12229 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12230 return ix86_cost->mmxsse_to_integer;
12231 if (MAYBE_FLOAT_CLASS_P (class1))
12232 return ix86_cost->fp_move;
12233 if (MAYBE_SSE_CLASS_P (class1))
12234 return ix86_cost->sse_move;
12235 if (MAYBE_MMX_CLASS_P (class1))
12236 return ix86_cost->mmx_move;
12237 return 2;
12238 }
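/* Editorial note (a hedged arithmetic example, not original text): for a
   DImode copy between GENERAL_REGS and an SSE class, secondary memory is
   needed, so the cost is MEMORY_MOVE_COST for the store plus the load;
   when copying from GENERAL_REGS (which needs two hard registers for
   DImode) to the single SSE register, the extra 20 is added to model the
   store/load size-mismatch stall described above.  */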
12239
12240 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12241 int
12242 ix86_hard_regno_mode_ok (regno, mode)
12243 int regno;
12244 enum machine_mode mode;
12245 {
12246 /* Only the flags registers can hold CCmode values, and they can hold nothing else.  */
12247 if (CC_REGNO_P (regno))
12248 return GET_MODE_CLASS (mode) == MODE_CC;
12249 if (GET_MODE_CLASS (mode) == MODE_CC
12250 || GET_MODE_CLASS (mode) == MODE_RANDOM
12251 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12252 return 0;
12253 if (FP_REGNO_P (regno))
12254 return VALID_FP_MODE_P (mode);
12255 if (SSE_REGNO_P (regno))
12256 return VALID_SSE_REG_MODE (mode);
12257 if (MMX_REGNO_P (regno))
12258 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12259 /* We handle both integers and floats in the general purpose registers.
12260 In the future we should be able to handle vector modes as well.  */
12261 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12262 return 0;
12263 /* Take care with QImode values - they can be held in non-QI regs, but then
12264 they do cause partial register stalls.  */
12265 if (regno < 4 || mode != QImode || TARGET_64BIT)
12266 return 1;
12267 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12268 }
12269
12270 /* Return the cost of moving data of mode M between a
12271 register and memory. A value of 2 is the default; this cost is
12272 relative to those in `REGISTER_MOVE_COST'.
12273
12274 If moving between registers and memory is more expensive than
12275 between two registers, you should define this macro to express the
12276 relative cost.
12277
12278 Also model the increased cost of moving QImode registers in
12279 classes other than Q_REGS.
12280 */
12281 int
12282 ix86_memory_move_cost (mode, class, in)
12283 enum machine_mode mode;
12284 enum reg_class class;
12285 int in;
12286 {
12287 if (FLOAT_CLASS_P (class))
12288 {
12289 int index;
12290 switch (mode)
12291 {
12292 case SFmode:
12293 index = 0;
12294 break;
12295 case DFmode:
12296 index = 1;
12297 break;
12298 case XFmode:
12299 case TFmode:
12300 index = 2;
12301 break;
12302 default:
12303 return 100;
12304 }
12305 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12306 }
12307 if (SSE_CLASS_P (class))
12308 {
12309 int index;
12310 switch (GET_MODE_SIZE (mode))
12311 {
12312 case 4:
12313 index = 0;
12314 break;
12315 case 8:
12316 index = 1;
12317 break;
12318 case 16:
12319 index = 2;
12320 break;
12321 default:
12322 return 100;
12323 }
12324 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12325 }
12326 if (MMX_CLASS_P (class))
12327 {
12328 int index;
12329 switch (GET_MODE_SIZE (mode))
12330 {
12331 case 4:
12332 index = 0;
12333 break;
12334 case 8:
12335 index = 1;
12336 break;
12337 default:
12338 return 100;
12339 }
12340 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12341 }
12342 switch (GET_MODE_SIZE (mode))
12343 {
12344 case 1:
12345 if (in)
12346 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12347 : ix86_cost->movzbl_load);
12348 else
12349 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12350 : ix86_cost->int_store[0] + 4);
12351 break;
12352 case 2:
12353 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12354 default:
12355 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12356 if (mode == TFmode)
12357 mode = XFmode;
12358 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12359 * (int) GET_MODE_SIZE (mode) / 4);
12360 }
12361 }
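/* Editorial note (hedged example, not original text): an 8-byte integer
   mode moved to or from GENERAL_REGS is charged as two 4-byte moves,
   i.e. int_load[2] * 8 / 4 for a load, while a QImode value kept outside
   Q_REGS pays the movzbl_load rate on loads (or int_store[0] + 4 on
   stores), modelling the partial-register penalties mentioned above.  */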
12362
12363 #ifdef DO_GLOBAL_CTORS_BODY
12364 static void
12365 ix86_svr3_asm_out_constructor (symbol, priority)
12366 rtx symbol;
12367 int priority ATTRIBUTE_UNUSED;
12368 {
12369 init_section ();
12370 fputs ("\tpushl $", asm_out_file);
12371 assemble_name (asm_out_file, XSTR (symbol, 0));
12372 fputc ('\n', asm_out_file);
12373 }
12374 #endif
12375
12376 /* Order the registers for register allocator. */
12377
12378 void
12379 x86_order_regs_for_local_alloc ()
12380 {
12381 int pos = 0;
12382 int i;
12383
12384 /* First allocate the local general purpose registers. */
12385 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12386 if (GENERAL_REGNO_P (i) && call_used_regs[i])
12387 reg_alloc_order [pos++] = i;
12388
12389 /* Global general purpose registers. */
12390 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12391 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
12392 reg_alloc_order [pos++] = i;
12393
12394 /* x87 registers come first in case we are doing FP math
12395 using them. */
12396 if (!TARGET_SSE_MATH)
12397 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12398 reg_alloc_order [pos++] = i;
12399
12400 /* SSE registers. */
12401 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
12402 reg_alloc_order [pos++] = i;
12403 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
12404 reg_alloc_order [pos++] = i;
12405
12406 /* x87 registers.  */
12407 if (TARGET_SSE_MATH)
12408 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12409 reg_alloc_order [pos++] = i;
12410
12411 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
12412 reg_alloc_order [pos++] = i;
12413
12414 /* Initialize the rest of the array, as we do not allocate some registers
12415 at all.  */
12416 while (pos < FIRST_PSEUDO_REGISTER)
12417 reg_alloc_order [pos++] = 0;
12418 }