1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT (-1)
48 #endif
49
50 /* Processor costs (relative to an add) */
51 static const
52 struct processor_costs size_cost = { /* costs for tuning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
62 0, /* "large" insn */
63 2, /* MOVE_RATIO */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of storing fp registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
84 0, /* size of prefetch block */
85 0, /* number of parallel prefetches */
86 };
87 /* Processor costs (relative to an add) */
88 static const
89 struct processor_costs i386_cost = { /* 386 specific costs */
90 1, /* cost of an add instruction */
91 1, /* cost of a lea instruction */
92 3, /* variable shift costs */
93 2, /* constant shift costs */
94 6, /* cost of starting a multiply */
95 1, /* cost of multiply per each bit set */
96 23, /* cost of a divide/mod */
97 3, /* cost of movsx */
98 2, /* cost of movzx */
99 15, /* "large" insn */
100 3, /* MOVE_RATIO */
101 4, /* cost for loading QImode using movzbl */
102 {2, 4, 2}, /* cost of loading integer registers
103 in QImode, HImode and SImode.
104 Relative to reg-reg move (2). */
105 {2, 4, 2}, /* cost of storing integer registers */
106 2, /* cost of reg,reg fld/fst */
107 {8, 8, 8}, /* cost of loading fp registers
108 in SFmode, DFmode and XFmode */
109 {8, 8, 8}, /* cost of storing fp registers */
110 2, /* cost of moving MMX register */
111 {4, 8}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {4, 8}, /* cost of storing MMX registers
114 in SImode and DImode */
115 2, /* cost of moving SSE register */
116 {4, 8, 16}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {4, 8, 16}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
123 };
124
125 static const
126 struct processor_costs i486_cost = { /* 486 specific costs */
127 1, /* cost of an add instruction */
128 1, /* cost of a lea instruction */
129 3, /* variable shift costs */
130 2, /* constant shift costs */
131 12, /* cost of starting a multiply */
132 1, /* cost of multiply per each bit set */
133 40, /* cost of a divide/mod */
134 3, /* cost of movsx */
135 2, /* cost of movzx */
136 15, /* "large" insn */
137 3, /* MOVE_RATIO */
138 4, /* cost for loading QImode using movzbl */
139 {2, 4, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 4, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {8, 8, 8}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {8, 8, 8}, /* cost of storing fp registers */
147 2, /* cost of moving MMX register */
148 {4, 8}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {4, 8}, /* cost of storing MMX registers
151 in SImode and DImode */
152 2, /* cost of moving SSE register */
153 {4, 8, 16}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {4, 8, 16}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
157 3, /* MMX or SSE register to integer */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 };
161
162 static const
163 struct processor_costs pentium_cost = {
164 1, /* cost of an add instruction */
165 1, /* cost of a lea instruction */
166 4, /* variable shift costs */
167 1, /* constant shift costs */
168 11, /* cost of starting a multiply */
169 0, /* cost of multiply per each bit set */
170 25, /* cost of a divide/mod */
171 3, /* cost of movsx */
172 2, /* cost of movzx */
173 8, /* "large" insn */
174 6, /* MOVE_RATIO */
175 6, /* cost for loading QImode using movzbl */
176 {2, 4, 2}, /* cost of loading integer registers
177 in QImode, HImode and SImode.
178 Relative to reg-reg move (2). */
179 {2, 4, 2}, /* cost of storing integer registers */
180 2, /* cost of reg,reg fld/fst */
181 {2, 2, 6}, /* cost of loading fp registers
182 in SFmode, DFmode and XFmode */
183 {4, 4, 6}, /* cost of storing fp registers */
184 8, /* cost of moving MMX register */
185 {8, 8}, /* cost of loading MMX registers
186 in SImode and DImode */
187 {8, 8}, /* cost of storing MMX registers
188 in SImode and DImode */
189 2, /* cost of moving SSE register */
190 {4, 8, 16}, /* cost of loading SSE registers
191 in SImode, DImode and TImode */
192 {4, 8, 16}, /* cost of storing SSE registers
193 in SImode, DImode and TImode */
194 3, /* MMX or SSE register to integer */
195 0, /* size of prefetch block */
196 0, /* number of parallel prefetches */
197 };
198
199 static const
200 struct processor_costs pentiumpro_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 1, /* variable shift costs */
204 1, /* constant shift costs */
205 4, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 17, /* cost of a divide/mod */
208 1, /* cost of movsx */
209 1, /* cost of movzx */
210 8, /* "large" insn */
211 6, /* MOVE_RATIO */
212 2, /* cost for loading QImode using movzbl */
213 {4, 4, 4}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 2, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of storing fp registers */
221 2, /* cost of moving MMX register */
222 {2, 2}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {2, 2}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {2, 2, 8}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {2, 2, 8}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 32, /* size of prefetch block */
233 6, /* number of parallel prefetches */
234 };
235
236 static const
237 struct processor_costs k6_cost = {
238 1, /* cost of an add instruction */
239 2, /* cost of a lea instruction */
240 1, /* variable shift costs */
241 1, /* constant shift costs */
242 3, /* cost of starting a multiply */
243 0, /* cost of multiply per each bit set */
244 18, /* cost of a divide/mod */
245 2, /* cost of movsx */
246 2, /* cost of movzx */
247 8, /* "large" insn */
248 4, /* MOVE_RATIO */
249 3, /* cost for loading QImode using movzbl */
250 {4, 5, 4}, /* cost of loading integer registers
251 in QImode, HImode and SImode.
252 Relative to reg-reg move (2). */
253 {2, 3, 2}, /* cost of storing integer registers */
254 4, /* cost of reg,reg fld/fst */
255 {6, 6, 6}, /* cost of loading fp registers
256 in SFmode, DFmode and XFmode */
257 {4, 4, 4}, /* cost of storing fp registers */
258 2, /* cost of moving MMX register */
259 {2, 2}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {2, 2}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {2, 2, 8}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {2, 2, 8}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 6, /* MMX or SSE register to integer */
269 32, /* size of prefetch block */
270 1, /* number of parallel prefetches */
271 };
272
273 static const
274 struct processor_costs athlon_cost = {
275 1, /* cost of an add instruction */
276 2, /* cost of a lea instruction */
277 1, /* variable shift costs */
278 1, /* constant shift costs */
279 5, /* cost of starting a multiply */
280 0, /* cost of multiply per each bit set */
281 42, /* cost of a divide/mod */
282 1, /* cost of movsx */
283 1, /* cost of movzx */
284 8, /* "large" insn */
285 9, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {4, 5, 4}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 3, 2}, /* cost of storing integer registers */
291 4, /* cost of reg,reg fld/fst */
292 {6, 6, 20}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {4, 4, 16}, /* cost of storing fp registers */
295 2, /* cost of moving MMX register */
296 {2, 2}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {2, 2}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {2, 2, 8}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {2, 2, 8}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 6, /* MMX or SSE register to integer */
306 64, /* size of prefetch block */
307 6, /* number of parallel prefetches */
308 };
309
310 static const
311 struct processor_costs pentium4_cost = {
312 1, /* cost of an add instruction */
313 1, /* cost of a lea instruction */
314 8, /* variable shift costs */
315 8, /* constant shift costs */
316 30, /* cost of starting a multiply */
317 0, /* cost of multiply per each bit set */
318 112, /* cost of a divide/mod */
319 1, /* cost of movsx */
320 1, /* cost of movzx */
321 16, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 5, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 3, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers */
332 2, /* cost of moving MMX register */
333 {2, 2}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {2, 2}, /* cost of storing MMX registers
336 in SImode and DImode */
337 12, /* cost of moving SSE register */
338 {12, 12, 12}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {2, 2, 8}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 10, /* MMX or SSE register to integer */
343 64, /* size of prefetch block */
344 6, /* number of parallel prefetches */
345 };
346
347 const struct processor_costs *ix86_cost = &pentium_cost;
348
349 /* Processor feature/optimization bitmasks. */
350 #define m_386 (1<<PROCESSOR_I386)
351 #define m_486 (1<<PROCESSOR_I486)
352 #define m_PENT (1<<PROCESSOR_PENTIUM)
353 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
354 #define m_K6 (1<<PROCESSOR_K6)
355 #define m_ATHLON (1<<PROCESSOR_ATHLON)
356 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
357
358 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
359 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
360 const int x86_zero_extend_with_and = m_486 | m_PENT;
361 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
362 const int x86_double_with_add = ~m_386;
363 const int x86_use_bit_test = m_386;
364 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
365 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
366 const int x86_3dnow_a = m_ATHLON;
367 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
368 const int x86_branch_hints = m_PENT4;
369 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
370 const int x86_partial_reg_stall = m_PPRO;
371 const int x86_use_loop = m_K6;
372 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
373 const int x86_use_mov0 = m_K6;
374 const int x86_use_cltd = ~(m_PENT | m_K6);
375 const int x86_read_modify_write = ~m_PENT;
376 const int x86_read_modify = ~(m_PENT | m_PPRO);
377 const int x86_split_long_moves = m_PPRO;
378 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
379 const int x86_single_stringop = m_386 | m_PENT4;
380 const int x86_qimode_math = ~(0);
381 const int x86_promote_qi_regs = 0;
382 const int x86_himode_math = ~(m_PPRO);
383 const int x86_promote_hi_regs = m_PPRO;
384 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
385 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
386 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
387 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
388 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
389 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
390 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
391 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
392 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
393 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_decompose_lea = m_PENT4;
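/* Each x86_* constant above is a bitmask over the PROCESSOR_* values; a
   feature or tuning heuristic applies to a given processor when its bit is
   set.  The masks are consumed by anding them with a one-hot value built
   from the active processor, as in the 3DNow! check further down in this
   file:

     if (x86_3dnow_a & (1 << ix86_arch))
       target_flags |= MASK_3DNOW_A;

   (most of the wrappers that do this for tuning flags live in i386.h and
   typically test ix86_cpu rather than ix86_arch).  */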
395
396 /* If the average insn count for a single function invocation is
397 lower than this constant, emit fast (but longer) prologue and
398 epilogue code. */
399 #define FAST_PROLOGUE_INSN_COUNT 30
400 /* Set by prologue expander and used by epilogue expander to determine
401 the style used. */
402 static int use_fast_prologue_epilogue;
403
404 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
405
406 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
407 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
408 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
409
410 /* Array of the smallest class containing reg number REGNO, indexed by
411 REGNO. Used by REGNO_REG_CLASS in i386.h. */
412
413 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
414 {
415 /* ax, dx, cx, bx */
416 AREG, DREG, CREG, BREG,
417 /* si, di, bp, sp */
418 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
419 /* FP registers */
420 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
421 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
422 /* arg pointer */
423 NON_Q_REGS,
424 /* flags, fpsr, dirflag, frame */
425 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
426 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
427 SSE_REGS, SSE_REGS,
428 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
429 MMX_REGS, MMX_REGS,
430 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
431 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 };
435
436 /* The "default" register map used in 32bit mode. */
437
438 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
439 {
440 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
441 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
442 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
443 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
444 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
445 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
446 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
447 };
448
449 static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
450 1 /*RDX*/, 2 /*RCX*/,
451 FIRST_REX_INT_REG /*R8 */,
452 FIRST_REX_INT_REG + 1 /*R9 */};
453 static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};
454
455 /* The "default" register map used in 64bit mode. */
456 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
457 {
458 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
459 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
460 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
461 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
462 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
463 8,9,10,11,12,13,14,15, /* extended integer registers */
464 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
465 };
466
467 /* Define the register numbers to be used in Dwarf debugging information.
468 The SVR4 reference port C compiler uses the following register numbers
469 in its Dwarf output code:
470 0 for %eax (gcc regno = 0)
471 1 for %ecx (gcc regno = 2)
472 2 for %edx (gcc regno = 1)
473 3 for %ebx (gcc regno = 3)
474 4 for %esp (gcc regno = 7)
475 5 for %ebp (gcc regno = 6)
476 6 for %esi (gcc regno = 4)
477 7 for %edi (gcc regno = 5)
478 The following three DWARF register numbers are never generated by
479 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
480 believes these numbers have these meanings.
481 8 for %eip (no gcc equivalent)
482 9 for %eflags (gcc regno = 17)
483 10 for %trapno (no gcc equivalent)
484 It is not at all clear how we should number the FP stack registers
485 for the x86 architecture. If the version of SDB on x86/svr4 were
486 a bit less brain dead with respect to floating-point then we would
487 have a precedent to follow with respect to DWARF register numbers
488 for x86 FP registers, but the SDB on x86/svr4 is so completely
489 broken with respect to FP registers that it is hardly worth thinking
490 of it as something to strive for compatibility with.
491 The version of x86/svr4 SDB I have at the moment does (partially)
492 seem to believe that DWARF register number 11 is associated with
493 the x86 register %st(0), but that's about all. Higher DWARF
494 register numbers don't seem to be associated with anything in
495 particular, and even for DWARF regno 11, SDB only seems to under-
496 stand that it should say that a variable lives in %st(0) (when
497 asked via an `=' command) if we said it was in DWARF regno 11,
498 but SDB still prints garbage when asked for the value of the
499 variable in question (via a `/' command).
500 (Also note that the labels SDB prints for various FP stack regs
501 when doing an `x' command are all wrong.)
502 Note that these problems generally don't affect the native SVR4
503 C compiler because it doesn't allow the use of -O with -g and
504 because when it is *not* optimizing, it allocates a memory
505 location for each floating-point variable, and the memory
506 location is what gets described in the DWARF AT_location
507 attribute for the variable in question.
508 Regardless of the severe mental illness of the x86/svr4 SDB, we
509 do something sensible here and we use the following DWARF
510 register numbers. Note that these are all stack-top-relative
511 numbers.
512 11 for %st(0) (gcc regno = 8)
513 12 for %st(1) (gcc regno = 9)
514 13 for %st(2) (gcc regno = 10)
515 14 for %st(3) (gcc regno = 11)
516 15 for %st(4) (gcc regno = 12)
517 16 for %st(5) (gcc regno = 13)
518 17 for %st(6) (gcc regno = 14)
519 18 for %st(7) (gcc regno = 15)
520 */
521 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
522 {
523 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
524 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
525 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
526 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
527 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
528 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
530 };
531
532 /* Test and compare insns in i386.md store the information needed to
533 generate branch and scc insns here. */
534
535 rtx ix86_compare_op0 = NULL_RTX;
536 rtx ix86_compare_op1 = NULL_RTX;
537
538 #define MAX_386_STACK_LOCALS 3
539 /* Size of the register save area. */
540 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
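/* For reference, with the usual x86-64 values (REGPARM_MAX == 6 integer
   registers, SSE_REGPARM_MAX == 8 SSE registers, UNITS_PER_WORD == 8) this
   works out to 6*8 + 8*16 = 176 bytes, the register save area the x86-64
   psABI reserves for a varargs function.  */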
541
542 /* Define the structure for the machine field in struct function. */
543 struct machine_function
544 {
545 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
546 int save_varrargs_registers;
547 int accesses_prev_frame;
548 };
549
550 #define ix86_stack_locals (cfun->machine->stack_locals)
551 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
552
553 /* Structure describing stack frame layout.
554 Stack grows downward:
555
556 [arguments]
557 <- ARG_POINTER
558 saved pc
559
560 saved frame pointer if frame_pointer_needed
561 <- HARD_FRAME_POINTER
562 [saved regs]
563
564 [padding1] \
565 )
566 [va_arg registers] (
567 > to_allocate <- FRAME_POINTER
568 [frame] (
569 )
570 [padding2] /
571 */
572 struct ix86_frame
573 {
574 int nregs;
575 int padding1;
576 int va_arg_size;
577 HOST_WIDE_INT frame;
578 int padding2;
579 int outgoing_arguments_size;
580 int red_zone_size;
581
582 HOST_WIDE_INT to_allocate;
583 /* The offsets relative to ARG_POINTER. */
584 HOST_WIDE_INT frame_pointer_offset;
585 HOST_WIDE_INT hard_frame_pointer_offset;
586 HOST_WIDE_INT stack_pointer_offset;
587 };
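/* A rough, illustrative reading of the fields above for a 32-bit function
   that needs a frame pointer, saves two call-saved registers and has 40
   bytes of locals: the return address and the saved %ebp sit directly
   below ARG_POINTER (8 bytes on a 32-bit target, which is where
   HARD_FRAME_POINTER ends up), the two register saves come next, and
   to_allocate then covers padding1, any va_arg register save area, the
   40-byte frame and padding2; that is, the space the prologue still has
   to carve out of the stack after its pushes.  The real values are
   computed by ix86_compute_frame_layout.  */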
588
589 /* Used to enable/disable debugging features. */
590 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
591 /* Code model option as passed by user. */
592 const char *ix86_cmodel_string;
593 /* Parsed value. */
594 enum cmodel ix86_cmodel;
595 /* Asm dialect. */
596 const char *ix86_asm_string;
597 enum asm_dialect ix86_asm_dialect = ASM_ATT;
598
599 /* which cpu are we scheduling for */
600 enum processor_type ix86_cpu;
601
602 /* which unit we are generating floating point math for */
603 enum fpmath_unit ix86_fpmath;
604
605 /* which instruction set architecture to use. */
606 int ix86_arch;
607
608 /* Strings to hold which cpu and instruction set architecture to use. */
609 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
610 const char *ix86_arch_string; /* for -march=<xxx> */
611 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
612
613 /* # of registers to use to pass arguments. */
614 const char *ix86_regparm_string;
615
616 /* true if sse prefetch instruction is not NOOP. */
617 int x86_prefetch_sse;
618
619 /* ix86_regparm_string as a number */
620 int ix86_regparm;
621
622 /* Alignment to use for loops and jumps: */
623
624 /* Power of two alignment for loops. */
625 const char *ix86_align_loops_string;
626
627 /* Power of two alignment for non-loop jumps. */
628 const char *ix86_align_jumps_string;
629
630 /* Power of two alignment for stack boundary in bytes. */
631 const char *ix86_preferred_stack_boundary_string;
632
633 /* Preferred alignment for stack boundary in bits. */
634 int ix86_preferred_stack_boundary;
635
636 /* Values 1-5: see jump.c */
637 int ix86_branch_cost;
638 const char *ix86_branch_cost_string;
639
640 /* Power of two alignment for functions. */
641 const char *ix86_align_funcs_string;
642
643 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
644 static char internal_label_prefix[16];
645 static int internal_label_prefix_len;
646 \f
647 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
648 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
649 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
650 int, int, FILE *));
651 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
652 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
653 rtx *, rtx *));
654 static rtx gen_push PARAMS ((rtx));
655 static int memory_address_length PARAMS ((rtx addr));
656 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
657 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
658 static int ix86_safe_length PARAMS ((rtx));
659 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
660 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
661 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
662 static void ix86_dump_ppro_packet PARAMS ((FILE *));
663 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
664 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
665 rtx));
666 static void ix86_init_machine_status PARAMS ((struct function *));
667 static void ix86_mark_machine_status PARAMS ((struct function *));
668 static void ix86_free_machine_status PARAMS ((struct function *));
669 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
670 static int ix86_safe_length_prefix PARAMS ((rtx));
671 static int ix86_nsaved_regs PARAMS ((void));
672 static void ix86_emit_save_regs PARAMS ((void));
673 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
674 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
675 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
676 static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
677 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
678 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
679 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
680 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
681 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
682 static int ix86_issue_rate PARAMS ((void));
683 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
684 static void ix86_sched_init PARAMS ((FILE *, int, int));
685 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
686 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
687 static void ix86_init_mmx_sse_builtins PARAMS ((void));
688
689 struct ix86_address
690 {
691 rtx base, index, disp;
692 HOST_WIDE_INT scale;
693 };
694
695 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
696
697 struct builtin_description;
698 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
699 tree, rtx));
700 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
701 tree, rtx));
702 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
703 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
704 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
705 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
706 tree, rtx));
707 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
708 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
709 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
710 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
711 enum rtx_code *,
712 enum rtx_code *,
713 enum rtx_code *));
714 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
715 rtx *, rtx *));
716 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
717 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
718 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
719 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
720 static int ix86_save_reg PARAMS ((int, int));
721 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
722 static int ix86_comp_type_attributes PARAMS ((tree, tree));
723 const struct attribute_spec ix86_attribute_table[];
724 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
725 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
726
727 #ifdef DO_GLOBAL_CTORS_BODY
728 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
729 #endif
730
731 /* Register class used for passing a given 64-bit part of the argument.
732 These represent classes as documented by the PS ABI, with the exception
733 of the SSESF and SSEDF classes, which are basically the SSE class, only
734 gcc uses SFmode or DFmode moves instead of DImode to avoid reformatting
735 penalties.
736 
737 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
738 whenever possible (the upper half then contains only padding). */
739 enum x86_64_reg_class
740 {
741 X86_64_NO_CLASS,
742 X86_64_INTEGER_CLASS,
743 X86_64_INTEGERSI_CLASS,
744 X86_64_SSE_CLASS,
745 X86_64_SSESF_CLASS,
746 X86_64_SSEDF_CLASS,
747 X86_64_SSEUP_CLASS,
748 X86_64_X87_CLASS,
749 X86_64_X87UP_CLASS,
750 X86_64_MEMORY_CLASS
751 };
752 static const char * const x86_64_reg_class_name[] =
753 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
754
755 #define MAX_CLASSES 4
756 static int classify_argument PARAMS ((enum machine_mode, tree,
757 enum x86_64_reg_class [MAX_CLASSES],
758 int));
759 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
760 int *));
761 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
762 const int *, int));
763 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
764 enum x86_64_reg_class));
765 \f
766 /* Initialize the GCC target structure. */
767 #undef TARGET_ATTRIBUTE_TABLE
768 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
769 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
770 # undef TARGET_MERGE_DECL_ATTRIBUTES
771 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
772 #endif
773
774 #undef TARGET_COMP_TYPE_ATTRIBUTES
775 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
776
777 #undef TARGET_INIT_BUILTINS
778 #define TARGET_INIT_BUILTINS ix86_init_builtins
779
780 #undef TARGET_EXPAND_BUILTIN
781 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
782
783 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
784 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
785 HOST_WIDE_INT));
786 # undef TARGET_ASM_FUNCTION_PROLOGUE
787 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
788 #endif
789
790 #undef TARGET_ASM_OPEN_PAREN
791 #define TARGET_ASM_OPEN_PAREN ""
792 #undef TARGET_ASM_CLOSE_PAREN
793 #define TARGET_ASM_CLOSE_PAREN ""
794
795 #undef TARGET_ASM_ALIGNED_HI_OP
796 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
797 #undef TARGET_ASM_ALIGNED_SI_OP
798 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
799 #ifdef ASM_QUAD
800 #undef TARGET_ASM_ALIGNED_DI_OP
801 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
802 #endif
803
804 #undef TARGET_ASM_UNALIGNED_HI_OP
805 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
806 #undef TARGET_ASM_UNALIGNED_SI_OP
807 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
808 #undef TARGET_ASM_UNALIGNED_DI_OP
809 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
810
811 #undef TARGET_SCHED_ADJUST_COST
812 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
813 #undef TARGET_SCHED_ISSUE_RATE
814 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
815 #undef TARGET_SCHED_VARIABLE_ISSUE
816 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
817 #undef TARGET_SCHED_INIT
818 #define TARGET_SCHED_INIT ix86_sched_init
819 #undef TARGET_SCHED_REORDER
820 #define TARGET_SCHED_REORDER ix86_sched_reorder
821
822 struct gcc_target targetm = TARGET_INITIALIZER;
823 \f
824 /* Sometimes certain combinations of command options do not make
825 sense on a particular target machine. You can define a macro
826 `OVERRIDE_OPTIONS' to take account of this. This macro, if
827 defined, is executed once just after all the command options have
828 been parsed.
829
830 Don't use this macro to turn on various extra optimizations for
831 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
832
833 void
834 override_options ()
835 {
836 int i;
837 /* Comes from final.c -- no real reason to change it. */
838 #define MAX_CODE_ALIGN 16
839
840 static struct ptt
841 {
842 const struct processor_costs *cost; /* Processor costs */
843 const int target_enable; /* Target flags to enable. */
844 const int target_disable; /* Target flags to disable. */
845 const int align_loop; /* Default alignments. */
846 const int align_loop_max_skip;
847 const int align_jump;
848 const int align_jump_max_skip;
849 const int align_func;
850 const int branch_cost;
851 }
852 const processor_target_table[PROCESSOR_max] =
853 {
854 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
855 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
856 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
857 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
858 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
859 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
860 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
861 };
862
863 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
864 static struct pta
865 {
866 const char *const name; /* processor name or nickname. */
867 const enum processor_type processor;
868 const enum pta_flags
869 {
870 PTA_SSE = 1,
871 PTA_SSE2 = 2,
872 PTA_MMX = 4,
873 PTA_PREFETCH_SSE = 8,
874 PTA_3DNOW = 16,
875 PTA_3DNOW_A = 64
876 } flags;
877 }
878 const processor_alias_table[] =
879 {
880 {"i386", PROCESSOR_I386, 0},
881 {"i486", PROCESSOR_I486, 0},
882 {"i586", PROCESSOR_PENTIUM, 0},
883 {"pentium", PROCESSOR_PENTIUM, 0},
884 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
885 {"i686", PROCESSOR_PENTIUMPRO, 0},
886 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
887 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
888 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
889 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
890 PTA_MMX | PTA_PREFETCH_SSE},
891 {"k6", PROCESSOR_K6, PTA_MMX},
892 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
893 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
894 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
895 | PTA_3DNOW_A},
896 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
897 | PTA_3DNOW | PTA_3DNOW_A},
898 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
899 | PTA_3DNOW_A | PTA_SSE},
900 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
901 | PTA_3DNOW_A | PTA_SSE},
902 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
903 | PTA_3DNOW_A | PTA_SSE},
904 };
905
906 int const pta_size = ARRAY_SIZE (processor_alias_table);
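/* As an example of how the table is used: -march=pentium3 matches the
   "pentium3" entry above, so ix86_arch becomes PROCESSOR_PENTIUMPRO and
   the PTA_MMX, PTA_SSE and PTA_PREFETCH_SSE bits make the checks below
   turn on MMX, SSE and SSE prefetch support, unless the user explicitly
   set those flags on the command line.  */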
907
908 #ifdef SUBTARGET_OVERRIDE_OPTIONS
909 SUBTARGET_OVERRIDE_OPTIONS;
910 #endif
911
912 if (!ix86_cpu_string && ix86_arch_string)
913 ix86_cpu_string = ix86_arch_string;
914 if (!ix86_cpu_string)
915 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
916 if (!ix86_arch_string)
917 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
918
919 if (ix86_cmodel_string != 0)
920 {
921 if (!strcmp (ix86_cmodel_string, "small"))
922 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
923 else if (flag_pic)
924 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
925 else if (!strcmp (ix86_cmodel_string, "32"))
926 ix86_cmodel = CM_32;
927 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
928 ix86_cmodel = CM_KERNEL;
929 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
930 ix86_cmodel = CM_MEDIUM;
931 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
932 ix86_cmodel = CM_LARGE;
933 else
934 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
935 }
936 else
937 {
938 ix86_cmodel = CM_32;
939 if (TARGET_64BIT)
940 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
941 }
942 if (ix86_asm_string != 0)
943 {
944 if (!strcmp (ix86_asm_string, "intel"))
945 ix86_asm_dialect = ASM_INTEL;
946 else if (!strcmp (ix86_asm_string, "att"))
947 ix86_asm_dialect = ASM_ATT;
948 else
949 error ("bad value (%s) for -masm= switch", ix86_asm_string);
950 }
951 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
952 error ("code model `%s' not supported in the %s bit mode",
953 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
954 if (ix86_cmodel == CM_LARGE)
955 sorry ("code model `large' not supported yet");
956 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
957 sorry ("%i-bit mode not compiled in",
958 (target_flags & MASK_64BIT) ? 64 : 32);
959
960 for (i = 0; i < pta_size; i++)
961 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
962 {
963 ix86_arch = processor_alias_table[i].processor;
964 /* Default cpu tuning to the architecture. */
965 ix86_cpu = ix86_arch;
966 if (processor_alias_table[i].flags & PTA_MMX
967 && !(target_flags & MASK_MMX_SET))
968 target_flags |= MASK_MMX;
969 if (processor_alias_table[i].flags & PTA_3DNOW
970 && !(target_flags & MASK_3DNOW_SET))
971 target_flags |= MASK_3DNOW;
972 if (processor_alias_table[i].flags & PTA_3DNOW_A
973 && !(target_flags & MASK_3DNOW_A_SET))
974 target_flags |= MASK_3DNOW_A;
975 if (processor_alias_table[i].flags & PTA_SSE
976 && !(target_flags & MASK_SSE_SET))
977 target_flags |= MASK_SSE;
978 if (processor_alias_table[i].flags & PTA_SSE2
979 && !(target_flags & MASK_SSE2_SET))
980 target_flags |= MASK_SSE2;
981 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
982 x86_prefetch_sse = true;
983 break;
984 }
985
986 if (i == pta_size)
987 error ("bad value (%s) for -march= switch", ix86_arch_string);
988
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
991 {
992 ix86_cpu = processor_alias_table[i].processor;
993 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
994 x86_prefetch_sse = true;
995 break;
996 }
997 if (i == pta_size)
998 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
999
1000 if (optimize_size)
1001 ix86_cost = &size_cost;
1002 else
1003 ix86_cost = processor_target_table[ix86_cpu].cost;
1004 target_flags |= processor_target_table[ix86_cpu].target_enable;
1005 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1006
1007 /* Arrange to set up i386_stack_locals for all functions. */
1008 init_machine_status = ix86_init_machine_status;
1009 mark_machine_status = ix86_mark_machine_status;
1010 free_machine_status = ix86_free_machine_status;
1011
1012 /* Validate -mregparm= value. */
1013 if (ix86_regparm_string)
1014 {
1015 i = atoi (ix86_regparm_string);
1016 if (i < 0 || i > REGPARM_MAX)
1017 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1018 else
1019 ix86_regparm = i;
1020 }
1021 else
1022 if (TARGET_64BIT)
1023 ix86_regparm = REGPARM_MAX;
1024
1025 /* If the user has provided any of the -malign-* options,
1026 warn and use that value only if -falign-* is not set.
1027 Remove this code in GCC 3.2 or later. */
1028 if (ix86_align_loops_string)
1029 {
1030 warning ("-malign-loops is obsolete, use -falign-loops");
1031 if (align_loops == 0)
1032 {
1033 i = atoi (ix86_align_loops_string);
1034 if (i < 0 || i > MAX_CODE_ALIGN)
1035 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1036 else
1037 align_loops = 1 << i;
1038 }
1039 }
1040
1041 if (ix86_align_jumps_string)
1042 {
1043 warning ("-malign-jumps is obsolete, use -falign-jumps");
1044 if (align_jumps == 0)
1045 {
1046 i = atoi (ix86_align_jumps_string);
1047 if (i < 0 || i > MAX_CODE_ALIGN)
1048 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1049 else
1050 align_jumps = 1 << i;
1051 }
1052 }
1053
1054 if (ix86_align_funcs_string)
1055 {
1056 warning ("-malign-functions is obsolete, use -falign-functions");
1057 if (align_functions == 0)
1058 {
1059 i = atoi (ix86_align_funcs_string);
1060 if (i < 0 || i > MAX_CODE_ALIGN)
1061 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1062 else
1063 align_functions = 1 << i;
1064 }
1065 }
1066
1067 /* Default align_* from the processor table. */
1068 if (align_loops == 0)
1069 {
1070 align_loops = processor_target_table[ix86_cpu].align_loop;
1071 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1072 }
1073 if (align_jumps == 0)
1074 {
1075 align_jumps = processor_target_table[ix86_cpu].align_jump;
1076 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1077 }
1078 if (align_functions == 0)
1079 {
1080 align_functions = processor_target_table[ix86_cpu].align_func;
1081 }
1082
1083 /* Validate -mpreferred-stack-boundary= value, or provide default.
1084 The default of 128 bits is for Pentium III's SSE __m128, but we
1085 don't want additional code to keep the stack aligned when
1086 optimizing for code size. */
1087 ix86_preferred_stack_boundary = (optimize_size
1088 ? TARGET_64BIT ? 64 : 32
1089 : 128);
1090 if (ix86_preferred_stack_boundary_string)
1091 {
1092 i = atoi (ix86_preferred_stack_boundary_string);
1093 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1094 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1095 TARGET_64BIT ? 3 : 2);
1096 else
1097 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1098 }
1099
1100 /* Validate -mbranch-cost= value, or provide default. */
1101 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1102 if (ix86_branch_cost_string)
1103 {
1104 i = atoi (ix86_branch_cost_string);
1105 if (i < 0 || i > 5)
1106 error ("-mbranch-cost=%d is not between 0 and 5", i);
1107 else
1108 ix86_branch_cost = i;
1109 }
1110
1111 /* Keep nonleaf frame pointers. */
1112 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1113 flag_omit_frame_pointer = 1;
1114
1115 /* If we're doing fast math, we don't care about comparison order
1116 wrt NaNs. This lets us use a shorter comparison sequence. */
1117 if (flag_unsafe_math_optimizations)
1118 target_flags &= ~MASK_IEEE_FP;
1119
1120 if (TARGET_64BIT)
1121 {
1122 if (TARGET_ALIGN_DOUBLE)
1123 error ("-malign-double makes no sense in the 64bit mode");
1124 if (TARGET_RTD)
1125 error ("-mrtd calling convention not supported in the 64bit mode");
1126 /* Enable by default the SSE and MMX builtins. */
1127 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1128 ix86_fpmath = FPMATH_SSE;
1129 }
1130 else
1131 ix86_fpmath = FPMATH_387;
1132
1133 if (ix86_fpmath_string != 0)
1134 {
1135 if (! strcmp (ix86_fpmath_string, "387"))
1136 ix86_fpmath = FPMATH_387;
1137 else if (! strcmp (ix86_fpmath_string, "sse"))
1138 {
1139 if (!TARGET_SSE)
1140 {
1141 warning ("SSE instruction set disabled, using 387 arithmetics");
1142 ix86_fpmath = FPMATH_387;
1143 }
1144 else
1145 ix86_fpmath = FPMATH_SSE;
1146 }
1147 else if (! strcmp (ix86_fpmath_string, "387,sse")
1148 || ! strcmp (ix86_fpmath_string, "sse,387"))
1149 {
1150 if (!TARGET_SSE)
1151 {
1152 warning ("SSE instruction set disabled, using 387 arithmetics");
1153 ix86_fpmath = FPMATH_387;
1154 }
1155 else if (!TARGET_80387)
1156 {
1157 warning ("387 instruction set disabled, using SSE arithmetics");
1158 ix86_fpmath = FPMATH_SSE;
1159 }
1160 else
1161 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1162 }
1163 else
1164 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1165 }
1166
1167 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1168 on by -msse. */
1169 if (TARGET_SSE)
1170 {
1171 target_flags |= MASK_MMX;
1172 x86_prefetch_sse = true;
1173 }
1174
1175 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1176 if (TARGET_3DNOW)
1177 {
1178 target_flags |= MASK_MMX;
1179 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1180 extensions it adds. */
1181 if (x86_3dnow_a & (1 << ix86_arch))
1182 target_flags |= MASK_3DNOW_A;
1183 }
1184 if ((x86_accumulate_outgoing_args & CPUMASK)
1185 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1186 && !optimize_size)
1187 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1188
1189 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1190 {
1191 char *p;
1192 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1193 p = strchr (internal_label_prefix, 'X');
1194 internal_label_prefix_len = p - internal_label_prefix;
1195 *p = '\0';
1196 }
1197 }
1198 \f
1199 void
1200 optimization_options (level, size)
1201 int level;
1202 int size ATTRIBUTE_UNUSED;
1203 {
1204 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1205 make the problem with not enough registers even worse. */
1206 #ifdef INSN_SCHEDULING
1207 if (level > 1)
1208 flag_schedule_insns = 0;
1209 #endif
1210 if (TARGET_64BIT && optimize >= 1)
1211 flag_omit_frame_pointer = 1;
1212 if (TARGET_64BIT)
1213 {
1214 flag_pcc_struct_return = 0;
1215 flag_asynchronous_unwind_tables = 1;
1216 }
1217 }
1218 \f
1219 /* Table of valid machine attributes. */
1220 const struct attribute_spec ix86_attribute_table[] =
1221 {
1222 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1223 /* Stdcall attribute says callee is responsible for popping arguments
1224 if they are not variable. */
1225 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1226 /* Cdecl attribute says the callee is a normal C declaration */
1227 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1228 /* Regparm attribute specifies how many integer arguments are to be
1229 passed in registers. */
1230 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1231 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1232 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1233 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1234 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1235 #endif
1236 { NULL, 0, 0, false, false, false, NULL }
1237 };
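/* For illustration, these attributes appear in user code on function
   types, e.g. (hypothetical declarations):

     int callback (int, int) __attribute__ ((stdcall));
     int helper (int, int, int) __attribute__ ((regparm (3)));

   stdcall/cdecl select the calling convention later checked by
   ix86_comp_type_attributes and ix86_return_pops_args, and regparm (N)
   requests that the first N integer arguments be passed in registers.  */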
1238
1239 /* Handle a "cdecl" or "stdcall" attribute;
1240 arguments as in struct attribute_spec.handler. */
1241 static tree
1242 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1243 tree *node;
1244 tree name;
1245 tree args ATTRIBUTE_UNUSED;
1246 int flags ATTRIBUTE_UNUSED;
1247 bool *no_add_attrs;
1248 {
1249 if (TREE_CODE (*node) != FUNCTION_TYPE
1250 && TREE_CODE (*node) != METHOD_TYPE
1251 && TREE_CODE (*node) != FIELD_DECL
1252 && TREE_CODE (*node) != TYPE_DECL)
1253 {
1254 warning ("`%s' attribute only applies to functions",
1255 IDENTIFIER_POINTER (name));
1256 *no_add_attrs = true;
1257 }
1258
1259 if (TARGET_64BIT)
1260 {
1261 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1262 *no_add_attrs = true;
1263 }
1264
1265 return NULL_TREE;
1266 }
1267
1268 /* Handle a "regparm" attribute;
1269 arguments as in struct attribute_spec.handler. */
1270 static tree
1271 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1272 tree *node;
1273 tree name;
1274 tree args;
1275 int flags ATTRIBUTE_UNUSED;
1276 bool *no_add_attrs;
1277 {
1278 if (TREE_CODE (*node) != FUNCTION_TYPE
1279 && TREE_CODE (*node) != METHOD_TYPE
1280 && TREE_CODE (*node) != FIELD_DECL
1281 && TREE_CODE (*node) != TYPE_DECL)
1282 {
1283 warning ("`%s' attribute only applies to functions",
1284 IDENTIFIER_POINTER (name));
1285 *no_add_attrs = true;
1286 }
1287 else
1288 {
1289 tree cst;
1290
1291 cst = TREE_VALUE (args);
1292 if (TREE_CODE (cst) != INTEGER_CST)
1293 {
1294 warning ("`%s' attribute requires an integer constant argument",
1295 IDENTIFIER_POINTER (name));
1296 *no_add_attrs = true;
1297 }
1298 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1299 {
1300 warning ("argument to `%s' attribute larger than %d",
1301 IDENTIFIER_POINTER (name), REGPARM_MAX);
1302 *no_add_attrs = true;
1303 }
1304 }
1305
1306 return NULL_TREE;
1307 }
1308
1309 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1310
1311 /* Generate the assembly code for function entry. FILE is a stdio
1312 stream to output the code to. SIZE is an int: how many units of
1313 temporary storage to allocate.
1314
1315 Refer to the array `regs_ever_live' to determine which registers to
1316 save; `regs_ever_live[I]' is nonzero if register number I is ever
1317 used in the function. This function is responsible for knowing
1318 which registers should not be saved even if used.
1319
1320 We override it here to allow for the new profiling code to go before
1321 the prologue and the old mcount code to go after the prologue (and
1322 after %ebx has been set up for ELF shared library support). */
1323
1324 static void
1325 ix86_osf_output_function_prologue (file, size)
1326 FILE *file;
1327 HOST_WIDE_INT size;
1328 {
1329 const char *prefix = "";
1330 const char *const lprefix = LPREFIX;
1331 int labelno = profile_label_no;
1332
1333 #ifdef OSF_OS
1334
1335 if (TARGET_UNDERSCORES)
1336 prefix = "_";
1337
1338 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1339 {
1340 if (!flag_pic && !HALF_PIC_P ())
1341 {
1342 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1343 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1344 }
1345
1346 else if (HALF_PIC_P ())
1347 {
1348 rtx symref;
1349
1350 HALF_PIC_EXTERNAL ("_mcount_ptr");
1351 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1352 "_mcount_ptr"));
1353
1354 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1355 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1356 XSTR (symref, 0));
1357 fprintf (file, "\tcall *(%%eax)\n");
1358 }
1359
1360 else
1361 {
1362 static int call_no = 0;
1363
1364 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1365 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1366 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1367 lprefix, call_no++);
1368 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1369 lprefix, labelno);
1370 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1371 prefix);
1372 fprintf (file, "\tcall *(%%eax)\n");
1373 }
1374 }
1375
1376 #else /* !OSF_OS */
1377
1378 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1379 {
1380 if (!flag_pic)
1381 {
1382 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1383 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1384 }
1385
1386 else
1387 {
1388 static int call_no = 0;
1389
1390 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1391 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1392 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1393 lprefix, call_no++);
1394 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1395 lprefix, labelno);
1396 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1397 prefix);
1398 fprintf (file, "\tcall *(%%eax)\n");
1399 }
1400 }
1401 #endif /* !OSF_OS */
1402
1403 function_prologue (file, size);
1404 }
1405
1406 #endif /* OSF_OS || TARGET_OSF1ELF */
1407
1408 /* Return 0 if the attributes for two types are incompatible, 1 if they
1409 are compatible, and 2 if they are nearly compatible (which causes a
1410 warning to be generated). */
1411
1412 static int
1413 ix86_comp_type_attributes (type1, type2)
1414 tree type1;
1415 tree type2;
1416 {
1417 /* Check for mismatch of non-default calling convention. */
1418 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1419
1420 if (TREE_CODE (type1) != FUNCTION_TYPE)
1421 return 1;
1422
1423 /* Check for mismatched return types (cdecl vs stdcall). */
1424 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1425 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1426 return 0;
1427 return 1;
1428 }
1429 \f
1430 /* Value is the number of bytes of arguments automatically
1431 popped when returning from a subroutine call.
1432 FUNDECL is the declaration node of the function (as a tree),
1433 FUNTYPE is the data type of the function (as a tree),
1434 or for a library call it is an identifier node for the subroutine name.
1435 SIZE is the number of bytes of arguments passed on the stack.
1436
1437 On the 80386, the RTD insn may be used to pop them if the number
1438 of args is fixed, but if the number is variable then the caller
1439 must pop them all. RTD can't be used for library calls now
1440 because the library is compiled with the Unix compiler.
1441 Use of RTD is a selectable option, since it is incompatible with
1442 standard Unix calling sequences. If the option is not selected,
1443 the caller must always pop the args.
1444
1445 The attribute stdcall is equivalent to RTD on a per module basis. */
1446
1447 int
1448 ix86_return_pops_args (fundecl, funtype, size)
1449 tree fundecl;
1450 tree funtype;
1451 int size;
1452 {
1453 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1454
1455 /* Cdecl functions override -mrtd, and never pop the stack. */
1456 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1457
1458 /* Stdcall functions will pop the stack if not variable args. */
1459 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1460 rtd = 1;
1461
1462 if (rtd
1463 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1464 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1465 == void_type_node)))
1466 return size;
1467 }
1468
1469 /* Lose any fake structure return argument if it is passed on the stack. */
1470 if (aggregate_value_p (TREE_TYPE (funtype))
1471 && !TARGET_64BIT)
1472 {
1473 int nregs = ix86_regparm;
1474
1475 if (funtype)
1476 {
1477 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1478
1479 if (attr)
1480 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1481 }
1482
1483 if (!nregs)
1484 return GET_MODE_SIZE (Pmode);
1485 }
1486
1487 return 0;
1488 }
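/* Two illustrative cases for the function above (32-bit, hypothetical
   prototypes): a fixed-argument function declared
   __attribute__ ((stdcall)) with two int parameters has SIZE == 8, so 8
   is returned and the callee pops its arguments with `ret $8'; the same
   function without stdcall (and without -mrtd) returns 0 and the caller
   pops.  */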
1489 \f
1490 /* Argument support functions. */
1491
1492 /* Return true when register may be used to pass function parameters. */
1493 bool
1494 ix86_function_arg_regno_p (regno)
1495 int regno;
1496 {
1497 int i;
1498 if (!TARGET_64BIT)
1499 return (regno < REGPARM_MAX
1500 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1501 if (SSE_REGNO_P (regno) && TARGET_SSE)
1502 return true;
1503 /* RAX is used as hidden argument to va_arg functions. */
1504 if (!regno)
1505 return true;
1506 for (i = 0; i < REGPARM_MAX; i++)
1507 if (regno == x86_64_int_parameter_registers[i])
1508 return true;
1509 return false;
1510 }
1511
1512 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1513 for a call to a function whose data type is FNTYPE.
1514 For a library call, FNTYPE is 0. */
1515
1516 void
1517 init_cumulative_args (cum, fntype, libname)
1518 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1519 tree fntype; /* tree ptr for function decl */
1520 rtx libname; /* SYMBOL_REF of library name or 0 */
1521 {
1522 static CUMULATIVE_ARGS zero_cum;
1523 tree param, next_param;
1524
1525 if (TARGET_DEBUG_ARG)
1526 {
1527 fprintf (stderr, "\ninit_cumulative_args (");
1528 if (fntype)
1529 fprintf (stderr, "fntype code = %s, ret code = %s",
1530 tree_code_name[(int) TREE_CODE (fntype)],
1531 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1532 else
1533 fprintf (stderr, "no fntype");
1534
1535 if (libname)
1536 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1537 }
1538
1539 *cum = zero_cum;
1540
1541 /* Set up the number of registers to use for passing arguments. */
1542 cum->nregs = ix86_regparm;
1543 cum->sse_nregs = SSE_REGPARM_MAX;
1544 if (fntype && !TARGET_64BIT)
1545 {
1546 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1547
1548 if (attr)
1549 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1550 }
1551 cum->maybe_vaarg = false;
1552
1553 /* Determine if this function has variable arguments. This is
1554 indicated by the last argument being 'void_type_node' if there
1555 are no variable arguments. If there are variable arguments, then
1556 we won't pass anything in registers. */
1557
1558 if (cum->nregs)
1559 {
1560 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1561 param != 0; param = next_param)
1562 {
1563 next_param = TREE_CHAIN (param);
1564 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1565 {
1566 if (!TARGET_64BIT)
1567 cum->nregs = 0;
1568 cum->maybe_vaarg = true;
1569 }
1570 }
1571 }
1572 if ((!fntype && !libname)
1573 || (fntype && !TYPE_ARG_TYPES (fntype)))
1574 cum->maybe_vaarg = 1;
1575
1576 if (TARGET_DEBUG_ARG)
1577 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1578
1579 return;
1580 }
1581
1582 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1583 of this code is to classify each 8 bytes of an incoming argument by the register
1584 class and assign registers accordingly. */
1585
1586 /* Return the union class of CLASS1 and CLASS2.
1587 See the x86-64 PS ABI for details. */
1588
1589 static enum x86_64_reg_class
1590 merge_classes (class1, class2)
1591 enum x86_64_reg_class class1, class2;
1592 {
1593 /* Rule #1: If both classes are equal, this is the resulting class. */
1594 if (class1 == class2)
1595 return class1;
1596
1597 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1598 the other class. */
1599 if (class1 == X86_64_NO_CLASS)
1600 return class2;
1601 if (class2 == X86_64_NO_CLASS)
1602 return class1;
1603
1604 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1605 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1606 return X86_64_MEMORY_CLASS;
1607
1608 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1609 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1610 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1611 return X86_64_INTEGERSI_CLASS;
1612 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1613 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1614 return X86_64_INTEGER_CLASS;
1615
1616 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1617 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1618 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1619 return X86_64_MEMORY_CLASS;
1620
1621 /* Rule #6: Otherwise class SSE is used. */
1622 return X86_64_SSE_CLASS;
1623 }
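/* Illustrative sketch (not part of the original source): a self-contained
   mirror of the merge rules above, usable as a quick cross-check.  The enum
   values and function names below are local stand-ins for the real
   x86_64_reg_class enumerators, and the block is guarded by #if 0 so it
   does not affect the build.  */
#if 0
#include <assert.h>

enum demo_class { D_NO, D_INT, D_INTSI, D_SSE, D_SSESF, D_SSEDF,
                  D_SSEUP, D_X87, D_X87UP, D_MEM };

static enum demo_class
demo_merge (enum demo_class c1, enum demo_class c2)
{
  if (c1 == c2)
    return c1;                                   /* rule #1: equal classes */
  if (c1 == D_NO)
    return c2;                                   /* rule #2: NO_CLASS yields */
  if (c2 == D_NO)
    return c1;
  if (c1 == D_MEM || c2 == D_MEM)
    return D_MEM;                                /* rule #3: MEMORY wins */
  if ((c1 == D_INTSI && c2 == D_SSESF) || (c2 == D_INTSI && c1 == D_SSESF))
    return D_INTSI;                              /* rule #4: SImode-sized pair */
  if (c1 == D_INT || c1 == D_INTSI || c2 == D_INT || c2 == D_INTSI)
    return D_INT;                                /* rule #4: INTEGER wins */
  if (c1 == D_X87 || c1 == D_X87UP || c2 == D_X87 || c2 == D_X87UP)
    return D_MEM;                                /* rule #5: x87 forces memory */
  return D_SSE;                                  /* rule #6: otherwise SSE */
}

static void
demo_merge_check (void)
{
  assert (demo_merge (D_NO, D_SSEDF) == D_SSEDF);
  assert (demo_merge (D_INTSI, D_SSESF) == D_INTSI);
  assert (demo_merge (D_INT, D_SSE) == D_INT);
  assert (demo_merge (D_X87, D_SSE) == D_MEM);
}
#endif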
1624
1625 /* Classify the argument of type TYPE and mode MODE.
1626 CLASSES will be filled by the register class used to pass each word
1627 of the operand. The number of words is returned. In case the parameter
1628 should be passed in memory, 0 is returned. As a special case for zero
1629 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1630
1631 BIT_OFFSET is used internally for handling records and specifies the
1632 offset in bits modulo 256 to avoid overflow cases.
1633
1634 See the x86-64 PS ABI for details.
1635 */
1636
1637 static int
1638 classify_argument (mode, type, classes, bit_offset)
1639 enum machine_mode mode;
1640 tree type;
1641 enum x86_64_reg_class classes[MAX_CLASSES];
1642 int bit_offset;
1643 {
1644 int bytes =
1645 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1646 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1647
1648 if (type && AGGREGATE_TYPE_P (type))
1649 {
1650 int i;
1651 tree field;
1652 enum x86_64_reg_class subclasses[MAX_CLASSES];
1653
1654 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1655 if (bytes > 16)
1656 return 0;
1657
1658 for (i = 0; i < words; i++)
1659 classes[i] = X86_64_NO_CLASS;
1660
1661 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1662 signal the memory class, so handle it as a special case. */
1663 if (!words)
1664 {
1665 classes[0] = X86_64_NO_CLASS;
1666 return 1;
1667 }
1668
1669 /* Classify each field of record and merge classes. */
1670 if (TREE_CODE (type) == RECORD_TYPE)
1671 {
1672 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1673 {
1674 if (TREE_CODE (field) == FIELD_DECL)
1675 {
1676 int num;
1677
1678 /* Bitfields are always classified as integer. Handle them
1679 early, since later code would consider them to be
1680 misaligned integers. */
1681 if (DECL_BIT_FIELD (field))
1682 {
1683 for (i = int_bit_position (field) / 8 / 8;
1684 i < (int_bit_position (field)
1685 + tree_low_cst (DECL_SIZE (field), 0)
1686 + 63) / 8 / 8; i++)
1687 classes[i] =
1688 merge_classes (X86_64_INTEGER_CLASS,
1689 classes[i]);
1690 }
1691 else
1692 {
1693 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1694 TREE_TYPE (field), subclasses,
1695 (int_bit_position (field)
1696 + bit_offset) % 256);
1697 if (!num)
1698 return 0;
1699 for (i = 0; i < num; i++)
1700 {
1701 int pos =
1702 (int_bit_position (field) + bit_offset) / 8 / 8;
1703 classes[i + pos] =
1704 merge_classes (subclasses[i], classes[i + pos]);
1705 }
1706 }
1707 }
1708 }
1709 }
1710 /* Arrays are handled as small records. */
1711 else if (TREE_CODE (type) == ARRAY_TYPE)
1712 {
1713 int num;
1714 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1715 TREE_TYPE (type), subclasses, bit_offset);
1716 if (!num)
1717 return 0;
1718
1719 /* The partial classes are now full classes. */
1720 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1721 subclasses[0] = X86_64_SSE_CLASS;
1722 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1723 subclasses[0] = X86_64_INTEGER_CLASS;
1724
1725 for (i = 0; i < words; i++)
1726 classes[i] = subclasses[i % num];
1727 }
1728 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1729 else if (TREE_CODE (type) == UNION_TYPE
1730 || TREE_CODE (type) == QUAL_UNION_TYPE)
1731 {
1732 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1733 {
1734 if (TREE_CODE (field) == FIELD_DECL)
1735 {
1736 int num;
1737 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1738 TREE_TYPE (field), subclasses,
1739 bit_offset);
1740 if (!num)
1741 return 0;
1742 for (i = 0; i < num; i++)
1743 classes[i] = merge_classes (subclasses[i], classes[i]);
1744 }
1745 }
1746 }
1747 else
1748 abort ();
1749
1750 /* Final merger cleanup. */
1751 for (i = 0; i < words; i++)
1752 {
1753 /* If one class is MEMORY, everything should be passed in
1754 memory. */
1755 if (classes[i] == X86_64_MEMORY_CLASS)
1756 return 0;
1757
1758 /* The X86_64_SSEUP_CLASS should be always preceded by
1759 X86_64_SSE_CLASS. */
1760 if (classes[i] == X86_64_SSEUP_CLASS
1761 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1762 classes[i] = X86_64_SSE_CLASS;
1763
1764 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1765 if (classes[i] == X86_64_X87UP_CLASS
1766 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1767 classes[i] = X86_64_SSE_CLASS;
1768 }
1769 return words;
1770 }
1771
1772 /* Compute alignment needed. We align all types to natural boundaries with
1773 exception of XFmode that is aligned to 64bits. */
1774 if (mode != VOIDmode && mode != BLKmode)
1775 {
1776 int mode_alignment = GET_MODE_BITSIZE (mode);
1777
1778 if (mode == XFmode)
1779 mode_alignment = 128;
1780 else if (mode == XCmode)
1781 mode_alignment = 256;
1782 /* Misaligned fields are always returned in memory. */
1783 if (bit_offset % mode_alignment)
1784 return 0;
1785 }
1786
1787 /* Classification of atomic types. */
1788 switch (mode)
1789 {
1790 case DImode:
1791 case SImode:
1792 case HImode:
1793 case QImode:
1794 case CSImode:
1795 case CHImode:
1796 case CQImode:
1797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1798 classes[0] = X86_64_INTEGERSI_CLASS;
1799 else
1800 classes[0] = X86_64_INTEGER_CLASS;
1801 return 1;
1802 case CDImode:
1803 case TImode:
1804 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1805 return 2;
1806 case CTImode:
1807 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1808 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1809 return 4;
1810 case SFmode:
1811 if (!(bit_offset % 64))
1812 classes[0] = X86_64_SSESF_CLASS;
1813 else
1814 classes[0] = X86_64_SSE_CLASS;
1815 return 1;
1816 case DFmode:
1817 classes[0] = X86_64_SSEDF_CLASS;
1818 return 1;
1819 case TFmode:
1820 classes[0] = X86_64_X87_CLASS;
1821 classes[1] = X86_64_X87UP_CLASS;
1822 return 2;
1823 case TCmode:
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1826 classes[2] = X86_64_X87_CLASS;
1827 classes[3] = X86_64_X87UP_CLASS;
1828 return 4;
1829 case DCmode:
1830 classes[0] = X86_64_SSEDF_CLASS;
1831 classes[1] = X86_64_SSEDF_CLASS;
1832 return 2;
1833 case SCmode:
1834 classes[0] = X86_64_SSE_CLASS;
1835 return 1;
1836 case V4SFmode:
1837 case V4SImode:
1838 classes[0] = X86_64_SSE_CLASS;
1839 classes[1] = X86_64_SSEUP_CLASS;
1840 return 2;
1841 case V2SFmode:
1842 case V2SImode:
1843 case V4HImode:
1844 case V8QImode:
1845 classes[0] = X86_64_SSE_CLASS;
1846 return 1;
1847 case BLKmode:
1848 case VOIDmode:
1849 return 0;
1850 default:
1851 abort ();
1852 }
1853 }
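/* Illustrative sketch (not part of the original source): expected results of
   the classification above for a few hypothetical C aggregates, worked out by
   hand from the rules; the struct names are examples only and the block is
   guarded by #if 0 so it does not affect the build.  */
#if 0
struct di_pair { long long a; long long b; };  /* 16 bytes: INTEGER, INTEGER */
struct mixed   { double d; int i; };           /* 16 bytes: SSEDF, INTEGER */
struct too_big { double d[3]; };               /* 24 bytes > 16: passed in memory */

/* The word count used throughout this file: round the byte size up to
   8-byte chunks, one class entry per chunk (UNITS_PER_WORD is 8 on x86-64).  */
static int
demo_words (int bytes)
{
  return (bytes + 8 - 1) / 8;
}
#endif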
1854
1855 /* Examine the argument and return the number of registers required in each
1856 class. Return 0 iff the parameter should be passed in memory. */
1857 static int
1858 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1859 enum machine_mode mode;
1860 tree type;
1861 int *int_nregs, *sse_nregs;
1862 int in_return;
1863 {
1864 enum x86_64_reg_class class[MAX_CLASSES];
1865 int n = classify_argument (mode, type, class, 0);
1866
1867 *int_nregs = 0;
1868 *sse_nregs = 0;
1869 if (!n)
1870 return 0;
1871 for (n--; n >= 0; n--)
1872 switch (class[n])
1873 {
1874 case X86_64_INTEGER_CLASS:
1875 case X86_64_INTEGERSI_CLASS:
1876 (*int_nregs)++;
1877 break;
1878 case X86_64_SSE_CLASS:
1879 case X86_64_SSESF_CLASS:
1880 case X86_64_SSEDF_CLASS:
1881 (*sse_nregs)++;
1882 break;
1883 case X86_64_NO_CLASS:
1884 case X86_64_SSEUP_CLASS:
1885 break;
1886 case X86_64_X87_CLASS:
1887 case X86_64_X87UP_CLASS:
1888 if (!in_return)
1889 return 0;
1890 break;
1891 case X86_64_MEMORY_CLASS:
1892 abort ();
1893 }
1894 return 1;
1895 }
1896 /* Construct container for the argument used by GCC interface. See
1897 FUNCTION_ARG for the detailed description. */
1898 static rtx
1899 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1900 enum machine_mode mode;
1901 tree type;
1902 int in_return;
1903 int nintregs, nsseregs;
1904 const int * intreg;
1905 int sse_regno;
1906 {
1907 enum machine_mode tmpmode;
1908 int bytes =
1909 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1910 enum x86_64_reg_class class[MAX_CLASSES];
1911 int n;
1912 int i;
1913 int nexps = 0;
1914 int needed_sseregs, needed_intregs;
1915 rtx exp[MAX_CLASSES];
1916 rtx ret;
1917
1918 n = classify_argument (mode, type, class, 0);
1919 if (TARGET_DEBUG_ARG)
1920 {
1921 if (!n)
1922 fprintf (stderr, "Memory class\n");
1923 else
1924 {
1925 fprintf (stderr, "Classes:");
1926 for (i = 0; i < n; i++)
1927 {
1928 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1929 }
1930 fprintf (stderr, "\n");
1931 }
1932 }
1933 if (!n)
1934 return NULL;
1935 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1936 return NULL;
1937 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1938 return NULL;
1939
1940 /* First construct simple cases. Avoid SCmode, since we want to use
1941 single register to pass this type. */
1942 if (n == 1 && mode != SCmode)
1943 switch (class[0])
1944 {
1945 case X86_64_INTEGER_CLASS:
1946 case X86_64_INTEGERSI_CLASS:
1947 return gen_rtx_REG (mode, intreg[0]);
1948 case X86_64_SSE_CLASS:
1949 case X86_64_SSESF_CLASS:
1950 case X86_64_SSEDF_CLASS:
1951 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1952 case X86_64_X87_CLASS:
1953 return gen_rtx_REG (mode, FIRST_STACK_REG);
1954 case X86_64_NO_CLASS:
1955 /* Zero sized array, struct or class. */
1956 return NULL;
1957 default:
1958 abort ();
1959 }
1960 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1961 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1962 if (n == 2
1963 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1964 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1965 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1966 && class[1] == X86_64_INTEGER_CLASS
1967 && (mode == CDImode || mode == TImode)
1968 && intreg[0] + 1 == intreg[1])
1969 return gen_rtx_REG (mode, intreg[0]);
1970 if (n == 4
1971 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1972 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1973 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1974
1975 /* Otherwise figure out the entries of the PARALLEL. */
1976 for (i = 0; i < n; i++)
1977 {
1978 switch (class[i])
1979 {
1980 case X86_64_NO_CLASS:
1981 break;
1982 case X86_64_INTEGER_CLASS:
1983 case X86_64_INTEGERSI_CLASS:
1984 /* Merge TImodes on aligned occasions here too. */
1985 if (i * 8 + 8 > bytes)
1986 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1987 else if (class[i] == X86_64_INTEGERSI_CLASS)
1988 tmpmode = SImode;
1989 else
1990 tmpmode = DImode;
1991 /* We've requested a size (e.g. 24 bits) that has no integer mode. Use DImode. */
1992 if (tmpmode == BLKmode)
1993 tmpmode = DImode;
1994 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1995 gen_rtx_REG (tmpmode, *intreg),
1996 GEN_INT (i*8));
1997 intreg++;
1998 break;
1999 case X86_64_SSESF_CLASS:
2000 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2001 gen_rtx_REG (SFmode,
2002 SSE_REGNO (sse_regno)),
2003 GEN_INT (i*8));
2004 sse_regno++;
2005 break;
2006 case X86_64_SSEDF_CLASS:
2007 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2008 gen_rtx_REG (DFmode,
2009 SSE_REGNO (sse_regno)),
2010 GEN_INT (i*8));
2011 sse_regno++;
2012 break;
2013 case X86_64_SSE_CLASS:
2014 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2015 tmpmode = TImode, i++;
2016 else
2017 tmpmode = DImode;
2018 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2019 gen_rtx_REG (tmpmode,
2020 SSE_REGNO (sse_regno)),
2021 GEN_INT (i*8));
2022 sse_regno++;
2023 break;
2024 default:
2025 abort ();
2026 }
2027 }
2028 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2029 for (i = 0; i < nexps; i++)
2030 XVECEXP (ret, 0, i) = exp [i];
2031 return ret;
2032 }
2033
2034 /* Update the data in CUM to advance over an argument
2035 of mode MODE and data type TYPE.
2036 (TYPE is null for libcalls where that information may not be available.) */
2037
2038 void
2039 function_arg_advance (cum, mode, type, named)
2040 CUMULATIVE_ARGS *cum; /* current arg information */
2041 enum machine_mode mode; /* current arg mode */
2042 tree type; /* type of the argument or 0 if lib support */
2043 int named; /* whether or not the argument was named */
2044 {
2045 int bytes =
2046 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2047 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2048
2049 if (TARGET_DEBUG_ARG)
2050 fprintf (stderr,
2051 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2052 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2053 if (TARGET_64BIT)
2054 {
2055 int int_nregs, sse_nregs;
2056 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2057 cum->words += words;
2058 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2059 {
2060 cum->nregs -= int_nregs;
2061 cum->sse_nregs -= sse_nregs;
2062 cum->regno += int_nregs;
2063 cum->sse_regno += sse_nregs;
2064 }
2065 else
2066 cum->words += words;
2067 }
2068 else
2069 {
2070 if (TARGET_SSE && mode == TImode)
2071 {
2072 cum->sse_words += words;
2073 cum->sse_nregs -= 1;
2074 cum->sse_regno += 1;
2075 if (cum->sse_nregs <= 0)
2076 {
2077 cum->sse_nregs = 0;
2078 cum->sse_regno = 0;
2079 }
2080 }
2081 else
2082 {
2083 cum->words += words;
2084 cum->nregs -= words;
2085 cum->regno += words;
2086
2087 if (cum->nregs <= 0)
2088 {
2089 cum->nregs = 0;
2090 cum->regno = 0;
2091 }
2092 }
2093 }
2094 return;
2095 }
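/* Illustrative sketch (not part of the original source): the 64-bit
   bookkeeping above restated as a tiny self-contained model.  An argument
   either fits in the remaining GP/SSE registers (both counters are charged)
   or the whole thing goes to the stack and only the word counter advances.
   The names are hypothetical stand-ins, the regno/sse_regno bookkeeping and
   the memory-class case are omitted for brevity, and the block is guarded
   by #if 0 so it does not affect the build.  */
#if 0
struct demo_cum { int nregs, sse_nregs, words; };

static void
demo_advance (struct demo_cum *cum, int int_nregs, int sse_nregs, int words)
{
  if (int_nregs <= cum->nregs && sse_nregs <= cum->sse_nregs)
    {
      cum->nregs -= int_nregs;       /* consume GP registers */
      cum->sse_nregs -= sse_nregs;   /* consume SSE registers */
    }
  else
    cum->words += words;             /* argument is passed on the stack */
}
#endif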
2096
2097 /* Define where to put the arguments to a function.
2098 Value is zero to push the argument on the stack,
2099 or a hard register in which to store the argument.
2100
2101 MODE is the argument's machine mode.
2102 TYPE is the data type of the argument (as a tree).
2103 This is null for libcalls where that information may
2104 not be available.
2105 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2106 the preceding args and about the function being called.
2107 NAMED is nonzero if this argument is a named parameter
2108 (otherwise it is an extra parameter matching an ellipsis). */
2109
2110 rtx
2111 function_arg (cum, mode, type, named)
2112 CUMULATIVE_ARGS *cum; /* current arg information */
2113 enum machine_mode mode; /* current arg mode */
2114 tree type; /* type of the argument or 0 if lib support */
2115 int named; /* != 0 for normal args, == 0 for ... args */
2116 {
2117 rtx ret = NULL_RTX;
2118 int bytes =
2119 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2120 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2121
2122 /* Handle a hidden AL argument containing the number of registers for varargs
2123 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2124 any AL settings. */
2125 if (mode == VOIDmode)
2126 {
2127 if (TARGET_64BIT)
2128 return GEN_INT (cum->maybe_vaarg
2129 ? (cum->sse_nregs < 0
2130 ? SSE_REGPARM_MAX
2131 : cum->sse_regno)
2132 : -1);
2133 else
2134 return constm1_rtx;
2135 }
2136 if (TARGET_64BIT)
2137 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2138 &x86_64_int_parameter_registers [cum->regno],
2139 cum->sse_regno);
2140 else
2141 switch (mode)
2142 {
2143 /* For now, pass fp/complex values on the stack. */
2144 default:
2145 break;
2146
2147 case BLKmode:
2148 case DImode:
2149 case SImode:
2150 case HImode:
2151 case QImode:
2152 if (words <= cum->nregs)
2153 ret = gen_rtx_REG (mode, cum->regno);
2154 break;
2155 case TImode:
2156 if (cum->sse_nregs)
2157 ret = gen_rtx_REG (mode, cum->sse_regno);
2158 break;
2159 }
2160
2161 if (TARGET_DEBUG_ARG)
2162 {
2163 fprintf (stderr,
2164 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2165 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2166
2167 if (ret)
2168 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2169 else
2170 fprintf (stderr, ", stack");
2171
2172 fprintf (stderr, " )\n");
2173 }
2174
2175 return ret;
2176 }
2177
2178 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2179 and type. */
2180
2181 int
2182 ix86_function_arg_boundary (mode, type)
2183 enum machine_mode mode;
2184 tree type;
2185 {
2186 int align;
2187 if (!TARGET_64BIT)
2188 return PARM_BOUNDARY;
2189 if (type)
2190 align = TYPE_ALIGN (type);
2191 else
2192 align = GET_MODE_ALIGNMENT (mode);
2193 if (align < PARM_BOUNDARY)
2194 align = PARM_BOUNDARY;
2195 if (align > 128)
2196 align = 128;
2197 return align;
2198 }
2199
2200 /* Return true if N is a possible register number of function value. */
2201 bool
2202 ix86_function_value_regno_p (regno)
2203 int regno;
2204 {
2205 if (!TARGET_64BIT)
2206 {
2207 return ((regno) == 0
2208 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2209 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2210 }
2211 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2212 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2213 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2214 }
2215
2216 /* Define how to find the value returned by a function.
2217 VALTYPE is the data type of the value (as a tree).
2218 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2219 otherwise, FUNC is 0. */
2220 rtx
2221 ix86_function_value (valtype)
2222 tree valtype;
2223 {
2224 if (TARGET_64BIT)
2225 {
2226 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2227 REGPARM_MAX, SSE_REGPARM_MAX,
2228 x86_64_int_return_registers, 0);
2229 /* For zero sized structures, construct_container returns NULL, but we need
2230 to keep the rest of the compiler happy by returning a meaningful value. */
2231 if (!ret)
2232 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2233 return ret;
2234 }
2235 else
2236 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2237 }
2238
2239 /* Return nonzero iff TYPE is returned in memory. */
2240 int
2241 ix86_return_in_memory (type)
2242 tree type;
2243 {
2244 int needed_intregs, needed_sseregs;
2245 if (TARGET_64BIT)
2246 {
2247 return !examine_argument (TYPE_MODE (type), type, 1,
2248 &needed_intregs, &needed_sseregs);
2249 }
2250 else
2251 {
2252 if (TYPE_MODE (type) == BLKmode
2253 || (VECTOR_MODE_P (TYPE_MODE (type))
2254 && int_size_in_bytes (type) == 8)
2255 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2256 && TYPE_MODE (type) != TFmode
2257 && !VECTOR_MODE_P (TYPE_MODE (type))))
2258 return 1;
2259 return 0;
2260 }
2261 }
2262
2263 /* Define how to find the value returned by a library function
2264 assuming the value has mode MODE. */
2265 rtx
2266 ix86_libcall_value (mode)
2267 enum machine_mode mode;
2268 {
2269 if (TARGET_64BIT)
2270 {
2271 switch (mode)
2272 {
2273 case SFmode:
2274 case SCmode:
2275 case DFmode:
2276 case DCmode:
2277 return gen_rtx_REG (mode, FIRST_SSE_REG);
2278 case TFmode:
2279 case TCmode:
2280 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2281 default:
2282 return gen_rtx_REG (mode, 0);
2283 }
2284 }
2285 else
2286 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2287 }
2288 \f
2289 /* Create the va_list data type. */
2290
2291 tree
2292 ix86_build_va_list ()
2293 {
2294 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2295
2296 /* For i386 we use plain pointer to argument area. */
2297 if (!TARGET_64BIT)
2298 return build_pointer_type (char_type_node);
2299
2300 record = make_lang_type (RECORD_TYPE);
2301 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2302
2303 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2304 unsigned_type_node);
2305 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2306 unsigned_type_node);
2307 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2308 ptr_type_node);
2309 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2310 ptr_type_node);
2311
2312 DECL_FIELD_CONTEXT (f_gpr) = record;
2313 DECL_FIELD_CONTEXT (f_fpr) = record;
2314 DECL_FIELD_CONTEXT (f_ovf) = record;
2315 DECL_FIELD_CONTEXT (f_sav) = record;
2316
2317 TREE_CHAIN (record) = type_decl;
2318 TYPE_NAME (record) = type_decl;
2319 TYPE_FIELDS (record) = f_gpr;
2320 TREE_CHAIN (f_gpr) = f_fpr;
2321 TREE_CHAIN (f_fpr) = f_ovf;
2322 TREE_CHAIN (f_ovf) = f_sav;
2323
2324 layout_type (record);
2325
2326 /* The correct type is an array type of one element. */
2327 return build_array_type (record, build_index_type (size_zero_node));
2328 }
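/* Illustrative sketch (not part of the original source): the C-level shape
   of the record built above, as the x86-64 psABI describes va_list.  The
   names are hypothetical stand-ins (the real type is built from the tree
   nodes above), and the block is guarded by #if 0 so it does not affect
   the build.  */
#if 0
typedef struct demo_va_list_tag
{
  unsigned int gp_offset;       /* byte offset of the next GP register slot */
  unsigned int fp_offset;       /* byte offset of the next SSE register slot */
  void *overflow_arg_area;      /* next stack-passed argument */
  void *reg_save_area;          /* block saved by the varargs prologue */
} demo_va_list[1];              /* an array of one element, as returned above */
#endif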
2329
2330 /* Perform any actions needed for a function that is receiving a
2331 variable number of arguments.
2332
2333 CUM is as above.
2334
2335 MODE and TYPE are the mode and type of the current parameter.
2336
2337 PRETEND_SIZE is a variable that should be set to the amount of stack
2338 that must be pushed by the prolog to pretend that our caller pushed
2339 it.
2340
2341 Normally, this macro will push all remaining incoming registers on the
2342 stack and set PRETEND_SIZE to the length of the registers pushed. */
2343
2344 void
2345 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2346 CUMULATIVE_ARGS *cum;
2347 enum machine_mode mode;
2348 tree type;
2349 int *pretend_size ATTRIBUTE_UNUSED;
2350 int no_rtl;
2351
2352 {
2353 CUMULATIVE_ARGS next_cum;
2354 rtx save_area = NULL_RTX, mem;
2355 rtx label;
2356 rtx label_ref;
2357 rtx tmp_reg;
2358 rtx nsse_reg;
2359 int set;
2360 tree fntype;
2361 int stdarg_p;
2362 int i;
2363
2364 if (!TARGET_64BIT)
2365 return;
2366
2367 /* Indicate to allocate space on the stack for varargs save area. */
2368 ix86_save_varrargs_registers = 1;
2369
2370 fntype = TREE_TYPE (current_function_decl);
2371 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2372 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2373 != void_type_node));
2374
2375 /* For varargs, we do not want to skip the dummy va_dcl argument.
2376 For stdargs, we do want to skip the last named argument. */
2377 next_cum = *cum;
2378 if (stdarg_p)
2379 function_arg_advance (&next_cum, mode, type, 1);
2380
2381 if (!no_rtl)
2382 save_area = frame_pointer_rtx;
2383
2384 set = get_varargs_alias_set ();
2385
2386 for (i = next_cum.regno; i < ix86_regparm; i++)
2387 {
2388 mem = gen_rtx_MEM (Pmode,
2389 plus_constant (save_area, i * UNITS_PER_WORD));
2390 set_mem_alias_set (mem, set);
2391 emit_move_insn (mem, gen_rtx_REG (Pmode,
2392 x86_64_int_parameter_registers[i]));
2393 }
2394
2395 if (next_cum.sse_nregs)
2396 {
2397 /* Now emit code to save SSE registers. The AX parameter contains the number
2398 of SSE parameter registers used to call this function. We use the
2399 sse_prologue_save insn template that produces a computed jump across
2400 SSE saves. We need some preparation work to get this working. */
2401
2402 label = gen_label_rtx ();
2403 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2404
2405 /* Compute the address to jump to:
2406 label - 4*eax + nnamed_sse_arguments*4 */
2407 tmp_reg = gen_reg_rtx (Pmode);
2408 nsse_reg = gen_reg_rtx (Pmode);
2409 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2410 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2411 gen_rtx_MULT (Pmode, nsse_reg,
2412 GEN_INT (4))));
2413 if (next_cum.sse_regno)
2414 emit_move_insn
2415 (nsse_reg,
2416 gen_rtx_CONST (DImode,
2417 gen_rtx_PLUS (DImode,
2418 label_ref,
2419 GEN_INT (next_cum.sse_regno * 4))));
2420 else
2421 emit_move_insn (nsse_reg, label_ref);
2422 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2423
2424 /* Compute the address of the memory block we save into. We always use a
2425 pointer pointing 127 bytes after the first byte to store - this is needed
2426 to keep the instruction size limited to 4 bytes. */
2427 tmp_reg = gen_reg_rtx (Pmode);
2428 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2429 plus_constant (save_area,
2430 8 * REGPARM_MAX + 127)));
2431 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2432 set_mem_alias_set (mem, set);
2433 set_mem_align (mem, BITS_PER_WORD);
2434
2435 /* And finally do the dirty job! */
2436 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2437 GEN_INT (next_cum.sse_regno), label));
2438 }
2439
2440 }
2441
2442 /* Implement va_start. */
2443
2444 void
2445 ix86_va_start (stdarg_p, valist, nextarg)
2446 int stdarg_p;
2447 tree valist;
2448 rtx nextarg;
2449 {
2450 HOST_WIDE_INT words, n_gpr, n_fpr;
2451 tree f_gpr, f_fpr, f_ovf, f_sav;
2452 tree gpr, fpr, ovf, sav, t;
2453
2454 /* Only 64bit target needs something special. */
2455 if (!TARGET_64BIT)
2456 {
2457 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2458 return;
2459 }
2460
2461 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2462 f_fpr = TREE_CHAIN (f_gpr);
2463 f_ovf = TREE_CHAIN (f_fpr);
2464 f_sav = TREE_CHAIN (f_ovf);
2465
2466 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2467 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2468 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2469 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2470 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2471
2472 /* Count number of gp and fp argument registers used. */
2473 words = current_function_args_info.words;
2474 n_gpr = current_function_args_info.regno;
2475 n_fpr = current_function_args_info.sse_regno;
2476
2477 if (TARGET_DEBUG_ARG)
2478 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2479 (int) words, (int) n_gpr, (int) n_fpr);
2480
2481 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2482 build_int_2 (n_gpr * 8, 0));
2483 TREE_SIDE_EFFECTS (t) = 1;
2484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2485
2486 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2487 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2488 TREE_SIDE_EFFECTS (t) = 1;
2489 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2490
2491 /* Find the overflow area. */
2492 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2493 if (words != 0)
2494 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2495 build_int_2 (words * UNITS_PER_WORD, 0));
2496 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2497 TREE_SIDE_EFFECTS (t) = 1;
2498 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2499
2500 /* Find the register save area.
2501 The function prologue saves it right above the stack frame. */
2502 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2503 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2504 TREE_SIDE_EFFECTS (t) = 1;
2505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2506 }
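/* Illustrative sketch (not part of the original source): the net effect of
   the expansion above, written as ordinary C against the demo va_list layout
   sketched earlier.  REGPARM_MAX is 6 on x86-64, so the SSE save slots begin
   48 bytes into the register save area.  All names are hypothetical and the
   block is guarded by #if 0 so it does not affect the build.  */
#if 0
static void
demo_va_start (struct demo_va_list_tag *ap, char *incoming_args,
               void *save_area, int named_gp_regs, int named_sse_regs,
               int named_stack_words)
{
  ap->gp_offset = named_gp_regs * 8;             /* 8 bytes per GP register */
  ap->fp_offset = 6 * 8 + named_sse_regs * 16;   /* SSE slots follow the 6 GP slots */
  ap->overflow_arg_area = incoming_args + named_stack_words * 8;
  ap->reg_save_area = save_area;                 /* set up by the prologue */
}
#endif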
2507
2508 /* Implement va_arg. */
2509 rtx
2510 ix86_va_arg (valist, type)
2511 tree valist, type;
2512 {
2513 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2514 tree f_gpr, f_fpr, f_ovf, f_sav;
2515 tree gpr, fpr, ovf, sav, t;
2516 int size, rsize;
2517 rtx lab_false, lab_over = NULL_RTX;
2518 rtx addr_rtx, r;
2519 rtx container;
2520
2521 /* Only 64bit target needs something special. */
2522 if (!TARGET_64BIT)
2523 {
2524 return std_expand_builtin_va_arg (valist, type);
2525 }
2526
2527 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2528 f_fpr = TREE_CHAIN (f_gpr);
2529 f_ovf = TREE_CHAIN (f_fpr);
2530 f_sav = TREE_CHAIN (f_ovf);
2531
2532 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2533 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2534 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2535 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2536 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2537
2538 size = int_size_in_bytes (type);
2539 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2540
2541 container = construct_container (TYPE_MODE (type), type, 0,
2542 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2543 /*
2544 * Pull the value out of the saved registers ...
2545 */
2546
2547 addr_rtx = gen_reg_rtx (Pmode);
2548
2549 if (container)
2550 {
2551 rtx int_addr_rtx, sse_addr_rtx;
2552 int needed_intregs, needed_sseregs;
2553 int need_temp;
2554
2555 lab_over = gen_label_rtx ();
2556 lab_false = gen_label_rtx ();
2557
2558 examine_argument (TYPE_MODE (type), type, 0,
2559 &needed_intregs, &needed_sseregs);
2560
2561
2562 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2563 || TYPE_ALIGN (type) > 128);
2564
2565 /* If we are passing a structure, verify that it is a consecutive block
2566 in the register save area. If not, we need to do moves. */
2567 if (!need_temp && !REG_P (container))
2568 {
2569 /* Verify that all registers are strictly consecutive. */
2570 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2571 {
2572 int i;
2573
2574 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2575 {
2576 rtx slot = XVECEXP (container, 0, i);
2577 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2578 || INTVAL (XEXP (slot, 1)) != i * 16)
2579 need_temp = 1;
2580 }
2581 }
2582 else
2583 {
2584 int i;
2585
2586 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2587 {
2588 rtx slot = XVECEXP (container, 0, i);
2589 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2590 || INTVAL (XEXP (slot, 1)) != i * 8)
2591 need_temp = 1;
2592 }
2593 }
2594 }
2595 if (!need_temp)
2596 {
2597 int_addr_rtx = addr_rtx;
2598 sse_addr_rtx = addr_rtx;
2599 }
2600 else
2601 {
2602 int_addr_rtx = gen_reg_rtx (Pmode);
2603 sse_addr_rtx = gen_reg_rtx (Pmode);
2604 }
2605 /* First ensure that we fit completely in registers. */
2606 if (needed_intregs)
2607 {
2608 emit_cmp_and_jump_insns (expand_expr
2609 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2610 GEN_INT ((REGPARM_MAX - needed_intregs +
2611 1) * 8), GE, const1_rtx, SImode,
2612 1, lab_false);
2613 }
2614 if (needed_sseregs)
2615 {
2616 emit_cmp_and_jump_insns (expand_expr
2617 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2618 GEN_INT ((SSE_REGPARM_MAX -
2619 needed_sseregs + 1) * 16 +
2620 REGPARM_MAX * 8), GE, const1_rtx,
2621 SImode, 1, lab_false);
2622 }
2623
2624 /* Compute index to start of area used for integer regs. */
2625 if (needed_intregs)
2626 {
2627 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2628 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2629 if (r != int_addr_rtx)
2630 emit_move_insn (int_addr_rtx, r);
2631 }
2632 if (needed_sseregs)
2633 {
2634 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2635 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2636 if (r != sse_addr_rtx)
2637 emit_move_insn (sse_addr_rtx, r);
2638 }
2639 if (need_temp)
2640 {
2641 int i;
2642 rtx mem;
2643
2644 /* Never use the memory itself, as it has the alias set. */
2645 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2646 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2647 set_mem_alias_set (mem, get_varargs_alias_set ());
2648 set_mem_align (mem, BITS_PER_UNIT);
2649
2650 for (i = 0; i < XVECLEN (container, 0); i++)
2651 {
2652 rtx slot = XVECEXP (container, 0, i);
2653 rtx reg = XEXP (slot, 0);
2654 enum machine_mode mode = GET_MODE (reg);
2655 rtx src_addr;
2656 rtx src_mem;
2657 int src_offset;
2658 rtx dest_mem;
2659
2660 if (SSE_REGNO_P (REGNO (reg)))
2661 {
2662 src_addr = sse_addr_rtx;
2663 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2664 }
2665 else
2666 {
2667 src_addr = int_addr_rtx;
2668 src_offset = REGNO (reg) * 8;
2669 }
2670 src_mem = gen_rtx_MEM (mode, src_addr);
2671 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2672 src_mem = adjust_address (src_mem, mode, src_offset);
2673 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2674 emit_move_insn (dest_mem, src_mem);
2675 }
2676 }
2677
2678 if (needed_intregs)
2679 {
2680 t =
2681 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2682 build_int_2 (needed_intregs * 8, 0));
2683 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2684 TREE_SIDE_EFFECTS (t) = 1;
2685 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2686 }
2687 if (needed_sseregs)
2688 {
2689 t =
2690 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2691 build_int_2 (needed_sseregs * 16, 0));
2692 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2693 TREE_SIDE_EFFECTS (t) = 1;
2694 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2695 }
2696
2697 emit_jump_insn (gen_jump (lab_over));
2698 emit_barrier ();
2699 emit_label (lab_false);
2700 }
2701
2702 /* ... otherwise out of the overflow area. */
2703
2704 /* Care for on-stack alignment if needed. */
2705 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2706 t = ovf;
2707 else
2708 {
2709 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2710 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2711 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2712 }
2713 t = save_expr (t);
2714
2715 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2716 if (r != addr_rtx)
2717 emit_move_insn (addr_rtx, r);
2718
2719 t =
2720 build (PLUS_EXPR, TREE_TYPE (t), t,
2721 build_int_2 (rsize * UNITS_PER_WORD, 0));
2722 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2723 TREE_SIDE_EFFECTS (t) = 1;
2724 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2725
2726 if (container)
2727 emit_label (lab_over);
2728
2729 return addr_rtx;
2730 }
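/* Illustrative sketch (not part of the original source): the code generated
   above, specialized by hand for a plain int argument and written as
   ordinary C against the demo va_list layout sketched earlier.  An int needs
   one GP register, so the register path is taken while gp_offset is below
   (REGPARM_MAX - 1 + 1) * 8 = 48.  All names are hypothetical and the block
   is guarded by #if 0 so it does not affect the build.  */
#if 0
static void *
demo_va_arg_int (struct demo_va_list_tag *ap)
{
  void *addr;

  if (ap->gp_offset < (6 - 1 + 1) * 8)           /* still a GP register left? */
    {
      addr = (char *) ap->reg_save_area + ap->gp_offset;
      ap->gp_offset += 8;                        /* one 8-byte register consumed */
    }
  else
    {
      addr = ap->overflow_arg_area;              /* ... otherwise from the stack */
      ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
    }
  return addr;
}
#endif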
2731 \f
2732 /* Return nonzero if OP is general operand representable on x86_64. */
2733
2734 int
2735 x86_64_general_operand (op, mode)
2736 rtx op;
2737 enum machine_mode mode;
2738 {
2739 if (!TARGET_64BIT)
2740 return general_operand (op, mode);
2741 if (nonimmediate_operand (op, mode))
2742 return 1;
2743 return x86_64_sign_extended_value (op);
2744 }
2745
2746 /* Return nonzero if OP is general operand representable on x86_64
2747 as either sign extended or zero extended constant. */
2748
2749 int
2750 x86_64_szext_general_operand (op, mode)
2751 rtx op;
2752 enum machine_mode mode;
2753 {
2754 if (!TARGET_64BIT)
2755 return general_operand (op, mode);
2756 if (nonimmediate_operand (op, mode))
2757 return 1;
2758 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2759 }
2760
2761 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2762
2763 int
2764 x86_64_nonmemory_operand (op, mode)
2765 rtx op;
2766 enum machine_mode mode;
2767 {
2768 if (!TARGET_64BIT)
2769 return nonmemory_operand (op, mode);
2770 if (register_operand (op, mode))
2771 return 1;
2772 return x86_64_sign_extended_value (op);
2773 }
2774
2775 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2776
2777 int
2778 x86_64_movabs_operand (op, mode)
2779 rtx op;
2780 enum machine_mode mode;
2781 {
2782 if (!TARGET_64BIT || !flag_pic)
2783 return nonmemory_operand (op, mode);
2784 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2785 return 1;
2786 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2787 return 1;
2788 return 0;
2789 }
2790
2791 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2792
2793 int
2794 x86_64_szext_nonmemory_operand (op, mode)
2795 rtx op;
2796 enum machine_mode mode;
2797 {
2798 if (!TARGET_64BIT)
2799 return nonmemory_operand (op, mode);
2800 if (register_operand (op, mode))
2801 return 1;
2802 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2803 }
2804
2805 /* Return nonzero if OP is immediate operand representable on x86_64. */
2806
2807 int
2808 x86_64_immediate_operand (op, mode)
2809 rtx op;
2810 enum machine_mode mode;
2811 {
2812 if (!TARGET_64BIT)
2813 return immediate_operand (op, mode);
2814 return x86_64_sign_extended_value (op);
2815 }
2816
2817 /* Return nonzero if OP is immediate operand representable on x86_64. */
2818
2819 int
2820 x86_64_zext_immediate_operand (op, mode)
2821 rtx op;
2822 enum machine_mode mode ATTRIBUTE_UNUSED;
2823 {
2824 return x86_64_zero_extended_value (op);
2825 }
2826
2827 /* Return nonzero if OP is (const_int 1), else return zero. */
2828
2829 int
2830 const_int_1_operand (op, mode)
2831 rtx op;
2832 enum machine_mode mode ATTRIBUTE_UNUSED;
2833 {
2834 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2835 }
2836
2837 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2838 reference and a constant. */
2839
2840 int
2841 symbolic_operand (op, mode)
2842 register rtx op;
2843 enum machine_mode mode ATTRIBUTE_UNUSED;
2844 {
2845 switch (GET_CODE (op))
2846 {
2847 case SYMBOL_REF:
2848 case LABEL_REF:
2849 return 1;
2850
2851 case CONST:
2852 op = XEXP (op, 0);
2853 if (GET_CODE (op) == SYMBOL_REF
2854 || GET_CODE (op) == LABEL_REF
2855 || (GET_CODE (op) == UNSPEC
2856 && (XINT (op, 1) == 6
2857 || XINT (op, 1) == 7
2858 || XINT (op, 1) == 15)))
2859 return 1;
2860 if (GET_CODE (op) != PLUS
2861 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2862 return 0;
2863
2864 op = XEXP (op, 0);
2865 if (GET_CODE (op) == SYMBOL_REF
2866 || GET_CODE (op) == LABEL_REF)
2867 return 1;
2868 /* Only @GOTOFF gets offsets. */
2869 if (GET_CODE (op) != UNSPEC
2870 || XINT (op, 1) != 7)
2871 return 0;
2872
2873 op = XVECEXP (op, 0, 0);
2874 if (GET_CODE (op) == SYMBOL_REF
2875 || GET_CODE (op) == LABEL_REF)
2876 return 1;
2877 return 0;
2878
2879 default:
2880 return 0;
2881 }
2882 }
2883
2884 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2885
2886 int
2887 pic_symbolic_operand (op, mode)
2888 register rtx op;
2889 enum machine_mode mode ATTRIBUTE_UNUSED;
2890 {
2891 if (GET_CODE (op) != CONST)
2892 return 0;
2893 op = XEXP (op, 0);
2894 if (TARGET_64BIT)
2895 {
2896 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2897 return 1;
2898 }
2899 else
2900 {
2901 if (GET_CODE (op) == UNSPEC)
2902 return 1;
2903 if (GET_CODE (op) != PLUS
2904 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2905 return 0;
2906 op = XEXP (op, 0);
2907 if (GET_CODE (op) == UNSPEC)
2908 return 1;
2909 }
2910 return 0;
2911 }
2912
2913 /* Return true if OP is a symbolic operand that resolves locally. */
2914
2915 static int
2916 local_symbolic_operand (op, mode)
2917 rtx op;
2918 enum machine_mode mode ATTRIBUTE_UNUSED;
2919 {
2920 if (GET_CODE (op) == LABEL_REF)
2921 return 1;
2922
2923 if (GET_CODE (op) == CONST
2924 && GET_CODE (XEXP (op, 0)) == PLUS
2925 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2926 op = XEXP (XEXP (op, 0), 0);
2927
2928 if (GET_CODE (op) != SYMBOL_REF)
2929 return 0;
2930
2931 /* These we've been told are local by varasm and encode_section_info
2932 respectively. */
2933 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2934 return 1;
2935
2936 /* There is, however, a not insubstantial body of code in the rest of
2937 the compiler that assumes it can just stick the results of
2938 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2939 /* ??? This is a hack. Should update the body of the compiler to
2940 always create a DECL and invoke ENCODE_SECTION_INFO. */
2941 if (strncmp (XSTR (op, 0), internal_label_prefix,
2942 internal_label_prefix_len) == 0)
2943 return 1;
2944
2945 return 0;
2946 }
2947
2948 /* Test for a valid operand for a call instruction. Don't allow the
2949 arg pointer register or virtual regs since they may decay into
2950 reg + const, which the patterns can't handle. */
2951
2952 int
2953 call_insn_operand (op, mode)
2954 rtx op;
2955 enum machine_mode mode ATTRIBUTE_UNUSED;
2956 {
2957 /* Disallow indirect through a virtual register. This leads to
2958 compiler aborts when trying to eliminate them. */
2959 if (GET_CODE (op) == REG
2960 && (op == arg_pointer_rtx
2961 || op == frame_pointer_rtx
2962 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2963 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2964 return 0;
2965
2966 /* Disallow `call 1234'. Due to varying assembler lameness this
2967 gets either rejected or translated to `call .+1234'. */
2968 if (GET_CODE (op) == CONST_INT)
2969 return 0;
2970
2971 /* Explicitly allow SYMBOL_REF even if pic. */
2972 if (GET_CODE (op) == SYMBOL_REF)
2973 return 1;
2974
2975 /* Half-pic doesn't allow anything but registers and constants.
2976 We've just taken care of the latter. */
2977 if (HALF_PIC_P ())
2978 return register_operand (op, Pmode);
2979
2980 /* Otherwise we can allow any general_operand in the address. */
2981 return general_operand (op, Pmode);
2982 }
2983
2984 int
2985 constant_call_address_operand (op, mode)
2986 rtx op;
2987 enum machine_mode mode ATTRIBUTE_UNUSED;
2988 {
2989 if (GET_CODE (op) == CONST
2990 && GET_CODE (XEXP (op, 0)) == PLUS
2991 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2992 op = XEXP (XEXP (op, 0), 0);
2993 return GET_CODE (op) == SYMBOL_REF;
2994 }
2995
2996 /* Match exactly zero and one. */
2997
2998 int
2999 const0_operand (op, mode)
3000 register rtx op;
3001 enum machine_mode mode;
3002 {
3003 return op == CONST0_RTX (mode);
3004 }
3005
3006 int
3007 const1_operand (op, mode)
3008 register rtx op;
3009 enum machine_mode mode ATTRIBUTE_UNUSED;
3010 {
3011 return op == const1_rtx;
3012 }
3013
3014 /* Match 2, 4, or 8. Used for leal multiplicands. */
3015
3016 int
3017 const248_operand (op, mode)
3018 register rtx op;
3019 enum machine_mode mode ATTRIBUTE_UNUSED;
3020 {
3021 return (GET_CODE (op) == CONST_INT
3022 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3023 }
3024
3025 /* True if this is a constant appropriate for an increment or decrement. */
3026
3027 int
3028 incdec_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3031 {
3032 /* On Pentium 4, the inc and dec operations cause an extra dependency on the
3033 flags register, since the carry flag is not set. */
3034 if (TARGET_PENTIUM4 && !optimize_size)
3035 return 0;
3036 return op == const1_rtx || op == constm1_rtx;
3037 }
3038
3039 /* Return nonzero if OP is acceptable as operand of DImode shift
3040 expander. */
3041
3042 int
3043 shiftdi_operand (op, mode)
3044 rtx op;
3045 enum machine_mode mode ATTRIBUTE_UNUSED;
3046 {
3047 if (TARGET_64BIT)
3048 return nonimmediate_operand (op, mode);
3049 else
3050 return register_operand (op, mode);
3051 }
3052
3053 /* Return false if this is the stack pointer, or any other fake
3054 register eliminable to the stack pointer. Otherwise, this is
3055 a register operand.
3056
3057 This is used to prevent esp from being used as an index reg.
3058 Which would only happen in pathological cases. */
3059
3060 int
3061 reg_no_sp_operand (op, mode)
3062 register rtx op;
3063 enum machine_mode mode;
3064 {
3065 rtx t = op;
3066 if (GET_CODE (t) == SUBREG)
3067 t = SUBREG_REG (t);
3068 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3069 return 0;
3070
3071 return register_operand (op, mode);
3072 }
3073
3074 int
3075 mmx_reg_operand (op, mode)
3076 register rtx op;
3077 enum machine_mode mode ATTRIBUTE_UNUSED;
3078 {
3079 return MMX_REG_P (op);
3080 }
3081
3082 /* Return false if this is any eliminable register. Otherwise
3083 general_operand. */
3084
3085 int
3086 general_no_elim_operand (op, mode)
3087 register rtx op;
3088 enum machine_mode mode;
3089 {
3090 rtx t = op;
3091 if (GET_CODE (t) == SUBREG)
3092 t = SUBREG_REG (t);
3093 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3094 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3095 || t == virtual_stack_dynamic_rtx)
3096 return 0;
3097 if (REG_P (t)
3098 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3099 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3100 return 0;
3101
3102 return general_operand (op, mode);
3103 }
3104
3105 /* Return false if this is any eliminable register. Otherwise
3106 register_operand or const_int. */
3107
3108 int
3109 nonmemory_no_elim_operand (op, mode)
3110 register rtx op;
3111 enum machine_mode mode;
3112 {
3113 rtx t = op;
3114 if (GET_CODE (t) == SUBREG)
3115 t = SUBREG_REG (t);
3116 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3117 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3118 || t == virtual_stack_dynamic_rtx)
3119 return 0;
3120
3121 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3122 }
3123
3124 /* Return true if op is a Q_REGS class register. */
3125
3126 int
3127 q_regs_operand (op, mode)
3128 register rtx op;
3129 enum machine_mode mode;
3130 {
3131 if (mode != VOIDmode && GET_MODE (op) != mode)
3132 return 0;
3133 if (GET_CODE (op) == SUBREG)
3134 op = SUBREG_REG (op);
3135 return QI_REG_P (op);
3136 }
3137
3138 /* Return true if op is a NON_Q_REGS class register. */
3139
3140 int
3141 non_q_regs_operand (op, mode)
3142 register rtx op;
3143 enum machine_mode mode;
3144 {
3145 if (mode != VOIDmode && GET_MODE (op) != mode)
3146 return 0;
3147 if (GET_CODE (op) == SUBREG)
3148 op = SUBREG_REG (op);
3149 return NON_QI_REG_P (op);
3150 }
3151
3152 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3153 insns. */
3154 int
3155 sse_comparison_operator (op, mode)
3156 rtx op;
3157 enum machine_mode mode ATTRIBUTE_UNUSED;
3158 {
3159 enum rtx_code code = GET_CODE (op);
3160 switch (code)
3161 {
3162 /* Operations supported directly. */
3163 case EQ:
3164 case LT:
3165 case LE:
3166 case UNORDERED:
3167 case NE:
3168 case UNGE:
3169 case UNGT:
3170 case ORDERED:
3171 return 1;
3172 /* These are equivalent to ones above in non-IEEE comparisons. */
3173 case UNEQ:
3174 case UNLT:
3175 case UNLE:
3176 case LTGT:
3177 case GE:
3178 case GT:
3179 return !TARGET_IEEE_FP;
3180 default:
3181 return 0;
3182 }
3183 }
3184 /* Return 1 if OP is a valid comparison operator in valid mode. */
3185 int
3186 ix86_comparison_operator (op, mode)
3187 register rtx op;
3188 enum machine_mode mode;
3189 {
3190 enum machine_mode inmode;
3191 enum rtx_code code = GET_CODE (op);
3192 if (mode != VOIDmode && GET_MODE (op) != mode)
3193 return 0;
3194 if (GET_RTX_CLASS (code) != '<')
3195 return 0;
3196 inmode = GET_MODE (XEXP (op, 0));
3197
3198 if (inmode == CCFPmode || inmode == CCFPUmode)
3199 {
3200 enum rtx_code second_code, bypass_code;
3201 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3202 return (bypass_code == NIL && second_code == NIL);
3203 }
3204 switch (code)
3205 {
3206 case EQ: case NE:
3207 return 1;
3208 case LT: case GE:
3209 if (inmode == CCmode || inmode == CCGCmode
3210 || inmode == CCGOCmode || inmode == CCNOmode)
3211 return 1;
3212 return 0;
3213 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3214 if (inmode == CCmode)
3215 return 1;
3216 return 0;
3217 case GT: case LE:
3218 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3219 return 1;
3220 return 0;
3221 default:
3222 return 0;
3223 }
3224 }
3225
3226 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3227
3228 int
3229 fcmov_comparison_operator (op, mode)
3230 register rtx op;
3231 enum machine_mode mode;
3232 {
3233 enum machine_mode inmode;
3234 enum rtx_code code = GET_CODE (op);
3235 if (mode != VOIDmode && GET_MODE (op) != mode)
3236 return 0;
3237 if (GET_RTX_CLASS (code) != '<')
3238 return 0;
3239 inmode = GET_MODE (XEXP (op, 0));
3240 if (inmode == CCFPmode || inmode == CCFPUmode)
3241 {
3242 enum rtx_code second_code, bypass_code;
3243 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3244 if (bypass_code != NIL || second_code != NIL)
3245 return 0;
3246 code = ix86_fp_compare_code_to_integer (code);
3247 }
3248 /* The i387 supports only a limited set of condition codes. */
3249 switch (code)
3250 {
3251 case LTU: case GTU: case LEU: case GEU:
3252 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3253 return 1;
3254 return 0;
3255 case ORDERED: case UNORDERED:
3256 case EQ: case NE:
3257 return 1;
3258 default:
3259 return 0;
3260 }
3261 }
3262
3263 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3264
3265 int
3266 promotable_binary_operator (op, mode)
3267 register rtx op;
3268 enum machine_mode mode ATTRIBUTE_UNUSED;
3269 {
3270 switch (GET_CODE (op))
3271 {
3272 case MULT:
3273 /* Modern CPUs have same latency for HImode and SImode multiply,
3274 but 386 and 486 do HImode multiply faster. */
3275 return ix86_cpu > PROCESSOR_I486;
3276 case PLUS:
3277 case AND:
3278 case IOR:
3279 case XOR:
3280 case ASHIFT:
3281 return 1;
3282 default:
3283 return 0;
3284 }
3285 }
3286
3287 /* Nearly general operand, but accept any const_double, since we wish
3288 to be able to drop them into memory rather than have them get pulled
3289 into registers. */
3290
3291 int
3292 cmp_fp_expander_operand (op, mode)
3293 register rtx op;
3294 enum machine_mode mode;
3295 {
3296 if (mode != VOIDmode && mode != GET_MODE (op))
3297 return 0;
3298 if (GET_CODE (op) == CONST_DOUBLE)
3299 return 1;
3300 return general_operand (op, mode);
3301 }
3302
3303 /* Match an SI or HImode register for a zero_extract. */
3304
3305 int
3306 ext_register_operand (op, mode)
3307 register rtx op;
3308 enum machine_mode mode ATTRIBUTE_UNUSED;
3309 {
3310 int regno;
3311 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3312 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3313 return 0;
3314
3315 if (!register_operand (op, VOIDmode))
3316 return 0;
3317
3318 /* Be careful to accept only registers having upper parts. */
3319 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3320 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3321 }
3322
3323 /* Return 1 if this is a valid binary floating-point operation.
3324 OP is the expression matched, and MODE is its mode. */
3325
3326 int
3327 binary_fp_operator (op, mode)
3328 register rtx op;
3329 enum machine_mode mode;
3330 {
3331 if (mode != VOIDmode && mode != GET_MODE (op))
3332 return 0;
3333
3334 switch (GET_CODE (op))
3335 {
3336 case PLUS:
3337 case MINUS:
3338 case MULT:
3339 case DIV:
3340 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3341
3342 default:
3343 return 0;
3344 }
3345 }
3346
3347 int
3348 mult_operator (op, mode)
3349 register rtx op;
3350 enum machine_mode mode ATTRIBUTE_UNUSED;
3351 {
3352 return GET_CODE (op) == MULT;
3353 }
3354
3355 int
3356 div_operator (op, mode)
3357 register rtx op;
3358 enum machine_mode mode ATTRIBUTE_UNUSED;
3359 {
3360 return GET_CODE (op) == DIV;
3361 }
3362
3363 int
3364 arith_or_logical_operator (op, mode)
3365 rtx op;
3366 enum machine_mode mode;
3367 {
3368 return ((mode == VOIDmode || GET_MODE (op) == mode)
3369 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3370 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3371 }
3372
3373 /* Returns 1 if OP is memory operand with a displacement. */
3374
3375 int
3376 memory_displacement_operand (op, mode)
3377 register rtx op;
3378 enum machine_mode mode;
3379 {
3380 struct ix86_address parts;
3381
3382 if (! memory_operand (op, mode))
3383 return 0;
3384
3385 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3386 abort ();
3387
3388 return parts.disp != NULL_RTX;
3389 }
3390
3391 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3392 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3393
3394 ??? It seems likely that this will only work because cmpsi is an
3395 expander, and no actual insns use this. */
3396
3397 int
3398 cmpsi_operand (op, mode)
3399 rtx op;
3400 enum machine_mode mode;
3401 {
3402 if (nonimmediate_operand (op, mode))
3403 return 1;
3404
3405 if (GET_CODE (op) == AND
3406 && GET_MODE (op) == SImode
3407 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3408 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3409 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3410 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3411 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3412 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3413 return 1;
3414
3415 return 0;
3416 }
3417
3418 /* Returns 1 if OP is memory operand that can not be represented by the
3419 modRM array. */
3420
3421 int
3422 long_memory_operand (op, mode)
3423 register rtx op;
3424 enum machine_mode mode;
3425 {
3426 if (! memory_operand (op, mode))
3427 return 0;
3428
3429 return memory_address_length (op) != 0;
3430 }
3431
3432 /* Return nonzero if the rtx is known aligned. */
3433
3434 int
3435 aligned_operand (op, mode)
3436 rtx op;
3437 enum machine_mode mode;
3438 {
3439 struct ix86_address parts;
3440
3441 if (!general_operand (op, mode))
3442 return 0;
3443
3444 /* Registers and immediate operands are always "aligned". */
3445 if (GET_CODE (op) != MEM)
3446 return 1;
3447
3448 /* Don't even try to do any aligned optimizations with volatiles. */
3449 if (MEM_VOLATILE_P (op))
3450 return 0;
3451
3452 op = XEXP (op, 0);
3453
3454 /* Pushes and pops are only valid on the stack pointer. */
3455 if (GET_CODE (op) == PRE_DEC
3456 || GET_CODE (op) == POST_INC)
3457 return 1;
3458
3459 /* Decode the address. */
3460 if (! ix86_decompose_address (op, &parts))
3461 abort ();
3462
3463 /* Look for some component that isn't known to be aligned. */
3464 if (parts.index)
3465 {
3466 if (parts.scale < 4
3467 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3468 return 0;
3469 }
3470 if (parts.base)
3471 {
3472 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3473 return 0;
3474 }
3475 if (parts.disp)
3476 {
3477 if (GET_CODE (parts.disp) != CONST_INT
3478 || (INTVAL (parts.disp) & 3) != 0)
3479 return 0;
3480 }
3481
3482 /* Didn't find one -- this must be an aligned address. */
3483 return 1;
3484 }
3485 \f
3486 /* Return true if the constant is something that can be loaded with
3487 a special instruction. Only handle 0.0 and 1.0; others are less
3488 worthwhile. */
3489
3490 int
3491 standard_80387_constant_p (x)
3492 rtx x;
3493 {
3494 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3495 return -1;
3496 /* Note that there are other constants on the 80387, such as pi, that we should
3497 support too. On some machines, these are much slower to load as a standard
3498 constant than to load from doubles in memory. */
3499 if (x == CONST0_RTX (GET_MODE (x)))
3500 return 1;
3501 if (x == CONST1_RTX (GET_MODE (x)))
3502 return 2;
3503 return 0;
3504 }
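/* Illustrative sketch (not part of the original source): the return-value
   convention of standard_80387_constant_p, shown as a hypothetical helper
   mapping it to the i387 instruction a pattern would emit (fldz loads +0.0,
   fld1 loads +1.0; the actual output templates live in the machine
   description).  Guarded by #if 0 so it does not affect the build.  */
#if 0
static const char *
demo_std_fp_load (int standard_constant_kind)
{
  switch (standard_constant_kind)
    {
    case 1:  return "fldz";    /* 0.0 can be loaded directly */
    case 2:  return "fld1";    /* 1.0 can be loaded directly */
    default: return 0;         /* not a special constant; load from memory */
    }
}
#endif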
3505
3506 /* Return 1 if X is an FP constant we can load into an SSE register
3507 without using memory. */
3508 int
3509 standard_sse_constant_p (x)
3510 rtx x;
3511 {
3512 if (GET_CODE (x) != CONST_DOUBLE)
3513 return -1;
3514 return (x == CONST0_RTX (GET_MODE (x)));
3515 }
3516
3517 /* Returns 1 if OP contains a symbol reference */
3518
3519 int
3520 symbolic_reference_mentioned_p (op)
3521 rtx op;
3522 {
3523 register const char *fmt;
3524 register int i;
3525
3526 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3527 return 1;
3528
3529 fmt = GET_RTX_FORMAT (GET_CODE (op));
3530 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3531 {
3532 if (fmt[i] == 'E')
3533 {
3534 register int j;
3535
3536 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3537 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3538 return 1;
3539 }
3540
3541 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3542 return 1;
3543 }
3544
3545 return 0;
3546 }
3547
3548 /* Return 1 if it is appropriate to emit `ret' instructions in the
3549 body of a function. Do this only if the epilogue is simple, needing a
3550 couple of insns. Prior to reloading, we can't tell how many registers
3551 must be saved, so return 0 then. Return 0 if there is no frame
3552 marker to de-allocate.
3553
3554 If NON_SAVING_SETJMP is defined and true, then it is not possible
3555 for the epilogue to be simple, so return 0. This is a special case
3556 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3557 until final, but jump_optimize may need to know sooner if a
3558 `return' is OK. */
3559
3560 int
3561 ix86_can_use_return_insn_p ()
3562 {
3563 struct ix86_frame frame;
3564
3565 #ifdef NON_SAVING_SETJMP
3566 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3567 return 0;
3568 #endif
3569
3570 if (! reload_completed || frame_pointer_needed)
3571 return 0;
3572
3573 /* Don't allow more than 32k pop, since that's all we can do
3574 with one instruction. */
3575 if (current_function_pops_args
3576 && current_function_args_size >= 32768)
3577 return 0;
3578
3579 ix86_compute_frame_layout (&frame);
3580 return frame.to_allocate == 0 && frame.nregs == 0;
3581 }
3582 \f
3583 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
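/* For example, most x86-64 instructions take at most a 32-bit immediate
   that the hardware sign-extends to 64 bits, so -1 or 0x7fffffff qualify
   here while 0x80000000 or a full 64-bit constant do not.  */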
3584 int
3585 x86_64_sign_extended_value (value)
3586 rtx value;
3587 {
3588 switch (GET_CODE (value))
3589 {
3590 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3591 to be at least 32 and thus all acceptable constants are
3592 represented as CONST_INT. */
3593 case CONST_INT:
3594 if (HOST_BITS_PER_WIDE_INT == 32)
3595 return 1;
3596 else
3597 {
3598 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3599 return trunc_int_for_mode (val, SImode) == val;
3600 }
3601 break;
3602
3603 /* For certain code models, the symbolic references are known to fit. */
3604 case SYMBOL_REF:
3605 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3606
3607 /* For certain code models, the code is near as well. */
3608 case LABEL_REF:
3609 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3610
3611 /* We may also accept offsetted memory references in certain special
3612 cases. */
3613 case CONST:
3614 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3615 && XVECLEN (XEXP (value, 0), 0) == 1
3616 && XINT (XEXP (value, 0), 1) == 15)
3617 return 1;
3618 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3619 {
3620 rtx op1 = XEXP (XEXP (value, 0), 0);
3621 rtx op2 = XEXP (XEXP (value, 0), 1);
3622 HOST_WIDE_INT offset;
3623
3624 if (ix86_cmodel == CM_LARGE)
3625 return 0;
3626 if (GET_CODE (op2) != CONST_INT)
3627 return 0;
3628 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3629 switch (GET_CODE (op1))
3630 {
3631 case SYMBOL_REF:
3632 /* For CM_SMALL assume that the latest object is 1MB before the
3633 end of the 31-bit boundary. We may also accept pretty
3634 large negative constants, knowing that all objects are
3635 in the positive half of the address space. */
3636 if (ix86_cmodel == CM_SMALL
3637 && offset < 1024*1024*1024
3638 && trunc_int_for_mode (offset, SImode) == offset)
3639 return 1;
3640 /* For CM_KERNEL we know that all objects reside in the
3641 negative half of the 32-bit address space. We may not
3642 accept negative offsets, since they may push us just outside
3643 that range, but we may accept pretty large positive ones. */
3644 if (ix86_cmodel == CM_KERNEL
3645 && offset > 0
3646 && trunc_int_for_mode (offset, SImode) == offset)
3647 return 1;
3648 break;
3649 case LABEL_REF:
3650 /* These conditions are similar to SYMBOL_REF ones, just the
3651 constraints for code models differ. */
3652 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3653 && offset < 1024*1024*1024
3654 && trunc_int_for_mode (offset, SImode) == offset)
3655 return 1;
3656 if (ix86_cmodel == CM_KERNEL
3657 && offset > 0
3658 && trunc_int_for_mode (offset, SImode) == offset)
3659 return 1;
3660 break;
3661 default:
3662 return 0;
3663 }
3664 }
3665 return 0;
3666 default:
3667 return 0;
3668 }
3669 }
3670
3671 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
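/* For example, a 32-bit move such as "movl $imm, %eax" zero-extends its
   immediate into the full 64-bit register, so constants like 0x80000000
   or 0xffffffff qualify here even though they do not fit the
   sign-extended field above.  */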
3672 int
3673 x86_64_zero_extended_value (value)
3674 rtx value;
3675 {
3676 switch (GET_CODE (value))
3677 {
3678 case CONST_DOUBLE:
3679 if (HOST_BITS_PER_WIDE_INT == 32)
3680 return (GET_MODE (value) == VOIDmode
3681 && !CONST_DOUBLE_HIGH (value));
3682 else
3683 return 0;
3684 case CONST_INT:
3685 if (HOST_BITS_PER_WIDE_INT == 32)
3686 return INTVAL (value) >= 0;
3687 else
3688 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3689 break;
3690
3691 /* For certain code models, the symbolic references are known to fit. */
3692 case SYMBOL_REF:
3693 return ix86_cmodel == CM_SMALL;
3694
3695 /* For certain code models, the code is near as well. */
3696 case LABEL_REF:
3697 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3698
3699 /* We may also accept offsetted memory references in certain special
3700 cases. */
3701 case CONST:
3702 if (GET_CODE (XEXP (value, 0)) == PLUS)
3703 {
3704 rtx op1 = XEXP (XEXP (value, 0), 0);
3705 rtx op2 = XEXP (XEXP (value, 0), 1);
3706
3707 if (ix86_cmodel == CM_LARGE)
3708 return 0;
3709 switch (GET_CODE (op1))
3710 {
3711 case SYMBOL_REF:
3712 return 0;
3713 /* For small code model we may accept pretty large positive
3714 offsets, since one bit is available for free. Negative
3715 offsets are limited by the size of the NULL pointer area
3716 specified by the ABI. */
3717 if (ix86_cmodel == CM_SMALL
3718 && GET_CODE (op2) == CONST_INT
3719 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3720 && (trunc_int_for_mode (INTVAL (op2), SImode)
3721 == INTVAL (op2)))
3722 return 1;
3723 /* ??? For the kernel, we may accept adjustment of
3724 -0x10000000, since we know that it will just convert
3725 negative address space to positive, but perhaps this
3726 is not worthwhile. */
3727 break;
3728 case LABEL_REF:
3729 /* These conditions are similar to SYMBOL_REF ones, just the
3730 constraints for code models differ. */
3731 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3732 && GET_CODE (op2) == CONST_INT
3733 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3734 && (trunc_int_for_mode (INTVAL (op2), SImode)
3735 == INTVAL (op2)))
3736 return 1;
3737 break;
3738 default:
3739 return 0;
3740 }
3741 }
3742 return 0;
3743 default:
3744 return 0;
3745 }
3746 }
3747
3748 /* Value should be nonzero if functions must have frame pointers.
3749 Zero means the frame pointer need not be set up (and parms may
3750 be accessed via the stack pointer) in functions that seem suitable. */
3751
3752 int
3753 ix86_frame_pointer_required ()
3754 {
3755 /* If we accessed previous frames, then the generated code expects
3756 to be able to access the saved ebp value in our frame. */
3757 if (cfun->machine->accesses_prev_frame)
3758 return 1;
3759
3760 /* Several x86 OSes need a frame pointer for other reasons,
3761 usually pertaining to setjmp. */
3762 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3763 return 1;
3764
3765 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3766 the frame pointer by default. Turn it back on now if we've not
3767 got a leaf function. */
3768 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3769 return 1;
3770
3771 return 0;
3772 }
3773
3774 /* Record that the current function accesses previous call frames. */
3775
3776 void
3777 ix86_setup_frame_addresses ()
3778 {
3779 cfun->machine->accesses_prev_frame = 1;
3780 }
3781 \f
3782 static char pic_label_name[32];
3783
3784 /* This function generates code for -fpic that loads %ebx with
3785 the return address of the caller and then returns. */
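/* The thunk emitted below is essentially

	movl	(%esp), %ebx
	ret

   so a call to it leaves the address of the following instruction in
   %ebx, from which the GOT pointer is later computed.  */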
3786
3787 void
3788 ix86_asm_file_end (file)
3789 FILE *file;
3790 {
3791 rtx xops[2];
3792
3793 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3794 return;
3795
3796 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3797 to updating relocations against a section being discarded, such that this
3798 doesn't work. We ought to detect this at configure time. */
3799 #if 0
3800 /* The trick here is to create a linkonce section containing the
3801 pic label thunk, but to refer to it with an internal label.
3802 Because the label is internal, we don't have inter-dso name
3803 binding issues on hosts that don't support ".hidden".
3804
3805 In order to use these macros, however, we must create a fake
3806 function decl. */
3807 if (targetm.have_named_sections)
3808 {
3809 tree decl = build_decl (FUNCTION_DECL,
3810 get_identifier ("i686.get_pc_thunk"),
3811 error_mark_node);
3812 DECL_ONE_ONLY (decl) = 1;
3813 UNIQUE_SECTION (decl, 0);
3814 named_section (decl, NULL);
3815 }
3816 else
3817 #else
3818 text_section ();
3819 #endif
3820
3821 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3822 internal (non-global) label that's being emitted, it didn't make
3823 sense to have .type information for local labels. This caused
3824 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3825 me debug info for a label that you're declaring non-global?), so this
3826 was changed to call ASM_OUTPUT_LABEL() instead. */
3827
3828 ASM_OUTPUT_LABEL (file, pic_label_name);
3829
3830 xops[0] = pic_offset_table_rtx;
3831 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3832 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3833 output_asm_insn ("ret", xops);
3834 }
3835
3836 void
3837 load_pic_register ()
3838 {
3839 rtx gotsym, pclab;
3840
3841 if (TARGET_64BIT)
3842 abort ();
3843
3844 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3845
3846 if (TARGET_DEEP_BRANCH_PREDICTION)
3847 {
3848 if (! pic_label_name[0])
3849 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3850 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3851 }
3852 else
3853 {
3854 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3855 }
3856
3857 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3858
3859 if (! TARGET_DEEP_BRANCH_PREDICTION)
3860 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3861
3862 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3863 }
3864
3865 /* Generate an "push" pattern for input ARG. */
3866
3867 static rtx
3868 gen_push (arg)
3869 rtx arg;
3870 {
3871 return gen_rtx_SET (VOIDmode,
3872 gen_rtx_MEM (Pmode,
3873 gen_rtx_PRE_DEC (Pmode,
3874 stack_pointer_rtx)),
3875 arg);
3876 }
3877
3878 /* Return 1 if we need to save REGNO. */
3879 static int
3880 ix86_save_reg (regno, maybe_eh_return)
3881 int regno;
3882 int maybe_eh_return;
3883 {
3884 if (regno == PIC_OFFSET_TABLE_REGNUM
3885 && (current_function_uses_pic_offset_table
3886 || current_function_uses_const_pool
3887 || current_function_calls_eh_return))
3888 return 1;
3889
3890 if (current_function_calls_eh_return && maybe_eh_return)
3891 {
3892 unsigned i;
3893 for (i = 0; ; i++)
3894 {
3895 unsigned test = EH_RETURN_DATA_REGNO (i);
3896 if (test == INVALID_REGNUM)
3897 break;
3898 if (test == (unsigned) regno)
3899 return 1;
3900 }
3901 }
3902
3903 return (regs_ever_live[regno]
3904 && !call_used_regs[regno]
3905 && !fixed_regs[regno]
3906 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3907 }
3908
3909 /* Return the number of registers to be saved on the stack. */
3910
3911 static int
3912 ix86_nsaved_regs ()
3913 {
3914 int nregs = 0;
3915 int regno;
3916
3917 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3918 if (ix86_save_reg (regno, true))
3919 nregs++;
3920 return nregs;
3921 }
3922
3923 /* Return the offset between two registers, one to be eliminated, and the other
3924 its replacement, at the start of a routine. */
3925
3926 HOST_WIDE_INT
3927 ix86_initial_elimination_offset (from, to)
3928 int from;
3929 int to;
3930 {
3931 struct ix86_frame frame;
3932 ix86_compute_frame_layout (&frame);
3933
3934 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3935 return frame.hard_frame_pointer_offset;
3936 else if (from == FRAME_POINTER_REGNUM
3937 && to == HARD_FRAME_POINTER_REGNUM)
3938 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3939 else
3940 {
3941 if (to != STACK_POINTER_REGNUM)
3942 abort ();
3943 else if (from == ARG_POINTER_REGNUM)
3944 return frame.stack_pointer_offset;
3945 else if (from != FRAME_POINTER_REGNUM)
3946 abort ();
3947 else
3948 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3949 }
3950 }
3951
3952 /* Fill in the ix86_frame structure FRAME for the function being compiled. */
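/* The layout computed below, from higher towards lower addresses, is
   roughly:

	return address
	saved frame pointer		(if frame_pointer_needed)
					<- hard_frame_pointer_offset
	saved registers			(nregs words)
	va-arg register save area	(va_arg_size bytes, x86-64 varargs)
	padding1			(to stack_alignment_needed)
					<- frame_pointer_offset
	local variables			(get_frame_size () bytes)
	outgoing argument area		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(to preferred_alignment)
					<- stack_pointer_offset

   On x86-64 a leaf function may carve part of this allocation out of the
   red zone below the stack pointer instead.  */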
3953
3954 static void
3955 ix86_compute_frame_layout (frame)
3956 struct ix86_frame *frame;
3957 {
3958 HOST_WIDE_INT total_size;
3959 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3960 int offset;
3961 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3962 HOST_WIDE_INT size = get_frame_size ();
3963
3964 frame->nregs = ix86_nsaved_regs ();
3965 total_size = size;
3966
3967 /* Skip the return address and the saved base pointer. */
3968 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3969
3970 frame->hard_frame_pointer_offset = offset;
3971
3972 /* Do some sanity checking of stack_alignment_needed and
3973 preferred_alignment, since the i386 port is the only one using these
3974 features and they may break easily. */
3975
3976 if (size && !stack_alignment_needed)
3977 abort ();
3978 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3979 abort ();
3980 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3981 abort ();
3982 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3983 abort ();
3984
3985 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3986 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3987
3988 /* Register save area */
3989 offset += frame->nregs * UNITS_PER_WORD;
3990
3991 /* Va-arg area */
3992 if (ix86_save_varrargs_registers)
3993 {
3994 offset += X86_64_VARARGS_SIZE;
3995 frame->va_arg_size = X86_64_VARARGS_SIZE;
3996 }
3997 else
3998 frame->va_arg_size = 0;
3999
4000 /* Align start of frame for local function. */
4001 frame->padding1 = ((offset + stack_alignment_needed - 1)
4002 & -stack_alignment_needed) - offset;
4003
4004 offset += frame->padding1;
4005
4006 /* Frame pointer points here. */
4007 frame->frame_pointer_offset = offset;
4008
4009 offset += size;
4010
4011 /* Add outgoing arguments area. */
4012 if (ACCUMULATE_OUTGOING_ARGS)
4013 {
4014 offset += current_function_outgoing_args_size;
4015 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4016 }
4017 else
4018 frame->outgoing_arguments_size = 0;
4019
4020 /* Align stack boundary. */
4021 frame->padding2 = ((offset + preferred_alignment - 1)
4022 & -preferred_alignment) - offset;
4023
4024 offset += frame->padding2;
4025
4026 /* We've reached end of stack frame. */
4027 frame->stack_pointer_offset = offset;
4028
4029 /* Size prologue needs to allocate. */
4030 frame->to_allocate =
4031 (size + frame->padding1 + frame->padding2
4032 + frame->outgoing_arguments_size + frame->va_arg_size);
4033
4034 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4035 && current_function_is_leaf)
4036 {
4037 frame->red_zone_size = frame->to_allocate;
4038 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4039 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4040 }
4041 else
4042 frame->red_zone_size = 0;
4043 frame->to_allocate -= frame->red_zone_size;
4044 frame->stack_pointer_offset -= frame->red_zone_size;
4045 #if 0
4046 fprintf (stderr, "nregs: %i\n", frame->nregs);
4047 fprintf (stderr, "size: %i\n", size);
4048 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4049 fprintf (stderr, "padding1: %i\n", frame->padding1);
4050 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4051 fprintf (stderr, "padding2: %i\n", frame->padding2);
4052 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4053 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4054 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4055 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4056 frame->hard_frame_pointer_offset);
4057 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4058 #endif
4059 }
4060
4061 /* Emit code to save registers in the prologue. */
4062
4063 static void
4064 ix86_emit_save_regs ()
4065 {
4066 register int regno;
4067 rtx insn;
4068
4069 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4070 if (ix86_save_reg (regno, true))
4071 {
4072 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4073 RTX_FRAME_RELATED_P (insn) = 1;
4074 }
4075 }
4076
4077 /* Emit code to save registers using MOV insns. The first register
4078 is saved at POINTER + OFFSET. */
4079 static void
4080 ix86_emit_save_regs_using_mov (pointer, offset)
4081 rtx pointer;
4082 HOST_WIDE_INT offset;
4083 {
4084 int regno;
4085 rtx insn;
4086
4087 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4088 if (ix86_save_reg (regno, true))
4089 {
4090 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4091 Pmode, offset),
4092 gen_rtx_REG (Pmode, regno));
4093 RTX_FRAME_RELATED_P (insn) = 1;
4094 offset += UNITS_PER_WORD;
4095 }
4096 }
4097
4098 /* Expand the prologue into a bunch of separate insns. */
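/* For a typical function that needs a frame pointer and saves %ebx the
   emitted sequence is roughly

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp

   where N is frame.to_allocate; with TARGET_PROLOGUE_USING_MOVE the
   register saves are instead done with mov stores into a single, larger
   allocation.  */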
4099
4100 void
4101 ix86_expand_prologue ()
4102 {
4103 rtx insn;
4104 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4105 || current_function_uses_const_pool)
4106 && !TARGET_64BIT);
4107 struct ix86_frame frame;
4108 int use_mov = 0;
4109 HOST_WIDE_INT allocate;
4110
4111 if (!optimize_size)
4112 {
4113 use_fast_prologue_epilogue
4114 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4115 if (TARGET_PROLOGUE_USING_MOVE)
4116 use_mov = use_fast_prologue_epilogue;
4117 }
4118 ix86_compute_frame_layout (&frame);
4119
4120 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4121 slower on all targets. Also sdb doesn't like it. */
4122
4123 if (frame_pointer_needed)
4124 {
4125 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4126 RTX_FRAME_RELATED_P (insn) = 1;
4127
4128 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4129 RTX_FRAME_RELATED_P (insn) = 1;
4130 }
4131
4132 allocate = frame.to_allocate;
4133 /* When we are dealing with only a single register and an empty frame,
4134 a push is equivalent to the mov+add sequence. */
4135 if (allocate == 0 && frame.nregs <= 1)
4136 use_mov = 0;
4137
4138 if (!use_mov)
4139 ix86_emit_save_regs ();
4140 else
4141 allocate += frame.nregs * UNITS_PER_WORD;
4142
4143 if (allocate == 0)
4144 ;
4145 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4146 {
4147 insn = emit_insn (gen_pro_epilogue_adjust_stack
4148 (stack_pointer_rtx, stack_pointer_rtx,
4149 GEN_INT (-allocate)));
4150 RTX_FRAME_RELATED_P (insn) = 1;
4151 }
4152 else
4153 {
4154 /* ??? Is this only valid for Win32? */
4155
4156 rtx arg0, sym;
4157
4158 if (TARGET_64BIT)
4159 abort ();
4160
4161 arg0 = gen_rtx_REG (SImode, 0);
4162 emit_move_insn (arg0, GEN_INT (allocate));
4163
4164 sym = gen_rtx_MEM (FUNCTION_MODE,
4165 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4166 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4167
4168 CALL_INSN_FUNCTION_USAGE (insn)
4169 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4170 CALL_INSN_FUNCTION_USAGE (insn));
4171 }
4172 if (use_mov)
4173 {
4174 if (!frame_pointer_needed || !frame.to_allocate)
4175 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4176 else
4177 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4178 -frame.nregs * UNITS_PER_WORD);
4179 }
4180
4181 #ifdef SUBTARGET_PROLOGUE
4182 SUBTARGET_PROLOGUE;
4183 #endif
4184
4185 if (pic_reg_used)
4186 load_pic_register ();
4187
4188 /* If we are profiling, make sure no instructions are scheduled before
4189 the call to mcount. However, if -fpic, the above call will have
4190 done that. */
4191 if (current_function_profile && ! pic_reg_used)
4192 emit_insn (gen_blockage ());
4193 }
4194
4195 /* Emit code to restore saved registers using MOV insns. First register
4196 is restored from POINTER + OFFSET. */
4197 static void
4198 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4199 rtx pointer;
4200 int offset;
4201 int maybe_eh_return;
4202 {
4203 int regno;
4204
4205 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4206 if (ix86_save_reg (regno, maybe_eh_return))
4207 {
4208 emit_move_insn (gen_rtx_REG (Pmode, regno),
4209 adjust_address (gen_rtx_MEM (Pmode, pointer),
4210 Pmode, offset));
4211 offset += UNITS_PER_WORD;
4212 }
4213 }
4214
4215 /* Restore function stack, frame, and registers. */
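/* A simple frame-pointer epilogue is typically just "leave ; ret" (or the
   discrete "movl %ebp, %esp ; popl %ebp ; ret"), a frameless one adds the
   allocation back to %esp and pops the saved registers, and callee-pop
   functions end in "ret $n" instead of a plain "ret".  */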
4216
4217 void
4218 ix86_expand_epilogue (style)
4219 int style;
4220 {
4221 int regno;
4222 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4223 struct ix86_frame frame;
4224 HOST_WIDE_INT offset;
4225
4226 ix86_compute_frame_layout (&frame);
4227
4228 /* Calculate start of saved registers relative to ebp. Special care
4229 must be taken for the normal return case of a function using
4230 eh_return: the eax and edx registers are marked as saved, but not
4231 restored along this path. */
4232 offset = frame.nregs;
4233 if (current_function_calls_eh_return && style != 2)
4234 offset -= 2;
4235 offset *= -UNITS_PER_WORD;
4236
4237 /* If we're only restoring one register and sp is not valid then
4238 use a move instruction to restore the register, since it's
4239 less work than reloading sp and popping the register.
4240
4241 The default code results in a stack adjustment using an add/lea
4242 instruction, while this code results in a LEAVE instruction (or discrete
4243 equivalent), so it is profitable in some other cases as well, especially
4244 when there are no registers to restore. We also use this code when
4245 TARGET_USE_LEAVE and there is exactly one register to pop. This
4246 heuristic may need some tuning in the future. */
4247 if ((!sp_valid && frame.nregs <= 1)
4248 || (TARGET_EPILOGUE_USING_MOVE
4249 && use_fast_prologue_epilogue
4250 && (frame.nregs > 1 || frame.to_allocate))
4251 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4252 || (frame_pointer_needed && TARGET_USE_LEAVE
4253 && use_fast_prologue_epilogue && frame.nregs == 1)
4254 || current_function_calls_eh_return)
4255 {
4256 /* Restore registers. We can use ebp or esp to address the memory
4257 locations. If both are available, default to ebp, since offsets
4258 are known to be small. The only exception is esp pointing directly to
4259 the end of the block of saved registers, where we may simplify the
4260 addressing mode. */
4261
4262 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4263 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4264 frame.to_allocate, style == 2);
4265 else
4266 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4267 offset, style == 2);
4268
4269 /* eh_return epilogues need %ecx added to the stack pointer. */
4270 if (style == 2)
4271 {
4272 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4273
4274 if (frame_pointer_needed)
4275 {
4276 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4277 tmp = plus_constant (tmp, UNITS_PER_WORD);
4278 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4279
4280 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4281 emit_move_insn (hard_frame_pointer_rtx, tmp);
4282
4283 emit_insn (gen_pro_epilogue_adjust_stack
4284 (stack_pointer_rtx, sa, const0_rtx));
4285 }
4286 else
4287 {
4288 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4289 tmp = plus_constant (tmp, (frame.to_allocate
4290 + frame.nregs * UNITS_PER_WORD));
4291 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4292 }
4293 }
4294 else if (!frame_pointer_needed)
4295 emit_insn (gen_pro_epilogue_adjust_stack
4296 (stack_pointer_rtx, stack_pointer_rtx,
4297 GEN_INT (frame.to_allocate
4298 + frame.nregs * UNITS_PER_WORD)));
4299 /* If not an i386, mov & pop is faster than "leave". */
4300 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4301 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4302 else
4303 {
4304 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4305 hard_frame_pointer_rtx,
4306 const0_rtx));
4307 if (TARGET_64BIT)
4308 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4309 else
4310 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4311 }
4312 }
4313 else
4314 {
4315 /* First step is to deallocate the stack frame so that we can
4316 pop the registers. */
4317 if (!sp_valid)
4318 {
4319 if (!frame_pointer_needed)
4320 abort ();
4321 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4322 hard_frame_pointer_rtx,
4323 GEN_INT (offset)));
4324 }
4325 else if (frame.to_allocate)
4326 emit_insn (gen_pro_epilogue_adjust_stack
4327 (stack_pointer_rtx, stack_pointer_rtx,
4328 GEN_INT (frame.to_allocate)));
4329
4330 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4331 if (ix86_save_reg (regno, false))
4332 {
4333 if (TARGET_64BIT)
4334 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4335 else
4336 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4337 }
4338 if (frame_pointer_needed)
4339 {
4340 /* Leave results in shorter dependency chains on CPUs that are
4341 able to grok it fast. */
4342 if (TARGET_USE_LEAVE)
4343 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4344 else if (TARGET_64BIT)
4345 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4346 else
4347 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4348 }
4349 }
4350
4351 /* Sibcall epilogues don't want a return instruction. */
4352 if (style == 0)
4353 return;
4354
4355 if (current_function_pops_args && current_function_args_size)
4356 {
4357 rtx popc = GEN_INT (current_function_pops_args);
4358
4359 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4360 return address, do an explicit add, and jump indirectly to the
4361 caller. */
4362
4363 if (current_function_pops_args >= 65536)
4364 {
4365 rtx ecx = gen_rtx_REG (SImode, 2);
4366
4367 /* There is no "pascal" calling convention in the 64bit ABI. */
4368 if (TARGET_64BIT)
4369 abort ();
4370
4371 emit_insn (gen_popsi1 (ecx));
4372 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4373 emit_jump_insn (gen_return_indirect_internal (ecx));
4374 }
4375 else
4376 emit_jump_insn (gen_return_pop_internal (popc));
4377 }
4378 else
4379 emit_jump_insn (gen_return_internal ());
4380 }
4381 \f
4382 /* Extract the parts of an RTL expression that is a valid memory address
4383 for an instruction. Return 0 if the structure of the address is
4384 grossly off. Return -1 if the address contains ASHIFT, so it is not
4385 strictly valid, but is still used for computing the length of a lea
4386 instruction. */
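/* For example the address of "12(%ebx,%eax,4)", i.e.
   (plus (plus (mult (reg eax) (const_int 4)) (reg ebx)) (const_int 12)),
   decomposes into base = ebx, index = eax, scale = 4 and disp = 12.  */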
4387
4388 static int
4389 ix86_decompose_address (addr, out)
4390 register rtx addr;
4391 struct ix86_address *out;
4392 {
4393 rtx base = NULL_RTX;
4394 rtx index = NULL_RTX;
4395 rtx disp = NULL_RTX;
4396 HOST_WIDE_INT scale = 1;
4397 rtx scale_rtx = NULL_RTX;
4398 int retval = 1;
4399
4400 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4401 base = addr;
4402 else if (GET_CODE (addr) == PLUS)
4403 {
4404 rtx op0 = XEXP (addr, 0);
4405 rtx op1 = XEXP (addr, 1);
4406 enum rtx_code code0 = GET_CODE (op0);
4407 enum rtx_code code1 = GET_CODE (op1);
4408
4409 if (code0 == REG || code0 == SUBREG)
4410 {
4411 if (code1 == REG || code1 == SUBREG)
4412 index = op0, base = op1; /* index + base */
4413 else
4414 base = op0, disp = op1; /* base + displacement */
4415 }
4416 else if (code0 == MULT)
4417 {
4418 index = XEXP (op0, 0);
4419 scale_rtx = XEXP (op0, 1);
4420 if (code1 == REG || code1 == SUBREG)
4421 base = op1; /* index*scale + base */
4422 else
4423 disp = op1; /* index*scale + disp */
4424 }
4425 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4426 {
4427 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4428 scale_rtx = XEXP (XEXP (op0, 0), 1);
4429 base = XEXP (op0, 1);
4430 disp = op1;
4431 }
4432 else if (code0 == PLUS)
4433 {
4434 index = XEXP (op0, 0); /* index + base + disp */
4435 base = XEXP (op0, 1);
4436 disp = op1;
4437 }
4438 else
4439 return 0;
4440 }
4441 else if (GET_CODE (addr) == MULT)
4442 {
4443 index = XEXP (addr, 0); /* index*scale */
4444 scale_rtx = XEXP (addr, 1);
4445 }
4446 else if (GET_CODE (addr) == ASHIFT)
4447 {
4448 rtx tmp;
4449
4450 /* We're called for lea too, which implements ashift on occasion. */
4451 index = XEXP (addr, 0);
4452 tmp = XEXP (addr, 1);
4453 if (GET_CODE (tmp) != CONST_INT)
4454 return 0;
4455 scale = INTVAL (tmp);
4456 if ((unsigned HOST_WIDE_INT) scale > 3)
4457 return 0;
4458 scale = 1 << scale;
4459 retval = -1;
4460 }
4461 else
4462 disp = addr; /* displacement */
4463
4464 /* Extract the integral value of scale. */
4465 if (scale_rtx)
4466 {
4467 if (GET_CODE (scale_rtx) != CONST_INT)
4468 return 0;
4469 scale = INTVAL (scale_rtx);
4470 }
4471
4472 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4473 if (base && index && scale == 1
4474 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4475 || index == stack_pointer_rtx))
4476 {
4477 rtx tmp = base;
4478 base = index;
4479 index = tmp;
4480 }
4481
4482 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4483 if ((base == hard_frame_pointer_rtx
4484 || base == frame_pointer_rtx
4485 || base == arg_pointer_rtx) && !disp)
4486 disp = const0_rtx;
4487
4488 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4489 Avoid this by transforming to [%esi+0]. */
4490 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4491 && base && !index && !disp
4492 && REG_P (base)
4493 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4494 disp = const0_rtx;
4495
4496 /* Special case: encode reg+reg instead of reg*2. */
4497 if (!base && index && scale && scale == 2)
4498 base = index, scale = 1;
4499
4500 /* Special case: scaling cannot be encoded without base or displacement. */
4501 if (!base && !disp && index && scale != 1)
4502 disp = const0_rtx;
4503
4504 out->base = base;
4505 out->index = index;
4506 out->disp = disp;
4507 out->scale = scale;
4508
4509 return retval;
4510 }
4511 \f
4512 /* Return the cost of the memory address X.
4513 For i386, it is better to use a complex address than let gcc copy
4514 the address into a reg and make a new pseudo. But not if the address
4515 requires two regs - that would mean more pseudos with longer
4516 lifetimes. */
4517 int
4518 ix86_address_cost (x)
4519 rtx x;
4520 {
4521 struct ix86_address parts;
4522 int cost = 1;
4523
4524 if (!ix86_decompose_address (x, &parts))
4525 abort ();
4526
4527 /* More complex memory references are better. */
4528 if (parts.disp && parts.disp != const0_rtx)
4529 cost--;
4530
4531 /* Attempt to minimize number of registers in the address. */
4532 if ((parts.base
4533 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4534 || (parts.index
4535 && (!REG_P (parts.index)
4536 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4537 cost++;
4538
4539 if (parts.base
4540 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4541 && parts.index
4542 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4543 && parts.base != parts.index)
4544 cost++;
4545
4546 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4547 since its predecode logic can't detect the length of such instructions
4548 and they degenerate to vector decoding. Increase the cost of such
4549 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4550 to split such addresses or even to refuse them entirely.
4551
4552 The following addressing modes are affected:
4553 [base+scale*index]
4554 [scale*index+disp]
4555 [base+index]
4556
4557 The first and last cases may be avoidable by explicitly coding a zero
4558 displacement in the memory address, but I don't have an AMD-K6 machine
4559 handy to check this theory. */
4560
4561 if (TARGET_K6
4562 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4563 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4564 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4565 cost += 10;
4566
4567 return cost;
4568 }
4569 \f
4570 /* If X is a machine specific address (i.e. a symbol or label being
4571 referenced as a displacement from the GOT implemented using an
4572 UNSPEC), then return the base term. Otherwise return X. */
4573
4574 rtx
4575 ix86_find_base_term (x)
4576 rtx x;
4577 {
4578 rtx term;
4579
4580 if (TARGET_64BIT)
4581 {
4582 if (GET_CODE (x) != CONST)
4583 return x;
4584 term = XEXP (x, 0);
4585 if (GET_CODE (term) == PLUS
4586 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4587 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4588 term = XEXP (term, 0);
4589 if (GET_CODE (term) != UNSPEC
4590 || XVECLEN (term, 0) != 1
4591 || XINT (term, 1) != 15)
4592 return x;
4593
4594 term = XVECEXP (term, 0, 0);
4595
4596 if (GET_CODE (term) != SYMBOL_REF
4597 && GET_CODE (term) != LABEL_REF)
4598 return x;
4599
4600 return term;
4601 }
4602
4603 if (GET_CODE (x) != PLUS
4604 || XEXP (x, 0) != pic_offset_table_rtx
4605 || GET_CODE (XEXP (x, 1)) != CONST)
4606 return x;
4607
4608 term = XEXP (XEXP (x, 1), 0);
4609
4610 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4611 term = XEXP (term, 0);
4612
4613 if (GET_CODE (term) != UNSPEC
4614 || XVECLEN (term, 0) != 1
4615 || XINT (term, 1) != 7)
4616 return x;
4617
4618 term = XVECEXP (term, 0, 0);
4619
4620 if (GET_CODE (term) != SYMBOL_REF
4621 && GET_CODE (term) != LABEL_REF)
4622 return x;
4623
4624 return term;
4625 }
4626 \f
4627 /* Determine if a given CONST RTX is a valid memory displacement
4628 in PIC mode. */
4629
4630 int
4631 legitimate_pic_address_disp_p (disp)
4632 register rtx disp;
4633 {
4634 /* In 64bit mode we can allow direct addresses of symbols and labels
4635 when they are not dynamic symbols. */
4636 if (TARGET_64BIT)
4637 {
4638 rtx x = disp;
4639 if (GET_CODE (disp) == CONST)
4640 x = XEXP (disp, 0);
4641 /* ??? Handle PIC code models */
4642 if (GET_CODE (x) == PLUS
4643 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4644 && ix86_cmodel == CM_SMALL_PIC
4645 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4646 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4647 x = XEXP (x, 0);
4648 if (local_symbolic_operand (x, Pmode))
4649 return 1;
4650 }
4651 if (GET_CODE (disp) != CONST)
4652 return 0;
4653 disp = XEXP (disp, 0);
4654
4655 if (TARGET_64BIT)
4656 {
4657 /* It is unsafe to allow PLUS expressions here; this limits the allowed
4658 distance of GOT references. We should not need these anyway. */
4659 if (GET_CODE (disp) != UNSPEC
4660 || XVECLEN (disp, 0) != 1
4661 || XINT (disp, 1) != 15)
4662 return 0;
4663
4664 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4665 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4666 return 0;
4667 return 1;
4668 }
4669
4670 if (GET_CODE (disp) == PLUS)
4671 {
4672 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4673 return 0;
4674 disp = XEXP (disp, 0);
4675 }
4676
4677 if (GET_CODE (disp) != UNSPEC
4678 || XVECLEN (disp, 0) != 1)
4679 return 0;
4680
4681 /* Must be @GOT or @GOTOFF. */
4682 switch (XINT (disp, 1))
4683 {
4684 case 6: /* @GOT */
4685 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4686
4687 case 7: /* @GOTOFF */
4688 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4689 }
4690
4691 return 0;
4692 }
4693
4694 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4695 memory address for an instruction. The MODE argument is the machine mode
4696 for the MEM expression that wants to use this address.
4697
4698 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4699 convert common non-canonical forms to canonical form so that they will
4700 be recognized. */
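/* A canonical i386 address is at most base + index*scale + disp, with base
   and index being registers in Pmode, scale one of 1, 2, 4 or 8, and disp
   a constant -- e.g. the operand of "16(%ebx,%esi,4)".  */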
4701
4702 int
4703 legitimate_address_p (mode, addr, strict)
4704 enum machine_mode mode;
4705 register rtx addr;
4706 int strict;
4707 {
4708 struct ix86_address parts;
4709 rtx base, index, disp;
4710 HOST_WIDE_INT scale;
4711 const char *reason = NULL;
4712 rtx reason_rtx = NULL_RTX;
4713
4714 if (TARGET_DEBUG_ADDR)
4715 {
4716 fprintf (stderr,
4717 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4718 GET_MODE_NAME (mode), strict);
4719 debug_rtx (addr);
4720 }
4721
4722 if (ix86_decompose_address (addr, &parts) <= 0)
4723 {
4724 reason = "decomposition failed";
4725 goto report_error;
4726 }
4727
4728 base = parts.base;
4729 index = parts.index;
4730 disp = parts.disp;
4731 scale = parts.scale;
4732
4733 /* Validate base register.
4734
4735 Don't allow SUBREGs here; they can lead to spill failures when the base
4736 is one word out of a two-word structure, which is represented internally
4737 as a DImode int. */
4738
4739 if (base)
4740 {
4741 reason_rtx = base;
4742
4743 if (GET_CODE (base) != REG)
4744 {
4745 reason = "base is not a register";
4746 goto report_error;
4747 }
4748
4749 if (GET_MODE (base) != Pmode)
4750 {
4751 reason = "base is not in Pmode";
4752 goto report_error;
4753 }
4754
4755 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4756 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4757 {
4758 reason = "base is not valid";
4759 goto report_error;
4760 }
4761 }
4762
4763 /* Validate index register.
4764
4765 Don't allow SUBREGs here; they can lead to spill failures when the index
4766 is one word out of a two-word structure, which is represented internally
4767 as a DImode int. */
4768
4769 if (index)
4770 {
4771 reason_rtx = index;
4772
4773 if (GET_CODE (index) != REG)
4774 {
4775 reason = "index is not a register";
4776 goto report_error;
4777 }
4778
4779 if (GET_MODE (index) != Pmode)
4780 {
4781 reason = "index is not in Pmode";
4782 goto report_error;
4783 }
4784
4785 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4786 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4787 {
4788 reason = "index is not valid";
4789 goto report_error;
4790 }
4791 }
4792
4793 /* Validate scale factor. */
4794 if (scale != 1)
4795 {
4796 reason_rtx = GEN_INT (scale);
4797 if (!index)
4798 {
4799 reason = "scale without index";
4800 goto report_error;
4801 }
4802
4803 if (scale != 2 && scale != 4 && scale != 8)
4804 {
4805 reason = "scale is not a valid multiplier";
4806 goto report_error;
4807 }
4808 }
4809
4810 /* Validate displacement. */
4811 if (disp)
4812 {
4813 reason_rtx = disp;
4814
4815 if (!CONSTANT_ADDRESS_P (disp))
4816 {
4817 reason = "displacement is not constant";
4818 goto report_error;
4819 }
4820
4821 if (TARGET_64BIT)
4822 {
4823 if (!x86_64_sign_extended_value (disp))
4824 {
4825 reason = "displacement is out of range";
4826 goto report_error;
4827 }
4828 }
4829 else
4830 {
4831 if (GET_CODE (disp) == CONST_DOUBLE)
4832 {
4833 reason = "displacement is a const_double";
4834 goto report_error;
4835 }
4836 }
4837
4838 if (flag_pic && SYMBOLIC_CONST (disp))
4839 {
4840 if (TARGET_64BIT && (index || base))
4841 {
4842 reason = "non-constant pic memory reference";
4843 goto report_error;
4844 }
4845 if (! legitimate_pic_address_disp_p (disp))
4846 {
4847 reason = "displacement is an invalid pic construct";
4848 goto report_error;
4849 }
4850
4851 /* This code used to verify that a symbolic pic displacement
4852 includes the pic_offset_table_rtx register.
4853
4854 While this is a good idea, unfortunately these constructs may
4855 be created by the "adds using lea" optimization for incorrect
4856 code like:
4857
4858 int a;
4859 int foo(int i)
4860 {
4861 return *(&a+i);
4862 }
4863
4864 This code is nonsensical, but results in addressing
4865 GOT table with pic_offset_table_rtx base. We can't
4866 just refuse it easily, since it gets matched by
4867 "addsi3" pattern, that later gets split to lea in the
4868 case output register differs from input. While this
4869 can be handled by separate addsi pattern for this case
4870 that never results in lea, this seems to be easier and
4871 correct fix for crash to disable this test. */
4872 }
4873 else if (HALF_PIC_P ())
4874 {
4875 if (! HALF_PIC_ADDRESS_P (disp)
4876 || (base != NULL_RTX || index != NULL_RTX))
4877 {
4878 reason = "displacement is an invalid half-pic reference";
4879 goto report_error;
4880 }
4881 }
4882 }
4883
4884 /* Everything looks valid. */
4885 if (TARGET_DEBUG_ADDR)
4886 fprintf (stderr, "Success.\n");
4887 return TRUE;
4888
4889 report_error:
4890 if (TARGET_DEBUG_ADDR)
4891 {
4892 fprintf (stderr, "Error: %s\n", reason);
4893 debug_rtx (reason_rtx);
4894 }
4895 return FALSE;
4896 }
4897 \f
4898 /* Return a unique alias set for the GOT. */
4899
4900 static HOST_WIDE_INT
4901 ix86_GOT_alias_set ()
4902 {
4903 static HOST_WIDE_INT set = -1;
4904 if (set == -1)
4905 set = new_alias_set ();
4906 return set;
4907 }
4908
4909 /* Return a legitimate reference for ORIG (an address) using the
4910 register REG. If REG is 0, a new pseudo is generated.
4911
4912 There are two types of references that must be handled:
4913
4914 1. Global data references must load the address from the GOT, via
4915 the PIC reg. An insn is emitted to do this load, and the reg is
4916 returned.
4917
4918 2. Static data references, constant pool addresses, and code labels
4919 compute the address as an offset from the GOT, whose base is in
4920 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4921 differentiate them from global data objects. The returned
4922 address is the PIC reg + an unspec constant.
4923
4924 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4925 reg also appears in the address. */
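/* On IA-32 the two cases typically end up as

	movl	foo@GOT(%ebx), %reg	(case 1: load the address from the GOT)
	leal	bar@GOTOFF(%ebx), %reg	(case 2: offset from the GOT base)

   where %ebx holds the PIC register.  */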
4926
4927 rtx
4928 legitimize_pic_address (orig, reg)
4929 rtx orig;
4930 rtx reg;
4931 {
4932 rtx addr = orig;
4933 rtx new = orig;
4934 rtx base;
4935
4936 if (local_symbolic_operand (addr, Pmode))
4937 {
4938 /* In 64bit mode we can address such objects directly. */
4939 if (TARGET_64BIT)
4940 new = addr;
4941 else
4942 {
4943 /* This symbol may be referenced via a displacement from the PIC
4944 base address (@GOTOFF). */
4945
4946 current_function_uses_pic_offset_table = 1;
4947 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4948 new = gen_rtx_CONST (Pmode, new);
4949 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4950
4951 if (reg != 0)
4952 {
4953 emit_move_insn (reg, new);
4954 new = reg;
4955 }
4956 }
4957 }
4958 else if (GET_CODE (addr) == SYMBOL_REF)
4959 {
4960 if (TARGET_64BIT)
4961 {
4962 current_function_uses_pic_offset_table = 1;
4963 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4964 new = gen_rtx_CONST (Pmode, new);
4965 new = gen_rtx_MEM (Pmode, new);
4966 RTX_UNCHANGING_P (new) = 1;
4967 set_mem_alias_set (new, ix86_GOT_alias_set ());
4968
4969 if (reg == 0)
4970 reg = gen_reg_rtx (Pmode);
4971 /* Use gen_movsi directly, otherwise the address is loaded
4972 into a register for CSE. We don't want to CSE these addresses;
4973 instead we CSE addresses from the GOT table, so skip this. */
4974 emit_insn (gen_movsi (reg, new));
4975 new = reg;
4976 }
4977 else
4978 {
4979 /* This symbol must be referenced via a load from the
4980 Global Offset Table (@GOT). */
4981
4982 current_function_uses_pic_offset_table = 1;
4983 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4984 new = gen_rtx_CONST (Pmode, new);
4985 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4986 new = gen_rtx_MEM (Pmode, new);
4987 RTX_UNCHANGING_P (new) = 1;
4988 set_mem_alias_set (new, ix86_GOT_alias_set ());
4989
4990 if (reg == 0)
4991 reg = gen_reg_rtx (Pmode);
4992 emit_move_insn (reg, new);
4993 new = reg;
4994 }
4995 }
4996 else
4997 {
4998 if (GET_CODE (addr) == CONST)
4999 {
5000 addr = XEXP (addr, 0);
5001
5002 /* We must match stuff we generated before. Assume the only
5003 unspecs that can get here are ours. Not that we could do
5004 anything with them anyway... */
5005 if (GET_CODE (addr) == UNSPEC
5006 || (GET_CODE (addr) == PLUS
5007 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5008 return orig;
5009 if (GET_CODE (addr) != PLUS)
5010 abort ();
5011 }
5012 if (GET_CODE (addr) == PLUS)
5013 {
5014 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5015
5016 /* Check first to see if this is a constant offset from a @GOTOFF
5017 symbol reference. */
5018 if (local_symbolic_operand (op0, Pmode)
5019 && GET_CODE (op1) == CONST_INT)
5020 {
5021 if (!TARGET_64BIT)
5022 {
5023 current_function_uses_pic_offset_table = 1;
5024 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5025 new = gen_rtx_PLUS (Pmode, new, op1);
5026 new = gen_rtx_CONST (Pmode, new);
5027 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5028
5029 if (reg != 0)
5030 {
5031 emit_move_insn (reg, new);
5032 new = reg;
5033 }
5034 }
5035 else
5036 {
5037 /* ??? We need to limit offsets here. */
5038 }
5039 }
5040 else
5041 {
5042 base = legitimize_pic_address (XEXP (addr, 0), reg);
5043 new = legitimize_pic_address (XEXP (addr, 1),
5044 base == reg ? NULL_RTX : reg);
5045
5046 if (GET_CODE (new) == CONST_INT)
5047 new = plus_constant (base, INTVAL (new));
5048 else
5049 {
5050 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5051 {
5052 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5053 new = XEXP (new, 1);
5054 }
5055 new = gen_rtx_PLUS (Pmode, base, new);
5056 }
5057 }
5058 }
5059 }
5060 return new;
5061 }
5062 \f
5063 /* Try machine-dependent ways of modifying an illegitimate address
5064 to be legitimate. If we find one, return the new, valid address.
5065 This macro is used in only one place: `memory_address' in explow.c.
5066
5067 OLDX is the address as it was before break_out_memory_refs was called.
5068 In some cases it is useful to look at this to decide what needs to be done.
5069
5070 MODE and WIN are passed so that this macro can use
5071 GO_IF_LEGITIMATE_ADDRESS.
5072
5073 It is always safe for this macro to do nothing. It exists to recognize
5074 opportunities to optimize the output.
5075
5076 For the 80386, we handle X+REG by loading X into a register R and
5077 using R+REG. R will go in a general reg and indexing will be used.
5078 However, if REG is a broken-out memory address or multiplication,
5079 nothing needs to be done because REG can certainly go in a general reg.
5080
5081 When -fpic is used, special handling is needed for symbolic references.
5082 See comments by legitimize_pic_address in i386.c for details. */
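/* For example, (plus (ashift (reg) (const_int 3)) (reg)) is rewritten below
   into (plus (mult (reg) (const_int 8)) (reg)), so that it can be matched
   as a scaled-index address (a single lea).  */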
5083
5084 rtx
5085 legitimize_address (x, oldx, mode)
5086 register rtx x;
5087 register rtx oldx ATTRIBUTE_UNUSED;
5088 enum machine_mode mode;
5089 {
5090 int changed = 0;
5091 unsigned log;
5092
5093 if (TARGET_DEBUG_ADDR)
5094 {
5095 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5096 GET_MODE_NAME (mode));
5097 debug_rtx (x);
5098 }
5099
5100 if (flag_pic && SYMBOLIC_CONST (x))
5101 return legitimize_pic_address (x, 0);
5102
5103 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5104 if (GET_CODE (x) == ASHIFT
5105 && GET_CODE (XEXP (x, 1)) == CONST_INT
5106 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5107 {
5108 changed = 1;
5109 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5110 GEN_INT (1 << log));
5111 }
5112
5113 if (GET_CODE (x) == PLUS)
5114 {
5115 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5116
5117 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5118 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5119 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5120 {
5121 changed = 1;
5122 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5123 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5124 GEN_INT (1 << log));
5125 }
5126
5127 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5128 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5129 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5130 {
5131 changed = 1;
5132 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5133 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5134 GEN_INT (1 << log));
5135 }
5136
5137 /* Put multiply first if it isn't already. */
5138 if (GET_CODE (XEXP (x, 1)) == MULT)
5139 {
5140 rtx tmp = XEXP (x, 0);
5141 XEXP (x, 0) = XEXP (x, 1);
5142 XEXP (x, 1) = tmp;
5143 changed = 1;
5144 }
5145
5146 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5147 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5148 created by virtual register instantiation, register elimination, and
5149 similar optimizations. */
5150 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5151 {
5152 changed = 1;
5153 x = gen_rtx_PLUS (Pmode,
5154 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5155 XEXP (XEXP (x, 1), 0)),
5156 XEXP (XEXP (x, 1), 1));
5157 }
5158
5159 /* Canonicalize
5160 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5161 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5162 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5163 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5164 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5165 && CONSTANT_P (XEXP (x, 1)))
5166 {
5167 rtx constant;
5168 rtx other = NULL_RTX;
5169
5170 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5171 {
5172 constant = XEXP (x, 1);
5173 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5174 }
5175 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5176 {
5177 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5178 other = XEXP (x, 1);
5179 }
5180 else
5181 constant = 0;
5182
5183 if (constant)
5184 {
5185 changed = 1;
5186 x = gen_rtx_PLUS (Pmode,
5187 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5188 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5189 plus_constant (other, INTVAL (constant)));
5190 }
5191 }
5192
5193 if (changed && legitimate_address_p (mode, x, FALSE))
5194 return x;
5195
5196 if (GET_CODE (XEXP (x, 0)) == MULT)
5197 {
5198 changed = 1;
5199 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5200 }
5201
5202 if (GET_CODE (XEXP (x, 1)) == MULT)
5203 {
5204 changed = 1;
5205 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5206 }
5207
5208 if (changed
5209 && GET_CODE (XEXP (x, 1)) == REG
5210 && GET_CODE (XEXP (x, 0)) == REG)
5211 return x;
5212
5213 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5214 {
5215 changed = 1;
5216 x = legitimize_pic_address (x, 0);
5217 }
5218
5219 if (changed && legitimate_address_p (mode, x, FALSE))
5220 return x;
5221
5222 if (GET_CODE (XEXP (x, 0)) == REG)
5223 {
5224 register rtx temp = gen_reg_rtx (Pmode);
5225 register rtx val = force_operand (XEXP (x, 1), temp);
5226 if (val != temp)
5227 emit_move_insn (temp, val);
5228
5229 XEXP (x, 1) = temp;
5230 return x;
5231 }
5232
5233 else if (GET_CODE (XEXP (x, 1)) == REG)
5234 {
5235 register rtx temp = gen_reg_rtx (Pmode);
5236 register rtx val = force_operand (XEXP (x, 0), temp);
5237 if (val != temp)
5238 emit_move_insn (temp, val);
5239
5240 XEXP (x, 0) = temp;
5241 return x;
5242 }
5243 }
5244
5245 return x;
5246 }
5247 \f
5248 /* Print an integer constant expression in assembler syntax. Addition
5249 and subtraction are the only arithmetic that may appear in these
5250 expressions. FILE is the stdio stream to write to, X is the rtx, and
5251 CODE is the operand print code from the output string. */
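/* The UNSPEC numbers recognized here follow the convention used throughout
   this file: 6 is @GOT, 7 is @GOTOFF, 8 is @PLT and 15 is @GOTPCREL.  */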
5252
5253 static void
5254 output_pic_addr_const (file, x, code)
5255 FILE *file;
5256 rtx x;
5257 int code;
5258 {
5259 char buf[256];
5260
5261 switch (GET_CODE (x))
5262 {
5263 case PC:
5264 if (flag_pic)
5265 putc ('.', file);
5266 else
5267 abort ();
5268 break;
5269
5270 case SYMBOL_REF:
5271 assemble_name (file, XSTR (x, 0));
5272 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5273 fputs ("@PLT", file);
5274 break;
5275
5276 case LABEL_REF:
5277 x = XEXP (x, 0);
5278 /* FALLTHRU */
5279 case CODE_LABEL:
5280 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5281 assemble_name (asm_out_file, buf);
5282 break;
5283
5284 case CONST_INT:
5285 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5286 break;
5287
5288 case CONST:
5289 /* This used to output parentheses around the expression,
5290 but that does not work on the 386 (either ATT or BSD assembler). */
5291 output_pic_addr_const (file, XEXP (x, 0), code);
5292 break;
5293
5294 case CONST_DOUBLE:
5295 if (GET_MODE (x) == VOIDmode)
5296 {
5297 /* We can use %d if the number is <32 bits and positive. */
5298 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5299 fprintf (file, "0x%lx%08lx",
5300 (unsigned long) CONST_DOUBLE_HIGH (x),
5301 (unsigned long) CONST_DOUBLE_LOW (x));
5302 else
5303 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5304 }
5305 else
5306 /* We can't handle floating point constants;
5307 PRINT_OPERAND must handle them. */
5308 output_operand_lossage ("floating constant misused");
5309 break;
5310
5311 case PLUS:
5312 /* Some assemblers need integer constants to appear first. */
5313 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5314 {
5315 output_pic_addr_const (file, XEXP (x, 0), code);
5316 putc ('+', file);
5317 output_pic_addr_const (file, XEXP (x, 1), code);
5318 }
5319 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5320 {
5321 output_pic_addr_const (file, XEXP (x, 1), code);
5322 putc ('+', file);
5323 output_pic_addr_const (file, XEXP (x, 0), code);
5324 }
5325 else
5326 abort ();
5327 break;
5328
5329 case MINUS:
5330 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5331 output_pic_addr_const (file, XEXP (x, 0), code);
5332 putc ('-', file);
5333 output_pic_addr_const (file, XEXP (x, 1), code);
5334 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5335 break;
5336
5337 case UNSPEC:
5338 if (XVECLEN (x, 0) != 1)
5339 abort ();
5340 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5341 switch (XINT (x, 1))
5342 {
5343 case 6:
5344 fputs ("@GOT", file);
5345 break;
5346 case 7:
5347 fputs ("@GOTOFF", file);
5348 break;
5349 case 8:
5350 fputs ("@PLT", file);
5351 break;
5352 case 15:
5353 fputs ("@GOTPCREL(%RIP)", file);
5354 break;
5355 default:
5356 output_operand_lossage ("invalid UNSPEC as operand");
5357 break;
5358 }
5359 break;
5360
5361 default:
5362 output_operand_lossage ("invalid expression as operand");
5363 }
5364 }
5365
5366 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5367 We need to handle our special PIC relocations. */
5368
5369 void
5370 i386_dwarf_output_addr_const (file, x)
5371 FILE *file;
5372 rtx x;
5373 {
5374 #ifdef ASM_QUAD
5375 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5376 #else
5377 if (TARGET_64BIT)
5378 abort ();
5379 fprintf (file, "%s", ASM_LONG);
5380 #endif
5381 if (flag_pic)
5382 output_pic_addr_const (file, x, '\0');
5383 else
5384 output_addr_const (file, x);
5385 fputc ('\n', file);
5386 }
5387
5388 /* In the name of slightly smaller debug output, and to cater to
5389 general assembler lossage, recognize PIC+GOTOFF and turn it back
5390 into a direct symbol reference. */
5391
5392 rtx
5393 i386_simplify_dwarf_addr (orig_x)
5394 rtx orig_x;
5395 {
5396 rtx x = orig_x, y;
5397
5398 if (TARGET_64BIT)
5399 {
5400 if (GET_CODE (x) != CONST
5401 || GET_CODE (XEXP (x, 0)) != UNSPEC
5402 || XINT (XEXP (x, 0), 1) != 15)
5403 return orig_x;
5404 return XVECEXP (XEXP (x, 0), 0, 0);
5405 }
5406
5407 if (GET_CODE (x) != PLUS
5408 || GET_CODE (XEXP (x, 1)) != CONST)
5409 return orig_x;
5410
5411 if (GET_CODE (XEXP (x, 0)) == REG
5412 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5413 /* %ebx + GOT/GOTOFF */
5414 y = NULL;
5415 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5416 {
5417 /* %ebx + %reg * scale + GOT/GOTOFF */
5418 y = XEXP (x, 0);
5419 if (GET_CODE (XEXP (y, 0)) == REG
5420 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5421 y = XEXP (y, 1);
5422 else if (GET_CODE (XEXP (y, 1)) == REG
5423 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5424 y = XEXP (y, 0);
5425 else
5426 return orig_x;
5427 if (GET_CODE (y) != REG
5428 && GET_CODE (y) != MULT
5429 && GET_CODE (y) != ASHIFT)
5430 return orig_x;
5431 }
5432 else
5433 return orig_x;
5434
5435 x = XEXP (XEXP (x, 1), 0);
5436 if (GET_CODE (x) == UNSPEC
5437 && (XINT (x, 1) == 6
5438 || XINT (x, 1) == 7))
5439 {
5440 if (y)
5441 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5442 return XVECEXP (x, 0, 0);
5443 }
5444
5445 if (GET_CODE (x) == PLUS
5446 && GET_CODE (XEXP (x, 0)) == UNSPEC
5447 && GET_CODE (XEXP (x, 1)) == CONST_INT
5448 && (XINT (XEXP (x, 0), 1) == 6
5449 || XINT (XEXP (x, 0), 1) == 7))
5450 {
5451 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5452 if (y)
5453 return gen_rtx_PLUS (Pmode, y, x);
5454 return x;
5455 }
5456
5457 return orig_x;
5458 }
5459 \f
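/* Print to FILE the instruction suffix ("e", "ne", "g", "b", ...) for
   comparison CODE in mode MODE.  If REVERSE is nonzero the condition is
   reversed first; FP selects the alternate spellings ("nbe", "nb", "u",
   "nu") needed by fcmov on some assemblers.  */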
5460 static void
5461 put_condition_code (code, mode, reverse, fp, file)
5462 enum rtx_code code;
5463 enum machine_mode mode;
5464 int reverse, fp;
5465 FILE *file;
5466 {
5467 const char *suffix;
5468
5469 if (mode == CCFPmode || mode == CCFPUmode)
5470 {
5471 enum rtx_code second_code, bypass_code;
5472 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5473 if (bypass_code != NIL || second_code != NIL)
5474 abort ();
5475 code = ix86_fp_compare_code_to_integer (code);
5476 mode = CCmode;
5477 }
5478 if (reverse)
5479 code = reverse_condition (code);
5480
5481 switch (code)
5482 {
5483 case EQ:
5484 suffix = "e";
5485 break;
5486 case NE:
5487 suffix = "ne";
5488 break;
5489 case GT:
5490 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5491 abort ();
5492 suffix = "g";
5493 break;
5494 case GTU:
5495 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5496 Those same assemblers have the same but opposite lossage on cmov. */
5497 if (mode != CCmode)
5498 abort ();
5499 suffix = fp ? "nbe" : "a";
5500 break;
5501 case LT:
5502 if (mode == CCNOmode || mode == CCGOCmode)
5503 suffix = "s";
5504 else if (mode == CCmode || mode == CCGCmode)
5505 suffix = "l";
5506 else
5507 abort ();
5508 break;
5509 case LTU:
5510 if (mode != CCmode)
5511 abort ();
5512 suffix = "b";
5513 break;
5514 case GE:
5515 if (mode == CCNOmode || mode == CCGOCmode)
5516 suffix = "ns";
5517 else if (mode == CCmode || mode == CCGCmode)
5518 suffix = "ge";
5519 else
5520 abort ();
5521 break;
5522 case GEU:
5523 /* ??? As above. */
5524 if (mode != CCmode)
5525 abort ();
5526 suffix = fp ? "nb" : "ae";
5527 break;
5528 case LE:
5529 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5530 abort ();
5531 suffix = "le";
5532 break;
5533 case LEU:
5534 if (mode != CCmode)
5535 abort ();
5536 suffix = "be";
5537 break;
5538 case UNORDERED:
5539 suffix = fp ? "u" : "p";
5540 break;
5541 case ORDERED:
5542 suffix = fp ? "nu" : "np";
5543 break;
5544 default:
5545 abort ();
5546 }
5547 fputs (suffix, file);
5548 }
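/* For example (illustrative, added): a GT comparison in CCmode yields the
   suffix "g", so a set instruction template using %C emits "setg"; with
   FP set, a GTU comparison yields "nbe" for use with fcmov.  */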
5549
5550 void
5551 print_reg (x, code, file)
5552 rtx x;
5553 int code;
5554 FILE *file;
5555 {
5556 if (REGNO (x) == ARG_POINTER_REGNUM
5557 || REGNO (x) == FRAME_POINTER_REGNUM
5558 || REGNO (x) == FLAGS_REG
5559 || REGNO (x) == FPSR_REG)
5560 abort ();
5561
5562 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5563 putc ('%', file);
5564
5565 if (code == 'w' || MMX_REG_P (x))
5566 code = 2;
5567 else if (code == 'b')
5568 code = 1;
5569 else if (code == 'k')
5570 code = 4;
5571 else if (code == 'q')
5572 code = 8;
5573 else if (code == 'y')
5574 code = 3;
5575 else if (code == 'h')
5576 code = 0;
5577 else
5578 code = GET_MODE_SIZE (GET_MODE (x));
5579
5580 /* Irritatingly, AMD extended registers use a different naming convention
5581 from the normal registers. */
5582 if (REX_INT_REG_P (x))
5583 {
5584 if (!TARGET_64BIT)
5585 abort ();
5586 switch (code)
5587 {
5588 case 0:
5589 error ("extended registers have no high halves");
5590 break;
5591 case 1:
5592 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5593 break;
5594 case 2:
5595 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5596 break;
5597 case 4:
5598 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5599 break;
5600 case 8:
5601 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5602 break;
5603 default:
5604 error ("unsupported operand size for extended register");
5605 break;
5606 }
5607 return;
5608 }
5609 switch (code)
5610 {
5611 case 3:
5612 if (STACK_TOP_P (x))
5613 {
5614 fputs ("st(0)", file);
5615 break;
5616 }
5617 /* FALLTHRU */
5618 case 8:
5619 case 4:
5620 case 12:
5621 if (! ANY_FP_REG_P (x))
5622 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5623 /* FALLTHRU */
5624 case 16:
5625 case 2:
5626 fputs (hi_reg_name[REGNO (x)], file);
5627 break;
5628 case 1:
5629 fputs (qi_reg_name[REGNO (x)], file);
5630 break;
5631 case 0:
5632 fputs (qi_high_reg_name[REGNO (x)], file);
5633 break;
5634 default:
5635 abort ();
5636 }
5637 }
5638
5639 /* Meaning of CODE:
5640 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5641 C -- print opcode suffix for set/cmov insn.
5642 c -- like C, but print reversed condition
5643 F,f -- likewise, but for floating-point.
5644 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5645 nothing
5646 R -- print the prefix for register names.
5647 z -- print the opcode suffix for the size of the current operand.
5648 * -- print a star (in certain assembler syntax)
5649 A -- print an absolute memory reference.
5650 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5651 s -- print a shift double count, followed by the assembler's argument
5652 delimiter.
5653 b -- print the QImode name of the register for the indicated operand.
5654 %b0 would print %al if operands[0] is reg 0.
5655 w -- likewise, print the HImode name of the register.
5656 k -- likewise, print the SImode name of the register.
5657 q -- likewise, print the DImode name of the register.
5658 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5659 y -- print "st(0)" instead of "st" as a register.
5660 D -- print condition for SSE cmp instruction.
5661 P -- if PIC, print an @PLT suffix.
5662 X -- don't print any sort of PIC '@' suffix for a symbol.
5663 */
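/* Illustrative examples (added): assuming operands[0] is register 0 and
   AT&T syntax, "%b0" prints "%al", "%w0" prints "%ax", "%k0" prints
   "%eax", "%q0" prints "%rax" on x86-64, and "%h0" prints "%ah".  */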
5664
5665 void
5666 print_operand (file, x, code)
5667 FILE *file;
5668 rtx x;
5669 int code;
5670 {
5671 if (code)
5672 {
5673 switch (code)
5674 {
5675 case '*':
5676 if (ASSEMBLER_DIALECT == ASM_ATT)
5677 putc ('*', file);
5678 return;
5679
5680 case 'A':
5681 if (ASSEMBLER_DIALECT == ASM_ATT)
5682 putc ('*', file);
5683 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5684 {
5685 /* Intel syntax. For absolute addresses, registers should not
5686 be surrounded by brackets. */
5687 if (GET_CODE (x) != REG)
5688 {
5689 putc ('[', file);
5690 PRINT_OPERAND (file, x, 0);
5691 putc (']', file);
5692 return;
5693 }
5694 }
5695 else
5696 abort ();
5697
5698 PRINT_OPERAND (file, x, 0);
5699 return;
5700
5701
5702 case 'L':
5703 if (ASSEMBLER_DIALECT == ASM_ATT)
5704 putc ('l', file);
5705 return;
5706
5707 case 'W':
5708 if (ASSEMBLER_DIALECT == ASM_ATT)
5709 putc ('w', file);
5710 return;
5711
5712 case 'B':
5713 if (ASSEMBLER_DIALECT == ASM_ATT)
5714 putc ('b', file);
5715 return;
5716
5717 case 'Q':
5718 if (ASSEMBLER_DIALECT == ASM_ATT)
5719 putc ('l', file);
5720 return;
5721
5722 case 'S':
5723 if (ASSEMBLER_DIALECT == ASM_ATT)
5724 putc ('s', file);
5725 return;
5726
5727 case 'T':
5728 if (ASSEMBLER_DIALECT == ASM_ATT)
5729 putc ('t', file);
5730 return;
5731
5732 case 'z':
5733 /* 387 opcodes don't get size suffixes if the operands are
5734 registers. */
5735 if (STACK_REG_P (x))
5736 return;
5737
5738 /* Likewise if using Intel opcodes. */
5739 if (ASSEMBLER_DIALECT == ASM_INTEL)
5740 return;
5741
5742 /* Derive the opcode suffix from the size of the operand. */
5743 switch (GET_MODE_SIZE (GET_MODE (x)))
5744 {
5745 case 2:
5746 #ifdef HAVE_GAS_FILDS_FISTS
5747 putc ('s', file);
5748 #endif
5749 return;
5750
5751 case 4:
5752 if (GET_MODE (x) == SFmode)
5753 {
5754 putc ('s', file);
5755 return;
5756 }
5757 else
5758 putc ('l', file);
5759 return;
5760
5761 case 12:
5762 case 16:
5763 putc ('t', file);
5764 return;
5765
5766 case 8:
5767 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5768 {
5769 #ifdef GAS_MNEMONICS
5770 putc ('q', file);
5771 #else
5772 putc ('l', file);
5773 putc ('l', file);
5774 #endif
5775 }
5776 else
5777 putc ('l', file);
5778 return;
5779
5780 default:
5781 abort ();
5782 }
5783
5784 case 'b':
5785 case 'w':
5786 case 'k':
5787 case 'q':
5788 case 'h':
5789 case 'y':
5790 case 'X':
5791 case 'P':
5792 break;
5793
5794 case 's':
5795 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5796 {
5797 PRINT_OPERAND (file, x, 0);
5798 putc (',', file);
5799 }
5800 return;
5801
5802 case 'D':
5803 /* A little bit of braindamage here. The SSE compare instructions
5804 use completely different names for the comparisons than the
5805 fp conditional moves do. */
5806 switch (GET_CODE (x))
5807 {
5808 case EQ:
5809 case UNEQ:
5810 fputs ("eq", file);
5811 break;
5812 case LT:
5813 case UNLT:
5814 fputs ("lt", file);
5815 break;
5816 case LE:
5817 case UNLE:
5818 fputs ("le", file);
5819 break;
5820 case UNORDERED:
5821 fputs ("unord", file);
5822 break;
5823 case NE:
5824 case LTGT:
5825 fputs ("neq", file);
5826 break;
5827 case UNGE:
5828 case GE:
5829 fputs ("nlt", file);
5830 break;
5831 case UNGT:
5832 case GT:
5833 fputs ("nle", file);
5834 break;
5835 case ORDERED:
5836 fputs ("ord", file);
5837 break;
5838 default:
5839 abort ();
5840 break;
5841 }
5842 return;
5843 case 'O':
5844 #ifdef CMOV_SUN_AS_SYNTAX
5845 if (ASSEMBLER_DIALECT == ASM_ATT)
5846 {
5847 switch (GET_MODE (x))
5848 {
5849 case HImode: putc ('w', file); break;
5850 case SImode:
5851 case SFmode: putc ('l', file); break;
5852 case DImode:
5853 case DFmode: putc ('q', file); break;
5854 default: abort ();
5855 }
5856 putc ('.', file);
5857 }
5858 #endif
5859 return;
5860 case 'C':
5861 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5862 return;
5863 case 'F':
5864 #ifdef CMOV_SUN_AS_SYNTAX
5865 if (ASSEMBLER_DIALECT == ASM_ATT)
5866 putc ('.', file);
5867 #endif
5868 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5869 return;
5870
5871 /* Like above, but reverse condition */
5872 case 'c':
5873 /* Check to see if argument to %c is really a constant
5874 and not a condition code which needs to be reversed. */
5875 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5876 {
5877 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5878 return;
5879 }
5880 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5881 return;
5882 case 'f':
5883 #ifdef CMOV_SUN_AS_SYNTAX
5884 if (ASSEMBLER_DIALECT == ASM_ATT)
5885 putc ('.', file);
5886 #endif
5887 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5888 return;
5889 case '+':
5890 {
5891 rtx x;
5892
5893 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5894 return;
5895
5896 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5897 if (x)
5898 {
5899 int pred_val = INTVAL (XEXP (x, 0));
5900
5901 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5902 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5903 {
5904 int taken = pred_val > REG_BR_PROB_BASE / 2;
5905 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5906
5907 /* Emit hints only in the case the default branch prediction
5908 heuristics would fail. */
5909 if (taken != cputaken)
5910 {
5911 /* We use 3e (DS) prefix for taken branches and
5912 2e (CS) prefix for not taken branches. */
5913 if (taken)
5914 fputs ("ds ; ", file);
5915 else
5916 fputs ("cs ; ", file);
5917 }
5918 }
5919 }
5920 return;
5921 }
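/* Illustrative sketch of the output (added): a forward conditional branch
   whose REG_BR_PROB note predicts "taken" disagrees with the CPU's static
   heuristic (forward branches default to not taken), so it gets a
   "ds ; " prefix, e.g. "ds ; jne .L5"; the opposite case gets "cs ; ".  */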
5922 default:
5923 output_operand_lossage ("invalid operand code `%c'", code);
5924 }
5925 }
5926
5927 if (GET_CODE (x) == REG)
5928 {
5929 PRINT_REG (x, code, file);
5930 }
5931
5932 else if (GET_CODE (x) == MEM)
5933 {
5934 /* No `byte ptr' prefix for call instructions. */
5935 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5936 {
5937 const char * size;
5938 switch (GET_MODE_SIZE (GET_MODE (x)))
5939 {
5940 case 1: size = "BYTE"; break;
5941 case 2: size = "WORD"; break;
5942 case 4: size = "DWORD"; break;
5943 case 8: size = "QWORD"; break;
5944 case 12: size = "XWORD"; break;
5945 case 16: size = "XMMWORD"; break;
5946 default:
5947 abort ();
5948 }
5949
5950 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5951 if (code == 'b')
5952 size = "BYTE";
5953 else if (code == 'w')
5954 size = "WORD";
5955 else if (code == 'k')
5956 size = "DWORD";
5957
5958 fputs (size, file);
5959 fputs (" PTR ", file);
5960 }
5961
5962 x = XEXP (x, 0);
5963 if (flag_pic && CONSTANT_ADDRESS_P (x))
5964 output_pic_addr_const (file, x, code);
5965 /* Avoid (%rip) for call operands. */
5966 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
5967 && GET_CODE (x) != CONST_INT)
5968 output_addr_const (file, x);
5969 else
5970 output_address (x);
5971 }
5972
5973 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5974 {
5975 REAL_VALUE_TYPE r;
5976 long l;
5977
5978 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5979 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5980
5981 if (ASSEMBLER_DIALECT == ASM_ATT)
5982 putc ('$', file);
5983 fprintf (file, "0x%lx", l);
5984 }
5985
5986 /* These float cases don't actually occur as immediate operands. */
5987 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5988 {
5989 REAL_VALUE_TYPE r;
5990 char dstr[30];
5991
5992 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5993 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5994 fprintf (file, "%s", dstr);
5995 }
5996
5997 else if (GET_CODE (x) == CONST_DOUBLE
5998 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5999 {
6000 REAL_VALUE_TYPE r;
6001 char dstr[30];
6002
6003 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6004 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6005 fprintf (file, "%s", dstr);
6006 }
6007 else
6008 {
6009 if (code != 'P')
6010 {
6011 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6012 {
6013 if (ASSEMBLER_DIALECT == ASM_ATT)
6014 putc ('$', file);
6015 }
6016 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6017 || GET_CODE (x) == LABEL_REF)
6018 {
6019 if (ASSEMBLER_DIALECT == ASM_ATT)
6020 putc ('$', file);
6021 else
6022 fputs ("OFFSET FLAT:", file);
6023 }
6024 }
6025 if (GET_CODE (x) == CONST_INT)
6026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6027 else if (flag_pic)
6028 output_pic_addr_const (file, x, code);
6029 else
6030 output_addr_const (file, x);
6031 }
6032 }
6033 \f
6034 /* Print a memory operand whose address is ADDR. */
6035
6036 void
6037 print_operand_address (file, addr)
6038 FILE *file;
6039 register rtx addr;
6040 {
6041 struct ix86_address parts;
6042 rtx base, index, disp;
6043 int scale;
6044
6045 if (! ix86_decompose_address (addr, &parts))
6046 abort ();
6047
6048 base = parts.base;
6049 index = parts.index;
6050 disp = parts.disp;
6051 scale = parts.scale;
6052
6053 if (!base && !index)
6054 {
6055 /* A displacement-only address requires special attention. */
6056
6057 if (GET_CODE (disp) == CONST_INT)
6058 {
6059 if (ASSEMBLER_DIALECT == ASM_INTEL)
6060 {
6061 if (USER_LABEL_PREFIX[0] == 0)
6062 putc ('%', file);
6063 fputs ("ds:", file);
6064 }
6065 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6066 }
6067 else if (flag_pic)
6068 output_pic_addr_const (file, addr, 0);
6069 else
6070 output_addr_const (file, addr);
6071
6072 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6073 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6074 fputs ("(%rip)", file);
6075 }
6076 else
6077 {
6078 if (ASSEMBLER_DIALECT == ASM_ATT)
6079 {
6080 if (disp)
6081 {
6082 if (flag_pic)
6083 output_pic_addr_const (file, disp, 0);
6084 else if (GET_CODE (disp) == LABEL_REF)
6085 output_asm_label (disp);
6086 else
6087 output_addr_const (file, disp);
6088 }
6089
6090 putc ('(', file);
6091 if (base)
6092 PRINT_REG (base, 0, file);
6093 if (index)
6094 {
6095 putc (',', file);
6096 PRINT_REG (index, 0, file);
6097 if (scale != 1)
6098 fprintf (file, ",%d", scale);
6099 }
6100 putc (')', file);
6101 }
6102 else
6103 {
6104 rtx offset = NULL_RTX;
6105
6106 if (disp)
6107 {
6108 /* Pull out the offset of a symbol; print any symbol itself. */
6109 if (GET_CODE (disp) == CONST
6110 && GET_CODE (XEXP (disp, 0)) == PLUS
6111 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6112 {
6113 offset = XEXP (XEXP (disp, 0), 1);
6114 disp = gen_rtx_CONST (VOIDmode,
6115 XEXP (XEXP (disp, 0), 0));
6116 }
6117
6118 if (flag_pic)
6119 output_pic_addr_const (file, disp, 0);
6120 else if (GET_CODE (disp) == LABEL_REF)
6121 output_asm_label (disp);
6122 else if (GET_CODE (disp) == CONST_INT)
6123 offset = disp;
6124 else
6125 output_addr_const (file, disp);
6126 }
6127
6128 putc ('[', file);
6129 if (base)
6130 {
6131 PRINT_REG (base, 0, file);
6132 if (offset)
6133 {
6134 if (INTVAL (offset) >= 0)
6135 putc ('+', file);
6136 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6137 }
6138 }
6139 else if (offset)
6140 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6141 else
6142 putc ('0', file);
6143
6144 if (index)
6145 {
6146 putc ('+', file);
6147 PRINT_REG (index, 0, file);
6148 if (scale != 1)
6149 fprintf (file, "*%d", scale);
6150 }
6151 putc (']', file);
6152 }
6153 }
6154 }
6155 \f
6156 /* Split one or more DImode RTL references into pairs of SImode
6157 references. The RTL can be REG, offsettable MEM, integer constant, or
6158 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6159 split and "num" is its length. lo_half and hi_half are output arrays
6160 that parallel "operands". */
6161
6162 void
6163 split_di (operands, num, lo_half, hi_half)
6164 rtx operands[];
6165 int num;
6166 rtx lo_half[], hi_half[];
6167 {
6168 while (num--)
6169 {
6170 rtx op = operands[num];
6171
6172 /* simplify_subreg refuses to split volatile memory addresses,
6173 but we still have to handle them. */
6174 if (GET_CODE (op) == MEM)
6175 {
6176 lo_half[num] = adjust_address (op, SImode, 0);
6177 hi_half[num] = adjust_address (op, SImode, 4);
6178 }
6179 else
6180 {
6181 lo_half[num] = simplify_gen_subreg (SImode, op,
6182 GET_MODE (op) == VOIDmode
6183 ? DImode : GET_MODE (op), 0);
6184 hi_half[num] = simplify_gen_subreg (SImode, op,
6185 GET_MODE (op) == VOIDmode
6186 ? DImode : GET_MODE (op), 4);
6187 }
6188 }
6189 }
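/* Rough example (added): for a pseudo (reg:DI 60), split_di produces
   lo_half = (subreg:SI (reg:DI 60) 0) and hi_half = (subreg:SI (reg:DI 60) 4),
   while a DImode MEM is split into two adjacent SImode MEMs at offsets
   0 and 4.  */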
6190 /* Split one or more TImode RTL references into pairs of DImode
6191 references. The RTL can be REG, offsettable MEM, integer constant, or
6192 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6193 split and "num" is its length. lo_half and hi_half are output arrays
6194 that parallel "operands". */
6195
6196 void
6197 split_ti (operands, num, lo_half, hi_half)
6198 rtx operands[];
6199 int num;
6200 rtx lo_half[], hi_half[];
6201 {
6202 while (num--)
6203 {
6204 rtx op = operands[num];
6205
6206 /* simplify_subreg refuses to split volatile memory addresses, but we
6207 still have to handle them. */
6208 if (GET_CODE (op) == MEM)
6209 {
6210 lo_half[num] = adjust_address (op, DImode, 0);
6211 hi_half[num] = adjust_address (op, DImode, 8);
6212 }
6213 else
6214 {
6215 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6216 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6217 }
6218 }
6219 }
6220 \f
6221 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6222 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6223 is the expression of the binary operation. The output may either be
6224 emitted here, or returned to the caller, like all output_* functions.
6225
6226 There is no guarantee that the operands are the same mode, as they
6227 might be within FLOAT or FLOAT_EXTEND expressions. */
6228
6229 #ifndef SYSV386_COMPAT
6230 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6231 wants to fix the assemblers because that causes incompatibility
6232 with gcc. No-one wants to fix gcc because that causes
6233 incompatibility with assemblers... You can use the option of
6234 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6235 #define SYSV386_COMPAT 1
6236 #endif
6237
6238 const char *
6239 output_387_binary_op (insn, operands)
6240 rtx insn;
6241 rtx *operands;
6242 {
6243 static char buf[30];
6244 const char *p;
6245 const char *ssep;
6246 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6247
6248 #ifdef ENABLE_CHECKING
6249 /* Even if we do not want to check the inputs, this documents input
6250 constraints. Which helps in understanding the following code. */
6251 if (STACK_REG_P (operands[0])
6252 && ((REG_P (operands[1])
6253 && REGNO (operands[0]) == REGNO (operands[1])
6254 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6255 || (REG_P (operands[2])
6256 && REGNO (operands[0]) == REGNO (operands[2])
6257 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6258 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6259 ; /* ok */
6260 else if (!is_sse)
6261 abort ();
6262 #endif
6263
6264 switch (GET_CODE (operands[3]))
6265 {
6266 case PLUS:
6267 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6268 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6269 p = "fiadd";
6270 else
6271 p = "fadd";
6272 ssep = "add";
6273 break;
6274
6275 case MINUS:
6276 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6277 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6278 p = "fisub";
6279 else
6280 p = "fsub";
6281 ssep = "sub";
6282 break;
6283
6284 case MULT:
6285 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6286 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6287 p = "fimul";
6288 else
6289 p = "fmul";
6290 ssep = "mul";
6291 break;
6292
6293 case DIV:
6294 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6295 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6296 p = "fidiv";
6297 else
6298 p = "fdiv";
6299 ssep = "div";
6300 break;
6301
6302 default:
6303 abort ();
6304 }
6305
6306 if (is_sse)
6307 {
6308 strcpy (buf, ssep);
6309 if (GET_MODE (operands[0]) == SFmode)
6310 strcat (buf, "ss\t{%2, %0|%0, %2}");
6311 else
6312 strcat (buf, "sd\t{%2, %0|%0, %2}");
6313 return buf;
6314 }
6315 strcpy (buf, p);
6316
6317 switch (GET_CODE (operands[3]))
6318 {
6319 case MULT:
6320 case PLUS:
6321 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6322 {
6323 rtx temp = operands[2];
6324 operands[2] = operands[1];
6325 operands[1] = temp;
6326 }
6327
6328 /* Now we know operands[0] == operands[1]. */
6329
6330 if (GET_CODE (operands[2]) == MEM)
6331 {
6332 p = "%z2\t%2";
6333 break;
6334 }
6335
6336 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6337 {
6338 if (STACK_TOP_P (operands[0]))
6339 /* How is it that we are storing to a dead operand[2]?
6340 Well, presumably operands[1] is dead too. We can't
6341 store the result to st(0) as st(0) gets popped on this
6342 instruction. Instead store to operands[2] (which I
6343 think has to be st(1)). st(1) will be popped later.
6344 gcc <= 2.8.1 didn't have this check and generated
6345 assembly code that the Unixware assembler rejected. */
6346 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6347 else
6348 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6349 break;
6350 }
6351
6352 if (STACK_TOP_P (operands[0]))
6353 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6354 else
6355 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6356 break;
6357
6358 case MINUS:
6359 case DIV:
6360 if (GET_CODE (operands[1]) == MEM)
6361 {
6362 p = "r%z1\t%1";
6363 break;
6364 }
6365
6366 if (GET_CODE (operands[2]) == MEM)
6367 {
6368 p = "%z2\t%2";
6369 break;
6370 }
6371
6372 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6373 {
6374 #if SYSV386_COMPAT
6375 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6376 derived assemblers, confusingly reverse the direction of
6377 the operation for fsub{r} and fdiv{r} when the
6378 destination register is not st(0). The Intel assembler
6379 doesn't have this brain damage. Read !SYSV386_COMPAT to
6380 figure out what the hardware really does. */
6381 if (STACK_TOP_P (operands[0]))
6382 p = "{p\t%0, %2|rp\t%2, %0}";
6383 else
6384 p = "{rp\t%2, %0|p\t%0, %2}";
6385 #else
6386 if (STACK_TOP_P (operands[0]))
6387 /* As above for fmul/fadd, we can't store to st(0). */
6388 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6389 else
6390 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6391 #endif
6392 break;
6393 }
6394
6395 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6396 {
6397 #if SYSV386_COMPAT
6398 if (STACK_TOP_P (operands[0]))
6399 p = "{rp\t%0, %1|p\t%1, %0}";
6400 else
6401 p = "{p\t%1, %0|rp\t%0, %1}";
6402 #else
6403 if (STACK_TOP_P (operands[0]))
6404 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6405 else
6406 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6407 #endif
6408 break;
6409 }
6410
6411 if (STACK_TOP_P (operands[0]))
6412 {
6413 if (STACK_TOP_P (operands[1]))
6414 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6415 else
6416 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6417 break;
6418 }
6419 else if (STACK_TOP_P (operands[1]))
6420 {
6421 #if SYSV386_COMPAT
6422 p = "{\t%1, %0|r\t%0, %1}";
6423 #else
6424 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6425 #endif
6426 }
6427 else
6428 {
6429 #if SYSV386_COMPAT
6430 p = "{r\t%2, %0|\t%0, %2}";
6431 #else
6432 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6433 #endif
6434 }
6435 break;
6436
6437 default:
6438 abort ();
6439 }
6440
6441 strcat (buf, p);
6442 return buf;
6443 }
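/* Example return values (illustrative, added): an SSE PLUS in SFmode yields
   "addss\t{%2, %0|%0, %2}"; a 387 PLUS on FP operands where operands[2]
   is a memory operand yields "fadd%z2\t%2".  */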
6444
6445 /* Output code to initialize control word copies used by
6446 trunc?f?i patterns. NORMAL is set to the current control word, while
6447 ROUND_DOWN is set to the control word used for rounding downwards. */
6448 void
6449 emit_i387_cw_initialization (normal, round_down)
6450 rtx normal, round_down;
6451 {
6452 rtx reg = gen_reg_rtx (HImode);
6453
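  /* Added note: fnstcw stores the current control word into NORMAL;
     ORing in 0xc00 (or inserting 0xc into the high byte) sets both
     rounding-control bits (bits 10 and 11), i.e. round toward zero,
     which is what C float-to-integer truncation requires.  */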
6454 emit_insn (gen_x86_fnstcw_1 (normal));
6455 emit_move_insn (reg, normal);
6456 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6457 && !TARGET_64BIT)
6458 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6459 else
6460 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6461 emit_move_insn (round_down, reg);
6462 }
6463
6464 /* Output code for INSN to convert a float to a signed int. OPERANDS
6465 are the insn operands. The output may be [HSD]Imode and the input
6466 operand may be [SDX]Fmode. */
6467
6468 const char *
6469 output_fix_trunc (insn, operands)
6470 rtx insn;
6471 rtx *operands;
6472 {
6473 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6474 int dimode_p = GET_MODE (operands[0]) == DImode;
6475
6476 /* Jump through a hoop or two for DImode, since the hardware has no
6477 non-popping instruction. We used to do this a different way, but
6478 that was somewhat fragile and broke with post-reload splitters. */
6479 if (dimode_p && !stack_top_dies)
6480 output_asm_insn ("fld\t%y1", operands);
6481
6482 if (!STACK_TOP_P (operands[1]))
6483 abort ();
6484
6485 if (GET_CODE (operands[0]) != MEM)
6486 abort ();
6487
6488 output_asm_insn ("fldcw\t%3", operands);
6489 if (stack_top_dies || dimode_p)
6490 output_asm_insn ("fistp%z0\t%0", operands);
6491 else
6492 output_asm_insn ("fist%z0\t%0", operands);
6493 output_asm_insn ("fldcw\t%2", operands);
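  /* Illustrative emitted sequence (added) for an SImode destination:
	fldcw	%3	(switch to the truncating control word)
	fistpl	%0	(or "fistl %0" when the stack top survives)
	fldcw	%2	(restore the original control word)  */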
6494
6495 return "";
6496 }
6497
6498 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6499 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6500 when fucom should be used. */
6501
6502 const char *
6503 output_fp_compare (insn, operands, eflags_p, unordered_p)
6504 rtx insn;
6505 rtx *operands;
6506 int eflags_p, unordered_p;
6507 {
6508 int stack_top_dies;
6509 rtx cmp_op0 = operands[0];
6510 rtx cmp_op1 = operands[1];
6511 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6512
6513 if (eflags_p == 2)
6514 {
6515 cmp_op0 = cmp_op1;
6516 cmp_op1 = operands[2];
6517 }
6518 if (is_sse)
6519 {
6520 if (GET_MODE (operands[0]) == SFmode)
6521 if (unordered_p)
6522 return "ucomiss\t{%1, %0|%0, %1}";
6523 else
6524 return "comiss\t{%1, %0|%0, %y}";
6525 else
6526 if (unordered_p)
6527 return "ucomisd\t{%1, %0|%0, %1}";
6528 else
6529 return "comisd\t{%1, %0|%0, %y}";
6530 }
6531
6532 if (! STACK_TOP_P (cmp_op0))
6533 abort ();
6534
6535 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6536
6537 if (STACK_REG_P (cmp_op1)
6538 && stack_top_dies
6539 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6540 && REGNO (cmp_op1) != FIRST_STACK_REG)
6541 {
6542 /* If both the top of the 387 stack and the other operand (also a
6543 stack register) die, then this must be a
6544 `fcompp' float compare. */
6545
6546 if (eflags_p == 1)
6547 {
6548 /* There is no double popping fcomi variant. Fortunately,
6549 eflags is immune from the fstp's cc clobbering. */
6550 if (unordered_p)
6551 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6552 else
6553 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6554 return "fstp\t%y0";
6555 }
6556 else
6557 {
6558 if (eflags_p == 2)
6559 {
6560 if (unordered_p)
6561 return "fucompp\n\tfnstsw\t%0";
6562 else
6563 return "fcompp\n\tfnstsw\t%0";
6564 }
6565 else
6566 {
6567 if (unordered_p)
6568 return "fucompp";
6569 else
6570 return "fcompp";
6571 }
6572 }
6573 }
6574 else
6575 {
6576 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6577
6578 static const char * const alt[24] =
6579 {
6580 "fcom%z1\t%y1",
6581 "fcomp%z1\t%y1",
6582 "fucom%z1\t%y1",
6583 "fucomp%z1\t%y1",
6584
6585 "ficom%z1\t%y1",
6586 "ficomp%z1\t%y1",
6587 NULL,
6588 NULL,
6589
6590 "fcomi\t{%y1, %0|%0, %y1}",
6591 "fcomip\t{%y1, %0|%0, %y1}",
6592 "fucomi\t{%y1, %0|%0, %y1}",
6593 "fucomip\t{%y1, %0|%0, %y1}",
6594
6595 NULL,
6596 NULL,
6597 NULL,
6598 NULL,
6599
6600 "fcom%z2\t%y2\n\tfnstsw\t%0",
6601 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6602 "fucom%z2\t%y2\n\tfnstsw\t%0",
6603 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6604
6605 "ficom%z2\t%y2\n\tfnstsw\t%0",
6606 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6607 NULL,
6608 NULL
6609 };
6610
6611 int mask;
6612 const char *ret;
6613
6614 mask = eflags_p << 3;
6615 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6616 mask |= unordered_p << 1;
6617 mask |= stack_top_dies;
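      /* Worked example (added): with eflags_p == 1, an FP operand, an
	 ordered compare and a dying stack top, mask is (1 << 3) | 0 | 0 | 1
	 == 9, which selects "fcomip\t{%y1, %0|%0, %y1}" above.  */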
6618
6619 if (mask >= 24)
6620 abort ();
6621 ret = alt[mask];
6622 if (ret == NULL)
6623 abort ();
6624
6625 return ret;
6626 }
6627 }
6628
6629 void
6630 ix86_output_addr_vec_elt (file, value)
6631 FILE *file;
6632 int value;
6633 {
6634 const char *directive = ASM_LONG;
6635
6636 if (TARGET_64BIT)
6637 {
6638 #ifdef ASM_QUAD
6639 directive = ASM_QUAD;
6640 #else
6641 abort ();
6642 #endif
6643 }
6644
6645 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6646 }
6647
6648 void
6649 ix86_output_addr_diff_elt (file, value, rel)
6650 FILE *file;
6651 int value, rel;
6652 {
6653 if (TARGET_64BIT)
6654 fprintf (file, "%s%s%d-.+(.-%s%d)\n",
6655 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6656 else if (HAVE_AS_GOTOFF_IN_DATA)
6657 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6658 else
6659 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6660 ASM_LONG, LPREFIX, value);
6661 }
6662 \f
6663 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6664 for the target. */
6665
6666 void
6667 ix86_expand_clear (dest)
6668 rtx dest;
6669 {
6670 rtx tmp;
6671
6672 /* We play register width games, which are only valid after reload. */
6673 if (!reload_completed)
6674 abort ();
6675
6676 /* Avoid HImode and its attendant prefix byte. */
6677 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6678 dest = gen_rtx_REG (SImode, REGNO (dest));
6679
6680 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6681
6682 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6683 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6684 {
6685 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6686 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6687 }
6688
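  /* Illustrative (added): for dest == %eax this typically matches the
     movsi_xor pattern and assembles to "xorl %eax, %eax"; with
     TARGET_USE_MOV0 and not optimizing for size, a plain "movl $0, %eax"
     set is emitted instead.  */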
6689 emit_insn (tmp);
6690 }
6691
6692 void
6693 ix86_expand_move (mode, operands)
6694 enum machine_mode mode;
6695 rtx operands[];
6696 {
6697 int strict = (reload_in_progress || reload_completed);
6698 rtx insn;
6699
6700 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6701 {
6702 /* Emit insns to move operands[1] into operands[0]. */
6703
6704 if (GET_CODE (operands[0]) == MEM)
6705 operands[1] = force_reg (Pmode, operands[1]);
6706 else
6707 {
6708 rtx temp = operands[0];
6709 if (GET_CODE (temp) != REG)
6710 temp = gen_reg_rtx (Pmode);
6711 temp = legitimize_pic_address (operands[1], temp);
6712 if (temp == operands[0])
6713 return;
6714 operands[1] = temp;
6715 }
6716 }
6717 else
6718 {
6719 if (GET_CODE (operands[0]) == MEM
6720 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6721 || !push_operand (operands[0], mode))
6722 && GET_CODE (operands[1]) == MEM)
6723 operands[1] = force_reg (mode, operands[1]);
6724
6725 if (push_operand (operands[0], mode)
6726 && ! general_no_elim_operand (operands[1], mode))
6727 operands[1] = copy_to_mode_reg (mode, operands[1]);
6728
6729 /* Force large constants in 64-bit compilation into registers
6730 so that they get CSEed. */
6731 if (TARGET_64BIT && mode == DImode
6732 && immediate_operand (operands[1], mode)
6733 && !x86_64_zero_extended_value (operands[1])
6734 && !register_operand (operands[0], mode)
6735 && optimize && !reload_completed && !reload_in_progress)
6736 operands[1] = copy_to_mode_reg (mode, operands[1]);
6737
6738 if (FLOAT_MODE_P (mode))
6739 {
6740 /* If we are loading a floating point constant to a register,
6741 force the value to memory now, since we'll get better code
6742 out the back end. */
6743
6744 if (strict)
6745 ;
6746 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6747 && register_operand (operands[0], mode))
6748 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6749 }
6750 }
6751
6752 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6753
6754 emit_insn (insn);
6755 }
6756
6757 void
6758 ix86_expand_vector_move (mode, operands)
6759 enum machine_mode mode;
6760 rtx operands[];
6761 {
6762 /* Force constants other than zero into memory. We do not know how
6763 the instructions used to build constants modify the upper 64 bits
6764 of the register; once we have that information we may be able
6765 to handle some of them more efficiently. */
6766 if ((reload_in_progress | reload_completed) == 0
6767 && register_operand (operands[0], mode)
6768 && CONSTANT_P (operands[1]))
6769 {
6770 rtx addr = gen_reg_rtx (Pmode);
6771 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6772 operands[1] = gen_rtx_MEM (mode, addr);
6773 }
6774
6775 /* Make operand1 a register if it isn't already. */
6776 if ((reload_in_progress | reload_completed) == 0
6777 && !register_operand (operands[0], mode)
6778 && !register_operand (operands[1], mode)
6779 && operands[1] != CONST0_RTX (mode))
6780 {
6781 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6782 emit_move_insn (operands[0], temp);
6783 return;
6784 }
6785
6786 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6787 }
6788
6789 /* Attempt to expand a binary operator. Make the expansion closer to the
6790 actual machine than just general_operand, which would allow 3 separate
6791 memory references (one output, two input) in a single insn. */
6792
6793 void
6794 ix86_expand_binary_operator (code, mode, operands)
6795 enum rtx_code code;
6796 enum machine_mode mode;
6797 rtx operands[];
6798 {
6799 int matching_memory;
6800 rtx src1, src2, dst, op, clob;
6801
6802 dst = operands[0];
6803 src1 = operands[1];
6804 src2 = operands[2];
6805
6806 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6807 if (GET_RTX_CLASS (code) == 'c'
6808 && (rtx_equal_p (dst, src2)
6809 || immediate_operand (src1, mode)))
6810 {
6811 rtx temp = src1;
6812 src1 = src2;
6813 src2 = temp;
6814 }
6815
6816 /* If the destination is memory, and we do not have matching source
6817 operands, do things in registers. */
6818 matching_memory = 0;
6819 if (GET_CODE (dst) == MEM)
6820 {
6821 if (rtx_equal_p (dst, src1))
6822 matching_memory = 1;
6823 else if (GET_RTX_CLASS (code) == 'c'
6824 && rtx_equal_p (dst, src2))
6825 matching_memory = 2;
6826 else
6827 dst = gen_reg_rtx (mode);
6828 }
6829
6830 /* Both source operands cannot be in memory. */
6831 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6832 {
6833 if (matching_memory != 2)
6834 src2 = force_reg (mode, src2);
6835 else
6836 src1 = force_reg (mode, src1);
6837 }
6838
6839 /* If the operation is not commutable, source 1 cannot be a constant
6840 or non-matching memory. */
6841 if ((CONSTANT_P (src1)
6842 || (!matching_memory && GET_CODE (src1) == MEM))
6843 && GET_RTX_CLASS (code) != 'c')
6844 src1 = force_reg (mode, src1);
6845
6846 /* If optimizing, copy to regs to improve CSE */
6847 if (optimize && ! no_new_pseudos)
6848 {
6849 if (GET_CODE (dst) == MEM)
6850 dst = gen_reg_rtx (mode);
6851 if (GET_CODE (src1) == MEM)
6852 src1 = force_reg (mode, src1);
6853 if (GET_CODE (src2) == MEM)
6854 src2 = force_reg (mode, src2);
6855 }
6856
6857 /* Emit the instruction. */
6858
6859 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6860 if (reload_in_progress)
6861 {
6862 /* Reload doesn't know about the flags register, and doesn't know that
6863 it doesn't want to clobber it. We can only do this with PLUS. */
6864 if (code != PLUS)
6865 abort ();
6866 emit_insn (op);
6867 }
6868 else
6869 {
6870 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6871 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6872 }
6873
6874 /* Fix up the destination if needed. */
6875 if (dst != operands[0])
6876 emit_move_insn (operands[0], dst);
6877 }
6878
6879 /* Return TRUE or FALSE depending on whether the binary operator meets the
6880 appropriate constraints. */
6881
6882 int
6883 ix86_binary_operator_ok (code, mode, operands)
6884 enum rtx_code code;
6885 enum machine_mode mode ATTRIBUTE_UNUSED;
6886 rtx operands[3];
6887 {
6888 /* Both source operands cannot be in memory. */
6889 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6890 return 0;
6891 /* If the operation is not commutable, source 1 cannot be a constant. */
6892 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6893 return 0;
6894 /* If the destination is memory, we must have a matching source operand. */
6895 if (GET_CODE (operands[0]) == MEM
6896 && ! (rtx_equal_p (operands[0], operands[1])
6897 || (GET_RTX_CLASS (code) == 'c'
6898 && rtx_equal_p (operands[0], operands[2]))))
6899 return 0;
6900 /* If the operation is not commutable and the source 1 is memory, we must
6901 have a matching destination. */
6902 if (GET_CODE (operands[1]) == MEM
6903 && GET_RTX_CLASS (code) != 'c'
6904 && ! rtx_equal_p (operands[0], operands[1]))
6905 return 0;
6906 return 1;
6907 }
6908
6909 /* Attempt to expand a unary operator. Make the expansion closer to the
6910 actual machine than just general_operand, which would allow 2 separate
6911 memory references (one output, one input) in a single insn. */
6912
6913 void
6914 ix86_expand_unary_operator (code, mode, operands)
6915 enum rtx_code code;
6916 enum machine_mode mode;
6917 rtx operands[];
6918 {
6919 int matching_memory;
6920 rtx src, dst, op, clob;
6921
6922 dst = operands[0];
6923 src = operands[1];
6924
6925 /* If the destination is memory, and we do not have matching source
6926 operands, do things in registers. */
6927 matching_memory = 0;
6928 if (GET_CODE (dst) == MEM)
6929 {
6930 if (rtx_equal_p (dst, src))
6931 matching_memory = 1;
6932 else
6933 dst = gen_reg_rtx (mode);
6934 }
6935
6936 /* When source operand is memory, destination must match. */
6937 if (!matching_memory && GET_CODE (src) == MEM)
6938 src = force_reg (mode, src);
6939
6940 /* If optimizing, copy to regs to improve CSE */
6941 if (optimize && ! no_new_pseudos)
6942 {
6943 if (GET_CODE (dst) == MEM)
6944 dst = gen_reg_rtx (mode);
6945 if (GET_CODE (src) == MEM)
6946 src = force_reg (mode, src);
6947 }
6948
6949 /* Emit the instruction. */
6950
6951 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6952 if (reload_in_progress || code == NOT)
6953 {
6954 /* Reload doesn't know about the flags register, and doesn't know that
6955 it doesn't want to clobber it. */
6956 if (code != NOT)
6957 abort ();
6958 emit_insn (op);
6959 }
6960 else
6961 {
6962 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6963 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6964 }
6965
6966 /* Fix up the destination if needed. */
6967 if (dst != operands[0])
6968 emit_move_insn (operands[0], dst);
6969 }
6970
6971 /* Return TRUE or FALSE depending on whether the unary operator meets the
6972 appropriate constraints. */
6973
6974 int
6975 ix86_unary_operator_ok (code, mode, operands)
6976 enum rtx_code code ATTRIBUTE_UNUSED;
6977 enum machine_mode mode ATTRIBUTE_UNUSED;
6978 rtx operands[2] ATTRIBUTE_UNUSED;
6979 {
6980 /* If one of operands is memory, source and destination must match. */
6981 if ((GET_CODE (operands[0]) == MEM
6982 || GET_CODE (operands[1]) == MEM)
6983 && ! rtx_equal_p (operands[0], operands[1]))
6984 return FALSE;
6985 return TRUE;
6986 }
6987
6988 /* Return TRUE or FALSE depending on whether the first SET in INSN
6989 has source and destination with matching CC modes, and that the
6990 CC mode is at least as constrained as REQ_MODE. */
6991
6992 int
6993 ix86_match_ccmode (insn, req_mode)
6994 rtx insn;
6995 enum machine_mode req_mode;
6996 {
6997 rtx set;
6998 enum machine_mode set_mode;
6999
7000 set = PATTERN (insn);
7001 if (GET_CODE (set) == PARALLEL)
7002 set = XVECEXP (set, 0, 0);
7003 if (GET_CODE (set) != SET)
7004 abort ();
7005 if (GET_CODE (SET_SRC (set)) != COMPARE)
7006 abort ();
7007
7008 set_mode = GET_MODE (SET_DEST (set));
7009 switch (set_mode)
7010 {
7011 case CCNOmode:
7012 if (req_mode != CCNOmode
7013 && (req_mode != CCmode
7014 || XEXP (SET_SRC (set), 1) != const0_rtx))
7015 return 0;
7016 break;
7017 case CCmode:
7018 if (req_mode == CCGCmode)
7019 return 0;
7020 /* FALLTHRU */
7021 case CCGCmode:
7022 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7023 return 0;
7024 /* FALLTHRU */
7025 case CCGOCmode:
7026 if (req_mode == CCZmode)
7027 return 0;
7028 /* FALLTHRU */
7029 case CCZmode:
7030 break;
7031
7032 default:
7033 abort ();
7034 }
7035
7036 return (GET_MODE (SET_SRC (set)) == set_mode);
7037 }
7038
7039 /* Generate insn patterns to do an integer compare of OPERANDS. */
7040
7041 static rtx
7042 ix86_expand_int_compare (code, op0, op1)
7043 enum rtx_code code;
7044 rtx op0, op1;
7045 {
7046 enum machine_mode cmpmode;
7047 rtx tmp, flags;
7048
7049 cmpmode = SELECT_CC_MODE (code, op0, op1);
7050 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7051
7052 /* This is very simple, but making the interface the same as in the
7053 FP case makes the rest of the code easier. */
7054 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7055 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7056
7057 /* Return the test that should be put into the flags user, i.e.
7058 the bcc, scc, or cmov instruction. */
7059 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7060 }
7061
7062 /* Figure out whether to use ordered or unordered fp comparisons.
7063 Return the appropriate mode to use. */
7064
7065 enum machine_mode
7066 ix86_fp_compare_mode (code)
7067 enum rtx_code code ATTRIBUTE_UNUSED;
7068 {
7069 /* ??? In order to make all comparisons reversible, we do all comparisons
7070 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7071 the trapping and nontrapping forms of all comparisons, we can make inequality
7072 comparisons trapping again, since it results in better code when using
7073 FCOM based compares. */
7074 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7075 }
7076
7077 enum machine_mode
7078 ix86_cc_mode (code, op0, op1)
7079 enum rtx_code code;
7080 rtx op0, op1;
7081 {
7082 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7083 return ix86_fp_compare_mode (code);
7084 switch (code)
7085 {
7086 /* Only zero flag is needed. */
7087 case EQ: /* ZF=0 */
7088 case NE: /* ZF!=0 */
7089 return CCZmode;
7090 /* Codes needing carry flag. */
7091 case GEU: /* CF=0 */
7092 case GTU: /* CF=0 & ZF=0 */
7093 case LTU: /* CF=1 */
7094 case LEU: /* CF=1 | ZF=1 */
7095 return CCmode;
7096 /* Codes possibly doable only with sign flag when
7097 comparing against zero. */
7098 case GE: /* SF=OF or SF=0 */
7099 case LT: /* SF<>OF or SF=1 */
7100 if (op1 == const0_rtx)
7101 return CCGOCmode;
7102 else
7103 /* For other cases Carry flag is not required. */
7104 return CCGCmode;
7105 /* Codes doable only with sign flag when comparing
7106 against zero, but we miss the jump instruction for it,
7107 so we need to use relational tests against overflow,
7108 which thus needs to be zero. */
7109 case GT: /* ZF=0 & SF=OF */
7110 case LE: /* ZF=1 | SF<>OF */
7111 if (op1 == const0_rtx)
7112 return CCNOmode;
7113 else
7114 return CCGCmode;
7115 /* The strcmp pattern does a (use flags), and combine may ask us for the
7116 proper mode. */
7117 case USE:
7118 return CCmode;
7119 default:
7120 abort ();
7121 }
7122 }
7123
7124 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7125
7126 int
7127 ix86_use_fcomi_compare (code)
7128 enum rtx_code code ATTRIBUTE_UNUSED;
7129 {
7130 enum rtx_code swapped_code = swap_condition (code);
7131 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7132 || (ix86_fp_comparison_cost (swapped_code)
7133 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7134 }
7135
7136 /* Swap, force into registers, or otherwise massage the two operands
7137 to a fp comparison. The operands are updated in place; the new
7138 comparison code is returned. */
7139
7140 static enum rtx_code
7141 ix86_prepare_fp_compare_args (code, pop0, pop1)
7142 enum rtx_code code;
7143 rtx *pop0, *pop1;
7144 {
7145 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7146 rtx op0 = *pop0, op1 = *pop1;
7147 enum machine_mode op_mode = GET_MODE (op0);
7148 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7149
7150 /* All of the unordered compare instructions only work on registers.
7151 The same is true of the XFmode compare instructions. The same is
7152 true of the fcomi compare instructions. */
7153
7154 if (!is_sse
7155 && (fpcmp_mode == CCFPUmode
7156 || op_mode == XFmode
7157 || op_mode == TFmode
7158 || ix86_use_fcomi_compare (code)))
7159 {
7160 op0 = force_reg (op_mode, op0);
7161 op1 = force_reg (op_mode, op1);
7162 }
7163 else
7164 {
7165 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7166 things around if they appear profitable, otherwise force op0
7167 into a register. */
7168
7169 if (standard_80387_constant_p (op0) == 0
7170 || (GET_CODE (op0) == MEM
7171 && ! (standard_80387_constant_p (op1) == 0
7172 || GET_CODE (op1) == MEM)))
7173 {
7174 rtx tmp;
7175 tmp = op0, op0 = op1, op1 = tmp;
7176 code = swap_condition (code);
7177 }
7178
7179 if (GET_CODE (op0) != REG)
7180 op0 = force_reg (op_mode, op0);
7181
7182 if (CONSTANT_P (op1))
7183 {
7184 if (standard_80387_constant_p (op1))
7185 op1 = force_reg (op_mode, op1);
7186 else
7187 op1 = validize_mem (force_const_mem (op_mode, op1));
7188 }
7189 }
7190
7191 /* Try to rearrange the comparison to make it cheaper. */
7192 if (ix86_fp_comparison_cost (code)
7193 > ix86_fp_comparison_cost (swap_condition (code))
7194 && (GET_CODE (op1) == REG || !no_new_pseudos))
7195 {
7196 rtx tmp;
7197 tmp = op0, op0 = op1, op1 = tmp;
7198 code = swap_condition (code);
7199 if (GET_CODE (op0) != REG)
7200 op0 = force_reg (op_mode, op0);
7201 }
7202
7203 *pop0 = op0;
7204 *pop1 = op1;
7205 return code;
7206 }
7207
7208 /* Convert the comparison codes we use to represent FP comparisons to the
7209 integer code that will result in the proper branch. Return UNKNOWN if no
7210 such code is available. */
7211 static enum rtx_code
7212 ix86_fp_compare_code_to_integer (code)
7213 enum rtx_code code;
7214 {
7215 switch (code)
7216 {
7217 case GT:
7218 return GTU;
7219 case GE:
7220 return GEU;
7221 case ORDERED:
7222 case UNORDERED:
7223 return code;
7224 break;
7225 case UNEQ:
7226 return EQ;
7227 break;
7228 case UNLT:
7229 return LTU;
7230 break;
7231 case UNLE:
7232 return LEU;
7233 break;
7234 case LTGT:
7235 return NE;
7236 break;
7237 default:
7238 return UNKNOWN;
7239 }
7240 }
7241
7242 /* Split comparison code CODE into comparisons we can do using branch
7243 instructions. BYPASS_CODE is the comparison code for the branch that will
7244 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7245 is not required, its value is set to NIL.
7246 We never require more than two branches. */
7247 static void
7248 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7249 enum rtx_code code, *bypass_code, *first_code, *second_code;
7250 {
7251 *first_code = code;
7252 *bypass_code = NIL;
7253 *second_code = NIL;
7254
7255 /* The fcomi comparison sets flags as follows:
7256
7257 cmp ZF PF CF
7258 > 0 0 0
7259 < 0 0 1
7260 = 1 0 0
7261 un 1 1 1 */
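  /* Added example: a plain LT reads CF, but CF is also set for unordered
     operands, so under IEEE it is rewritten below as UNLT guarded by an
     UNORDERED bypass branch around it.  */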
7262
7263 switch (code)
7264 {
7265 case GT: /* GTU - CF=0 & ZF=0 */
7266 case GE: /* GEU - CF=0 */
7267 case ORDERED: /* PF=0 */
7268 case UNORDERED: /* PF=1 */
7269 case UNEQ: /* EQ - ZF=1 */
7270 case UNLT: /* LTU - CF=1 */
7271 case UNLE: /* LEU - CF=1 | ZF=1 */
7272 case LTGT: /* EQ - ZF=0 */
7273 break;
7274 case LT: /* LTU - CF=1 - fails on unordered */
7275 *first_code = UNLT;
7276 *bypass_code = UNORDERED;
7277 break;
7278 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7279 *first_code = UNLE;
7280 *bypass_code = UNORDERED;
7281 break;
7282 case EQ: /* EQ - ZF=1 - fails on unordered */
7283 *first_code = UNEQ;
7284 *bypass_code = UNORDERED;
7285 break;
7286 case NE: /* NE - ZF=0 - fails on unordered */
7287 *first_code = LTGT;
7288 *second_code = UNORDERED;
7289 break;
7290 case UNGE: /* GEU - CF=0 - fails on unordered */
7291 *first_code = GE;
7292 *second_code = UNORDERED;
7293 break;
7294 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7295 *first_code = GT;
7296 *second_code = UNORDERED;
7297 break;
7298 default:
7299 abort ();
7300 }
7301 if (!TARGET_IEEE_FP)
7302 {
7303 *second_code = NIL;
7304 *bypass_code = NIL;
7305 }
7306 }
7307
7308 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
7309 All following functions use the number of instructions as the cost metric.
7310 In the future this should be tweaked to compute bytes for optimize_size and
7311 take into account performance of various instructions on various CPUs. */
7312 static int
7313 ix86_fp_comparison_arithmetics_cost (code)
7314 enum rtx_code code;
7315 {
7316 if (!TARGET_IEEE_FP)
7317 return 4;
7318 /* The cost of code output by ix86_expand_fp_compare. */
7319 switch (code)
7320 {
7321 case UNLE:
7322 case UNLT:
7323 case LTGT:
7324 case GT:
7325 case GE:
7326 case UNORDERED:
7327 case ORDERED:
7328 case UNEQ:
7329 return 4;
7330 break;
7331 case LT:
7332 case NE:
7333 case EQ:
7334 case UNGE:
7335 return 5;
7336 break;
7337 case LE:
7338 case UNGT:
7339 return 6;
7340 break;
7341 default:
7342 abort ();
7343 }
7344 }
7345
7346 /* Return cost of comparison done using fcomi operation.
7347 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7348 static int
7349 ix86_fp_comparison_fcomi_cost (code)
7350 enum rtx_code code;
7351 {
7352 enum rtx_code bypass_code, first_code, second_code;
7353 /* Return an arbitrarily high cost when the instruction is not supported - this
7354 prevents gcc from using it. */
7355 if (!TARGET_CMOVE)
7356 return 1024;
7357 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7358 return (bypass_code != NIL || second_code != NIL) + 2;
7359 }
7360
7361 /* Return cost of comparison done using sahf operation.
7362 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7363 static int
7364 ix86_fp_comparison_sahf_cost (code)
7365 enum rtx_code code;
7366 {
7367 enum rtx_code bypass_code, first_code, second_code;
7368 /* Return an arbitrarily high cost when the instruction is not preferred - this
7369 prevents gcc from using it. */
7370 if (!TARGET_USE_SAHF && !optimize_size)
7371 return 1024;
7372 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7373 return (bypass_code != NIL || second_code != NIL) + 3;
7374 }
7375
7376 /* Compute cost of the comparison done using any method.
7377 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7378 static int
7379 ix86_fp_comparison_cost (code)
7380 enum rtx_code code;
7381 {
7382 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7383 int min;
7384
7385 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7386 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7387
7388 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7389 if (min > sahf_cost)
7390 min = sahf_cost;
7391 if (min > fcomi_cost)
7392 min = fcomi_cost;
7393 return min;
7394 }
7395
7396 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7397
7398 static rtx
7399 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7400 enum rtx_code code;
7401 rtx op0, op1, scratch;
7402 rtx *second_test;
7403 rtx *bypass_test;
7404 {
7405 enum machine_mode fpcmp_mode, intcmp_mode;
7406 rtx tmp, tmp2;
7407 int cost = ix86_fp_comparison_cost (code);
7408 enum rtx_code bypass_code, first_code, second_code;
7409
7410 fpcmp_mode = ix86_fp_compare_mode (code);
7411 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7412
7413 if (second_test)
7414 *second_test = NULL_RTX;
7415 if (bypass_test)
7416 *bypass_test = NULL_RTX;
7417
7418 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7419
7420 /* Do fcomi/sahf based test when profitable. */
7421 if ((bypass_code == NIL || bypass_test)
7422 && (second_code == NIL || second_test)
7423 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7424 {
7425 if (TARGET_CMOVE)
7426 {
7427 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7428 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7429 tmp);
7430 emit_insn (tmp);
7431 }
7432 else
7433 {
7434 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7435 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7436 if (!scratch)
7437 scratch = gen_reg_rtx (HImode);
7438 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7439 emit_insn (gen_x86_sahf_1 (scratch));
7440 }
7441
7442 /* The FP codes work out to act like unsigned. */
7443 intcmp_mode = fpcmp_mode;
7444 code = first_code;
7445 if (bypass_code != NIL)
7446 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7447 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7448 const0_rtx);
7449 if (second_code != NIL)
7450 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7451 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7452 const0_rtx);
7453 }
7454 else
7455 {
7456 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7457 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7458 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7459 if (!scratch)
7460 scratch = gen_reg_rtx (HImode);
7461 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7462
7463 /* In the unordered case, we have to check C2 for NaN's, which
7464 doesn't happen to work out to anything nice combination-wise.
7465 So do some bit twiddling on the value we've got in AH to come
7466 up with an appropriate set of condition codes. */
7467
7468 intcmp_mode = CCNOmode;
7469 switch (code)
7470 {
7471 case GT:
7472 case UNGT:
7473 if (code == GT || !TARGET_IEEE_FP)
7474 {
7475 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7476 code = EQ;
7477 }
7478 else
7479 {
7480 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7481 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7482 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7483 intcmp_mode = CCmode;
7484 code = GEU;
7485 }
7486 break;
7487 case LT:
7488 case UNLT:
7489 if (code == LT && TARGET_IEEE_FP)
7490 {
7491 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7492 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7493 intcmp_mode = CCmode;
7494 code = EQ;
7495 }
7496 else
7497 {
7498 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7499 code = NE;
7500 }
7501 break;
7502 case GE:
7503 case UNGE:
7504 if (code == GE || !TARGET_IEEE_FP)
7505 {
7506 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7507 code = EQ;
7508 }
7509 else
7510 {
7511 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7512 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7513 GEN_INT (0x01)));
7514 code = NE;
7515 }
7516 break;
7517 case LE:
7518 case UNLE:
7519 if (code == LE && TARGET_IEEE_FP)
7520 {
7521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7522 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7523 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7524 intcmp_mode = CCmode;
7525 code = LTU;
7526 }
7527 else
7528 {
7529 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7530 code = NE;
7531 }
7532 break;
7533 case EQ:
7534 case UNEQ:
7535 if (code == EQ && TARGET_IEEE_FP)
7536 {
7537 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7538 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7539 intcmp_mode = CCmode;
7540 code = EQ;
7541 }
7542 else
7543 {
7544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7545 code = NE;
7546 break;
7547 }
7548 break;
7549 case NE:
7550 case LTGT:
7551 if (code == NE && TARGET_IEEE_FP)
7552 {
7553 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7554 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7555 GEN_INT (0x40)));
7556 code = NE;
7557 }
7558 else
7559 {
7560 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7561 code = EQ;
7562 }
7563 break;
7564
7565 case UNORDERED:
7566 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7567 code = NE;
7568 break;
7569 case ORDERED:
7570 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7571 code = EQ;
7572 break;
7573
7574 default:
7575 abort ();
7576 }
7577 }
7578
7579 /* Return the test that should be put into the flags user, i.e.
7580 the bcc, scc, or cmov instruction. */
7581 return gen_rtx_fmt_ee (code, VOIDmode,
7582 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7583 const0_rtx);
7584 }
7585
7586 rtx
7587 ix86_expand_compare (code, second_test, bypass_test)
7588 enum rtx_code code;
7589 rtx *second_test, *bypass_test;
7590 {
7591 rtx op0, op1, ret;
7592 op0 = ix86_compare_op0;
7593 op1 = ix86_compare_op1;
7594
7595 if (second_test)
7596 *second_test = NULL_RTX;
7597 if (bypass_test)
7598 *bypass_test = NULL_RTX;
7599
7600 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7601 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7602 second_test, bypass_test);
7603 else
7604 ret = ix86_expand_int_compare (code, op0, op1);
7605
7606 return ret;
7607 }
7608
7609 /* Return true if the CODE will result in a nontrivial jump sequence. */
7610 bool
7611 ix86_fp_jump_nontrivial_p (code)
7612 enum rtx_code code;
7613 {
7614 enum rtx_code bypass_code, first_code, second_code;
7615 if (!TARGET_CMOVE)
7616 return true;
7617 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7618 return bypass_code != NIL || second_code != NIL;
7619 }
7620
7621 void
7622 ix86_expand_branch (code, label)
7623 enum rtx_code code;
7624 rtx label;
7625 {
7626 rtx tmp;
7627
7628 switch (GET_MODE (ix86_compare_op0))
7629 {
7630 case QImode:
7631 case HImode:
7632 case SImode:
7633 simple:
7634 tmp = ix86_expand_compare (code, NULL, NULL);
7635 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7636 gen_rtx_LABEL_REF (VOIDmode, label),
7637 pc_rtx);
7638 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7639 return;
7640
7641 case SFmode:
7642 case DFmode:
7643 case XFmode:
7644 case TFmode:
7645 {
7646 rtvec vec;
7647 int use_fcomi;
7648 enum rtx_code bypass_code, first_code, second_code;
7649
7650 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7651 &ix86_compare_op1);
7652
7653 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7654
7655 /* Check whether we will use the natural sequence with one jump. If
7656 so, we can expand the jump early. Otherwise delay expansion by
7657 creating a compound insn so as not to confuse the optimizers. */
7658 if (bypass_code == NIL && second_code == NIL
7659 && TARGET_CMOVE)
7660 {
7661 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7662 gen_rtx_LABEL_REF (VOIDmode, label),
7663 pc_rtx, NULL_RTX);
7664 }
7665 else
7666 {
7667 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7668 ix86_compare_op0, ix86_compare_op1);
7669 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7670 gen_rtx_LABEL_REF (VOIDmode, label),
7671 pc_rtx);
7672 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7673
7674 use_fcomi = ix86_use_fcomi_compare (code);
7675 vec = rtvec_alloc (3 + !use_fcomi);
7676 RTVEC_ELT (vec, 0) = tmp;
7677 RTVEC_ELT (vec, 1)
7678 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7679 RTVEC_ELT (vec, 2)
7680 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7681 if (! use_fcomi)
7682 RTVEC_ELT (vec, 3)
7683 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7684
7685 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7686 }
7687 return;
7688 }
7689
7690 case DImode:
7691 if (TARGET_64BIT)
7692 goto simple;
7693 /* Expand DImode branch into multiple compare+branch. */
7694 {
7695 rtx lo[2], hi[2], label2;
7696 enum rtx_code code1, code2, code3;
7697
7698 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7699 {
7700 tmp = ix86_compare_op0;
7701 ix86_compare_op0 = ix86_compare_op1;
7702 ix86_compare_op1 = tmp;
7703 code = swap_condition (code);
7704 }
7705 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7706 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7707
7708 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7709 avoid two branches. This costs one extra insn, so disable when
7710 optimizing for size. */
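 /* Roughly, for (a == b) this emits (register names illustrative):

 movl hi(a), %tmp
 xorl hi(b), %tmp
 movl lo(a), %tmp2
 xorl lo(b), %tmp2
 orl %tmp2, %tmp
 je/jne label

 instead of two compare-and-branch pairs. */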
7711
7712 if ((code == EQ || code == NE)
7713 && (!optimize_size
7714 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7715 {
7716 rtx xor0, xor1;
7717
7718 xor1 = hi[0];
7719 if (hi[1] != const0_rtx)
7720 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7721 NULL_RTX, 0, OPTAB_WIDEN);
7722
7723 xor0 = lo[0];
7724 if (lo[1] != const0_rtx)
7725 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7726 NULL_RTX, 0, OPTAB_WIDEN);
7727
7728 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7729 NULL_RTX, 0, OPTAB_WIDEN);
7730
7731 ix86_compare_op0 = tmp;
7732 ix86_compare_op1 = const0_rtx;
7733 ix86_expand_branch (code, label);
7734 return;
7735 }
7736
7737 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
7738 op1 is a constant and the low word is zero, then we can just
7739 examine the high word. */
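 /* For example, an unsigned (x < 0x200000000) reduces to (hi(x) < 2),
 since the low word of the constant is zero; the same reasoning applies
 to LT, GE and GEU. */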
7740
7741 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7742 switch (code)
7743 {
7744 case LT: case LTU: case GE: case GEU:
7745 ix86_compare_op0 = hi[0];
7746 ix86_compare_op1 = hi[1];
7747 ix86_expand_branch (code, label);
7748 return;
7749 default:
7750 break;
7751 }
7752
7753 /* Otherwise, we need two or three jumps. */
7754
7755 label2 = gen_label_rtx ();
7756
7757 code1 = code;
7758 code2 = swap_condition (code);
7759 code3 = unsigned_condition (code);
7760
7761 switch (code)
7762 {
7763 case LT: case GT: case LTU: case GTU:
7764 break;
7765
7766 case LE: code1 = LT; code2 = GT; break;
7767 case GE: code1 = GT; code2 = LT; break;
7768 case LEU: code1 = LTU; code2 = GTU; break;
7769 case GEU: code1 = GTU; code2 = LTU; break;
7770
7771 case EQ: code1 = NIL; code2 = NE; break;
7772 case NE: code2 = NIL; break;
7773
7774 default:
7775 abort ();
7776 }
7777
7778 /*
7779 * a < b =>
7780 * if (hi(a) < hi(b)) goto true;
7781 * if (hi(a) > hi(b)) goto false;
7782 * if (lo(a) < lo(b)) goto true;
7783 * false:
7784 */
7785
7786 ix86_compare_op0 = hi[0];
7787 ix86_compare_op1 = hi[1];
7788
7789 if (code1 != NIL)
7790 ix86_expand_branch (code1, label);
7791 if (code2 != NIL)
7792 ix86_expand_branch (code2, label2);
7793
7794 ix86_compare_op0 = lo[0];
7795 ix86_compare_op1 = lo[1];
7796 ix86_expand_branch (code3, label);
7797
7798 if (code2 != NIL)
7799 emit_label (label2);
7800 return;
7801 }
7802
7803 default:
7804 abort ();
7805 }
7806 }
7807
7808 /* Split branch based on floating point condition. */
7809 void
7810 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7811 enum rtx_code code;
7812 rtx op1, op2, target1, target2, tmp;
7813 {
7814 rtx second, bypass;
7815 rtx label = NULL_RTX;
7816 rtx condition;
7817 int bypass_probability = -1, second_probability = -1, probability = -1;
7818 rtx i;
7819
7820 if (target2 != pc_rtx)
7821 {
7822 rtx tmp = target2;
7823 code = reverse_condition_maybe_unordered (code);
7824 target2 = target1;
7825 target1 = tmp;
7826 }
7827
7828 condition = ix86_expand_fp_compare (code, op1, op2,
7829 tmp, &second, &bypass);
7830
7831 if (split_branch_probability >= 0)
7832 {
7833 /* Distribute the probabilities across the jumps.
7834 Assume the BYPASS and SECOND tests are always
7835 for UNORDERED. */
7836 probability = split_branch_probability;
7837
7838 /* A value of 1 is low enough that the probability does not
7839 need to be updated. Later we may run some experiments and see
7840 if unordered values are more frequent in practice. */
7841 if (bypass)
7842 bypass_probability = 1;
7843 if (second)
7844 second_probability = 1;
7845 }
7846 if (bypass != NULL_RTX)
7847 {
7848 label = gen_label_rtx ();
7849 i = emit_jump_insn (gen_rtx_SET
7850 (VOIDmode, pc_rtx,
7851 gen_rtx_IF_THEN_ELSE (VOIDmode,
7852 bypass,
7853 gen_rtx_LABEL_REF (VOIDmode,
7854 label),
7855 pc_rtx)));
7856 if (bypass_probability >= 0)
7857 REG_NOTES (i)
7858 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7859 GEN_INT (bypass_probability),
7860 REG_NOTES (i));
7861 }
7862 i = emit_jump_insn (gen_rtx_SET
7863 (VOIDmode, pc_rtx,
7864 gen_rtx_IF_THEN_ELSE (VOIDmode,
7865 condition, target1, target2)));
7866 if (probability >= 0)
7867 REG_NOTES (i)
7868 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7869 GEN_INT (probability),
7870 REG_NOTES (i));
7871 if (second != NULL_RTX)
7872 {
7873 i = emit_jump_insn (gen_rtx_SET
7874 (VOIDmode, pc_rtx,
7875 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7876 target2)));
7877 if (second_probability >= 0)
7878 REG_NOTES (i)
7879 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7880 GEN_INT (second_probability),
7881 REG_NOTES (i));
7882 }
7883 if (label != NULL_RTX)
7884 emit_label (label);
7885 }
7886
7887 int
7888 ix86_expand_setcc (code, dest)
7889 enum rtx_code code;
7890 rtx dest;
7891 {
7892 rtx ret, tmp, tmpreg;
7893 rtx second_test, bypass_test;
7894
7895 if (GET_MODE (ix86_compare_op0) == DImode
7896 && !TARGET_64BIT)
7897 return 0; /* FAIL */
7898
7899 if (GET_MODE (dest) != QImode)
7900 abort ();
7901
7902 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7903 PUT_MODE (ret, QImode);
7904
7905 tmp = dest;
7906 tmpreg = dest;
7907
7908 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7909 if (bypass_test || second_test)
7910 {
7911 rtx test = second_test;
7912 int bypass = 0;
7913 rtx tmp2 = gen_reg_rtx (QImode);
7914 if (bypass_test)
7915 {
7916 if (second_test)
7917 abort ();
7918 test = bypass_test;
7919 bypass = 1;
7920 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7921 }
7922 PUT_MODE (test, QImode);
7923 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7924
7925 if (bypass)
7926 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7927 else
7928 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7929 }
7930
7931 return 1; /* DONE */
7932 }
7933
7934 int
7935 ix86_expand_int_movcc (operands)
7936 rtx operands[];
7937 {
7938 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7939 rtx compare_seq, compare_op;
7940 rtx second_test, bypass_test;
7941 enum machine_mode mode = GET_MODE (operands[0]);
7942
7943 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
7944 When the comparison is done with an immediate, we can convert it to LTU
7945 or GEU by adjusting the integer. */
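 /* For example, an unsigned (x <= 41) becomes (x < 42); the 0xffffffff
 check below skips the transformation when adding 1 would wrap. */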
7946
7947 if ((code == LEU || code == GTU)
7948 && GET_CODE (ix86_compare_op1) == CONST_INT
7949 && mode != HImode
7950 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7951 && GET_CODE (operands[2]) == CONST_INT
7952 && GET_CODE (operands[3]) == CONST_INT)
7953 {
7954 if (code == LEU)
7955 code = LTU;
7956 else
7957 code = GEU;
7958 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7959 }
7960
7961 start_sequence ();
7962 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7963 compare_seq = gen_sequence ();
7964 end_sequence ();
7965
7966 compare_code = GET_CODE (compare_op);
7967
7968 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7969 HImode insns, we'd be swallowed in word prefix ops. */
7970
7971 if (mode != HImode
7972 && (mode != DImode || TARGET_64BIT)
7973 && GET_CODE (operands[2]) == CONST_INT
7974 && GET_CODE (operands[3]) == CONST_INT)
7975 {
7976 rtx out = operands[0];
7977 HOST_WIDE_INT ct = INTVAL (operands[2]);
7978 HOST_WIDE_INT cf = INTVAL (operands[3]);
7979 HOST_WIDE_INT diff;
7980
7981 if ((compare_code == LTU || compare_code == GEU)
7982 && !second_test && !bypass_test)
7983 {
7984
7985 /* Detect overlap between destination and compare sources. */
7986 rtx tmp = out;
7987
7988 /* To simplify the rest of the code, restrict to the GEU case. */
7989 if (compare_code == LTU)
7990 {
7991 int tmp = ct;
7992 ct = cf;
7993 cf = tmp;
7994 compare_code = reverse_condition (compare_code);
7995 code = reverse_condition (code);
7996 }
7997 diff = ct - cf;
7998
7999 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8000 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8001 tmp = gen_reg_rtx (mode);
8002
8003 emit_insn (compare_seq);
8004 if (mode == DImode)
8005 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8006 else
8007 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8008
8009 if (diff == 1)
8010 {
8011 /*
8012 * cmpl op0,op1
8013 * sbbl dest,dest
8014 * [addl dest, ct]
8015 *
8016 * Size 5 - 8.
8017 */
8018 if (ct)
8019 tmp = expand_simple_binop (mode, PLUS,
8020 tmp, GEN_INT (ct),
8021 tmp, 1, OPTAB_DIRECT);
8022 }
8023 else if (cf == -1)
8024 {
8025 /*
8026 * cmpl op0,op1
8027 * sbbl dest,dest
8028 * orl $ct, dest
8029 *
8030 * Size 8.
8031 */
8032 tmp = expand_simple_binop (mode, IOR,
8033 tmp, GEN_INT (ct),
8034 tmp, 1, OPTAB_DIRECT);
8035 }
8036 else if (diff == -1 && ct)
8037 {
8038 /*
8039 * cmpl op0,op1
8040 * sbbl dest,dest
8041 * xorl $-1, dest
8042 * [addl dest, cf]
8043 *
8044 * Size 8 - 11.
8045 */
8046 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8047 if (cf)
8048 tmp = expand_simple_binop (mode, PLUS,
8049 tmp, GEN_INT (cf),
8050 tmp, 1, OPTAB_DIRECT);
8051 }
8052 else
8053 {
8054 /*
8055 * cmpl op0,op1
8056 * sbbl dest,dest
8057 * andl cf - ct, dest
8058 * [addl dest, ct]
8059 *
8060 * Size 8 - 11.
8061 */
8062 tmp = expand_simple_binop (mode, AND,
8063 tmp,
8064 gen_int_mode (cf - ct, mode),
8065 tmp, 1, OPTAB_DIRECT);
8066 if (ct)
8067 tmp = expand_simple_binop (mode, PLUS,
8068 tmp, GEN_INT (ct),
8069 tmp, 1, OPTAB_DIRECT);
8070 }
8071
8072 if (tmp != out)
8073 emit_move_insn (out, tmp);
8074
8075 return 1; /* DONE */
8076 }
8077
8078 diff = ct - cf;
8079 if (diff < 0)
8080 {
8081 HOST_WIDE_INT tmp;
8082 tmp = ct, ct = cf, cf = tmp;
8083 diff = -diff;
8084 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8085 {
8086 /* We may be reversing an unordered compare to a normal compare, which
8087 is not valid in general (we may convert a non-trapping condition
8088 to a trapping one); however, on i386 we currently emit all
8089 comparisons unordered. */
8090 compare_code = reverse_condition_maybe_unordered (compare_code);
8091 code = reverse_condition_maybe_unordered (code);
8092 }
8093 else
8094 {
8095 compare_code = reverse_condition (compare_code);
8096 code = reverse_condition (code);
8097 }
8098 }
8099 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8100 || diff == 3 || diff == 5 || diff == 9)
8101 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8102 {
8103 /*
8104 * xorl dest,dest
8105 * cmpl op1,op2
8106 * setcc dest
8107 * lea cf(dest*(ct-cf)),dest
8108 *
8109 * Size 14.
8110 *
8111 * This also catches the degenerate setcc-only case.
8112 */
8113
8114 rtx tmp;
8115 int nops;
8116
8117 out = emit_store_flag (out, code, ix86_compare_op0,
8118 ix86_compare_op1, VOIDmode, 0, 1);
8119
8120 nops = 0;
8121 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8122 arithmetic in the proper mode to match. */
8123 if (diff == 1)
8124 tmp = out;
8125 else
8126 {
8127 rtx out1;
8128 out1 = out;
8129 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8130 nops++;
8131 if (diff & 1)
8132 {
8133 tmp = gen_rtx_PLUS (mode, tmp, out1);
8134 nops++;
8135 }
8136 }
8137 if (cf != 0)
8138 {
8139 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8140 nops++;
8141 }
8142 if (tmp != out
8143 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8144 {
8145 if (nops == 1)
8146 {
8147 rtx clob;
8148
8149 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8150 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8151
8152 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8153 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8154 emit_insn (tmp);
8155 }
8156 else
8157 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8158 }
8159 if (out != operands[0])
8160 emit_move_insn (operands[0], out);
8161
8162 return 1; /* DONE */
8163 }
8164
8165 /*
8166 * General case: Jumpful:
8167 * xorl dest,dest cmpl op1, op2
8168 * cmpl op1, op2 movl ct, dest
8169 * setcc dest jcc 1f
8170 * decl dest movl cf, dest
8171 * andl (cf-ct),dest 1:
8172 * addl ct,dest
8173 *
8174 * Size 20. Size 14.
8175 *
8176 * This is reasonably steep, but branch mispredict costs are
8177 * high on modern cpus, so consider failing only if optimizing
8178 * for space.
8179 *
8180 * %%% Parameterize branch_cost on the tuning architecture, then
8181 * use that. The 80386 couldn't care less about mispredicts.
8182 */
8183
8184 if (!optimize_size && !TARGET_CMOVE)
8185 {
8186 if (ct == 0)
8187 {
8188 ct = cf;
8189 cf = 0;
8190 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8191 {
8192 /* We may be reversing an unordered compare to a normal compare,
8193 which is not valid in general (we may convert a non-trapping
8194 condition to a trapping one); however, on i386 we currently
8195 emit all comparisons unordered. */
8196 compare_code = reverse_condition_maybe_unordered (compare_code);
8197 code = reverse_condition_maybe_unordered (code);
8198 }
8199 else
8200 {
8201 compare_code = reverse_condition (compare_code);
8202 code = reverse_condition (code);
8203 }
8204 }
8205
8206 out = emit_store_flag (out, code, ix86_compare_op0,
8207 ix86_compare_op1, VOIDmode, 0, 1);
8208
8209 out = expand_simple_binop (mode, PLUS,
8210 out, constm1_rtx,
8211 out, 1, OPTAB_DIRECT);
8212 out = expand_simple_binop (mode, AND,
8213 out,
8214 gen_int_mode (cf - ct, mode),
8215 out, 1, OPTAB_DIRECT);
8216 out = expand_simple_binop (mode, PLUS,
8217 out, GEN_INT (ct),
8218 out, 1, OPTAB_DIRECT);
8219 if (out != operands[0])
8220 emit_move_insn (operands[0], out);
8221
8222 return 1; /* DONE */
8223 }
8224 }
8225
8226 if (!TARGET_CMOVE)
8227 {
8228 /* Try a few more things with specific constants and a variable. */
8229
8230 optab op;
8231 rtx var, orig_out, out, tmp;
8232
8233 if (optimize_size)
8234 return 0; /* FAIL */
8235
8236 /* If one of the two operands is an interesting constant, load a
8237 constant using the code above and mask the variable in with a logical operation. */
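 /* For example, (cond ? 0 : x) becomes ((cond ? 0 : -1) & x) and
 (cond ? -1 : x) becomes ((cond ? -1 : 0) | x); the recursive call
 below materializes the 0/-1 value. */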
8238
8239 if (GET_CODE (operands[2]) == CONST_INT)
8240 {
8241 var = operands[3];
8242 if (INTVAL (operands[2]) == 0)
8243 operands[3] = constm1_rtx, op = and_optab;
8244 else if (INTVAL (operands[2]) == -1)
8245 operands[3] = const0_rtx, op = ior_optab;
8246 else
8247 return 0; /* FAIL */
8248 }
8249 else if (GET_CODE (operands[3]) == CONST_INT)
8250 {
8251 var = operands[2];
8252 if (INTVAL (operands[3]) == 0)
8253 operands[2] = constm1_rtx, op = and_optab;
8254 else if (INTVAL (operands[3]) == -1)
8255 operands[2] = const0_rtx, op = ior_optab;
8256 else
8257 return 0; /* FAIL */
8258 }
8259 else
8260 return 0; /* FAIL */
8261
8262 orig_out = operands[0];
8263 tmp = gen_reg_rtx (mode);
8264 operands[0] = tmp;
8265
8266 /* Recurse to get the constant loaded. */
8267 if (ix86_expand_int_movcc (operands) == 0)
8268 return 0; /* FAIL */
8269
8270 /* Mask in the interesting variable. */
8271 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8272 OPTAB_WIDEN);
8273 if (out != orig_out)
8274 emit_move_insn (orig_out, out);
8275
8276 return 1; /* DONE */
8277 }
8278
8279 /*
8280 * For comparison with above,
8281 *
8282 * movl cf,dest
8283 * movl ct,tmp
8284 * cmpl op1,op2
8285 * cmovcc tmp,dest
8286 *
8287 * Size 15.
8288 */
8289
8290 if (! nonimmediate_operand (operands[2], mode))
8291 operands[2] = force_reg (mode, operands[2]);
8292 if (! nonimmediate_operand (operands[3], mode))
8293 operands[3] = force_reg (mode, operands[3]);
8294
8295 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8296 {
8297 rtx tmp = gen_reg_rtx (mode);
8298 emit_move_insn (tmp, operands[3]);
8299 operands[3] = tmp;
8300 }
8301 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8302 {
8303 rtx tmp = gen_reg_rtx (mode);
8304 emit_move_insn (tmp, operands[2]);
8305 operands[2] = tmp;
8306 }
8307 if (! register_operand (operands[2], VOIDmode)
8308 && ! register_operand (operands[3], VOIDmode))
8309 operands[2] = force_reg (mode, operands[2]);
8310
8311 emit_insn (compare_seq);
8312 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8313 gen_rtx_IF_THEN_ELSE (mode,
8314 compare_op, operands[2],
8315 operands[3])));
8316 if (bypass_test)
8317 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8318 gen_rtx_IF_THEN_ELSE (mode,
8319 bypass_test,
8320 operands[3],
8321 operands[0])));
8322 if (second_test)
8323 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8324 gen_rtx_IF_THEN_ELSE (mode,
8325 second_test,
8326 operands[2],
8327 operands[0])));
8328
8329 return 1; /* DONE */
8330 }
8331
8332 int
8333 ix86_expand_fp_movcc (operands)
8334 rtx operands[];
8335 {
8336 enum rtx_code code;
8337 rtx tmp;
8338 rtx compare_op, second_test, bypass_test;
8339
8340 /* For SF/DFmode conditional moves based on comparisons
8341 in the same mode, we may want to use SSE min/max instructions. */
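 /* For example, with a and b in SFmode, (a < b ? a : b) can be emitted as
 minss and (a > b ? a : b) as maxss; the checks below verify that the
 comparison and move operands line up this way. */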
8342 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8343 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8344 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8345 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8346 && (!TARGET_IEEE_FP
8347 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8348 /* We may be called from the post-reload splitter. */
8349 && (!REG_P (operands[0])
8350 || SSE_REG_P (operands[0])
8351 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8352 {
8353 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8354 code = GET_CODE (operands[1]);
8355
8356 /* See if we have a (cross) match between the comparison operands and
8357 the conditional move operands. */
8358 if (rtx_equal_p (operands[2], op1))
8359 {
8360 rtx tmp = op0;
8361 op0 = op1;
8362 op1 = tmp;
8363 code = reverse_condition_maybe_unordered (code);
8364 }
8365 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8366 {
8367 /* Check for min operation. */
8368 if (code == LT)
8369 {
8370 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8371 if (memory_operand (op0, VOIDmode))
8372 op0 = force_reg (GET_MODE (operands[0]), op0);
8373 if (GET_MODE (operands[0]) == SFmode)
8374 emit_insn (gen_minsf3 (operands[0], op0, op1));
8375 else
8376 emit_insn (gen_mindf3 (operands[0], op0, op1));
8377 return 1;
8378 }
8379 /* Check for max operation. */
8380 if (code == GT)
8381 {
8382 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8383 if (memory_operand (op0, VOIDmode))
8384 op0 = force_reg (GET_MODE (operands[0]), op0);
8385 if (GET_MODE (operands[0]) == SFmode)
8386 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8387 else
8388 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8389 return 1;
8390 }
8391 }
8392 /* Arrange for the condition to be an sse_comparison_operator. When we
8393 are in non-IEEE mode, try to canonicalize the destination operand
8394 to be first in the comparison; this helps reload avoid extra
8395 moves. */
8396 if (!sse_comparison_operator (operands[1], VOIDmode)
8397 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8398 {
8399 rtx tmp = ix86_compare_op0;
8400 ix86_compare_op0 = ix86_compare_op1;
8401 ix86_compare_op1 = tmp;
8402 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8403 VOIDmode, ix86_compare_op0,
8404 ix86_compare_op1);
8405 }
8406 /* Similarly, try to arrange for the result to be the first operand of the
8407 conditional move. We also don't support the NE comparison on SSE, so
8408 try to avoid it. */
8409 if ((rtx_equal_p (operands[0], operands[3])
8410 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8411 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8412 {
8413 rtx tmp = operands[2];
8414 operands[2] = operands[3];
8415 operands[3] = tmp;
8416 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8417 (GET_CODE (operands[1])),
8418 VOIDmode, ix86_compare_op0,
8419 ix86_compare_op1);
8420 }
8421 if (GET_MODE (operands[0]) == SFmode)
8422 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8423 operands[2], operands[3],
8424 ix86_compare_op0, ix86_compare_op1));
8425 else
8426 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8427 operands[2], operands[3],
8428 ix86_compare_op0, ix86_compare_op1));
8429 return 1;
8430 }
8431
8432 /* The floating point conditional move instructions don't directly
8433 support conditions resulting from a signed integer comparison. */
8434
8435 code = GET_CODE (operands[1]);
8436 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8437
8438 /* The floating point conditional move instructions don't directly
8439 support signed integer comparisons. */
8440
8441 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8442 {
8443 if (second_test != NULL || bypass_test != NULL)
8444 abort ();
8445 tmp = gen_reg_rtx (QImode);
8446 ix86_expand_setcc (code, tmp);
8447 code = NE;
8448 ix86_compare_op0 = tmp;
8449 ix86_compare_op1 = const0_rtx;
8450 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8451 }
8452 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8453 {
8454 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8455 emit_move_insn (tmp, operands[3]);
8456 operands[3] = tmp;
8457 }
8458 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8459 {
8460 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8461 emit_move_insn (tmp, operands[2]);
8462 operands[2] = tmp;
8463 }
8464
8465 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8466 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8467 compare_op,
8468 operands[2],
8469 operands[3])));
8470 if (bypass_test)
8471 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8472 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8473 bypass_test,
8474 operands[3],
8475 operands[0])));
8476 if (second_test)
8477 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8478 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8479 second_test,
8480 operands[2],
8481 operands[0])));
8482
8483 return 1;
8484 }
8485
8486 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8487 works for floating point parameters and non-offsettable memories.
8488 For pushes, it returns just stack offsets; the values will be saved
8489 in the right order. At most three parts are generated. */
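 /* For example, on ia32 a DFmode operand yields two SImode parts and an
 XFmode or TFmode operand yields three; on x86-64 an XFmode or TFmode
 operand yields a DImode low part and an SImode high part. */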
8490
8491 static int
8492 ix86_split_to_parts (operand, parts, mode)
8493 rtx operand;
8494 rtx *parts;
8495 enum machine_mode mode;
8496 {
8497 int size;
8498
8499 if (!TARGET_64BIT)
8500 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8501 else
8502 size = (GET_MODE_SIZE (mode) + 4) / 8;
8503
8504 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8505 abort ();
8506 if (size < 2 || size > 3)
8507 abort ();
8508
8509 /* Optimize constant pool references to immediates. This is used by fp moves,
8510 which force all constants to memory to allow combining. */
8511
8512 if (GET_CODE (operand) == MEM
8513 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8514 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8515 operand = get_pool_constant (XEXP (operand, 0));
8516
8517 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8518 {
8519 /* The only non-offsettable memories we handle are pushes. */
8520 if (! push_operand (operand, VOIDmode))
8521 abort ();
8522
8523 operand = copy_rtx (operand);
8524 PUT_MODE (operand, Pmode);
8525 parts[0] = parts[1] = parts[2] = operand;
8526 }
8527 else if (!TARGET_64BIT)
8528 {
8529 if (mode == DImode)
8530 split_di (&operand, 1, &parts[0], &parts[1]);
8531 else
8532 {
8533 if (REG_P (operand))
8534 {
8535 if (!reload_completed)
8536 abort ();
8537 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8538 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8539 if (size == 3)
8540 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8541 }
8542 else if (offsettable_memref_p (operand))
8543 {
8544 operand = adjust_address (operand, SImode, 0);
8545 parts[0] = operand;
8546 parts[1] = adjust_address (operand, SImode, 4);
8547 if (size == 3)
8548 parts[2] = adjust_address (operand, SImode, 8);
8549 }
8550 else if (GET_CODE (operand) == CONST_DOUBLE)
8551 {
8552 REAL_VALUE_TYPE r;
8553 long l[4];
8554
8555 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8556 switch (mode)
8557 {
8558 case XFmode:
8559 case TFmode:
8560 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8561 parts[2] = gen_int_mode (l[2], SImode);
8562 break;
8563 case DFmode:
8564 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8565 break;
8566 default:
8567 abort ();
8568 }
8569 parts[1] = gen_int_mode (l[1], SImode);
8570 parts[0] = gen_int_mode (l[0], SImode);
8571 }
8572 else
8573 abort ();
8574 }
8575 }
8576 else
8577 {
8578 if (mode == TImode)
8579 split_ti (&operand, 1, &parts[0], &parts[1]);
8580 if (mode == XFmode || mode == TFmode)
8581 {
8582 if (REG_P (operand))
8583 {
8584 if (!reload_completed)
8585 abort ();
8586 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8587 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8588 }
8589 else if (offsettable_memref_p (operand))
8590 {
8591 operand = adjust_address (operand, DImode, 0);
8592 parts[0] = operand;
8593 parts[1] = adjust_address (operand, SImode, 8);
8594 }
8595 else if (GET_CODE (operand) == CONST_DOUBLE)
8596 {
8597 REAL_VALUE_TYPE r;
8598 long l[3];
8599
8600 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8601 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8602 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
8603 if (HOST_BITS_PER_WIDE_INT >= 64)
8604 parts[0]
8605 = gen_int_mode
8606 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8607 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8608 DImode);
8609 else
8610 parts[0] = immed_double_const (l[0], l[1], DImode);
8611 parts[1] = gen_int_mode (l[2], SImode);
8612 }
8613 else
8614 abort ();
8615 }
8616 }
8617
8618 return size;
8619 }
8620
8621 /* Emit insns to perform a move or push of DI, DF, and XF values.
8622 Operands 2-4 receive the destination parts in the correct order;
8623 operands 5-7 receive the corresponding source parts. */
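 /* For example, on ia32 a DImode move is emitted as two SImode moves and an
 XFmode move as three; the parts are ordered below so that no destination
 part overwrites a source part, or a source address register, that is
 still needed. */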
8625
8626 void
8627 ix86_split_long_move (operands)
8628 rtx operands[];
8629 {
8630 rtx part[2][3];
8631 int nparts;
8632 int push = 0;
8633 int collisions = 0;
8634 enum machine_mode mode = GET_MODE (operands[0]);
8635
8636 /* The DFmode expanders may ask us to move a double.
8637 For a 64-bit target this is a single move. By hiding that fact
8638 here we simplify the i386.md splitters. */
8639 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8640 {
8641 /* Optimize constant pool references to immediates. This is used by
8642 fp moves, which force all constants to memory to allow combining. */
8643
8644 if (GET_CODE (operands[1]) == MEM
8645 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8646 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8647 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8648 if (push_operand (operands[0], VOIDmode))
8649 {
8650 operands[0] = copy_rtx (operands[0]);
8651 PUT_MODE (operands[0], Pmode);
8652 }
8653 else
8654 operands[0] = gen_lowpart (DImode, operands[0]);
8655 operands[1] = gen_lowpart (DImode, operands[1]);
8656 emit_move_insn (operands[0], operands[1]);
8657 return;
8658 }
8659
8660 /* The only non-offsettable memory we handle is a push. */
8661 if (push_operand (operands[0], VOIDmode))
8662 push = 1;
8663 else if (GET_CODE (operands[0]) == MEM
8664 && ! offsettable_memref_p (operands[0]))
8665 abort ();
8666
8667 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8668 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8669
8670 /* When emitting a push, take care with source operands on the stack. */
8671 if (push && GET_CODE (operands[1]) == MEM
8672 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8673 {
8674 if (nparts == 3)
8675 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8676 XEXP (part[1][2], 0));
8677 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8678 XEXP (part[1][1], 0));
8679 }
8680
8681 /* We need to do the copy in the right order in case an address register
8682 of the source overlaps the destination. */
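 /* For example, when moving from (mem (reg)) and a destination part is that
 same address register, writing it too early would clobber the address
 still needed to load the remaining parts; the reordering and lea fixups
 below handle this. */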
8683 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8684 {
8685 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8686 collisions++;
8687 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8688 collisions++;
8689 if (nparts == 3
8690 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8691 collisions++;
8692
8693 /* A collision in the middle part can be handled by reordering. */
8694 if (collisions == 1 && nparts == 3
8695 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8696 {
8697 rtx tmp;
8698 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8699 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8700 }
8701
8702 /* If there are more collisions, we can't handle them by reordering.
8703 Do an lea into the last part and use only one colliding move. */
8704 else if (collisions > 1)
8705 {
8706 collisions = 1;
8707 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8708 XEXP (part[1][0], 0)));
8709 part[1][0] = change_address (part[1][0],
8710 TARGET_64BIT ? DImode : SImode,
8711 part[0][nparts - 1]);
8712 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8713 if (nparts == 3)
8714 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8715 }
8716 }
8717
8718 if (push)
8719 {
8720 if (!TARGET_64BIT)
8721 {
8722 if (nparts == 3)
8723 {
8724 /* We use only the first 12 bytes of a TFmode value, but for pushing we
8725 are required to adjust the stack as if we were pushing a real
8726 16-byte value. */
8727 if (mode == TFmode && !TARGET_64BIT)
8728 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8729 GEN_INT (-4)));
8730 emit_move_insn (part[0][2], part[1][2]);
8731 }
8732 }
8733 else
8734 {
8735 /* In 64-bit mode we don't have a 32-bit push available. If this is a
8736 register, that is OK; we just use the larger counterpart. We also
8737 retype the memory; this comes from an attempt to avoid a REX prefix
8738 when moving the second half of a TFmode value. */
8739 if (GET_MODE (part[1][1]) == SImode)
8740 {
8741 if (GET_CODE (part[1][1]) == MEM)
8742 part[1][1] = adjust_address (part[1][1], DImode, 0);
8743 else if (REG_P (part[1][1]))
8744 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8745 else
8746 abort ();
8747 if (GET_MODE (part[1][0]) == SImode)
8748 part[1][0] = part[1][1];
8749 }
8750 }
8751 emit_move_insn (part[0][1], part[1][1]);
8752 emit_move_insn (part[0][0], part[1][0]);
8753 return;
8754 }
8755
8756 /* Choose the correct order so as not to overwrite the source before it is copied. */
8757 if ((REG_P (part[0][0])
8758 && REG_P (part[1][1])
8759 && (REGNO (part[0][0]) == REGNO (part[1][1])
8760 || (nparts == 3
8761 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8762 || (collisions > 0
8763 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8764 {
8765 if (nparts == 3)
8766 {
8767 operands[2] = part[0][2];
8768 operands[3] = part[0][1];
8769 operands[4] = part[0][0];
8770 operands[5] = part[1][2];
8771 operands[6] = part[1][1];
8772 operands[7] = part[1][0];
8773 }
8774 else
8775 {
8776 operands[2] = part[0][1];
8777 operands[3] = part[0][0];
8778 operands[5] = part[1][1];
8779 operands[6] = part[1][0];
8780 }
8781 }
8782 else
8783 {
8784 if (nparts == 3)
8785 {
8786 operands[2] = part[0][0];
8787 operands[3] = part[0][1];
8788 operands[4] = part[0][2];
8789 operands[5] = part[1][0];
8790 operands[6] = part[1][1];
8791 operands[7] = part[1][2];
8792 }
8793 else
8794 {
8795 operands[2] = part[0][0];
8796 operands[3] = part[0][1];
8797 operands[5] = part[1][0];
8798 operands[6] = part[1][1];
8799 }
8800 }
8801 emit_move_insn (operands[2], operands[5]);
8802 emit_move_insn (operands[3], operands[6]);
8803 if (nparts == 3)
8804 emit_move_insn (operands[4], operands[7]);
8805
8806 return;
8807 }
8808
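/* Split a DImode left shift by operands[2] into SImode operations.
 For a constant count the expansion is roughly (illustrative):

 count < 32: count >= 32:
 shldl $count, low, high movl low, high
 sall $count, low movl $0, low
 [sall $count-32, high]

 For a variable count the shift insns use the count modulo 32 and the
 x86_shift_adj patterns below fix up the count >= 32 case (a cmov-based
 pattern is used when TARGET_CMOVE). */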
8809 void
8810 ix86_split_ashldi (operands, scratch)
8811 rtx *operands, scratch;
8812 {
8813 rtx low[2], high[2];
8814 int count;
8815
8816 if (GET_CODE (operands[2]) == CONST_INT)
8817 {
8818 split_di (operands, 2, low, high);
8819 count = INTVAL (operands[2]) & 63;
8820
8821 if (count >= 32)
8822 {
8823 emit_move_insn (high[0], low[1]);
8824 emit_move_insn (low[0], const0_rtx);
8825
8826 if (count > 32)
8827 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8828 }
8829 else
8830 {
8831 if (!rtx_equal_p (operands[0], operands[1]))
8832 emit_move_insn (operands[0], operands[1]);
8833 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8834 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8835 }
8836 }
8837 else
8838 {
8839 if (!rtx_equal_p (operands[0], operands[1]))
8840 emit_move_insn (operands[0], operands[1]);
8841
8842 split_di (operands, 1, low, high);
8843
8844 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8845 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8846
8847 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8848 {
8849 if (! no_new_pseudos)
8850 scratch = force_reg (SImode, const0_rtx);
8851 else
8852 emit_move_insn (scratch, const0_rtx);
8853
8854 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8855 scratch));
8856 }
8857 else
8858 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8859 }
8860 }
8861
8862 void
8863 ix86_split_ashrdi (operands, scratch)
8864 rtx *operands, scratch;
8865 {
8866 rtx low[2], high[2];
8867 int count;
8868
8869 if (GET_CODE (operands[2]) == CONST_INT)
8870 {
8871 split_di (operands, 2, low, high);
8872 count = INTVAL (operands[2]) & 63;
8873
8874 if (count >= 32)
8875 {
8876 emit_move_insn (low[0], high[1]);
8877
8878 if (! reload_completed)
8879 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8880 else
8881 {
8882 emit_move_insn (high[0], low[0]);
8883 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8884 }
8885
8886 if (count > 32)
8887 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8888 }
8889 else
8890 {
8891 if (!rtx_equal_p (operands[0], operands[1]))
8892 emit_move_insn (operands[0], operands[1]);
8893 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8894 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8895 }
8896 }
8897 else
8898 {
8899 if (!rtx_equal_p (operands[0], operands[1]))
8900 emit_move_insn (operands[0], operands[1]);
8901
8902 split_di (operands, 1, low, high);
8903
8904 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8905 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8906
8907 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8908 {
8909 if (! no_new_pseudos)
8910 scratch = gen_reg_rtx (SImode);
8911 emit_move_insn (scratch, high[0]);
8912 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8913 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8914 scratch));
8915 }
8916 else
8917 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8918 }
8919 }
8920
8921 void
8922 ix86_split_lshrdi (operands, scratch)
8923 rtx *operands, scratch;
8924 {
8925 rtx low[2], high[2];
8926 int count;
8927
8928 if (GET_CODE (operands[2]) == CONST_INT)
8929 {
8930 split_di (operands, 2, low, high);
8931 count = INTVAL (operands[2]) & 63;
8932
8933 if (count >= 32)
8934 {
8935 emit_move_insn (low[0], high[1]);
8936 emit_move_insn (high[0], const0_rtx);
8937
8938 if (count > 32)
8939 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8940 }
8941 else
8942 {
8943 if (!rtx_equal_p (operands[0], operands[1]))
8944 emit_move_insn (operands[0], operands[1]);
8945 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8946 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8947 }
8948 }
8949 else
8950 {
8951 if (!rtx_equal_p (operands[0], operands[1]))
8952 emit_move_insn (operands[0], operands[1]);
8953
8954 split_di (operands, 1, low, high);
8955
8956 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8957 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8958
8959 /* Heh. By reversing the arguments, we can reuse this pattern. */
8960 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8961 {
8962 if (! no_new_pseudos)
8963 scratch = force_reg (SImode, const0_rtx);
8964 else
8965 emit_move_insn (scratch, const0_rtx);
8966
8967 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8968 scratch));
8969 }
8970 else
8971 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8972 }
8973 }
8974
8975 /* Helper function for the string operations below. Test VARIABLE for
8976 whether it is aligned to VALUE bytes. If it is, jump to the label. */
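 /* For example, ix86_expand_aligntest (destreg, 1) returns a label that is
 branched to when the low bit of DESTREG is clear, so the caller emits the
 one-byte fixup only on the unaligned path and places the label after it. */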
8977 static rtx
8978 ix86_expand_aligntest (variable, value)
8979 rtx variable;
8980 int value;
8981 {
8982 rtx label = gen_label_rtx ();
8983 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8984 if (GET_MODE (variable) == DImode)
8985 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8986 else
8987 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8988 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8989 1, label);
8990 return label;
8991 }
8992
8993 /* Subtract VALUE from COUNTREG. */
8994 static void
8995 ix86_adjust_counter (countreg, value)
8996 rtx countreg;
8997 HOST_WIDE_INT value;
8998 {
8999 if (GET_MODE (countreg) == DImode)
9000 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9001 else
9002 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9003 }
9004
9005 /* Zero extend possibly SImode EXP to Pmode register. */
9006 rtx
9007 ix86_zero_extend_to_Pmode (exp)
9008 rtx exp;
9009 {
9010 rtx r;
9011 if (GET_MODE (exp) == VOIDmode)
9012 return force_reg (Pmode, exp);
9013 if (GET_MODE (exp) == Pmode)
9014 return copy_to_mode_reg (Pmode, exp);
9015 r = gen_reg_rtx (Pmode);
9016 emit_insn (gen_zero_extendsidi2 (r, exp));
9017 return r;
9018 }
9019
9020 /* Expand string move (memcpy) operation. Use i386 string operations when
9021 profitable. expand_clrstr contains similar code. */
9022 int
9023 ix86_expand_movstr (dst, src, count_exp, align_exp)
9024 rtx dst, src, count_exp, align_exp;
9025 {
9026 rtx srcreg, destreg, countreg;
9027 enum machine_mode counter_mode;
9028 HOST_WIDE_INT align = 0;
9029 unsigned HOST_WIDE_INT count = 0;
9030 rtx insns;
9031
9032 start_sequence ();
9033
9034 if (GET_CODE (align_exp) == CONST_INT)
9035 align = INTVAL (align_exp);
9036
9037 /* This simple hack avoids all inlining code and simplifies code below. */
9038 if (!TARGET_ALIGN_STRINGOPS)
9039 align = 64;
9040
9041 if (GET_CODE (count_exp) == CONST_INT)
9042 count = INTVAL (count_exp);
9043
9044 /* Figure out the proper mode for the counter. For 32 bits it is always
9045 SImode; for 64 bits use SImode when possible, otherwise DImode.
9046 Set count to the number of bytes copied when known at compile time. */
9047 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9048 || x86_64_zero_extended_value (count_exp))
9049 counter_mode = SImode;
9050 else
9051 counter_mode = DImode;
9052
9053 if (counter_mode != SImode && counter_mode != DImode)
9054 abort ();
9055
9056 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9057 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9058
9059 emit_insn (gen_cld ());
9060
9061 /* When optimizing for size, emit a simple rep ; movsb instruction for
9062 counts not divisible by 4. */
9063
9064 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9065 {
9066 countreg = ix86_zero_extend_to_Pmode (count_exp);
9067 if (TARGET_64BIT)
9068 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9069 destreg, srcreg, countreg));
9070 else
9071 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9072 destreg, srcreg, countreg));
9073 }
9074
9075 /* For constant aligned (or small unaligned) copies use rep movsl
9076 followed by code copying the rest. For PentiumPro ensure 8 byte
9077 alignment to allow rep movsl acceleration. */
9078
9079 else if (count != 0
9080 && (align >= 8
9081 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9082 || optimize_size || count < (unsigned int) 64))
9083 {
9084 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9085 if (count & ~(size - 1))
9086 {
9087 countreg = copy_to_mode_reg (counter_mode,
9088 GEN_INT ((count >> (size == 4 ? 2 : 3))
9089 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9090 countreg = ix86_zero_extend_to_Pmode (countreg);
9091 if (size == 4)
9092 {
9093 if (TARGET_64BIT)
9094 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9095 destreg, srcreg, countreg));
9096 else
9097 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9098 destreg, srcreg, countreg));
9099 }
9100 else
9101 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9102 destreg, srcreg, countreg));
9103 }
9104 if (size == 8 && (count & 0x04))
9105 emit_insn (gen_strmovsi (destreg, srcreg));
9106 if (count & 0x02)
9107 emit_insn (gen_strmovhi (destreg, srcreg));
9108 if (count & 0x01)
9109 emit_insn (gen_strmovqi (destreg, srcreg));
9110 }
9111 /* The generic code based on the glibc implementation:
9112 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9113 allowing accelerated copying there)
9114 - copy the data using rep movsl
9115 - copy the rest. */
9116 else
9117 {
9118 rtx countreg2;
9119 rtx label = NULL;
9120
9121 /* In case we don't know anything about the alignment, default to the
9122 library version, since it is usually equally fast and results in
9123 shorter code. */
9124 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9125 {
9126 end_sequence ();
9127 return 0;
9128 }
9129
9130 if (TARGET_SINGLE_STRINGOP)
9131 emit_insn (gen_cld ());
9132
9133 countreg2 = gen_reg_rtx (Pmode);
9134 countreg = copy_to_mode_reg (counter_mode, count_exp);
9135
9136 /* We don't use loops to align destination and to copy parts smaller
9137 than 4 bytes, because gcc is able to optimize such code better (in
9138 the case the destination or the count really is aligned, gcc is often
9139 able to predict the branches) and also it is friendlier to the
9140 hardware branch prediction.
9141
9142 Using loops is beneficial for the generic case, because we can
9143 handle small counts using the loops. Many CPUs (such as Athlon)
9144 have large REP prefix setup costs.
9145
9146 This is quite costly. Maybe we can revisit this decision later or
9147 add some customizability to this code. */
9148
9149 if (count == 0
9150 && align < (TARGET_PENTIUMPRO && (count == 0
9151 || count >= (unsigned int) 260)
9152 ? 8 : UNITS_PER_WORD))
9153 {
9154 label = gen_label_rtx ();
9155 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9156 LEU, 0, counter_mode, 1, label);
9157 }
9158 if (align <= 1)
9159 {
9160 rtx label = ix86_expand_aligntest (destreg, 1);
9161 emit_insn (gen_strmovqi (destreg, srcreg));
9162 ix86_adjust_counter (countreg, 1);
9163 emit_label (label);
9164 LABEL_NUSES (label) = 1;
9165 }
9166 if (align <= 2)
9167 {
9168 rtx label = ix86_expand_aligntest (destreg, 2);
9169 emit_insn (gen_strmovhi (destreg, srcreg));
9170 ix86_adjust_counter (countreg, 2);
9171 emit_label (label);
9172 LABEL_NUSES (label) = 1;
9173 }
9174 if (align <= 4
9175 && ((TARGET_PENTIUMPRO && (count == 0
9176 || count >= (unsigned int) 260))
9177 || TARGET_64BIT))
9178 {
9179 rtx label = ix86_expand_aligntest (destreg, 4);
9180 emit_insn (gen_strmovsi (destreg, srcreg));
9181 ix86_adjust_counter (countreg, 4);
9182 emit_label (label);
9183 LABEL_NUSES (label) = 1;
9184 }
9185
9186 if (!TARGET_SINGLE_STRINGOP)
9187 emit_insn (gen_cld ());
9188 if (TARGET_64BIT)
9189 {
9190 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9191 GEN_INT (3)));
9192 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9193 destreg, srcreg, countreg2));
9194 }
9195 else
9196 {
9197 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9198 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9199 destreg, srcreg, countreg2));
9200 }
9201
9202 if (label)
9203 {
9204 emit_label (label);
9205 LABEL_NUSES (label) = 1;
9206 }
9207 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9208 emit_insn (gen_strmovsi (destreg, srcreg));
9209 if ((align <= 4 || count == 0) && TARGET_64BIT)
9210 {
9211 rtx label = ix86_expand_aligntest (countreg, 4);
9212 emit_insn (gen_strmovsi (destreg, srcreg));
9213 emit_label (label);
9214 LABEL_NUSES (label) = 1;
9215 }
9216 if (align > 2 && count != 0 && (count & 2))
9217 emit_insn (gen_strmovhi (destreg, srcreg));
9218 if (align <= 2 || count == 0)
9219 {
9220 rtx label = ix86_expand_aligntest (countreg, 2);
9221 emit_insn (gen_strmovhi (destreg, srcreg));
9222 emit_label (label);
9223 LABEL_NUSES (label) = 1;
9224 }
9225 if (align > 1 && count != 0 && (count & 1))
9226 emit_insn (gen_strmovqi (destreg, srcreg));
9227 if (align <= 1 || count == 0)
9228 {
9229 rtx label = ix86_expand_aligntest (countreg, 1);
9230 emit_insn (gen_strmovqi (destreg, srcreg));
9231 emit_label (label);
9232 LABEL_NUSES (label) = 1;
9233 }
9234 }
9235
9236 insns = get_insns ();
9237 end_sequence ();
9238
9239 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9240 emit_insns (insns);
9241 return 1;
9242 }
9243
9244 /* Expand string clear operation (bzero). Use i386 string operations when
9245 profitable. expand_movstr contains similar code. */
9246 int
9247 ix86_expand_clrstr (src, count_exp, align_exp)
9248 rtx src, count_exp, align_exp;
9249 {
9250 rtx destreg, zeroreg, countreg;
9251 enum machine_mode counter_mode;
9252 HOST_WIDE_INT align = 0;
9253 unsigned HOST_WIDE_INT count = 0;
9254
9255 if (GET_CODE (align_exp) == CONST_INT)
9256 align = INTVAL (align_exp);
9257
9258 /* This simple hack avoids all inlining code and simplifies code below. */
9259 if (!TARGET_ALIGN_STRINGOPS)
9260 align = 32;
9261
9262 if (GET_CODE (count_exp) == CONST_INT)
9263 count = INTVAL (count_exp);
9264 /* Figure out the proper mode for the counter. For 32 bits it is always
9265 SImode; for 64 bits use SImode when possible, otherwise DImode.
9266 Set count to the number of bytes cleared when known at compile time. */
9267 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9268 || x86_64_zero_extended_value (count_exp))
9269 counter_mode = SImode;
9270 else
9271 counter_mode = DImode;
9272
9273 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9274
9275 emit_insn (gen_cld ());
9276
9277 /* When optimizing for size, emit a simple rep ; stosb instruction for
9278 counts not divisible by 4. */
9279
9280 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9281 {
9282 countreg = ix86_zero_extend_to_Pmode (count_exp);
9283 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9284 if (TARGET_64BIT)
9285 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9286 destreg, countreg));
9287 else
9288 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9289 destreg, countreg));
9290 }
9291 else if (count != 0
9292 && (align >= 8
9293 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9294 || optimize_size || count < (unsigned int) 64))
9295 {
9296 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9297 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9298 if (count & ~(size - 1))
9299 {
9300 countreg = copy_to_mode_reg (counter_mode,
9301 GEN_INT ((count >> (size == 4 ? 2 : 3))
9302 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9303 countreg = ix86_zero_extend_to_Pmode (countreg);
9304 if (size == 4)
9305 {
9306 if (TARGET_64BIT)
9307 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9308 destreg, countreg));
9309 else
9310 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9311 destreg, countreg));
9312 }
9313 else
9314 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9315 destreg, countreg));
9316 }
9317 if (size == 8 && (count & 0x04))
9318 emit_insn (gen_strsetsi (destreg,
9319 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9320 if (count & 0x02)
9321 emit_insn (gen_strsethi (destreg,
9322 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9323 if (count & 0x01)
9324 emit_insn (gen_strsetqi (destreg,
9325 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9326 }
9327 else
9328 {
9329 rtx countreg2;
9330 rtx label = NULL;
9331
9332 /* In case we don't know anything about the alignment, default to the
9333 library version, since it is usually equally fast and results in
9334 shorter code. */
9335 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9336 return 0;
9337
9338 if (TARGET_SINGLE_STRINGOP)
9339 emit_insn (gen_cld ());
9340
9341 countreg2 = gen_reg_rtx (Pmode);
9342 countreg = copy_to_mode_reg (counter_mode, count_exp);
9343 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9344
9345 if (count == 0
9346 && align < (TARGET_PENTIUMPRO && (count == 0
9347 || count >= (unsigned int) 260)
9348 ? 8 : UNITS_PER_WORD))
9349 {
9350 label = gen_label_rtx ();
9351 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9352 LEU, 0, counter_mode, 1, label);
9353 }
9354 if (align <= 1)
9355 {
9356 rtx label = ix86_expand_aligntest (destreg, 1);
9357 emit_insn (gen_strsetqi (destreg,
9358 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9359 ix86_adjust_counter (countreg, 1);
9360 emit_label (label);
9361 LABEL_NUSES (label) = 1;
9362 }
9363 if (align <= 2)
9364 {
9365 rtx label = ix86_expand_aligntest (destreg, 2);
9366 emit_insn (gen_strsethi (destreg,
9367 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9368 ix86_adjust_counter (countreg, 2);
9369 emit_label (label);
9370 LABEL_NUSES (label) = 1;
9371 }
9372 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9373 || count >= (unsigned int) 260))
9374 {
9375 rtx label = ix86_expand_aligntest (destreg, 4);
9376 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9377 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9378 : zeroreg)));
9379 ix86_adjust_counter (countreg, 4);
9380 emit_label (label);
9381 LABEL_NUSES (label) = 1;
9382 }
9383
9384 if (!TARGET_SINGLE_STRINGOP)
9385 emit_insn (gen_cld ());
9386 if (TARGET_64BIT)
9387 {
9388 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9389 GEN_INT (3)));
9390 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9391 destreg, countreg2));
9392 }
9393 else
9394 {
9395 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9396 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9397 destreg, countreg2));
9398 }
9399
9400 if (label)
9401 {
9402 emit_label (label);
9403 LABEL_NUSES (label) = 1;
9404 }
9405 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9406 emit_insn (gen_strsetsi (destreg,
9407 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9408 if (TARGET_64BIT && (align <= 4 || count == 0))
9409 {
9410 rtx label = ix86_expand_aligntest (countreg, 4);
9411 emit_insn (gen_strsetsi (destreg,
9412 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9413 emit_label (label);
9414 LABEL_NUSES (label) = 1;
9415 }
9416 if (align > 2 && count != 0 && (count & 2))
9417 emit_insn (gen_strsethi (destreg,
9418 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9419 if (align <= 2 || count == 0)
9420 {
9421 rtx label = ix86_expand_aligntest (countreg, 2);
9422 emit_insn (gen_strsethi (destreg,
9423 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9424 emit_label (label);
9425 LABEL_NUSES (label) = 1;
9426 }
9427 if (align > 1 && count != 0 && (count & 1))
9428 emit_insn (gen_strsetqi (destreg,
9429 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9430 if (align <= 1 || count == 0)
9431 {
9432 rtx label = ix86_expand_aligntest (countreg, 1);
9433 emit_insn (gen_strsetqi (destreg,
9434 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9435 emit_label (label);
9436 LABEL_NUSES (label) = 1;
9437 }
9438 }
9439 return 1;
9440 }
9441 /* Expand strlen. */
9442 int
9443 ix86_expand_strlen (out, src, eoschar, align)
9444 rtx out, src, eoschar, align;
9445 {
9446 rtx addr, scratch1, scratch2, scratch3, scratch4;
9447
9448 /* The generic case of the strlen expander is long. Avoid expanding
9449 it unless TARGET_INLINE_ALL_STRINGOPS. */
9450
9451 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9452 && !TARGET_INLINE_ALL_STRINGOPS
9453 && !optimize_size
9454 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9455 return 0;
9456
9457 addr = force_reg (Pmode, XEXP (src, 0));
9458 scratch1 = gen_reg_rtx (Pmode);
9459
9460 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9461 && !optimize_size)
9462 {
9463 /* Well it seems that some optimizer does not combine a call like
9464 foo(strlen(bar), strlen(bar));
9465 when the move and the subtraction are done here. It does calculate
9466 the length just once when these instructions are done inside of
9467 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9468 often used and I use one fewer register for the lifetime of
9469 output_strlen_unroll() this is better. */
9470
9471 emit_move_insn (out, addr);
9472
9473 ix86_expand_strlensi_unroll_1 (out, align);
9474
9475 /* strlensi_unroll_1 returns the address of the zero at the end of
9476 the string, like memchr(), so compute the length by subtracting
9477 the start address. */
9478 if (TARGET_64BIT)
9479 emit_insn (gen_subdi3 (out, out, addr));
9480 else
9481 emit_insn (gen_subsi3 (out, out, addr));
9482 }
9483 else
9484 {
9485 scratch2 = gen_reg_rtx (Pmode);
9486 scratch3 = gen_reg_rtx (Pmode);
9487 scratch4 = force_reg (Pmode, constm1_rtx);
9488
9489 emit_move_insn (scratch3, addr);
9490 eoschar = force_reg (QImode, eoschar);
9491
9492 emit_insn (gen_cld ());
9493 if (TARGET_64BIT)
9494 {
9495 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9496 align, scratch4, scratch3));
9497 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9498 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9499 }
9500 else
9501 {
9502 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9503 align, scratch4, scratch3));
9504 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9505 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9506 }
9507 }
9508 return 1;
9509 }
9510
9511 /* Expand the appropriate insns for doing strlen if not just doing
9512 repnz; scasb
9513
9514 out = result, initialized with the start address
9515 align_rtx = alignment of the address.
9516 scratch = scratch register, initialized with the start address when
9517 not aligned, otherwise undefined
9518
9519 This is just the body. It needs the initialisations mentioned above and
9520 some address computing at the end. These things are done in i386.md. */
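/* As a rough sketch (register names illustrative only), the emitted code is

	; byte-check up to 3 leading bytes until OUT is 4-byte aligned,
	; jumping to end_0 as soon as a zero byte is found
     align_4:
	movl	(OUT), SCRATCH
	addl	$4, OUT
	; set TMP nonzero iff SCRATCH contains a zero byte (bit trick below)
	je	align_4
	; back OUT up so that it points at the terminating zero byte
     end_0:								*/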
9521
9522 static void
9523 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9524 rtx out, align_rtx;
9525 {
9526 int align;
9527 rtx tmp;
9528 rtx align_2_label = NULL_RTX;
9529 rtx align_3_label = NULL_RTX;
9530 rtx align_4_label = gen_label_rtx ();
9531 rtx end_0_label = gen_label_rtx ();
9532 rtx mem;
9533 rtx tmpreg = gen_reg_rtx (SImode);
9534 rtx scratch = gen_reg_rtx (SImode);
9535
9536 align = 0;
9537 if (GET_CODE (align_rtx) == CONST_INT)
9538 align = INTVAL (align_rtx);
9539
9540 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9541
9542 /* Is there a known alignment and is it less than 4? */
9543 if (align < 4)
9544 {
9545 rtx scratch1 = gen_reg_rtx (Pmode);
9546 emit_move_insn (scratch1, out);
9547 /* Is there a known alignment and is it not 2? */
9548 if (align != 2)
9549 {
9550 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9551 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9552
9553 /* Leave just the 3 lower bits. */
9554 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9555 NULL_RTX, 0, OPTAB_WIDEN);
9556
9557 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9558 Pmode, 1, align_4_label);
9559 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9560 Pmode, 1, align_2_label);
9561 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9562 Pmode, 1, align_3_label);
9563 }
9564 else
9565 {
9566 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9567 check whether it is aligned to a 4-byte boundary. */
9568
9569 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9570 NULL_RTX, 0, OPTAB_WIDEN);
9571
9572 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9573 Pmode, 1, align_4_label);
9574 }
9575
9576 mem = gen_rtx_MEM (QImode, out);
9577
9578 /* Now compare the bytes. */
9579
9580 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9581 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9582 QImode, 1, end_0_label);
9583
9584 /* Increment the address. */
9585 if (TARGET_64BIT)
9586 emit_insn (gen_adddi3 (out, out, const1_rtx));
9587 else
9588 emit_insn (gen_addsi3 (out, out, const1_rtx));
9589
9590 /* Not needed with an alignment of 2 */
9591 if (align != 2)
9592 {
9593 emit_label (align_2_label);
9594
9595 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9596 end_0_label);
9597
9598 if (TARGET_64BIT)
9599 emit_insn (gen_adddi3 (out, out, const1_rtx));
9600 else
9601 emit_insn (gen_addsi3 (out, out, const1_rtx));
9602
9603 emit_label (align_3_label);
9604 }
9605
9606 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9607 end_0_label);
9608
9609 if (TARGET_64BIT)
9610 emit_insn (gen_adddi3 (out, out, const1_rtx));
9611 else
9612 emit_insn (gen_addsi3 (out, out, const1_rtx));
9613 }
9614
9615 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
9616 align this loop; doing so only makes the program larger and does not
9617 speed it up. */
9618 emit_label (align_4_label);
9619
9620 mem = gen_rtx_MEM (SImode, out);
9621 emit_move_insn (scratch, mem);
9622 if (TARGET_64BIT)
9623 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9624 else
9625 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9626
9627 /* This formula yields a nonzero result iff one of the bytes is zero.
9628 This saves three branches inside the loop and many cycles. */
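/* For instance, with scratch = 0x40003141 (the bytes 'A', '1', 0, '@' in
   memory on a little-endian machine) the insns below compute

	0x40003141 + 0xfefefeff                   = 0x3eff3040
	0x3eff3040 & ~0x40003141 & 0x80808080     = 0x00800000

   which is nonzero because the third byte of scratch is zero, while a word
   with no zero byte always yields 0.  (Illustrative value only.)  */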
9629
9630 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9631 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9632 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9633 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9634 gen_int_mode (0x80808080, SImode)));
9635 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9636 align_4_label);
9637
9638 if (TARGET_CMOVE)
9639 {
9640 rtx reg = gen_reg_rtx (SImode);
9641 rtx reg2 = gen_reg_rtx (Pmode);
9642 emit_move_insn (reg, tmpreg);
9643 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9644
9645 /* If zero is not in the first two bytes, move two bytes forward. */
9646 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9647 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9648 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9649 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9650 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9651 reg,
9652 tmpreg)));
9653 /* Emit lea manually to avoid clobbering of flags. */
9654 emit_insn (gen_rtx_SET (SImode, reg2,
9655 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9656
9657 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9658 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9659 emit_insn (gen_rtx_SET (VOIDmode, out,
9660 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9661 reg2,
9662 out)));
9663
9664 }
9665 else
9666 {
9667 rtx end_2_label = gen_label_rtx ();
9668 /* Is zero in the first two bytes? */
9669
9670 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9671 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9672 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9673 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9674 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9675 pc_rtx);
9676 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9677 JUMP_LABEL (tmp) = end_2_label;
9678
9679 /* Not in the first two. Move two bytes forward. */
9680 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9681 if (TARGET_64BIT)
9682 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9683 else
9684 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9685
9686 emit_label (end_2_label);
9687
9688 }
9689
9690 /* Avoid branch in fixing the byte. */
9691 tmpreg = gen_lowpart (QImode, tmpreg);
9692 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9693 if (TARGET_64BIT)
9694 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9695 else
9696 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9697
9698 emit_label (end_0_label);
9699 }
9700 \f
9701 /* Clear stack slot assignments remembered from previous functions.
9702 This is called from INIT_EXPANDERS once before RTL is emitted for each
9703 function. */
9704
9705 static void
9706 ix86_init_machine_status (p)
9707 struct function *p;
9708 {
9709 p->machine = (struct machine_function *)
9710 xcalloc (1, sizeof (struct machine_function));
9711 }
9712
9713 /* Mark machine specific bits of P for GC. */
9714 static void
9715 ix86_mark_machine_status (p)
9716 struct function *p;
9717 {
9718 struct machine_function *machine = p->machine;
9719 enum machine_mode mode;
9720 int n;
9721
9722 if (! machine)
9723 return;
9724
9725 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9726 mode = (enum machine_mode) ((int) mode + 1))
9727 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9728 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9729 }
9730
9731 static void
9732 ix86_free_machine_status (p)
9733 struct function *p;
9734 {
9735 free (p->machine);
9736 p->machine = NULL;
9737 }
9738
9739 /* Return a MEM corresponding to a stack slot with mode MODE.
9740 Allocate a new slot if necessary.
9741
9742 The RTL for a function can have several slots available: N is
9743 which slot to use. */
9744
9745 rtx
9746 assign_386_stack_local (mode, n)
9747 enum machine_mode mode;
9748 int n;
9749 {
9750 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9751 abort ();
9752
9753 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9754 ix86_stack_locals[(int) mode][n]
9755 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9756
9757 return ix86_stack_locals[(int) mode][n];
9758 }
9759 \f
9760 /* Calculate the length of the memory address in the instruction
9761 encoding. Does not include the one-byte modrm, opcode, or prefix. */
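/* For illustration, the lengths computed here are, e.g.,

	(%ecx)		-> 0
	(%esp)		-> 1	(needs a SIB byte)
	8(%ebp)		-> 1	(8-bit displacement)
	0x1000(%ebx)	-> 4	(32-bit displacement)
	4(%ebx,%esi,2)	-> 2	(SIB byte + 8-bit displacement)

   Illustrative operands only; the modrm byte itself is never counted.  */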
9762
9763 static int
9764 memory_address_length (addr)
9765 rtx addr;
9766 {
9767 struct ix86_address parts;
9768 rtx base, index, disp;
9769 int len;
9770
9771 if (GET_CODE (addr) == PRE_DEC
9772 || GET_CODE (addr) == POST_INC
9773 || GET_CODE (addr) == PRE_MODIFY
9774 || GET_CODE (addr) == POST_MODIFY)
9775 return 0;
9776
9777 if (! ix86_decompose_address (addr, &parts))
9778 abort ();
9779
9780 base = parts.base;
9781 index = parts.index;
9782 disp = parts.disp;
9783 len = 0;
9784
9785 /* Register Indirect. */
9786 if (base && !index && !disp)
9787 {
9788 /* Special cases: ebp and esp need the two-byte modrm form. */
9789 if (addr == stack_pointer_rtx
9790 || addr == arg_pointer_rtx
9791 || addr == frame_pointer_rtx
9792 || addr == hard_frame_pointer_rtx)
9793 len = 1;
9794 }
9795
9796 /* Direct Addressing. */
9797 else if (disp && !base && !index)
9798 len = 4;
9799
9800 else
9801 {
9802 /* Find the length of the displacement constant. */
9803 if (disp)
9804 {
9805 if (GET_CODE (disp) == CONST_INT
9806 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9807 len = 1;
9808 else
9809 len = 4;
9810 }
9811
9812 /* An index requires the two-byte modrm form. */
9813 if (index)
9814 len += 1;
9815 }
9816
9817 return len;
9818 }
9819
9820 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
9821 is set, expect that the insn has an 8-bit immediate alternative. */
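/* E.g. "addl $3, %eax" with SHORTFORM set counts 1 byte for the
   sign-extended 8-bit immediate, "addl $1000, %eax" counts 4, and
   "addw $1000, %ax" (MODE_HI) counts 2.  (Illustrative insns only.)  */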
9822 int
9823 ix86_attr_length_immediate_default (insn, shortform)
9824 rtx insn;
9825 int shortform;
9826 {
9827 int len = 0;
9828 int i;
9829 extract_insn_cached (insn);
9830 for (i = recog_data.n_operands - 1; i >= 0; --i)
9831 if (CONSTANT_P (recog_data.operand[i]))
9832 {
9833 if (len)
9834 abort ();
9835 if (shortform
9836 && GET_CODE (recog_data.operand[i]) == CONST_INT
9837 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9838 len = 1;
9839 else
9840 {
9841 switch (get_attr_mode (insn))
9842 {
9843 case MODE_QI:
9844 len += 1;
9845 break;
9846 case MODE_HI:
9847 len += 2;
9848 break;
9849 case MODE_SI:
9850 len += 4;
9851 break;
9852 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9853 case MODE_DI:
9854 len += 4;
9855 break;
9856 default:
9857 fatal_insn ("unknown insn mode", insn);
9858 }
9859 }
9860 }
9861 return len;
9862 }
9863 /* Compute default value for "length_address" attribute. */
9864 int
9865 ix86_attr_length_address_default (insn)
9866 rtx insn;
9867 {
9868 int i;
9869 extract_insn_cached (insn);
9870 for (i = recog_data.n_operands - 1; i >= 0; --i)
9871 if (GET_CODE (recog_data.operand[i]) == MEM)
9872 {
9873 return memory_address_length (XEXP (recog_data.operand[i], 0));
9874 break;
9875 }
9876 return 0;
9877 }
9878 \f
9879 /* Return the maximum number of instructions a cpu can issue. */
9880
9881 static int
9882 ix86_issue_rate ()
9883 {
9884 switch (ix86_cpu)
9885 {
9886 case PROCESSOR_PENTIUM:
9887 case PROCESSOR_K6:
9888 return 2;
9889
9890 case PROCESSOR_PENTIUMPRO:
9891 case PROCESSOR_PENTIUM4:
9892 case PROCESSOR_ATHLON:
9893 return 3;
9894
9895 default:
9896 return 1;
9897 }
9898 }
9899
9900 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9901 by DEP_INSN and nothing else set by DEP_INSN. */
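/* E.g. for the (illustrative) sequence

	cmpl	%eax, %ebx	<- DEP_INSN, sets only the flags
	je	.L1		<- INSN, reads only the flags

   this returns nonzero, and ix86_adjust_cost below then treats the
   pair as free on Pentium.  */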
9902
9903 static int
9904 ix86_flags_dependant (insn, dep_insn, insn_type)
9905 rtx insn, dep_insn;
9906 enum attr_type insn_type;
9907 {
9908 rtx set, set2;
9909
9910 /* Simplify the test for uninteresting insns. */
9911 if (insn_type != TYPE_SETCC
9912 && insn_type != TYPE_ICMOV
9913 && insn_type != TYPE_FCMOV
9914 && insn_type != TYPE_IBR)
9915 return 0;
9916
9917 if ((set = single_set (dep_insn)) != 0)
9918 {
9919 set = SET_DEST (set);
9920 set2 = NULL_RTX;
9921 }
9922 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9923 && XVECLEN (PATTERN (dep_insn), 0) == 2
9924 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9925 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9926 {
9927 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9928 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9929 }
9930 else
9931 return 0;
9932
9933 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9934 return 0;
9935
9936 /* This test is true if the dependent insn reads the flags but
9937 not any other potentially set register. */
9938 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9939 return 0;
9940
9941 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9942 return 0;
9943
9944 return 1;
9945 }
9946
9947 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9948 address with operands set by DEP_INSN. */
9949
9950 static int
9951 ix86_agi_dependant (insn, dep_insn, insn_type)
9952 rtx insn, dep_insn;
9953 enum attr_type insn_type;
9954 {
9955 rtx addr;
9956
9957 if (insn_type == TYPE_LEA
9958 && TARGET_PENTIUM)
9959 {
9960 addr = PATTERN (insn);
9961 if (GET_CODE (addr) == SET)
9962 ;
9963 else if (GET_CODE (addr) == PARALLEL
9964 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9965 addr = XVECEXP (addr, 0, 0);
9966 else
9967 abort ();
9968 addr = SET_SRC (addr);
9969 }
9970 else
9971 {
9972 int i;
9973 extract_insn_cached (insn);
9974 for (i = recog_data.n_operands - 1; i >= 0; --i)
9975 if (GET_CODE (recog_data.operand[i]) == MEM)
9976 {
9977 addr = XEXP (recog_data.operand[i], 0);
9978 goto found;
9979 }
9980 return 0;
9981 found:;
9982 }
9983
9984 return modified_in_p (addr, dep_insn);
9985 }
9986
9987 static int
9988 ix86_adjust_cost (insn, link, dep_insn, cost)
9989 rtx insn, link, dep_insn;
9990 int cost;
9991 {
9992 enum attr_type insn_type, dep_insn_type;
9993 enum attr_memory memory, dep_memory;
9994 rtx set, set2;
9995 int dep_insn_code_number;
9996
9997 /* Anti and output dependencies have zero cost on all CPUs. */
9998 if (REG_NOTE_KIND (link) != 0)
9999 return 0;
10000
10001 dep_insn_code_number = recog_memoized (dep_insn);
10002
10003 /* If we can't recognize the insns, we can't really do anything. */
10004 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10005 return cost;
10006
10007 insn_type = get_attr_type (insn);
10008 dep_insn_type = get_attr_type (dep_insn);
10009
10010 switch (ix86_cpu)
10011 {
10012 case PROCESSOR_PENTIUM:
10013 /* Address Generation Interlock adds a cycle of latency. */
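/* (E.g. "movl %ebx, %eax" immediately followed by "movl (%eax), %ecx"
   stalls for one cycle while the load address is formed.)  */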
10014 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10015 cost += 1;
10016
10017 /* ??? Compares pair with jump/setcc. */
10018 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10019 cost = 0;
10020
10021 /* Floating point stores require the value to be ready one cycle earlier. */
10022 if (insn_type == TYPE_FMOV
10023 && get_attr_memory (insn) == MEMORY_STORE
10024 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10025 cost += 1;
10026 break;
10027
10028 case PROCESSOR_PENTIUMPRO:
10029 memory = get_attr_memory (insn);
10030 dep_memory = get_attr_memory (dep_insn);
10031
10032 /* Since we can't represent delayed latencies of load+operation,
10033 increase the cost here for non-imov insns. */
10034 if (dep_insn_type != TYPE_IMOV
10035 && dep_insn_type != TYPE_FMOV
10036 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10037 cost += 1;
10038
10039 /* INT->FP conversion is expensive. */
10040 if (get_attr_fp_int_src (dep_insn))
10041 cost += 5;
10042
10043 /* There is one cycle extra latency between an FP op and a store. */
10044 if (insn_type == TYPE_FMOV
10045 && (set = single_set (dep_insn)) != NULL_RTX
10046 && (set2 = single_set (insn)) != NULL_RTX
10047 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10048 && GET_CODE (SET_DEST (set2)) == MEM)
10049 cost += 1;
10050
10051 /* Model the ability of the reorder buffer to hide the latency of a load
10052 by executing it in parallel with the previous instruction when the
10053 previous instruction is not needed to compute the address. */
10054 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10055 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10056 {
10057 /* Claim moves to take one cycle, as the core can issue one load
10058 at a time and the next load can start a cycle later. */
10059 if (dep_insn_type == TYPE_IMOV
10060 || dep_insn_type == TYPE_FMOV)
10061 cost = 1;
10062 else if (cost > 1)
10063 cost--;
10064 }
10065 break;
10066
10067 case PROCESSOR_K6:
10068 memory = get_attr_memory (insn);
10069 dep_memory = get_attr_memory (dep_insn);
10070 /* The esp dependency is resolved before the instruction is really
10071 finished. */
10072 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10073 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10074 return 1;
10075
10076 /* Since we can't represent delayed latencies of load+operation,
10077 increase the cost here for non-imov insns. */
10078 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10079 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10080
10081 /* INT->FP conversion is expensive. */
10082 if (get_attr_fp_int_src (dep_insn))
10083 cost += 5;
10084
10085 /* Model the ability of the reorder buffer to hide the latency of a load
10086 by executing it in parallel with the previous instruction when the
10087 previous instruction is not needed to compute the address. */
10088 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10089 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10090 {
10091 /* Claim moves to take one cycle, as the core can issue one load
10092 at a time and the next load can start a cycle later. */
10093 if (dep_insn_type == TYPE_IMOV
10094 || dep_insn_type == TYPE_FMOV)
10095 cost = 1;
10096 else if (cost > 2)
10097 cost -= 2;
10098 else
10099 cost = 1;
10100 }
10101 break;
10102
10103 case PROCESSOR_ATHLON:
10104 memory = get_attr_memory (insn);
10105 dep_memory = get_attr_memory (dep_insn);
10106
10107 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10108 {
10109 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10110 cost += 2;
10111 else
10112 cost += 3;
10113 }
10114 /* Model the ability of the reorder buffer to hide the latency of a load
10115 by executing it in parallel with the previous instruction when the
10116 previous instruction is not needed to compute the address. */
10117 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10118 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10119 {
10120 /* Claim moves to take one cycle, as the core can issue one load
10121 at a time and the next load can start a cycle later. */
10122 if (dep_insn_type == TYPE_IMOV
10123 || dep_insn_type == TYPE_FMOV)
10124 cost = 0;
10125 else if (cost >= 3)
10126 cost -= 3;
10127 else
10128 cost = 0;
10129 }
10130
10131 default:
10132 break;
10133 }
10134
10135 return cost;
10136 }
10137
10138 static union
10139 {
10140 struct ppro_sched_data
10141 {
10142 rtx decode[3];
10143 int issued_this_cycle;
10144 } ppro;
10145 } ix86_sched_data;
10146
10147 static int
10148 ix86_safe_length (insn)
10149 rtx insn;
10150 {
10151 if (recog_memoized (insn) >= 0)
10152 return get_attr_length (insn);
10153 else
10154 return 128;
10155 }
10156
10157 static int
10158 ix86_safe_length_prefix (insn)
10159 rtx insn;
10160 {
10161 if (recog_memoized (insn) >= 0)
10162 return get_attr_length (insn);
10163 else
10164 return 0;
10165 }
10166
10167 static enum attr_memory
10168 ix86_safe_memory (insn)
10169 rtx insn;
10170 {
10171 if (recog_memoized (insn) >= 0)
10172 return get_attr_memory (insn);
10173 else
10174 return MEMORY_UNKNOWN;
10175 }
10176
10177 static enum attr_pent_pair
10178 ix86_safe_pent_pair (insn)
10179 rtx insn;
10180 {
10181 if (recog_memoized (insn) >= 0)
10182 return get_attr_pent_pair (insn);
10183 else
10184 return PENT_PAIR_NP;
10185 }
10186
10187 static enum attr_ppro_uops
10188 ix86_safe_ppro_uops (insn)
10189 rtx insn;
10190 {
10191 if (recog_memoized (insn) >= 0)
10192 return get_attr_ppro_uops (insn);
10193 else
10194 return PPRO_UOPS_MANY;
10195 }
10196
10197 static void
10198 ix86_dump_ppro_packet (dump)
10199 FILE *dump;
10200 {
10201 if (ix86_sched_data.ppro.decode[0])
10202 {
10203 fprintf (dump, "PPRO packet: %d",
10204 INSN_UID (ix86_sched_data.ppro.decode[0]));
10205 if (ix86_sched_data.ppro.decode[1])
10206 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10207 if (ix86_sched_data.ppro.decode[2])
10208 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10209 fputc ('\n', dump);
10210 }
10211 }
10212
10213 /* We're beginning a new block. Initialize data structures as necessary. */
10214
10215 static void
10216 ix86_sched_init (dump, sched_verbose, veclen)
10217 FILE *dump ATTRIBUTE_UNUSED;
10218 int sched_verbose ATTRIBUTE_UNUSED;
10219 int veclen ATTRIBUTE_UNUSED;
10220 {
10221 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10222 }
10223
10224 /* Shift INSN to SLOT, and shift everything else down. */
10225
10226 static void
10227 ix86_reorder_insn (insnp, slot)
10228 rtx *insnp, *slot;
10229 {
10230 if (insnp != slot)
10231 {
10232 rtx insn = *insnp;
10233 do
10234 insnp[0] = insnp[1];
10235 while (++insnp != slot);
10236 *insnp = insn;
10237 }
10238 }
10239
10240 /* Find an instruction with the given pairability and the minimal number of
10241 cycles lost by the fact that the CPU waits for both pipelines to finish
10242 before reading the next instructions. Also take care that the two
10243 instructions together do not exceed 7 bytes. */
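/* As a rough worked example: if FIRST is a read/modify/write insn whose
   result is ready after 3 cycles and a candidate is a load ready after 1,
   the loss computed below is abs (3 - 1) = 2 cycles, plus the extra
   penalties for the memory-access combinations handled below, so a
   candidate whose latency matches FIRST's is preferred.  */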
10244
10245 static rtx *
10246 ix86_pent_find_pair (e_ready, ready, type, first)
10247 rtx *e_ready;
10248 rtx *ready;
10249 enum attr_pent_pair type;
10250 rtx first;
10251 {
10252 int mincycles, cycles;
10253 enum attr_pent_pair tmp;
10254 enum attr_memory memory;
10255 rtx *insnp, *bestinsnp = NULL;
10256
10257 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10258 return NULL;
10259
10260 memory = ix86_safe_memory (first);
10261 cycles = result_ready_cost (first);
10262 mincycles = INT_MAX;
10263
10264 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10265 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10266 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10267 {
10268 enum attr_memory second_memory;
10269 int secondcycles, currentcycles;
10270
10271 second_memory = ix86_safe_memory (*insnp);
10272 secondcycles = result_ready_cost (*insnp);
10273 currentcycles = abs (cycles - secondcycles);
10274
10275 if (secondcycles >= 1 && cycles >= 1)
10276 {
10277 /* Two read/modify/write instructions together take two
10278 cycles longer. */
10279 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10280 currentcycles += 2;
10281
10282 /* A read/modify/write instruction followed by a read/modify
10283 instruction takes one cycle longer. */
10284 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10285 && tmp != PENT_PAIR_UV
10286 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10287 currentcycles += 1;
10288 }
10289 if (currentcycles < mincycles)
10290 bestinsnp = insnp, mincycles = currentcycles;
10291 }
10292
10293 return bestinsnp;
10294 }
10295
10296 /* Subroutines of ix86_sched_reorder. */
10297
10298 static void
10299 ix86_sched_reorder_pentium (ready, e_ready)
10300 rtx *ready;
10301 rtx *e_ready;
10302 {
10303 enum attr_pent_pair pair1, pair2;
10304 rtx *insnp;
10305
10306 /* This wouldn't be necessary if Haifa knew that static insn ordering
10307 determines which pipe an insn is issued to. So we have to make
10308 some minor rearrangements. */
10309
10310 pair1 = ix86_safe_pent_pair (*e_ready);
10311
10312 /* If the first insn is non-pairable, let it be. */
10313 if (pair1 == PENT_PAIR_NP)
10314 return;
10315
10316 pair2 = PENT_PAIR_NP;
10317 insnp = 0;
10318
10319 /* If the first insn is UV or PV pairable, search for a PU
10320 insn to go with. */
10321 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10322 {
10323 insnp = ix86_pent_find_pair (e_ready-1, ready,
10324 PENT_PAIR_PU, *e_ready);
10325 if (insnp)
10326 pair2 = PENT_PAIR_PU;
10327 }
10328
10329 /* If the first insn is PU or UV pairable, search for a PV
10330 insn to go with. */
10331 if (pair2 == PENT_PAIR_NP
10332 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10333 {
10334 insnp = ix86_pent_find_pair (e_ready-1, ready,
10335 PENT_PAIR_PV, *e_ready);
10336 if (insnp)
10337 pair2 = PENT_PAIR_PV;
10338 }
10339
10340 /* If the first insn is pairable, search for a UV
10341 insn to go with. */
10342 if (pair2 == PENT_PAIR_NP)
10343 {
10344 insnp = ix86_pent_find_pair (e_ready-1, ready,
10345 PENT_PAIR_UV, *e_ready);
10346 if (insnp)
10347 pair2 = PENT_PAIR_UV;
10348 }
10349
10350 if (pair2 == PENT_PAIR_NP)
10351 return;
10352
10353 /* Found something! Decide if we need to swap the order. */
10354 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10355 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10356 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10357 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10358 ix86_reorder_insn (insnp, e_ready);
10359 else
10360 ix86_reorder_insn (insnp, e_ready - 1);
10361 }
10362
10363 static void
10364 ix86_sched_reorder_ppro (ready, e_ready)
10365 rtx *ready;
10366 rtx *e_ready;
10367 {
10368 rtx decode[3];
10369 enum attr_ppro_uops cur_uops;
10370 int issued_this_cycle;
10371 rtx *insnp;
10372 int i;
10373
10374 /* At this point .ppro.decode contains the state of the three
10375 decoders from last "cycle". That is, those insns that were
10376 actually independent. But here we're scheduling for the
10377 decoder, and we may find things that are decodable in the
10378 same cycle. */
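/* (The PPro/PII front end has one decoder that can handle complex insns
   and two that take only single-uop insns -- the 4-1-1 rule -- which is
   why the code below tries to put one multi-uop insn first and fill the
   remaining two slots with single-uop insns.)  */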
10379
10380 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10381 issued_this_cycle = 0;
10382
10383 insnp = e_ready;
10384 cur_uops = ix86_safe_ppro_uops (*insnp);
10385
10386 /* If the decoders are empty, and we have a complex insn at the
10387 head of the priority queue, let it issue without complaint. */
10388 if (decode[0] == NULL)
10389 {
10390 if (cur_uops == PPRO_UOPS_MANY)
10391 {
10392 decode[0] = *insnp;
10393 goto ppro_done;
10394 }
10395
10396 /* Otherwise, search for a 2-4 uop insn to issue. */
10397 while (cur_uops != PPRO_UOPS_FEW)
10398 {
10399 if (insnp == ready)
10400 break;
10401 cur_uops = ix86_safe_ppro_uops (*--insnp);
10402 }
10403
10404 /* If so, move it to the head of the line. */
10405 if (cur_uops == PPRO_UOPS_FEW)
10406 ix86_reorder_insn (insnp, e_ready);
10407
10408 /* Issue the head of the queue. */
10409 issued_this_cycle = 1;
10410 decode[0] = *e_ready--;
10411 }
10412
10413 /* Look for simple insns to fill in the other two slots. */
10414 for (i = 1; i < 3; ++i)
10415 if (decode[i] == NULL)
10416 {
10417 if (ready >= e_ready)
10418 goto ppro_done;
10419
10420 insnp = e_ready;
10421 cur_uops = ix86_safe_ppro_uops (*insnp);
10422 while (cur_uops != PPRO_UOPS_ONE)
10423 {
10424 if (insnp == ready)
10425 break;
10426 cur_uops = ix86_safe_ppro_uops (*--insnp);
10427 }
10428
10429 /* Found one. Move it to the head of the queue and issue it. */
10430 if (cur_uops == PPRO_UOPS_ONE)
10431 {
10432 ix86_reorder_insn (insnp, e_ready);
10433 decode[i] = *e_ready--;
10434 issued_this_cycle++;
10435 continue;
10436 }
10437
10438 /* ??? Didn't find one. Ideally, here we would do a lazy split
10439 of 2-uop insns, issue one and queue the other. */
10440 }
10441
10442 ppro_done:
10443 if (issued_this_cycle == 0)
10444 issued_this_cycle = 1;
10445 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10446 }
10447
10448 /* We are about to begin issuing insns for this clock cycle.
10449 Override the default sort algorithm to better slot instructions. */
10450 static int
10451 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10452 FILE *dump ATTRIBUTE_UNUSED;
10453 int sched_verbose ATTRIBUTE_UNUSED;
10454 rtx *ready;
10455 int *n_readyp;
10456 int clock_var ATTRIBUTE_UNUSED;
10457 {
10458 int n_ready = *n_readyp;
10459 rtx *e_ready = ready + n_ready - 1;
10460
10461 if (n_ready < 2)
10462 goto out;
10463
10464 switch (ix86_cpu)
10465 {
10466 default:
10467 break;
10468
10469 case PROCESSOR_PENTIUM:
10470 ix86_sched_reorder_pentium (ready, e_ready);
10471 break;
10472
10473 case PROCESSOR_PENTIUMPRO:
10474 ix86_sched_reorder_ppro (ready, e_ready);
10475 break;
10476 }
10477
10478 out:
10479 return ix86_issue_rate ();
10480 }
10481
10482 /* We are about to issue INSN. Return the number of insns left on the
10483 ready queue that can be issued this cycle. */
10484
10485 static int
10486 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10487 FILE *dump;
10488 int sched_verbose;
10489 rtx insn;
10490 int can_issue_more;
10491 {
10492 int i;
10493 switch (ix86_cpu)
10494 {
10495 default:
10496 return can_issue_more - 1;
10497
10498 case PROCESSOR_PENTIUMPRO:
10499 {
10500 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10501
10502 if (uops == PPRO_UOPS_MANY)
10503 {
10504 if (sched_verbose)
10505 ix86_dump_ppro_packet (dump);
10506 ix86_sched_data.ppro.decode[0] = insn;
10507 ix86_sched_data.ppro.decode[1] = NULL;
10508 ix86_sched_data.ppro.decode[2] = NULL;
10509 if (sched_verbose)
10510 ix86_dump_ppro_packet (dump);
10511 ix86_sched_data.ppro.decode[0] = NULL;
10512 }
10513 else if (uops == PPRO_UOPS_FEW)
10514 {
10515 if (sched_verbose)
10516 ix86_dump_ppro_packet (dump);
10517 ix86_sched_data.ppro.decode[0] = insn;
10518 ix86_sched_data.ppro.decode[1] = NULL;
10519 ix86_sched_data.ppro.decode[2] = NULL;
10520 }
10521 else
10522 {
10523 for (i = 0; i < 3; ++i)
10524 if (ix86_sched_data.ppro.decode[i] == NULL)
10525 {
10526 ix86_sched_data.ppro.decode[i] = insn;
10527 break;
10528 }
10529 if (i == 3)
10530 abort ();
10531 if (i == 2)
10532 {
10533 if (sched_verbose)
10534 ix86_dump_ppro_packet (dump);
10535 ix86_sched_data.ppro.decode[0] = NULL;
10536 ix86_sched_data.ppro.decode[1] = NULL;
10537 ix86_sched_data.ppro.decode[2] = NULL;
10538 }
10539 }
10540 }
10541 return --ix86_sched_data.ppro.issued_this_cycle;
10542 }
10543 }
10544 \f
10545 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10546 SRCREG, and set their memory attributes to those of DSTREF and SRCREF, as
10547 appropriate. */
10548
10549 void
10550 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10551 rtx insns;
10552 rtx dstref, srcref, dstreg, srcreg;
10553 {
10554 rtx insn;
10555
10556 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10557 if (INSN_P (insn))
10558 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10559 dstreg, srcreg);
10560 }
10561
10562 /* Subroutine of above to actually do the updating by recursively walking
10563 the rtx. */
10564
10565 static void
10566 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10567 rtx x;
10568 rtx dstref, srcref, dstreg, srcreg;
10569 {
10570 enum rtx_code code = GET_CODE (x);
10571 const char *format_ptr = GET_RTX_FORMAT (code);
10572 int i, j;
10573
10574 if (code == MEM && XEXP (x, 0) == dstreg)
10575 MEM_COPY_ATTRIBUTES (x, dstref);
10576 else if (code == MEM && XEXP (x, 0) == srcreg)
10577 MEM_COPY_ATTRIBUTES (x, srcref);
10578
10579 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10580 {
10581 if (*format_ptr == 'e')
10582 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10583 dstreg, srcreg);
10584 else if (*format_ptr == 'E')
10585 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10586 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10587 dstreg, srcreg);
10588 }
10589 }
10590 \f
10591 /* Compute the alignment given to a constant that is being placed in memory.
10592 EXP is the constant and ALIGN is the alignment that the object would
10593 ordinarily have.
10594 The value of this function is used instead of that alignment to align
10595 the object. */
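/* For example, under these rules a DFmode REAL_CST whose default alignment
   is only 32 bits is given 64-bit alignment, and a string constant of 31
   or more characters is given 256-bit alignment.  */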
10596
10597 int
10598 ix86_constant_alignment (exp, align)
10599 tree exp;
10600 int align;
10601 {
10602 if (TREE_CODE (exp) == REAL_CST)
10603 {
10604 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10605 return 64;
10606 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10607 return 128;
10608 }
10609 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10610 && align < 256)
10611 return 256;
10612
10613 return align;
10614 }
10615
10616 /* Compute the alignment for a static variable.
10617 TYPE is the data type, and ALIGN is the alignment that
10618 the object would ordinarily have. The value of this function is used
10619 instead of that alignment to align the object. */
10620
10621 int
10622 ix86_data_alignment (type, align)
10623 tree type;
10624 int align;
10625 {
10626 if (AGGREGATE_TYPE_P (type)
10627 && TYPE_SIZE (type)
10628 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10629 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10630 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10631 return 256;
10632
10633 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10634 to a 16-byte boundary. */
10635 if (TARGET_64BIT)
10636 {
10637 if (AGGREGATE_TYPE_P (type)
10638 && TYPE_SIZE (type)
10639 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10640 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10641 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10642 return 128;
10643 }
10644
10645 if (TREE_CODE (type) == ARRAY_TYPE)
10646 {
10647 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10648 return 64;
10649 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10650 return 128;
10651 }
10652 else if (TREE_CODE (type) == COMPLEX_TYPE)
10653 {
10654
10655 if (TYPE_MODE (type) == DCmode && align < 64)
10656 return 64;
10657 if (TYPE_MODE (type) == XCmode && align < 128)
10658 return 128;
10659 }
10660 else if ((TREE_CODE (type) == RECORD_TYPE
10661 || TREE_CODE (type) == UNION_TYPE
10662 || TREE_CODE (type) == QUAL_UNION_TYPE)
10663 && TYPE_FIELDS (type))
10664 {
10665 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10666 return 64;
10667 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10668 return 128;
10669 }
10670 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10671 || TREE_CODE (type) == INTEGER_TYPE)
10672 {
10673 if (TYPE_MODE (type) == DFmode && align < 64)
10674 return 64;
10675 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10676 return 128;
10677 }
10678
10679 return align;
10680 }
10681
10682 /* Compute the alignment for a local variable.
10683 TYPE is the data type, and ALIGN is the alignment that
10684 the object would ordinarily have. The value of this macro is used
10685 instead of that alignment to align the object. */
10686
10687 int
10688 ix86_local_alignment (type, align)
10689 tree type;
10690 int align;
10691 {
10692 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10693 to a 16-byte boundary. */
10694 if (TARGET_64BIT)
10695 {
10696 if (AGGREGATE_TYPE_P (type)
10697 && TYPE_SIZE (type)
10698 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10699 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10700 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10701 return 128;
10702 }
10703 if (TREE_CODE (type) == ARRAY_TYPE)
10704 {
10705 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10706 return 64;
10707 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10708 return 128;
10709 }
10710 else if (TREE_CODE (type) == COMPLEX_TYPE)
10711 {
10712 if (TYPE_MODE (type) == DCmode && align < 64)
10713 return 64;
10714 if (TYPE_MODE (type) == XCmode && align < 128)
10715 return 128;
10716 }
10717 else if ((TREE_CODE (type) == RECORD_TYPE
10718 || TREE_CODE (type) == UNION_TYPE
10719 || TREE_CODE (type) == QUAL_UNION_TYPE)
10720 && TYPE_FIELDS (type))
10721 {
10722 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10723 return 64;
10724 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10725 return 128;
10726 }
10727 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10728 || TREE_CODE (type) == INTEGER_TYPE)
10729 {
10730
10731 if (TYPE_MODE (type) == DFmode && align < 64)
10732 return 64;
10733 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10734 return 128;
10735 }
10736 return align;
10737 }
10738 \f
10739 /* Emit RTL insns to initialize the variable parts of a trampoline.
10740 FNADDR is an RTX for the address of the function's pure code.
10741 CXT is an RTX for the static chain value for the function. */
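/* For the 32-bit case the 10-byte trampoline built below is, in assembly
   terms,

	b9 <cxt:4>	movl  $CXT, %ecx
	e9 <disp:4>	jmp   FNADDR	(<disp> is relative to the end of
					 the jmp)

   while the 64-bit case loads FNADDR into %r11 and CXT into %r10 with
   mov/movabs and ends with "jmp *%r11".  */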
10742 void
10743 x86_initialize_trampoline (tramp, fnaddr, cxt)
10744 rtx tramp, fnaddr, cxt;
10745 {
10746 if (!TARGET_64BIT)
10747 {
10748 /* Compute offset from the end of the jmp to the target function. */
10749 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10750 plus_constant (tramp, 10),
10751 NULL_RTX, 1, OPTAB_DIRECT);
10752 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10753 gen_int_mode (0xb9, QImode));
10754 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10755 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10756 gen_int_mode (0xe9, QImode));
10757 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10758 }
10759 else
10760 {
10761 int offset = 0;
10762 /* Try to load the address using the shorter movl instead of movabs.
10763 We may want to support movq for kernel mode, but the kernel does not
10764 use trampolines at the moment. */
10765 if (x86_64_zero_extended_value (fnaddr))
10766 {
10767 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10768 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10769 gen_int_mode (0xbb41, HImode));
10770 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10771 gen_lowpart (SImode, fnaddr));
10772 offset += 6;
10773 }
10774 else
10775 {
10776 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10777 gen_int_mode (0xbb49, HImode));
10778 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10779 fnaddr);
10780 offset += 10;
10781 }
10782 /* Load static chain using movabs to r10. */
10783 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10784 gen_int_mode (0xba49, HImode));
10785 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10786 cxt);
10787 offset += 10;
10788 /* Jump to r11. */
10789 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10790 gen_int_mode (0xff49, HImode));
10791 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10792 gen_int_mode (0xe3, QImode));
10793 offset += 3;
10794 if (offset > TRAMPOLINE_SIZE)
10795 abort ();
10796 }
10797 }
10798 \f
10799 #define def_builtin(MASK, NAME, TYPE, CODE) \
10800 do { \
10801 if ((MASK) & target_flags) \
10802 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10803 } while (0)
10804
10805 struct builtin_description
10806 {
10807 const unsigned int mask;
10808 const enum insn_code icode;
10809 const char *const name;
10810 const enum ix86_builtins code;
10811 const enum rtx_code comparison;
10812 const unsigned int flag;
10813 };
10814
10815 static const struct builtin_description bdesc_comi[] =
10816 {
10817 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10818 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10819 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10820 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10821 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10822 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10823 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10824 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10825 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10826 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10827 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10828 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10829 };
10830
10831 static const struct builtin_description bdesc_2arg[] =
10832 {
10833 /* SSE */
10834 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10835 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10836 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10837 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10838 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10839 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10840 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10841 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10842
10843 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10844 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10845 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10846 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10847 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10848 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10849 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10850 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10851 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10852 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10853 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10854 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10855 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10856 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10857 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10858 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10859 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10860 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10861 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10862 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10863 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10864 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10865 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10866 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10867
10868 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10869 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10870 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10871 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10872
10873 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10874 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10875 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10876 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10877 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10878
10879 /* MMX */
10880 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10881 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10882 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10883 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10884 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10885 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10886
10887 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10888 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10889 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10890 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10891 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10892 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10893 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10894 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10895
10896 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10897 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10898 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10899
10900 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10901 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10902 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10903 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10904
10905 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10906 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10907
10908 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10909 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10910 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10911 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10912 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10913 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10914
10915 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10916 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10917 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10919
10920 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10921 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10922 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10923 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10924 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10925 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10926
10927 /* Special. */
10928 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10929 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10930 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10931
10932 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10933 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10934
10935 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10936 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10937 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10938 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10939 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10940 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10941
10942 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10943 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10944 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10945 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10946 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10947 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10948
10949 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10950 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10951 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10952 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10953
10954 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10955 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10956
10957 };
10958
10959 static const struct builtin_description bdesc_1arg[] =
10960 {
10961 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10962 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10963
10964 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10965 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10966 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10967
10968 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10969 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10970 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10971 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10972
10973 };
10974
10975 void
10976 ix86_init_builtins ()
10977 {
10978 if (TARGET_MMX)
10979 ix86_init_mmx_sse_builtins ();
10980 }
10981
10982 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10983 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10984 builtins. */
10985 static void
10986 ix86_init_mmx_sse_builtins ()
10987 {
10988 const struct builtin_description * d;
10989 size_t i;
10990 tree endlink = void_list_node;
10991
10992 tree pchar_type_node = build_pointer_type (char_type_node);
10993 tree pfloat_type_node = build_pointer_type (float_type_node);
10994 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10995 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10996
10997 /* Comparisons. */
10998 tree int_ftype_v4sf_v4sf
10999 = build_function_type (integer_type_node,
11000 tree_cons (NULL_TREE, V4SF_type_node,
11001 tree_cons (NULL_TREE,
11002 V4SF_type_node,
11003 endlink)));
11004 tree v4si_ftype_v4sf_v4sf
11005 = build_function_type (V4SI_type_node,
11006 tree_cons (NULL_TREE, V4SF_type_node,
11007 tree_cons (NULL_TREE,
11008 V4SF_type_node,
11009 endlink)));
11010 /* MMX/SSE/integer conversions. */
11011 tree int_ftype_v4sf
11012 = build_function_type (integer_type_node,
11013 tree_cons (NULL_TREE, V4SF_type_node,
11014 endlink));
11015 tree int_ftype_v8qi
11016 = build_function_type (integer_type_node,
11017 tree_cons (NULL_TREE, V8QI_type_node,
11018 endlink));
11019 tree v4sf_ftype_v4sf_int
11020 = build_function_type (V4SF_type_node,
11021 tree_cons (NULL_TREE, V4SF_type_node,
11022 tree_cons (NULL_TREE, integer_type_node,
11023 endlink)));
11024 tree v4sf_ftype_v4sf_v2si
11025 = build_function_type (V4SF_type_node,
11026 tree_cons (NULL_TREE, V4SF_type_node,
11027 tree_cons (NULL_TREE, V2SI_type_node,
11028 endlink)));
11029 tree int_ftype_v4hi_int
11030 = build_function_type (integer_type_node,
11031 tree_cons (NULL_TREE, V4HI_type_node,
11032 tree_cons (NULL_TREE, integer_type_node,
11033 endlink)));
11034 tree v4hi_ftype_v4hi_int_int
11035 = build_function_type (V4HI_type_node,
11036 tree_cons (NULL_TREE, V4HI_type_node,
11037 tree_cons (NULL_TREE, integer_type_node,
11038 tree_cons (NULL_TREE,
11039 integer_type_node,
11040 endlink))));
11041 /* Miscellaneous. */
11042 tree v8qi_ftype_v4hi_v4hi
11043 = build_function_type (V8QI_type_node,
11044 tree_cons (NULL_TREE, V4HI_type_node,
11045 tree_cons (NULL_TREE, V4HI_type_node,
11046 endlink)));
11047 tree v4hi_ftype_v2si_v2si
11048 = build_function_type (V4HI_type_node,
11049 tree_cons (NULL_TREE, V2SI_type_node,
11050 tree_cons (NULL_TREE, V2SI_type_node,
11051 endlink)));
11052 tree v4sf_ftype_v4sf_v4sf_int
11053 = build_function_type (V4SF_type_node,
11054 tree_cons (NULL_TREE, V4SF_type_node,
11055 tree_cons (NULL_TREE, V4SF_type_node,
11056 tree_cons (NULL_TREE,
11057 integer_type_node,
11058 endlink))));
11059 tree v4hi_ftype_v8qi_v8qi
11060 = build_function_type (V4HI_type_node,
11061 tree_cons (NULL_TREE, V8QI_type_node,
11062 tree_cons (NULL_TREE, V8QI_type_node,
11063 endlink)));
11064 tree v2si_ftype_v4hi_v4hi
11065 = build_function_type (V2SI_type_node,
11066 tree_cons (NULL_TREE, V4HI_type_node,
11067 tree_cons (NULL_TREE, V4HI_type_node,
11068 endlink)));
11069 tree v4hi_ftype_v4hi_int
11070 = build_function_type (V4HI_type_node,
11071 tree_cons (NULL_TREE, V4HI_type_node,
11072 tree_cons (NULL_TREE, integer_type_node,
11073 endlink)));
11074 tree v4hi_ftype_v4hi_di
11075 = build_function_type (V4HI_type_node,
11076 tree_cons (NULL_TREE, V4HI_type_node,
11077 tree_cons (NULL_TREE,
11078 long_long_integer_type_node,
11079 endlink)));
11080 tree v2si_ftype_v2si_di
11081 = build_function_type (V2SI_type_node,
11082 tree_cons (NULL_TREE, V2SI_type_node,
11083 tree_cons (NULL_TREE,
11084 long_long_integer_type_node,
11085 endlink)));
11086 tree void_ftype_void
11087 = build_function_type (void_type_node, endlink);
11088 tree void_ftype_unsigned
11089 = build_function_type (void_type_node,
11090 tree_cons (NULL_TREE, unsigned_type_node,
11091 endlink));
11092 tree unsigned_ftype_void
11093 = build_function_type (unsigned_type_node, endlink);
11094 tree di_ftype_void
11095 = build_function_type (long_long_unsigned_type_node, endlink);
11096 tree v4sf_ftype_void
11097 = build_function_type (V4SF_type_node, endlink);
11098 tree v2si_ftype_v4sf
11099 = build_function_type (V2SI_type_node,
11100 tree_cons (NULL_TREE, V4SF_type_node,
11101 endlink));
11102 /* Loads/stores. */
11103 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11104 tree_cons (NULL_TREE, V8QI_type_node,
11105 tree_cons (NULL_TREE,
11106 pchar_type_node,
11107 endlink)));
11108 tree void_ftype_v8qi_v8qi_pchar
11109 = build_function_type (void_type_node, maskmovq_args);
11110 tree v4sf_ftype_pfloat
11111 = build_function_type (V4SF_type_node,
11112 tree_cons (NULL_TREE, pfloat_type_node,
11113 endlink));
11114 /* @@@ the type is bogus */
11115 tree v4sf_ftype_v4sf_pv2si
11116 = build_function_type (V4SF_type_node,
11117 tree_cons (NULL_TREE, V4SF_type_node,
11118 tree_cons (NULL_TREE, pv2si_type_node,
11119 endlink)));
11120 tree void_ftype_pv2si_v4sf
11121 = build_function_type (void_type_node,
11122 tree_cons (NULL_TREE, pv2si_type_node,
11123 tree_cons (NULL_TREE, V4SF_type_node,
11124 endlink)));
11125 tree void_ftype_pfloat_v4sf
11126 = build_function_type (void_type_node,
11127 tree_cons (NULL_TREE, pfloat_type_node,
11128 tree_cons (NULL_TREE, V4SF_type_node,
11129 endlink)));
11130 tree void_ftype_pdi_di
11131 = build_function_type (void_type_node,
11132 tree_cons (NULL_TREE, pdi_type_node,
11133 tree_cons (NULL_TREE,
11134 long_long_unsigned_type_node,
11135 endlink)));
11136 /* Normal vector unops. */
11137 tree v4sf_ftype_v4sf
11138 = build_function_type (V4SF_type_node,
11139 tree_cons (NULL_TREE, V4SF_type_node,
11140 endlink));
11141
11142 /* Normal vector binops. */
11143 tree v4sf_ftype_v4sf_v4sf
11144 = build_function_type (V4SF_type_node,
11145 tree_cons (NULL_TREE, V4SF_type_node,
11146 tree_cons (NULL_TREE, V4SF_type_node,
11147 endlink)));
11148 tree v8qi_ftype_v8qi_v8qi
11149 = build_function_type (V8QI_type_node,
11150 tree_cons (NULL_TREE, V8QI_type_node,
11151 tree_cons (NULL_TREE, V8QI_type_node,
11152 endlink)));
11153 tree v4hi_ftype_v4hi_v4hi
11154 = build_function_type (V4HI_type_node,
11155 tree_cons (NULL_TREE, V4HI_type_node,
11156 tree_cons (NULL_TREE, V4HI_type_node,
11157 endlink)));
11158 tree v2si_ftype_v2si_v2si
11159 = build_function_type (V2SI_type_node,
11160 tree_cons (NULL_TREE, V2SI_type_node,
11161 tree_cons (NULL_TREE, V2SI_type_node,
11162 endlink)));
11163 tree di_ftype_di_di
11164 = build_function_type (long_long_unsigned_type_node,
11165 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11166 tree_cons (NULL_TREE,
11167 long_long_unsigned_type_node,
11168 endlink)));
11169
11170 tree v2si_ftype_v2sf
11171 = build_function_type (V2SI_type_node,
11172 tree_cons (NULL_TREE, V2SF_type_node,
11173 endlink));
11174 tree v2sf_ftype_v2si
11175 = build_function_type (V2SF_type_node,
11176 tree_cons (NULL_TREE, V2SI_type_node,
11177 endlink));
11178 tree v2si_ftype_v2si
11179 = build_function_type (V2SI_type_node,
11180 tree_cons (NULL_TREE, V2SI_type_node,
11181 endlink));
11182 tree v2sf_ftype_v2sf
11183 = build_function_type (V2SF_type_node,
11184 tree_cons (NULL_TREE, V2SF_type_node,
11185 endlink));
11186 tree v2sf_ftype_v2sf_v2sf
11187 = build_function_type (V2SF_type_node,
11188 tree_cons (NULL_TREE, V2SF_type_node,
11189 tree_cons (NULL_TREE,
11190 V2SF_type_node,
11191 endlink)));
11192 tree v2si_ftype_v2sf_v2sf
11193 = build_function_type (V2SI_type_node,
11194 tree_cons (NULL_TREE, V2SF_type_node,
11195 tree_cons (NULL_TREE,
11196 V2SF_type_node,
11197 endlink)));
11198
11199 /* Add all builtins that are more or less simple operations on two
11200 operands. */
11201 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11202 {
11203 /* Use one of the operands; the target can have a different mode for
11204 mask-generating compares. */
11205 enum machine_mode mode;
11206 tree type;
11207
11208 if (d->name == 0)
11209 continue;
11210 mode = insn_data[d->icode].operand[1].mode;
11211
11212 switch (mode)
11213 {
11214 case V4SFmode:
11215 type = v4sf_ftype_v4sf_v4sf;
11216 break;
11217 case V8QImode:
11218 type = v8qi_ftype_v8qi_v8qi;
11219 break;
11220 case V4HImode:
11221 type = v4hi_ftype_v4hi_v4hi;
11222 break;
11223 case V2SImode:
11224 type = v2si_ftype_v2si_v2si;
11225 break;
11226 case DImode:
11227 type = di_ftype_di_di;
11228 break;
11229
11230 default:
11231 abort ();
11232 }
11233
11234 /* Override for comparisons. */
11235 if (d->icode == CODE_FOR_maskcmpv4sf3
11236 || d->icode == CODE_FOR_maskncmpv4sf3
11237 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11238 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11239 type = v4si_ftype_v4sf_v4sf;
11240
11241 def_builtin (d->mask, d->name, type, d->code);
11242 }
11243
11244 /* Add the remaining MMX insns with somewhat more complicated types. */
11245 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11246 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11247 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11248 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11249 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11250 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11251 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11252
11253 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11254 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11255 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11256
11257 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11258 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11259
11260 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11261 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11262
11263 /* comi/ucomi insns. */
11264 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
11265 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11266
11267 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11268 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11269 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11270
11271 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11272 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11273 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11274 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11275 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11276 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11277
11278 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11279 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11280 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11281 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11282
11283 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11284 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11285
11286 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11287
11288 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11289 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11290 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11291 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11292 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11293 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11294
11295 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11296 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11297 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11298 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11299
11300 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11301 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11302 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11303 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11304
11305 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11306
11307 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11308
11309 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11310 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11311 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11312 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11313 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11314 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11315
11316 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11317
11318 /* Original 3DNow! */
11319 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11320 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11321 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11322 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11323 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11324 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11325 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11326 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11327 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11328 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11329 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11330 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11331 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11332 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11333 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11334 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11335 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11336 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11337 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11338 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11339
11340 /* 3DNow! extension as used in the Athlon CPU. */
11341 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11342 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11343 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11344 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11345 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11346 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11347
11348 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11349 }
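
/* Illustrative note (an editorial addition, not part of the original
   sources): def_builtin above only registers the declarations; the actual
   expansion happens in ix86_expand_builtin below.  From user code compiled
   with -msse the registered builtins are reachable roughly like this,
   usually via the wrappers in <xmmintrin.h>:

       typedef float __v4sf __attribute__ ((mode (V4SF)));

       __v4sf
       add_ps (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_addps (a, b);
       }
*/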
11350
11351 /* Errors in the source file can cause expand_expr to return const0_rtx
11352 where we expect a vector. To avoid crashing, use one of the vector
11353 clear instructions. */
11354 static rtx
11355 safe_vector_operand (x, mode)
11356 rtx x;
11357 enum machine_mode mode;
11358 {
11359 if (x != const0_rtx)
11360 return x;
11361 x = gen_reg_rtx (mode);
11362
11363 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11364 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11365 : gen_rtx_SUBREG (DImode, x, 0)));
11366 else
11367 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11368 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11369 return x;
11370 }
11371
11372 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11373
11374 static rtx
11375 ix86_expand_binop_builtin (icode, arglist, target)
11376 enum insn_code icode;
11377 tree arglist;
11378 rtx target;
11379 {
11380 rtx pat;
11381 tree arg0 = TREE_VALUE (arglist);
11382 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11383 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11384 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11385 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11386 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11387 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11388
11389 if (VECTOR_MODE_P (mode0))
11390 op0 = safe_vector_operand (op0, mode0);
11391 if (VECTOR_MODE_P (mode1))
11392 op1 = safe_vector_operand (op1, mode1);
11393
11394 if (! target
11395 || GET_MODE (target) != tmode
11396 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11397 target = gen_reg_rtx (tmode);
11398
11399 /* In case the insn wants input operands in modes different from
11400 the result, abort. */
11401 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11402 abort ();
11403
11404 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11405 op0 = copy_to_mode_reg (mode0, op0);
11406 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11407 op1 = copy_to_mode_reg (mode1, op1);
11408
11409 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11410 yet one of the two must not be a memory. This is normally enforced
11411 by expanders, but we didn't bother to create one here. */
11412 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11413 op0 = copy_to_mode_reg (mode0, op0);
11414
11415 pat = GEN_FCN (icode) (target, op0, op1);
11416 if (! pat)
11417 return 0;
11418 emit_insn (pat);
11419 return target;
11420 }
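
/* Illustrative example (editorial, hedged): for a call such as
   __builtin_ia32_addps (a, b) the descriptor supplies an icode along the
   lines of CODE_FOR_addv4sf3, and the routine above emits a single insn of
   roughly the shape

       (set (reg:V4SF target)
            (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   copying operands into fresh registers first whenever the insn's
   predicates reject them.  */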
11421
11422 /* In type_for_mode we restrict the ability to create TImode types
11423 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11424 to have a V4SFmode signature. Convert them in-place to TImode. */
11425
11426 static rtx
11427 ix86_expand_timode_binop_builtin (icode, arglist, target)
11428 enum insn_code icode;
11429 tree arglist;
11430 rtx target;
11431 {
11432 rtx pat;
11433 tree arg0 = TREE_VALUE (arglist);
11434 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11435 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11436 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11437
11438 op0 = gen_lowpart (TImode, op0);
11439 op1 = gen_lowpart (TImode, op1);
11440 target = gen_reg_rtx (TImode);
11441
11442 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11443 op0 = copy_to_mode_reg (TImode, op0);
11444 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11445 op1 = copy_to_mode_reg (TImode, op1);
11446
11447 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11448 yet one of the two must not be a memory. This is normally enforced
11449 by expanders, but we didn't bother to create one here. */
11450 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11451 op0 = copy_to_mode_reg (TImode, op0);
11452
11453 pat = GEN_FCN (icode) (target, op0, op1);
11454 if (! pat)
11455 return 0;
11456 emit_insn (pat);
11457
11458 return gen_lowpart (V4SFmode, target);
11459 }
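
/* Editorial sketch of the above: for __builtin_ia32_andps the two V4SF
   arguments are re-viewed as TImode via gen_lowpart, an insn such as
   sse_andti3 is emitted on the TImode values, and the TImode result is
   re-viewed as V4SFmode for the caller.  The gen_lowpart calls only change
   the mode under which the value is seen; they are not conversions.  */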
11460
11461 /* Subroutine of ix86_expand_builtin to take care of stores. */
11462
11463 static rtx
11464 ix86_expand_store_builtin (icode, arglist)
11465 enum insn_code icode;
11466 tree arglist;
11467 {
11468 rtx pat;
11469 tree arg0 = TREE_VALUE (arglist);
11470 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11471 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11472 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11473 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11474 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11475
11476 if (VECTOR_MODE_P (mode1))
11477 op1 = safe_vector_operand (op1, mode1);
11478
11479 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11480
11481 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11482 op1 = copy_to_mode_reg (mode1, op1);
11483
11484 pat = GEN_FCN (icode) (op0, op1);
11485 if (pat)
11486 emit_insn (pat);
11487 return 0;
11488 }
11489
11490 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11491
11492 static rtx
11493 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11494 enum insn_code icode;
11495 tree arglist;
11496 rtx target;
11497 int do_load;
11498 {
11499 rtx pat;
11500 tree arg0 = TREE_VALUE (arglist);
11501 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11502 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11503 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11504
11505 if (! target
11506 || GET_MODE (target) != tmode
11507 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11508 target = gen_reg_rtx (tmode);
11509 if (do_load)
11510 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11511 else
11512 {
11513 if (VECTOR_MODE_P (mode0))
11514 op0 = safe_vector_operand (op0, mode0);
11515
11516 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11517 op0 = copy_to_mode_reg (mode0, op0);
11518 }
11519
11520 pat = GEN_FCN (icode) (target, op0);
11521 if (! pat)
11522 return 0;
11523 emit_insn (pat);
11524 return target;
11525 }
11526
11527 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11528 sqrtss, rsqrtss, rcpss. */
11529
11530 static rtx
11531 ix86_expand_unop1_builtin (icode, arglist, target)
11532 enum insn_code icode;
11533 tree arglist;
11534 rtx target;
11535 {
11536 rtx pat;
11537 tree arg0 = TREE_VALUE (arglist);
11538 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11539 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11540 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11541
11542 if (! target
11543 || GET_MODE (target) != tmode
11544 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11545 target = gen_reg_rtx (tmode);
11546
11547 if (VECTOR_MODE_P (mode0))
11548 op0 = safe_vector_operand (op0, mode0);
11549
11550 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11551 op0 = copy_to_mode_reg (mode0, op0);
11552
11553 op1 = op0;
11554 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
11555 op1 = copy_to_mode_reg (mode0, op1);
11556
11557 pat = GEN_FCN (icode) (target, op0, op1);
11558 if (! pat)
11559 return 0;
11560 emit_insn (pat);
11561 return target;
11562 }
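
/* Editorial note: op0 is passed twice above presumably because the scalar
   SSE patterns (sqrtss and friends) take two inputs, with the low element
   computed from one operand and the upper three elements carried over from
   the other, while the builtin has only a single argument to supply
   both.  */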
11563
11564 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11565
11566 static rtx
11567 ix86_expand_sse_compare (d, arglist, target)
11568 const struct builtin_description *d;
11569 tree arglist;
11570 rtx target;
11571 {
11572 rtx pat;
11573 tree arg0 = TREE_VALUE (arglist);
11574 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11575 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11576 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11577 rtx op2;
11578 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11579 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11580 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11581 enum rtx_code comparison = d->comparison;
11582
11583 if (VECTOR_MODE_P (mode0))
11584 op0 = safe_vector_operand (op0, mode0);
11585 if (VECTOR_MODE_P (mode1))
11586 op1 = safe_vector_operand (op1, mode1);
11587
11588 /* Swap operands if we have a comparison that isn't available in
11589 hardware. */
11590 if (d->flag)
11591 {
11592 rtx tmp = gen_reg_rtx (mode1);
11593 emit_move_insn (tmp, op1);
11594 op1 = op0;
11595 op0 = tmp;
11596 }
11597
11598 if (! target
11599 || GET_MODE (target) != tmode
11600 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11601 target = gen_reg_rtx (tmode);
11602
11603 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11604 op0 = copy_to_mode_reg (mode0, op0);
11605 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11606 op1 = copy_to_mode_reg (mode1, op1);
11607
11608 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11609 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11610 if (! pat)
11611 return 0;
11612 emit_insn (pat);
11613 return target;
11614 }
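
/* Example of the operand swap above (editorial, hedged): SSE's cmpps
   encodes EQ, LT and LE (and their negations) but not GT or GE, so a
   builtin such as __builtin_ia32_cmpgtps is described with d->flag set and
   is expanded as an LT comparison with the operands exchanged.  */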
11615
11616 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11617
11618 static rtx
11619 ix86_expand_sse_comi (d, arglist, target)
11620 const struct builtin_description *d;
11621 tree arglist;
11622 rtx target;
11623 {
11624 rtx pat;
11625 tree arg0 = TREE_VALUE (arglist);
11626 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11627 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11628 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11629 rtx op2;
11630 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11631 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11632 enum rtx_code comparison = d->comparison;
11633
11634 if (VECTOR_MODE_P (mode0))
11635 op0 = safe_vector_operand (op0, mode0);
11636 if (VECTOR_MODE_P (mode1))
11637 op1 = safe_vector_operand (op1, mode1);
11638
11639 /* Swap operands if we have a comparison that isn't available in
11640 hardware. */
11641 if (d->flag)
11642 {
11643 rtx tmp = op1;
11644 op1 = op0;
11645 op0 = tmp;
11646 }
11647
11648 target = gen_reg_rtx (SImode);
11649 emit_move_insn (target, const0_rtx);
11650 target = gen_rtx_SUBREG (QImode, target, 0);
11651
11652 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11653 op0 = copy_to_mode_reg (mode0, op0);
11654 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11655 op1 = copy_to_mode_reg (mode1, op1);
11656
11657 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11658 pat = GEN_FCN (d->icode) (op0, op1, op2);
11659 if (! pat)
11660 return 0;
11661 emit_insn (pat);
11662 emit_insn (gen_rtx_SET (VOIDmode,
11663 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11664 gen_rtx_fmt_ee (comparison, QImode,
11665 gen_rtx_REG (CCmode, FLAGS_REG),
11666 const0_rtx)));
11667
11668 return SUBREG_REG (target);
11669 }
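
/* Editorial note: for a builtin such as __builtin_ia32_comieq the insn
   emitted above only sets the flags register; the strict_low_part SET that
   follows it is what materializes the 0/1 result, so the generated code is
   essentially a comiss followed by a setcc into the low byte of the SImode
   target.  */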
11670
11671 /* Expand an expression EXP that calls a built-in function,
11672 with result going to TARGET if that's convenient
11673 (and in mode MODE if that's convenient).
11674 SUBTARGET may be used as the target for computing one of EXP's operands.
11675 IGNORE is nonzero if the value is to be ignored. */
11676
11677 rtx
11678 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11679 tree exp;
11680 rtx target;
11681 rtx subtarget ATTRIBUTE_UNUSED;
11682 enum machine_mode mode ATTRIBUTE_UNUSED;
11683 int ignore ATTRIBUTE_UNUSED;
11684 {
11685 const struct builtin_description *d;
11686 size_t i;
11687 enum insn_code icode;
11688 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11689 tree arglist = TREE_OPERAND (exp, 1);
11690 tree arg0, arg1, arg2;
11691 rtx op0, op1, op2, pat;
11692 enum machine_mode tmode, mode0, mode1, mode2;
11693 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11694
11695 switch (fcode)
11696 {
11697 case IX86_BUILTIN_EMMS:
11698 emit_insn (gen_emms ());
11699 return 0;
11700
11701 case IX86_BUILTIN_SFENCE:
11702 emit_insn (gen_sfence ());
11703 return 0;
11704
11705 case IX86_BUILTIN_PEXTRW:
11706 icode = CODE_FOR_mmx_pextrw;
11707 arg0 = TREE_VALUE (arglist);
11708 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11709 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11710 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11711 tmode = insn_data[icode].operand[0].mode;
11712 mode0 = insn_data[icode].operand[1].mode;
11713 mode1 = insn_data[icode].operand[2].mode;
11714
11715 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11716 op0 = copy_to_mode_reg (mode0, op0);
11717 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11718 {
11719 /* @@@ better error message */
11720 error ("selector must be an immediate");
11721 return gen_reg_rtx (tmode);
11722 }
11723 if (target == 0
11724 || GET_MODE (target) != tmode
11725 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11726 target = gen_reg_rtx (tmode);
11727 pat = GEN_FCN (icode) (target, op0, op1);
11728 if (! pat)
11729 return 0;
11730 emit_insn (pat);
11731 return target;
11732
11733 case IX86_BUILTIN_PINSRW:
11734 icode = CODE_FOR_mmx_pinsrw;
11735 arg0 = TREE_VALUE (arglist);
11736 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11737 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11738 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11739 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11740 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11741 tmode = insn_data[icode].operand[0].mode;
11742 mode0 = insn_data[icode].operand[1].mode;
11743 mode1 = insn_data[icode].operand[2].mode;
11744 mode2 = insn_data[icode].operand[3].mode;
11745
11746 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11747 op0 = copy_to_mode_reg (mode0, op0);
11748 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11749 op1 = copy_to_mode_reg (mode1, op1);
11750 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11751 {
11752 /* @@@ better error message */
11753 error ("selector must be an immediate");
11754 return const0_rtx;
11755 }
11756 if (target == 0
11757 || GET_MODE (target) != tmode
11758 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11759 target = gen_reg_rtx (tmode);
11760 pat = GEN_FCN (icode) (target, op0, op1, op2);
11761 if (! pat)
11762 return 0;
11763 emit_insn (pat);
11764 return target;
11765
11766 case IX86_BUILTIN_MASKMOVQ:
11767 icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
11768 /* Note the arg order is different from the operand order. */
11769 arg1 = TREE_VALUE (arglist);
11770 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11771 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11772 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11773 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11774 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11775 mode0 = insn_data[icode].operand[0].mode;
11776 mode1 = insn_data[icode].operand[1].mode;
11777 mode2 = insn_data[icode].operand[2].mode;
11778
11779 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11780 op0 = copy_to_mode_reg (mode0, op0);
11781 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11782 op1 = copy_to_mode_reg (mode1, op1);
11783 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11784 op2 = copy_to_mode_reg (mode2, op2);
11785 pat = GEN_FCN (icode) (op0, op1, op2);
11786 if (! pat)
11787 return 0;
11788 emit_insn (pat);
11789 return 0;
11790
11791 case IX86_BUILTIN_SQRTSS:
11792 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11793 case IX86_BUILTIN_RSQRTSS:
11794 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11795 case IX86_BUILTIN_RCPSS:
11796 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11797
11798 case IX86_BUILTIN_ANDPS:
11799 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11800 arglist, target);
11801 case IX86_BUILTIN_ANDNPS:
11802 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11803 arglist, target);
11804 case IX86_BUILTIN_ORPS:
11805 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11806 arglist, target);
11807 case IX86_BUILTIN_XORPS:
11808 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11809 arglist, target);
11810
11811 case IX86_BUILTIN_LOADAPS:
11812 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11813
11814 case IX86_BUILTIN_LOADUPS:
11815 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11816
11817 case IX86_BUILTIN_STOREAPS:
11818 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
11819 case IX86_BUILTIN_STOREUPS:
11820 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
11821
11822 case IX86_BUILTIN_LOADSS:
11823 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11824
11825 case IX86_BUILTIN_STORESS:
11826 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
11827
11828 case IX86_BUILTIN_LOADHPS:
11829 case IX86_BUILTIN_LOADLPS:
11830 icode = (fcode == IX86_BUILTIN_LOADHPS
11831 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11832 arg0 = TREE_VALUE (arglist);
11833 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11834 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11835 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11836 tmode = insn_data[icode].operand[0].mode;
11837 mode0 = insn_data[icode].operand[1].mode;
11838 mode1 = insn_data[icode].operand[2].mode;
11839
11840 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11841 op0 = copy_to_mode_reg (mode0, op0);
11842 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11843 if (target == 0
11844 || GET_MODE (target) != tmode
11845 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11846 target = gen_reg_rtx (tmode);
11847 pat = GEN_FCN (icode) (target, op0, op1);
11848 if (! pat)
11849 return 0;
11850 emit_insn (pat);
11851 return target;
11852
11853 case IX86_BUILTIN_STOREHPS:
11854 case IX86_BUILTIN_STORELPS:
11855 icode = (fcode == IX86_BUILTIN_STOREHPS
11856 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11857 arg0 = TREE_VALUE (arglist);
11858 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11859 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11860 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11861 mode0 = insn_data[icode].operand[1].mode;
11862 mode1 = insn_data[icode].operand[2].mode;
11863
11864 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11865 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11866 op1 = copy_to_mode_reg (mode1, op1);
11867
11868 pat = GEN_FCN (icode) (op0, op0, op1);
11869 if (! pat)
11870 return 0;
11871 emit_insn (pat);
11872 return 0;
11873
11874 case IX86_BUILTIN_MOVNTPS:
11875 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
11876 case IX86_BUILTIN_MOVNTQ:
11877 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
11878
11879 case IX86_BUILTIN_LDMXCSR:
11880 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11881 target = assign_386_stack_local (SImode, 0);
11882 emit_move_insn (target, op0);
11883 emit_insn (gen_ldmxcsr (target));
11884 return 0;
11885
11886 case IX86_BUILTIN_STMXCSR:
11887 target = assign_386_stack_local (SImode, 0);
11888 emit_insn (gen_stmxcsr (target));
11889 return copy_to_mode_reg (SImode, target);
11890
11891 case IX86_BUILTIN_SHUFPS:
11892 icode = CODE_FOR_sse_shufps;
11893 arg0 = TREE_VALUE (arglist);
11894 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11895 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11896 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11897 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11898 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11899 tmode = insn_data[icode].operand[0].mode;
11900 mode0 = insn_data[icode].operand[1].mode;
11901 mode1 = insn_data[icode].operand[2].mode;
11902 mode2 = insn_data[icode].operand[3].mode;
11903
11904 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11905 op0 = copy_to_mode_reg (mode0, op0);
11906 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11907 op1 = copy_to_mode_reg (mode1, op1);
11908 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11909 {
11910 /* @@@ better error message */
11911 error ("mask must be an immediate");
11912 return gen_reg_rtx (tmode);
11913 }
11914 if (target == 0
11915 || GET_MODE (target) != tmode
11916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11917 target = gen_reg_rtx (tmode);
11918 pat = GEN_FCN (icode) (target, op0, op1, op2);
11919 if (! pat)
11920 return 0;
11921 emit_insn (pat);
11922 return target;
11923
11924 case IX86_BUILTIN_PSHUFW:
11925 icode = CODE_FOR_mmx_pshufw;
11926 arg0 = TREE_VALUE (arglist);
11927 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11928 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11929 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11930 tmode = insn_data[icode].operand[0].mode;
11931 mode1 = insn_data[icode].operand[1].mode;
11932 mode2 = insn_data[icode].operand[2].mode;
11933
11934 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11935 op0 = copy_to_mode_reg (mode1, op0);
11936 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11937 {
11938 /* @@@ better error message */
11939 error ("mask must be an immediate");
11940 return const0_rtx;
11941 }
11942 if (target == 0
11943 || GET_MODE (target) != tmode
11944 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11945 target = gen_reg_rtx (tmode);
11946 pat = GEN_FCN (icode) (target, op0, op1);
11947 if (! pat)
11948 return 0;
11949 emit_insn (pat);
11950 return target;
11951
11952 case IX86_BUILTIN_FEMMS:
11953 emit_insn (gen_femms ());
11954 return NULL_RTX;
11955
11956 case IX86_BUILTIN_PAVGUSB:
11957 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11958
11959 case IX86_BUILTIN_PF2ID:
11960 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11961
11962 case IX86_BUILTIN_PFACC:
11963 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11964
11965 case IX86_BUILTIN_PFADD:
11966 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11967
11968 case IX86_BUILTIN_PFCMPEQ:
11969 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11970
11971 case IX86_BUILTIN_PFCMPGE:
11972 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11973
11974 case IX86_BUILTIN_PFCMPGT:
11975 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11976
11977 case IX86_BUILTIN_PFMAX:
11978 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11979
11980 case IX86_BUILTIN_PFMIN:
11981 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11982
11983 case IX86_BUILTIN_PFMUL:
11984 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11985
11986 case IX86_BUILTIN_PFRCP:
11987 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11988
11989 case IX86_BUILTIN_PFRCPIT1:
11990 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11991
11992 case IX86_BUILTIN_PFRCPIT2:
11993 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11994
11995 case IX86_BUILTIN_PFRSQIT1:
11996 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11997
11998 case IX86_BUILTIN_PFRSQRT:
11999 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12000
12001 case IX86_BUILTIN_PFSUB:
12002 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12003
12004 case IX86_BUILTIN_PFSUBR:
12005 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12006
12007 case IX86_BUILTIN_PI2FD:
12008 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12009
12010 case IX86_BUILTIN_PMULHRW:
12011 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12012
12013 case IX86_BUILTIN_PF2IW:
12014 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12015
12016 case IX86_BUILTIN_PFNACC:
12017 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12018
12019 case IX86_BUILTIN_PFPNACC:
12020 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12021
12022 case IX86_BUILTIN_PI2FW:
12023 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12024
12025 case IX86_BUILTIN_PSWAPDSI:
12026 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12027
12028 case IX86_BUILTIN_PSWAPDSF:
12029 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12030
12031 case IX86_BUILTIN_SSE_ZERO:
12032 target = gen_reg_rtx (V4SFmode);
12033 emit_insn (gen_sse_clrv4sf (target));
12034 return target;
12035
12036 case IX86_BUILTIN_MMX_ZERO:
12037 target = gen_reg_rtx (DImode);
12038 emit_insn (gen_mmx_clrdi (target));
12039 return target;
12040
12041 default:
12042 break;
12043 }
12044
12045 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12046 if (d->code == fcode)
12047 {
12048 /* Compares are treated specially. */
12049 if (d->icode == CODE_FOR_maskcmpv4sf3
12050 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12051 || d->icode == CODE_FOR_maskncmpv4sf3
12052 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12053 return ix86_expand_sse_compare (d, arglist, target);
12054
12055 return ix86_expand_binop_builtin (d->icode, arglist, target);
12056 }
12057
12058 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12059 if (d->code == fcode)
12060 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12061
12062 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12063 if (d->code == fcode)
12064 return ix86_expand_sse_comi (d, arglist, target);
12065
12066 /* @@@ Should really do something sensible here. */
12067 return 0;
12068 }
12069
12070 /* Store OPERAND to memory after reload is completed.  This means
12071 that we can't easily use assign_stack_local. */
12072 rtx
12073 ix86_force_to_memory (mode, operand)
12074 enum machine_mode mode;
12075 rtx operand;
12076 {
12077 rtx result;
12078 if (!reload_completed)
12079 abort ();
12080 if (TARGET_64BIT && TARGET_RED_ZONE)
12081 {
12082 result = gen_rtx_MEM (mode,
12083 gen_rtx_PLUS (Pmode,
12084 stack_pointer_rtx,
12085 GEN_INT (-RED_ZONE_SIZE)));
12086 emit_move_insn (result, operand);
12087 }
12088 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12089 {
12090 switch (mode)
12091 {
12092 case HImode:
12093 case SImode:
12094 operand = gen_lowpart (DImode, operand);
12095 /* FALLTHRU */
12096 case DImode:
12097 emit_insn (
12098 gen_rtx_SET (VOIDmode,
12099 gen_rtx_MEM (DImode,
12100 gen_rtx_PRE_DEC (DImode,
12101 stack_pointer_rtx)),
12102 operand));
12103 break;
12104 default:
12105 abort ();
12106 }
12107 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12108 }
12109 else
12110 {
12111 switch (mode)
12112 {
12113 case DImode:
12114 {
12115 rtx operands[2];
12116 split_di (&operand, 1, operands, operands + 1);
12117 emit_insn (
12118 gen_rtx_SET (VOIDmode,
12119 gen_rtx_MEM (SImode,
12120 gen_rtx_PRE_DEC (Pmode,
12121 stack_pointer_rtx)),
12122 operands[1]));
12123 emit_insn (
12124 gen_rtx_SET (VOIDmode,
12125 gen_rtx_MEM (SImode,
12126 gen_rtx_PRE_DEC (Pmode,
12127 stack_pointer_rtx)),
12128 operands[0]));
12129 }
12130 break;
12131 case HImode:
12132 /* It is better to store HImodes as SImodes. */
12133 if (!TARGET_PARTIAL_REG_STALL)
12134 operand = gen_lowpart (SImode, operand);
12135 /* FALLTHRU */
12136 case SImode:
12137 emit_insn (
12138 gen_rtx_SET (VOIDmode,
12139 gen_rtx_MEM (GET_MODE (operand),
12140 gen_rtx_PRE_DEC (SImode,
12141 stack_pointer_rtx)),
12142 operand));
12143 break;
12144 default:
12145 abort ();
12146 }
12147 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12148 }
12149 return result;
12150 }
12151
12152 /* Free the operand from memory.  */
12153 void
12154 ix86_free_from_memory (mode)
12155 enum machine_mode mode;
12156 {
12157 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12158 {
12159 int size;
12160
12161 if (mode == DImode || TARGET_64BIT)
12162 size = 8;
12163 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12164 size = 2;
12165 else
12166 size = 4;
12167 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12168          to a pop or add instruction if registers are available.  */
12169 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12170 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12171 GEN_INT (size))));
12172 }
12173 }
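
/* Usage sketch (an editorial addition): ix86_force_to_memory and
   ix86_free_from_memory are meant to be paired, typically from post-reload
   splitters that need to pass an integer value through the stack:

       rtx mem = ix86_force_to_memory (DImode, operands[1]);
       ... emit insns that read MEM ...
       ix86_free_from_memory (DImode);

   The mode passed to ix86_free_from_memory should match the one used when
   the slot was created so that the right amount of stack is released.  */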
12174
12175 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12176 QImode must go into class Q_REGS.
12177 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12178 movdf to do mem-to-mem moves through integer regs. */
12179 enum reg_class
12180 ix86_preferred_reload_class (x, class)
12181 rtx x;
12182 enum reg_class class;
12183 {
12184 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12185 {
12186 /* SSE can't load any constant directly yet. */
12187 if (SSE_CLASS_P (class))
12188 return NO_REGS;
12189 /* Floats can load 0 and 1. */
12190 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12191 {
12192 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12193 if (MAYBE_SSE_CLASS_P (class))
12194 return (reg_class_subset_p (class, GENERAL_REGS)
12195 ? GENERAL_REGS : FLOAT_REGS);
12196 else
12197 return class;
12198 }
12199 /* General regs can load everything. */
12200 if (reg_class_subset_p (class, GENERAL_REGS))
12201 return GENERAL_REGS;
12202 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12203 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12204 return NO_REGS;
12205 }
12206 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12207 return NO_REGS;
12208 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12209 return Q_REGS;
12210 return class;
12211 }
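
/* Worked example (editorial): reloading the constant 1.0 into a class that
   may contain both x87 and SSE registers is narrowed to FLOAT_REGS (or
   GENERAL_REGS where possible), since standard_80387_constant_p accepts it
   and fld1 can materialize it, whereas reloading an arbitrary float
   constant into a pure SSE class yields NO_REGS and therefore forces the
   constant to the constant pool.  */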
12212
12213 /* If we are copying between general and FP registers, we need a memory
12214 location. The same is true for SSE and MMX registers.
12215
12216    The macro can't work reliably when one of the CLASSES is a class containing
12217    registers from multiple units (SSE, MMX, integer).  We avoid this by never
12218    combining those units in a single alternative in the machine description.
12219 Ensure that this constraint holds to avoid unexpected surprises.
12220
12221 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12222 enforce these sanity checks. */
12223 int
12224 ix86_secondary_memory_needed (class1, class2, mode, strict)
12225 enum reg_class class1, class2;
12226 enum machine_mode mode;
12227 int strict;
12228 {
12229 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12230 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12231 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12232 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12233 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12234 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12235 {
12236 if (strict)
12237 abort ();
12238 else
12239 return 1;
12240 }
12241 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12242 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12243 && (mode) != SImode)
12244 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12245 && (mode) != SImode));
12246 }
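
/* Editorial example: copying a DFmode value between an x87 register and an
   SSE register hits the FLOAT_CLASS_P mismatch above, so such a move is
   always routed through a stack slot, while SImode data is allowed to move
   directly between the integer unit and the MMX/SSE units.  */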
12247 /* Return the cost of moving data from a register in class CLASS1 to
12248 one in class CLASS2.
12249
12250 It is not required that the cost always equal 2 when FROM is the same as TO;
12251 on some machines it is expensive to move between registers if they are not
12252 general registers. */
12253 int
12254 ix86_register_move_cost (mode, class1, class2)
12255 enum machine_mode mode;
12256 enum reg_class class1, class2;
12257 {
12258   /* In case we require secondary memory, compute the cost of the store followed
12259      by the load.  When copying from a general purpose register we may emit
12260      multiple stores followed by a single load, causing a memory size mismatch
12261      stall.  Count this as an arbitrarily high cost of 20.  */
12262 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12263 {
12264 int add_cost = 0;
12265 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12266 add_cost = 20;
12267 return (MEMORY_MOVE_COST (mode, class1, 0)
12268 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12269 }
12270 /* Moves between SSE/MMX and integer unit are expensive. */
12271 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12272 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12273 return ix86_cost->mmxsse_to_integer;
12274 if (MAYBE_FLOAT_CLASS_P (class1))
12275 return ix86_cost->fp_move;
12276 if (MAYBE_SSE_CLASS_P (class1))
12277 return ix86_cost->sse_move;
12278 if (MAYBE_MMX_CLASS_P (class1))
12279 return ix86_cost->mmx_move;
12280 return 2;
12281 }
12282
12283 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12284 int
12285 ix86_hard_regno_mode_ok (regno, mode)
12286 int regno;
12287 enum machine_mode mode;
12288 {
12289   /* Flags and only flags can hold CCmode values, and they can hold nothing else.  */
12290 if (CC_REGNO_P (regno))
12291 return GET_MODE_CLASS (mode) == MODE_CC;
12292 if (GET_MODE_CLASS (mode) == MODE_CC
12293 || GET_MODE_CLASS (mode) == MODE_RANDOM
12294 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12295 return 0;
12296 if (FP_REGNO_P (regno))
12297 return VALID_FP_MODE_P (mode);
12298 if (SSE_REGNO_P (regno))
12299 return VALID_SSE_REG_MODE (mode);
12300 if (MMX_REGNO_P (regno))
12301 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12302   /* We handle both integers and floats in the general purpose registers.
12303      In the future we should be able to handle vector modes as well.  */
12304 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12305 return 0;
12306   /* Take care with QImode values - they can live in non-QI regs, but then
12307      they cause partial register stalls.  */
12308 if (regno < 4 || mode != QImode || TARGET_64BIT)
12309 return 1;
12310 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12311 }
12312
12313 /* Return the cost of moving data of mode M between a
12314 register and memory. A value of 2 is the default; this cost is
12315 relative to those in `REGISTER_MOVE_COST'.
12316
12317 If moving between registers and memory is more expensive than
12318 between two registers, you should define this macro to express the
12319 relative cost.
12320
12321    Also model the increased cost of moving QImode registers in non-Q_REGS
12322    classes.
12323 */
12324 int
12325 ix86_memory_move_cost (mode, class, in)
12326 enum machine_mode mode;
12327 enum reg_class class;
12328 int in;
12329 {
12330 if (FLOAT_CLASS_P (class))
12331 {
12332 int index;
12333 switch (mode)
12334 {
12335 case SFmode:
12336 index = 0;
12337 break;
12338 case DFmode:
12339 index = 1;
12340 break;
12341 case XFmode:
12342 case TFmode:
12343 index = 2;
12344 break;
12345 default:
12346 return 100;
12347 }
12348 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12349 }
12350 if (SSE_CLASS_P (class))
12351 {
12352 int index;
12353 switch (GET_MODE_SIZE (mode))
12354 {
12355 case 4:
12356 index = 0;
12357 break;
12358 case 8:
12359 index = 1;
12360 break;
12361 case 16:
12362 index = 2;
12363 break;
12364 default:
12365 return 100;
12366 }
12367 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12368 }
12369 if (MMX_CLASS_P (class))
12370 {
12371 int index;
12372 switch (GET_MODE_SIZE (mode))
12373 {
12374 case 4:
12375 index = 0;
12376 break;
12377 case 8:
12378 index = 1;
12379 break;
12380 default:
12381 return 100;
12382 }
12383 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12384 }
12385 switch (GET_MODE_SIZE (mode))
12386 {
12387 case 1:
12388 if (in)
12389 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12390 : ix86_cost->movzbl_load);
12391 else
12392 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12393 : ix86_cost->int_store[0] + 4);
12394 break;
12395 case 2:
12396 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12397 default:
12398       /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
12399 if (mode == TFmode)
12400 mode = XFmode;
12401 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12402 * (int) GET_MODE_SIZE (mode) / 4);
12403 }
12404 }
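
/* Editorial example: for a DImode value in GENERAL_REGS the default case
   above charges the SImode load (or store) cost twice, i.e. roughly
   2 * int_load[2] or 2 * int_store[2] in the units of the cost tables near
   the top of this file.  */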
12405
12406 #ifdef DO_GLOBAL_CTORS_BODY
12407 static void
12408 ix86_svr3_asm_out_constructor (symbol, priority)
12409 rtx symbol;
12410 int priority ATTRIBUTE_UNUSED;
12411 {
12412 init_section ();
12413 fputs ("\tpushl $", asm_out_file);
12414 assemble_name (asm_out_file, XSTR (symbol, 0));
12415 fputc ('\n', asm_out_file);
12416 }
12417 #endif
12418
12419 /* Order the registers for the register allocator.  */
12420
12421 void
12422 x86_order_regs_for_local_alloc ()
12423 {
12424 int pos = 0;
12425 int i;
12426
12427 /* First allocate the local general purpose registers. */
12428 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12429 if (GENERAL_REGNO_P (i) && call_used_regs[i])
12430 reg_alloc_order [pos++] = i;
12431
12432 /* Global general purpose registers. */
12433 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12434 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
12435 reg_alloc_order [pos++] = i;
12436
12437 /* x87 registers come first in case we are doing FP math
12438 using them. */
12439 if (!TARGET_SSE_MATH)
12440 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12441 reg_alloc_order [pos++] = i;
12442
12443 /* SSE registers. */
12444 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
12445 reg_alloc_order [pos++] = i;
12446 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
12447 reg_alloc_order [pos++] = i;
12448
12449   /* x87 registers.  */
12450 if (TARGET_SSE_MATH)
12451 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12452 reg_alloc_order [pos++] = i;
12453
12454 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
12455 reg_alloc_order [pos++] = i;
12456
12457   /* Initialize the rest of the array, since some registers are never
12458      allocated at all.  */
12459 while (pos < FIRST_PSEUDO_REGISTER)
12460 reg_alloc_order [pos++] = 0;
12461 }