1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include <setjmp.h>
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "toplev.h"
42 #include "basic-block.h"
43 #include "ggc.h"
44
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
47 #endif
48
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
59 3, /* MOVE_RATIO */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
80 };
81
82 struct processor_costs i486_cost = { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
89 40, /* cost of a divide/mod */
90 15, /* "large" insn */
91 3, /* MOVE_RATIO */
92 4, /* cost for loading QImode using movzbl */
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
112 };
113
114 struct processor_costs pentium_cost = {
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
117 4, /* variable shift costs */
118 1, /* constant shift costs */
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
121 25, /* cost of a divide/mod */
122 8, /* "large" insn */
123 6, /* MOVE_RATIO */
124 6, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
144 };
145
146 struct processor_costs pentiumpro_cost = {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
149 1, /* variable shift costs */
150 1, /* constant shift costs */
151 4, /* cost of starting a multiply */
152 0, /* cost of multiply per each bit set */
153 17, /* cost of a divide/mod */
154 8, /* "large" insn */
155 6, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
176 };
177
178 struct processor_costs k6_cost = {
179 1, /* cost of an add instruction */
180 2, /* cost of a lea instruction */
181 1, /* variable shift costs */
182 1, /* constant shift costs */
183 3, /* cost of starting a multiply */
184 0, /* cost of multiply per each bit set */
185 18, /* cost of a divide/mod */
186 8, /* "large" insn */
187 4, /* MOVE_RATIO */
188 3, /* cost for loading QImode using movzbl */
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
191 Relative to reg-reg move (2). */
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
196 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
208 };
209
210 struct processor_costs athlon_cost = {
211 1, /* cost of an add instruction */
212 2, /* cost of a lea instruction */
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
217 42, /* cost of a divide/mod */
218 8, /* "large" insn */
219 9, /* MOVE_RATIO */
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
223 Relative to reg-reg move (2). */
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
226 {6, 6, 20}, /* cost of loading fp registers
227 in SFmode, DFmode and XFmode */
228 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
240 };
241
242 struct processor_costs pentium4_cost = {
243 1, /* cost of an add instruction */
244 1, /* cost of a lea instruction */
245 8, /* variable shift costs */
246 8, /* constant shift costs */
247 30, /* cost of starting a multiply */
248 0, /* cost of multiply per each bit set */
249 112, /* cost of a divide/mod */
250 16, /* "large" insn */
251 6, /* MOVE_RATIO */
252 2, /* cost for loading QImode using movzbl */
253 {4, 5, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 3, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 12, /* cost of moving SSE register */
267 {12, 12, 12}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 10, /* MMX or SSE register to integer */
272 };
273
274 struct processor_costs *ix86_cost = &pentium_cost;
275
276 /* Processor feature/optimization bitmasks. */
277 #define m_386 (1<<PROCESSOR_I386)
278 #define m_486 (1<<PROCESSOR_I486)
279 #define m_PENT (1<<PROCESSOR_PENTIUM)
280 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
281 #define m_K6 (1<<PROCESSOR_K6)
282 #define m_ATHLON (1<<PROCESSOR_ATHLON)
283 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
284
285 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
286 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
287 const int x86_zero_extend_with_and = m_486 | m_PENT;
288 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
289 const int x86_double_with_add = ~m_386;
290 const int x86_use_bit_test = m_386;
291 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
292 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
293 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
294 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
295 const int x86_partial_reg_stall = m_PPRO;
296 const int x86_use_loop = m_K6;
297 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
298 const int x86_use_mov0 = m_K6;
299 const int x86_use_cltd = ~(m_PENT | m_K6);
300 const int x86_read_modify_write = ~m_PENT;
301 const int x86_read_modify = ~(m_PENT | m_PPRO);
302 const int x86_split_long_moves = m_PPRO;
303 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
304 const int x86_single_stringop = m_386 | m_PENT4;
305 const int x86_qimode_math = ~(0);
306 const int x86_promote_qi_regs = 0;
307 const int x86_himode_math = ~(m_PPRO);
308 const int x86_promote_hi_regs = m_PPRO;
309 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
310 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
311 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
312 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
313 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
314 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
315 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
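/* Each mask above is tested by a TARGET_* predicate against the CPU being
   scheduled for.  An illustrative sketch of that pattern, assuming macro
   names along the lines of those in i386.h (the exact names may differ):  */
#if 0
#define CPUMASK (1 << ix86_cpu)
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)  /* nonzero on 386/K6/Athlon */
#endif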
316
317 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
318
319 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
320 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
321 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
322
323 /* Array of the smallest class containing reg number REGNO, indexed by
324 REGNO. Used by REGNO_REG_CLASS in i386.h. */
325
326 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
327 {
328 /* ax, dx, cx, bx */
329 AREG, DREG, CREG, BREG,
330 /* si, di, bp, sp */
331 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
332 /* FP registers */
333 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
334 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
335 /* arg pointer */
336 NON_Q_REGS,
337 /* flags, fpsr, dirflag, frame */
338 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
339 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
340 SSE_REGS, SSE_REGS,
341 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
342 MMX_REGS, MMX_REGS,
343 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
344 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
345 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
346 SSE_REGS, SSE_REGS,
347 };
348
349 /* The "default" register map used in 32bit mode. */
350
351 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
352 {
353 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
354 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
355 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
360 };
361
362 /* The "default" register map used in 64bit mode. */
363 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
364 {
365 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
366 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
367 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
368 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
369 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
370 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
371 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
372 };
373
374 /* Define the register numbers to be used in Dwarf debugging information.
375 The SVR4 reference port C compiler uses the following register numbers
376 in its Dwarf output code:
377 0 for %eax (gcc regno = 0)
378 1 for %ecx (gcc regno = 2)
379 2 for %edx (gcc regno = 1)
380 3 for %ebx (gcc regno = 3)
381 4 for %esp (gcc regno = 7)
382 5 for %ebp (gcc regno = 6)
383 6 for %esi (gcc regno = 4)
384 7 for %edi (gcc regno = 5)
385 The following three DWARF register numbers are never generated by
386 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
387 believes these numbers have these meanings.
388 8 for %eip (no gcc equivalent)
389 9 for %eflags (gcc regno = 17)
390 10 for %trapno (no gcc equivalent)
391 It is not at all clear how we should number the FP stack registers
392 for the x86 architecture. If the version of SDB on x86/svr4 were
393 a bit less brain dead with respect to floating-point then we would
394 have a precedent to follow with respect to DWARF register numbers
395 for x86 FP registers, but the SDB on x86/svr4 is so completely
396 broken with respect to FP registers that it is hardly worth thinking
397 of it as something to strive for compatibility with.
398 The version of x86/svr4 SDB I have at the moment does (partially)
399 seem to believe that DWARF register number 11 is associated with
400 the x86 register %st(0), but that's about all. Higher DWARF
401 register numbers don't seem to be associated with anything in
402 particular, and even for DWARF regno 11, SDB only seems to under-
403 stand that it should say that a variable lives in %st(0) (when
404 asked via an `=' command) if we said it was in DWARF regno 11,
405 but SDB still prints garbage when asked for the value of the
406 variable in question (via a `/' command).
407 (Also note that the labels SDB prints for various FP stack regs
408 when doing an `x' command are all wrong.)
409 Note that these problems generally don't affect the native SVR4
410 C compiler because it doesn't allow the use of -O with -g and
411 because when it is *not* optimizing, it allocates a memory
412 location for each floating-point variable, and the memory
413 location is what gets described in the DWARF AT_location
414 attribute for the variable in question.
415 Regardless of the severe mental illness of the x86/svr4 SDB, we
416 do something sensible here and we use the following DWARF
417 register numbers. Note that these are all stack-top-relative
418 numbers.
419 11 for %st(0) (gcc regno = 8)
420 12 for %st(1) (gcc regno = 9)
421 13 for %st(2) (gcc regno = 10)
422 14 for %st(3) (gcc regno = 11)
423 15 for %st(4) (gcc regno = 12)
424 16 for %st(5) (gcc regno = 13)
425 17 for %st(6) (gcc regno = 14)
426 18 for %st(7) (gcc regno = 15)
427 */
428 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
429 {
430 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
431 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
432 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
433 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
434 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
435 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
436 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
437 };
438
439 /* Test and compare insns in i386.md store the information needed to
440 generate branch and scc insns here. */
441
442 struct rtx_def *ix86_compare_op0 = NULL_RTX;
443 struct rtx_def *ix86_compare_op1 = NULL_RTX;
444
445 #define MAX_386_STACK_LOCALS 2
446
447 /* Define the structure for the machine field in struct function. */
448 struct machine_function
449 {
450 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
451 int accesses_prev_frame;
452 };
453
454 #define ix86_stack_locals (cfun->machine->stack_locals)
455
456 /* Structure describing stack frame layout.
457 Stack grows downward:
458
459 [arguments]
460 <- ARG_POINTER
461 saved pc
462
463 saved frame pointer if frame_pointer_needed
464 <- HARD_FRAME_POINTER
465 [saved regs]
466
467 [padding1] \
468 )
469 [va_arg registers] (
470 > to_allocate <- FRAME_POINTER
471 [frame] (
472 )
473 [padding2] /
474 */
475 struct ix86_frame
476 {
477 int nregs;
478 int padding1;
479 HOST_WIDE_INT frame;
480 int padding2;
481 int outgoing_arguments_size;
482
483 HOST_WIDE_INT to_allocate;
484 /* The offsets relative to ARG_POINTER. */
485 HOST_WIDE_INT frame_pointer_offset;
486 HOST_WIDE_INT hard_frame_pointer_offset;
487 HOST_WIDE_INT stack_pointer_offset;
488 };
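/* A reading of the diagram above (a sketch, not guaranteed by the code
   shown here): to_allocate covers padding1, the va_arg register save area,
   frame and padding2, i.e. the block the prologue subtracts from %esp
   after pushing the saved registers.  */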
489
490 /* Code model option as passed by user. */
491 const char *ix86_cmodel_string;
492 /* Parsed value. */
493 enum cmodel ix86_cmodel;
494
495 /* which cpu are we scheduling for */
496 enum processor_type ix86_cpu;
497
498 /* which instruction set architecture to use. */
499 int ix86_arch;
500
501 /* Strings to hold which cpu and instruction set architecture to use. */
502 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
503 const char *ix86_arch_string; /* for -march=<xxx> */
504
505 /* # of registers to use to pass arguments. */
506 const char *ix86_regparm_string;
507
508 /* ix86_regparm_string as a number */
509 int ix86_regparm;
510
511 /* Alignment to use for loops and jumps: */
512
513 /* Power of two alignment for loops. */
514 const char *ix86_align_loops_string;
515
516 /* Power of two alignment for non-loop jumps. */
517 const char *ix86_align_jumps_string;
518
519 /* Power of two alignment for stack boundary in bytes. */
520 const char *ix86_preferred_stack_boundary_string;
521
522 /* Preferred alignment for stack boundary in bits. */
523 int ix86_preferred_stack_boundary;
524
525 /* Values 1-5: see jump.c */
526 int ix86_branch_cost;
527 const char *ix86_branch_cost_string;
528
529 /* Power of two alignment for functions. */
530 int ix86_align_funcs;
531 const char *ix86_align_funcs_string;
532
533 /* Power of two alignment for loops. */
534 int ix86_align_loops;
535
536 /* Power of two alignment for non-loop jumps. */
537 int ix86_align_jumps;
538 \f
539 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
540 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
541 int, int, FILE *));
542 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
543 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
544 rtx *, rtx *));
545 static rtx gen_push PARAMS ((rtx));
546 static int memory_address_length PARAMS ((rtx addr));
547 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
548 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
549 static int ix86_safe_length PARAMS ((rtx));
550 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
551 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
552 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
553 static void ix86_dump_ppro_packet PARAMS ((FILE *));
554 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
555 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
556 rtx));
557 static void ix86_init_machine_status PARAMS ((struct function *));
558 static void ix86_mark_machine_status PARAMS ((struct function *));
559 static void ix86_free_machine_status PARAMS ((struct function *));
560 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
561 static int ix86_safe_length_prefix PARAMS ((rtx));
562 static int ix86_nsaved_regs PARAMS((void));
563 static void ix86_emit_save_regs PARAMS((void));
564 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
565 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
566 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
567 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
568 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
569 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
570
571 struct ix86_address
572 {
573 rtx base, index, disp;
574 HOST_WIDE_INT scale;
575 };
576
577 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
578
579 struct builtin_description;
580 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
581 rtx));
582 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
583 rtx));
584 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
585 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
586 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
587 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
588 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
589 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
590 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
591 enum rtx_code *,
592 enum rtx_code *,
593 enum rtx_code *));
594 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
595 rtx *, rtx *));
596 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
597 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
598 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
599 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
600 static int ix86_save_reg PARAMS ((int));
601 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
602 \f
603 /* Sometimes certain combinations of command options do not make
604 sense on a particular target machine. You can define a macro
605 `OVERRIDE_OPTIONS' to take account of this. This macro, if
606 defined, is executed once just after all the command options have
607 been parsed.
608
609 Don't use this macro to turn on various extra optimizations for
610 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
611
612 void
613 override_options ()
614 {
615 int i;
616 /* Comes from final.c -- no real reason to change it. */
617 #define MAX_CODE_ALIGN 16
618
619 static struct ptt
620 {
621 struct processor_costs *cost; /* Processor costs */
622 int target_enable; /* Target flags to enable. */
623 int target_disable; /* Target flags to disable. */
624 int align_loop; /* Default alignments. */
625 int align_jump;
626 int align_func;
627 int branch_cost;
628 }
629 const processor_target_table[PROCESSOR_max] =
630 {
631 {&i386_cost, 0, 0, 2, 2, 2, 1},
632 {&i486_cost, 0, 0, 4, 4, 4, 1},
633 {&pentium_cost, 0, 0, -4, -4, -4, 1},
634 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
635 {&k6_cost, 0, 0, -5, -5, 4, 1},
636 {&athlon_cost, 0, 0, 4, -4, 4, 1},
637 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
638 };
639
640 static struct pta
641 {
642 const char *name; /* processor name or nickname. */
643 enum processor_type processor;
644 }
645 const processor_alias_table[] =
646 {
647 {"i386", PROCESSOR_I386},
648 {"i486", PROCESSOR_I486},
649 {"i586", PROCESSOR_PENTIUM},
650 {"pentium", PROCESSOR_PENTIUM},
651 {"i686", PROCESSOR_PENTIUMPRO},
652 {"pentiumpro", PROCESSOR_PENTIUMPRO},
653 {"k6", PROCESSOR_K6},
654 {"athlon", PROCESSOR_ATHLON},
655 {"pentium4", PROCESSOR_PENTIUM4},
656 };
657
658 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
659
660 #ifdef SUBTARGET_OVERRIDE_OPTIONS
661 SUBTARGET_OVERRIDE_OPTIONS;
662 #endif
663
664 ix86_arch = PROCESSOR_I386;
665 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
666
667 if (ix86_cmodel_string != 0)
668 {
669 if (!strcmp (ix86_cmodel_string, "small"))
670 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
671 else if (flag_pic)
672 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
673 else if (!strcmp (ix86_cmodel_string, "32"))
674 ix86_cmodel = CM_32;
675 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
676 ix86_cmodel = CM_KERNEL;
677 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
678 ix86_cmodel = CM_MEDIUM;
679 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
680 ix86_cmodel = CM_LARGE;
681 else
682 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
683 }
684 else
685 {
686 ix86_cmodel = CM_32;
687 if (TARGET_64BIT)
688 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
689 }
690 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
691 error ("Code model `%s' not supported in the %s bit mode.",
692 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
693 if (ix86_cmodel == CM_LARGE)
694 sorry ("Code model `large' not supported yet.");
695
696 if (ix86_arch_string != 0)
697 {
698 for (i = 0; i < pta_size; i++)
699 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
700 {
701 ix86_arch = processor_alias_table[i].processor;
702 /* Default cpu tuning to the architecture. */
703 ix86_cpu = ix86_arch;
704 break;
705 }
706
707 if (i == pta_size)
708 error ("bad value (%s) for -march= switch", ix86_arch_string);
709 }
710
711 if (ix86_cpu_string != 0)
712 {
713 for (i = 0; i < pta_size; i++)
714 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
715 {
716 ix86_cpu = processor_alias_table[i].processor;
717 break;
718 }
719 if (i == pta_size)
720 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
721 }
722
723 ix86_cost = processor_target_table[ix86_cpu].cost;
724 target_flags |= processor_target_table[ix86_cpu].target_enable;
725 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
726
727 /* Arrange to set up i386_stack_locals for all functions. */
728 init_machine_status = ix86_init_machine_status;
729 mark_machine_status = ix86_mark_machine_status;
730 free_machine_status = ix86_free_machine_status;
731
732 /* Validate -mregparm= value. */
733 if (ix86_regparm_string)
734 {
735 i = atoi (ix86_regparm_string);
736 if (i < 0 || i > REGPARM_MAX)
737 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
738 else
739 ix86_regparm = i;
740 }
741
742 /* Validate -malign-loops= value, or provide default. */
743 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
744 if (ix86_align_loops_string)
745 {
746 i = atoi (ix86_align_loops_string);
747 if (i < 0 || i > MAX_CODE_ALIGN)
748 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
749 else
750 ix86_align_loops = i;
751 }
752
753 /* Validate -malign-jumps= value, or provide default. */
754 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
755 if (ix86_align_jumps_string)
756 {
757 i = atoi (ix86_align_jumps_string);
758 if (i < 0 || i > MAX_CODE_ALIGN)
759 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
760 else
761 ix86_align_jumps = i;
762 }
763
764 /* Validate -malign-functions= value, or provide default. */
765 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
766 if (ix86_align_funcs_string)
767 {
768 i = atoi (ix86_align_funcs_string);
769 if (i < 0 || i > MAX_CODE_ALIGN)
770 error ("-malign-functions=%d is not between 0 and %d",
771 i, MAX_CODE_ALIGN);
772 else
773 ix86_align_funcs = i;
774 }
775
776 /* Validate -mpreferred-stack-boundary= value, or provide default.
777 The default of 128 bits is for Pentium III's SSE __m128. */
778 ix86_preferred_stack_boundary = 128;
779 if (ix86_preferred_stack_boundary_string)
780 {
781 i = atoi (ix86_preferred_stack_boundary_string);
782 if (i < 2 || i > 31)
783 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
784 else
785 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
786 }
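/* Worked example: the option value is a power of two in bytes, so
   -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 16 bytes = 128 bits, matching the __m128 default above.  */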
787
788 /* Validate -mbranch-cost= value, or provide default. */
789 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
790 if (ix86_branch_cost_string)
791 {
792 i = atoi (ix86_branch_cost_string);
793 if (i < 0 || i > 5)
794 error ("-mbranch-cost=%d is not between 0 and 5", i);
795 else
796 ix86_branch_cost = i;
797 }
798
799 /* Keep nonleaf frame pointers. */
800 if (TARGET_OMIT_LEAF_FRAME_POINTER)
801 flag_omit_frame_pointer = 1;
802
803 /* If we're doing fast math, we don't care about comparison order
804 wrt NaNs. This lets us use a shorter comparison sequence. */
805 if (flag_unsafe_math_optimizations)
806 target_flags &= ~MASK_IEEE_FP;
807
808 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
809 on by -msse. */
810 if (TARGET_SSE)
811 target_flags |= MASK_MMX;
812 }
813 \f
814 void
815 optimization_options (level, size)
816 int level;
817 int size ATTRIBUTE_UNUSED;
818 {
819 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
820 make the problem with not enough registers even worse. */
821 #ifdef INSN_SCHEDULING
822 if (level > 1)
823 flag_schedule_insns = 0;
824 #endif
825 }
826 \f
827 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
828 attribute for DECL. The attributes in ATTRIBUTES have previously been
829 assigned to DECL. */
830
831 int
832 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
833 tree decl ATTRIBUTE_UNUSED;
834 tree attributes ATTRIBUTE_UNUSED;
835 tree identifier ATTRIBUTE_UNUSED;
836 tree args ATTRIBUTE_UNUSED;
837 {
838 return 0;
839 }
840
841 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
842 attribute for TYPE. The attributes in ATTRIBUTES have previously been
843 assigned to TYPE. */
844
845 int
846 ix86_valid_type_attribute_p (type, attributes, identifier, args)
847 tree type;
848 tree attributes ATTRIBUTE_UNUSED;
849 tree identifier;
850 tree args;
851 {
852 if (TREE_CODE (type) != FUNCTION_TYPE
853 && TREE_CODE (type) != METHOD_TYPE
854 && TREE_CODE (type) != FIELD_DECL
855 && TREE_CODE (type) != TYPE_DECL)
856 return 0;
857
858 /* Stdcall attribute says callee is responsible for popping arguments
859 if they are not variable. */
860 if (is_attribute_p ("stdcall", identifier))
861 return (args == NULL_TREE);
862
863 /* Cdecl attribute says the callee is a normal C declaration. */
864 if (is_attribute_p ("cdecl", identifier))
865 return (args == NULL_TREE);
866
867 /* Regparm attribute specifies how many integer arguments are to be
868 passed in registers. */
869 if (is_attribute_p ("regparm", identifier))
870 {
871 tree cst;
872
873 if (! args || TREE_CODE (args) != TREE_LIST
874 || TREE_CHAIN (args) != NULL_TREE
875 || TREE_VALUE (args) == NULL_TREE)
876 return 0;
877
878 cst = TREE_VALUE (args);
879 if (TREE_CODE (cst) != INTEGER_CST)
880 return 0;
881
882 if (compare_tree_int (cst, REGPARM_MAX) > 0)
883 return 0;
884
885 return 1;
886 }
887
888 return 0;
889 }
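/* For illustration, the attributes validated above as they would appear
   in user code (a hypothetical example, not part of this file):  */
#if 0
int __attribute__ ((stdcall)) f (int a, int b);      /* callee pops args */
int __attribute__ ((cdecl)) g (int a, int b);        /* caller pops args */
int __attribute__ ((regparm (2))) h (int a, int b);  /* a, b in registers */
#endif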
890
891 /* Return 0 if the attributes for two types are incompatible, 1 if they
892 are compatible, and 2 if they are nearly compatible (which causes a
893 warning to be generated). */
894
895 int
896 ix86_comp_type_attributes (type1, type2)
897 tree type1;
898 tree type2;
899 {
900 /* Check for mismatch of non-default calling convention. */
901 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
902
903 if (TREE_CODE (type1) != FUNCTION_TYPE)
904 return 1;
905
906 /* Check for mismatched return types (cdecl vs stdcall). */
907 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
908 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
909 return 0;
910 return 1;
911 }
912 \f
913 /* Value is the number of bytes of arguments automatically
914 popped when returning from a subroutine call.
915 FUNDECL is the declaration node of the function (as a tree),
916 FUNTYPE is the data type of the function (as a tree),
917 or for a library call it is an identifier node for the subroutine name.
918 SIZE is the number of bytes of arguments passed on the stack.
919
920 On the 80386, the RTD insn may be used to pop them if the number
921 of args is fixed, but if the number is variable then the caller
922 must pop them all. RTD can't be used for library calls now
923 because the library is compiled with the Unix compiler.
924 Use of RTD is a selectable option, since it is incompatible with
925 standard Unix calling sequences. If the option is not selected,
926 the caller must always pop the args.
927
928 The attribute stdcall is equivalent to RTD on a per module basis. */
929
930 int
931 ix86_return_pops_args (fundecl, funtype, size)
932 tree fundecl;
933 tree funtype;
934 int size;
935 {
936 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
937
938 /* Cdecl functions override -mrtd, and never pop the stack. */
939 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
940
941 /* Stdcall functions will pop the stack if not variable args. */
942 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
943 rtd = 1;
944
945 if (rtd
946 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
947 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
948 == void_type_node)))
949 return size;
950 }
951
952 /* Lose any fake structure return argument. */
953 if (aggregate_value_p (TREE_TYPE (funtype)))
954 return GET_MODE_SIZE (Pmode);
955
956 return 0;
957 }
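/* Worked example: for `int __attribute__ ((stdcall)) f (int, int)' the
   argument list is fixed (it ends in void_type_node), so this function
   returns SIZE = 8 and the callee pops its own arguments (a `ret $8').
   A stdcall function declared with an ellipsis falls through and returns
   0, keeping the caller-pops convention.  */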
958 \f
959 /* Argument support functions. */
960
961 /* Initialize a variable CUM of type CUMULATIVE_ARGS
962 for a call to a function whose data type is FNTYPE.
963 For a library call, FNTYPE is 0. */
964
965 void
966 init_cumulative_args (cum, fntype, libname)
967 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
968 tree fntype; /* tree ptr for function decl */
969 rtx libname; /* SYMBOL_REF of library name or 0 */
970 {
971 static CUMULATIVE_ARGS zero_cum;
972 tree param, next_param;
973
974 if (TARGET_DEBUG_ARG)
975 {
976 fprintf (stderr, "\ninit_cumulative_args (");
977 if (fntype)
978 fprintf (stderr, "fntype code = %s, ret code = %s",
979 tree_code_name[(int) TREE_CODE (fntype)],
980 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
981 else
982 fprintf (stderr, "no fntype");
983
984 if (libname)
985 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
986 }
987
988 *cum = zero_cum;
989
990 /* Set up the number of registers to use for passing arguments. */
991 cum->nregs = ix86_regparm;
992 if (fntype)
993 {
994 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
995
996 if (attr)
997 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
998 }
999
1000 /* Determine if this function has variable arguments. This is
1001 indicated by the last argument being 'void_type_node' if there
1002 are no variable arguments. If there are variable arguments, then
1003 we won't pass anything in registers. */
1004
1005 if (cum->nregs)
1006 {
1007 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1008 param != 0; param = next_param)
1009 {
1010 next_param = TREE_CHAIN (param);
1011 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1012 cum->nregs = 0;
1013 }
1014 }
1015
1016 if (TARGET_DEBUG_ARG)
1017 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1018
1019 return;
1020 }
1021
1022 /* Update the data in CUM to advance over an argument
1023 of mode MODE and data type TYPE.
1024 (TYPE is null for libcalls where that information may not be available.) */
1025
1026 void
1027 function_arg_advance (cum, mode, type, named)
1028 CUMULATIVE_ARGS *cum; /* current arg information */
1029 enum machine_mode mode; /* current arg mode */
1030 tree type; /* type of the argument or 0 if lib support */
1031 int named; /* whether or not the argument was named */
1032 {
1033 int bytes =
1034 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1035 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1036
1037 if (TARGET_DEBUG_ARG)
1038 fprintf (stderr,
1039 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1040 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1041 if (TARGET_SSE && mode == TImode)
1042 {
1043 cum->sse_words += words;
1044 cum->sse_nregs -= 1;
1045 cum->sse_regno += 1;
1046 if (cum->sse_nregs <= 0)
1047 {
1048 cum->sse_nregs = 0;
1049 cum->sse_regno = 0;
1050 }
1051 }
1052 else
1053 {
1054 cum->words += words;
1055 cum->nregs -= words;
1056 cum->regno += words;
1057
1058 if (cum->nregs <= 0)
1059 {
1060 cum->nregs = 0;
1061 cum->regno = 0;
1062 }
1063 }
1064 return;
1065 }
1066
1067 /* Define where to put the arguments to a function.
1068 Value is zero to push the argument on the stack,
1069 or a hard register in which to store the argument.
1070
1071 MODE is the argument's machine mode.
1072 TYPE is the data type of the argument (as a tree).
1073 This is null for libcalls where that information may
1074 not be available.
1075 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1076 the preceding args and about the function being called.
1077 NAMED is nonzero if this argument is a named parameter
1078 (otherwise it is an extra parameter matching an ellipsis). */
1079
1080 struct rtx_def *
1081 function_arg (cum, mode, type, named)
1082 CUMULATIVE_ARGS *cum; /* current arg information */
1083 enum machine_mode mode; /* current arg mode */
1084 tree type; /* type of the argument or 0 if lib support */
1085 int named; /* != 0 for normal args, == 0 for ... args */
1086 {
1087 rtx ret = NULL_RTX;
1088 int bytes =
1089 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1090 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1091
1092 switch (mode)
1093 {
1094 /* For now, pass fp/complex values on the stack. */
1095 default:
1096 break;
1097
1098 case BLKmode:
1099 case DImode:
1100 case SImode:
1101 case HImode:
1102 case QImode:
1103 if (words <= cum->nregs)
1104 ret = gen_rtx_REG (mode, cum->regno);
1105 break;
1106 case TImode:
1107 if (cum->sse_nregs)
1108 ret = gen_rtx_REG (mode, cum->sse_regno);
1109 break;
1110 }
1111
1112 if (TARGET_DEBUG_ARG)
1113 {
1114 fprintf (stderr,
1115 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1116 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1117
1118 if (ret)
1119 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1120 else
1121 fprintf (stderr, ", stack");
1122
1123 fprintf (stderr, " )\n");
1124 }
1125
1126 return ret;
1127 }
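/* Worked example: gcc regnos 0, 1, 2 are %eax, %edx, %ecx (see the
   regclass_map comment earlier in this file), so with regparm (3) the
   second SImode argument arrives with cum->regno == 1 and the code above
   returns gen_rtx_REG (SImode, 1), i.e. %edx.  */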
1128 \f
1129
1130 /* Return nonzero if OP is a general operand representable on x86_64. */
1131
1132 int
1133 x86_64_general_operand (op, mode)
1134 rtx op;
1135 enum machine_mode mode;
1136 {
1137 if (!TARGET_64BIT)
1138 return general_operand (op, mode);
1139 if (nonimmediate_operand (op, mode))
1140 return 1;
1141 return x86_64_sign_extended_value (op);
1142 }
1143
1144 /* Return nonzero if OP is a general operand representable on x86_64
1145 as either a sign extended or zero extended constant. */
1146
1147 int
1148 x86_64_szext_general_operand (op, mode)
1149 rtx op;
1150 enum machine_mode mode;
1151 {
1152 if (!TARGET_64BIT)
1153 return general_operand (op, mode);
1154 if (nonimmediate_operand (op, mode))
1155 return 1;
1156 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1157 }
1158
1159 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
1160
1161 int
1162 x86_64_nonmemory_operand (op, mode)
1163 rtx op;
1164 enum machine_mode mode;
1165 {
1166 if (!TARGET_64BIT)
1167 return nonmemory_operand (op, mode);
1168 if (register_operand (op, mode))
1169 return 1;
1170 return x86_64_sign_extended_value (op);
1171 }
1172
1173 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
1174
1175 int
1176 x86_64_movabs_operand (op, mode)
1177 rtx op;
1178 enum machine_mode mode;
1179 {
1180 if (!TARGET_64BIT || !flag_pic)
1181 return nonmemory_operand (op, mode);
1182 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1183 return 1;
1184 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1185 return 1;
1186 return 0;
1187 }
1188
1189 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign extended or zero extended constant. */
1190
1191 int
1192 x86_64_szext_nonmemory_operand (op, mode)
1193 rtx op;
1194 enum machine_mode mode;
1195 {
1196 if (!TARGET_64BIT)
1197 return nonmemory_operand (op, mode);
1198 if (register_operand (op, mode))
1199 return 1;
1200 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1201 }
1202
1203 /* Return nonzero if OP is an immediate operand representable on x86_64. */
1204
1205 int
1206 x86_64_immediate_operand (op, mode)
1207 rtx op;
1208 enum machine_mode mode;
1209 {
1210 if (!TARGET_64BIT)
1211 return immediate_operand (op, mode);
1212 return x86_64_sign_extended_value (op);
1213 }
1214
1215 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
1216
1217 int
1218 x86_64_zext_immediate_operand (op, mode)
1219 rtx op;
1220 enum machine_mode mode ATTRIBUTE_UNUSED;
1221 {
1222 return x86_64_zero_extended_value (op);
1223 }
1224
1225 /* Return nonzero if OP is (const_int 1), else return zero. */
1226
1227 int
1228 const_int_1_operand (op, mode)
1229 rtx op;
1230 enum machine_mode mode ATTRIBUTE_UNUSED;
1231 {
1232 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1233 }
1234
1235 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1236 reference and a constant. */
1237
1238 int
1239 symbolic_operand (op, mode)
1240 register rtx op;
1241 enum machine_mode mode ATTRIBUTE_UNUSED;
1242 {
1243 switch (GET_CODE (op))
1244 {
1245 case SYMBOL_REF:
1246 case LABEL_REF:
1247 return 1;
1248
1249 case CONST:
1250 op = XEXP (op, 0);
1251 if (GET_CODE (op) == SYMBOL_REF
1252 || GET_CODE (op) == LABEL_REF
1253 || (GET_CODE (op) == UNSPEC
1254 && XINT (op, 1) >= 6
1255 && XINT (op, 1) <= 7))
1256 return 1;
1257 if (GET_CODE (op) != PLUS
1258 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1259 return 0;
1260
1261 op = XEXP (op, 0);
1262 if (GET_CODE (op) == SYMBOL_REF
1263 || GET_CODE (op) == LABEL_REF)
1264 return 1;
1265 /* Only @GOTOFF gets offsets. */
1266 if (GET_CODE (op) != UNSPEC
1267 || XINT (op, 1) != 7)
1268 return 0;
1269
1270 op = XVECEXP (op, 0, 0);
1271 if (GET_CODE (op) == SYMBOL_REF
1272 || GET_CODE (op) == LABEL_REF)
1273 return 1;
1274 return 0;
1275
1276 default:
1277 return 0;
1278 }
1279 }
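/* The magic XINT values 6 and 7 tested above appear to be this port's
   UNSPEC numbers for @GOT and @GOTOFF references respectively; that
   reading matches the "Only @GOTOFF gets offsets" test, which accepts
   an added constant only for UNSPEC 7.  */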
1280
1281 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1282
1283 int
1284 pic_symbolic_operand (op, mode)
1285 register rtx op;
1286 enum machine_mode mode ATTRIBUTE_UNUSED;
1287 {
1288 if (GET_CODE (op) == CONST)
1289 {
1290 op = XEXP (op, 0);
1291 if (GET_CODE (op) == UNSPEC)
1292 return 1;
1293 if (GET_CODE (op) != PLUS
1294 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1295 return 0;
1296 op = XEXP (op, 0);
1297 if (GET_CODE (op) == UNSPEC)
1298 return 1;
1299 }
1300 return 0;
1301 }
1302
1303 /* Test for a valid operand for a call instruction. Don't allow the
1304 arg pointer register or virtual regs since they may decay into
1305 reg + const, which the patterns can't handle. */
1306
1307 int
1308 call_insn_operand (op, mode)
1309 rtx op;
1310 enum machine_mode mode ATTRIBUTE_UNUSED;
1311 {
1312 /* Disallow indirect through a virtual register. This leads to
1313 compiler aborts when trying to eliminate them. */
1314 if (GET_CODE (op) == REG
1315 && (op == arg_pointer_rtx
1316 || op == frame_pointer_rtx
1317 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1318 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1319 return 0;
1320
1321 /* Disallow `call 1234'. Due to varying assembler lameness this
1322 gets either rejected or translated to `call .+1234'. */
1323 if (GET_CODE (op) == CONST_INT)
1324 return 0;
1325
1326 /* Explicitly allow SYMBOL_REF even if pic. */
1327 if (GET_CODE (op) == SYMBOL_REF)
1328 return 1;
1329
1330 /* Half-pic doesn't allow anything but registers and constants.
1331 We've just taken care of the latter. */
1332 if (HALF_PIC_P ())
1333 return register_operand (op, Pmode);
1334
1335 /* Otherwise we can allow any general_operand in the address. */
1336 return general_operand (op, Pmode);
1337 }
1338
1339 int
1340 constant_call_address_operand (op, mode)
1341 rtx op;
1342 enum machine_mode mode ATTRIBUTE_UNUSED;
1343 {
1344 if (GET_CODE (op) == CONST
1345 && GET_CODE (XEXP (op, 0)) == PLUS
1346 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1347 op = XEXP (XEXP (op, 0), 0);
1348 return GET_CODE (op) == SYMBOL_REF;
1349 }
1350
1351 /* Match exactly zero and one. */
1352
1353 int
1354 const0_operand (op, mode)
1355 register rtx op;
1356 enum machine_mode mode;
1357 {
1358 return op == CONST0_RTX (mode);
1359 }
1360
1361 int
1362 const1_operand (op, mode)
1363 register rtx op;
1364 enum machine_mode mode ATTRIBUTE_UNUSED;
1365 {
1366 return op == const1_rtx;
1367 }
1368
1369 /* Match 2, 4, or 8. Used for leal multiplicands. */
1370
1371 int
1372 const248_operand (op, mode)
1373 register rtx op;
1374 enum machine_mode mode ATTRIBUTE_UNUSED;
1375 {
1376 return (GET_CODE (op) == CONST_INT
1377 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1378 }
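/* These are exactly the index scales the i386 addressing modes support,
   e.g. `leal (%eax,%ebx,4), %ecx' computes %ecx = %eax + %ebx * 4 in a
   single instruction.  */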
1379
1380 /* True if this is a constant appropriate for an increment or decrement. */
1381
1382 int
1383 incdec_operand (op, mode)
1384 register rtx op;
1385 enum machine_mode mode;
1386 {
1387 /* On Pentium4, the inc and dec operations cause an extra dependency on the
1388 flags register, since the carry flag is not set. */
1389 if (TARGET_PENTIUM4 && !optimize_size)
1390 return 0;
1391 if (op == const1_rtx || op == constm1_rtx)
1392 return 1;
1393 if (GET_CODE (op) != CONST_INT)
1394 return 0;
1395 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1396 return 1;
1397 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1398 return 1;
1399 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1400 return 1;
1401 return 0;
1402 }
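/* Illustration: on the Pentium 4 the test above steers the compiler
   toward `addl $1, %eax' rather than `incl %eax'.  inc/dec update every
   arithmetic flag except CF, so a later consumer of CF would depend on
   both the inc/dec and an older flags producer, a partial flags stall.  */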
1403
1404 /* Return false if this is the stack pointer, or any other fake
1405 register eliminable to the stack pointer. Otherwise, this is
1406 a register operand.
1407
1408 This is used to prevent esp from being used as an index reg,
1409 which would only happen in pathological cases. */
1410
1411 int
1412 reg_no_sp_operand (op, mode)
1413 register rtx op;
1414 enum machine_mode mode;
1415 {
1416 rtx t = op;
1417 if (GET_CODE (t) == SUBREG)
1418 t = SUBREG_REG (t);
1419 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1420 return 0;
1421
1422 return register_operand (op, mode);
1423 }
1424
1425 int
1426 mmx_reg_operand (op, mode)
1427 register rtx op;
1428 enum machine_mode mode ATTRIBUTE_UNUSED;
1429 {
1430 return MMX_REG_P (op);
1431 }
1432
1433 /* Return false if this is any eliminable register. Otherwise
1434 general_operand. */
1435
1436 int
1437 general_no_elim_operand (op, mode)
1438 register rtx op;
1439 enum machine_mode mode;
1440 {
1441 rtx t = op;
1442 if (GET_CODE (t) == SUBREG)
1443 t = SUBREG_REG (t);
1444 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1445 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1446 || t == virtual_stack_dynamic_rtx)
1447 return 0;
1448
1449 return general_operand (op, mode);
1450 }
1451
1452 /* Return false if this is any eliminable register. Otherwise
1453 register_operand or const_int. */
1454
1455 int
1456 nonmemory_no_elim_operand (op, mode)
1457 register rtx op;
1458 enum machine_mode mode;
1459 {
1460 rtx t = op;
1461 if (GET_CODE (t) == SUBREG)
1462 t = SUBREG_REG (t);
1463 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1464 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1465 || t == virtual_stack_dynamic_rtx)
1466 return 0;
1467
1468 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1469 }
1470
1471 /* Return true if op is a Q_REGS class register. */
1472
1473 int
1474 q_regs_operand (op, mode)
1475 register rtx op;
1476 enum machine_mode mode;
1477 {
1478 if (mode != VOIDmode && GET_MODE (op) != mode)
1479 return 0;
1480 if (GET_CODE (op) == SUBREG)
1481 op = SUBREG_REG (op);
1482 return QI_REG_P (op);
1483 }
1484
1485 /* Return true if op is a NON_Q_REGS class register. */
1486
1487 int
1488 non_q_regs_operand (op, mode)
1489 register rtx op;
1490 enum machine_mode mode;
1491 {
1492 if (mode != VOIDmode && GET_MODE (op) != mode)
1493 return 0;
1494 if (GET_CODE (op) == SUBREG)
1495 op = SUBREG_REG (op);
1496 return NON_QI_REG_P (op);
1497 }
1498
1499 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1500 insns. */
1501 int
1502 sse_comparison_operator (op, mode)
1503 rtx op;
1504 enum machine_mode mode ATTRIBUTE_UNUSED;
1505 {
1506 enum rtx_code code = GET_CODE (op);
1507 switch (code)
1508 {
1509 /* Operations supported directly. */
1510 case EQ:
1511 case LT:
1512 case LE:
1513 case UNORDERED:
1514 case NE:
1515 case UNGE:
1516 case UNGT:
1517 case ORDERED:
1518 return 1;
1519 /* These are equivalent to the ones above in non-IEEE comparisons. */
1520 case UNEQ:
1521 case UNLT:
1522 case UNLE:
1523 case LTGT:
1524 case GE:
1525 case GT:
1526 return !TARGET_IEEE_FP;
1527 default:
1528 return 0;
1529 }
1530 }
1531 /* Return 1 if OP is a valid comparison operator in a valid mode. */
1532 int
1533 ix86_comparison_operator (op, mode)
1534 register rtx op;
1535 enum machine_mode mode;
1536 {
1537 enum machine_mode inmode;
1538 enum rtx_code code = GET_CODE (op);
1539 if (mode != VOIDmode && GET_MODE (op) != mode)
1540 return 0;
1541 if (GET_RTX_CLASS (code) != '<')
1542 return 0;
1543 inmode = GET_MODE (XEXP (op, 0));
1544
1545 if (inmode == CCFPmode || inmode == CCFPUmode)
1546 {
1547 enum rtx_code second_code, bypass_code;
1548 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1549 return (bypass_code == NIL && second_code == NIL);
1550 }
1551 switch (code)
1552 {
1553 case EQ: case NE:
1554 return 1;
1555 case LT: case GE:
1556 if (inmode == CCmode || inmode == CCGCmode
1557 || inmode == CCGOCmode || inmode == CCNOmode)
1558 return 1;
1559 return 0;
1560 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1561 if (inmode == CCmode)
1562 return 1;
1563 return 0;
1564 case GT: case LE:
1565 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1566 return 1;
1567 return 0;
1568 default:
1569 return 0;
1570 }
1571 }
1572
1573 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1574
1575 int
1576 fcmov_comparison_operator (op, mode)
1577 register rtx op;
1578 enum machine_mode mode;
1579 {
1580 enum machine_mode inmode;
1581 enum rtx_code code = GET_CODE (op);
1582 if (mode != VOIDmode && GET_MODE (op) != mode)
1583 return 0;
1584 if (GET_RTX_CLASS (code) != '<')
1585 return 0;
1586 inmode = GET_MODE (XEXP (op, 0));
1587 if (inmode == CCFPmode || inmode == CCFPUmode)
1588 {
1589 enum rtx_code second_code, bypass_code;
1590 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1591 if (bypass_code != NIL || second_code != NIL)
1592 return 0;
1593 code = ix86_fp_compare_code_to_integer (code);
1594 }
1595 /* The i387 supports just a limited set of condition codes. */
1596 switch (code)
1597 {
1598 case LTU: case GTU: case LEU: case GEU:
1599 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1600 return 1;
1601 return 0;
1602 case ORDERED: case UNORDERED:
1603 case EQ: case NE:
1604 return 1;
1605 default:
1606 return 0;
1607 }
1608 }
1609
1610 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1611
1612 int
1613 promotable_binary_operator (op, mode)
1614 register rtx op;
1615 enum machine_mode mode ATTRIBUTE_UNUSED;
1616 {
1617 switch (GET_CODE (op))
1618 {
1619 case MULT:
1620 /* Modern CPUs have the same latency for HImode and SImode multiply,
1621 but the 386 and 486 do HImode multiply faster. */
1622 return ix86_cpu > PROCESSOR_I486;
1623 case PLUS:
1624 case AND:
1625 case IOR:
1626 case XOR:
1627 case ASHIFT:
1628 return 1;
1629 default:
1630 return 0;
1631 }
1632 }
1633
1634 /* Nearly general operand, but accept any const_double, since we wish
1635 to be able to drop them into memory rather than have them get pulled
1636 into registers. */
1637
1638 int
1639 cmp_fp_expander_operand (op, mode)
1640 register rtx op;
1641 enum machine_mode mode;
1642 {
1643 if (mode != VOIDmode && mode != GET_MODE (op))
1644 return 0;
1645 if (GET_CODE (op) == CONST_DOUBLE)
1646 return 1;
1647 return general_operand (op, mode);
1648 }
1649
1650 /* Match an SI or HImode register for a zero_extract. */
1651
1652 int
1653 ext_register_operand (op, mode)
1654 register rtx op;
1655 enum machine_mode mode ATTRIBUTE_UNUSED;
1656 {
1657 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1658 return 0;
1659 return register_operand (op, VOIDmode);
1660 }
1661
1662 /* Return 1 if this is a valid binary floating-point operation.
1663 OP is the expression matched, and MODE is its mode. */
1664
1665 int
1666 binary_fp_operator (op, mode)
1667 register rtx op;
1668 enum machine_mode mode;
1669 {
1670 if (mode != VOIDmode && mode != GET_MODE (op))
1671 return 0;
1672
1673 switch (GET_CODE (op))
1674 {
1675 case PLUS:
1676 case MINUS:
1677 case MULT:
1678 case DIV:
1679 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1680
1681 default:
1682 return 0;
1683 }
1684 }
1685
1686 int
1687 mult_operator(op, mode)
1688 register rtx op;
1689 enum machine_mode mode ATTRIBUTE_UNUSED;
1690 {
1691 return GET_CODE (op) == MULT;
1692 }
1693
1694 int
1695 div_operator(op, mode)
1696 register rtx op;
1697 enum machine_mode mode ATTRIBUTE_UNUSED;
1698 {
1699 return GET_CODE (op) == DIV;
1700 }
1701
1702 int
1703 arith_or_logical_operator (op, mode)
1704 rtx op;
1705 enum machine_mode mode;
1706 {
1707 return ((mode == VOIDmode || GET_MODE (op) == mode)
1708 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1709 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1710 }
1711
1712 /* Returns 1 if OP is a memory operand with a displacement. */
1713
1714 int
1715 memory_displacement_operand (op, mode)
1716 register rtx op;
1717 enum machine_mode mode;
1718 {
1719 struct ix86_address parts;
1720
1721 if (! memory_operand (op, mode))
1722 return 0;
1723
1724 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1725 abort ();
1726
1727 return parts.disp != NULL_RTX;
1728 }
1729
1730 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1731 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1732
1733 ??? It seems likely that this will only work because cmpsi is an
1734 expander, and no actual insns use this. */
1735
1736 int
1737 cmpsi_operand (op, mode)
1738 rtx op;
1739 enum machine_mode mode;
1740 {
1741 if (general_operand (op, mode))
1742 return 1;
1743
1744 if (GET_CODE (op) == AND
1745 && GET_MODE (op) == SImode
1746 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1747 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1748 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1749 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1750 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1751 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1752 return 1;
1753
1754 return 0;
1755 }
1756
1757 /* Returns 1 if OP is a memory operand that cannot be represented by the
1758 modRM array. */
1759
1760 int
1761 long_memory_operand (op, mode)
1762 register rtx op;
1763 enum machine_mode mode;
1764 {
1765 if (! memory_operand (op, mode))
1766 return 0;
1767
1768 return memory_address_length (op) != 0;
1769 }
1770
1771 /* Return nonzero if the rtx is known to be aligned. */
1772
1773 int
1774 aligned_operand (op, mode)
1775 rtx op;
1776 enum machine_mode mode;
1777 {
1778 struct ix86_address parts;
1779
1780 if (!general_operand (op, mode))
1781 return 0;
1782
1783 /* Registers and immediate operands are always "aligned". */
1784 if (GET_CODE (op) != MEM)
1785 return 1;
1786
1787 /* Don't even try to do any aligned optimizations with volatiles. */
1788 if (MEM_VOLATILE_P (op))
1789 return 0;
1790
1791 op = XEXP (op, 0);
1792
1793 /* Pushes and pops are only valid on the stack pointer. */
1794 if (GET_CODE (op) == PRE_DEC
1795 || GET_CODE (op) == POST_INC)
1796 return 1;
1797
1798 /* Decode the address. */
1799 if (! ix86_decompose_address (op, &parts))
1800 abort ();
1801
1802 /* Look for some component that isn't known to be aligned. */
1803 if (parts.index)
1804 {
1805 if (parts.scale < 4
1806 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1807 return 0;
1808 }
1809 if (parts.base)
1810 {
1811 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1812 return 0;
1813 }
1814 if (parts.disp)
1815 {
1816 if (GET_CODE (parts.disp) != CONST_INT
1817 || (INTVAL (parts.disp) & 3) != 0)
1818 return 0;
1819 }
1820
1821 /* Didn't find one -- this must be an aligned address. */
1822 return 1;
1823 }
1824 \f
1825 /* Return true if the constant is something that can be loaded with
1826 a special instruction. Only handle 0.0 and 1.0; others are less
1827 worthwhile. */
1828
1829 int
1830 standard_80387_constant_p (x)
1831 rtx x;
1832 {
1833 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
1834 return -1;
1835 /* Note that the 80387 has other constants, such as pi, that we should
1836 support too. On some machines, these are much slower to load as a standard
1837 constant than to load from doubles in memory. */
1838 if (x == CONST0_RTX (GET_MODE (x)))
1839 return 1;
1840 if (x == CONST1_RTX (GET_MODE (x)))
1841 return 2;
1842 return 0;
1843 }
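
/* Editorial sketch (an illustration, not part of this file): the same
   classification restated on host doubles.  Return values mirror
   standard_80387_constant_p above: 1 selects fldz, 2 selects fld1, and
   0 means the constant must be loaded from memory.  Note that the real
   function compares shared rtx objects, so -0.0 would not match
   CONST0_RTX, while the floating-point comparison below would.  */

static int
classify_80387_constant (d)
     double d;
{
  if (d == 0.0)			/* loadable with fldz */
    return 1;
  if (d == 1.0)			/* loadable with fld1 */
    return 2;
  return 0;			/* fall back to a memory load */
}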
1844
1845 /* Return 1 if X is an FP constant that we can load into an SSE register
1846 without using memory. */
1847 int
1848 standard_sse_constant_p (x)
1849 rtx x;
1850 {
1851 if (GET_CODE (x) != CONST_DOUBLE)
1852 return -1;
1853 return (x == CONST0_RTX (GET_MODE (x)));
1854 }
1855
1856 /* Returns 1 if OP contains a symbol reference. */
1857
1858 int
1859 symbolic_reference_mentioned_p (op)
1860 rtx op;
1861 {
1862 register const char *fmt;
1863 register int i;
1864
1865 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1866 return 1;
1867
1868 fmt = GET_RTX_FORMAT (GET_CODE (op));
1869 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1870 {
1871 if (fmt[i] == 'E')
1872 {
1873 register int j;
1874
1875 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1876 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1877 return 1;
1878 }
1879
1880 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1881 return 1;
1882 }
1883
1884 return 0;
1885 }
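
/* Editorial sketch (not part of this file): the traversal above in
   miniature.  The real function walks the rtx format string, recursing
   into 'e' (single expression) and 'E' (expression vector) slots; the
   toy below collapses both cases into a single child vector and returns
   1 as soon as a "symbol" node is found.  */

struct toy_node
{
  int is_symbol;		/* plays the role of SYMBOL_REF/LABEL_REF */
  int nkids;			/* plays the role of an 'E' vector length */
  struct toy_node **kids;
};

static int
toy_symbol_mentioned_p (n)
     struct toy_node *n;
{
  int i;

  if (n->is_symbol)
    return 1;
  for (i = n->nkids - 1; i >= 0; i--)
    if (toy_symbol_mentioned_p (n->kids[i]))
      return 1;
  return 0;
}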
1886
1887 /* Return 1 if it is appropriate to emit `ret' instructions in the
1888 body of a function. Do this only if the epilogue is simple, needing a
1889 couple of insns. Prior to reloading, we can't tell how many registers
1890 must be saved, so return 0 then. Return 0 if there is no frame
1891 marker to de-allocate.
1892
1893 If NON_SAVING_SETJMP is defined and true, then it is not possible
1894 for the epilogue to be simple, so return 0. This is a special case
1895 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1896 until final, but jump_optimize may need to know sooner if a
1897 `return' is OK. */
1898
1899 int
1900 ix86_can_use_return_insn_p ()
1901 {
1902 struct ix86_frame frame;
1903
1904 #ifdef NON_SAVING_SETJMP
1905 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1906 return 0;
1907 #endif
1908 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1909 if (profile_block_flag == 2)
1910 return 0;
1911 #endif
1912
1913 if (! reload_completed || frame_pointer_needed)
1914 return 0;
1915
1916 /* Don't allow popping more than 32768 bytes of arguments, since
1917 that's all we can do with one instruction. */
1918 if (current_function_pops_args
1919 && current_function_args_size >= 32768)
1920 return 0;
1921
1922 ix86_compute_frame_layout (&frame);
1923 return frame.to_allocate == 0 && frame.nregs == 0;
1924 }
1925 \f
1926 /* Return 1 if VALUE can be stored in the sign-extended immediate field. */
1927 int
1928 x86_64_sign_extended_value (value)
1929 rtx value;
1930 {
1931 switch (GET_CODE (value))
1932 {
1933 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
1934 to be at least 32, and thus all acceptable constants are
1935 represented as CONST_INT. */
1936 case CONST_INT:
1937 if (HOST_BITS_PER_WIDE_INT == 32)
1938 return 1;
1939 else
1940 {
1941 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
1942 return trunc_int_for_mode (val, SImode) == val;
1943 }
1944 break;
1945
1946 /* For certain code models, the symbolic references are known to fit. */
1947 case SYMBOL_REF:
1948 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
1949
1950 /* For certain code models, the code is near as well. */
1951 case LABEL_REF:
1952 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
1953
1954 /* We may also accept offsetted memory references in certain special
1955 cases. */
1956 case CONST:
1957 if (GET_CODE (XEXP (value, 0)) == UNSPEC
1958 && XVECLEN (XEXP (value, 0), 0) == 1
1959 && XINT (XEXP (value, 0), 1) == 15)
1960 return 1;
1961 else if (GET_CODE (XEXP (value, 0)) == PLUS)
1962 {
1963 rtx op1 = XEXP (XEXP (value, 0), 0);
1964 rtx op2 = XEXP (XEXP (value, 0), 1);
1965 HOST_WIDE_INT offset;
1966
1967 if (ix86_cmodel == CM_LARGE)
1968 return 0;
1969 if (GET_CODE (op2) != CONST_INT)
1970 return 0;
1971 offset = trunc_int_for_mode (INTVAL (op2), DImode);
1972 switch (GET_CODE (op1))
1973 {
1974 case SYMBOL_REF:
1975 /* For CM_SMALL assume that the latest object is 1MB before
1976 the end of the 31-bit boundary. We may also accept pretty
1977 large negative constants, knowing that all objects are
1978 in the positive half of the address space. */
1979 if (ix86_cmodel == CM_SMALL
1980 && offset < 1024*1024*1024
1981 && trunc_int_for_mode (offset, SImode) == offset)
1982 return 1;
1983 /* For CM_KERNEL we know that all objects reside in the
1984 negative half of the 32-bit address space. We may not
1985 accept negative offsets, since they may be just off,
1986 but we may accept pretty large positive ones. */
1987 if (ix86_cmodel == CM_KERNEL
1988 && offset > 0
1989 && trunc_int_for_mode (offset, SImode) == offset)
1990 return 1;
1991 break;
1992 case LABEL_REF:
1993 /* These conditions are similar to SYMBOL_REF ones, just the
1994 constraints for code models differ. */
1995 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
1996 && offset < 1024*1024*1024
1997 && trunc_int_for_mode (offset, SImode) == offset)
1998 return 1;
1999 if (ix86_cmodel == CM_KERNEL
2000 && offset > 0
2001 && trunc_int_for_mode (offset, SImode) == offset)
2002 return 1;
2003 break;
2004 default:
2005 return 0;
2006 }
2007 }
2008 return 0;
2009 default:
2010 return 0;
2011 }
2012 }
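
/* Editorial sketch (not part of this file): the wide-host CONST_INT test
   above, restated on a plain integer.  A value fits the sign-extended
   32-bit immediate field iff truncating it to 32 bits and sign extending
   back reproduces it; e.g. -1 and 0x7fffffff fit, while 0x80000000 does
   not.  "long long" stands in for HOST_WIDE_INT, and a 32-bit int with
   wraparound conversion is assumed.  */

static int
fits_sign_extended_imm32 (val)
     long long val;
{
  return (long long) (int) val == val;
}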
2013
2014 /* Return 1 if VALUE can be stored in the zero-extended immediate field. */
2015 int
2016 x86_64_zero_extended_value (value)
2017 rtx value;
2018 {
2019 switch (GET_CODE (value))
2020 {
2021 case CONST_DOUBLE:
2022 if (HOST_BITS_PER_WIDE_INT == 32)
2023 return (GET_MODE (value) == VOIDmode
2024 && !CONST_DOUBLE_HIGH (value));
2025 else
2026 return 0;
2027 case CONST_INT:
2028 if (HOST_BITS_PER_WIDE_INT == 32)
2029 return INTVAL (value) >= 0;
2030 else
2031 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2032 break;
2033
2034 /* For certain code models, the symbolic references are known to fit. */
2035 case SYMBOL_REF:
2036 return ix86_cmodel == CM_SMALL;
2037
2038 /* For certain code models, the code is near as well. */
2039 case LABEL_REF:
2040 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2041
2042 /* We may also accept offsetted memory references in certain special
2043 cases. */
2044 case CONST:
2045 if (GET_CODE (XEXP (value, 0)) == PLUS)
2046 {
2047 rtx op1 = XEXP (XEXP (value, 0), 0);
2048 rtx op2 = XEXP (XEXP (value, 0), 1);
2049
2050 if (ix86_cmodel == CM_LARGE)
2051 return 0;
2052 switch (GET_CODE (op1))
2053 {
2054 case SYMBOL_REF:
2056 /* For the small code model we may accept pretty large positive
2057 offsets, since one bit is available for free. Negative
2058 offsets are limited by the size of the NULL pointer area
2059 specified by the ABI. */
2060 if (ix86_cmodel == CM_SMALL
2061 && GET_CODE (op2) == CONST_INT
2062 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2063 && (trunc_int_for_mode (INTVAL (op2), SImode)
2064 == INTVAL (op2)))
2065 return 1;
2066 /* ??? For the kernel, we may accept an adjustment of
2067 -0x10000000, since we know that it will just convert the
2068 negative address space to positive, but perhaps this
2069 is not worthwhile. */
2070 break;
2071 case LABEL_REF:
2072 /* These conditions are similar to SYMBOL_REF ones, just the
2073 constraints for code models differ. */
2074 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2075 && GET_CODE (op2) == CONST_INT
2076 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2077 && (trunc_int_for_mode (INTVAL (op2), SImode)
2078 == INTVAL (op2)))
2079 return 1;
2080 break;
2081 default:
2082 return 0;
2083 }
2084 }
2085 return 0;
2086 default:
2087 return 0;
2088 }
2089 }
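
/* Editorial sketch (not part of this file): the wide-host CONST_INT test
   above, restated.  A value fits the zero-extended 32-bit immediate
   field iff its upper 32 bits are clear.  */

static int
fits_zero_extended_imm32 (val)
     unsigned long long val;
{
  return (val & ~(unsigned long long) 0xffffffff) == 0;
}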
2090
2091 /* Value should be nonzero if functions must have frame pointers.
2092 Zero means the frame pointer need not be set up (and parms may
2093 be accessed via the stack pointer) in functions that seem suitable. */
2094
2095 int
2096 ix86_frame_pointer_required ()
2097 {
2098 /* If we accessed previous frames, then the generated code expects
2099 to be able to access the saved ebp value in our frame. */
2100 if (cfun->machine->accesses_prev_frame)
2101 return 1;
2102
2103 /* Several x86 OSes need a frame pointer for other reasons,
2104 usually pertaining to setjmp. */
2105 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2106 return 1;
2107
2108 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2109 the frame pointer by default. Turn it back on now if we've not
2110 got a leaf function. */
2111 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2112 return 1;
2113
2114 return 0;
2115 }
2116
2117 /* Record that the current function accesses previous call frames. */
2118
2119 void
2120 ix86_setup_frame_addresses ()
2121 {
2122 cfun->machine->accesses_prev_frame = 1;
2123 }
2124 \f
2125 static char pic_label_name[32];
2126
2127 /* This function generates code for -fpic that loads %ebx with
2128 the return address of the caller and then returns. */
2129
2130 void
2131 ix86_asm_file_end (file)
2132 FILE *file;
2133 {
2134 rtx xops[2];
2135
2136 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2137 return;
2138
2139 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
2140 to updating relocations to a section being discarded, such that this
2141 doesn't work. We ought to detect this at configure time. */
2142 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
2143 /* The trick here is to create a linkonce section containing the
2144 pic label thunk, but to refer to it with an internal label.
2145 Because the label is internal, we don't have inter-dso name
2146 binding issues on hosts that don't support ".hidden".
2147
2148 In order to use these macros, however, we must create a fake
2149 function decl. */
2150 {
2151 tree decl = build_decl (FUNCTION_DECL,
2152 get_identifier ("i686.get_pc_thunk"),
2153 error_mark_node);
2154 DECL_ONE_ONLY (decl) = 1;
2155 UNIQUE_SECTION (decl, 0);
2156 named_section (decl, NULL, 0);
2157 }
2158 #else
2159 text_section ();
2160 #endif
2161
2162 /* This used to call ASM_DECLARE_FUNCTION_NAME(), but since it's an
2163 internal (non-global) label that's being emitted, it didn't make
2164 sense to have .type information for local labels. This caused
2165 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2166 me debug info for a label that you're declaring non-global?), so
2167 this was changed to call ASM_OUTPUT_LABEL() instead. */
2168
2169 ASM_OUTPUT_LABEL (file, pic_label_name);
2170
2171 xops[0] = pic_offset_table_rtx;
2172 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2173 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2174 output_asm_insn ("ret", xops);
2175 }
2176
2177 void
2178 load_pic_register ()
2179 {
2180 rtx gotsym, pclab;
2181
2182 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2183
2184 if (TARGET_DEEP_BRANCH_PREDICTION)
2185 {
2186 if (! pic_label_name[0])
2187 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
2188 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
2189 }
2190 else
2191 {
2192 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
2193 }
2194
2195 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2196
2197 if (! TARGET_DEEP_BRANCH_PREDICTION)
2198 emit_insn (gen_popsi1 (pic_offset_table_rtx));
2199
2200 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
2201 }
2202
2203 /* Generate an SImode "push" pattern for input ARG. */
2204
2205 static rtx
2206 gen_push (arg)
2207 rtx arg;
2208 {
2209 return gen_rtx_SET (VOIDmode,
2210 gen_rtx_MEM (SImode,
2211 gen_rtx_PRE_DEC (SImode,
2212 stack_pointer_rtx)),
2213 arg);
2214 }
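
/* For reference, the RTL built by gen_push for a register ARG has the
   shape (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg)), i.e. a
   store through a pre-decremented stack pointer, matching the push
   patterns in i386.md.  */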
2215
2216 /* Return 1 if we need to save REGNO. */
2217 static int
2218 ix86_save_reg (regno)
2219 int regno;
2220 {
2221 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2222 || current_function_uses_const_pool);
2223 return ((regs_ever_live[regno] && !call_used_regs[regno]
2224 && !fixed_regs[regno]
2225 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
2226 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
2227
2228 }
2229
2230 /* Return number of registers to be saved on the stack. */
2231
2232 static int
2233 ix86_nsaved_regs ()
2234 {
2235 int nregs = 0;
2236 int regno;
2237
2238 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2239 if (ix86_save_reg (regno))
2240 nregs++;
2241 return nregs;
2242 }
2243
2244 /* Return the offset between two registers, one to be eliminated, and the other
2245 its replacement, at the start of a routine. */
2246
2247 HOST_WIDE_INT
2248 ix86_initial_elimination_offset (from, to)
2249 int from;
2250 int to;
2251 {
2252 struct ix86_frame frame;
2253 ix86_compute_frame_layout (&frame);
2254
2255 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2256 return frame.hard_frame_pointer_offset;
2257 else if (from == FRAME_POINTER_REGNUM
2258 && to == HARD_FRAME_POINTER_REGNUM)
2259 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
2260 else
2261 {
2262 if (to != STACK_POINTER_REGNUM)
2263 abort ();
2264 else if (from == ARG_POINTER_REGNUM)
2265 return frame.stack_pointer_offset;
2266 else if (from != FRAME_POINTER_REGNUM)
2267 abort ();
2268 else
2269 return frame.stack_pointer_offset - frame.frame_pointer_offset;
2270 }
2271 }
2272
2273 /* Fill the structure ix86_frame describing the frame of the current function. */
2274
2275 static void
2276 ix86_compute_frame_layout (frame)
2277 struct ix86_frame *frame;
2278 {
2279 HOST_WIDE_INT total_size;
2280 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
2281 int offset;
2282 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
2283 HOST_WIDE_INT size = get_frame_size ();
2284
2285 frame->nregs = ix86_nsaved_regs ();
2286 total_size = size;
2287
2288 /* Skip return value and save base pointer. */
2289 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2290
2291 frame->hard_frame_pointer_offset = offset;
2292
2293 /* Do some sanity checking of stack_alignment_needed and
2294 preferred_alignment, since the i386 port is the only one using these
2295 features, and they may break easily. */
2296
2297 if (size && !stack_alignment_needed)
2298 abort ();
2299 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2300 abort ();
2301 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2302 abort ();
2303 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2304 abort ();
2305
2306 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2307 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
2308
2309 /* Register save area */
2310 offset += frame->nregs * UNITS_PER_WORD;
2311
2312 /* Align start of frame for local function. */
2313 frame->padding1 = ((offset + stack_alignment_needed - 1)
2314 & -stack_alignment_needed) - offset;
2315
2316 offset += frame->padding1;
2317
2318 /* Frame pointer points here. */
2319 frame->frame_pointer_offset = offset;
2320
2321 offset += size;
2322
2323 /* Add outgoing arguments area. */
2324 if (ACCUMULATE_OUTGOING_ARGS)
2325 {
2326 offset += current_function_outgoing_args_size;
2327 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2328 }
2329 else
2330 frame->outgoing_arguments_size = 0;
2331
2332 /* Align stack boundary. */
2333 frame->padding2 = ((offset + preferred_alignment - 1)
2334 & -preferred_alignment) - offset;
2335
2336 offset += frame->padding2;
2337
2338 /* We've reached end of stack frame. */
2339 frame->stack_pointer_offset = offset;
2340
2341 /* Size prologue needs to allocate. */
2342 frame->to_allocate =
2343 (size + frame->padding1 + frame->padding2
2344 + frame->outgoing_arguments_size);
2345
2346 #if 0
2347 fprintf (stderr, "nregs: %i\n", frame->nregs);
2348 fprintf (stderr, "size: %i\n", size);
2349 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2350 fprintf (stderr, "padding1: %i\n", frame->padding1);
2351 fprintf (stderr, "padding2: %i\n", frame->padding2);
2352 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2353 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2354 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2355 frame->hard_frame_pointer_offset);
2356 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2357 #endif
2358 }
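
/* Editorial sketch (not part of this file): the rounding used twice
   above for padding1 and padding2, restated standalone.  For a power
   of two ALIGN, ((offset + align - 1) & -align) rounds OFFSET up to
   the next multiple of ALIGN, so the difference is the padding to
   insert.  For example, frame_padding (20, 16) == 12.  */

static int
frame_padding (offset, align)
     int offset, align;
{
  return ((offset + align - 1) & -align) - offset;
}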
2359
2360 /* Emit code to save registers in the prologue. */
2361
2362 static void
2363 ix86_emit_save_regs ()
2364 {
2365 register int regno;
2366 rtx insn;
2367
2368 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2369 if (ix86_save_reg (regno))
2370 {
2371 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2372 RTX_FRAME_RELATED_P (insn) = 1;
2373 }
2374 }
2375
2376 /* Expand the prologue into a bunch of separate insns. */
2377
2378 void
2379 ix86_expand_prologue ()
2380 {
2381 rtx insn;
2382 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2383 || current_function_uses_const_pool);
2384 struct ix86_frame frame;
2385
2386 ix86_compute_frame_layout (&frame);
2387
2388 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2389 slower on all targets. Also sdb doesn't like it. */
2390
2391 if (frame_pointer_needed)
2392 {
2393 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2394 RTX_FRAME_RELATED_P (insn) = 1;
2395
2396 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2397 RTX_FRAME_RELATED_P (insn) = 1;
2398 }
2399
2400 ix86_emit_save_regs ();
2401
2402 if (frame.to_allocate == 0)
2403 ;
2404 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2405 {
2406 if (frame_pointer_needed)
2407 insn = emit_insn (gen_pro_epilogue_adjust_stack
2408 (stack_pointer_rtx, stack_pointer_rtx,
2409 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2410 else
2411 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2412 GEN_INT (-frame.to_allocate)));
2413 RTX_FRAME_RELATED_P (insn) = 1;
2414 }
2415 else
2416 {
2417 /* ??? Is this only valid for Win32? */
2418
2419 rtx arg0, sym;
2420
2421 arg0 = gen_rtx_REG (SImode, 0);
2422 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2423
2424 sym = gen_rtx_MEM (FUNCTION_MODE,
2425 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2426 insn = emit_call_insn (gen_call (sym, const0_rtx));
2427
2428 CALL_INSN_FUNCTION_USAGE (insn)
2429 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2430 CALL_INSN_FUNCTION_USAGE (insn));
2431 }
2432
2433 #ifdef SUBTARGET_PROLOGUE
2434 SUBTARGET_PROLOGUE;
2435 #endif
2436
2437 if (pic_reg_used)
2438 load_pic_register ();
2439
2440 /* If we are profiling, make sure no instructions are scheduled before
2441 the call to mcount. However, if -fpic, the above call will have
2442 done that. */
2443 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2444 emit_insn (gen_blockage ());
2445 }
2446
2447 /* Emit code to add TSIZE to the esp value. Use a POP instruction when
2448 profitable. */
2449
2450 static void
2451 ix86_emit_epilogue_esp_adjustment (tsize)
2452 int tsize;
2453 {
2454 /* If a frame pointer is present, we must be sure to tie the sp
2455 to the fp so that we don't mis-schedule. */
2456 if (frame_pointer_needed)
2457 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2458 stack_pointer_rtx,
2459 GEN_INT (tsize),
2460 hard_frame_pointer_rtx));
2461 else
2462 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2463 GEN_INT (tsize)));
2464 }
2465
2466 /* Emit code to restore saved registers using MOV insns. The first
2467 register is restored from POINTER + OFFSET. */
2468 static void
2469 ix86_emit_restore_regs_using_mov (pointer, offset)
2470 rtx pointer;
2471 int offset;
2472 {
2473 int regno;
2474
2475 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2476 if (ix86_save_reg (regno))
2477 {
2478 emit_move_insn (gen_rtx_REG (Pmode, regno),
2479 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2480 pointer),
2481 offset));
2482 offset += UNITS_PER_WORD;
2483 }
2484 }
2485
2486 /* Restore function stack, frame, and registers. */
2487
2488 void
2489 ix86_expand_epilogue (emit_return)
2490 int emit_return;
2491 {
2492 int regno;
2493 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2494 struct ix86_frame frame;
2495 HOST_WIDE_INT offset;
2496
2497 ix86_compute_frame_layout (&frame);
2498
2499 /* Calculate start of saved registers relative to ebp. */
2500 offset = -frame.nregs * UNITS_PER_WORD;
2501
2502 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2503 if (profile_block_flag == 2)
2504 {
2505 FUNCTION_BLOCK_PROFILER_EXIT;
2506 }
2507 #endif
2508
2509 /* If we're only restoring one register and sp is not valid then
2510 use a move instruction to restore the register, since it's
2511 less work than reloading sp and popping the register.
2512
2513 The default code results in a stack adjustment using an add/lea
2514 instruction, while this code results in a LEAVE instruction (or its
2515 discrete equivalent), so it is profitable in some other cases as
2516 well, especially when there are no registers to restore. We also use
2517 this code when TARGET_USE_LEAVE and there is exactly one register to
2518 pop. This heuristic may need some tuning in the future. */
2519 if ((!sp_valid && frame.nregs <= 1)
2520 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2521 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2522 && frame.nregs == 1))
2523 {
2524 /* Restore registers. We can use ebp or esp to address the memory
2525 locations. If both are available, default to ebp, since offsets
2526 are known to be small. The only exception is esp pointing directly
2527 to the end of the block of saved registers, where we may simplify
2528 the addressing mode. */
2529
2530 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2531 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2532 else
2533 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2534
2535 if (!frame_pointer_needed)
2536 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2537 + frame.nregs * UNITS_PER_WORD);
2538 /* If not an i386, mov & pop is faster than "leave". */
2539 else if (TARGET_USE_LEAVE || optimize_size)
2540 emit_insn (gen_leave ());
2541 else
2542 {
2543 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2544 hard_frame_pointer_rtx,
2545 const0_rtx,
2546 hard_frame_pointer_rtx));
2547 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2548 }
2549 }
2550 else
2551 {
2552 /* First step is to deallocate the stack frame so that we can
2553 pop the registers. */
2554 if (!sp_valid)
2555 {
2556 if (!frame_pointer_needed)
2557 abort ();
2558 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2559 hard_frame_pointer_rtx,
2560 GEN_INT (offset),
2561 hard_frame_pointer_rtx));
2562 }
2563 else if (frame.to_allocate)
2564 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
2565
2566 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2567 if (ix86_save_reg (regno))
2568 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2569 if (frame_pointer_needed)
2570 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2571 }
2572
2573 /* Sibcall epilogues don't want a return instruction. */
2574 if (! emit_return)
2575 return;
2576
2577 if (current_function_pops_args && current_function_args_size)
2578 {
2579 rtx popc = GEN_INT (current_function_pops_args);
2580
2581 /* The i386 can only pop 64K bytes with one ret. If asked to pop more,
2582 pop the return address, do an explicit add, and jump indirectly to
2583 the caller. */
2584
2585 if (current_function_pops_args >= 65536)
2586 {
2587 rtx ecx = gen_rtx_REG (SImode, 2);
2588
2589 emit_insn (gen_popsi1 (ecx));
2590 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2591 emit_jump_insn (gen_return_indirect_internal (ecx));
2592 }
2593 else
2594 emit_jump_insn (gen_return_pop_internal (popc));
2595 }
2596 else
2597 emit_jump_insn (gen_return_internal ());
2598 }
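
/* For reference, the >= 65536 path above emits the moral equivalent of

       popl  %ecx          (pop the return address)
       addl  $N, %esp      (pop the arguments)
       jmp   *%ecx         (return to the caller)

   since the immediate operand of "ret $N" is only 16 bits wide.  */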
2599 \f
2600 /* Extract the parts of an RTL expression that is a valid memory address
2601 for an instruction. Return false if the structure of the address is
2602 grossly off. */
2603
2604 static int
2605 ix86_decompose_address (addr, out)
2606 register rtx addr;
2607 struct ix86_address *out;
2608 {
2609 rtx base = NULL_RTX;
2610 rtx index = NULL_RTX;
2611 rtx disp = NULL_RTX;
2612 HOST_WIDE_INT scale = 1;
2613 rtx scale_rtx = NULL_RTX;
2614
2615 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2616 base = addr;
2617 else if (GET_CODE (addr) == PLUS)
2618 {
2619 rtx op0 = XEXP (addr, 0);
2620 rtx op1 = XEXP (addr, 1);
2621 enum rtx_code code0 = GET_CODE (op0);
2622 enum rtx_code code1 = GET_CODE (op1);
2623
2624 if (code0 == REG || code0 == SUBREG)
2625 {
2626 if (code1 == REG || code1 == SUBREG)
2627 index = op0, base = op1; /* index + base */
2628 else
2629 base = op0, disp = op1; /* base + displacement */
2630 }
2631 else if (code0 == MULT)
2632 {
2633 index = XEXP (op0, 0);
2634 scale_rtx = XEXP (op0, 1);
2635 if (code1 == REG || code1 == SUBREG)
2636 base = op1; /* index*scale + base */
2637 else
2638 disp = op1; /* index*scale + disp */
2639 }
2640 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2641 {
2642 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2643 scale_rtx = XEXP (XEXP (op0, 0), 1);
2644 base = XEXP (op0, 1);
2645 disp = op1;
2646 }
2647 else if (code0 == PLUS)
2648 {
2649 index = XEXP (op0, 0); /* index + base + disp */
2650 base = XEXP (op0, 1);
2651 disp = op1;
2652 }
2653 else
2654 return FALSE;
2655 }
2656 else if (GET_CODE (addr) == MULT)
2657 {
2658 index = XEXP (addr, 0); /* index*scale */
2659 scale_rtx = XEXP (addr, 1);
2660 }
2661 else if (GET_CODE (addr) == ASHIFT)
2662 {
2663 rtx tmp;
2664
2665 /* We're called for lea too, which implements ashift on occasion. */
2666 index = XEXP (addr, 0);
2667 tmp = XEXP (addr, 1);
2668 if (GET_CODE (tmp) != CONST_INT)
2669 return FALSE;
2670 scale = INTVAL (tmp);
2671 if ((unsigned HOST_WIDE_INT) scale > 3)
2672 return FALSE;
2673 scale = 1 << scale;
2674 }
2675 else
2676 disp = addr; /* displacement */
2677
2678 /* Extract the integral value of scale. */
2679 if (scale_rtx)
2680 {
2681 if (GET_CODE (scale_rtx) != CONST_INT)
2682 return FALSE;
2683 scale = INTVAL (scale_rtx);
2684 }
2685
2686 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
2687 if (base && index && scale == 1
2688 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2689 || index == stack_pointer_rtx))
2690 {
2691 rtx tmp = base;
2692 base = index;
2693 index = tmp;
2694 }
2695
2696 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2697 if ((base == hard_frame_pointer_rtx
2698 || base == frame_pointer_rtx
2699 || base == arg_pointer_rtx) && !disp)
2700 disp = const0_rtx;
2701
2702 /* Special case: on the K6, [%esi] makes the instruction vector
2703 decoded. Avoid this by transforming it to [%esi+0]. */
2704 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2705 && base && !index && !disp
2706 && REG_P (base)
2707 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2708 disp = const0_rtx;
2709
2710 /* Special case: encode reg+reg instead of reg*2. */
2711 if (!base && index && scale && scale == 2)
2712 base = index, scale = 1;
2713
2714 /* Special case: scaling cannot be encoded without base or displacement. */
2715 if (!base && !disp && index && scale != 1)
2716 disp = const0_rtx;
2717
2718 out->base = base;
2719 out->index = index;
2720 out->disp = disp;
2721 out->scale = scale;
2722
2723 return TRUE;
2724 }
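
/* Editorial note (not part of this file): some representative inputs to
   ix86_decompose_address and the parts they produce, for the general
   x86 form base + index*scale + disp:

     (reg %ebx)                               base=%ebx
     (plus (reg %ebx) (const_int 12))         base=%ebx, disp=12
     (plus (mult (reg %eax) (const_int 4))
           (reg %ebx))                        index=%eax, scale=4, base=%ebx
     (ashift (reg %eax) (const_int 3))        index=%eax, scale=8 (lea form)

   plus the special cases above, e.g. a bare (reg %ebp) base gains a
   const0_rtx displacement because %ebp cannot be encoded without one.  */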
2725 \f
2726 /* Return the cost of the memory address X.
2727 For the i386, it is better to use a complex address than to let gcc
2728 copy the address into a register and make a new pseudo. But not if
2729 the address requires two registers - that would mean more pseudos
2730 with longer lifetimes. */
2731 int
2732 ix86_address_cost (x)
2733 rtx x;
2734 {
2735 struct ix86_address parts;
2736 int cost = 1;
2737
2738 if (!ix86_decompose_address (x, &parts))
2739 abort ();
2740
2741 /* More complex memory references are better. */
2742 if (parts.disp && parts.disp != const0_rtx)
2743 cost--;
2744
2745 /* Attempt to minimize number of registers in the address. */
2746 if ((parts.base
2747 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2748 || (parts.index
2749 && (!REG_P (parts.index)
2750 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2751 cost++;
2752
2753 if (parts.base
2754 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2755 && parts.index
2756 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2757 && parts.base != parts.index)
2758 cost++;
2759
2760 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
2761 00_xxx_100b, since its predecode logic can't detect the length of
2762 instructions, and decoding degenerates to vector decoding. Increase
2763 the cost of such addresses here. The penalty is at least 2 cycles. It
2764 may be worthwhile to split such addresses, or even refuse them entirely.
2765
2766 The following addressing modes are affected:
2767 [base+scale*index]
2768 [scale*index+disp]
2769 [base+index]
2770
2771 The first and last cases may be avoidable by explicitly coding a zero
2772 displacement in the memory address, but I don't have an AMD-K6 machine
2773 handy to check this theory. */
2774
2775 if (TARGET_K6
2776 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2777 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2778 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2779 cost += 10;
2780
2781 return cost;
2782 }
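
/* Editorial note (not part of this file): two worked examples of the
   heuristic above, assuming hard registers.  [%ebx+8] decomposes to
   base+disp, so cost = 1 - 1 = 0.  On the K6, [%ebx+%eax*4] has a base,
   an index and scale != 1 but no displacement, so cost = 1 + 10 = 11.  */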
2783 \f
2784 /* If X is a machine specific address (i.e. a symbol or label being
2785 referenced as a displacement from the GOT implemented using an
2786 UNSPEC), then return the base term. Otherwise return X. */
2787
2788 rtx
2789 ix86_find_base_term (x)
2790 rtx x;
2791 {
2792 rtx term;
2793
2794 if (GET_CODE (x) != PLUS
2795 || XEXP (x, 0) != pic_offset_table_rtx
2796 || GET_CODE (XEXP (x, 1)) != CONST)
2797 return x;
2798
2799 term = XEXP (XEXP (x, 1), 0);
2800
2801 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2802 term = XEXP (term, 0);
2803
2804 if (GET_CODE (term) != UNSPEC
2805 || XVECLEN (term, 0) != 1
2806 || XINT (term, 1) != 7)
2807 return x;
2808
2809 term = XVECEXP (term, 0, 0);
2810
2811 if (GET_CODE (term) != SYMBOL_REF
2812 && GET_CODE (term) != LABEL_REF)
2813 return x;
2814
2815 return term;
2816 }
2817 \f
2818 /* Determine if a given CONST RTX is a valid memory displacement
2819 in PIC mode. */
2820
2821 int
2822 legitimate_pic_address_disp_p (disp)
2823 register rtx disp;
2824 {
2825 if (GET_CODE (disp) != CONST)
2826 return 0;
2827 disp = XEXP (disp, 0);
2828
2829 if (GET_CODE (disp) == PLUS)
2830 {
2831 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2832 return 0;
2833 disp = XEXP (disp, 0);
2834 }
2835
2836 if (GET_CODE (disp) != UNSPEC
2837 || XVECLEN (disp, 0) != 1)
2838 return 0;
2839
2840 /* Must be @GOT or @GOTOFF. */
2841 if (XINT (disp, 1) != 6
2842 && XINT (disp, 1) != 7)
2843 return 0;
2844
2845 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2846 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2847 return 0;
2848
2849 return 1;
2850 }
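
/* Editorial note (not part of this file): the displacement shapes the
   predicate above accepts, in both cases optionally with a CONST_INT
   added via PLUS inside the CONST:

     (const (unspec [(symbol_ref "x")] 6))    an @GOT reference
     (const (unspec [(symbol_ref "x")] 7))    an @GOTOFF reference

   A LABEL_REF is allowed in place of the SYMBOL_REF.  */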
2851
2852 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2853 memory address for an instruction. The MODE argument is the machine mode
2854 for the MEM expression that wants to use this address.
2855
2856 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2857 convert common non-canonical forms to canonical form so that they will
2858 be recognized. */
2859
2860 int
2861 legitimate_address_p (mode, addr, strict)
2862 enum machine_mode mode;
2863 register rtx addr;
2864 int strict;
2865 {
2866 struct ix86_address parts;
2867 rtx base, index, disp;
2868 HOST_WIDE_INT scale;
2869 const char *reason = NULL;
2870 rtx reason_rtx = NULL_RTX;
2871
2872 if (TARGET_DEBUG_ADDR)
2873 {
2874 fprintf (stderr,
2875 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2876 GET_MODE_NAME (mode), strict);
2877 debug_rtx (addr);
2878 }
2879
2880 if (! ix86_decompose_address (addr, &parts))
2881 {
2882 reason = "decomposition failed";
2883 goto report_error;
2884 }
2885
2886 base = parts.base;
2887 index = parts.index;
2888 disp = parts.disp;
2889 scale = parts.scale;
2890
2891 /* Validate base register.
2892
2893 Don't allow SUBREGs here; they can lead to spill failures when the base
2894 is one word out of a two-word structure, which is represented internally
2895 as a DImode int. */
2896
2897 if (base)
2898 {
2899 reason_rtx = base;
2900
2901 if (GET_CODE (base) != REG)
2902 {
2903 reason = "base is not a register";
2904 goto report_error;
2905 }
2906
2907 if (GET_MODE (base) != Pmode)
2908 {
2909 reason = "base is not in Pmode";
2910 goto report_error;
2911 }
2912
2913 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2914 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2915 {
2916 reason = "base is not valid";
2917 goto report_error;
2918 }
2919 }
2920
2921 /* Validate index register.
2922
2923 Don't allow SUBREGs here; they can lead to spill failures when the index
2924 is one word out of a two-word structure, which is represented internally
2925 as a DImode int. */
2926
2927 if (index)
2928 {
2929 reason_rtx = index;
2930
2931 if (GET_CODE (index) != REG)
2932 {
2933 reason = "index is not a register";
2934 goto report_error;
2935 }
2936
2937 if (GET_MODE (index) != Pmode)
2938 {
2939 reason = "index is not in Pmode";
2940 goto report_error;
2941 }
2942
2943 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2944 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2945 {
2946 reason = "index is not valid";
2947 goto report_error;
2948 }
2949 }
2950
2951 /* Validate scale factor. */
2952 if (scale != 1)
2953 {
2954 reason_rtx = GEN_INT (scale);
2955 if (!index)
2956 {
2957 reason = "scale without index";
2958 goto report_error;
2959 }
2960
2961 if (scale != 2 && scale != 4 && scale != 8)
2962 {
2963 reason = "scale is not a valid multiplier";
2964 goto report_error;
2965 }
2966 }
2967
2968 /* Validate displacement. */
2969 if (disp)
2970 {
2971 reason_rtx = disp;
2972
2973 if (!CONSTANT_ADDRESS_P (disp))
2974 {
2975 reason = "displacement is not constant";
2976 goto report_error;
2977 }
2978
2979 if (GET_CODE (disp) == CONST_DOUBLE)
2980 {
2981 reason = "displacement is a const_double";
2982 goto report_error;
2983 }
2984
2985 if (flag_pic && SYMBOLIC_CONST (disp))
2986 {
2987 if (! legitimate_pic_address_disp_p (disp))
2988 {
2989 reason = "displacement is an invalid pic construct";
2990 goto report_error;
2991 }
2992
2993 /* This code used to verify that a symbolic pic displacement
2994 includes the pic_offset_table_rtx register.
2995
2996 While this is a good idea, unfortunately these constructs may
2997 be created by the "adds using lea" optimization for incorrect
2998 code like:
2999
3000 int a;
3001 int foo(int i)
3002 {
3003 return *(&a+i);
3004 }
3005
3006 This code is nonsensical, but results in addressing the
3007 GOT table with a pic_offset_table_rtx base. We can't
3008 just refuse it easily, since it gets matched by the
3009 "addsi3" pattern, which later gets split to lea when the
3010 output register differs from the input. While this
3011 could be handled by a separate addsi pattern for this case
3012 that never results in lea, disabling this test seems to be
3013 the easier and correct fix for the crash. */
3014 }
3015 else if (HALF_PIC_P ())
3016 {
3017 if (! HALF_PIC_ADDRESS_P (disp)
3018 || (base != NULL_RTX || index != NULL_RTX))
3019 {
3020 reason = "displacement is an invalid half-pic reference";
3021 goto report_error;
3022 }
3023 }
3024 }
3025
3026 /* Everything looks valid. */
3027 if (TARGET_DEBUG_ADDR)
3028 fprintf (stderr, "Success.\n");
3029 return TRUE;
3030
3031 report_error:
3032 if (TARGET_DEBUG_ADDR)
3033 {
3034 fprintf (stderr, "Error: %s\n", reason);
3035 debug_rtx (reason_rtx);
3036 }
3037 return FALSE;
3038 }
3039 \f
3040 /* Return a unique alias set for the GOT. */
3041
3042 static HOST_WIDE_INT
3043 ix86_GOT_alias_set ()
3044 {
3045 static HOST_WIDE_INT set = -1;
3046 if (set == -1)
3047 set = new_alias_set ();
3048 return set;
3049 }
3050
3051 /* Return a legitimate reference for ORIG (an address) using the
3052 register REG. If REG is 0, a new pseudo is generated.
3053
3054 There are two types of references that must be handled:
3055
3056 1. Global data references must load the address from the GOT, via
3057 the PIC reg. An insn is emitted to do this load, and the reg is
3058 returned.
3059
3060 2. Static data references, constant pool addresses, and code labels
3061 compute the address as an offset from the GOT, whose base is in
3062 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
3063 differentiate them from global data objects. The returned
3064 address is the PIC reg + an unspec constant.
3065
3066 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
3067 reg also appears in the address. */
3068
3069 rtx
3070 legitimize_pic_address (orig, reg)
3071 rtx orig;
3072 rtx reg;
3073 {
3074 rtx addr = orig;
3075 rtx new = orig;
3076 rtx base;
3077
3078 if (GET_CODE (addr) == LABEL_REF
3079 || (GET_CODE (addr) == SYMBOL_REF
3080 && (CONSTANT_POOL_ADDRESS_P (addr)
3081 || SYMBOL_REF_FLAG (addr))))
3082 {
3083 /* This symbol may be referenced via a displacement from the PIC
3084 base address (@GOTOFF). */
3085
3086 current_function_uses_pic_offset_table = 1;
3087 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
3088 new = gen_rtx_CONST (Pmode, new);
3089 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3090
3091 if (reg != 0)
3092 {
3093 emit_move_insn (reg, new);
3094 new = reg;
3095 }
3096 }
3097 else if (GET_CODE (addr) == SYMBOL_REF)
3098 {
3099 /* This symbol must be referenced via a load from the
3100 Global Offset Table (@GOT). */
3101
3102 current_function_uses_pic_offset_table = 1;
3103 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3104 new = gen_rtx_CONST (Pmode, new);
3105 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3106 new = gen_rtx_MEM (Pmode, new);
3107 RTX_UNCHANGING_P (new) = 1;
3108 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
3109
3110 if (reg == 0)
3111 reg = gen_reg_rtx (Pmode);
3112 emit_move_insn (reg, new);
3113 new = reg;
3114 }
3115 else
3116 {
3117 if (GET_CODE (addr) == CONST)
3118 {
3119 addr = XEXP (addr, 0);
3120 if (GET_CODE (addr) == UNSPEC)
3121 {
3122 /* Check that the unspec is one of the ones we generate? */
3123 }
3124 else if (GET_CODE (addr) != PLUS)
3125 abort ();
3126 }
3127 if (GET_CODE (addr) == PLUS)
3128 {
3129 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3130
3131 /* Check first to see if this is a constant offset from a @GOTOFF
3132 symbol reference. */
3133 if ((GET_CODE (op0) == LABEL_REF
3134 || (GET_CODE (op0) == SYMBOL_REF
3135 && (CONSTANT_POOL_ADDRESS_P (op0)
3136 || SYMBOL_REF_FLAG (op0))))
3137 && GET_CODE (op1) == CONST_INT)
3138 {
3139 current_function_uses_pic_offset_table = 1;
3140 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3141 new = gen_rtx_PLUS (Pmode, new, op1);
3142 new = gen_rtx_CONST (Pmode, new);
3143 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3144
3145 if (reg != 0)
3146 {
3147 emit_move_insn (reg, new);
3148 new = reg;
3149 }
3150 }
3151 else
3152 {
3153 base = legitimize_pic_address (XEXP (addr, 0), reg);
3154 new = legitimize_pic_address (XEXP (addr, 1),
3155 base == reg ? NULL_RTX : reg);
3156
3157 if (GET_CODE (new) == CONST_INT)
3158 new = plus_constant (base, INTVAL (new));
3159 else
3160 {
3161 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3162 {
3163 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3164 new = XEXP (new, 1);
3165 }
3166 new = gen_rtx_PLUS (Pmode, base, new);
3167 }
3168 }
3169 }
3170 }
3171 return new;
3172 }
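
/* Editorial note (not part of this file): the addresses produced above
   for the two reference types, writing "pic" for pic_offset_table_rtx:

     local "x":   (plus pic (const (unspec [(symbol_ref "x")] 7)))
                  i.e. x@GOTOFF added to the PIC base, and
     global "x":  (mem (plus pic (const (unspec [(symbol_ref "x")] 6))))
                  i.e. a load of the table entry x@GOT.  */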
3173 \f
3174 /* Try machine-dependent ways of modifying an illegitimate address
3175 to be legitimate. If we find one, return the new, valid address.
3176 This macro is used in only one place: `memory_address' in explow.c.
3177
3178 OLDX is the address as it was before break_out_memory_refs was called.
3179 In some cases it is useful to look at this to decide what needs to be done.
3180
3181 MODE and WIN are passed so that this macro can use
3182 GO_IF_LEGITIMATE_ADDRESS.
3183
3184 It is always safe for this macro to do nothing. It exists to recognize
3185 opportunities to optimize the output.
3186
3187 For the 80386, we handle X+REG by loading X into a register R and
3188 using R+REG. R will go in a general reg and indexing will be used.
3189 However, if REG is a broken-out memory address or multiplication,
3190 nothing needs to be done because REG can certainly go in a general reg.
3191
3192 When -fpic is used, special handling is needed for symbolic references.
3193 See comments by legitimize_pic_address in i386.c for details. */
3194
3195 rtx
3196 legitimize_address (x, oldx, mode)
3197 register rtx x;
3198 register rtx oldx ATTRIBUTE_UNUSED;
3199 enum machine_mode mode;
3200 {
3201 int changed = 0;
3202 unsigned log;
3203
3204 if (TARGET_DEBUG_ADDR)
3205 {
3206 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3207 GET_MODE_NAME (mode));
3208 debug_rtx (x);
3209 }
3210
3211 if (flag_pic && SYMBOLIC_CONST (x))
3212 return legitimize_pic_address (x, 0);
3213
3214 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
3215 if (GET_CODE (x) == ASHIFT
3216 && GET_CODE (XEXP (x, 1)) == CONST_INT
3217 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3218 {
3219 changed = 1;
3220 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3221 GEN_INT (1 << log));
3222 }
3223
3224 if (GET_CODE (x) == PLUS)
3225 {
3226 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
3227
3228 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3229 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3230 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3231 {
3232 changed = 1;
3233 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3234 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3235 GEN_INT (1 << log));
3236 }
3237
3238 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3239 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3240 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3241 {
3242 changed = 1;
3243 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3244 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3245 GEN_INT (1 << log));
3246 }
3247
3248 /* Put multiply first if it isn't already. */
3249 if (GET_CODE (XEXP (x, 1)) == MULT)
3250 {
3251 rtx tmp = XEXP (x, 0);
3252 XEXP (x, 0) = XEXP (x, 1);
3253 XEXP (x, 1) = tmp;
3254 changed = 1;
3255 }
3256
3257 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3258 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3259 created by virtual register instantiation, register elimination, and
3260 similar optimizations. */
3261 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3262 {
3263 changed = 1;
3264 x = gen_rtx_PLUS (Pmode,
3265 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3266 XEXP (XEXP (x, 1), 0)),
3267 XEXP (XEXP (x, 1), 1));
3268 }
3269
3270 /* Canonicalize
3271 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3272 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3273 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3274 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3275 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3276 && CONSTANT_P (XEXP (x, 1)))
3277 {
3278 rtx constant;
3279 rtx other = NULL_RTX;
3280
3281 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3282 {
3283 constant = XEXP (x, 1);
3284 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3285 }
3286 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3287 {
3288 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3289 other = XEXP (x, 1);
3290 }
3291 else
3292 constant = 0;
3293
3294 if (constant)
3295 {
3296 changed = 1;
3297 x = gen_rtx_PLUS (Pmode,
3298 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3299 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3300 plus_constant (other, INTVAL (constant)));
3301 }
3302 }
3303
3304 if (changed && legitimate_address_p (mode, x, FALSE))
3305 return x;
3306
3307 if (GET_CODE (XEXP (x, 0)) == MULT)
3308 {
3309 changed = 1;
3310 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3311 }
3312
3313 if (GET_CODE (XEXP (x, 1)) == MULT)
3314 {
3315 changed = 1;
3316 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3317 }
3318
3319 if (changed
3320 && GET_CODE (XEXP (x, 1)) == REG
3321 && GET_CODE (XEXP (x, 0)) == REG)
3322 return x;
3323
3324 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3325 {
3326 changed = 1;
3327 x = legitimize_pic_address (x, 0);
3328 }
3329
3330 if (changed && legitimate_address_p (mode, x, FALSE))
3331 return x;
3332
3333 if (GET_CODE (XEXP (x, 0)) == REG)
3334 {
3335 register rtx temp = gen_reg_rtx (Pmode);
3336 register rtx val = force_operand (XEXP (x, 1), temp);
3337 if (val != temp)
3338 emit_move_insn (temp, val);
3339
3340 XEXP (x, 1) = temp;
3341 return x;
3342 }
3343
3344 else if (GET_CODE (XEXP (x, 1)) == REG)
3345 {
3346 register rtx temp = gen_reg_rtx (Pmode);
3347 register rtx val = force_operand (XEXP (x, 0), temp);
3348 if (val != temp)
3349 emit_move_insn (temp, val);
3350
3351 XEXP (x, 0) = temp;
3352 return x;
3353 }
3354 }
3355
3356 return x;
3357 }
3358 \f
3359 /* Print an integer constant expression in assembler syntax. Addition
3360 and subtraction are the only arithmetic that may appear in these
3361 expressions. FILE is the stdio stream to write to, X is the rtx, and
3362 CODE is the operand print code from the output string. */
3363
3364 static void
3365 output_pic_addr_const (file, x, code)
3366 FILE *file;
3367 rtx x;
3368 int code;
3369 {
3370 char buf[256];
3371
3372 switch (GET_CODE (x))
3373 {
3374 case PC:
3375 if (flag_pic)
3376 putc ('.', file);
3377 else
3378 abort ();
3379 break;
3380
3381 case SYMBOL_REF:
3382 assemble_name (file, XSTR (x, 0));
3383 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3384 fputs ("@PLT", file);
3385 break;
3386
3387 case LABEL_REF:
3388 x = XEXP (x, 0);
3389 /* FALLTHRU */
3390 case CODE_LABEL:
3391 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3392 assemble_name (asm_out_file, buf);
3393 break;
3394
3395 case CONST_INT:
3396 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3397 break;
3398
3399 case CONST:
3400 /* This used to output parentheses around the expression,
3401 but that does not work on the 386 (either ATT or BSD assembler). */
3402 output_pic_addr_const (file, XEXP (x, 0), code);
3403 break;
3404
3405 case CONST_DOUBLE:
3406 if (GET_MODE (x) == VOIDmode)
3407 {
3408 /* We can use %d if the number is <32 bits and positive. */
3409 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3410 fprintf (file, "0x%lx%08lx",
3411 (unsigned long) CONST_DOUBLE_HIGH (x),
3412 (unsigned long) CONST_DOUBLE_LOW (x));
3413 else
3414 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3415 }
3416 else
3417 /* We can't handle floating point constants;
3418 PRINT_OPERAND must handle them. */
3419 output_operand_lossage ("floating constant misused");
3420 break;
3421
3422 case PLUS:
3423 /* Some assemblers need integer constants to appear first. */
3424 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3425 {
3426 output_pic_addr_const (file, XEXP (x, 0), code);
3427 putc ('+', file);
3428 output_pic_addr_const (file, XEXP (x, 1), code);
3429 }
3430 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3431 {
3432 output_pic_addr_const (file, XEXP (x, 1), code);
3433 putc ('+', file);
3434 output_pic_addr_const (file, XEXP (x, 0), code);
3435 }
3436 else
3437 abort ();
3438 break;
3439
3440 case MINUS:
3441 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3442 output_pic_addr_const (file, XEXP (x, 0), code);
3443 putc ('-', file);
3444 output_pic_addr_const (file, XEXP (x, 1), code);
3445 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3446 break;
3447
3448 case UNSPEC:
3449 if (XVECLEN (x, 0) != 1)
3450 abort ();
3451 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3452 switch (XINT (x, 1))
3453 {
3454 case 6:
3455 fputs ("@GOT", file);
3456 break;
3457 case 7:
3458 fputs ("@GOTOFF", file);
3459 break;
3460 case 8:
3461 fputs ("@PLT", file);
3462 break;
3463 default:
3464 output_operand_lossage ("invalid UNSPEC as operand");
3465 break;
3466 }
3467 break;
3468
3469 default:
3470 output_operand_lossage ("invalid expression as operand");
3471 }
3472 }
3473
3474 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3475 We need to handle our special PIC relocations. */
3476
3477 void
3478 i386_dwarf_output_addr_const (file, x)
3479 FILE *file;
3480 rtx x;
3481 {
3482 fprintf (file, "%s", INT_ASM_OP);
3483 if (flag_pic)
3484 output_pic_addr_const (file, x, '\0');
3485 else
3486 output_addr_const (file, x);
3487 fputc ('\n', file);
3488 }
3489
3490 /* In the name of slightly smaller debug output, and to cater to
3491 general assembler lossage, recognize PIC+GOTOFF and turn it back
3492 into a direct symbol reference. */
3493
3494 rtx
3495 i386_simplify_dwarf_addr (orig_x)
3496 rtx orig_x;
3497 {
3498 rtx x = orig_x;
3499
3500 if (GET_CODE (x) != PLUS
3501 || GET_CODE (XEXP (x, 0)) != REG
3502 || GET_CODE (XEXP (x, 1)) != CONST)
3503 return orig_x;
3504
3505 x = XEXP (XEXP (x, 1), 0);
3506 if (GET_CODE (x) == UNSPEC
3507 && (XINT (x, 1) == 6
3508 || XINT (x, 1) == 7))
3509 return XVECEXP (x, 0, 0);
3510
3511 if (GET_CODE (x) == PLUS
3512 && GET_CODE (XEXP (x, 0)) == UNSPEC
3513 && GET_CODE (XEXP (x, 1)) == CONST_INT
3514 && (XINT (XEXP (x, 0), 1) == 6
3515 || XINT (XEXP (x, 0), 1) == 7))
3516 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3517
3518 return orig_x;
3519 }
3520 \f
3521 static void
3522 put_condition_code (code, mode, reverse, fp, file)
3523 enum rtx_code code;
3524 enum machine_mode mode;
3525 int reverse, fp;
3526 FILE *file;
3527 {
3528 const char *suffix;
3529
3530 if (mode == CCFPmode || mode == CCFPUmode)
3531 {
3532 enum rtx_code second_code, bypass_code;
3533 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3534 if (bypass_code != NIL || second_code != NIL)
3535 abort();
3536 code = ix86_fp_compare_code_to_integer (code);
3537 mode = CCmode;
3538 }
3539 if (reverse)
3540 code = reverse_condition (code);
3541
3542 switch (code)
3543 {
3544 case EQ:
3545 suffix = "e";
3546 break;
3547 case NE:
3548 suffix = "ne";
3549 break;
3550 case GT:
3551 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3552 abort ();
3553 suffix = "g";
3554 break;
3555 case GTU:
3556 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3557 Those same assemblers have the same but opposite lossage on cmov. */
3558 if (mode != CCmode)
3559 abort ();
3560 suffix = fp ? "nbe" : "a";
3561 break;
3562 case LT:
3563 if (mode == CCNOmode || mode == CCGOCmode)
3564 suffix = "s";
3565 else if (mode == CCmode || mode == CCGCmode)
3566 suffix = "l";
3567 else
3568 abort ();
3569 break;
3570 case LTU:
3571 if (mode != CCmode)
3572 abort ();
3573 suffix = "b";
3574 break;
3575 case GE:
3576 if (mode == CCNOmode || mode == CCGOCmode)
3577 suffix = "ns";
3578 else if (mode == CCmode || mode == CCGCmode)
3579 suffix = "ge";
3580 else
3581 abort ();
3582 break;
3583 case GEU:
3584 /* ??? As above. */
3585 if (mode != CCmode)
3586 abort ();
3587 suffix = fp ? "nb" : "ae";
3588 break;
3589 case LE:
3590 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3591 abort ();
3592 suffix = "le";
3593 break;
3594 case LEU:
3595 if (mode != CCmode)
3596 abort ();
3597 suffix = "be";
3598 break;
3599 case UNORDERED:
3600 suffix = fp ? "u" : "p";
3601 break;
3602 case ORDERED:
3603 suffix = fp ? "nu" : "np";
3604 break;
3605 default:
3606 abort ();
3607 }
3608 fputs (suffix, file);
3609 }
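
/* Editorial note (not part of this file): examples of the mapping above.
   (GT, CCmode) yields "g" for a signed compare, (GTU, CCmode) yields "a"
   for an unsigned one, and with REVERSE set an EQ becomes "ne".  */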
3610
3611 void
3612 print_reg (x, code, file)
3613 rtx x;
3614 int code;
3615 FILE *file;
3616 {
3617 if (REGNO (x) == ARG_POINTER_REGNUM
3618 || REGNO (x) == FRAME_POINTER_REGNUM
3619 || REGNO (x) == FLAGS_REG
3620 || REGNO (x) == FPSR_REG)
3621 abort ();
3622
3623 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3624 putc ('%', file);
3625
3626 if (code == 'w')
3627 code = 2;
3628 else if (code == 'b')
3629 code = 1;
3630 else if (code == 'k')
3631 code = 4;
3632 else if (code == 'q')
3633 code = 8;
3634 else if (code == 'y')
3635 code = 3;
3636 else if (code == 'h')
3637 code = 0;
3638 else if (code == 'm' || MMX_REG_P (x))
3639 code = 5;
3640 else
3641 code = GET_MODE_SIZE (GET_MODE (x));
3642
3643 /* Irritatingly, AMD extended registers use a different naming convention
3644 from the normal registers. */
3645 if (REX_INT_REG_P (x))
3646 {
3647 switch (code)
3648 {
3649 case 5:
3650 error ("Extended registers have no high halves\n");
3651 break;
3652 case 1:
3653 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3654 break;
3655 case 2:
3656 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3657 break;
3658 case 4:
3659 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3660 break;
3661 case 8:
3662 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3663 break;
3664 default:
3665 error ("Unsupported operand size for extended register.\n");
3666 break;
3667 }
3668 return;
3669 }
3670 switch (code)
3671 {
3672 case 5:
3673 fputs (hi_reg_name[REGNO (x)], file);
3674 break;
3675 case 3:
3676 if (STACK_TOP_P (x))
3677 {
3678 fputs ("st(0)", file);
3679 break;
3680 }
3681 /* FALLTHRU */
3682 case 8:
3683 case 4:
3684 case 12:
3685 if (! ANY_FP_REG_P (x))
3686 putc (code == 8 ? 'r' : 'e', file);
3687 /* FALLTHRU */
3688 case 16:
3689 case 2:
3690 fputs (hi_reg_name[REGNO (x)], file);
3691 break;
3692 case 1:
3693 fputs (qi_reg_name[REGNO (x)], file);
3694 break;
3695 case 0:
3696 fputs (qi_high_reg_name[REGNO (x)], file);
3697 break;
3698 default:
3699 abort ();
3700 }
3701 }
3702
3703 /* Meaning of CODE:
3704 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3705 C -- print opcode suffix for set/cmov insn.
3706 c -- like C, but print reversed condition
3707 R -- print the prefix for register names.
3708 z -- print the opcode suffix for the size of the current operand.
3709 * -- print a star (in certain assembler syntax)
3710 A -- print an absolute memory reference.
3711 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3712 s -- print a shift double count, followed by the assembler's argument
3713 delimiter.
3714 b -- print the QImode name of the register for the indicated operand.
3715 %b0 would print %al if operands[0] is reg 0.
3716 w -- likewise, print the HImode name of the register.
3717 k -- likewise, print the SImode name of the register.
3718 q -- likewise, print the DImode name of the register.
3719 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3720 y -- print "st(0)" instead of "st" as a register.
3721 m -- print "st(n)" as an mmx register.
3722 D -- print condition for SSE cmp instruction.
3723 */
3724
3725 void
3726 print_operand (file, x, code)
3727 FILE *file;
3728 rtx x;
3729 int code;
3730 {
3731 if (code)
3732 {
3733 switch (code)
3734 {
3735 case '*':
3736 if (ASSEMBLER_DIALECT == 0)
3737 putc ('*', file);
3738 return;
3739
3740 case 'A':
3741 if (ASSEMBLER_DIALECT == 0)
3742 putc ('*', file);
3743 else if (ASSEMBLER_DIALECT == 1)
3744 {
3745 /* Intel syntax. For absolute addresses, registers should not
3746 be surrounded by brackets. */
3747 if (GET_CODE (x) != REG)
3748 {
3749 putc ('[', file);
3750 PRINT_OPERAND (file, x, 0);
3751 putc (']', file);
3752 return;
3753 }
3754 }
3755
3756 PRINT_OPERAND (file, x, 0);
3757 return;
3758
3759
3760 case 'L':
3761 if (ASSEMBLER_DIALECT == 0)
3762 putc ('l', file);
3763 return;
3764
3765 case 'W':
3766 if (ASSEMBLER_DIALECT == 0)
3767 putc ('w', file);
3768 return;
3769
3770 case 'B':
3771 if (ASSEMBLER_DIALECT == 0)
3772 putc ('b', file);
3773 return;
3774
3775 case 'Q':
3776 if (ASSEMBLER_DIALECT == 0)
3777 putc ('l', file);
3778 return;
3779
3780 case 'S':
3781 if (ASSEMBLER_DIALECT == 0)
3782 putc ('s', file);
3783 return;
3784
3785 case 'T':
3786 if (ASSEMBLER_DIALECT == 0)
3787 putc ('t', file);
3788 return;
3789
3790 case 'z':
3791 /* 387 opcodes don't get size suffixes if the operands are
3792 registers. */
3793
3794 if (STACK_REG_P (x))
3795 return;
3796
3797 /* Derive the opcode suffix from the size of the operand. */
3798 switch (GET_MODE_SIZE (GET_MODE (x)))
3799 {
3800 case 2:
3801 #ifdef HAVE_GAS_FILDS_FISTS
3802 putc ('s', file);
3803 #endif
3804 return;
3805
3806 case 4:
3807 if (GET_MODE (x) == SFmode)
3808 {
3809 putc ('s', file);
3810 return;
3811 }
3812 else
3813 putc ('l', file);
3814 return;
3815
3816 case 12:
3817 case 16:
3818 putc ('t', file);
3819 return;
3820
3821 case 8:
3822 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3823 {
3824 #ifdef GAS_MNEMONICS
3825 putc ('q', file);
3826 #else
3827 putc ('l', file);
3828 putc ('l', file);
3829 #endif
3830 }
3831 else
3832 putc ('l', file);
3833 return;
3834
3835 default:
3836 abort ();
3837 }
3838
3839 case 'b':
3840 case 'w':
3841 case 'k':
3842 case 'q':
3843 case 'h':
3844 case 'y':
3845 case 'm':
3846 case 'X':
3847 case 'P':
3848 break;
3849
3850 case 's':
3851 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3852 {
3853 PRINT_OPERAND (file, x, 0);
3854 putc (',', file);
3855 }
3856 return;
3857
3858 case 'D':
3859 /* A little bit of brain damage here. The SSE compare instructions
3860 use completely different names for the comparisons than the
3861 fp conditional moves do. */
3862 switch (GET_CODE (x))
3863 {
3864 case EQ:
3865 case UNEQ:
3866 fputs ("eq", file);
3867 break;
3868 case LT:
3869 case UNLT:
3870 fputs ("lt", file);
3871 break;
3872 case LE:
3873 case UNLE:
3874 fputs ("le", file);
3875 break;
3876 case UNORDERED:
3877 fputs ("unord", file);
3878 break;
3879 case NE:
3880 case LTGT:
3881 fputs ("neq", file);
3882 break;
3883 case UNGE:
3884 case GE:
3885 fputs ("nlt", file);
3886 break;
3887 case UNGT:
3888 case GT:
3889 fputs ("nle", file);
3890 break;
3891 case ORDERED:
3892 fputs ("ord", file);
3893 break;
3894 default:
3895 abort ();
3896 break;
3897 }
3898 return;
3899 case 'C':
3900 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3901 return;
3902 case 'F':
3903 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3904 return;
3905
3906 /* Like above, but reverse condition */
3907 case 'c':
3908 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3909 return;
3910 case 'f':
3911 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3912 return;
3913
3914 default:
3915 {
3916 char str[50];
3917 sprintf (str, "invalid operand code `%c'", code);
3918 output_operand_lossage (str);
3919 }
3920 }
3921 }
3922
3923 if (GET_CODE (x) == REG)
3924 {
3925 PRINT_REG (x, code, file);
3926 }
3927
3928 else if (GET_CODE (x) == MEM)
3929 {
3930 /* No `byte ptr' prefix for call instructions. */
3931 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3932 {
3933 const char * size;
3934 switch (GET_MODE_SIZE (GET_MODE (x)))
3935 {
3936 case 1: size = "BYTE"; break;
3937 case 2: size = "WORD"; break;
3938 case 4: size = "DWORD"; break;
3939 case 8: size = "QWORD"; break;
3940 case 12: size = "XWORD"; break;
3941 case 16: size = "XMMWORD"; break;
3942 default:
3943 abort ();
3944 }
3945
3946 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3947 if (code == 'b')
3948 size = "BYTE";
3949 else if (code == 'w')
3950 size = "WORD";
3951 else if (code == 'k')
3952 size = "DWORD";
3953
3954 fputs (size, file);
3955 fputs (" PTR ", file);
3956 }
3957
3958 x = XEXP (x, 0);
3959 if (flag_pic && CONSTANT_ADDRESS_P (x))
3960 output_pic_addr_const (file, x, code);
3961 else
3962 output_address (x);
3963 }
3964
3965 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3966 {
3967 REAL_VALUE_TYPE r;
3968 long l;
3969
3970 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3971 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3972
3973 if (ASSEMBLER_DIALECT == 0)
3974 putc ('$', file);
3975 fprintf (file, "0x%lx", l);
3976 }
3977
3978 /* These float cases don't actually occur as immediate operands. */
3979 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3980 {
3981 REAL_VALUE_TYPE r;
3982 char dstr[30];
3983
3984 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3985 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3986 fprintf (file, "%s", dstr);
3987 }
3988
3989 else if (GET_CODE (x) == CONST_DOUBLE
3990 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3991 {
3992 REAL_VALUE_TYPE r;
3993 char dstr[30];
3994
3995 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3996 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3997 fprintf (file, "%s", dstr);
3998 }
3999 else
4000 {
4001 if (code != 'P')
4002 {
4003 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
4004 {
4005 if (ASSEMBLER_DIALECT == 0)
4006 putc ('$', file);
4007 }
4008 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4009 || GET_CODE (x) == LABEL_REF)
4010 {
4011 if (ASSEMBLER_DIALECT == 0)
4012 putc ('$', file);
4013 else
4014 fputs ("OFFSET FLAT:", file);
4015 }
4016 }
4017 if (GET_CODE (x) == CONST_INT)
4018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4019 else if (flag_pic)
4020 output_pic_addr_const (file, x, code);
4021 else
4022 output_addr_const (file, x);
4023 }
4024 }
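/* A sketch of how the dialect handling above plays out (the operand
   values are assumptions): a CONST_INT 42 is printed as "$42" under
   ASSEMBLER_DIALECT == 0 (AT&T) but as plain "42" under dialect 1
   (Intel), and a DFmode MEM gains a "QWORD PTR " prefix only in the
   Intel dialect.  */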
4025 \f
4026 /* Print a memory operand whose address is ADDR. */
4027
4028 void
4029 print_operand_address (file, addr)
4030 FILE *file;
4031 register rtx addr;
4032 {
4033 struct ix86_address parts;
4034 rtx base, index, disp;
4035 int scale;
4036
4037 if (! ix86_decompose_address (addr, &parts))
4038 abort ();
4039
4040 base = parts.base;
4041 index = parts.index;
4042 disp = parts.disp;
4043 scale = parts.scale;
4044
4045 if (!base && !index)
4046 {
4047 /* A displacement-only address requires special attention. */
4048
4049 if (GET_CODE (disp) == CONST_INT)
4050 {
4051 if (ASSEMBLER_DIALECT != 0)
4052 {
4053 if (USER_LABEL_PREFIX[0] == 0)
4054 putc ('%', file);
4055 fputs ("ds:", file);
4056 }
4057 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
4058 }
4059 else if (flag_pic)
4060 output_pic_addr_const (file, addr, 0);
4061 else
4062 output_addr_const (file, addr);
4063 }
4064 else
4065 {
4066 if (ASSEMBLER_DIALECT == 0)
4067 {
4068 if (disp)
4069 {
4070 if (flag_pic)
4071 output_pic_addr_const (file, disp, 0);
4072 else if (GET_CODE (disp) == LABEL_REF)
4073 output_asm_label (disp);
4074 else
4075 output_addr_const (file, disp);
4076 }
4077
4078 putc ('(', file);
4079 if (base)
4080 PRINT_REG (base, 0, file);
4081 if (index)
4082 {
4083 putc (',', file);
4084 PRINT_REG (index, 0, file);
4085 if (scale != 1)
4086 fprintf (file, ",%d", scale);
4087 }
4088 putc (')', file);
4089 }
4090 else
4091 {
4092 rtx offset = NULL_RTX;
4093
4094 if (disp)
4095 {
4096 /* Pull out the offset of a symbol; print any symbol itself. */
4097 if (GET_CODE (disp) == CONST
4098 && GET_CODE (XEXP (disp, 0)) == PLUS
4099 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4100 {
4101 offset = XEXP (XEXP (disp, 0), 1);
4102 disp = gen_rtx_CONST (VOIDmode,
4103 XEXP (XEXP (disp, 0), 0));
4104 }
4105
4106 if (flag_pic)
4107 output_pic_addr_const (file, disp, 0);
4108 else if (GET_CODE (disp) == LABEL_REF)
4109 output_asm_label (disp);
4110 else if (GET_CODE (disp) == CONST_INT)
4111 offset = disp;
4112 else
4113 output_addr_const (file, disp);
4114 }
4115
4116 putc ('[', file);
4117 if (base)
4118 {
4119 PRINT_REG (base, 0, file);
4120 if (offset)
4121 {
4122 if (INTVAL (offset) >= 0)
4123 putc ('+', file);
4124 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4125 }
4126 }
4127 else if (offset)
4128 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4129 else
4130 putc ('0', file);
4131
4132 if (index)
4133 {
4134 putc ('+', file);
4135 PRINT_REG (index, 0, file);
4136 if (scale != 1)
4137 fprintf (file, "*%d", scale);
4138 }
4139 putc (']', file);
4140 }
4141 }
4142 }
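/* Illustrative output of the two dialect branches above for a
   base + index*scale + displacement address (the registers and the
   displacement are assumptions):
     AT&T  (ASSEMBLER_DIALECT == 0):  16(%ebx,%ecx,4)
     Intel (ASSEMBLER_DIALECT == 1):  [ebx+16+ecx*4]  */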
4143 \f
4144 /* Split one or more DImode RTL references into pairs of SImode
4145 references. The RTL can be REG, offsettable MEM, integer constant, or
4146 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4147 split and "num" is its length. lo_half and hi_half are output arrays
4148 that parallel "operands". */
4149
4150 void
4151 split_di (operands, num, lo_half, hi_half)
4152 rtx operands[];
4153 int num;
4154 rtx lo_half[], hi_half[];
4155 {
4156 while (num--)
4157 {
4158 rtx op = operands[num];
4159 if (CONSTANT_P (op))
4160 split_double (op, &lo_half[num], &hi_half[num]);
4161 else if (! reload_completed)
4162 {
4163 lo_half[num] = gen_lowpart (SImode, op);
4164 hi_half[num] = gen_highpart (SImode, op);
4165 }
4166 else if (GET_CODE (op) == REG)
4167 {
4168 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4169 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
4170 }
4171 else if (offsettable_memref_p (op))
4172 {
4173 rtx lo_addr = XEXP (op, 0);
4174 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
4175 lo_half[num] = change_address (op, SImode, lo_addr);
4176 hi_half[num] = change_address (op, SImode, hi_addr);
4177 }
4178 else
4179 abort ();
4180 }
4181 }
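/* Example of the splitting above (a sketch): after reload, a DImode
   hard register pair starting at REGNO n splits into (reg:SI n) for
   the low word and (reg:SI n+1) for the high word, while an
   offsettable MEM splits into the original address for the low word
   and address+4 for the high word.  */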
4182 \f
4183 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
4184 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4185 is the expression of the binary operation. The output may either be
4186 emitted here, or returned to the caller, like all output_* functions.
4187
4188 There is no guarantee that the operands are the same mode, as they
4189 might be within FLOAT or FLOAT_EXTEND expressions. */
4190
4191 #ifndef SYSV386_COMPAT
4192 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
4193 wants to fix the assemblers because that causes incompatibility
4194 with gcc. No-one wants to fix gcc because that causes
4195 incompatibility with assemblers... You can use the option
4196 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4197 #define SYSV386_COMPAT 1
4198 #endif
4199
4200 const char *
4201 output_387_binary_op (insn, operands)
4202 rtx insn;
4203 rtx *operands;
4204 {
4205 static char buf[30];
4206 const char *p;
4207 const char *ssep;
4208 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
4209
4210 #ifdef ENABLE_CHECKING
4211 /* Even if we do not want to check the inputs, this documents the input
4212 constraints, which helps in understanding the following code. */
4213 if (STACK_REG_P (operands[0])
4214 && ((REG_P (operands[1])
4215 && REGNO (operands[0]) == REGNO (operands[1])
4216 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4217 || (REG_P (operands[2])
4218 && REGNO (operands[0]) == REGNO (operands[2])
4219 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4220 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4221 ; /* ok */
4222 else if (!is_sse)
4223 abort ();
4224 #endif
4225
4226 switch (GET_CODE (operands[3]))
4227 {
4228 case PLUS:
4229 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4230 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4231 p = "fiadd";
4232 else
4233 p = "fadd";
4234 ssep = "add";
4235 break;
4236
4237 case MINUS:
4238 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4239 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4240 p = "fisub";
4241 else
4242 p = "fsub";
4243 ssep = "sub";
4244 break;
4245
4246 case MULT:
4247 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4248 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4249 p = "fimul";
4250 else
4251 p = "fmul";
4252 ssep = "mul";
4253 break;
4254
4255 case DIV:
4256 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4257 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4258 p = "fidiv";
4259 else
4260 p = "fdiv";
4261 ssep = "div";
4262 break;
4263
4264 default:
4265 abort ();
4266 }
4267
4268 if (is_sse)
4269 {
4270 strcpy (buf, ssep);
4271 if (GET_MODE (operands[0]) == SFmode)
4272 strcat (buf, "ss\t{%2, %0|%0, %2}");
4273 else
4274 strcat (buf, "sd\t{%2, %0|%0, %2}");
4275 return buf;
4276 }
4277 strcpy (buf, p);
4278
4279 switch (GET_CODE (operands[3]))
4280 {
4281 case MULT:
4282 case PLUS:
4283 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4284 {
4285 rtx temp = operands[2];
4286 operands[2] = operands[1];
4287 operands[1] = temp;
4288 }
4289
4290 /* We know operands[0] == operands[1]. */
4291
4292 if (GET_CODE (operands[2]) == MEM)
4293 {
4294 p = "%z2\t%2";
4295 break;
4296 }
4297
4298 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4299 {
4300 if (STACK_TOP_P (operands[0]))
4301 /* How is it that we are storing to a dead operand[2]?
4302 Well, presumably operands[1] is dead too. We can't
4303 store the result to st(0) as st(0) gets popped on this
4304 instruction. Instead store to operands[2] (which I
4305 think has to be st(1)). st(1) will be popped later.
4306 gcc <= 2.8.1 didn't have this check and generated
4307 assembly code that the Unixware assembler rejected. */
4308 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4309 else
4310 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4311 break;
4312 }
4313
4314 if (STACK_TOP_P (operands[0]))
4315 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4316 else
4317 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4318 break;
4319
4320 case MINUS:
4321 case DIV:
4322 if (GET_CODE (operands[1]) == MEM)
4323 {
4324 p = "r%z1\t%1";
4325 break;
4326 }
4327
4328 if (GET_CODE (operands[2]) == MEM)
4329 {
4330 p = "%z2\t%2";
4331 break;
4332 }
4333
4334 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4335 {
4336 #if SYSV386_COMPAT
4337 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4338 derived assemblers, confusingly reverse the direction of
4339 the operation for fsub{r} and fdiv{r} when the
4340 destination register is not st(0). The Intel assembler
4341 doesn't have this brain damage. Read !SYSV386_COMPAT to
4342 figure out what the hardware really does. */
4343 if (STACK_TOP_P (operands[0]))
4344 p = "{p\t%0, %2|rp\t%2, %0}";
4345 else
4346 p = "{rp\t%2, %0|p\t%0, %2}";
4347 #else
4348 if (STACK_TOP_P (operands[0]))
4349 /* As above for fmul/fadd, we can't store to st(0). */
4350 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4351 else
4352 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4353 #endif
4354 break;
4355 }
4356
4357 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4358 {
4359 #if SYSV386_COMPAT
4360 if (STACK_TOP_P (operands[0]))
4361 p = "{rp\t%0, %1|p\t%1, %0}";
4362 else
4363 p = "{p\t%1, %0|rp\t%0, %1}";
4364 #else
4365 if (STACK_TOP_P (operands[0]))
4366 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4367 else
4368 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4369 #endif
4370 break;
4371 }
4372
4373 if (STACK_TOP_P (operands[0]))
4374 {
4375 if (STACK_TOP_P (operands[1]))
4376 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4377 else
4378 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4379 break;
4380 }
4381 else if (STACK_TOP_P (operands[1]))
4382 {
4383 #if SYSV386_COMPAT
4384 p = "{\t%1, %0|r\t%0, %1}";
4385 #else
4386 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4387 #endif
4388 }
4389 else
4390 {
4391 #if SYSV386_COMPAT
4392 p = "{r\t%2, %0|\t%0, %2}";
4393 #else
4394 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4395 #endif
4396 }
4397 break;
4398
4399 default:
4400 abort ();
4401 }
4402
4403 strcat (buf, p);
4404 return buf;
4405 }
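/* A worked example (a sketch; the operands are assumptions): for a
   PLUS whose operands[2] is a DFmode MEM, p starts as "fadd" and the
   MEM case selects "%z2\t%2", giving the template "fadd%z2\t%2",
   which the operand printer expands to something like "faddl 8(%ebp)"
   in AT&T syntax.  */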
4406
4407 /* Output code for INSN to convert a float to a signed int. OPERANDS
4408 are the insn operands. The output may be [HSD]Imode and the input
4409 operand may be [SDX]Fmode. */
4410
4411 const char *
4412 output_fix_trunc (insn, operands)
4413 rtx insn;
4414 rtx *operands;
4415 {
4416 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4417 int dimode_p = GET_MODE (operands[0]) == DImode;
4418 rtx xops[4];
4419
4420 /* Jump through a hoop or two for DImode, since the hardware has no
4421 non-popping instruction. We used to do this a different way, but
4422 that was somewhat fragile and broke with post-reload splitters. */
4423 if (dimode_p && !stack_top_dies)
4424 output_asm_insn ("fld\t%y1", operands);
4425
4426 if (! STACK_TOP_P (operands[1]))
4427 abort ();
4428
4429 xops[0] = GEN_INT (12);
4430 xops[1] = adj_offsettable_operand (operands[2], 1);
4431 xops[1] = change_address (xops[1], QImode, NULL_RTX);
4432
4433 xops[2] = operands[0];
4434 if (GET_CODE (operands[0]) != MEM)
4435 xops[2] = operands[3];
4436
4437 output_asm_insn ("fnstcw\t%2", operands);
4438 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4439 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4440 output_asm_insn ("fldcw\t%2", operands);
4441 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
4442
4443 if (stack_top_dies || dimode_p)
4444 output_asm_insn ("fistp%z2\t%2", xops);
4445 else
4446 output_asm_insn ("fist%z2\t%2", xops);
4447
4448 output_asm_insn ("fldcw\t%2", operands);
4449
4450 if (GET_CODE (operands[0]) != MEM)
4451 {
4452 if (dimode_p)
4453 {
4454 split_di (operands+0, 1, xops+0, xops+1);
4455 split_di (operands+3, 1, xops+2, xops+3);
4456 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4457 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4458 }
4459 else if (GET_MODE (operands[0]) == SImode)
4460 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4461 else
4462 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4463 }
4464
4465 return "";
4466 }
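/* The control word dance above, spelled out: fnstcw saves the current
   387 control word into the scratch slot, the mov{b} of 12 into byte 1
   sets the rounding control field (bits 10-11) to round-toward-zero
   (it also clears the precision control bits, which does not affect
   the integer store), fldcw activates the modified word for the
   fist/fistp, and the final fldcw restores the caller's control word.  */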
4467
4468 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4469 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4470 when fucom should be used. */
4471
4472 const char *
4473 output_fp_compare (insn, operands, eflags_p, unordered_p)
4474 rtx insn;
4475 rtx *operands;
4476 int eflags_p, unordered_p;
4477 {
4478 int stack_top_dies;
4479 rtx cmp_op0 = operands[0];
4480 rtx cmp_op1 = operands[1];
4481 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4482
4483 if (eflags_p == 2)
4484 {
4485 cmp_op0 = cmp_op1;
4486 cmp_op1 = operands[2];
4487 }
4488 if (is_sse)
4489 {
4490 if (GET_MODE (operands[0]) == SFmode)
4491 if (unordered_p)
4492 return "ucomiss\t{%1, %0|%0, %1}";
4493 else
4494 return "comiss\t{%1, %0|%0, %y}";
4495 else
4496 if (unordered_p)
4497 return "ucomisd\t{%1, %0|%0, %1}";
4498 else
4499 return "comisd\t{%1, %0|%0, %y}";
4500 }
4501
4502 if (! STACK_TOP_P (cmp_op0))
4503 abort ();
4504
4505 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4506
4507 if (STACK_REG_P (cmp_op1)
4508 && stack_top_dies
4509 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4510 && REGNO (cmp_op1) != FIRST_STACK_REG)
4511 {
4512 /* If both the top of the 387 stack and the other operand (also a
4513 stack register) die, then this must be an `fcompp' float
4514 compare. */
4515
4516 if (eflags_p == 1)
4517 {
4518 /* There is no double popping fcomi variant. Fortunately,
4519 eflags is immune from the fstp's cc clobbering. */
4520 if (unordered_p)
4521 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4522 else
4523 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4524 return "fstp\t%y0";
4525 }
4526 else
4527 {
4528 if (eflags_p == 2)
4529 {
4530 if (unordered_p)
4531 return "fucompp\n\tfnstsw\t%0";
4532 else
4533 return "fcompp\n\tfnstsw\t%0";
4534 }
4535 else
4536 {
4537 if (unordered_p)
4538 return "fucompp";
4539 else
4540 return "fcompp";
4541 }
4542 }
4543 }
4544 else
4545 {
4546 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4547
4548 static const char * const alt[24] =
4549 {
4550 "fcom%z1\t%y1",
4551 "fcomp%z1\t%y1",
4552 "fucom%z1\t%y1",
4553 "fucomp%z1\t%y1",
4554
4555 "ficom%z1\t%y1",
4556 "ficomp%z1\t%y1",
4557 NULL,
4558 NULL,
4559
4560 "fcomi\t{%y1, %0|%0, %y1}",
4561 "fcomip\t{%y1, %0|%0, %y1}",
4562 "fucomi\t{%y1, %0|%0, %y1}",
4563 "fucomip\t{%y1, %0|%0, %y1}",
4564
4565 NULL,
4566 NULL,
4567 NULL,
4568 NULL,
4569
4570 "fcom%z2\t%y2\n\tfnstsw\t%0",
4571 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4572 "fucom%z2\t%y2\n\tfnstsw\t%0",
4573 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4574
4575 "ficom%z2\t%y2\n\tfnstsw\t%0",
4576 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4577 NULL,
4578 NULL
4579 };
4580
4581 int mask;
4582 const char *ret;
4583
4584 mask = eflags_p << 3;
4585 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4586 mask |= unordered_p << 1;
4587 mask |= stack_top_dies;
4588
4589 if (mask >= 24)
4590 abort ();
4591 ret = alt[mask];
4592 if (ret == NULL)
4593 abort ();
4594
4595 return ret;
4596 }
4597 }
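/* Example of the mask encoding above (it follows directly from the
   shifts): fcomi (eflags_p == 1) with a register operand, an ordered
   compare, and a dying stack top gives
     mask = (1 << 3) | (0 << 2) | (0 << 1) | 1 = 9,
   which selects "fcomip\t{%y1, %0|%0, %y1}" from the table.  */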
4598
4599 /* Output assembler code to FILE to initialize basic-block profiling.
4600
4601 If profile_block_flag == 2
4602
4603 Output code to call the subroutine `__bb_init_trace_func'
4604 and pass two parameters to it. The first parameter is
4605 the address of a block allocated in the object module.
4606 The second parameter is the number of the first basic block
4607 of the function.
4608
4609 The name of the block is a local symbol made with this statement:
4610
4611 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4612
4613 Of course, since you are writing the definition of
4614 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4615 can take a short cut in the definition of this macro and use the
4616 name that you know will result.
4617
4618 The number of the first basic block of the function is
4619 passed to the macro in BLOCK_OR_LABEL.
4620
4621 If described in a virtual assembler language the code to be
4622 output looks like:
4623
4624 parameter1 <- LPBX0
4625 parameter2 <- BLOCK_OR_LABEL
4626 call __bb_init_trace_func
4627
4628 else if profile_block_flag != 0
4629
4630 Output code to call the subroutine `__bb_init_func'
4631 and pass one single parameter to it, which is the same
4632 as the first parameter to `__bb_init_trace_func'.
4633
4634 The first word of this parameter is a flag which will be nonzero if
4635 the object module has already been initialized. So test this word
4636 first, and do not call `__bb_init_func' if the flag is nonzero.
4637 Note: When profile_block_flag == 2 the test need not be done
4638 but `__bb_init_trace_func' *must* be called.
4639
4640 BLOCK_OR_LABEL may be used to generate a label number as a
4641 branch destination in case `__bb_init_func' will not be called.
4642
4643 If described in a virtual assembler language the code to be
4644 output looks like:
4645
4646 cmp (LPBX0),0
4647 jne local_label
4648 parameter1 <- LPBX0
4649 call __bb_init_func
4650 local_label:
4651 */
4652
4653 void
4654 ix86_output_function_block_profiler (file, block_or_label)
4655 FILE *file;
4656 int block_or_label;
4657 {
4658 static int num_func = 0;
4659 rtx xops[8];
4660 char block_table[80], false_label[80];
4661
4662 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4663
4664 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4665 xops[5] = stack_pointer_rtx;
4666 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4667
4668 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4669
4670 switch (profile_block_flag)
4671 {
4672 case 2:
4673 xops[2] = GEN_INT (block_or_label);
4674 xops[3] = gen_rtx_MEM (Pmode,
4675 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4676 xops[6] = GEN_INT (8);
4677
4678 output_asm_insn ("push{l}\t%2", xops);
4679 if (!flag_pic)
4680 output_asm_insn ("push{l}\t%1", xops);
4681 else
4682 {
4683 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4684 output_asm_insn ("push{l}\t%7", xops);
4685 }
4686 output_asm_insn ("call\t%P3", xops);
4687 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4688 break;
4689
4690 default:
4691 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4692
4693 xops[0] = const0_rtx;
4694 xops[2] = gen_rtx_MEM (Pmode,
4695 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4696 xops[3] = gen_rtx_MEM (Pmode,
4697 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4698 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4699 xops[6] = GEN_INT (4);
4700
4701 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4702
4703 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4704 output_asm_insn ("jne\t%2", xops);
4705
4706 if (!flag_pic)
4707 output_asm_insn ("push{l}\t%1", xops);
4708 else
4709 {
4710 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4711 output_asm_insn ("push{l}\t%7", xops);
4712 }
4713 output_asm_insn ("call\t%P3", xops);
4714 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4715 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4716 num_func++;
4717 break;
4718 }
4719 }
4720
4721 /* Output assembler code to FILE to increment a counter associated
4722 with basic block number BLOCKNO.
4723
4724 If profile_block_flag == 2
4725
4726 Output code to initialize the global structure `__bb' and
4727 call the function `__bb_trace_func' which will increment the
4728 counter.
4729
4730 `__bb' consists of two words. In the first word the number
4731 of the basic block has to be stored. In the second word
4732 the address of a block allocated in the object module
4733 has to be stored.
4734
4735 The basic block number is given by BLOCKNO.
4736
4737 The address of the block is given by the label created with
4738
4739 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4740
4741 by FUNCTION_BLOCK_PROFILER.
4742
4743 Of course, since you are writing the definition of
4744 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4745 can take a short cut in the definition of this macro and use the
4746 name that you know will result.
4747
4748 If described in a virtual assembler language the code to be
4749 output looks like:
4750
4751 move BLOCKNO -> (__bb)
4752 move LPBX0 -> (__bb+4)
4753 call __bb_trace_func
4754
4755 Note that function `__bb_trace_func' must not change the
4756 machine state, especially the flag register. To grant
4757 this, you must output code to save and restore registers
4758 either in this macro or in the macros MACHINE_STATE_SAVE
4759 and MACHINE_STATE_RESTORE. The last two macros will be
4760 used in the function `__bb_trace_func', so you must make
4761 sure that the function prologue does not change any
4762 register prior to saving it with MACHINE_STATE_SAVE.
4763
4764 else if profile_block_flag != 0
4765
4766 Output code to increment the counter directly.
4767 Basic blocks are numbered separately from zero within each
4768 compiled object module. The count associated with block number
4769 BLOCKNO is at index BLOCKNO in an array of words; the name of
4770 this array is a local symbol made with this statement:
4771
4772 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4773
4774 Of course, since you are writing the definition of
4775 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4776 can take a short cut in the definition of this macro and use the
4777 name that you know will result.
4778
4779 If described in a virtual assembler language the code to be
4780 output looks like:
4781
4782 inc (LPBX2+4*BLOCKNO)
4783 */
4784
4785 void
4786 ix86_output_block_profiler (file, blockno)
4787 FILE *file ATTRIBUTE_UNUSED;
4788 int blockno;
4789 {
4790 rtx xops[8], cnt_rtx;
4791 char counts[80];
4792 char *block_table = counts;
4793
4794 switch (profile_block_flag)
4795 {
4796 case 2:
4797 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4798
4799 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4800 xops[2] = GEN_INT (blockno);
4801 xops[3] = gen_rtx_MEM (Pmode,
4802 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4803 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4804 xops[5] = plus_constant (xops[4], 4);
4805 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4806 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4807
4808 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4809
4810 output_asm_insn ("pushf", xops);
4811 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4812 if (flag_pic)
4813 {
4814 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4815 output_asm_insn ("push{l}\t%7", xops);
4816 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4817 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4818 output_asm_insn ("pop{l}\t%7", xops);
4819 }
4820 else
4821 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4822 output_asm_insn ("call\t%P3", xops);
4823 output_asm_insn ("popf", xops);
4824
4825 break;
4826
4827 default:
4828 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4829 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4830 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4831
4832 if (blockno)
4833 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4834
4835 if (flag_pic)
4836 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4837
4838 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4839 output_asm_insn ("inc{l}\t%0", xops);
4840
4841 break;
4842 }
4843 }
4844 \f
4845 void
4846 ix86_expand_move (mode, operands)
4847 enum machine_mode mode;
4848 rtx operands[];
4849 {
4850 int strict = (reload_in_progress || reload_completed);
4851 rtx insn;
4852
4853 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4854 {
4855 /* Emit insns to move operands[1] into operands[0]. */
4856
4857 if (GET_CODE (operands[0]) == MEM)
4858 operands[1] = force_reg (Pmode, operands[1]);
4859 else
4860 {
4861 rtx temp = operands[0];
4862 if (GET_CODE (temp) != REG)
4863 temp = gen_reg_rtx (Pmode);
4864 temp = legitimize_pic_address (operands[1], temp);
4865 if (temp == operands[0])
4866 return;
4867 operands[1] = temp;
4868 }
4869 }
4870 else
4871 {
4872 if (GET_CODE (operands[0]) == MEM
4873 && (GET_MODE (operands[0]) == QImode
4874 || !push_operand (operands[0], mode))
4875 && GET_CODE (operands[1]) == MEM)
4876 operands[1] = force_reg (mode, operands[1]);
4877
4878 if (push_operand (operands[0], mode)
4879 && ! general_no_elim_operand (operands[1], mode))
4880 operands[1] = copy_to_mode_reg (mode, operands[1]);
4881
4882 if (FLOAT_MODE_P (mode))
4883 {
4884 /* If we are loading a floating point constant to a register,
4885 force the value to memory now, since we'll get better code
4886 out the back end. */
4887
4888 if (strict)
4889 ;
4890 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4891 && register_operand (operands[0], mode))
4892 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4893 }
4894 }
4895
4896 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4897
4898 emit_insn (insn);
4899 }
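/* For instance (a sketch): under -fpic, moving a SYMBOL_REF into a
   register goes through legitimize_pic_address first, so the emitted
   SET uses a legitimized (GOT-relative) address rather than the raw
   symbol; and a CONST_DOUBLE destined for a register is first forced
   into the constant pool by the FLOAT_MODE_P branch.  */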
4900
4901 /* Attempt to expand a binary operator. Make the expansion closer to the
4902 actual machine than just general_operand, which would allow 3 separate
4903 memory references (one output, two input) in a single insn. */
4904
4905 void
4906 ix86_expand_binary_operator (code, mode, operands)
4907 enum rtx_code code;
4908 enum machine_mode mode;
4909 rtx operands[];
4910 {
4911 int matching_memory;
4912 rtx src1, src2, dst, op, clob;
4913
4914 dst = operands[0];
4915 src1 = operands[1];
4916 src2 = operands[2];
4917
4918 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4919 if (GET_RTX_CLASS (code) == 'c'
4920 && (rtx_equal_p (dst, src2)
4921 || immediate_operand (src1, mode)))
4922 {
4923 rtx temp = src1;
4924 src1 = src2;
4925 src2 = temp;
4926 }
4927
4928 /* If the destination is memory, and we do not have matching source
4929 operands, do things in registers. */
4930 matching_memory = 0;
4931 if (GET_CODE (dst) == MEM)
4932 {
4933 if (rtx_equal_p (dst, src1))
4934 matching_memory = 1;
4935 else if (GET_RTX_CLASS (code) == 'c'
4936 && rtx_equal_p (dst, src2))
4937 matching_memory = 2;
4938 else
4939 dst = gen_reg_rtx (mode);
4940 }
4941
4942 /* Both source operands cannot be in memory. */
4943 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4944 {
4945 if (matching_memory != 2)
4946 src2 = force_reg (mode, src2);
4947 else
4948 src1 = force_reg (mode, src1);
4949 }
4950
4951 /* If the operation is not commutable, source 1 cannot be a constant
4952 or non-matching memory. */
4953 if ((CONSTANT_P (src1)
4954 || (!matching_memory && GET_CODE (src1) == MEM))
4955 && GET_RTX_CLASS (code) != 'c')
4956 src1 = force_reg (mode, src1);
4957
4958 /* If optimizing, copy to regs to improve CSE */
4959 if (optimize && ! no_new_pseudos)
4960 {
4961 if (GET_CODE (dst) == MEM)
4962 dst = gen_reg_rtx (mode);
4963 if (GET_CODE (src1) == MEM)
4964 src1 = force_reg (mode, src1);
4965 if (GET_CODE (src2) == MEM)
4966 src2 = force_reg (mode, src2);
4967 }
4968
4969 /* Emit the instruction. */
4970
4971 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4972 if (reload_in_progress)
4973 {
4974 /* Reload doesn't know about the flags register, and doesn't know that
4975 it doesn't want to clobber it. We can only do this with PLUS. */
4976 if (code != PLUS)
4977 abort ();
4978 emit_insn (op);
4979 }
4980 else
4981 {
4982 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4983 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4984 }
4985
4986 /* Fix up the destination if needed. */
4987 if (dst != operands[0])
4988 emit_move_insn (operands[0], dst);
4989 }
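/* A worked example (the operand shapes are assumptions): expanding
   (plus:SI (mem A) (mem B)) with destination (mem A) keeps A as the
   matching memory operand and forces B into a register, so the insn
   emitted is the machine's read-modify-write form dst = dst + reg,
   together with the flags clobber added above.  */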
4990
4991 /* Return TRUE or FALSE depending on whether the binary operator meets the
4992 appropriate constraints. */
4993
4994 int
4995 ix86_binary_operator_ok (code, mode, operands)
4996 enum rtx_code code;
4997 enum machine_mode mode ATTRIBUTE_UNUSED;
4998 rtx operands[3];
4999 {
5000 /* Both source operands cannot be in memory. */
5001 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
5002 return 0;
5003 /* If the operation is not commutable, source 1 cannot be a constant. */
5004 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
5005 return 0;
5006 /* If the destination is memory, we must have a matching source operand. */
5007 if (GET_CODE (operands[0]) == MEM
5008 && ! (rtx_equal_p (operands[0], operands[1])
5009 || (GET_RTX_CLASS (code) == 'c'
5010 && rtx_equal_p (operands[0], operands[2]))))
5011 return 0;
5012 /* If the operation is not commutable and source 1 is memory, we must
5013 have a matching destination. */
5014 if (GET_CODE (operands[1]) == MEM
5015 && GET_RTX_CLASS (code) != 'c'
5016 && ! rtx_equal_p (operands[0], operands[1]))
5017 return 0;
5018 return 1;
5019 }
5020
5021 /* Attempt to expand a unary operator. Make the expansion closer to the
5022 actual machine than just general_operand, which would allow 2 separate
5023 memory references (one output, one input) in a single insn. */
5024
5025 void
5026 ix86_expand_unary_operator (code, mode, operands)
5027 enum rtx_code code;
5028 enum machine_mode mode;
5029 rtx operands[];
5030 {
5031 int matching_memory;
5032 rtx src, dst, op, clob;
5033
5034 dst = operands[0];
5035 src = operands[1];
5036
5037 /* If the destination is memory, and we do not have matching source
5038 operands, do things in registers. */
5039 matching_memory = 0;
5040 if (GET_CODE (dst) == MEM)
5041 {
5042 if (rtx_equal_p (dst, src))
5043 matching_memory = 1;
5044 else
5045 dst = gen_reg_rtx (mode);
5046 }
5047
5048 /* When source operand is memory, destination must match. */
5049 if (!matching_memory && GET_CODE (src) == MEM)
5050 src = force_reg (mode, src);
5051
5052 /* If optimizing, copy to regs to improve CSE */
5053 if (optimize && ! no_new_pseudos)
5054 {
5055 if (GET_CODE (dst) == MEM)
5056 dst = gen_reg_rtx (mode);
5057 if (GET_CODE (src) == MEM)
5058 src = force_reg (mode, src);
5059 }
5060
5061 /* Emit the instruction. */
5062
5063 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
5064 if (reload_in_progress || code == NOT)
5065 {
5066 /* Reload doesn't know about the flags register, and doesn't know that
5067 it doesn't want to clobber it. */
5068 if (code != NOT)
5069 abort ();
5070 emit_insn (op);
5071 }
5072 else
5073 {
5074 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5075 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5076 }
5077
5078 /* Fix up the destination if needed. */
5079 if (dst != operands[0])
5080 emit_move_insn (operands[0], dst);
5081 }
5082
5083 /* Return TRUE or FALSE depending on whether the unary operator meets the
5084 appropriate constraints. */
5085
5086 int
5087 ix86_unary_operator_ok (code, mode, operands)
5088 enum rtx_code code ATTRIBUTE_UNUSED;
5089 enum machine_mode mode ATTRIBUTE_UNUSED;
5090 rtx operands[2] ATTRIBUTE_UNUSED;
5091 {
5092 /* If one of operands is memory, source and destination must match. */
5093 if ((GET_CODE (operands[0]) == MEM
5094 || GET_CODE (operands[1]) == MEM)
5095 && ! rtx_equal_p (operands[0], operands[1]))
5096 return FALSE;
5097 return TRUE;
5098 }
5099
5100 /* Return TRUE or FALSE depending on whether the first SET in INSN
5101 has source and destination with matching CC modes, and that the
5102 CC mode is at least as constrained as REQ_MODE. */
5103
5104 int
5105 ix86_match_ccmode (insn, req_mode)
5106 rtx insn;
5107 enum machine_mode req_mode;
5108 {
5109 rtx set;
5110 enum machine_mode set_mode;
5111
5112 set = PATTERN (insn);
5113 if (GET_CODE (set) == PARALLEL)
5114 set = XVECEXP (set, 0, 0);
5115 if (GET_CODE (set) != SET)
5116 abort ();
5117 if (GET_CODE (SET_SRC (set)) != COMPARE)
5118 abort ();
5119
5120 set_mode = GET_MODE (SET_DEST (set));
5121 switch (set_mode)
5122 {
5123 case CCNOmode:
5124 if (req_mode != CCNOmode
5125 && (req_mode != CCmode
5126 || XEXP (SET_SRC (set), 1) != const0_rtx))
5127 return 0;
5128 break;
5129 case CCmode:
5130 if (req_mode == CCGCmode)
5131 return 0;
5132 /* FALLTHRU */
5133 case CCGCmode:
5134 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5135 return 0;
5136 /* FALLTHRU */
5137 case CCGOCmode:
5138 if (req_mode == CCZmode)
5139 return 0;
5140 /* FALLTHRU */
5141 case CCZmode:
5142 break;
5143
5144 default:
5145 abort ();
5146 }
5147
5148 return (GET_MODE (SET_SRC (set)) == set_mode);
5149 }
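/* The fall-through chain above encodes which requests each CC mode can
   satisfy: a CCZmode set satisfies any request; CCGOCmode satisfies
   everything except CCZmode; CCGCmode satisfies only CCGCmode and
   CCmode requests; a CCmode set satisfies only a CCmode request; and
   CCNOmode satisfies CCNOmode, or CCmode when the comparison is
   against zero.  */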
5150
5151 /* Generate insn patterns to do an integer compare of OPERANDS. */
5152
5153 static rtx
5154 ix86_expand_int_compare (code, op0, op1)
5155 enum rtx_code code;
5156 rtx op0, op1;
5157 {
5158 enum machine_mode cmpmode;
5159 rtx tmp, flags;
5160
5161 cmpmode = SELECT_CC_MODE (code, op0, op1);
5162 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5163
5164 /* This is very simple, but making the interface the same as in the
5165 FP case makes the rest of the code easier. */
5166 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5167 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5168
5169 /* Return the test that should be put into the flags user, i.e.
5170 the bcc, scc, or cmov instruction. */
5171 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5172 }
5173
5174 /* Figure out whether to use ordered or unordered fp comparisons.
5175 Return the appropriate mode to use. */
5176
5177 enum machine_mode
5178 ix86_fp_compare_mode (code)
5179 enum rtx_code code ATTRIBUTE_UNUSED;
5180 {
5181 /* ??? In order to make all comparisons reversible, we do all comparisons
5182 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5183 all forms of trapping and nontrapping comparisons, we can make inequality
5184 comparisons trapping again, since it results in better code when using
5185 FCOM based compares. */
5186 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
5187 }
5188
5189 enum machine_mode
5190 ix86_cc_mode (code, op0, op1)
5191 enum rtx_code code;
5192 rtx op0, op1;
5193 {
5194 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5195 return ix86_fp_compare_mode (code);
5196 switch (code)
5197 {
5198 /* Only zero flag is needed. */
5199 case EQ: /* ZF=0 */
5200 case NE: /* ZF!=0 */
5201 return CCZmode;
5202 /* Codes needing carry flag. */
5203 case GEU: /* CF=0 */
5204 case GTU: /* CF=0 & ZF=0 */
5205 case LTU: /* CF=1 */
5206 case LEU: /* CF=1 | ZF=1 */
5207 return CCmode;
5208 /* Codes possibly doable only with sign flag when
5209 comparing against zero. */
5210 case GE: /* SF=OF or SF=0 */
5211 case LT: /* SF<>OF or SF=1 */
5212 if (op1 == const0_rtx)
5213 return CCGOCmode;
5214 else
5215 /* For other cases Carry flag is not required. */
5216 return CCGCmode;
5217 /* Codes doable only with the sign flag when comparing
5218 against zero, but there is no jump instruction for them,
5219 so we need to use relational tests against overflow,
5220 which thus needs to be zero. */
5221 case GT: /* ZF=0 & SF=OF */
5222 case LE: /* ZF=1 | SF<>OF */
5223 if (op1 == const0_rtx)
5224 return CCNOmode;
5225 else
5226 return CCGCmode;
5227 default:
5228 abort ();
5229 }
5230 }
5231
5232 /* Return true if we should use an FCOMI instruction for this fp comparison. */
5233
5234 int
5235 ix86_use_fcomi_compare (code)
5236 enum rtx_code code ATTRIBUTE_UNUSED;
5237 {
5238 enum rtx_code swapped_code = swap_condition (code);
5239 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5240 || (ix86_fp_comparison_cost (swapped_code)
5241 == ix86_fp_comparison_fcomi_cost (swapped_code)));
5242 }
5243
5244 /* Swap, force into registers, or otherwise massage the two operands
5245 to a fp comparison. The operands are updated in place; the new
5246 comparison code is returned. */
5247
5248 static enum rtx_code
5249 ix86_prepare_fp_compare_args (code, pop0, pop1)
5250 enum rtx_code code;
5251 rtx *pop0, *pop1;
5252 {
5253 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5254 rtx op0 = *pop0, op1 = *pop1;
5255 enum machine_mode op_mode = GET_MODE (op0);
5256 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
5257
5258 /* All of the unordered compare instructions only work on registers.
5259 The same is true of the XFmode compare instructions. The same is
5260 true of the fcomi compare instructions. */
5261
5262 if (!is_sse
5263 && (fpcmp_mode == CCFPUmode
5264 || op_mode == XFmode
5265 || op_mode == TFmode
5266 || ix86_use_fcomi_compare (code)))
5267 {
5268 op0 = force_reg (op_mode, op0);
5269 op1 = force_reg (op_mode, op1);
5270 }
5271 else
5272 {
5273 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5274 things around if they appear profitable, otherwise force op0
5275 into a register. */
5276
5277 if (standard_80387_constant_p (op0) == 0
5278 || (GET_CODE (op0) == MEM
5279 && ! (standard_80387_constant_p (op1) == 0
5280 || GET_CODE (op1) == MEM)))
5281 {
5282 rtx tmp;
5283 tmp = op0, op0 = op1, op1 = tmp;
5284 code = swap_condition (code);
5285 }
5286
5287 if (GET_CODE (op0) != REG)
5288 op0 = force_reg (op_mode, op0);
5289
5290 if (CONSTANT_P (op1))
5291 {
5292 if (standard_80387_constant_p (op1))
5293 op1 = force_reg (op_mode, op1);
5294 else
5295 op1 = validize_mem (force_const_mem (op_mode, op1));
5296 }
5297 }
5298
5299 /* Try to rearrange the comparison to make it cheaper. */
5300 if (ix86_fp_comparison_cost (code)
5301 > ix86_fp_comparison_cost (swap_condition (code))
5302 && (GET_CODE (op0) == REG || !reload_completed))
5303 {
5304 rtx tmp;
5305 tmp = op0, op0 = op1, op1 = tmp;
5306 code = swap_condition (code);
5307 if (GET_CODE (op0) != REG)
5308 op0 = force_reg (op_mode, op0);
5309 }
5310
5311 *pop0 = op0;
5312 *pop1 = op1;
5313 return code;
5314 }
5315
5316 /* Convert comparison codes we use to represent FP comparison to integer
5317 code that will result in proper branch. Return UNKNOWN if no such code
5318 is available. */
5319 static enum rtx_code
5320 ix86_fp_compare_code_to_integer (code)
5321 enum rtx_code code;
5322 {
5323 switch (code)
5324 {
5325 case GT:
5326 return GTU;
5327 case GE:
5328 return GEU;
5329 case ORDERED:
5330 case UNORDERED:
5331 return code;
5332 break;
5333 case UNEQ:
5334 return EQ;
5335 break;
5336 case UNLT:
5337 return LTU;
5338 break;
5339 case UNLE:
5340 return LEU;
5341 break;
5342 case LTGT:
5343 return NE;
5344 break;
5345 default:
5346 return UNKNOWN;
5347 }
5348 }
5349
5350 /* Split comparison code CODE into comparisons we can do using branch
5351 instructions. BYPASS_CODE is the comparison code for the branch that
5352 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
5353 is not required, its value is set to NIL.
5354 We never require more than two branches. */
5355 static void
5356 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5357 enum rtx_code code, *bypass_code, *first_code, *second_code;
5358 {
5359 *first_code = code;
5360 *bypass_code = NIL;
5361 *second_code = NIL;
5362
5363 /* The fcomi comparison sets flags as follows:
5364
5365 cmp ZF PF CF
5366 > 0 0 0
5367 < 0 0 1
5368 = 1 0 0
5369 un 1 1 1 */
5370
5371 switch (code)
5372 {
5373 case GT: /* GTU - CF=0 & ZF=0 */
5374 case GE: /* GEU - CF=0 */
5375 case ORDERED: /* PF=0 */
5376 case UNORDERED: /* PF=1 */
5377 case UNEQ: /* EQ - ZF=1 */
5378 case UNLT: /* LTU - CF=1 */
5379 case UNLE: /* LEU - CF=1 | ZF=1 */
5380 case LTGT: /* EQ - ZF=0 */
5381 break;
5382 case LT: /* LTU - CF=1 - fails on unordered */
5383 *first_code = UNLT;
5384 *bypass_code = UNORDERED;
5385 break;
5386 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5387 *first_code = UNLE;
5388 *bypass_code = UNORDERED;
5389 break;
5390 case EQ: /* EQ - ZF=1 - fails on unordered */
5391 *first_code = UNEQ;
5392 *bypass_code = UNORDERED;
5393 break;
5394 case NE: /* NE - ZF=0 - fails on unordered */
5395 *first_code = LTGT;
5396 *second_code = UNORDERED;
5397 break;
5398 case UNGE: /* GEU - CF=0 - fails on unordered */
5399 *first_code = GE;
5400 *second_code = UNORDERED;
5401 break;
5402 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5403 *first_code = GT;
5404 *second_code = UNORDERED;
5405 break;
5406 default:
5407 abort ();
5408 }
5409 if (!TARGET_IEEE_FP)
5410 {
5411 *second_code = NIL;
5412 *bypass_code = NIL;
5413 }
5414 }
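/* Example of the splitting above: an IEEE-safe LT comes back as
   first_code = UNLT with bypass_code = UNORDERED, i.e. (label names
   are assumptions, showing only the shape the caller emits):
       jp   around      # bypass branch: unordered operands
       jb   target      # UNLT maps to CF=1
     around:
   so the LT branch is never taken on unordered operands.  */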
5415
5416 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
5417 All following functions use the number of instructions as a cost metric.
5418 In the future this should be tweaked to compute bytes for optimize_size and
5419 to take into account the performance of various instructions on various CPUs. */
5420 static int
5421 ix86_fp_comparison_arithmetics_cost (code)
5422 enum rtx_code code;
5423 {
5424 if (!TARGET_IEEE_FP)
5425 return 4;
5426 /* The cost of code output by ix86_expand_fp_compare. */
5427 switch (code)
5428 {
5429 case UNLE:
5430 case UNLT:
5431 case LTGT:
5432 case GT:
5433 case GE:
5434 case UNORDERED:
5435 case ORDERED:
5436 case UNEQ:
5437 return 4;
5438 break;
5439 case LT:
5440 case NE:
5441 case EQ:
5442 case UNGE:
5443 return 5;
5444 break;
5445 case LE:
5446 case UNGT:
5447 return 6;
5448 break;
5449 default:
5450 abort ();
5451 }
5452 }
5453
5454 /* Return cost of comparison done using fcomi operation.
5455 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5456 static int
5457 ix86_fp_comparison_fcomi_cost (code)
5458 enum rtx_code code;
5459 {
5460 enum rtx_code bypass_code, first_code, second_code;
5461 /* Return an arbitrarily high cost when the instruction is not supported -
5462 this prevents gcc from using it. */
5463 if (!TARGET_CMOVE)
5464 return 1024;
5465 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5466 return (bypass_code != NIL || second_code != NIL) + 2;
5467 }
5468
5469 /* Return cost of comparison done using sahf operation.
5470 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5471 static int
5472 ix86_fp_comparison_sahf_cost (code)
5473 enum rtx_code code;
5474 {
5475 enum rtx_code bypass_code, first_code, second_code;
5476 /* Return an arbitrarily high cost when the instruction is not preferred -
5477 this prevents gcc from using it. */
5478 if (!TARGET_USE_SAHF && !optimize_size)
5479 return 1024;
5480 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5481 return (bypass_code != NIL || second_code != NIL) + 3;
5482 }
5483
5484 /* Compute cost of the comparison done using any method.
5485 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5486 static int
5487 ix86_fp_comparison_cost (code)
5488 enum rtx_code code;
5489 {
5490 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5491 int min;
5492
5493 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5494 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5495
5496 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5497 if (min > sahf_cost)
5498 min = sahf_cost;
5499 if (min > fcomi_cost)
5500 min = fcomi_cost;
5501 return min;
5502 }
5503
5504 /* Generate insn patterns to do a floating point compare of OPERANDS. */
5505
5506 static rtx
5507 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5508 enum rtx_code code;
5509 rtx op0, op1, scratch;
5510 rtx *second_test;
5511 rtx *bypass_test;
5512 {
5513 enum machine_mode fpcmp_mode, intcmp_mode;
5514 rtx tmp, tmp2;
5515 int cost = ix86_fp_comparison_cost (code);
5516 enum rtx_code bypass_code, first_code, second_code;
5517
5518 fpcmp_mode = ix86_fp_compare_mode (code);
5519 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5520
5521 if (second_test)
5522 *second_test = NULL_RTX;
5523 if (bypass_test)
5524 *bypass_test = NULL_RTX;
5525
5526 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5527
5528 /* Do fcomi/sahf based test when profitable. */
5529 if ((bypass_code == NIL || bypass_test)
5530 && (second_code == NIL || second_test)
5531 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5532 {
5533 if (TARGET_CMOVE)
5534 {
5535 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5536 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5537 tmp);
5538 emit_insn (tmp);
5539 }
5540 else
5541 {
5542 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5543 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5544 if (!scratch)
5545 scratch = gen_reg_rtx (HImode);
5546 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5547 emit_insn (gen_x86_sahf_1 (scratch));
5548 }
5549
5550 /* The FP codes work out to act like unsigned. */
5551 intcmp_mode = fpcmp_mode;
5552 code = first_code;
5553 if (bypass_code != NIL)
5554 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5555 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5556 const0_rtx);
5557 if (second_code != NIL)
5558 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5559 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5560 const0_rtx);
5561 }
5562 else
5563 {
5564 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5565 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5566 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5567 if (!scratch)
5568 scratch = gen_reg_rtx (HImode);
5569 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5570
5571 /* In the unordered case, we have to check C2 for NaNs, which
5572 doesn't happen to work out to anything nice combination-wise.
5573 So do some bit twiddling on the value we've got in AH to come
5574 up with an appropriate set of condition codes. */
5575
5576 intcmp_mode = CCNOmode;
5577 switch (code)
5578 {
5579 case GT:
5580 case UNGT:
5581 if (code == GT || !TARGET_IEEE_FP)
5582 {
5583 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5584 code = EQ;
5585 }
5586 else
5587 {
5588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5589 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5590 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5591 intcmp_mode = CCmode;
5592 code = GEU;
5593 }
5594 break;
5595 case LT:
5596 case UNLT:
5597 if (code == LT && TARGET_IEEE_FP)
5598 {
5599 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5600 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5601 intcmp_mode = CCmode;
5602 code = EQ;
5603 }
5604 else
5605 {
5606 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5607 code = NE;
5608 }
5609 break;
5610 case GE:
5611 case UNGE:
5612 if (code == GE || !TARGET_IEEE_FP)
5613 {
5614 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5615 code = EQ;
5616 }
5617 else
5618 {
5619 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5620 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5621 GEN_INT (0x01)));
5622 code = NE;
5623 }
5624 break;
5625 case LE:
5626 case UNLE:
5627 if (code == LE && TARGET_IEEE_FP)
5628 {
5629 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5630 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5631 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5632 intcmp_mode = CCmode;
5633 code = LTU;
5634 }
5635 else
5636 {
5637 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5638 code = NE;
5639 }
5640 break;
5641 case EQ:
5642 case UNEQ:
5643 if (code == EQ && TARGET_IEEE_FP)
5644 {
5645 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5646 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5647 intcmp_mode = CCmode;
5648 code = EQ;
5649 }
5650 else
5651 {
5652 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5653 code = NE;
5654 break;
5655 }
5656 break;
5657 case NE:
5658 case LTGT:
5659 if (code == NE && TARGET_IEEE_FP)
5660 {
5661 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5662 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5663 GEN_INT (0x40)));
5664 code = NE;
5665 }
5666 else
5667 {
5668 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5669 code = EQ;
5670 }
5671 break;
5672
5673 case UNORDERED:
5674 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5675 code = NE;
5676 break;
5677 case ORDERED:
5678 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5679 code = EQ;
5680 break;
5681
5682 default:
5683 abort ();
5684 }
5685 }
5686
5687 /* Return the test that should be put into the flags user, i.e.
5688 the bcc, scc, or cmov instruction. */
5689 return gen_rtx_fmt_ee (code, VOIDmode,
5690 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5691 const0_rtx);
5692 }
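/* For reference on the masks used above: after fnstsw stages the 387
   status word into AX, the condition bits land in AH with C0 at bit 0
   (0x01), C2 at bit 2 (0x04) and C3 at bit 6 (0x40), so 0x45 tests
   C0|C2|C3 at once, 0x04 isolates C2 (unordered) and 0x40 isolates
   C3 (equality).  */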
5693
5694 rtx
5695 ix86_expand_compare (code, second_test, bypass_test)
5696 enum rtx_code code;
5697 rtx *second_test, *bypass_test;
5698 {
5699 rtx op0, op1, ret;
5700 op0 = ix86_compare_op0;
5701 op1 = ix86_compare_op1;
5702
5703 if (second_test)
5704 *second_test = NULL_RTX;
5705 if (bypass_test)
5706 *bypass_test = NULL_RTX;
5707
5708 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5709 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
5710 second_test, bypass_test);
5711 else
5712 ret = ix86_expand_int_compare (code, op0, op1);
5713
5714 return ret;
5715 }
5716
5717 void
5718 ix86_expand_branch (code, label)
5719 enum rtx_code code;
5720 rtx label;
5721 {
5722 rtx tmp;
5723
5724 switch (GET_MODE (ix86_compare_op0))
5725 {
5726 case QImode:
5727 case HImode:
5728 case SImode:
5729 tmp = ix86_expand_compare (code, NULL, NULL);
5730 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5731 gen_rtx_LABEL_REF (VOIDmode, label),
5732 pc_rtx);
5733 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5734 return;
5735
5736 case SFmode:
5737 case DFmode:
5738 case XFmode:
5739 case TFmode:
5740 /* Don't expand the comparison early, so that we get better code
5741 when jump or whoever decides to reverse the comparison. */
5742 {
5743 rtvec vec;
5744 int use_fcomi;
5745
5746 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5747 &ix86_compare_op1);
5748
5749 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5750 ix86_compare_op0, ix86_compare_op1);
5751 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5752 gen_rtx_LABEL_REF (VOIDmode, label),
5753 pc_rtx);
5754 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5755
5756 use_fcomi = ix86_use_fcomi_compare (code);
5757 vec = rtvec_alloc (3 + !use_fcomi);
5758 RTVEC_ELT (vec, 0) = tmp;
5759 RTVEC_ELT (vec, 1)
5760 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5761 RTVEC_ELT (vec, 2)
5762 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5763 if (! use_fcomi)
5764 RTVEC_ELT (vec, 3)
5765 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5766
5767 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5768 return;
5769 }
5770
5771 case DImode:
5772 /* Expand DImode branch into multiple compare+branch. */
5773 {
5774 rtx lo[2], hi[2], label2;
5775 enum rtx_code code1, code2, code3;
5776
5777 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5778 {
5779 tmp = ix86_compare_op0;
5780 ix86_compare_op0 = ix86_compare_op1;
5781 ix86_compare_op1 = tmp;
5782 code = swap_condition (code);
5783 }
5784 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5785 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5786
5787 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5788 avoid two branches. This costs one extra insn, so disable when
5789 optimizing for size. */
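	/* A minimal C sketch of the identity (illustration only): two double
	   words are equal iff the OR of the XORs of their halves is zero, so
	   a single compare against zero suffices:

	     int di_eq (unsigned hi0, unsigned lo0, unsigned hi1, unsigned lo1)
	     {
	       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
	     }  */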
5790
5791 if ((code == EQ || code == NE)
5792 && (!optimize_size
5793 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5794 {
5795 rtx xor0, xor1;
5796
5797 xor1 = hi[0];
5798 if (hi[1] != const0_rtx)
5799 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5800 NULL_RTX, 0, OPTAB_WIDEN);
5801
5802 xor0 = lo[0];
5803 if (lo[1] != const0_rtx)
5804 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5805 NULL_RTX, 0, OPTAB_WIDEN);
5806
5807 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5808 NULL_RTX, 0, OPTAB_WIDEN);
5809
5810 ix86_compare_op0 = tmp;
5811 ix86_compare_op1 = const0_rtx;
5812 ix86_expand_branch (code, label);
5813 return;
5814 }
5815
5816 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5817 	   op1 is a constant, and its low word is zero, then we can just
5818 examine the high word. */
5819
5820 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5821 switch (code)
5822 {
5823 case LT: case LTU: case GE: case GEU:
5824 ix86_compare_op0 = hi[0];
5825 ix86_compare_op1 = hi[1];
5826 ix86_expand_branch (code, label);
5827 return;
5828 default:
5829 break;
5830 }
5831
5832 /* Otherwise, we need two or three jumps. */
5833
5834 label2 = gen_label_rtx ();
5835
5836 code1 = code;
5837 code2 = swap_condition (code);
5838 code3 = unsigned_condition (code);
5839
5840 switch (code)
5841 {
5842 case LT: case GT: case LTU: case GTU:
5843 break;
5844
5845 case LE: code1 = LT; code2 = GT; break;
5846 case GE: code1 = GT; code2 = LT; break;
5847 case LEU: code1 = LTU; code2 = GTU; break;
5848 case GEU: code1 = GTU; code2 = LTU; break;
5849
5850 case EQ: code1 = NIL; code2 = NE; break;
5851 case NE: code2 = NIL; break;
5852
5853 default:
5854 abort ();
5855 }
5856
5857 /*
5858 * a < b =>
5859 * if (hi(a) < hi(b)) goto true;
5860 * if (hi(a) > hi(b)) goto false;
5861 * if (lo(a) < lo(b)) goto true;
5862 * false:
5863 */
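	/* As a C sketch (illustration only), the same lowering for a signed
	   64-bit "a <= b" built from 32-bit halves is:

	     if ((int) hi_a < (int) hi_b) goto l_true;    (code1 = LT)
	     if ((int) hi_a > (int) hi_b) goto l_false;   (code2 = GT)
	     if (lo_a <= lo_b) goto l_true;               (code3 = LEU)
	   l_false:

	   note that the low halves are always compared unsigned.  */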
5864
5865 ix86_compare_op0 = hi[0];
5866 ix86_compare_op1 = hi[1];
5867
5868 if (code1 != NIL)
5869 ix86_expand_branch (code1, label);
5870 if (code2 != NIL)
5871 ix86_expand_branch (code2, label2);
5872
5873 ix86_compare_op0 = lo[0];
5874 ix86_compare_op1 = lo[1];
5875 ix86_expand_branch (code3, label);
5876
5877 if (code2 != NIL)
5878 emit_label (label2);
5879 return;
5880 }
5881
5882 default:
5883 abort ();
5884 }
5885 }
5886
5887 /* Split branch based on floating point condition. */
5888 void
5889 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5890 rtx condition, op1, op2, target1, target2, tmp;
5891 {
5892 rtx second, bypass;
5893 rtx label = NULL_RTX;
5894 enum rtx_code code = GET_CODE (condition);
5895
5896 if (target2 != pc_rtx)
5897 {
5898 rtx tmp = target2;
5899 code = reverse_condition_maybe_unordered (code);
5900 target2 = target1;
5901 target1 = tmp;
5902 }
5903
5904 condition = ix86_expand_fp_compare (code, op1, op2,
5905 tmp, &second, &bypass);
5906 if (bypass != NULL_RTX)
5907 {
5908 label = gen_label_rtx ();
5909 emit_jump_insn (gen_rtx_SET
5910 (VOIDmode, pc_rtx,
5911 gen_rtx_IF_THEN_ELSE (VOIDmode,
5912 bypass,
5913 gen_rtx_LABEL_REF (VOIDmode,
5914 label),
5915 pc_rtx)));
5916 }
5917   /* AMD Athlon, and probably other CPUs too, have a fast bypass path between
5918      the comparison and the first branch.  The second branch takes longer to
5919      execute, so place the less predictable branch first when possible.  */
5920 if (second != NULL_RTX
5921 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5922 {
5923 rtx tmp = condition;
5924 condition = second;
5925 second = tmp;
5926 }
5927 emit_jump_insn (gen_rtx_SET
5928 (VOIDmode, pc_rtx,
5929 gen_rtx_IF_THEN_ELSE (VOIDmode,
5930 condition, target1, target2)));
5931 if (second != NULL_RTX)
5932 emit_jump_insn (gen_rtx_SET
5933 (VOIDmode, pc_rtx,
5934 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5935 if (label != NULL_RTX)
5936 emit_label (label);
5937 }
5938
5939 int
5940 ix86_expand_setcc (code, dest)
5941 enum rtx_code code;
5942 rtx dest;
5943 {
5944 rtx ret, tmp, tmpreg;
5945 rtx second_test, bypass_test;
5946 int type;
5947
5948 if (GET_MODE (ix86_compare_op0) == DImode)
5949 return 0; /* FAIL */
5950
5951 /* Three modes of generation:
5952 0 -- destination does not overlap compare sources:
5953 clear dest first, emit strict_low_part setcc.
5954 1 -- destination does overlap compare sources:
5955 emit subreg setcc, zero extend.
5956 2 -- destination is in QImode:
5957 emit setcc only.
5958 */
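  /* Schematically (illustration only, not the literal output): mode 0
     yields "xorl %eax,%eax; cmpl ...; sete %al", mode 1 yields "sete %cl;
     movzbl %cl,%eax" through a fresh QImode register, and mode 2 is just
     "sete %al" since the destination is already QImode.  */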
5959
5960 type = 0;
5961
5962 if (GET_MODE (dest) == QImode)
5963 type = 2;
5964 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5965 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5966 type = 1;
5967
5968 if (type == 0)
5969 emit_move_insn (dest, const0_rtx);
5970
5971 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5972 PUT_MODE (ret, QImode);
5973
5974 tmp = dest;
5975 tmpreg = dest;
5976 if (type == 0)
5977 {
5978 tmp = gen_lowpart (QImode, dest);
5979 tmpreg = tmp;
5980 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5981 }
5982 else if (type == 1)
5983 {
5984 if (!cse_not_expected)
5985 tmp = gen_reg_rtx (QImode);
5986 else
5987 tmp = gen_lowpart (QImode, dest);
5988 tmpreg = tmp;
5989 }
5990
5991 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5992 if (bypass_test || second_test)
5993 {
5994 rtx test = second_test;
5995 int bypass = 0;
5996 rtx tmp2 = gen_reg_rtx (QImode);
5997 if (bypass_test)
5998 {
5999 if (second_test)
6000 	    abort ();
6001 test = bypass_test;
6002 bypass = 1;
6003 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6004 }
6005 PUT_MODE (test, QImode);
6006 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6007
6008 if (bypass)
6009 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6010 else
6011 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
6012 }
6013
6014 if (type == 1)
6015 {
6016 rtx clob;
6017
6018 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6019 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6020 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6021 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6022 emit_insn (tmp);
6023 }
6024
6025 return 1; /* DONE */
6026 }
6027
6028 int
6029 ix86_expand_int_movcc (operands)
6030 rtx operands[];
6031 {
6032 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6033 rtx compare_seq, compare_op;
6034 rtx second_test, bypass_test;
6035
6036   /* When the compare code is not LTU or GEU, we can not use the sbbl case.
6037      When the comparison is done with an immediate, we can convert it to
6038      LTU or GEU by adjusting the constant.  */
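  /* For example (unsigned, illustration only): "x <= 4" is the same test
     as "x < 5", and "x > 4" the same as "x >= 5", so adding 1 to the
     immediate turns LEU/GTU into LTU/GEU.  The 0xffffffff operand is
     excluded in the test below because adding 1 would wrap it to 0.  */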
6039
6040 if ((code == LEU || code == GTU)
6041 && GET_CODE (ix86_compare_op1) == CONST_INT
6042 && GET_MODE (operands[0]) != HImode
6043 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
6044 && GET_CODE (operands[2]) == CONST_INT
6045 && GET_CODE (operands[3]) == CONST_INT)
6046 {
6047 if (code == LEU)
6048 code = LTU;
6049 else
6050 code = GEU;
6051 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6052 }
6053
6054 start_sequence ();
6055 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6056 compare_seq = gen_sequence ();
6057 end_sequence ();
6058
6059 compare_code = GET_CODE (compare_op);
6060
6061 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6062 HImode insns, we'd be swallowed in word prefix ops. */
6063
6064 if (GET_MODE (operands[0]) != HImode
6065 && GET_CODE (operands[2]) == CONST_INT
6066 && GET_CODE (operands[3]) == CONST_INT)
6067 {
6068 rtx out = operands[0];
6069 HOST_WIDE_INT ct = INTVAL (operands[2]);
6070 HOST_WIDE_INT cf = INTVAL (operands[3]);
6071 HOST_WIDE_INT diff;
6072
6073 if ((compare_code == LTU || compare_code == GEU)
6074 && !second_test && !bypass_test)
6075 {
6076
6077 /* Detect overlap between destination and compare sources. */
6078 rtx tmp = out;
6079
6080 		  /* To simplify the rest of the code, restrict to the GEU case.  */
6081 if (compare_code == LTU)
6082 {
6083 		  HOST_WIDE_INT tmp = ct;
6084 ct = cf;
6085 cf = tmp;
6086 compare_code = reverse_condition (compare_code);
6087 code = reverse_condition (code);
6088 }
6089 diff = ct - cf;
6090
6091 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
6092 || reg_overlap_mentioned_p (out, ix86_compare_op1))
6093 tmp = gen_reg_rtx (SImode);
6094
6095 emit_insn (compare_seq);
6096 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6097
6098 if (diff == 1)
6099 {
6100 /*
6101 * cmpl op0,op1
6102 * sbbl dest,dest
6103 * [addl dest, ct]
6104 *
6105 * Size 5 - 8.
6106 */
6107 if (ct)
6108 		    emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6109 }
6110 else if (cf == -1)
6111 {
6112 /*
6113 * cmpl op0,op1
6114 * sbbl dest,dest
6115 * orl $ct, dest
6116 *
6117 * Size 8.
6118 */
6119 		  emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
6120 }
6121 else if (diff == -1 && ct)
6122 {
6123 /*
6124 * cmpl op0,op1
6125 * sbbl dest,dest
6126 * xorl $-1, dest
6127 * [addl dest, cf]
6128 *
6129 * Size 8 - 11.
6130 */
6131 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6132 if (cf)
6133 		    emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
6134 }
6135 else
6136 {
6137 /*
6138 * cmpl op0,op1
6139 * sbbl dest,dest
6140 * andl cf - ct, dest
6141 * [addl dest, ct]
6142 *
6143 * Size 8 - 11.
6144 */
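		  /* A C model of the trick (illustration only): after the
		     compare, "sbbl dest,dest" computes dest - dest - CF,
		     i.e. -1 when the carry is set (condition false here)
		     and 0 when it is clear, so

		       dest = (mask & (cf - ct)) + ct;

		     yields cf when mask is -1 and ct when mask is 0,
		     selecting between the constants without a branch.  */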
6145 		  emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
6146 		  if (ct)
6147 		    emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6148 }
6149
6150 if (tmp != out)
6151 emit_move_insn (out, tmp);
6152
6153 return 1; /* DONE */
6154 }
6155
6156 diff = ct - cf;
6157 if (diff < 0)
6158 {
6159 HOST_WIDE_INT tmp;
6160 tmp = ct, ct = cf, cf = tmp;
6161 diff = -diff;
6162 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6163 {
6164 	      /* We may be reversing an unordered compare to a normal compare;
6165 		 that is not valid in general (we may convert a non-trapping
6166 		 condition into a trapping one), but on i386 we currently
6167 		 emit all comparisons unordered.  */
6168 compare_code = reverse_condition_maybe_unordered (compare_code);
6169 code = reverse_condition_maybe_unordered (code);
6170 }
6171 else
6172 {
6173 compare_code = reverse_condition (compare_code);
6174 code = reverse_condition (code);
6175 }
6176 }
6177 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6178 || diff == 3 || diff == 5 || diff == 9)
6179 {
6180 /*
6181 * xorl dest,dest
6182 * cmpl op1,op2
6183 * setcc dest
6184 * lea cf(dest*(ct-cf)),dest
6185 *
6186 * Size 14.
6187 *
6188 * This also catches the degenerate setcc-only case.
6189 */
6190
6191 rtx tmp;
6192 int nops;
6193
6194 out = emit_store_flag (out, code, ix86_compare_op0,
6195 ix86_compare_op1, VOIDmode, 0, 1);
6196
6197 nops = 0;
6198 if (diff == 1)
6199 tmp = out;
6200 else
6201 {
6202 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
6203 nops++;
6204 if (diff & 1)
6205 {
6206 tmp = gen_rtx_PLUS (SImode, tmp, out);
6207 nops++;
6208 }
6209 }
6210 if (cf != 0)
6211 {
6212 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
6213 nops++;
6214 }
6215 if (tmp != out)
6216 {
6217 if (nops == 0)
6218 emit_move_insn (out, tmp);
6219 else if (nops == 1)
6220 {
6221 rtx clob;
6222
6223 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6224 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6225
6226 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6227 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6228 emit_insn (tmp);
6229 }
6230 else
6231 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6232 }
6233 if (out != operands[0])
6234 emit_move_insn (operands[0], out);
6235
6236 return 1; /* DONE */
6237 }
6238
6239 /*
6240 * General case: Jumpful:
6241 * xorl dest,dest cmpl op1, op2
6242 * cmpl op1, op2 movl ct, dest
6243 * setcc dest jcc 1f
6244 * decl dest movl cf, dest
6245 * andl (cf-ct),dest 1:
6246 * addl ct,dest
6247 *
6248 * Size 20. Size 14.
6249 *
6250 * This is reasonably steep, but branch mispredict costs are
6251 * high on modern cpus, so consider failing only if optimizing
6252 * for space.
6253 *
6254 * %%% Parameterize branch_cost on the tuning architecture, then
6255 * use that. The 80386 couldn't care less about mispredicts.
6256 */
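      /* A C model of the branchless sequence (illustration only):

	   t = (cond != 0);   (xorl + setcc: 1 if true, 0 if false)
	   t -= 1;            (decl: 0 if true, -1 if false)
	   t &= cf - ct;      (0 if true, cf - ct if false)
	   t += ct;           (ct if true, cf if false)  */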
6257
6258 if (!optimize_size && !TARGET_CMOVE)
6259 {
6260 if (ct == 0)
6261 {
6262 ct = cf;
6263 cf = 0;
6264 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6265 {
6266 	      /* We may be reversing an unordered compare to a normal compare;
6267 		 that is not valid in general (we may convert a non-trapping
6268 		 condition into a trapping one), but on i386 we currently
6269 		 emit all comparisons unordered.  */
6270 compare_code = reverse_condition_maybe_unordered (compare_code);
6271 code = reverse_condition_maybe_unordered (code);
6272 }
6273 else
6274 {
6275 compare_code = reverse_condition (compare_code);
6276 code = reverse_condition (code);
6277 }
6278 }
6279
6280 out = emit_store_flag (out, code, ix86_compare_op0,
6281 ix86_compare_op1, VOIDmode, 0, 1);
6282
6283 emit_insn (gen_addsi3 (out, out, constm1_rtx));
6284 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
6285 if (ct != 0)
6286 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6287 if (out != operands[0])
6288 emit_move_insn (operands[0], out);
6289
6290 return 1; /* DONE */
6291 }
6292 }
6293
6294 if (!TARGET_CMOVE)
6295 {
6296       /* Try a few more things with specific constants and a variable.  */
6297
6298 optab op;
6299 rtx var, orig_out, out, tmp;
6300
6301 if (optimize_size)
6302 return 0; /* FAIL */
6303
6304       /* If one of the two operands is 0 or -1, recurse to load a 0/-1 mask
6305 	 and then mask the variable in with AND or IOR.  */
6306
6307 if (GET_CODE (operands[2]) == CONST_INT)
6308 {
6309 var = operands[3];
6310 if (INTVAL (operands[2]) == 0)
6311 operands[3] = constm1_rtx, op = and_optab;
6312 else if (INTVAL (operands[2]) == -1)
6313 operands[3] = const0_rtx, op = ior_optab;
6314 else
6315 return 0; /* FAIL */
6316 }
6317 else if (GET_CODE (operands[3]) == CONST_INT)
6318 {
6319 var = operands[2];
6320 if (INTVAL (operands[3]) == 0)
6321 operands[2] = constm1_rtx, op = and_optab;
6322 else if (INTVAL (operands[3]) == -1)
6323 operands[2] = const0_rtx, op = ior_optab;
6324 else
6325 return 0; /* FAIL */
6326 }
6327 else
6328 return 0; /* FAIL */
6329
6330 orig_out = operands[0];
6331 tmp = gen_reg_rtx (GET_MODE (orig_out));
6332 operands[0] = tmp;
6333
6334 /* Recurse to get the constant loaded. */
6335 if (ix86_expand_int_movcc (operands) == 0)
6336 return 0; /* FAIL */
6337
6338 /* Mask in the interesting variable. */
6339 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6340 OPTAB_WIDEN);
6341 if (out != orig_out)
6342 emit_move_insn (orig_out, out);
6343
6344 return 1; /* DONE */
6345 }
6346
6347 /*
6348 * For comparison with above,
6349 *
6350 * movl cf,dest
6351 * movl ct,tmp
6352 * cmpl op1,op2
6353 * cmovcc tmp,dest
6354 *
6355 * Size 15.
6356 */
6357
6358 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6359 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6360 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6361 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6362
6363 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6364 {
6365 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6366 emit_move_insn (tmp, operands[3]);
6367 operands[3] = tmp;
6368 }
6369 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6370 {
6371 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6372 emit_move_insn (tmp, operands[2]);
6373 operands[2] = tmp;
6374 }
6375
6376 emit_insn (compare_seq);
6377 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6378 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6379 compare_op, operands[2],
6380 operands[3])));
6381 if (bypass_test)
6382 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6383 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6384 bypass_test,
6385 operands[3],
6386 operands[0])));
6387 if (second_test)
6388 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6389 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6390 second_test,
6391 operands[2],
6392 operands[0])));
6393
6394 return 1; /* DONE */
6395 }
6396
6397 int
6398 ix86_expand_fp_movcc (operands)
6399 rtx operands[];
6400 {
6401 enum rtx_code code;
6402 rtx tmp;
6403 rtx compare_op, second_test, bypass_test;
6404
6405 /* For SF/DFmode conditional moves based on comparisons
6406      in the same mode, we may want to use SSE min/max instructions.  */
6407 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6408 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6409 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6410      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
6411 && (!TARGET_IEEE_FP
6412 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
6413 /* We may be called from the post-reload splitter. */
6414 && (!REG_P (operands[0])
6415 || SSE_REG_P (operands[0])
6416 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6417 {
6418 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6419 code = GET_CODE (operands[1]);
6420
6421 	/* See if we have a (cross) match between the comparison operands and
6422 conditional move operands. */
6423 if (rtx_equal_p (operands[2], op1))
6424 {
6425 rtx tmp = op0;
6426 op0 = op1;
6427 op1 = tmp;
6428 code = reverse_condition_maybe_unordered (code);
6429 }
6430 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6431 {
6432 /* Check for min operation. */
6433 if (code == LT)
6434 {
6435 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6436 if (memory_operand (op0, VOIDmode))
6437 op0 = force_reg (GET_MODE (operands[0]), op0);
6438 if (GET_MODE (operands[0]) == SFmode)
6439 emit_insn (gen_minsf3 (operands[0], op0, op1));
6440 else
6441 emit_insn (gen_mindf3 (operands[0], op0, op1));
6442 return 1;
6443 }
6444 /* Check for max operation. */
6445 if (code == GT)
6446 {
6447 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6448 if (memory_operand (op0, VOIDmode))
6449 op0 = force_reg (GET_MODE (operands[0]), op0);
6450 if (GET_MODE (operands[0]) == SFmode)
6451 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6452 else
6453 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6454 return 1;
6455 }
6456 }
6457       /* Arrange for the condition to be a valid sse_comparison_operator.
6458 	 When not in IEEE mode, also try to canonicalize the destination
6459 	 operand to be first in the comparison; this helps reload to avoid
6460 	 extra moves.  */
6461 if (!sse_comparison_operator (operands[1], VOIDmode)
6462 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6463 {
6464 rtx tmp = ix86_compare_op0;
6465 ix86_compare_op0 = ix86_compare_op1;
6466 ix86_compare_op1 = tmp;
6467 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6468 VOIDmode, ix86_compare_op0,
6469 ix86_compare_op1);
6470 }
6471       /* Similarly, try to make the result the first operand of the
6472 	 conditional move.  We also don't support the NE comparison on SSE,
6473 	 so try to avoid it.  */
6474 if (rtx_equal_p (operands[0], operands[3])
6475 || GET_CODE (operands[1]) == NE)
6476 {
6477 rtx tmp = operands[2];
6478 operands[2] = operands[3];
6479 	  operands[3] = tmp;
6480 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6481 (GET_CODE (operands[1])),
6482 VOIDmode, ix86_compare_op0,
6483 ix86_compare_op1);
6484 }
6485 if (GET_MODE (operands[0]) == SFmode)
6486 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6487 operands[2], operands[3],
6488 ix86_compare_op0, ix86_compare_op1));
6489 else
6490 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6491 operands[2], operands[3],
6492 ix86_compare_op0, ix86_compare_op1));
6493 return 1;
6494 }
6495
6499 code = GET_CODE (operands[1]);
6500 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6501
6502 /* The floating point conditional move instructions don't directly
6503 support signed integer comparisons. */
6504
6505 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6506 {
6507 if (second_test != NULL || bypass_test != NULL)
6508 	abort ();
6509 tmp = gen_reg_rtx (QImode);
6510 ix86_expand_setcc (code, tmp);
6511 code = NE;
6512 ix86_compare_op0 = tmp;
6513 ix86_compare_op1 = const0_rtx;
6514 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6515 }
6516 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6517 {
6518 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6519 emit_move_insn (tmp, operands[3]);
6520 operands[3] = tmp;
6521 }
6522 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6523 {
6524 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6525 emit_move_insn (tmp, operands[2]);
6526 operands[2] = tmp;
6527 }
6528
6529 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6530 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6531 compare_op,
6532 operands[2],
6533 operands[3])));
6534 if (bypass_test)
6535 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6536 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6537 bypass_test,
6538 operands[3],
6539 operands[0])));
6540 if (second_test)
6541 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6542 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6543 second_test,
6544 operands[2],
6545 operands[0])));
6546
6547 return 1;
6548 }
6549
6550 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
6551    works for floating point parameters and non-offsettable memories.
6552    For pushes, it returns just stack offsets; the values will be saved
6553    in the right order.  At most three parts are generated.  */
6554
6555 static int
6556 ix86_split_to_parts (operand, parts, mode)
6557 rtx operand;
6558 rtx *parts;
6559 enum machine_mode mode;
6560 {
6561 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
6562
6563 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6564 abort ();
6565 if (size < 2 || size > 3)
6566 abort ();
6567
6568   /* Optimize constant pool references to immediates.  This is used by fp
6569      moves, which force all constants to memory to allow combining.  */
6570
6571 if (GET_CODE (operand) == MEM
6572 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6573 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6574 operand = get_pool_constant (XEXP (operand, 0));
6575
6576 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6577 {
6578       /* The only non-offsettable memories we handle are pushes.  */
6579 if (! push_operand (operand, VOIDmode))
6580 abort ();
6581
6582 PUT_MODE (operand, SImode);
6583 parts[0] = parts[1] = parts[2] = operand;
6584 }
6585 else
6586 {
6587 if (mode == DImode)
6588 split_di (&operand, 1, &parts[0], &parts[1]);
6589 else
6590 {
6591 if (REG_P (operand))
6592 {
6593 if (!reload_completed)
6594 abort ();
6595 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6596 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6597 if (size == 3)
6598 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6599 }
6600 else if (offsettable_memref_p (operand))
6601 {
6602 PUT_MODE (operand, SImode);
6603 parts[0] = operand;
6604 parts[1] = adj_offsettable_operand (operand, 4);
6605 if (size == 3)
6606 parts[2] = adj_offsettable_operand (operand, 8);
6607 }
6608 else if (GET_CODE (operand) == CONST_DOUBLE)
6609 {
6610 REAL_VALUE_TYPE r;
6611 long l[4];
6612
6613 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6614 switch (mode)
6615 {
6616 case XFmode:
6617 case TFmode:
6618 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6619 parts[2] = GEN_INT (l[2]);
6620 break;
6621 case DFmode:
6622 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6623 break;
6624 default:
6625 abort ();
6626 }
6627 parts[1] = GEN_INT (l[1]);
6628 parts[0] = GEN_INT (l[0]);
6629 }
6630 else
6631 abort ();
6632 }
6633 }
6634
6635 return size;
6636 }
6637
6638 /* Emit insns to perform a move or push of DI, DF, and XF values.
6639    Return false when normal moves are needed; true when all required
6640    insns have been emitted.  Operands 2-4 contain the input values
6641    in the correct order; operands 5-7 contain the output values.  */
6642
6643 int
6644 ix86_split_long_move (operands1)
6645 rtx operands1[];
6646 {
6647 rtx part[2][3];
6648 rtx operands[2];
6649 int size;
6650 int push = 0;
6651 int collisions = 0;
6652
6653 /* Make our own copy to avoid clobbering the operands. */
6654 operands[0] = copy_rtx (operands1[0]);
6655 operands[1] = copy_rtx (operands1[1]);
6656
6657   /* The only non-offsettable memory we handle is a push.  */
6658 if (push_operand (operands[0], VOIDmode))
6659 push = 1;
6660 else if (GET_CODE (operands[0]) == MEM
6661 && ! offsettable_memref_p (operands[0]))
6662 abort ();
6663
6664 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6665 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6666
6667   /* When emitting a push, watch for source operands on the stack.  */
6668 if (push && GET_CODE (operands[1]) == MEM
6669 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6670 {
6671 if (size == 3)
6672 part[1][1] = part[1][2];
6673 part[1][0] = part[1][1];
6674 }
6675
6676   /* We need to do the copy in the right order in case an address register
6677      of the source overlaps the destination.  */
6678 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6679 {
6680 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6681 collisions++;
6682 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6683 collisions++;
6684 if (size == 3
6685 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6686 collisions++;
6687
6688 /* Collision in the middle part can be handled by reordering. */
6689 if (collisions == 1 && size == 3
6690 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6691 {
6692 rtx tmp;
6693 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6694 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6695 }
6696
6697       /* If there are more collisions, we can't handle them by reordering.
6698 	 Do an lea to the last part and use only one colliding move.  */
6699 else if (collisions > 1)
6700 {
6701 collisions = 1;
6702 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6703 XEXP (part[1][0], 0)));
6704 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6705 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6706 if (size == 3)
6707 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6708 }
6709 }
6710
6711 if (push)
6712 {
6713 if (size == 3)
6714 {
6715 	  /* We use only the first 12 bytes of the TFmode value, but for
6716 	     pushing we are required to adjust the stack as if we were pushing
6717 	     a real 16-byte value.  */
6718 if (GET_MODE (operands1[0]) == TFmode)
6719 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6720 GEN_INT (-4)));
6721 emit_insn (gen_push (part[1][2]));
6722 }
6723 emit_insn (gen_push (part[1][1]));
6724 emit_insn (gen_push (part[1][0]));
6725 return 1;
6726 }
6727
6728   /* Choose the correct order so as not to overwrite the source before it is copied.  */
6729 if ((REG_P (part[0][0])
6730 && REG_P (part[1][1])
6731 && (REGNO (part[0][0]) == REGNO (part[1][1])
6732 || (size == 3
6733 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6734 || (collisions > 0
6735 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
6736 {
6737 if (size == 3)
6738 {
6739 operands1[2] = part[0][2];
6740 operands1[3] = part[0][1];
6741 operands1[4] = part[0][0];
6742 operands1[5] = part[1][2];
6743 operands1[6] = part[1][1];
6744 operands1[7] = part[1][0];
6745 }
6746 else
6747 {
6748 operands1[2] = part[0][1];
6749 operands1[3] = part[0][0];
6750 operands1[5] = part[1][1];
6751 operands1[6] = part[1][0];
6752 }
6753 }
6754 else
6755 {
6756 if (size == 3)
6757 {
6758 operands1[2] = part[0][0];
6759 operands1[3] = part[0][1];
6760 operands1[4] = part[0][2];
6761 operands1[5] = part[1][0];
6762 operands1[6] = part[1][1];
6763 operands1[7] = part[1][2];
6764 }
6765 else
6766 {
6767 operands1[2] = part[0][0];
6768 operands1[3] = part[0][1];
6769 operands1[5] = part[1][0];
6770 operands1[6] = part[1][1];
6771 }
6772 }
6773
6774 return 0;
6775 }
6776
6777 void
6778 ix86_split_ashldi (operands, scratch)
6779 rtx *operands, scratch;
6780 {
6781 rtx low[2], high[2];
6782 int count;
6783
6784 if (GET_CODE (operands[2]) == CONST_INT)
6785 {
6786 split_di (operands, 2, low, high);
6787 count = INTVAL (operands[2]) & 63;
6788
6789 if (count >= 32)
6790 {
6791 emit_move_insn (high[0], low[1]);
6792 emit_move_insn (low[0], const0_rtx);
6793
6794 if (count > 32)
6795 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6796 }
6797 else
6798 {
6799 if (!rtx_equal_p (operands[0], operands[1]))
6800 emit_move_insn (operands[0], operands[1]);
6801 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6802 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6803 }
6804 }
6805 else
6806 {
6807 if (!rtx_equal_p (operands[0], operands[1]))
6808 emit_move_insn (operands[0], operands[1]);
6809
6810 split_di (operands, 1, low, high);
6811
6812 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6813 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
6814
6815 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6816 {
6817 if (! no_new_pseudos)
6818 scratch = force_reg (SImode, const0_rtx);
6819 else
6820 emit_move_insn (scratch, const0_rtx);
6821
6822 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6823 scratch));
6824 }
6825 else
6826 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
6827 }
6828 }
6829
6830 void
6831 ix86_split_ashrdi (operands, scratch)
6832 rtx *operands, scratch;
6833 {
6834 rtx low[2], high[2];
6835 int count;
6836
6837 if (GET_CODE (operands[2]) == CONST_INT)
6838 {
6839 split_di (operands, 2, low, high);
6840 count = INTVAL (operands[2]) & 63;
6841
6842 if (count >= 32)
6843 {
6844 emit_move_insn (low[0], high[1]);
6845
6846 if (! reload_completed)
6847 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6848 else
6849 {
6850 emit_move_insn (high[0], low[0]);
6851 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6852 }
6853
6854 if (count > 32)
6855 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6856 }
6857 else
6858 {
6859 if (!rtx_equal_p (operands[0], operands[1]))
6860 emit_move_insn (operands[0], operands[1]);
6861 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6862 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6863 }
6864 }
6865 else
6866 {
6867 if (!rtx_equal_p (operands[0], operands[1]))
6868 emit_move_insn (operands[0], operands[1]);
6869
6870 split_di (operands, 1, low, high);
6871
6872 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6873 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6874
6875 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6876 {
6877 if (! no_new_pseudos)
6878 scratch = gen_reg_rtx (SImode);
6879 emit_move_insn (scratch, high[0]);
6880 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6881 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6882 scratch));
6883 }
6884 else
6885 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
6886 }
6887 }
6888
6889 void
6890 ix86_split_lshrdi (operands, scratch)
6891 rtx *operands, scratch;
6892 {
6893 rtx low[2], high[2];
6894 int count;
6895
6896 if (GET_CODE (operands[2]) == CONST_INT)
6897 {
6898 split_di (operands, 2, low, high);
6899 count = INTVAL (operands[2]) & 63;
6900
6901 if (count >= 32)
6902 {
6903 emit_move_insn (low[0], high[1]);
6904 emit_move_insn (high[0], const0_rtx);
6905
6906 if (count > 32)
6907 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6908 }
6909 else
6910 {
6911 if (!rtx_equal_p (operands[0], operands[1]))
6912 emit_move_insn (operands[0], operands[1]);
6913 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6914 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6915 }
6916 }
6917 else
6918 {
6919 if (!rtx_equal_p (operands[0], operands[1]))
6920 emit_move_insn (operands[0], operands[1]);
6921
6922 split_di (operands, 1, low, high);
6923
6924 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6925 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6926
6927 /* Heh. By reversing the arguments, we can reuse this pattern. */
6928 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6929 {
6930 if (! no_new_pseudos)
6931 scratch = force_reg (SImode, const0_rtx);
6932 else
6933 emit_move_insn (scratch, const0_rtx);
6934
6935 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6936 scratch));
6937 }
6938 else
6939 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6940 }
6941 }
6942
6943 /* Expand the appropriate insns for doing strlen if not just doing
6944    repnz; scasb
6945
6946    out = result, initialized with the start address
6947    align_rtx = alignment of the address.
6948    scratch = scratch register, initialized with the start address when
6949 	not aligned, otherwise undefined
6950
6951    This is just the body.  It needs the initializations mentioned above and
6952    some address computation at the end.  These things are done in i386.md.  */
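
#if 0
/* A C sketch of the expansion below (illustration only; the real code
   emits RTL, and the final address computation is done in i386.md).  */
static unsigned long
strlen_unrolled_sketch (const char *s)
{
  const char *p = s;

  /* Check bytes one at a time until the pointer is 4-byte aligned.  */
  while (((unsigned long) p & 3) != 0)
    if (*p++ == 0)
      return p - 1 - s;

  /* Then check 4 bytes at a time; the formula below is nonzero iff
     some byte of W is zero.  */
  for (;;)
    {
      unsigned int w = *(const unsigned int *) p;
      p += 4;
      if (((w - 0x01010101U) & ~w & 0x80808080U) != 0)
	break;
    }

  /* Step back and locate the zero byte.  */
  p -= 4;
  while (*p)
    p++;
  return p - s;
}
#endif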
6953
6954 void
6955 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6956 rtx out, align_rtx, scratch;
6957 {
6958 int align;
6959 rtx tmp;
6960 rtx align_2_label = NULL_RTX;
6961 rtx align_3_label = NULL_RTX;
6962 rtx align_4_label = gen_label_rtx ();
6963 rtx end_0_label = gen_label_rtx ();
6964 rtx mem;
6965 rtx tmpreg = gen_reg_rtx (SImode);
6966
6967 align = 0;
6968 if (GET_CODE (align_rtx) == CONST_INT)
6969 align = INTVAL (align_rtx);
6970
6971 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6972
6973 /* Is there a known alignment and is it less than 4? */
6974 if (align < 4)
6975 {
6976 /* Is there a known alignment and is it not 2? */
6977 if (align != 2)
6978 {
6979 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6980 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6981
6982 	  /* Leave just the two lower bits.  */
6983 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6984 NULL_RTX, 0, OPTAB_WIDEN);
6985
6986 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6987 SImode, 1, 0, align_4_label);
6988 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6989 SImode, 1, 0, align_2_label);
6990 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6991 SImode, 1, 0, align_3_label);
6992 }
6993 else
6994 {
6995 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
6996 	     check whether it is aligned to 4 bytes.  */
6997
6998 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6999 NULL_RTX, 0, OPTAB_WIDEN);
7000
7001 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
7002 SImode, 1, 0, align_4_label);
7003 }
7004
7005 mem = gen_rtx_MEM (QImode, out);
7006
7007 /* Now compare the bytes. */
7008
7009   /* Compare the first few unaligned bytes on a byte-by-byte basis.  */
7010 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7011 QImode, 1, 0, end_0_label);
7012
7013 /* Increment the address. */
7014 emit_insn (gen_addsi3 (out, out, const1_rtx));
7015
7016       /* Not needed with an alignment of 2.  */
7017 if (align != 2)
7018 {
7019 emit_label (align_2_label);
7020
7021 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7022 QImode, 1, 0, end_0_label);
7023
7024 emit_insn (gen_addsi3 (out, out, const1_rtx));
7025
7026 emit_label (align_3_label);
7027 }
7028
7029 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7030 QImode, 1, 0, end_0_label);
7031
7032 emit_insn (gen_addsi3 (out, out, const1_rtx));
7033 }
7034
7035   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
7036      align this loop: it only makes programs larger, it does not make them
7037      faster.  */
7038 emit_label (align_4_label);
7039
7040 mem = gen_rtx_MEM (SImode, out);
7041 emit_move_insn (scratch, mem);
7042 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
7043
7044   /* This formula yields a nonzero result iff one of the bytes is zero.
7045      This saves three branches inside the loop and many cycles.  */
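  /* Worked example (illustration only): for scratch = 0x41004141 ("A\0AA"),
     tmpreg = scratch + (-0x01010101) = 0x3FFF4040 and ~scratch = 0xBEFFBEBE;
     their AND is 0x3EFF0000, and masking with 0x80808080 leaves 0x00800000,
     nonzero precisely because the second-highest byte of scratch is zero.  */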
7046
7047 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
7048 emit_insn (gen_one_cmplsi2 (scratch, scratch));
7049 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7050 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
7051 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
7052 SImode, 1, 0, align_4_label);
7053
7054 if (TARGET_CMOVE)
7055 {
7056 rtx reg = gen_reg_rtx (SImode);
7057 emit_move_insn (reg, tmpreg);
7058 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
7059
7060 /* If zero is not in the first two bytes, move two bytes forward. */
7061 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
7062 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7063 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
7064 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
7065 gen_rtx_IF_THEN_ELSE (SImode, tmp,
7066 reg,
7067 tmpreg)));
7068 	  /* Emit the lea manually to avoid clobbering the flags.  */
7069 emit_insn (gen_rtx_SET (SImode, reg,
7070 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
7071
7072 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7073 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
7074 emit_insn (gen_rtx_SET (VOIDmode, out,
7075 gen_rtx_IF_THEN_ELSE (SImode, tmp,
7076 reg,
7077 out)));
7078
7079 }
7080 else
7081 {
7082 rtx end_2_label = gen_label_rtx ();
7083 /* Is zero in the first two bytes? */
7084
7085 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
7086 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7087 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
7088 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7089 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
7090 pc_rtx);
7091 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7092 JUMP_LABEL (tmp) = end_2_label;
7093
7094 /* Not in the first two. Move two bytes forward. */
7095 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
7096 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
7097
7098 emit_label (end_2_label);
7099
7100 }
7101
7102   /* Avoid a branch when fixing up the byte position.  */
7103 tmpreg = gen_lowpart (QImode, tmpreg);
7104 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
7105 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
7106
7107 emit_label (end_0_label);
7108 }
7109 \f
7110 /* Clear stack slot assignments remembered from previous functions.
7111 This is called from INIT_EXPANDERS once before RTL is emitted for each
7112 function. */
7113
7114 static void
7115 ix86_init_machine_status (p)
7116 struct function *p;
7117 {
7118 p->machine = (struct machine_function *)
7119 xcalloc (1, sizeof (struct machine_function));
7120 }
7121
7122 /* Mark machine specific bits of P for GC. */
7123 static void
7124 ix86_mark_machine_status (p)
7125 struct function *p;
7126 {
7127 struct machine_function *machine = p->machine;
7128 enum machine_mode mode;
7129 int n;
7130
7131 if (! machine)
7132 return;
7133
7134 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
7135 mode = (enum machine_mode) ((int) mode + 1))
7136 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
7137 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
7138 }
7139
7140 static void
7141 ix86_free_machine_status (p)
7142 struct function *p;
7143 {
7144 free (p->machine);
7145 p->machine = NULL;
7146 }
7147
7148 /* Return a MEM corresponding to a stack slot with mode MODE.
7149 Allocate a new slot if necessary.
7150
7151 The RTL for a function can have several slots available: N is
7152 which slot to use. */
7153
7154 rtx
7155 assign_386_stack_local (mode, n)
7156 enum machine_mode mode;
7157 int n;
7158 {
7159 if (n < 0 || n >= MAX_386_STACK_LOCALS)
7160 abort ();
7161
7162 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
7163 ix86_stack_locals[(int) mode][n]
7164 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
7165
7166 return ix86_stack_locals[(int) mode][n];
7167 }
7168 \f
7169 /* Calculate the length of the memory address in the instruction
7170 encoding. Does not include the one-byte modrm, opcode, or prefix. */
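/* For example (standard ia32 encodings, illustration only): "(%eax)" needs
   no bytes beyond the modrm byte, so 0 is returned; "8(%ebp,%ecx,4)" needs
   a SIB byte plus an 8-bit displacement, so 2 is returned; a bare symbol
   reference is a 32-bit displacement, so 4 is returned.  */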
7171
7172 static int
7173 memory_address_length (addr)
7174 rtx addr;
7175 {
7176 struct ix86_address parts;
7177 rtx base, index, disp;
7178 int len;
7179
7180 if (GET_CODE (addr) == PRE_DEC
7181 || GET_CODE (addr) == POST_INC)
7182 return 0;
7183
7184 if (! ix86_decompose_address (addr, &parts))
7185 abort ();
7186
7187 base = parts.base;
7188 index = parts.index;
7189 disp = parts.disp;
7190 len = 0;
7191
7192 /* Register Indirect. */
7193 if (base && !index && !disp)
7194 {
7195 /* Special cases: ebp and esp need the two-byte modrm form. */
7196 if (addr == stack_pointer_rtx
7197 || addr == arg_pointer_rtx
7198 || addr == frame_pointer_rtx
7199 || addr == hard_frame_pointer_rtx)
7200 len = 1;
7201 }
7202
7203 /* Direct Addressing. */
7204 else if (disp && !base && !index)
7205 len = 4;
7206
7207 else
7208 {
7209 /* Find the length of the displacement constant. */
7210 if (disp)
7211 {
7212 if (GET_CODE (disp) == CONST_INT
7213 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
7214 len = 1;
7215 else
7216 len = 4;
7217 }
7218
7219 /* An index requires the two-byte modrm form. */
7220 if (index)
7221 len += 1;
7222 }
7223
7224 return len;
7225 }
7226
7227 /* Compute the default value for the "length_immediate" attribute.  When
7228    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
7229 int
7230 ix86_attr_length_immediate_default (insn, shortform)
7231 rtx insn;
7232 int shortform;
7233 {
7234 int len = 0;
7235 int i;
7236 extract_insn_cached (insn);
7237 for (i = recog_data.n_operands - 1; i >= 0; --i)
7238 if (CONSTANT_P (recog_data.operand[i]))
7239 {
7240 if (len)
7241 abort ();
7242 if (shortform
7243 && GET_CODE (recog_data.operand[i]) == CONST_INT
7244 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
7245 len = 1;
7246 else
7247 {
7248 switch (get_attr_mode (insn))
7249 {
7250 	      case MODE_QI:
7251 		len += 1;
7252 		break;
7253 	      case MODE_HI:
7254 		len += 2;
7255 		break;
7256 	      case MODE_SI:
7257 		len += 4;
7258 		break;
7259 default:
7260 fatal_insn ("Unknown insn mode", insn);
7261 }
7262 }
7263 }
7264 return len;
7265 }
7266 /* Compute default value for "length_address" attribute. */
7267 int
7268 ix86_attr_length_address_default (insn)
7269 rtx insn;
7270 {
7271 int i;
7272 extract_insn_cached (insn);
7273 for (i = recog_data.n_operands - 1; i >= 0; --i)
7274 if (GET_CODE (recog_data.operand[i]) == MEM)
7276       return memory_address_length (XEXP (recog_data.operand[i], 0));
7279 return 0;
7280 }
7281 \f
7282 /* Return the maximum number of instructions a cpu can issue. */
7283
7284 int
7285 ix86_issue_rate ()
7286 {
7287 switch (ix86_cpu)
7288 {
7289 case PROCESSOR_PENTIUM:
7290 case PROCESSOR_K6:
7291 return 2;
7292
7293 case PROCESSOR_PENTIUMPRO:
7294 case PROCESSOR_PENTIUM4:
7295 case PROCESSOR_ATHLON:
7296 return 3;
7297
7298 default:
7299 return 1;
7300 }
7301 }
7302
7303 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
7304    by DEP_INSN and nothing else set by DEP_INSN.  */
7305
7306 static int
7307 ix86_flags_dependant (insn, dep_insn, insn_type)
7308 rtx insn, dep_insn;
7309 enum attr_type insn_type;
7310 {
7311 rtx set, set2;
7312
7313 /* Simplify the test for uninteresting insns. */
7314 if (insn_type != TYPE_SETCC
7315 && insn_type != TYPE_ICMOV
7316 && insn_type != TYPE_FCMOV
7317 && insn_type != TYPE_IBR)
7318 return 0;
7319
7320 if ((set = single_set (dep_insn)) != 0)
7321 {
7322 set = SET_DEST (set);
7323 set2 = NULL_RTX;
7324 }
7325 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
7326 && XVECLEN (PATTERN (dep_insn), 0) == 2
7327 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
7328 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
7329 {
7330 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7331       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
7332 }
7333 else
7334 return 0;
7335
7336 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
7337 return 0;
7338
7339   /* This test is true if the dependent insn reads the flags but
7340      not any other potentially set register.  */
7341 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
7342 return 0;
7343
7344 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
7345 return 0;
7346
7347 return 1;
7348 }
7349
7350 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
7351 address with operands set by DEP_INSN. */
7352
7353 static int
7354 ix86_agi_dependant (insn, dep_insn, insn_type)
7355 rtx insn, dep_insn;
7356 enum attr_type insn_type;
7357 {
7358 rtx addr;
7359
7360 if (insn_type == TYPE_LEA)
7361 {
7362 addr = PATTERN (insn);
7363 if (GET_CODE (addr) == SET)
7364 ;
7365 else if (GET_CODE (addr) == PARALLEL
7366 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
7367 addr = XVECEXP (addr, 0, 0);
7368 else
7369 abort ();
7370 addr = SET_SRC (addr);
7371 }
7372 else
7373 {
7374 int i;
7375 extract_insn_cached (insn);
7376 for (i = recog_data.n_operands - 1; i >= 0; --i)
7377 if (GET_CODE (recog_data.operand[i]) == MEM)
7378 {
7379 addr = XEXP (recog_data.operand[i], 0);
7380 goto found;
7381 }
7382 return 0;
7383 found:;
7384 }
7385
7386 return modified_in_p (addr, dep_insn);
7387 }
7388
7389 int
7390 ix86_adjust_cost (insn, link, dep_insn, cost)
7391 rtx insn, link, dep_insn;
7392 int cost;
7393 {
7394 enum attr_type insn_type, dep_insn_type;
7395 enum attr_memory memory;
7396 rtx set, set2;
7397 int dep_insn_code_number;
7398
7399   /* Anti and output dependencies have zero cost on all CPUs.  */
7400 if (REG_NOTE_KIND (link) != 0)
7401 return 0;
7402
7403 dep_insn_code_number = recog_memoized (dep_insn);
7404
7405 /* If we can't recognize the insns, we can't really do anything. */
7406 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
7407 return cost;
7408
7409 insn_type = get_attr_type (insn);
7410 dep_insn_type = get_attr_type (dep_insn);
7411
7412   /* Prologue and epilogue allocators can have a false dependency on ebp.
7413      This results in one extra cycle of stall on Pentium prologue scheduling,
7414      so handle this important case manually.  */
7415 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7416 && dep_insn_type == TYPE_ALU
7417 && !reg_mentioned_p (stack_pointer_rtx, insn))
7418 return 0;
7419
7420 switch (ix86_cpu)
7421 {
7422 case PROCESSOR_PENTIUM:
7423 /* Address Generation Interlock adds a cycle of latency. */
7424 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7425 cost += 1;
7426
7427 /* ??? Compares pair with jump/setcc. */
7428 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7429 cost = 0;
7430
7431       /* Floating point stores require the value to be ready one cycle earlier.  */
7432 if (insn_type == TYPE_FMOV
7433 && get_attr_memory (insn) == MEMORY_STORE
7434 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7435 cost += 1;
7436 break;
7437
7438 case PROCESSOR_PENTIUMPRO:
7439 /* Since we can't represent delayed latencies of load+operation,
7440 increase the cost here for non-imov insns. */
7441 if (dep_insn_type != TYPE_IMOV
7442 && dep_insn_type != TYPE_FMOV
7443 	  && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7444 	      || memory == MEMORY_BOTH))
7445 cost += 1;
7446
7447 /* INT->FP conversion is expensive. */
7448 if (get_attr_fp_int_src (dep_insn))
7449 cost += 5;
7450
7451       /* There is one extra cycle of latency between an FP op and a store.  */
7452 if (insn_type == TYPE_FMOV
7453 && (set = single_set (dep_insn)) != NULL_RTX
7454 && (set2 = single_set (insn)) != NULL_RTX
7455 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7456 && GET_CODE (SET_DEST (set2)) == MEM)
7457 cost += 1;
7458 break;
7459
7460 case PROCESSOR_K6:
7461 /* The esp dependency is resolved before the instruction is really
7462 finished. */
7463 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7464 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7465 return 1;
7466
7467 /* Since we can't represent delayed latencies of load+operation,
7468 increase the cost here for non-imov insns. */
7469       if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7470 || memory == MEMORY_BOTH)
7471 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7472
7473 /* INT->FP conversion is expensive. */
7474 if (get_attr_fp_int_src (dep_insn))
7475 cost += 5;
7476 break;
7477
7478 case PROCESSOR_ATHLON:
7479 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7480 || memory == MEMORY_BOTH)
7481 {
7482 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
7483 cost += 2;
7484 else
7485 cost += 3;
7486 }
7487       break;

7488 default:
7489 break;
7490 }
7491
7492 return cost;
7493 }
7494
7495 static union
7496 {
7497 struct ppro_sched_data
7498 {
7499 rtx decode[3];
7500 int issued_this_cycle;
7501 } ppro;
7502 } ix86_sched_data;
7503
7504 static int
7505 ix86_safe_length (insn)
7506 rtx insn;
7507 {
7508 if (recog_memoized (insn) >= 0)
7509     return get_attr_length (insn);
7510 else
7511 return 128;
7512 }
7513
7514 static int
7515 ix86_safe_length_prefix (insn)
7516 rtx insn;
7517 {
7518 if (recog_memoized (insn) >= 0)
7519     return get_attr_length (insn);
7520 else
7521 return 0;
7522 }
7523
7524 static enum attr_memory
7525 ix86_safe_memory (insn)
7526 rtx insn;
7527 {
7528 if (recog_memoized (insn) >= 0)
7529     return get_attr_memory (insn);
7530 else
7531 return MEMORY_UNKNOWN;
7532 }
7533
7534 static enum attr_pent_pair
7535 ix86_safe_pent_pair (insn)
7536 rtx insn;
7537 {
7538 if (recog_memoized (insn) >= 0)
7539     return get_attr_pent_pair (insn);
7540 else
7541 return PENT_PAIR_NP;
7542 }
7543
7544 static enum attr_ppro_uops
7545 ix86_safe_ppro_uops (insn)
7546 rtx insn;
7547 {
7548 if (recog_memoized (insn) >= 0)
7549 return get_attr_ppro_uops (insn);
7550 else
7551 return PPRO_UOPS_MANY;
7552 }
7553
7554 static void
7555 ix86_dump_ppro_packet (dump)
7556 FILE *dump;
7557 {
7558 if (ix86_sched_data.ppro.decode[0])
7559 {
7560 fprintf (dump, "PPRO packet: %d",
7561 INSN_UID (ix86_sched_data.ppro.decode[0]));
7562 if (ix86_sched_data.ppro.decode[1])
7563 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7564 if (ix86_sched_data.ppro.decode[2])
7565 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7566 fputc ('\n', dump);
7567 }
7568 }
7569
7570 /* We're beginning a new block. Initialize data structures as necessary. */
7571
7572 void
7573 ix86_sched_init (dump, sched_verbose)
7574 FILE *dump ATTRIBUTE_UNUSED;
7575 int sched_verbose ATTRIBUTE_UNUSED;
7576 {
7577 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7578 }
7579
7580 /* Shift INSN to SLOT, and shift everything else down. */
7581
7582 static void
7583 ix86_reorder_insn (insnp, slot)
7584 rtx *insnp, *slot;
7585 {
7586 if (insnp != slot)
7587 {
7588 rtx insn = *insnp;
7589 do
7590 insnp[0] = insnp[1];
7591 while (++insnp != slot);
7592 *insnp = insn;
7593 }
7594 }
7595
7596 /* Find an instruction with the given pairability and the minimal number of
7597    cycles lost to the fact that the CPU waits for both pipelines to finish
7598    before reading the next instructions.  Also take care that neither
7599    instruction is longer than 7 bytes.  */
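/* (For instance, on the Pentium two simple one-cycle ALU insns such as
   "addl %eax, %ebx" and "movl %ecx, %edx" can issue together in the U and
   V pipes, provided neither is longer than 7 bytes.  Illustration only;
   the exact pairing classes come from the PENT_PAIR attribute.)  */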
7600
7601 static rtx *
7602 ix86_pent_find_pair (e_ready, ready, type, first)
7603 rtx *e_ready;
7604 rtx *ready;
7605 enum attr_pent_pair type;
7606 rtx first;
7607 {
7608 int mincycles, cycles;
7609 enum attr_pent_pair tmp;
7610 enum attr_memory memory;
7611 rtx *insnp, *bestinsnp = NULL;
7612
7613 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
7614 return NULL;
7615
7616 memory = ix86_safe_memory (first);
7617 cycles = result_ready_cost (first);
7618 mincycles = INT_MAX;
7619
7620 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
7621 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
7622 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
7623 {
7624 enum attr_memory second_memory;
7625 int secondcycles, currentcycles;
7626
7627 second_memory = ix86_safe_memory (*insnp);
7628 secondcycles = result_ready_cost (*insnp);
7629 currentcycles = abs (cycles - secondcycles);
7630
7631 if (secondcycles >= 1 && cycles >= 1)
7632 {
7633 	    /* Two read/modify/write instructions together take two
7634 cycles longer. */
7635 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7636 currentcycles += 2;
7637
7638 	    /* A read/modify/write instruction followed by a read/modify
7639 	       one takes one cycle longer.  */
7640 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7641 && tmp != PENT_PAIR_UV
7642 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
7643 currentcycles += 1;
7644 }
7645 if (currentcycles < mincycles)
7646 bestinsnp = insnp, mincycles = currentcycles;
7647 }
7648
7649 return bestinsnp;
7650 }
7651
7652 /* Subroutines of ix86_sched_reorder. */
7653
7654 static void
7655 ix86_sched_reorder_pentium (ready, e_ready)
7656 rtx *ready;
7657 rtx *e_ready;
7658 {
7659 enum attr_pent_pair pair1, pair2;
7660 rtx *insnp;
7661
7662   /* This wouldn't be necessary if Haifa knew that static insn ordering
7663      mattered for which pipe an insn is issued to.  So we have to make
7664      some minor rearrangements.  */
7665
7666 pair1 = ix86_safe_pent_pair (*e_ready);
7667
7668 /* If the first insn is non-pairable, let it be. */
7669 if (pair1 == PENT_PAIR_NP)
7670 return;
7671
7672 pair2 = PENT_PAIR_NP;
7673 insnp = 0;
7674
7675 /* If the first insn is UV or PV pairable, search for a PU
7676 insn to go with. */
7677 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
7678 {
7679 insnp = ix86_pent_find_pair (e_ready-1, ready,
7680 PENT_PAIR_PU, *e_ready);
7681 if (insnp)
7682 pair2 = PENT_PAIR_PU;
7683 }
7684
7685 /* If the first insn is PU or UV pairable, search for a PV
7686 insn to go with. */
7687 if (pair2 == PENT_PAIR_NP
7688 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7689 {
7690 insnp = ix86_pent_find_pair (e_ready-1, ready,
7691 PENT_PAIR_PV, *e_ready);
7692 if (insnp)
7693 pair2 = PENT_PAIR_PV;
7694 }
7695
7696 /* If the first insn is pairable, search for a UV
7697 insn to go with. */
7698 if (pair2 == PENT_PAIR_NP)
7699 {
7700 insnp = ix86_pent_find_pair (e_ready-1, ready,
7701 PENT_PAIR_UV, *e_ready);
7702 if (insnp)
7703 pair2 = PENT_PAIR_UV;
7704 }
7705
7706 if (pair2 == PENT_PAIR_NP)
7707 return;
7708
7709 /* Found something! Decide if we need to swap the order. */
7710 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7711 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7712 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7713 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7714 ix86_reorder_insn (insnp, e_ready);
7715 else
7716 ix86_reorder_insn (insnp, e_ready - 1);
7717 }
7718
7719 static void
7720 ix86_sched_reorder_ppro (ready, e_ready)
7721 rtx *ready;
7722 rtx *e_ready;
7723 {
7724 rtx decode[3];
7725 enum attr_ppro_uops cur_uops;
7726 int issued_this_cycle;
7727 rtx *insnp;
7728 int i;
7729
7730 /* At this point .ppro.decode contains the state of the three
7731 decoders from last "cycle". That is, those insns that were
7732 actually independent. But here we're scheduling for the
7733 decoder, and we may find things that are decodable in the
7734 same cycle. */
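  /* (Presumably the 4-1-1 decode template of the PPro family: the first
     decoder handles insns of up to four uops, the other two only
     single-uop insns, so a packet such as

	addl (%eax), %ebx    - two uops, decoder 0
	movl %ecx, %edx      - one uop,  decoder 1
	incl %esi            - one uop,  decoder 2

     decodes in one cycle, while other orderings would stall.)  */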
7735
7736 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
7737 issued_this_cycle = 0;
7738
7739 insnp = e_ready;
7740 cur_uops = ix86_safe_ppro_uops (*insnp);
7741
7742   /* If the decoders are empty, and we have a complex insn at the
7743      head of the priority queue, let it issue without complaint.  */
7744 if (decode[0] == NULL)
7745 {
7746 if (cur_uops == PPRO_UOPS_MANY)
7747 {
7748 decode[0] = *insnp;
7749 goto ppro_done;
7750 }
7751
7752       /* Otherwise, search for a 2-4 uop insn to issue.  */
7753 while (cur_uops != PPRO_UOPS_FEW)
7754 {
7755 if (insnp == ready)
7756 break;
7757 cur_uops = ix86_safe_ppro_uops (*--insnp);
7758 }
7759
7760 /* If so, move it to the head of the line. */
7761 if (cur_uops == PPRO_UOPS_FEW)
7762 ix86_reorder_insn (insnp, e_ready);
7763
7764 /* Issue the head of the queue. */
7765 issued_this_cycle = 1;
7766 decode[0] = *e_ready--;
7767 }
7768
7769 /* Look for simple insns to fill in the other two slots. */
7770 for (i = 1; i < 3; ++i)
7771 if (decode[i] == NULL)
7772 {
7773 if (ready >= e_ready)
7774 goto ppro_done;
7775
7776 insnp = e_ready;
7777 cur_uops = ix86_safe_ppro_uops (*insnp);
7778 while (cur_uops != PPRO_UOPS_ONE)
7779 {
7780 if (insnp == ready)
7781 break;
7782 cur_uops = ix86_safe_ppro_uops (*--insnp);
7783 }
7784
7785 /* Found one. Move it to the head of the queue and issue it. */
7786 if (cur_uops == PPRO_UOPS_ONE)
7787 {
7788 ix86_reorder_insn (insnp, e_ready);
7789 decode[i] = *e_ready--;
7790 issued_this_cycle++;
7791 continue;
7792 }
7793
7794 /* ??? Didn't find one. Ideally, here we would do a lazy split
7795 of 2-uop insns, issue one and queue the other. */
7796 }
7797
7798 ppro_done:
7799 if (issued_this_cycle == 0)
7800 issued_this_cycle = 1;
7801 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
7802 }
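
/* Explanatory note (inferred): this mirrors the PPro/PII "4-1-1" decode
   template -- decoder 0 can take a complex (multi-uop) insn each cycle,
   while decoders 1 and 2 accept only single-uop insns, which is why the
   two filler slots above are restricted to PPRO_UOPS_ONE.  */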
7803
7804 /* We are about to begin issuing insns for this clock cycle.
7805 Override the default sort algorithm to better slot instructions. */
7806 int
7807 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7808 FILE *dump ATTRIBUTE_UNUSED;
7809 int sched_verbose ATTRIBUTE_UNUSED;
7810 rtx *ready;
7811 int n_ready;
7812 int clock_var ATTRIBUTE_UNUSED;
7813 {
7814 rtx *e_ready = ready + n_ready - 1;
7815
7816 if (n_ready < 2)
7817 goto out;
7818
7819 switch (ix86_cpu)
7820 {
7821 default:
7822 break;
7823
7824 case PROCESSOR_PENTIUM:
7825 ix86_sched_reorder_pentium (ready, e_ready);
7826 break;
7827
7828 case PROCESSOR_PENTIUMPRO:
7829 ix86_sched_reorder_ppro (ready, e_ready);
7830 break;
7831 }
7832
7833 out:
7834 return ix86_issue_rate ();
7835 }
7836
7837 /* We are about to issue INSN. Return the number of insns left on the
7838 ready queue that can be issued this cycle. */
7839
7840 int
7841 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
7842 FILE *dump;
7843 int sched_verbose;
7844 rtx insn;
7845 int can_issue_more;
7846 {
7847 int i;
7848 switch (ix86_cpu)
7849 {
7850 default:
7851 return can_issue_more - 1;
7852
7853 case PROCESSOR_PENTIUMPRO:
7854 {
7855 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
7856
7857 if (uops == PPRO_UOPS_MANY)
7858 {
7859 if (sched_verbose)
7860 ix86_dump_ppro_packet (dump);
7861 ix86_sched_data.ppro.decode[0] = insn;
7862 ix86_sched_data.ppro.decode[1] = NULL;
7863 ix86_sched_data.ppro.decode[2] = NULL;
7864 if (sched_verbose)
7865 ix86_dump_ppro_packet (dump);
7866 ix86_sched_data.ppro.decode[0] = NULL;
7867 }
7868 else if (uops == PPRO_UOPS_FEW)
7869 {
7870 if (sched_verbose)
7871 ix86_dump_ppro_packet (dump);
7872 ix86_sched_data.ppro.decode[0] = insn;
7873 ix86_sched_data.ppro.decode[1] = NULL;
7874 ix86_sched_data.ppro.decode[2] = NULL;
7875 }
7876 else
7877 {
7878 for (i = 0; i < 3; ++i)
7879 if (ix86_sched_data.ppro.decode[i] == NULL)
7880 {
7881 ix86_sched_data.ppro.decode[i] = insn;
7882 break;
7883 }
7884 if (i == 3)
7885 abort ();
7886 if (i == 2)
7887 {
7888 if (sched_verbose)
7889 ix86_dump_ppro_packet (dump);
7890 ix86_sched_data.ppro.decode[0] = NULL;
7891 ix86_sched_data.ppro.decode[1] = NULL;
7892 ix86_sched_data.ppro.decode[2] = NULL;
7893 }
7894 }
7895 }
7896 return --ix86_sched_data.ppro.issued_this_cycle;
7897 }
7898 }
7899 \f
7900 /* Walk through INSNS and look for MEM references whose address is DSTREG or
7901 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7902 appropriate. */
7903
7904 void
7905 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7906 rtx insns;
7907 rtx dstref, srcref, dstreg, srcreg;
7908 {
7909 rtx insn;
7910
7911 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7912 if (INSN_P (insn))
7913 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7914 dstreg, srcreg);
7915 }
7916
7917 /* Subroutine of above to actually do the updating by recursively walking
7918 the rtx. */
7919
7920 static void
7921 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
7922 rtx x;
7923 rtx dstref, srcref, dstreg, srcreg;
7924 {
7925 enum rtx_code code = GET_CODE (x);
7926 const char *format_ptr = GET_RTX_FORMAT (code);
7927 int i, j;
7928
7929 if (code == MEM && XEXP (x, 0) == dstreg)
7930 MEM_COPY_ATTRIBUTES (x, dstref);
7931 else if (code == MEM && XEXP (x, 0) == srcreg)
7932 MEM_COPY_ATTRIBUTES (x, srcref);
7933
7934 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
7935 {
7936 if (*format_ptr == 'e')
7937 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
7938 dstreg, srcreg);
7939 else if (*format_ptr == 'E')
7940 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7941 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
7942 dstreg, srcreg);
7943 }
7944 }
7945 \f
7946 /* Compute the alignment given to a constant that is being placed in memory.
7947 EXP is the constant and ALIGN is the alignment that the object would
7948 ordinarily have.
7949 The value of this function is used instead of that alignment to align
7950 the object. */
7951
7952 int
7953 ix86_constant_alignment (exp, align)
7954 tree exp;
7955 int align;
7956 {
7957 if (TREE_CODE (exp) == REAL_CST)
7958 {
7959 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7960 return 64;
7961 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7962 return 128;
7963 }
7964 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7965 && align < 256)
7966 return 256;
7967
7968 return align;
7969 }
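
/* For instance (illustrative, following the rules above): a DFmode
   REAL_CST gets at least 64-bit alignment so 8-byte FP loads are
   naturally aligned, and a string constant of 31 or more characters is
   pushed to 256-bit alignment, presumably so block operations on it can
   use wider aligned accesses.  */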
7970
7971 /* Compute the alignment for a static variable.
7972 TYPE is the data type, and ALIGN is the alignment that
7973 the object would ordinarily have. The value of this function is used
7974 instead of that alignment to align the object. */
7975
7976 int
7977 ix86_data_alignment (type, align)
7978 tree type;
7979 int align;
7980 {
7981 if (AGGREGATE_TYPE_P (type)
7982 && TYPE_SIZE (type)
7983 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7984 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7985 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7986 return 256;
7987
7988 if (TREE_CODE (type) == ARRAY_TYPE)
7989 {
7990 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7991 return 64;
7992 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7993 return 128;
7994 }
7995 else if (TREE_CODE (type) == COMPLEX_TYPE)
7996 {
7998 if (TYPE_MODE (type) == DCmode && align < 64)
7999 return 64;
8000 if (TYPE_MODE (type) == XCmode && align < 128)
8001 return 128;
8002 }
8003 else if ((TREE_CODE (type) == RECORD_TYPE
8004 || TREE_CODE (type) == UNION_TYPE
8005 || TREE_CODE (type) == QUAL_UNION_TYPE)
8006 && TYPE_FIELDS (type))
8007 {
8008 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
8009 return 64;
8010 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
8011 return 128;
8012 }
8013 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
8014 || TREE_CODE (type) == INTEGER_TYPE)
8015 {
8016 if (TYPE_MODE (type) == DFmode && align < 64)
8017 return 64;
8018 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
8019 return 128;
8020 }
8021
8022 return align;
8023 }
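
/* Worked examples of the rules above (illustrative only):

     static double d[8];              -> at least 64-bit alignment (DFmode)
     static struct { char c[40]; } s; -> 256-bit alignment (aggregate of
                                         256 bits or more)

   The extra alignment lets FP accesses and block copies use wider,
   naturally aligned operations.  */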
8024
8025 /* Compute the alignment for a local variable.
8026 TYPE is the data type, and ALIGN is the alignment that
8027 the object would ordinarily have. The value of this function is used
8028 instead of that alignment to align the object. */
8029
8030 int
8031 ix86_local_alignment (type, align)
8032 tree type;
8033 int align;
8034 {
8035 if (TREE_CODE (type) == ARRAY_TYPE)
8036 {
8037 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
8038 return 64;
8039 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
8040 return 128;
8041 }
8042 else if (TREE_CODE (type) == COMPLEX_TYPE)
8043 {
8044 if (TYPE_MODE (type) == DCmode && align < 64)
8045 return 64;
8046 if (TYPE_MODE (type) == XCmode && align < 128)
8047 return 128;
8048 }
8049 else if ((TREE_CODE (type) == RECORD_TYPE
8050 || TREE_CODE (type) == UNION_TYPE
8051 || TREE_CODE (type) == QUAL_UNION_TYPE)
8052 && TYPE_FIELDS (type))
8053 {
8054 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
8055 return 64;
8056 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
8057 return 128;
8058 }
8059 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
8060 || TREE_CODE (type) == INTEGER_TYPE)
8061 {
8063 if (TYPE_MODE (type) == DFmode && align < 64)
8064 return 64;
8065 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
8066 return 128;
8067 }
8068 return align;
8069 }
8070
8071 #define def_builtin(NAME, TYPE, CODE) \
8072 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
8073 struct builtin_description
8074 {
8075 enum insn_code icode;
8076 const char * name;
8077 enum ix86_builtins code;
8078 enum rtx_code comparison;
8079 unsigned int flag;
8080 };
8081
8082 static struct builtin_description bdesc_comi[] =
8083 {
8084 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
8085 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
8086 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
8087 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
8088 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
8089 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
8090 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
8091 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
8092 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
8093 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
8094 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
8095 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
8096 };
8097
8098 static struct builtin_description bdesc_2arg[] =
8099 {
8100 /* SSE */
8101 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
8102 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
8103 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
8104 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
8105 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
8106 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
8107 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
8108 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
8109
8110 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
8111 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
8112 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
8113 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
8114 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
8115 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
8116 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
8117 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
8118 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
8119 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
8120 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
8121 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
8122 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
8123 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
8124 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
8125 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
8126 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
8127 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
8128 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
8129 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
8130 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
8131 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
8132 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
8133 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
8134
8135 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
8136 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
8137 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
8138 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
8139
8140 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
8141 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
8142 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
8143 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
8144
8145 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
8146 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
8147 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
8148 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
8149 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
8150
8151 /* MMX */
8152 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
8153 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
8154 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
8155 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
8156 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
8157 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
8158
8159 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
8160 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
8161 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
8162 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
8163 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
8164 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
8165 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
8166 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
8167
8168 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
8169 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
8170 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
8171
8172 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
8173 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
8174 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
8175 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
8176
8177 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
8178 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
8179
8180 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
8181 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
8182 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
8183 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
8184 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
8185 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
8186
8187 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
8188 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
8189 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
8190 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
8191
8192 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
8193 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
8194 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
8195 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
8196 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
8197 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
8198
8199 /* Special. */
8200 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
8201 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
8202 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
8203
8204 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
8205 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
8206
8207 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
8208 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
8209 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
8210 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
8211 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
8212 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
8213
8214 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
8215 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
8216 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
8217 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
8218 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
8219 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
8220
8221 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
8222 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
8223 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
8224 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
8225
8226 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
8227 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
8228
8229 };
8230
8231 static struct builtin_description bdesc_1arg[] =
8232 {
8233 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
8234 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
8235
8236 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
8237 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
8238 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
8239
8240 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
8241 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
8242 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
8243 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
8244
8245 };
8246
8247 /* Set up all the target-specific builtins. This is not called if TARGET_MMX
8248 is zero. Otherwise, if TARGET_SSE is not set, only define the MMX
8249 builtins. */
8250 void
8251 ix86_init_builtins ()
8252 {
8253 struct builtin_description * d;
8254 size_t i;
8255 tree endlink = void_list_node;
8256
8257 tree pchar_type_node = build_pointer_type (char_type_node);
8258 tree pfloat_type_node = build_pointer_type (float_type_node);
8259 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
8260 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
8261
8262 /* Comparisons. */
8263 tree int_ftype_v4sf_v4sf
8264 = build_function_type (integer_type_node,
8265 tree_cons (NULL_TREE, V4SF_type_node,
8266 tree_cons (NULL_TREE,
8267 V4SF_type_node,
8268 endlink)));
8269 tree v4si_ftype_v4sf_v4sf
8270 = build_function_type (V4SI_type_node,
8271 tree_cons (NULL_TREE, V4SF_type_node,
8272 tree_cons (NULL_TREE,
8273 V4SF_type_node,
8274 endlink)));
8275 /* MMX/SSE/integer conversions. */
8276 tree int_ftype_v4sf_int
8277 = build_function_type (integer_type_node,
8278 tree_cons (NULL_TREE, V4SF_type_node,
8279 tree_cons (NULL_TREE,
8280 integer_type_node,
8281 endlink)));
8282 tree int_ftype_v4sf
8283 = build_function_type (integer_type_node,
8284 tree_cons (NULL_TREE, V4SF_type_node,
8285 endlink));
8286 tree int_ftype_v8qi
8287 = build_function_type (integer_type_node,
8288 tree_cons (NULL_TREE, V8QI_type_node,
8289 endlink));
8290 tree int_ftype_v2si
8291 = build_function_type (integer_type_node,
8292 tree_cons (NULL_TREE, V2SI_type_node,
8293 endlink));
8294 tree v2si_ftype_int
8295 = build_function_type (V2SI_type_node,
8296 tree_cons (NULL_TREE, integer_type_node,
8297 endlink));
8298 tree v4sf_ftype_v4sf_int
8299 = build_function_type (V4SF_type_node,
8300 tree_cons (NULL_TREE, V4SF_type_node,
8301 tree_cons (NULL_TREE, integer_type_node,
8302 endlink)));
8303 tree v4sf_ftype_v4sf_v2si
8304 = build_function_type (V4SF_type_node,
8305 tree_cons (NULL_TREE, V4SF_type_node,
8306 tree_cons (NULL_TREE, V2SI_type_node,
8307 endlink)));
8308 tree int_ftype_v4hi_int
8309 = build_function_type (integer_type_node,
8310 tree_cons (NULL_TREE, V4HI_type_node,
8311 tree_cons (NULL_TREE, integer_type_node,
8312 endlink)));
8313 tree v4hi_ftype_v4hi_int_int
8314 = build_function_type (V4HI_type_node,
8315 tree_cons (NULL_TREE, V4HI_type_node,
8316 tree_cons (NULL_TREE, integer_type_node,
8317 tree_cons (NULL_TREE,
8318 integer_type_node,
8319 endlink))));
8320 /* Miscellaneous. */
8321 tree v8qi_ftype_v4hi_v4hi
8322 = build_function_type (V8QI_type_node,
8323 tree_cons (NULL_TREE, V4HI_type_node,
8324 tree_cons (NULL_TREE, V4HI_type_node,
8325 endlink)));
8326 tree v4hi_ftype_v2si_v2si
8327 = build_function_type (V4HI_type_node,
8328 tree_cons (NULL_TREE, V2SI_type_node,
8329 tree_cons (NULL_TREE, V2SI_type_node,
8330 endlink)));
8331 tree v4sf_ftype_v4sf_v4sf_int
8332 = build_function_type (V4SF_type_node,
8333 tree_cons (NULL_TREE, V4SF_type_node,
8334 tree_cons (NULL_TREE, V4SF_type_node,
8335 tree_cons (NULL_TREE,
8336 integer_type_node,
8337 endlink))));
8338 tree v4hi_ftype_v8qi_v8qi
8339 = build_function_type (V4HI_type_node,
8340 tree_cons (NULL_TREE, V8QI_type_node,
8341 tree_cons (NULL_TREE, V8QI_type_node,
8342 endlink)));
8343 tree v2si_ftype_v4hi_v4hi
8344 = build_function_type (V2SI_type_node,
8345 tree_cons (NULL_TREE, V4HI_type_node,
8346 tree_cons (NULL_TREE, V4HI_type_node,
8347 endlink)));
8348 tree v4hi_ftype_v4hi_int
8349 = build_function_type (V4HI_type_node,
8350 tree_cons (NULL_TREE, V4HI_type_node,
8351 tree_cons (NULL_TREE, integer_type_node,
8352 endlink)));
8353 tree di_ftype_di_int
8354 = build_function_type (long_long_unsigned_type_node,
8355 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8356 tree_cons (NULL_TREE, integer_type_node,
8357 endlink)));
8358 tree v8qi_ftype_v8qi_di
8359 = build_function_type (V8QI_type_node,
8360 tree_cons (NULL_TREE, V8QI_type_node,
8361 tree_cons (NULL_TREE,
8362 long_long_integer_type_node,
8363 endlink)));
8364 tree v4hi_ftype_v4hi_di
8365 = build_function_type (V4HI_type_node,
8366 tree_cons (NULL_TREE, V4HI_type_node,
8367 tree_cons (NULL_TREE,
8368 long_long_integer_type_node,
8369 endlink)));
8370 tree v2si_ftype_v2si_di
8371 = build_function_type (V2SI_type_node,
8372 tree_cons (NULL_TREE, V2SI_type_node,
8373 tree_cons (NULL_TREE,
8374 long_long_integer_type_node,
8375 endlink)));
8376 tree void_ftype_void
8377 = build_function_type (void_type_node, endlink);
8378 tree void_ftype_pchar_int
8379 = build_function_type (void_type_node,
8380 tree_cons (NULL_TREE, pchar_type_node,
8381 tree_cons (NULL_TREE, integer_type_node,
8382 endlink)));
8383 tree void_ftype_unsigned
8384 = build_function_type (void_type_node,
8385 tree_cons (NULL_TREE, unsigned_type_node,
8386 endlink));
8387 tree unsigned_ftype_void
8388 = build_function_type (unsigned_type_node, endlink);
8389 tree di_ftype_void
8390 = build_function_type (long_long_unsigned_type_node, endlink);
8391 tree ti_ftype_void
8392 = build_function_type (intTI_type_node, endlink);
8393 tree v2si_ftype_v4sf
8394 = build_function_type (V2SI_type_node,
8395 tree_cons (NULL_TREE, V4SF_type_node,
8396 endlink));
8397 /* Loads/stores. */
8398 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8399 tree_cons (NULL_TREE, V8QI_type_node,
8400 tree_cons (NULL_TREE,
8401 pchar_type_node,
8402 endlink)));
8403 tree void_ftype_v8qi_v8qi_pchar
8404 = build_function_type (void_type_node, maskmovq_args);
8405 tree v4sf_ftype_pfloat
8406 = build_function_type (V4SF_type_node,
8407 tree_cons (NULL_TREE, pfloat_type_node,
8408 endlink));
8409 tree v4sf_ftype_float
8410 = build_function_type (V4SF_type_node,
8411 tree_cons (NULL_TREE, float_type_node,
8412 endlink));
8413 tree v4sf_ftype_float_float_float_float
8414 = build_function_type (V4SF_type_node,
8415 tree_cons (NULL_TREE, float_type_node,
8416 tree_cons (NULL_TREE, float_type_node,
8417 tree_cons (NULL_TREE,
8418 float_type_node,
8419 tree_cons (NULL_TREE,
8420 float_type_node,
8421 endlink)))));
8422 /* @@@ the type is bogus */
8423 tree v4sf_ftype_v4sf_pv2si
8424 = build_function_type (V4SF_type_node,
8425 tree_cons (NULL_TREE, V4SF_type_node,
8426 tree_cons (NULL_TREE, pv2si_type_node,
8427 endlink)));
8428 tree v4sf_ftype_pv2si_v4sf
8429 = build_function_type (V4SF_type_node,
8430 tree_cons (NULL_TREE, pv2si_type_node,
8431 tree_cons (NULL_TREE, V4SF_type_node,
8432 endlink)));
8433 tree void_ftype_pfloat_v4sf
8434 = build_function_type (void_type_node,
8435 tree_cons (NULL_TREE, pfloat_type_node,
8436 tree_cons (NULL_TREE, V4SF_type_node,
8437 endlink)));
8438 tree void_ftype_pdi_di
8439 = build_function_type (void_type_node,
8440 tree_cons (NULL_TREE, pdi_type_node,
8441 tree_cons (NULL_TREE,
8442 long_long_unsigned_type_node,
8443 endlink)));
8444 /* Normal vector unops. */
8445 tree v4sf_ftype_v4sf
8446 = build_function_type (V4SF_type_node,
8447 tree_cons (NULL_TREE, V4SF_type_node,
8448 endlink));
8449
8450 /* Normal vector binops. */
8451 tree v4sf_ftype_v4sf_v4sf
8452 = build_function_type (V4SF_type_node,
8453 tree_cons (NULL_TREE, V4SF_type_node,
8454 tree_cons (NULL_TREE, V4SF_type_node,
8455 endlink)));
8456 tree v8qi_ftype_v8qi_v8qi
8457 = build_function_type (V8QI_type_node,
8458 tree_cons (NULL_TREE, V8QI_type_node,
8459 tree_cons (NULL_TREE, V8QI_type_node,
8460 endlink)));
8461 tree v4hi_ftype_v4hi_v4hi
8462 = build_function_type (V4HI_type_node,
8463 tree_cons (NULL_TREE, V4HI_type_node,
8464 tree_cons (NULL_TREE, V4HI_type_node,
8465 endlink)));
8466 tree v2si_ftype_v2si_v2si
8467 = build_function_type (V2SI_type_node,
8468 tree_cons (NULL_TREE, V2SI_type_node,
8469 tree_cons (NULL_TREE, V2SI_type_node,
8470 endlink)));
8471 tree ti_ftype_ti_ti
8472 = build_function_type (intTI_type_node,
8473 tree_cons (NULL_TREE, intTI_type_node,
8474 tree_cons (NULL_TREE, intTI_type_node,
8475 endlink)));
8476 tree di_ftype_di_di
8477 = build_function_type (long_long_unsigned_type_node,
8478 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8479 tree_cons (NULL_TREE,
8480 long_long_unsigned_type_node,
8481 endlink)));
8482
8483 /* Add all builtins that are more or less simple operations on two
8484 operands. */
8485 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8486 {
8487 /* Use one of the operands; the target can have a different mode for
8488 mask-generating compares. */
8489 enum machine_mode mode;
8490 tree type;
8491
8492 if (d->name == 0)
8493 continue;
8494 mode = insn_data[d->icode].operand[1].mode;
8495
8496 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8497 continue;
8498
8499 switch (mode)
8500 {
8501 case V4SFmode:
8502 type = v4sf_ftype_v4sf_v4sf;
8503 break;
8504 case V8QImode:
8505 type = v8qi_ftype_v8qi_v8qi;
8506 break;
8507 case V4HImode:
8508 type = v4hi_ftype_v4hi_v4hi;
8509 break;
8510 case V2SImode:
8511 type = v2si_ftype_v2si_v2si;
8512 break;
8513 case TImode:
8514 type = ti_ftype_ti_ti;
8515 break;
8516 case DImode:
8517 type = di_ftype_di_di;
8518 break;
8519
8520 default:
8521 abort ();
8522 }
8523
8524 /* Override for comparisons. */
8525 if (d->icode == CODE_FOR_maskcmpv4sf3
8526 || d->icode == CODE_FOR_maskncmpv4sf3
8527 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8528 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8529 type = v4si_ftype_v4sf_v4sf;
8530
8531 def_builtin (d->name, type, d->code);
8532 }
8533
8534 /* Add the remaining MMX insns with somewhat more complicated types. */
8535 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8536 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8537 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8538 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8539 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8540 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8541 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8542 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8543 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8544
8545 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8546 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8547 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8548
8549 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8550 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8551
8552 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8553 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8554
8555 /* Everything beyond this point is SSE only. */
8556 if (! TARGET_SSE)
8557 return;
8558
8559 /* comi/ucomi insns. */
8560 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8561 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8562
8563 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8564 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8565 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8566
8567 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8568 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8569 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8570 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8571 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8572 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8573
8574 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8575 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8576
8577 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8578
8579 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8580 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8581 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8582 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8583 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8584 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8585
8586 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8587 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8588 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8589 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8590
8591 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8592 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8593 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8594 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8595
8596 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8597 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8598
8599 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8600
8601 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8602 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8603 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8604 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8605 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8606 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8607
8608 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8609
8610 /* Composite intrinsics. */
8611 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8612 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8613 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8614 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8615 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8616 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8617 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8618 }
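
/* Usage sketch (not part of this file; assumes -msse and a vector typedef
   in the style of the intrinsics headers):

     typedef float v4sf __attribute__ ((mode (V4SF)));

     v4sf
     add4 (v4sf a, v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }

   The call expands through ix86_expand_builtin below rather than through
   a library function.  */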
8619
8620 /* Errors in the source file can cause expand_expr to return const0_rtx
8621 where we expect a vector. To avoid crashing, use one of the vector
8622 clear instructions. */
8623 static rtx
8624 safe_vector_operand (x, mode)
8625 rtx x;
8626 enum machine_mode mode;
8627 {
8628 if (x != const0_rtx)
8629 return x;
8630 x = gen_reg_rtx (mode);
8631
8632 if (VALID_MMX_REG_MODE (mode))
8633 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8634 : gen_rtx_SUBREG (DImode, x, 0)));
8635 else
8636 emit_insn (gen_sse_clrti (mode == TImode ? x
8637 : gen_rtx_SUBREG (TImode, x, 0)));
8638 return x;
8639 }
8640
8641 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
8642
8643 static rtx
8644 ix86_expand_binop_builtin (icode, arglist, target)
8645 enum insn_code icode;
8646 tree arglist;
8647 rtx target;
8648 {
8649 rtx pat;
8650 tree arg0 = TREE_VALUE (arglist);
8651 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8652 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8653 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8654 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8655 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8656 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8657
8658 if (VECTOR_MODE_P (mode0))
8659 op0 = safe_vector_operand (op0, mode0);
8660 if (VECTOR_MODE_P (mode1))
8661 op1 = safe_vector_operand (op1, mode1);
8662
8663 if (! target
8664 || GET_MODE (target) != tmode
8665 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8666 target = gen_reg_rtx (tmode);
8667
8668 /* In case the insn wants input operands in modes different from
8669 the result, abort. */
8670 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8671 abort ();
8672
8673 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8674 op0 = copy_to_mode_reg (mode0, op0);
8675 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8676 op1 = copy_to_mode_reg (mode1, op1);
8677
8678 pat = GEN_FCN (icode) (target, op0, op1);
8679 if (! pat)
8680 return 0;
8681 emit_insn (pat);
8682 return target;
8683 }
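
/* Example of the path above (a sketch): __builtin_ia32_addps is listed in
   bdesc_2arg with CODE_FOR_addv4sf3, so after both operands are forced
   into V4SFmode registers, GEN_FCN emits the addv4sf3 pattern -- roughly
   (set (reg:V4SF target) (plus:V4SF op0 op1)).  */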
8684
8685 /* Subroutine of ix86_expand_builtin to take care of stores. */
8686
8687 static rtx
8688 ix86_expand_store_builtin (icode, arglist, shuffle)
8689 enum insn_code icode;
8690 tree arglist;
8691 int shuffle;
8692 {
8693 rtx pat;
8694 tree arg0 = TREE_VALUE (arglist);
8695 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8696 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8697 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8698 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8699 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8700
8701 if (VECTOR_MODE_P (mode1))
8702 op1 = safe_vector_operand (op1, mode1);
8703
8704 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8705 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8706 op1 = copy_to_mode_reg (mode1, op1);
8707 if (shuffle >= 0)
8708 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8709 pat = GEN_FCN (icode) (op0, op1);
8710 if (pat)
8711 emit_insn (pat);
8712 return 0;
8713 }
8714
8715 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8716
8717 static rtx
8718 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8719 enum insn_code icode;
8720 tree arglist;
8721 rtx target;
8722 int do_load;
8723 {
8724 rtx pat;
8725 tree arg0 = TREE_VALUE (arglist);
8726 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8727 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8728 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8729
8730 if (! target
8731 || GET_MODE (target) != tmode
8732 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8733 target = gen_reg_rtx (tmode);
8734 if (do_load)
8735 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8736 else
8737 {
8738 if (VECTOR_MODE_P (mode0))
8739 op0 = safe_vector_operand (op0, mode0);
8740
8741 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8742 op0 = copy_to_mode_reg (mode0, op0);
8743 }
8744
8745 pat = GEN_FCN (icode) (target, op0);
8746 if (! pat)
8747 return 0;
8748 emit_insn (pat);
8749 return target;
8750 }
8751
8752 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8753 sqrtss, rsqrtss, rcpss. */
8754
8755 static rtx
8756 ix86_expand_unop1_builtin (icode, arglist, target)
8757 enum insn_code icode;
8758 tree arglist;
8759 rtx target;
8760 {
8761 rtx pat;
8762 tree arg0 = TREE_VALUE (arglist);
8763 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8764 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8765 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8766
8767 if (! target
8768 || GET_MODE (target) != tmode
8769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8770 target = gen_reg_rtx (tmode);
8771
8772 if (VECTOR_MODE_P (mode0))
8773 op0 = safe_vector_operand (op0, mode0);
8774
8775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8776 op0 = copy_to_mode_reg (mode0, op0);
8777
8778 pat = GEN_FCN (icode) (target, op0, op0);
8779 if (! pat)
8780 return 0;
8781 emit_insn (pat);
8782 return target;
8783 }
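
/* op0 is passed twice above because the vm* scalar patterns take both the
   operand to compute on and the vector supplying the untouched upper
   elements; using the same register for both means sqrtss/rsqrtss/rcpss
   leave the upper three floats of the source in the result.  (Inferred
   from the pattern operands; explanatory sketch.)  */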
8784
8785 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8786
8787 static rtx
8788 ix86_expand_sse_compare (d, arglist, target)
8789 struct builtin_description *d;
8790 tree arglist;
8791 rtx target;
8792 {
8793 rtx pat;
8794 tree arg0 = TREE_VALUE (arglist);
8795 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8796 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8797 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8798 rtx op2;
8799 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8800 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8801 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8802 enum rtx_code comparison = d->comparison;
8803
8804 if (VECTOR_MODE_P (mode0))
8805 op0 = safe_vector_operand (op0, mode0);
8806 if (VECTOR_MODE_P (mode1))
8807 op1 = safe_vector_operand (op1, mode1);
8808
8809 /* Swap operands if we have a comparison that isn't available in
8810 hardware. */
8811 if (d->flag)
8812 {
8813 target = gen_reg_rtx (tmode);
8814 emit_move_insn (target, op1);
8815 op1 = op0;
8816 op0 = target;
8817 comparison = swap_condition (comparison);
8818 }
8819 else if (! target
8820 || GET_MODE (target) != tmode
8821 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8822 target = gen_reg_rtx (tmode);
8823
8824 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8825 op0 = copy_to_mode_reg (mode0, op0);
8826 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8827 op1 = copy_to_mode_reg (mode1, op1);
8828
8829 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8830 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8831 if (! pat)
8832 return 0;
8833 emit_insn (pat);
8834 return target;
8835 }
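
/* Example of the operand swap (a sketch): SSE's cmpps encodes only eq,
   lt, le, unord and their negations, so __builtin_ia32_cmpgtps is listed
   in bdesc_2arg with comparison LT and flag 1; a > b is then emitted as
   b < a with the operands exchanged, as done above.  */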
8836
8837 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8838
8839 static rtx
8840 ix86_expand_sse_comi (d, arglist, target)
8841 struct builtin_description *d;
8842 tree arglist;
8843 rtx target;
8844 {
8845 rtx pat;
8846 tree arg0 = TREE_VALUE (arglist);
8847 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8848 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8849 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8850 rtx op2;
8851 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8852 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8853 enum rtx_code comparison = d->comparison;
8854
8855 if (VECTOR_MODE_P (mode0))
8856 op0 = safe_vector_operand (op0, mode0);
8857 if (VECTOR_MODE_P (mode1))
8858 op1 = safe_vector_operand (op1, mode1);
8859
8860 /* Swap operands if we have a comparison that isn't available in
8861 hardware. */
8862 if (d->flag)
8863 {
8864 rtx tmp = op1;
8865 op1 = op0;
8866 op0 = tmp;
8867 comparison = swap_condition (comparison);
8868 }
8869
8870 target = gen_reg_rtx (SImode);
8871 emit_move_insn (target, const0_rtx);
8872 target = gen_rtx_SUBREG (QImode, target, 0);
8873
8874 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8875 op0 = copy_to_mode_reg (mode0, op0);
8876 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8877 op1 = copy_to_mode_reg (mode1, op1);
8878
8879 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8880 pat = GEN_FCN (d->icode) (op0, op1, op2);
8881 if (! pat)
8882 return 0;
8883 emit_insn (pat);
8884 emit_insn (gen_setcc_2 (target, op2));
8885
8886 return target;
8887 }
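
/* Sketch of the sequence above: the comi pattern compares op0 and op1 and
   sets EFLAGS; gen_setcc_2 then materializes the predicate into the
   QImode subreg of the zero-initialized SImode target, yielding a 0/1
   integer result.  */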
8888
8889 /* Expand an expression EXP that calls a built-in function,
8890 with result going to TARGET if that's convenient
8891 (and in mode MODE if that's convenient).
8892 SUBTARGET may be used as the target for computing one of EXP's operands.
8893 IGNORE is nonzero if the value is to be ignored. */
8894
8895 rtx
8896 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8897 tree exp;
8898 rtx target;
8899 rtx subtarget ATTRIBUTE_UNUSED;
8900 enum machine_mode mode ATTRIBUTE_UNUSED;
8901 int ignore ATTRIBUTE_UNUSED;
8902 {
8903 struct builtin_description *d;
8904 size_t i;
8905 enum insn_code icode;
8906 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8907 tree arglist = TREE_OPERAND (exp, 1);
8908 tree arg0, arg1, arg2, arg3;
8909 rtx op0, op1, op2, pat;
8910 enum machine_mode tmode, mode0, mode1, mode2;
8911 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8912
8913 switch (fcode)
8914 {
8915 case IX86_BUILTIN_EMMS:
8916 emit_insn (gen_emms ());
8917 return 0;
8918
8919 case IX86_BUILTIN_SFENCE:
8920 emit_insn (gen_sfence ());
8921 return 0;
8922
8923 case IX86_BUILTIN_M_FROM_INT:
8924 target = gen_reg_rtx (DImode);
8925 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8926 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8927 return target;
8928
8929 case IX86_BUILTIN_M_TO_INT:
8930 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8931 op0 = copy_to_mode_reg (DImode, op0);
8932 target = gen_reg_rtx (SImode);
8933 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8934 return target;
8935
8936 case IX86_BUILTIN_PEXTRW:
8937 icode = CODE_FOR_mmx_pextrw;
8938 arg0 = TREE_VALUE (arglist);
8939 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8940 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8941 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8942 tmode = insn_data[icode].operand[0].mode;
8943 mode0 = insn_data[icode].operand[1].mode;
8944 mode1 = insn_data[icode].operand[2].mode;
8945
8946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8947 op0 = copy_to_mode_reg (mode0, op0);
8948 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8949 {
8950 /* @@@ better error message */
8951 error ("selector must be an immediate");
8952 return const0_rtx;
8953 }
8954 if (target == 0
8955 || GET_MODE (target) != tmode
8956 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8957 target = gen_reg_rtx (tmode);
8958 pat = GEN_FCN (icode) (target, op0, op1);
8959 if (! pat)
8960 return 0;
8961 emit_insn (pat);
8962 return target;
8963
8964 case IX86_BUILTIN_PINSRW:
8965 icode = CODE_FOR_mmx_pinsrw;
8966 arg0 = TREE_VALUE (arglist);
8967 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8968 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8969 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8970 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8971 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8972 tmode = insn_data[icode].operand[0].mode;
8973 mode0 = insn_data[icode].operand[1].mode;
8974 mode1 = insn_data[icode].operand[2].mode;
8975 mode2 = insn_data[icode].operand[3].mode;
8976
8977 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8978 op0 = copy_to_mode_reg (mode0, op0);
8979 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8980 op1 = copy_to_mode_reg (mode1, op1);
8981 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8982 {
8983 /* @@@ better error message */
8984 error ("selector must be an immediate");
8985 return const0_rtx;
8986 }
8987 if (target == 0
8988 || GET_MODE (target) != tmode
8989 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8990 target = gen_reg_rtx (tmode);
8991 pat = GEN_FCN (icode) (target, op0, op1, op2);
8992 if (! pat)
8993 return 0;
8994 emit_insn (pat);
8995 return target;
8996
8997 case IX86_BUILTIN_MASKMOVQ:
8998 icode = CODE_FOR_mmx_maskmovq;
8999 /* Note the arg order is different from the operand order. */
9000 arg1 = TREE_VALUE (arglist);
9001 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
9002 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9003 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9004 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9005 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9006 mode0 = insn_data[icode].operand[0].mode;
9007 mode1 = insn_data[icode].operand[1].mode;
9008 mode2 = insn_data[icode].operand[2].mode;
9009
9010 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
9011 op0 = copy_to_mode_reg (mode0, op0);
9012 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9013 op1 = copy_to_mode_reg (mode1, op1);
9014 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
9015 op2 = copy_to_mode_reg (mode2, op2);
9016 pat = GEN_FCN (icode) (op0, op1, op2);
9017 if (! pat)
9018 return 0;
9019 emit_insn (pat);
9020 return 0;
9021
9022 case IX86_BUILTIN_SQRTSS:
9023 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
9024 case IX86_BUILTIN_RSQRTSS:
9025 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
9026 case IX86_BUILTIN_RCPSS:
9027 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
9028
9029 case IX86_BUILTIN_LOADAPS:
9030 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
9031
9032 case IX86_BUILTIN_LOADUPS:
9033 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
9034
9035 case IX86_BUILTIN_STOREAPS:
9036 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
9037 case IX86_BUILTIN_STOREUPS:
9038 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
9039
9040 case IX86_BUILTIN_LOADSS:
9041 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
9042
9043 case IX86_BUILTIN_STORESS:
9044 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
9045
9046 case IX86_BUILTIN_LOADHPS:
9047 case IX86_BUILTIN_LOADLPS:
9048 icode = (fcode == IX86_BUILTIN_LOADHPS
9049 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
9050 arg0 = TREE_VALUE (arglist);
9051 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9052 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9053 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9054 tmode = insn_data[icode].operand[0].mode;
9055 mode0 = insn_data[icode].operand[1].mode;
9056 mode1 = insn_data[icode].operand[2].mode;
9057
9058 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9059 op0 = copy_to_mode_reg (mode0, op0);
9060 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
9061 if (target == 0
9062 || GET_MODE (target) != tmode
9063 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9064 target = gen_reg_rtx (tmode);
9065 pat = GEN_FCN (icode) (target, op0, op1);
9066 if (! pat)
9067 return 0;
9068 emit_insn (pat);
9069 return target;
9070
9071 case IX86_BUILTIN_STOREHPS:
9072 case IX86_BUILTIN_STORELPS:
9073 icode = (fcode == IX86_BUILTIN_STOREHPS
9074 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
9075 arg0 = TREE_VALUE (arglist);
9076 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9077 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9078 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9079 mode0 = insn_data[icode].operand[1].mode;
9080 mode1 = insn_data[icode].operand[2].mode;
9081
9082 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9083 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9084 op1 = copy_to_mode_reg (mode1, op1);
9085
9086 pat = GEN_FCN (icode) (op0, op0, op1);
9087 if (! pat)
9088 return 0;
9089 emit_insn (pat);
9090 return 0;
9091
9092 case IX86_BUILTIN_MOVNTPS:
9093 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
9094 case IX86_BUILTIN_MOVNTQ:
9095 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
9096
9097 case IX86_BUILTIN_LDMXCSR:
9098 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
9099 target = assign_386_stack_local (SImode, 0);
9100 emit_move_insn (target, op0);
9101 emit_insn (gen_ldmxcsr (target));
9102 return 0;
9103
9104 case IX86_BUILTIN_STMXCSR:
9105 target = assign_386_stack_local (SImode, 0);
9106 emit_insn (gen_stmxcsr (target));
9107 return copy_to_mode_reg (SImode, target);
9108
9109 case IX86_BUILTIN_PREFETCH:
9110 icode = CODE_FOR_prefetch;
9111 arg0 = TREE_VALUE (arglist);
9112 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9113 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9114 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9115 mode0 = insn_data[icode].operand[0].mode;
9116 mode1 = insn_data[icode].operand[1].mode;
9117
9118 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9119 {
9120 /* @@@ better error message */
9121 error ("selector must be an immediate");
9122 return const0_rtx;
9123 }
9124
9125 op0 = copy_to_mode_reg (Pmode, op0);
9126 pat = GEN_FCN (icode) (op0, op1);
9127 if (! pat)
9128 return 0;
9129 emit_insn (pat);
9130 return target;
9131
9132 case IX86_BUILTIN_SHUFPS:
9133 icode = CODE_FOR_sse_shufps;
9134 arg0 = TREE_VALUE (arglist);
9135 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9136 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9137 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9138 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9139 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9140 tmode = insn_data[icode].operand[0].mode;
9141 mode0 = insn_data[icode].operand[1].mode;
9142 mode1 = insn_data[icode].operand[2].mode;
9143 mode2 = insn_data[icode].operand[3].mode;
9144
9145 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9146 op0 = copy_to_mode_reg (mode0, op0);
9147 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9148 op1 = copy_to_mode_reg (mode1, op1);
9149 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
9150 {
9151 /* @@@ better error message */
9152 error ("mask must be an immediate");
9153 return const0_rtx;
9154 }
9155 if (target == 0
9156 || GET_MODE (target) != tmode
9157 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9158 target = gen_reg_rtx (tmode);
9159 pat = GEN_FCN (icode) (target, op0, op1, op2);
9160 if (! pat)
9161 return 0;
9162 emit_insn (pat);
9163 return target;
9164
9165 case IX86_BUILTIN_PSHUFW:
9166 icode = CODE_FOR_mmx_pshufw;
9167 arg0 = TREE_VALUE (arglist);
9168 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9169 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9170 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9171 tmode = insn_data[icode].operand[0].mode;
9172 mode0 = insn_data[icode].operand[2].mode;
9173 mode1 = insn_data[icode].operand[3].mode;
9174
9175 if (! (*insn_data[icode].operand[2].predicate) (op0, mode0))
9176 op0 = copy_to_mode_reg (mode0, op0);
9177 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
9178 {
9179 /* @@@ better error message */
9180 error ("mask must be an immediate");
9181 return const0_rtx;
9182 }
9183 if (target == 0
9184 || GET_MODE (target) != tmode
9185 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9186 target = gen_reg_rtx (tmode);
9187 pat = GEN_FCN (icode) (target, target, op0, op1);
9188 if (! pat)
9189 return 0;
9190 emit_insn (pat);
9191 return target;
9192
9193 /* Composite intrinsics. */
9194 case IX86_BUILTIN_SETPS1:
9195 target = assign_386_stack_local (SFmode, 0);
9196 arg0 = TREE_VALUE (arglist);
9197 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
9198 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
9199 op0 = gen_reg_rtx (V4SFmode);
9200 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
9201 XEXP (target, 0))));
9202 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
9203 return op0;
9204
9205 case IX86_BUILTIN_SETPS:
9206 target = assign_386_stack_local (V4SFmode, 0);
9207 op0 = change_address (target, SFmode, XEXP (target, 0));
9208 arg0 = TREE_VALUE (arglist);
9209 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9210 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9211 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
9212 emit_move_insn (op0,
9213 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
9214 emit_move_insn (adj_offsettable_operand (op0, 4),
9215 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
9216 emit_move_insn (adj_offsettable_operand (op0, 8),
9217 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
9218 emit_move_insn (adj_offsettable_operand (op0, 12),
9219 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
9220 op0 = gen_reg_rtx (V4SFmode);
9221 emit_insn (gen_sse_movaps (op0, target));
9222 return op0;
9223
9224 case IX86_BUILTIN_CLRPS:
9225 target = gen_reg_rtx (TImode);
9226 emit_insn (gen_sse_clrti (target));
9227 return target;
9228
9229 case IX86_BUILTIN_LOADRPS:
9230 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
9231 gen_reg_rtx (V4SFmode), 1);
9232 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
9233 return target;
9234
9235 case IX86_BUILTIN_LOADPS1:
9236 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
9237 gen_reg_rtx (V4SFmode), 1);
9238 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
9239 return target;
9240
9241 case IX86_BUILTIN_STOREPS1:
9242 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
9243 case IX86_BUILTIN_STORERPS:
9244 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
9245
9246 case IX86_BUILTIN_MMX_ZERO:
9247 target = gen_reg_rtx (DImode);
9248 emit_insn (gen_mmx_clrdi (target));
9249 return target;
9250
9251 default:
9252 break;
9253 }
9254
9255 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9256 if (d->code == fcode)
9257 {
9258 /* Compares are treated specially. */
9259 if (d->icode == CODE_FOR_maskcmpv4sf3
9260 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9261 || d->icode == CODE_FOR_maskncmpv4sf3
9262 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9263 return ix86_expand_sse_compare (d, arglist, target);
9264
9265 return ix86_expand_binop_builtin (d->icode, arglist, target);
9266 }
9267
9268 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
9269 if (d->code == fcode)
9270 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
9271
9272 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9273 if (d->code == fcode)
9274 return ix86_expand_sse_comi (d, arglist, target);
9275
9276 /* @@@ Should really do something sensible here. */
9277 return 0;
9278 }
9279
9280 /* Store OPERAND to memory after reload is completed. This means
9281 that we can't easily use assign_stack_local. */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  if (!reload_completed)
    abort ();
  switch (mode)
    {
    case DImode:
      {
        rtx operands[2];
        split_di (&operand, 1, operands, operands + 1);
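        /* split_di leaves the low word in operands[0] and the high word
           in operands[1].  Push the high word first; the second PRE_DEC
           then places the low word at the lower address, so the final
           stack pointer points at a correctly laid out little-endian
           DImode value.  */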
        emit_insn (
          gen_rtx_SET (VOIDmode,
                       gen_rtx_MEM (SImode,
                                    gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx)),
                       operands[1]));
        emit_insn (
          gen_rtx_SET (VOIDmode,
                       gen_rtx_MEM (SImode,
                                    gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx)),
                       operands[0]));
      }
      break;
    case HImode:
      /* It is better to store HImode values as SImode.  */
      if (!TARGET_PARTIAL_REG_STALL)
        operand = gen_lowpart (SImode, operand);
      /* FALLTHRU */
    case SImode:
      emit_insn (
        gen_rtx_SET (VOIDmode,
                     gen_rtx_MEM (GET_MODE (operand),
                                  gen_rtx_PRE_DEC (SImode,
                                                   stack_pointer_rtx)),
                     operand));
      break;
    default:
      abort ();
    }
  return gen_rtx_MEM (mode, stack_pointer_rtx);
}

/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  /* Use LEA to deallocate stack space.  In peephole2 it will be converted
     to a pop or add instruction if registers are available.  */
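  /* The size freed must match what ix86_force_to_memory pushed:
     8 bytes for DImode, 2 for HImode only when the value was kept
     narrow because of TARGET_PARTIAL_REG_STALL, and 4 otherwise
     (HImode is widened to SImode when there is no stall).  */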
  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (mode == DImode
                                                 ? 8
                                                 : mode == HImode
                                                   && TARGET_PARTIAL_REG_STALL
                                                 ? 2
                                                 : 4))));
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf
   to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
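/* For example, a DFmode copy between FLOAT_REGS and GENERAL_REGS needs a
   stack temporary, while an SImode copy between SSE and integer registers
   does not, since movd can do it directly (hence the SImode exception
   below).  */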
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && mode != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && mode != SImode));
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   The cost is not required to equal 2 when CLASS1 is the same as CLASS2;
   on some machines it is expensive to move between registers if they are
   not general registers.  */
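/* For instance, an x87 <-> general register copy needs secondary memory,
   so it is costed below as a store plus a load rather than as a register
   move.  */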
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general purpose register
     we may emit multiple stores followed by a single load, causing a
     memory size mismatch stall.  Count this as an arbitrarily high cost
     of 10.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        return 10;
      return (MEMORY_MOVE_COST (mode, class1, 0)
              + MEMORY_MOVE_COST (mode, class2, 1));
    }
  /* Moves between SSE/MMX and integer unit are expensive.
     ??? We should make this cost CPU specific.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags can hold only CCmode values, and only flags can hold them.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode);
  /* We handle both integer and float values in the general purpose
     registers.  In the future we should be able to handle vector modes
     as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can live in non-QI regs, but
     then they cause partial register stalls.  */
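  /* Only registers 0-3 (%eax, %edx, %ecx, %ebx) have addressable low-byte
     parts in 32-bit mode; in 64-bit mode REX prefixes make every general
     register byte-addressable.  */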
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
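      /* Byte loads into a non-Q class use movzbl, whose cost is tracked
         separately; byte stores from a non-Q register get an extra cost
         of 4, presumably because the value must first be copied into a
         byte-addressable register.  */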
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved
         as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (int) GET_MODE_SIZE (mode) / 4);
    }
}