/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include <setjmp.h>
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  6,            /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  23,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 8},       /* cost of loading MMX registers
                   in SImode and DImode */
  {4, 8},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},   /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 8, 16},   /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
};

struct processor_costs i486_cost = {	/* 486 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  12,           /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  40,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 8},       /* cost of loading MMX registers
                   in SImode and DImode */
  {4, 8},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},   /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 8, 16},   /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3             /* MMX or SSE register to integer */
};

struct processor_costs pentium_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  4,            /* variable shift costs */
  1,            /* constant shift costs */
  11,           /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  25,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  6,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  8,            /* cost of moving MMX register */
  {8, 8},       /* cost of loading MMX registers
                   in SImode and DImode */
  {8, 8},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},   /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 8, 16},   /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3             /* MMX or SSE register to integer */
};

struct processor_costs pentiumpro_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  4,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  17,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 4, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 2, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3             /* MMX or SSE register to integer */
};

struct processor_costs k6_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  3,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  18,           /* cost of a divide/mod */
  8,            /* "large" insn */
  4,            /* MOVE_RATIO */
  3,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  6             /* MMX or SSE register to integer */
};

struct processor_costs athlon_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  5,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  42,           /* cost of a divide/mod */
  8,            /* "large" insn */
  9,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 20},   /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 16},   /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  6             /* MMX or SSE register to integer */
};

struct processor_costs pentium4_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  8,            /* variable shift costs */
  8,            /* constant shift costs */
  30,           /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  112,          /* cost of a divide/mod */
  16,           /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  12,           /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  10,           /* MMX or SSE register to integer */
};

struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
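
/* Each tunable above is a bitmask over the processor enumeration, so a
   feature can be enabled for any subset of CPUs.  A hedged sketch of how
   such a mask is consulted (the CPUMASK/TARGET_USE_LEAVE spellings here
   are illustrative; the real definitions live in i386.h):

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   i.e. TARGET_USE_LEAVE is nonzero exactly when the CPU selected for
   tuning is one of the processors named in x86_use_leave.  */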

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS
};

/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,           /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,   /* fp regs */
  -1, -1, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX */
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,           /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,   /* fp regs */
  -1, -1, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,   /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,   /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,     /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,   /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,           /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,   /* fp regs */
  -1, 9, -1, -1, -1,                /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};
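
/* Illustrative use of the maps above (assumed spelling; the actual hook
   is defined in the target headers, not in this file):

     #define DBX_REGISTER_NUMBER(n)  svr4_dbx_register_map[(n)]

   For example, gcc regno 1 (%edx) is emitted as DWARF register 2,
   matching the SVR4 numbering documented in the comment above.  */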

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
\f
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
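
/* A worked example of the decomposition (illustrative): the address of
   "movl 16(%eax,%ebx,4), ..." arrives as the rtx

     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
		       (reg:SI %eax))
	      (const_int 16))

   and ix86_decompose_address fills in base = %eax, index = %ebx,
   scale = 4 and disp = (const_int 16).  */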

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1},
      {&pentium4_cost, 0, 0, 2, 2, 2, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
      {"pentium4", PROCESSOR_PENTIUM4},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      i = atoi (ix86_align_loops_string);
      if (i < 0 || i > MAX_CODE_ALIGN)
	error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
      else
	ix86_align_loops = i;
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      i = atoi (ix86_align_jumps_string);
      if (i < 0 || i > MAX_CODE_ALIGN)
	error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
      else
	ix86_align_jumps = i;
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      i = atoi (ix86_align_funcs_string);
      if (i < 0 || i > MAX_CODE_ALIGN)
	error ("-malign-functions=%d is not between 0 and %d",
	       i, MAX_CODE_ALIGN);
      else
	ix86_align_funcs = i;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
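
/* For example (an illustrative command line, not taken from this file):

     gcc -march=i686 -mcpu=k6 -malign-loops=4 foo.c

   maps "i686" to PROCESSOR_PENTIUMPRO through processor_alias_table,
   retunes costs and scheduling for the K6 via the -mcpu= loop above,
   and overrides the default loop alignment after range-checking it
   against MAX_CODE_ALIGN.  */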
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
\f
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
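
/* Examples of declarations these attributes accept:

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((cdecl)) g (int a, int b);
     int __attribute__ ((regparm (3))) h (int a, int b, int c);

   stdcall and cdecl take no arguments; regparm takes a single
   INTEGER_CST no larger than REGPARM_MAX, as checked above.  */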

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return-popping conventions (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
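
/* Concretely (illustrative; the epilogue itself is emitted elsewhere):
   a two-argument stdcall function returns with "ret $8", popping its
   8 bytes of arguments, while the cdecl equivalent returns with a
   plain "ret" and leaves the cleanup to the caller.  */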
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_SSE && mode == TImode)
    {
      cum->sse_words += words;
      cum->sse_nregs -= 1;
      cum->sse_regno += 1;
      if (cum->sse_nregs <= 0)
	{
	  cum->sse_nregs = 0;
	  cum->sse_regno = 0;
	}
    }
  else
    {
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
    }
  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    case TImode:
      if (cum->sse_nregs)
	ret = gen_rtx_REG (mode, cum->sse_regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
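
/* Worked example (assuming -mregparm=3 or a regparm (3) attribute):
   for int h (int a, int b, int c), each SImode argument occupies one
   word, so successive calls to function_arg return %eax, %edx and
   %ecx (gcc regnos 0, 1 and 2), and a fourth integer argument would
   go to the stack once cum->nregs is exhausted.  */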
\f

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
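
/* E.g. "leal (%eax,%ebx,4), %ecx" computes %eax + %ebx*4 in a single
   instruction.  The addressing hardware supports index scales of 1, 2,
   4 and 8, but a scale of 1 never appears as a MULT in canonical RTL,
   so the predicate accepts exactly 2, 4 and 8.  */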

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
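
/* The 0xffffffff/0xffff/0xff cases accept the unsigned spelling of -1
   in the given mode; e.g. adding 0xff in QImode is congruent to
   subtracting 1, so it can still be emitted as a decrement.  */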

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
	 multiplies, but the 386 and 486 do HImode multiplies faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that the 80387 has other constants, such as pi, that we should
     support too.  On some machines these are much slower to load as a
     standard constant than to load from doubles in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}
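
/* The return values map onto dedicated i387 load instructions: a result
   of 1 means the constant can be loaded with "fldz" (push +0.0), and 2
   means "fld1" (push +1.0).  (Hedged: the insn patterns that consume
   these codes live in i386.md, not in this file.)  */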

/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

1781 /* Don't allow more than a 32k pop, since that's all we can do
1782 with one instruction. */
1783 if (current_function_pops_args
1784 && current_function_args_size >= 32768)
1785 return 0;
1786
1787 ix86_compute_frame_layout (&frame);
1788 return frame.to_allocate == 0 && frame.nregs == 0;
1789 }
1790
1791 /* Value should be nonzero if functions must have frame pointers.
1792 Zero means the frame pointer need not be set up (and parms may
1793 be accessed via the stack pointer) in functions that seem suitable. */
1794
1795 int
1796 ix86_frame_pointer_required ()
1797 {
1798 /* If we accessed previous frames, then the generated code expects
1799 to be able to access the saved ebp value in our frame. */
1800 if (cfun->machine->accesses_prev_frame)
1801 return 1;
1802
1803 /* Several x86 OSes need a frame pointer for other reasons,
1804 usually pertaining to setjmp. */
1805 if (SUBTARGET_FRAME_POINTER_REQUIRED)
1806 return 1;
1807
1808 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
1809 the frame pointer by default. Turn it back on now if we've not
1810 got a leaf function. */
1811 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
1812 return 1;
1813
1814 return 0;
1815 }
1816
1817 /* Record that the current function accesses previous call frames. */
1818
1819 void
1820 ix86_setup_frame_addresses ()
1821 {
1822 cfun->machine->accesses_prev_frame = 1;
1823 }
1824 \f
1825 static char pic_label_name[32];
1826
1827 /* This function emits, at the end of the asm file, the -fpic thunk
1828 that loads %ebx with the return address of its caller and returns. */
1829
1830 void
1831 ix86_asm_file_end (file)
1832 FILE *file;
1833 {
1834 rtx xops[2];
1835
1836 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
1837 return;
1838
1839 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
1840 to updating relocations against a section being discarded, such that
1841 this doesn't work. Ought to detect this at configure time. */
1842 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
1843 /* The trick here is to create a linkonce section containing the
1844 pic label thunk, but to refer to it with an internal label.
1845 Because the label is internal, we don't have inter-dso name
1846 binding issues on hosts that don't support ".hidden".
1847
1848 In order to use these macros, however, we must create a fake
1849 function decl. */
1850 {
1851 tree decl = build_decl (FUNCTION_DECL,
1852 get_identifier ("i686.get_pc_thunk"),
1853 error_mark_node);
1854 DECL_ONE_ONLY (decl) = 1;
1855 UNIQUE_SECTION (decl, 0);
1856 named_section (decl, NULL, 0);
1857 }
1858 #else
1859 text_section ();
1860 #endif
1861
1862 /* This used to call ASM_DECLARE_FUNCTION_NAME(), but since an
1863 internal (non-global) label is being emitted, it didn't make
1864 sense to attach .type information to a local label. That gave
1865 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1866 me debug info for a label that you're declaring non-global?), so
1867 this was changed to call ASM_OUTPUT_LABEL() instead. */
1868
1869 ASM_OUTPUT_LABEL (file, pic_label_name);
1870
1871 xops[0] = pic_offset_table_rtx;
1872 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
1873 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1874 output_asm_insn ("ret", xops);
1875 }
1876
1877 void
1878 load_pic_register ()
1879 {
1880 rtx gotsym, pclab;
1881
1882 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1883
1884 if (TARGET_DEEP_BRANCH_PREDICTION)
1885 {
1886 if (! pic_label_name[0])
1887 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1888 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
1889 }
1890 else
1891 {
1892 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1893 }
1894
1895 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1896
1897 if (! TARGET_DEEP_BRANCH_PREDICTION)
1898 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1899
1900 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1901 }
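/* Roughly (a sketch of the expanded sequence): without deep branch
   prediction this becomes

	call 1f
     1:	popl %ebx
	addl $_GLOBAL_OFFSET_TABLE_, %ebx

   while with it the pc is fetched by calling the local thunk emitted
   by ix86_asm_file_end ("movl (%esp), %ebx; ret"), which keeps the
   call/return prediction stack balanced.  */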
1902
1903 /* Generate an SImode "push" pattern for input ARG. */
1904
1905 static rtx
1906 gen_push (arg)
1907 rtx arg;
1908 {
1909 return gen_rtx_SET (VOIDmode,
1910 gen_rtx_MEM (SImode,
1911 gen_rtx_PRE_DEC (SImode,
1912 stack_pointer_rtx)),
1913 arg);
1914 }
1915
1916 /* Return 1 if we need to save REGNO. */
1917 static int
1918 ix86_save_reg (regno)
1919 int regno;
1920 {
1921 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1922 || current_function_uses_const_pool);
1923 return ((regs_ever_live[regno] && !call_used_regs[regno]
1924 && !fixed_regs[regno]
1925 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
1926 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
1927
1928 }
1929
1930 /* Return number of registers to be saved on the stack. */
1931
1932 static int
1933 ix86_nsaved_regs ()
1934 {
1935 int nregs = 0;
1936 int regno;
1937
1938 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1939 if (ix86_save_reg (regno))
1940 nregs++;
1941 return nregs;
1942 }
1943
1944 /* Return the offset between two registers, one to be eliminated, and the other
1945 its replacement, at the start of a routine. */
1946
1947 HOST_WIDE_INT
1948 ix86_initial_elimination_offset (from, to)
1949 int from;
1950 int to;
1951 {
1952 struct ix86_frame frame;
1953 ix86_compute_frame_layout (&frame);
1954
1955 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1956 return frame.hard_frame_pointer_offset;
1957 else if (from == FRAME_POINTER_REGNUM
1958 && to == HARD_FRAME_POINTER_REGNUM)
1959 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
1960 else
1961 {
1962 if (to != STACK_POINTER_REGNUM)
1963 abort ();
1964 else if (from == ARG_POINTER_REGNUM)
1965 return frame.stack_pointer_offset;
1966 else if (from != FRAME_POINTER_REGNUM)
1967 abort ();
1968 else
1969 return frame.stack_pointer_offset - frame.frame_pointer_offset;
1970 }
1971 }
1972
1973 /* Fill in the ix86_frame structure describing the frame of the current function. */
1974
1975 static void
1976 ix86_compute_frame_layout (frame)
1977 struct ix86_frame *frame;
1978 {
1979 HOST_WIDE_INT total_size;
1980 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1981 int offset;
1982 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1983 HOST_WIDE_INT size = get_frame_size ();
1984
1985 frame->nregs = ix86_nsaved_regs ();
1986 total_size = size;
1987
1988 /* Skip return value and save base pointer. */
1989 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
1990
1991 frame->hard_frame_pointer_offset = offset;
1992
1993 /* Do some sanity checking of stack_alignment_needed and
1994 preferred_alignment, since the i386 port is the only one using these
1995 features and they may break easily. */
1996
1997 if (size && !stack_alignment_needed)
1998 abort ();
1999 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2000 abort ();
2001 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2002 abort ();
2003 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2004 abort ();
2005
2006 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2007 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
2008
2009 /* Register save area */
2010 offset += frame->nregs * UNITS_PER_WORD;
2011
2012 /* Align the start of the frame for the local variables. */
2013 frame->padding1 = ((offset + stack_alignment_needed - 1)
2014 & -stack_alignment_needed) - offset;
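/* The idiom (x + a - 1) & -a rounds x up to the next multiple of the
   power-of-two a; e.g. (20 + 16 - 1) & -16 == 32.  */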
2015
2016 offset += frame->padding1;
2017
2018 /* Frame pointer points here. */
2019 frame->frame_pointer_offset = offset;
2020
2021 offset += size;
2022
2023 /* Add outgoing arguments area. */
2024 if (ACCUMULATE_OUTGOING_ARGS)
2025 {
2026 offset += current_function_outgoing_args_size;
2027 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2028 }
2029 else
2030 frame->outgoing_arguments_size = 0;
2031
2032 /* Align stack boundary. */
2033 frame->padding2 = ((offset + preferred_alignment - 1)
2034 & -preferred_alignment) - offset;
2035
2036 offset += frame->padding2;
2037
2038 /* We've reached end of stack frame. */
2039 frame->stack_pointer_offset = offset;
2040
2041 /* Size prologue needs to allocate. */
2042 frame->to_allocate =
2043 (size + frame->padding1 + frame->padding2
2044 + frame->outgoing_arguments_size);
2045
2046 #if 0
2047 fprintf (stderr, "nregs: %i\n", frame->nregs);
2048 fprintf (stderr, "size: %i\n", size);
2049 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2050 fprintf (stderr, "padding1: %i\n", frame->padding1);
2051 fprintf (stderr, "padding2: %i\n", frame->padding2);
2052 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2053 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2054 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2055 frame->hard_frame_pointer_offset);
2056 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2057 #endif
2058 }
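/* A worked example (illustrative numbers only): with a frame pointer,
   3 saved registers, size == 20 and both alignments equal to 16:

     offset = 8				return address + saved ebp
     hard_frame_pointer_offset = 8
     offset += 3*4  -> 20		register save area
     padding1 = 12			rounds 20 up to 32
     frame_pointer_offset = 32
     offset += 20  -> 52		local variables
     padding2 = 12			rounds 52 up to 64
     stack_pointer_offset = 64
     to_allocate = 20 + 12 + 12 = 44  */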
2059
2060 /* Emit code to save registers in the prologue. */
2061
2062 static void
2063 ix86_emit_save_regs ()
2064 {
2065 register int regno;
2066 rtx insn;
2067
2068 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2069 if (ix86_save_reg (regno))
2070 {
2071 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2072 RTX_FRAME_RELATED_P (insn) = 1;
2073 }
2074 }
2075
2076 /* Expand the prologue into a bunch of separate insns. */
2077
2078 void
2079 ix86_expand_prologue ()
2080 {
2081 rtx insn;
2082 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2083 || current_function_uses_const_pool);
2084 struct ix86_frame frame;
2085
2086 ix86_compute_frame_layout (&frame);
2087
2088 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2089 slower on all targets. Also sdb doesn't like it. */
2090
2091 if (frame_pointer_needed)
2092 {
2093 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095
2096 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2097 RTX_FRAME_RELATED_P (insn) = 1;
2098 }
2099
2100 ix86_emit_save_regs ();
2101
2102 if (frame.to_allocate == 0)
2103 ;
2104 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2105 {
2106 if (frame_pointer_needed)
2107 insn = emit_insn (gen_pro_epilogue_adjust_stack
2108 (stack_pointer_rtx, stack_pointer_rtx,
2109 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2110 else
2111 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2112 GEN_INT (-frame.to_allocate)));
2113 RTX_FRAME_RELATED_P (insn) = 1;
2114 }
2115 else
2116 {
2117 /* ??? Is this only valid for Win32? */
2118
2119 rtx arg0, sym;
2120
2121 arg0 = gen_rtx_REG (SImode, 0);
2122 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2123
2124 sym = gen_rtx_MEM (FUNCTION_MODE,
2125 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2126 insn = emit_call_insn (gen_call (sym, const0_rtx));
2127
2128 CALL_INSN_FUNCTION_USAGE (insn)
2129 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2130 CALL_INSN_FUNCTION_USAGE (insn));
2131 }
2132
2133 #ifdef SUBTARGET_PROLOGUE
2134 SUBTARGET_PROLOGUE;
2135 #endif
2136
2137 if (pic_reg_used)
2138 load_pic_register ();
2139
2140 /* If we are profiling, make sure no instructions are scheduled before
2141 the call to mcount. However, if -fpic, the above call will have
2142 done that. */
2143 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2144 emit_insn (gen_blockage ());
2145 }
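/* For the common frame-pointer case the insns emitted above assemble
   to roughly (a sketch):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	<each saved register>
	subl	$to_allocate, %esp  */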
2146
2147 /* Emit code to add TSIZE to the esp value, tying the adjustment to the
2148 frame pointer when one is present so it cannot be mis-scheduled. */
2149
2150 static void
2151 ix86_emit_epilogue_esp_adjustment (tsize)
2152 int tsize;
2153 {
2154 /* If a frame pointer is present, we must be sure to tie the sp
2155 to the fp so that we don't mis-schedule. */
2156 if (frame_pointer_needed)
2157 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2158 stack_pointer_rtx,
2159 GEN_INT (tsize),
2160 hard_frame_pointer_rtx));
2161 else
2162 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2163 GEN_INT (tsize)));
2164 }
2165
2166 /* Emit code to restore saved registers using MOV insns. First register
2167 is restored from POINTER + OFFSET. */
2168 static void
2169 ix86_emit_restore_regs_using_mov (pointer, offset)
2170 rtx pointer;
2171 int offset;
2172 {
2173 int regno;
2174
2175 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2176 if (ix86_save_reg (regno))
2177 {
2178 emit_move_insn (gen_rtx_REG (Pmode, regno),
2179 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2180 pointer),
2181 offset));
2182 offset += UNITS_PER_WORD;
2183 }
2184 }
2185
2186 /* Restore function stack, frame, and registers. */
2187
2188 void
2189 ix86_expand_epilogue (emit_return)
2190 int emit_return;
2191 {
2192 int regno;
2193 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2194 struct ix86_frame frame;
2195 HOST_WIDE_INT offset;
2196
2197 ix86_compute_frame_layout (&frame);
2198
2199 /* Calculate start of saved registers relative to ebp. */
2200 offset = -frame.nregs * UNITS_PER_WORD;
2201
2202 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2203 if (profile_block_flag == 2)
2204 {
2205 FUNCTION_BLOCK_PROFILER_EXIT;
2206 }
2207 #endif
2208
2209 /* If we're only restoring one register and sp is not valid, then
2210 use a move instruction to restore the register, since it's
2211 less work than reloading sp and popping the register.
2212
2213 The default code results in a stack adjustment using an add/lea
2214 instruction, while this code results in a LEAVE instruction (or its
2215 discrete equivalent), so it is profitable in some other cases as
2216 well, especially when there are no registers to restore. We also use
2217 this code when TARGET_USE_LEAVE and there is exactly one register to
2218 pop. This heuristic may need some tuning in the future. */
2219 if ((!sp_valid && frame.nregs <= 1)
2220 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2221 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2222 && frame.nregs == 1))
2223 {
2224 /* Restore registers. We can use ebp or esp to address the memory
2225 locations. If both are available, default to ebp, since offsets
2226 are known to be small. The only exception is when esp points directly
2227 to the end of the block of saved registers, where we can simplify
2228 the addressing mode. */
2229
2230 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2231 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2232 else
2233 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2234
2235 if (!frame_pointer_needed)
2236 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2237 + frame.nregs * UNITS_PER_WORD);
2238 /* If not an i386, mov & pop is faster than "leave". */
2239 else if (TARGET_USE_LEAVE || optimize_size)
2240 emit_insn (gen_leave ());
2241 else
2242 {
2243 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2244 hard_frame_pointer_rtx,
2245 const0_rtx,
2246 hard_frame_pointer_rtx));
2247 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2248 }
2249 }
2250 else
2251 {
2252 /* First step is to deallocate the stack frame so that we can
2253 pop the registers. */
2254 if (!sp_valid)
2255 {
2256 if (!frame_pointer_needed)
2257 abort ();
2258 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2259 hard_frame_pointer_rtx,
2260 GEN_INT (offset),
2261 hard_frame_pointer_rtx));
2262 }
2263 else if (frame.to_allocate)
2264 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
2265
2266 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2267 if (ix86_save_reg (regno))
2268 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2269 if (frame_pointer_needed)
2270 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2271 }
2272
2273 /* Sibcall epilogues don't want a return instruction. */
2274 if (! emit_return)
2275 return;
2276
2277 if (current_function_pops_args && current_function_args_size)
2278 {
2279 rtx popc = GEN_INT (current_function_pops_args);
2280
2281 /* i386 can only pop 64K bytes. If asked to pop more, pop the
2282 return address, do an explicit add, and jump indirectly to the
2283 caller. */
2284
2285 if (current_function_pops_args >= 65536)
2286 {
2287 rtx ecx = gen_rtx_REG (SImode, 2);
2288
2289 emit_insn (gen_popsi1 (ecx));
2290 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2291 emit_jump_insn (gen_return_indirect_internal (ecx));
2292 }
2293 else
2294 emit_jump_insn (gen_return_pop_internal (popc));
2295 }
2296 else
2297 emit_jump_insn (gen_return_internal ());
2298 }
2299 \f
2300 /* Extract the parts of an RTL expression that is a valid memory address
2301 for an instruction. Return false if the structure of the address is
2302 grossly off. */
2303
2304 static int
2305 ix86_decompose_address (addr, out)
2306 register rtx addr;
2307 struct ix86_address *out;
2308 {
2309 rtx base = NULL_RTX;
2310 rtx index = NULL_RTX;
2311 rtx disp = NULL_RTX;
2312 HOST_WIDE_INT scale = 1;
2313 rtx scale_rtx = NULL_RTX;
2314
2315 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2316 base = addr;
2317 else if (GET_CODE (addr) == PLUS)
2318 {
2319 rtx op0 = XEXP (addr, 0);
2320 rtx op1 = XEXP (addr, 1);
2321 enum rtx_code code0 = GET_CODE (op0);
2322 enum rtx_code code1 = GET_CODE (op1);
2323
2324 if (code0 == REG || code0 == SUBREG)
2325 {
2326 if (code1 == REG || code1 == SUBREG)
2327 index = op0, base = op1; /* index + base */
2328 else
2329 base = op0, disp = op1; /* base + displacement */
2330 }
2331 else if (code0 == MULT)
2332 {
2333 index = XEXP (op0, 0);
2334 scale_rtx = XEXP (op0, 1);
2335 if (code1 == REG || code1 == SUBREG)
2336 base = op1; /* index*scale + base */
2337 else
2338 disp = op1; /* index*scale + disp */
2339 }
2340 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2341 {
2342 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2343 scale_rtx = XEXP (XEXP (op0, 0), 1);
2344 base = XEXP (op0, 1);
2345 disp = op1;
2346 }
2347 else if (code0 == PLUS)
2348 {
2349 index = XEXP (op0, 0); /* index + base + disp */
2350 base = XEXP (op0, 1);
2351 disp = op1;
2352 }
2353 else
2354 return FALSE;
2355 }
2356 else if (GET_CODE (addr) == MULT)
2357 {
2358 index = XEXP (addr, 0); /* index*scale */
2359 scale_rtx = XEXP (addr, 1);
2360 }
2361 else if (GET_CODE (addr) == ASHIFT)
2362 {
2363 rtx tmp;
2364
2365 /* We're called for lea too, which implements ashift on occasion. */
2366 index = XEXP (addr, 0);
2367 tmp = XEXP (addr, 1);
2368 if (GET_CODE (tmp) != CONST_INT)
2369 return FALSE;
2370 scale = INTVAL (tmp);
2371 if ((unsigned HOST_WIDE_INT) scale > 3)
2372 return FALSE;
2373 scale = 1 << scale;
2374 }
2375 else
2376 disp = addr; /* displacement */
2377
2378 /* Extract the integral value of scale. */
2379 if (scale_rtx)
2380 {
2381 if (GET_CODE (scale_rtx) != CONST_INT)
2382 return FALSE;
2383 scale = INTVAL (scale_rtx);
2384 }
2385
2386 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
2387 if (base && index && scale == 1
2388 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2389 || index == stack_pointer_rtx))
2390 {
2391 rtx tmp = base;
2392 base = index;
2393 index = tmp;
2394 }
2395
2396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2397 if ((base == hard_frame_pointer_rtx
2398 || base == frame_pointer_rtx
2399 || base == arg_pointer_rtx) && !disp)
2400 disp = const0_rtx;
2401
2402 /* Special case: on K6, [%esi] causes the instruction to be vector
2403 decoded. Avoid this by transforming it to [%esi+0]. */
2404 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2405 && base && !index && !disp
2406 && REG_P (base)
2407 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2408 disp = const0_rtx;
2409
2410 /* Special case: encode reg+reg instead of reg*2. */
2411 if (!base && index && scale && scale == 2)
2412 base = index, scale = 1;
2413
2414 /* Special case: scaling cannot be encoded without base or displacement. */
2415 if (!base && !disp && index && scale != 1)
2416 disp = const0_rtx;
2417
2418 out->base = base;
2419 out->index = index;
2420 out->disp = disp;
2421 out->scale = scale;
2422
2423 return TRUE;
2424 }
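/* Some example decompositions (a sketch):

     (plus (reg eax) (const_int 4))
	-> base = eax, disp = 4, scale = 1
     (plus (mult (reg ebx) (const_int 4)) (reg esi))
	-> index = ebx, scale = 4, base = esi
     (ashift (reg ecx) (const_int 3))
	-> index = ecx, scale = 8, disp = 0 forced (no base)
     (reg ebp)
	-> base = ebp, disp = 0 forced, since %ebp as a base cannot
	   be encoded without a displacement.  */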
2425 \f
2426 /* Return the cost of the memory address X.
2427 For i386, it is better to use a complex address than to let gcc copy
2428 the address into a reg and make a new pseudo. But not if the address
2429 requires two regs - that would mean more pseudos with longer
2430 lifetimes. */
2431 int
2432 ix86_address_cost (x)
2433 rtx x;
2434 {
2435 struct ix86_address parts;
2436 int cost = 1;
2437
2438 if (!ix86_decompose_address (x, &parts))
2439 abort ();
2440
2441 /* More complex memory references are better. */
2442 if (parts.disp && parts.disp != const0_rtx)
2443 cost--;
2444
2445 /* Attempt to minimize number of registers in the address. */
2446 if ((parts.base
2447 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2448 || (parts.index
2449 && (!REG_P (parts.index)
2450 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2451 cost++;
2452
2453 if (parts.base
2454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2455 && parts.index
2456 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2457 && parts.base != parts.index)
2458 cost++;
2459
2460 /* The AMD K6 doesn't like addresses with the ModR/M byte set to
2461 00_xxx_100b, since its predecode logic can't determine the length
2462 of such instructions, so decoding degenerates to the vector decoder.
2463 Increase the cost of such addresses here. The penalty is at least 2
2464 cycles. It may be worthwhile to split or even refuse such addresses.
2465
2466 The following addressing modes are affected:
2467 [base+scale*index]
2468 [scale*index+disp]
2469 [base+index]
2470
2471 The first and last cases may be avoidable by explicitly coding a zero
2472 displacement in the memory address, but I don't have an AMD K6
2473 machine handy to check this theory. */
2474
2475 if (TARGET_K6
2476 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2477 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2478 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2479 cost += 10;
2480
2481 return cost;
2482 }
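/* For instance, (plus (reg ebx) (const_int 8)) costs 0: the base cost
   of 1 is decremented for the real displacement and no pseudo register
   is involved, while a bare pseudo-register address costs 2.  */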
2483 \f
2484 /* If X is a machine specific address (i.e. a symbol or label being
2485 referenced as a displacement from the GOT implemented using an
2486 UNSPEC), then return the base term. Otherwise return X. */
2487
2488 rtx
2489 ix86_find_base_term (x)
2490 rtx x;
2491 {
2492 rtx term;
2493
2494 if (GET_CODE (x) != PLUS
2495 || XEXP (x, 0) != pic_offset_table_rtx
2496 || GET_CODE (XEXP (x, 1)) != CONST)
2497 return x;
2498
2499 term = XEXP (XEXP (x, 1), 0);
2500
2501 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2502 term = XEXP (term, 0);
2503
2504 if (GET_CODE (term) != UNSPEC
2505 || XVECLEN (term, 0) != 1
2506 || XINT (term, 1) != 7)
2507 return x;
2508
2509 term = XVECEXP (term, 0, 0);
2510
2511 if (GET_CODE (term) != SYMBOL_REF
2512 && GET_CODE (term) != LABEL_REF)
2513 return x;
2514
2515 return term;
2516 }
2517 \f
2518 /* Determine if a given CONST RTX is a valid memory displacement
2519 in PIC mode. */
2520
2521 int
2522 legitimate_pic_address_disp_p (disp)
2523 register rtx disp;
2524 {
2525 if (GET_CODE (disp) != CONST)
2526 return 0;
2527 disp = XEXP (disp, 0);
2528
2529 if (GET_CODE (disp) == PLUS)
2530 {
2531 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2532 return 0;
2533 disp = XEXP (disp, 0);
2534 }
2535
2536 if (GET_CODE (disp) != UNSPEC
2537 || XVECLEN (disp, 0) != 1)
2538 return 0;
2539
2540 /* Must be @GOT or @GOTOFF. */
2541 if (XINT (disp, 1) != 6
2542 && XINT (disp, 1) != 7)
2543 return 0;
2544
2545 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2546 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2547 return 0;
2548
2549 return 1;
2550 }
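/* So, e.g., both (const (unspec [(symbol_ref "foo")] 7)) and
   (const (plus (unspec [(symbol_ref "foo")] 7) (const_int 4))) are
   accepted, i.e. foo@GOTOFF, optionally with a constant addend.  */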
2551
2552 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2553 memory address for an instruction. The MODE argument is the machine mode
2554 for the MEM expression that wants to use this address.
2555
2556 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2557 convert common non-canonical forms to canonical form so that they will
2558 be recognized. */
2559
2560 int
2561 legitimate_address_p (mode, addr, strict)
2562 enum machine_mode mode;
2563 register rtx addr;
2564 int strict;
2565 {
2566 struct ix86_address parts;
2567 rtx base, index, disp;
2568 HOST_WIDE_INT scale;
2569 const char *reason = NULL;
2570 rtx reason_rtx = NULL_RTX;
2571
2572 if (TARGET_DEBUG_ADDR)
2573 {
2574 fprintf (stderr,
2575 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2576 GET_MODE_NAME (mode), strict);
2577 debug_rtx (addr);
2578 }
2579
2580 if (! ix86_decompose_address (addr, &parts))
2581 {
2582 reason = "decomposition failed";
2583 goto report_error;
2584 }
2585
2586 base = parts.base;
2587 index = parts.index;
2588 disp = parts.disp;
2589 scale = parts.scale;
2590
2591 /* Validate base register.
2592
2593 Don't allow SUBREG's here, it can lead to spill failures when the base
2594 is one word out of a two word structure, which is represented internally
2595 as a DImode int. */
2596
2597 if (base)
2598 {
2599 reason_rtx = base;
2600
2601 if (GET_CODE (base) != REG)
2602 {
2603 reason = "base is not a register";
2604 goto report_error;
2605 }
2606
2607 if (GET_MODE (base) != Pmode)
2608 {
2609 reason = "base is not in Pmode";
2610 goto report_error;
2611 }
2612
2613 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2614 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2615 {
2616 reason = "base is not valid";
2617 goto report_error;
2618 }
2619 }
2620
2621 /* Validate index register.
2622
2623 Don't allow SUBREG's here, it can lead to spill failures when the index
2624 is one word out of a two word structure, which is represented internally
2625 as a DImode int. */
2626
2627 if (index)
2628 {
2629 reason_rtx = index;
2630
2631 if (GET_CODE (index) != REG)
2632 {
2633 reason = "index is not a register";
2634 goto report_error;
2635 }
2636
2637 if (GET_MODE (index) != Pmode)
2638 {
2639 reason = "index is not in Pmode";
2640 goto report_error;
2641 }
2642
2643 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2644 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2645 {
2646 reason = "index is not valid";
2647 goto report_error;
2648 }
2649 }
2650
2651 /* Validate scale factor. */
2652 if (scale != 1)
2653 {
2654 reason_rtx = GEN_INT (scale);
2655 if (!index)
2656 {
2657 reason = "scale without index";
2658 goto report_error;
2659 }
2660
2661 if (scale != 2 && scale != 4 && scale != 8)
2662 {
2663 reason = "scale is not a valid multiplier";
2664 goto report_error;
2665 }
2666 }
2667
2668 /* Validate displacement. */
2669 if (disp)
2670 {
2671 reason_rtx = disp;
2672
2673 if (!CONSTANT_ADDRESS_P (disp))
2674 {
2675 reason = "displacement is not constant";
2676 goto report_error;
2677 }
2678
2679 if (GET_CODE (disp) == CONST_DOUBLE)
2680 {
2681 reason = "displacement is a const_double";
2682 goto report_error;
2683 }
2684
2685 if (flag_pic && SYMBOLIC_CONST (disp))
2686 {
2687 if (! legitimate_pic_address_disp_p (disp))
2688 {
2689 reason = "displacement is an invalid pic construct";
2690 goto report_error;
2691 }
2692
2693 /* This code used to verify that a symbolic pic displacement
2694 includes the pic_offset_table_rtx register.
2695
2696 While this is a good idea, unfortunately these constructs may
2697 be created by the "adds using lea" optimization for incorrect
2698 code like:
2699
2700 int a;
2701 int foo(int i)
2702 {
2703 return *(&a+i);
2704 }
2705
2706 This code is nonsensical, but results in addressing the
2707 GOT table with a pic_offset_table_rtx base. We can't
2708 just refuse it easily, since it gets matched by the
2709 "addsi3" pattern, which later gets split to an lea when
2710 the output register differs from the input. While this
2711 could be handled by a separate addsi pattern for this
2712 case that never results in an lea, disabling this test
2713 seems to be the easier and correct fix for the crash. */
2714 }
2715 else if (HALF_PIC_P ())
2716 {
2717 if (! HALF_PIC_ADDRESS_P (disp)
2718 || (base != NULL_RTX || index != NULL_RTX))
2719 {
2720 reason = "displacement is an invalid half-pic reference";
2721 goto report_error;
2722 }
2723 }
2724 }
2725
2726 /* Everything looks valid. */
2727 if (TARGET_DEBUG_ADDR)
2728 fprintf (stderr, "Success.\n");
2729 return TRUE;
2730
2731 report_error:
2732 if (TARGET_DEBUG_ADDR)
2733 {
2734 fprintf (stderr, "Error: %s\n", reason);
2735 debug_rtx (reason_rtx);
2736 }
2737 return FALSE;
2738 }
2739 \f
2740 /* Return a unique alias set for the GOT. */
2741
2742 static HOST_WIDE_INT
2743 ix86_GOT_alias_set ()
2744 {
2745 static HOST_WIDE_INT set = -1;
2746 if (set == -1)
2747 set = new_alias_set ();
2748 return set;
2749 }
2750
2751 /* Return a legitimate reference for ORIG (an address) using the
2752 register REG. If REG is 0, a new pseudo is generated.
2753
2754 There are two types of references that must be handled:
2755
2756 1. Global data references must load the address from the GOT, via
2757 the PIC reg. An insn is emitted to do this load, and the reg is
2758 returned.
2759
2760 2. Static data references, constant pool addresses, and code labels
2761 compute the address as an offset from the GOT, whose base is in
2762 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2763 differentiate them from global data objects. The returned
2764 address is the PIC reg + an unspec constant.
2765
2766 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2767 reg also appears in the address. */
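/* As a sketch of the two cases, a global `foo' turns into a load

     (mem (plus (reg pic) (const (unspec [(symbol_ref "foo")] 6))))

   whose result is returned in a register, while a static `bar' is
   returned directly as the address

     (plus (reg pic) (const (unspec [(symbol_ref "bar")] 7)))  */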
2768
2769 rtx
2770 legitimize_pic_address (orig, reg)
2771 rtx orig;
2772 rtx reg;
2773 {
2774 rtx addr = orig;
2775 rtx new = orig;
2776 rtx base;
2777
2778 if (GET_CODE (addr) == LABEL_REF
2779 || (GET_CODE (addr) == SYMBOL_REF
2780 && (CONSTANT_POOL_ADDRESS_P (addr)
2781 || SYMBOL_REF_FLAG (addr))))
2782 {
2783 /* This symbol may be referenced via a displacement from the PIC
2784 base address (@GOTOFF). */
2785
2786 current_function_uses_pic_offset_table = 1;
2787 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2788 new = gen_rtx_CONST (Pmode, new);
2789 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2790
2791 if (reg != 0)
2792 {
2793 emit_move_insn (reg, new);
2794 new = reg;
2795 }
2796 }
2797 else if (GET_CODE (addr) == SYMBOL_REF)
2798 {
2799 /* This symbol must be referenced via a load from the
2800 Global Offset Table (@GOT). */
2801
2802 current_function_uses_pic_offset_table = 1;
2803 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2804 new = gen_rtx_CONST (Pmode, new);
2805 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2806 new = gen_rtx_MEM (Pmode, new);
2807 RTX_UNCHANGING_P (new) = 1;
2808 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2809
2810 if (reg == 0)
2811 reg = gen_reg_rtx (Pmode);
2812 emit_move_insn (reg, new);
2813 new = reg;
2814 }
2815 else
2816 {
2817 if (GET_CODE (addr) == CONST)
2818 {
2819 addr = XEXP (addr, 0);
2820 if (GET_CODE (addr) == UNSPEC)
2821 {
2822 /* Check that the unspec is one of the ones we generate? */
2823 }
2824 else if (GET_CODE (addr) != PLUS)
2825 abort ();
2826 }
2827 if (GET_CODE (addr) == PLUS)
2828 {
2829 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2830
2831 /* Check first to see if this is a constant offset from a @GOTOFF
2832 symbol reference. */
2833 if ((GET_CODE (op0) == LABEL_REF
2834 || (GET_CODE (op0) == SYMBOL_REF
2835 && (CONSTANT_POOL_ADDRESS_P (op0)
2836 || SYMBOL_REF_FLAG (op0))))
2837 && GET_CODE (op1) == CONST_INT)
2838 {
2839 current_function_uses_pic_offset_table = 1;
2840 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2841 new = gen_rtx_PLUS (Pmode, new, op1);
2842 new = gen_rtx_CONST (Pmode, new);
2843 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2844
2845 if (reg != 0)
2846 {
2847 emit_move_insn (reg, new);
2848 new = reg;
2849 }
2850 }
2851 else
2852 {
2853 base = legitimize_pic_address (XEXP (addr, 0), reg);
2854 new = legitimize_pic_address (XEXP (addr, 1),
2855 base == reg ? NULL_RTX : reg);
2856
2857 if (GET_CODE (new) == CONST_INT)
2858 new = plus_constant (base, INTVAL (new));
2859 else
2860 {
2861 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2862 {
2863 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2864 new = XEXP (new, 1);
2865 }
2866 new = gen_rtx_PLUS (Pmode, base, new);
2867 }
2868 }
2869 }
2870 }
2871 return new;
2872 }
2873 \f
2874 /* Try machine-dependent ways of modifying an illegitimate address
2875 to be legitimate. If we find one, return the new, valid address.
2876 This macro is used in only one place: `memory_address' in explow.c.
2877
2878 OLDX is the address as it was before break_out_memory_refs was called.
2879 In some cases it is useful to look at this to decide what needs to be done.
2880
2881 MODE and WIN are passed so that this macro can use
2882 GO_IF_LEGITIMATE_ADDRESS.
2883
2884 It is always safe for this macro to do nothing. It exists to recognize
2885 opportunities to optimize the output.
2886
2887 For the 80386, we handle X+REG by loading X into a register R and
2888 using R+REG. R will go in a general reg and indexing will be used.
2889 However, if REG is a broken-out memory address or multiplication,
2890 nothing needs to be done because REG can certainly go in a general reg.
2891
2892 When -fpic is used, special handling is needed for symbolic references.
2893 See comments by legitimize_pic_address in i386.c for details. */
2894
2895 rtx
2896 legitimize_address (x, oldx, mode)
2897 register rtx x;
2898 register rtx oldx ATTRIBUTE_UNUSED;
2899 enum machine_mode mode;
2900 {
2901 int changed = 0;
2902 unsigned log;
2903
2904 if (TARGET_DEBUG_ADDR)
2905 {
2906 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2907 GET_MODE_NAME (mode));
2908 debug_rtx (x);
2909 }
2910
2911 if (flag_pic && SYMBOLIC_CONST (x))
2912 return legitimize_pic_address (x, 0);
2913
2914 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2915 if (GET_CODE (x) == ASHIFT
2916 && GET_CODE (XEXP (x, 1)) == CONST_INT
2917 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2918 {
2919 changed = 1;
2920 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2921 GEN_INT (1 << log));
2922 }
2923
2924 if (GET_CODE (x) == PLUS)
2925 {
2926 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2927
2928 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2929 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2930 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2931 {
2932 changed = 1;
2933 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2934 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2935 GEN_INT (1 << log));
2936 }
2937
2938 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2939 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2940 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2941 {
2942 changed = 1;
2943 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2944 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2945 GEN_INT (1 << log));
2946 }
2947
2948 /* Put multiply first if it isn't already. */
2949 if (GET_CODE (XEXP (x, 1)) == MULT)
2950 {
2951 rtx tmp = XEXP (x, 0);
2952 XEXP (x, 0) = XEXP (x, 1);
2953 XEXP (x, 1) = tmp;
2954 changed = 1;
2955 }
2956
2957 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2958 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2959 created by virtual register instantiation, register elimination, and
2960 similar optimizations. */
2961 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2962 {
2963 changed = 1;
2964 x = gen_rtx_PLUS (Pmode,
2965 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2966 XEXP (XEXP (x, 1), 0)),
2967 XEXP (XEXP (x, 1), 1));
2968 }
2969
2970 /* Canonicalize
2971 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2972 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2973 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2974 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2975 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2976 && CONSTANT_P (XEXP (x, 1)))
2977 {
2978 rtx constant;
2979 rtx other = NULL_RTX;
2980
2981 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2982 {
2983 constant = XEXP (x, 1);
2984 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2985 }
2986 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2987 {
2988 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2989 other = XEXP (x, 1);
2990 }
2991 else
2992 constant = 0;
2993
2994 if (constant)
2995 {
2996 changed = 1;
2997 x = gen_rtx_PLUS (Pmode,
2998 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2999 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3000 plus_constant (other, INTVAL (constant)));
3001 }
3002 }
3003
3004 if (changed && legitimate_address_p (mode, x, FALSE))
3005 return x;
3006
3007 if (GET_CODE (XEXP (x, 0)) == MULT)
3008 {
3009 changed = 1;
3010 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3011 }
3012
3013 if (GET_CODE (XEXP (x, 1)) == MULT)
3014 {
3015 changed = 1;
3016 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3017 }
3018
3019 if (changed
3020 && GET_CODE (XEXP (x, 1)) == REG
3021 && GET_CODE (XEXP (x, 0)) == REG)
3022 return x;
3023
3024 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3025 {
3026 changed = 1;
3027 x = legitimize_pic_address (x, 0);
3028 }
3029
3030 if (changed && legitimate_address_p (mode, x, FALSE))
3031 return x;
3032
3033 if (GET_CODE (XEXP (x, 0)) == REG)
3034 {
3035 register rtx temp = gen_reg_rtx (Pmode);
3036 register rtx val = force_operand (XEXP (x, 1), temp);
3037 if (val != temp)
3038 emit_move_insn (temp, val);
3039
3040 XEXP (x, 1) = temp;
3041 return x;
3042 }
3043
3044 else if (GET_CODE (XEXP (x, 1)) == REG)
3045 {
3046 register rtx temp = gen_reg_rtx (Pmode);
3047 register rtx val = force_operand (XEXP (x, 0), temp);
3048 if (val != temp)
3049 emit_move_insn (temp, val);
3050
3051 XEXP (x, 0) = temp;
3052 return x;
3053 }
3054 }
3055
3056 return x;
3057 }
3058 \f
3059 /* Print an integer constant expression in assembler syntax. Addition
3060 and subtraction are the only arithmetic that may appear in these
3061 expressions. FILE is the stdio stream to write to, X is the rtx, and
3062 CODE is the operand print code from the output string. */
3063
3064 static void
3065 output_pic_addr_const (file, x, code)
3066 FILE *file;
3067 rtx x;
3068 int code;
3069 {
3070 char buf[256];
3071
3072 switch (GET_CODE (x))
3073 {
3074 case PC:
3075 if (flag_pic)
3076 putc ('.', file);
3077 else
3078 abort ();
3079 break;
3080
3081 case SYMBOL_REF:
3082 assemble_name (file, XSTR (x, 0));
3083 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3084 fputs ("@PLT", file);
3085 break;
3086
3087 case LABEL_REF:
3088 x = XEXP (x, 0);
3089 /* FALLTHRU */
3090 case CODE_LABEL:
3091 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3092 assemble_name (asm_out_file, buf);
3093 break;
3094
3095 case CONST_INT:
3096 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3097 break;
3098
3099 case CONST:
3100 /* This used to output parentheses around the expression,
3101 but that does not work on the 386 (either ATT or BSD assembler). */
3102 output_pic_addr_const (file, XEXP (x, 0), code);
3103 break;
3104
3105 case CONST_DOUBLE:
3106 if (GET_MODE (x) == VOIDmode)
3107 {
3108 /* We can use %d if the number is <32 bits and positive. */
3109 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3110 fprintf (file, "0x%lx%08lx",
3111 (unsigned long) CONST_DOUBLE_HIGH (x),
3112 (unsigned long) CONST_DOUBLE_LOW (x));
3113 else
3114 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3115 }
3116 else
3117 /* We can't handle floating point constants;
3118 PRINT_OPERAND must handle them. */
3119 output_operand_lossage ("floating constant misused");
3120 break;
3121
3122 case PLUS:
3123 /* Some assemblers need integer constants to appear first. */
3124 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3125 {
3126 output_pic_addr_const (file, XEXP (x, 0), code);
3127 putc ('+', file);
3128 output_pic_addr_const (file, XEXP (x, 1), code);
3129 }
3130 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3131 {
3132 output_pic_addr_const (file, XEXP (x, 1), code);
3133 putc ('+', file);
3134 output_pic_addr_const (file, XEXP (x, 0), code);
3135 }
3136 else
3137 abort ();
3138 break;
3139
3140 case MINUS:
3141 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3142 output_pic_addr_const (file, XEXP (x, 0), code);
3143 putc ('-', file);
3144 output_pic_addr_const (file, XEXP (x, 1), code);
3145 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3146 break;
3147
3148 case UNSPEC:
3149 if (XVECLEN (x, 0) != 1)
3150 abort ();
3151 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3152 switch (XINT (x, 1))
3153 {
3154 case 6:
3155 fputs ("@GOT", file);
3156 break;
3157 case 7:
3158 fputs ("@GOTOFF", file);
3159 break;
3160 case 8:
3161 fputs ("@PLT", file);
3162 break;
3163 default:
3164 output_operand_lossage ("invalid UNSPEC as operand");
3165 break;
3166 }
3167 break;
3168
3169 default:
3170 output_operand_lossage ("invalid expression as operand");
3171 }
3172 }
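/* E.g. (const (unspec [(symbol_ref "foo")] 6)) prints as "foo@GOT";
   UNSPECs 7 and 8 likewise select the @GOTOFF and @PLT suffixes.  */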
3173
3174 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3175 We need to handle our special PIC relocations. */
3176
3177 void
3178 i386_dwarf_output_addr_const (file, x)
3179 FILE *file;
3180 rtx x;
3181 {
3182 fprintf (file, "%s", INT_ASM_OP);
3183 if (flag_pic)
3184 output_pic_addr_const (file, x, '\0');
3185 else
3186 output_addr_const (file, x);
3187 fputc ('\n', file);
3188 }
3189
3190 /* In the name of slightly smaller debug output, and to cater to
3191 general assembler lossage, recognize PIC+GOTOFF and turn it back
3192 into a direct symbol reference. */
3193
3194 rtx
3195 i386_simplify_dwarf_addr (orig_x)
3196 rtx orig_x;
3197 {
3198 rtx x = orig_x;
3199
3200 if (GET_CODE (x) != PLUS
3201 || GET_CODE (XEXP (x, 0)) != REG
3202 || GET_CODE (XEXP (x, 1)) != CONST)
3203 return orig_x;
3204
3205 x = XEXP (XEXP (x, 1), 0);
3206 if (GET_CODE (x) == UNSPEC
3207 && (XINT (x, 1) == 6
3208 || XINT (x, 1) == 7))
3209 return XVECEXP (x, 0, 0);
3210
3211 if (GET_CODE (x) == PLUS
3212 && GET_CODE (XEXP (x, 0)) == UNSPEC
3213 && GET_CODE (XEXP (x, 1)) == CONST_INT
3214 && (XINT (XEXP (x, 0), 1) == 6
3215 || XINT (XEXP (x, 0), 1) == 7))
3216 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3217
3218 return orig_x;
3219 }
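/* E.g. (plus (reg ebx) (const (unspec [(symbol_ref "foo")] 7)))
   simplifies back to plain (symbol_ref "foo").  */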
3220 \f
3221 static void
3222 put_condition_code (code, mode, reverse, fp, file)
3223 enum rtx_code code;
3224 enum machine_mode mode;
3225 int reverse, fp;
3226 FILE *file;
3227 {
3228 const char *suffix;
3229
3230 if (mode == CCFPmode || mode == CCFPUmode)
3231 {
3232 enum rtx_code second_code, bypass_code;
3233 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3234 if (bypass_code != NIL || second_code != NIL)
3235 abort();
3236 code = ix86_fp_compare_code_to_integer (code);
3237 mode = CCmode;
3238 }
3239 if (reverse)
3240 code = reverse_condition (code);
3241
3242 switch (code)
3243 {
3244 case EQ:
3245 suffix = "e";
3246 break;
3247 case NE:
3248 suffix = "ne";
3249 break;
3250 case GT:
3251 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3252 abort ();
3253 suffix = "g";
3254 break;
3255 case GTU:
3256 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3257 Those same assemblers have the same but opposite lossage on cmov. */
3258 if (mode != CCmode)
3259 abort ();
3260 suffix = fp ? "nbe" : "a";
3261 break;
3262 case LT:
3263 if (mode == CCNOmode || mode == CCGOCmode)
3264 suffix = "s";
3265 else if (mode == CCmode || mode == CCGCmode)
3266 suffix = "l";
3267 else
3268 abort ();
3269 break;
3270 case LTU:
3271 if (mode != CCmode)
3272 abort ();
3273 suffix = "b";
3274 break;
3275 case GE:
3276 if (mode == CCNOmode || mode == CCGOCmode)
3277 suffix = "ns";
3278 else if (mode == CCmode || mode == CCGCmode)
3279 suffix = "ge";
3280 else
3281 abort ();
3282 break;
3283 case GEU:
3284 /* ??? As above. */
3285 if (mode != CCmode)
3286 abort ();
3287 suffix = fp ? "nb" : "ae";
3288 break;
3289 case LE:
3290 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3291 abort ();
3292 suffix = "le";
3293 break;
3294 case LEU:
3295 if (mode != CCmode)
3296 abort ();
3297 suffix = "be";
3298 break;
3299 case UNORDERED:
3300 suffix = fp ? "u" : "p";
3301 break;
3302 case ORDERED:
3303 suffix = fp ? "nu" : "np";
3304 break;
3305 default:
3306 abort ();
3307 }
3308 fputs (suffix, file);
3309 }
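/* So, e.g., GT on integer flags yields "g", which the `C' operand
   code below splices into templates to form mnemonics like setg
   and cmovg.  */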
3310
3311 void
3312 print_reg (x, code, file)
3313 rtx x;
3314 int code;
3315 FILE *file;
3316 {
3317 if (REGNO (x) == ARG_POINTER_REGNUM
3318 || REGNO (x) == FRAME_POINTER_REGNUM
3319 || REGNO (x) == FLAGS_REG
3320 || REGNO (x) == FPSR_REG)
3321 abort ();
3322
3323 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3324 putc ('%', file);
3325
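/* Translate the print code into an internal selector: 1, 2, 4 and 8
   name the QImode, HImode, SImode and DImode register respectively,
   0 selects the high QImode half (%ah and friends), 3 the "st(0)"
   form for the 387 stack top, and 5 the MMX name.  */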
3326 if (code == 'w')
3327 code = 2;
3328 else if (code == 'b')
3329 code = 1;
3330 else if (code == 'k')
3331 code = 4;
3332 else if (code == 'q')
3333 code = 8;
3334 else if (code == 'y')
3335 code = 3;
3336 else if (code == 'h')
3337 code = 0;
3338 else if (code == 'm' || MMX_REG_P (x))
3339 code = 5;
3340 else
3341 code = GET_MODE_SIZE (GET_MODE (x));
3342
3343 /* Irritatingly, the AMD extended registers use a different naming
3344 convention than the normal registers do. */
3345 if (REX_INT_REG_P (x))
3346 {
3347 switch (code)
3348 {
3349 case 5:
3350 error ("Extended registers have no high halves\n");
3351 break;
3352 case 1:
3353 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3354 break;
3355 case 2:
3356 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3357 break;
3358 case 4:
3359 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3360 break;
3361 case 8:
3362 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3363 break;
3364 default:
3365 error ("Unsupported operand size for extended register.\n");
3366 break;
3367 }
3368 return;
3369 }
3370 switch (code)
3371 {
3372 case 5:
3373 fputs (hi_reg_name[REGNO (x)], file);
3374 break;
3375 case 3:
3376 if (STACK_TOP_P (x))
3377 {
3378 fputs ("st(0)", file);
3379 break;
3380 }
3381 /* FALLTHRU */
3382 case 8:
3383 case 4:
3384 case 12:
3385 if (! ANY_FP_REG_P (x))
3386 putc (code == 8 ? 'r' : 'e', file);
3387 /* FALLTHRU */
3388 case 16:
3389 case 2:
3390 fputs (hi_reg_name[REGNO (x)], file);
3391 break;
3392 case 1:
3393 fputs (qi_reg_name[REGNO (x)], file);
3394 break;
3395 case 0:
3396 fputs (qi_high_reg_name[REGNO (x)], file);
3397 break;
3398 default:
3399 abort ();
3400 }
3401 }
3402
3403 /* Meaning of CODE:
3404 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3405 C -- print opcode suffix for set/cmov insn.
3406 c -- like C, but print reversed condition
3407 R -- print the prefix for register names.
3408 z -- print the opcode suffix for the size of the current operand.
3409 * -- print a star (in certain assembler syntax)
3410 A -- print an absolute memory reference.
3411 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3412 s -- print a shift double count, followed by the assembler's argument
3413 delimiter.
3414 b -- print the QImode name of the register for the indicated operand.
3415 %b0 would print %al if operands[0] is reg 0.
3416 w -- likewise, print the HImode name of the register.
3417 k -- likewise, print the SImode name of the register.
3418 q -- likewise, print the DImode name of the register.
3419 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3420 y -- print "st(0)" instead of "st" as a register.
3421 m -- print "st(n)" as an mmx register.
3422 D -- print condition for SSE cmp instruction.
3423 */
3424
3425 void
3426 print_operand (file, x, code)
3427 FILE *file;
3428 rtx x;
3429 int code;
3430 {
3431 if (code)
3432 {
3433 switch (code)
3434 {
3435 case '*':
3436 if (ASSEMBLER_DIALECT == 0)
3437 putc ('*', file);
3438 return;
3439
3440 case 'A':
3441 if (ASSEMBLER_DIALECT == 0)
3442 putc ('*', file);
3443 else if (ASSEMBLER_DIALECT == 1)
3444 {
3445 /* Intel syntax. For absolute addresses, registers should not
3446 be surrounded by brackets. */
3447 if (GET_CODE (x) != REG)
3448 {
3449 putc ('[', file);
3450 PRINT_OPERAND (file, x, 0);
3451 putc (']', file);
3452 return;
3453 }
3454 }
3455
3456 PRINT_OPERAND (file, x, 0);
3457 return;
3458
3459
3460 case 'L':
3461 if (ASSEMBLER_DIALECT == 0)
3462 putc ('l', file);
3463 return;
3464
3465 case 'W':
3466 if (ASSEMBLER_DIALECT == 0)
3467 putc ('w', file);
3468 return;
3469
3470 case 'B':
3471 if (ASSEMBLER_DIALECT == 0)
3472 putc ('b', file);
3473 return;
3474
3475 case 'Q':
3476 if (ASSEMBLER_DIALECT == 0)
3477 putc ('l', file);
3478 return;
3479
3480 case 'S':
3481 if (ASSEMBLER_DIALECT == 0)
3482 putc ('s', file);
3483 return;
3484
3485 case 'T':
3486 if (ASSEMBLER_DIALECT == 0)
3487 putc ('t', file);
3488 return;
3489
3490 case 'z':
3491 /* 387 opcodes don't get size suffixes if the operands are
3492 registers. */
3493
3494 if (STACK_REG_P (x))
3495 return;
3496
3497 /* Derive the opcode suffix from the size of the operand. */
3498 switch (GET_MODE_SIZE (GET_MODE (x)))
3499 {
3500 case 2:
3501 #ifdef HAVE_GAS_FILDS_FISTS
3502 putc ('s', file);
3503 #endif
3504 return;
3505
3506 case 4:
3507 if (GET_MODE (x) == SFmode)
3508 {
3509 putc ('s', file);
3510 return;
3511 }
3512 else
3513 putc ('l', file);
3514 return;
3515
3516 case 12:
3517 case 16:
3518 putc ('t', file);
3519 return;
3520
3521 case 8:
3522 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3523 {
3524 #ifdef GAS_MNEMONICS
3525 putc ('q', file);
3526 #else
3527 putc ('l', file);
3528 putc ('l', file);
3529 #endif
3530 }
3531 else
3532 putc ('l', file);
3533 return;
3534
3535 default:
3536 abort ();
3537 }
3538
3539 case 'b':
3540 case 'w':
3541 case 'k':
3542 case 'q':
3543 case 'h':
3544 case 'y':
3545 case 'm':
3546 case 'X':
3547 case 'P':
3548 break;
3549
3550 case 's':
3551 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3552 {
3553 PRINT_OPERAND (file, x, 0);
3554 putc (',', file);
3555 }
3556 return;
3557
3558 case 'D':
3559 /* Little bit of braindamage here. The SSE compare instructions
3560 use completely different names for the comparisons than the
3561 fp conditional moves do. */
3562 switch (GET_CODE (x))
3563 {
3564 case EQ:
3565 case UNEQ:
3566 fputs ("eq", file);
3567 break;
3568 case LT:
3569 case UNLT:
3570 fputs ("lt", file);
3571 break;
3572 case LE:
3573 case UNLE:
3574 fputs ("le", file);
3575 break;
3576 case UNORDERED:
3577 fputs ("unord", file);
3578 break;
3579 case NE:
3580 case LTGT:
3581 fputs ("neq", file);
3582 break;
3583 case UNGE:
3584 case GE:
3585 fputs ("nlt", file);
3586 break;
3587 case UNGT:
3588 case GT:
3589 fputs ("nle", file);
3590 break;
3591 case ORDERED:
3592 fputs ("ord", file);
3593 break;
3594 default:
3595 abort ();
3596 break;
3597 }
3598 return;
3599 case 'C':
3600 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3601 return;
3602 case 'F':
3603 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3604 return;
3605
3606 /* Like above, but reverse condition */
3607 case 'c':
3608 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3609 return;
3610 case 'f':
3611 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3612 return;
3613
3614 default:
3615 {
3616 char str[50];
3617 sprintf (str, "invalid operand code `%c'", code);
3618 output_operand_lossage (str);
3619 }
3620 }
3621 }
3622
3623 if (GET_CODE (x) == REG)
3624 {
3625 PRINT_REG (x, code, file);
3626 }
3627
3628 else if (GET_CODE (x) == MEM)
3629 {
3630 /* No `byte ptr' prefix for call instructions. */
3631 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3632 {
3633 const char * size;
3634 switch (GET_MODE_SIZE (GET_MODE (x)))
3635 {
3636 case 1: size = "BYTE"; break;
3637 case 2: size = "WORD"; break;
3638 case 4: size = "DWORD"; break;
3639 case 8: size = "QWORD"; break;
3640 case 12: size = "XWORD"; break;
3641 case 16: size = "XMMWORD"; break;
3642 default:
3643 abort ();
3644 }
3645
3646 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3647 if (code == 'b')
3648 size = "BYTE";
3649 else if (code == 'w')
3650 size = "WORD";
3651 else if (code == 'k')
3652 size = "DWORD";
3653
3654 fputs (size, file);
3655 fputs (" PTR ", file);
3656 }
3657
3658 x = XEXP (x, 0);
3659 if (flag_pic && CONSTANT_ADDRESS_P (x))
3660 output_pic_addr_const (file, x, code);
3661 else
3662 output_address (x);
3663 }
3664
3665 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3666 {
3667 REAL_VALUE_TYPE r;
3668 long l;
3669
3670 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3671 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3672
3673 if (ASSEMBLER_DIALECT == 0)
3674 putc ('$', file);
3675 fprintf (file, "0x%lx", l);
3676 }
3677
3678 /* These float cases don't actually occur as immediate operands. */
3679 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3680 {
3681 REAL_VALUE_TYPE r;
3682 char dstr[30];
3683
3684 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3685 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3686 fprintf (file, "%s", dstr);
3687 }
3688
3689 else if (GET_CODE (x) == CONST_DOUBLE
3690 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3691 {
3692 REAL_VALUE_TYPE r;
3693 char dstr[30];
3694
3695 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3696 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3697 fprintf (file, "%s", dstr);
3698 }
3699 else
3700 {
3701 if (code != 'P')
3702 {
3703 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3704 {
3705 if (ASSEMBLER_DIALECT == 0)
3706 putc ('$', file);
3707 }
3708 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3709 || GET_CODE (x) == LABEL_REF)
3710 {
3711 if (ASSEMBLER_DIALECT == 0)
3712 putc ('$', file);
3713 else
3714 fputs ("OFFSET FLAT:", file);
3715 }
3716 }
3717 if (GET_CODE (x) == CONST_INT)
3718 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3719 else if (flag_pic)
3720 output_pic_addr_const (file, x, code);
3721 else
3722 output_addr_const (file, x);
3723 }
3724 }
3725 \f
3726 /* Print a memory operand whose address is ADDR. */
3727
3728 void
3729 print_operand_address (file, addr)
3730 FILE *file;
3731 register rtx addr;
3732 {
3733 struct ix86_address parts;
3734 rtx base, index, disp;
3735 int scale;
3736
3737 if (! ix86_decompose_address (addr, &parts))
3738 abort ();
3739
3740 base = parts.base;
3741 index = parts.index;
3742 disp = parts.disp;
3743 scale = parts.scale;
3744
3745 if (!base && !index)
3746 {
3747 /* A displacement-only address requires special attention. */
3748
3749 if (GET_CODE (disp) == CONST_INT)
3750 {
3751 if (ASSEMBLER_DIALECT != 0)
3752 {
3753 if (USER_LABEL_PREFIX[0] == 0)
3754 putc ('%', file);
3755 fputs ("ds:", file);
3756 }
3757 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3758 }
3759 else if (flag_pic)
3760 output_pic_addr_const (file, addr, 0);
3761 else
3762 output_addr_const (file, addr);
3763 }
3764 else
3765 {
3766 if (ASSEMBLER_DIALECT == 0)
3767 {
3768 if (disp)
3769 {
3770 if (flag_pic)
3771 output_pic_addr_const (file, disp, 0);
3772 else if (GET_CODE (disp) == LABEL_REF)
3773 output_asm_label (disp);
3774 else
3775 output_addr_const (file, disp);
3776 }
3777
3778 putc ('(', file);
3779 if (base)
3780 PRINT_REG (base, 0, file);
3781 if (index)
3782 {
3783 putc (',', file);
3784 PRINT_REG (index, 0, file);
3785 if (scale != 1)
3786 fprintf (file, ",%d", scale);
3787 }
3788 putc (')', file);
3789 }
3790 else
3791 {
3792 rtx offset = NULL_RTX;
3793
3794 if (disp)
3795 {
3796 /* Pull out the offset of a symbol; print any symbol itself. */
3797 if (GET_CODE (disp) == CONST
3798 && GET_CODE (XEXP (disp, 0)) == PLUS
3799 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3800 {
3801 offset = XEXP (XEXP (disp, 0), 1);
3802 disp = gen_rtx_CONST (VOIDmode,
3803 XEXP (XEXP (disp, 0), 0));
3804 }
3805
3806 if (flag_pic)
3807 output_pic_addr_const (file, disp, 0);
3808 else if (GET_CODE (disp) == LABEL_REF)
3809 output_asm_label (disp);
3810 else if (GET_CODE (disp) == CONST_INT)
3811 offset = disp;
3812 else
3813 output_addr_const (file, disp);
3814 }
3815
3816 putc ('[', file);
3817 if (base)
3818 {
3819 PRINT_REG (base, 0, file);
3820 if (offset)
3821 {
3822 if (INTVAL (offset) >= 0)
3823 putc ('+', file);
3824 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3825 }
3826 }
3827 else if (offset)
3828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3829 else
3830 putc ('0', file);
3831
3832 if (index)
3833 {
3834 putc ('+', file);
3835 PRINT_REG (index, 0, file);
3836 if (scale != 1)
3837 fprintf (file, "*%d", scale);
3838 }
3839 putc (']', file);
3840 }
3841 }
3842 }
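/* For example (a sketch), base ebx, index esi, scale 4 and
   displacement 12 come out as "12(%ebx,%esi,4)" in AT&T syntax and
   as "[ebx+12+esi*4]" in Intel syntax.  */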
3843 \f
3844 /* Split one or more DImode RTL references into pairs of SImode
3845 references. The RTL can be REG, offsettable MEM, integer constant, or
3846 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3847 split and "num" is its length. lo_half and hi_half are output arrays
3848 that parallel "operands". */
3849
3850 void
3851 split_di (operands, num, lo_half, hi_half)
3852 rtx operands[];
3853 int num;
3854 rtx lo_half[], hi_half[];
3855 {
3856 while (num--)
3857 {
3858 rtx op = operands[num];
3859 if (CONSTANT_P (op))
3860 split_double (op, &lo_half[num], &hi_half[num]);
3861 else if (! reload_completed)
3862 {
3863 lo_half[num] = gen_lowpart (SImode, op);
3864 hi_half[num] = gen_highpart (SImode, op);
3865 }
3866 else if (GET_CODE (op) == REG)
3867 {
3868 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3869 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3870 }
3871 else if (offsettable_memref_p (op))
3872 {
3873 rtx lo_addr = XEXP (op, 0);
3874 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3875 lo_half[num] = change_address (op, SImode, lo_addr);
3876 hi_half[num] = change_address (op, SImode, hi_addr);
3877 }
3878 else
3879 abort ();
3880 }
3881 }
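/* For illustration, a sketch of what the cases above produce: after
   reload a register pair such as (reg:DI 0) splits into (reg:SI 0)
   and (reg:SI 1); an offsettable MEM splits into SImode MEMs at the
   original address and at that address plus 4; constants are handled
   by split_double.  */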
3882 \f
3883 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3884 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3885 is the expression of the binary operation. The output may either be
3886 emitted here, or returned to the caller, like all output_* functions.
3887
3888 There is no guarantee that the operands are the same mode, as they
3889 might be within FLOAT or FLOAT_EXTEND expressions. */
3890
3891 #ifndef SYSV386_COMPAT
3892 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3893 wants to fix the assemblers because that causes incompatibility
3894 with gcc. No-one wants to fix gcc because that causes
3895 incompatibility with assemblers... You can use the option of
3896 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3897 #define SYSV386_COMPAT 1
3898 #endif
3899
3900 const char *
3901 output_387_binary_op (insn, operands)
3902 rtx insn;
3903 rtx *operands;
3904 {
3905 static char buf[30];
3906 const char *p;
3907 const char *ssep;
3908 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
3909
3910 #ifdef ENABLE_CHECKING
3911 /* Even if we do not want to check the inputs, this documents the input
3912 constraints, which helps in understanding the following code. */
3913 if (STACK_REG_P (operands[0])
3914 && ((REG_P (operands[1])
3915 && REGNO (operands[0]) == REGNO (operands[1])
3916 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3917 || (REG_P (operands[2])
3918 && REGNO (operands[0]) == REGNO (operands[2])
3919 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3920 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3921 ; /* ok */
3922 else if (!is_sse)
3923 abort ();
3924 #endif
3925
3926 switch (GET_CODE (operands[3]))
3927 {
3928 case PLUS:
3929 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3930 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3931 p = "fiadd";
3932 else
3933 p = "fadd";
3934 ssep = "add";
3935 break;
3936
3937 case MINUS:
3938 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3939 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3940 p = "fisub";
3941 else
3942 p = "fsub";
3943 ssep = "sub";
3944 break;
3945
3946 case MULT:
3947 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3948 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3949 p = "fimul";
3950 else
3951 p = "fmul";
3952 ssep = "mul";
3953 break;
3954
3955 case DIV:
3956 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3957 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3958 p = "fidiv";
3959 else
3960 p = "fdiv";
3961 ssep = "div";
3962 break;
3963
3964 default:
3965 abort ();
3966 }
3967
3968 if (is_sse)
3969 {
3970 strcpy (buf, ssep);
3971 if (GET_MODE (operands[0]) == SFmode)
3972 strcat (buf, "ss\t{%2, %0|%0, %2}");
3973 else
3974 strcat (buf, "sd\t{%2, %0|%0, %2}");
3975 return buf;
3976 }
3977 strcpy (buf, p);
3978
3979 switch (GET_CODE (operands[3]))
3980 {
3981 case MULT:
3982 case PLUS:
3983 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3984 {
3985 rtx temp = operands[2];
3986 operands[2] = operands[1];
3987 operands[1] = temp;
3988 }
3989
3990 /* We now know operands[0] == operands[1]. */
3991
3992 if (GET_CODE (operands[2]) == MEM)
3993 {
3994 p = "%z2\t%2";
3995 break;
3996 }
3997
3998 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3999 {
4000 if (STACK_TOP_P (operands[0]))
4001 /* How is it that we are storing to a dead operand[2]?
4002 Well, presumably operands[1] is dead too. We can't
4003 store the result to st(0) as st(0) gets popped on this
4004 instruction. Instead store to operands[2] (which I
4005 think has to be st(1)). st(1) will be popped later.
4006 gcc <= 2.8.1 didn't have this check and generated
4007 assembly code that the Unixware assembler rejected. */
4008 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4009 else
4010 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4011 break;
4012 }
4013
4014 if (STACK_TOP_P (operands[0]))
4015 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4016 else
4017 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4018 break;
4019
4020 case MINUS:
4021 case DIV:
4022 if (GET_CODE (operands[1]) == MEM)
4023 {
4024 p = "r%z1\t%1";
4025 break;
4026 }
4027
4028 if (GET_CODE (operands[2]) == MEM)
4029 {
4030 p = "%z2\t%2";
4031 break;
4032 }
4033
4034 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4035 {
4036 #if SYSV386_COMPAT
4037 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4038 derived assemblers, confusingly reverse the direction of
4039 the operation for fsub{r} and fdiv{r} when the
4040 destination register is not st(0). The Intel assembler
4041 doesn't have this brain damage. Read !SYSV386_COMPAT to
4042 figure out what the hardware really does. */
4043 if (STACK_TOP_P (operands[0]))
4044 p = "{p\t%0, %2|rp\t%2, %0}";
4045 else
4046 p = "{rp\t%2, %0|p\t%0, %2}";
4047 #else
4048 if (STACK_TOP_P (operands[0]))
4049 /* As above for fmul/fadd, we can't store to st(0). */
4050 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4051 else
4052 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4053 #endif
4054 break;
4055 }
4056
4057 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4058 {
4059 #if SYSV386_COMPAT
4060 if (STACK_TOP_P (operands[0]))
4061 p = "{rp\t%0, %1|p\t%1, %0}";
4062 else
4063 p = "{p\t%1, %0|rp\t%0, %1}";
4064 #else
4065 if (STACK_TOP_P (operands[0]))
4066 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4067 else
4068 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4069 #endif
4070 break;
4071 }
4072
4073 if (STACK_TOP_P (operands[0]))
4074 {
4075 if (STACK_TOP_P (operands[1]))
4076 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4077 else
4078 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4079 break;
4080 }
4081 else if (STACK_TOP_P (operands[1]))
4082 {
4083 #if SYSV386_COMPAT
4084 p = "{\t%1, %0|r\t%0, %1}";
4085 #else
4086 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4087 #endif
4088 }
4089 else
4090 {
4091 #if SYSV386_COMPAT
4092 p = "{r\t%2, %0|\t%0, %2}";
4093 #else
4094 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4095 #endif
4096 }
4097 break;
4098
4099 default:
4100 abort ();
4101 }
4102
4103 strcat (buf, p);
4104 return buf;
4105 }
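/* As an example of the templates built above: a PLUS with SSE SFmode
   operands returns "addss\t{%2, %0|%0, %2}", while a reg-stack PLUS
   where operands[2] dies and operands[0] is not at the top of the
   stack returns "faddp\t{%2, %0|%0, %2}".  */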
4106
4107 /* Output code for INSN to convert a float to a signed int. OPERANDS
4108 are the insn operands. The output may be [HSD]Imode and the input
4109 operand may be [SDX]Fmode. */
4110
4111 const char *
4112 output_fix_trunc (insn, operands)
4113 rtx insn;
4114 rtx *operands;
4115 {
4116 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4117 int dimode_p = GET_MODE (operands[0]) == DImode;
4118 rtx xops[4];
4119
4120 /* Jump through a hoop or two for DImode, since the hardware has no
4121 non-popping instruction. We used to do this a different way, but
4122 that was somewhat fragile and broke with post-reload splitters. */
4123 if (dimode_p && !stack_top_dies)
4124 output_asm_insn ("fld\t%y1", operands);
4125
4126 if (! STACK_TOP_P (operands[1]))
4127 abort ();
4128
4129 xops[0] = GEN_INT (12);
4130 xops[1] = adj_offsettable_operand (operands[2], 1);
4131 xops[1] = change_address (xops[1], QImode, NULL_RTX);
4132
4133 xops[2] = operands[0];
4134 if (GET_CODE (operands[0]) != MEM)
4135 xops[2] = operands[3];
4136
4137 output_asm_insn ("fnstcw\t%2", operands);
4138 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4139 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4140 output_asm_insn ("fldcw\t%2", operands);
4141 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
4142
4143 if (stack_top_dies || dimode_p)
4144 output_asm_insn ("fistp%z2\t%2", xops);
4145 else
4146 output_asm_insn ("fist%z2\t%2", xops);
4147
4148 output_asm_insn ("fldcw\t%2", operands);
4149
4150 if (GET_CODE (operands[0]) != MEM)
4151 {
4152 if (dimode_p)
4153 {
4154 split_di (operands+0, 1, xops+0, xops+1);
4155 split_di (operands+3, 1, xops+2, xops+3);
4156 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4157 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4158 }
4159 else if (GET_MODE (operands[0]) == SImode)
4160 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4161 else
4162 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4163 }
4164
4165 return "";
4166 }
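/* A sketch of the sequence emitted above for the common SImode case,
   with %2 naming the control-word slot: fnstcw saves the current
   control word, the mov/fldcw pair installs a copy whose high byte
   has been set to 12 (forcing the rounding-control bits to truncate),
   fist{p} stores the integer result, and the final fldcw restores the
   caller's rounding mode.  */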
4167
4168 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4169 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4170 when fucom should be used. */
4171
4172 const char *
4173 output_fp_compare (insn, operands, eflags_p, unordered_p)
4174 rtx insn;
4175 rtx *operands;
4176 int eflags_p, unordered_p;
4177 {
4178 int stack_top_dies;
4179 rtx cmp_op0 = operands[0];
4180 rtx cmp_op1 = operands[1];
4181 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4182
4183 if (eflags_p == 2)
4184 {
4185 cmp_op0 = cmp_op1;
4186 cmp_op1 = operands[2];
4187 }
4188 if (is_sse)
4189 {
4190 if (GET_MODE (operands[0]) == SFmode)
4191 if (unordered_p)
4192 return "ucomiss\t{%1, %0|%0, %1}";
4193 else
4194 return "comiss\t{%1, %0|%0, %y}";
4195 else
4196 if (unordered_p)
4197 return "ucomisd\t{%1, %0|%0, %1}";
4198 else
4199 return "comisd\t{%1, %0|%0, %y}";
4200 }
4201
4202 if (! STACK_TOP_P (cmp_op0))
4203 abort ();
4204
4205 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4206
4207 if (STACK_REG_P (cmp_op1)
4208 && stack_top_dies
4209 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4210 && REGNO (cmp_op1) != FIRST_STACK_REG)
4211 {
4212 /* If the top of the 387 stack dies, and the other operand is also a
4213 stack register that dies, then this must be a `fcompp' float
4214 compare. */
4215
4216 if (eflags_p == 1)
4217 {
4218 /* There is no double popping fcomi variant. Fortunately,
4219 eflags is immune from the fstp's cc clobbering. */
4220 if (unordered_p)
4221 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4222 else
4223 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4224 return "fstp\t%y0";
4225 }
4226 else
4227 {
4228 if (eflags_p == 2)
4229 {
4230 if (unordered_p)
4231 return "fucompp\n\tfnstsw\t%0";
4232 else
4233 return "fcompp\n\tfnstsw\t%0";
4234 }
4235 else
4236 {
4237 if (unordered_p)
4238 return "fucompp";
4239 else
4240 return "fcompp";
4241 }
4242 }
4243 }
4244 else
4245 {
4246 /* Encoded here as eflags_p << 3 | intmode << 2 | unordered_p << 1 | stack_top_dies. */
4247
4248 static const char * const alt[24] =
4249 {
4250 "fcom%z1\t%y1",
4251 "fcomp%z1\t%y1",
4252 "fucom%z1\t%y1",
4253 "fucomp%z1\t%y1",
4254
4255 "ficom%z1\t%y1",
4256 "ficomp%z1\t%y1",
4257 NULL,
4258 NULL,
4259
4260 "fcomi\t{%y1, %0|%0, %y1}",
4261 "fcomip\t{%y1, %0|%0, %y1}",
4262 "fucomi\t{%y1, %0|%0, %y1}",
4263 "fucomip\t{%y1, %0|%0, %y1}",
4264
4265 NULL,
4266 NULL,
4267 NULL,
4268 NULL,
4269
4270 "fcom%z2\t%y2\n\tfnstsw\t%0",
4271 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4272 "fucom%z2\t%y2\n\tfnstsw\t%0",
4273 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4274
4275 "ficom%z2\t%y2\n\tfnstsw\t%0",
4276 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4277 NULL,
4278 NULL
4279 };
4280
4281 int mask;
4282 const char *ret;
4283
4284 mask = eflags_p << 3;
4285 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4286 mask |= unordered_p << 1;
4287 mask |= stack_top_dies;
4288
4289 if (mask >= 24)
4290 abort ();
4291 ret = alt[mask];
4292 if (ret == NULL)
4293 abort ();
4294
4295 return ret;
4296 }
4297 }
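/* For example, an ordered fcomi compare whose stack top survives
   encodes as mask = 1 << 3 = 8 and selects
   "fcomi\t{%y1, %0|%0, %y1}" from the table above.  */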
4298
4299 /* Output assembler code to FILE to initialize basic-block profiling.
4300
4301 If profile_block_flag == 2
4302
4303 Output code to call the subroutine `__bb_init_trace_func'
4304 and pass two parameters to it. The first parameter is
4305 the address of a block allocated in the object module.
4306 The second parameter is the number of the first basic block
4307 of the function.
4308
4309 The name of the block is a local symbol made with this statement:
4310
4311 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4312
4313 Of course, since you are writing the definition of
4314 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4315 can take a short cut in the definition of this macro and use the
4316 name that you know will result.
4317
4318 The number of the first basic block of the function is
4319 passed to the macro in BLOCK_OR_LABEL.
4320
4321 If described in a virtual assembler language the code to be
4322 output looks like:
4323
4324 parameter1 <- LPBX0
4325 parameter2 <- BLOCK_OR_LABEL
4326 call __bb_init_trace_func
4327
4328 else if profile_block_flag != 0
4329
4330 Output code to call the subroutine `__bb_init_func'
4331 and pass a single parameter to it, which is the same
4332 as the first parameter to `__bb_init_trace_func'.
4333
4334 The first word of this parameter is a flag which will be nonzero if
4335 the object module has already been initialized. So test this word
4336 first, and do not call `__bb_init_func' if the flag is nonzero.
4337 Note: When profile_block_flag == 2 the test need not be done
4338 but `__bb_init_trace_func' *must* be called.
4339
4340 BLOCK_OR_LABEL may be used to generate a label number as a
4341 branch destination in case `__bb_init_func' will not be called.
4342
4343 If described in a virtual assembler language the code to be
4344 output looks like:
4345
4346 cmp (LPBX0),0
4347 jne local_label
4348 parameter1 <- LPBX0
4349 call __bb_init_func
4350 local_label:
4351 */
4352
4353 void
4354 ix86_output_function_block_profiler (file, block_or_label)
4355 FILE *file;
4356 int block_or_label;
4357 {
4358 static int num_func = 0;
4359 rtx xops[8];
4360 char block_table[80], false_label[80];
4361
4362 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4363
4364 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4365 xops[5] = stack_pointer_rtx;
4366 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4367
4368 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4369
4370 switch (profile_block_flag)
4371 {
4372 case 2:
4373 xops[2] = GEN_INT (block_or_label);
4374 xops[3] = gen_rtx_MEM (Pmode,
4375 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4376 xops[6] = GEN_INT (8);
4377
4378 output_asm_insn ("push{l}\t%2", xops);
4379 if (!flag_pic)
4380 output_asm_insn ("push{l}\t%1", xops);
4381 else
4382 {
4383 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4384 output_asm_insn ("push{l}\t%7", xops);
4385 }
4386 output_asm_insn ("call\t%P3", xops);
4387 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4388 break;
4389
4390 default:
4391 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4392
4393 xops[0] = const0_rtx;
4394 xops[2] = gen_rtx_MEM (Pmode,
4395 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4396 xops[3] = gen_rtx_MEM (Pmode,
4397 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4398 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4399 xops[6] = GEN_INT (4);
4400
4401 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4402
4403 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4404 output_asm_insn ("jne\t%2", xops);
4405
4406 if (!flag_pic)
4407 output_asm_insn ("push{l}\t%1", xops);
4408 else
4409 {
4410 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4411 output_asm_insn ("push{l}\t%7", xops);
4412 }
4413 output_asm_insn ("call\t%P3", xops);
4414 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4415 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4416 num_func++;
4417 break;
4418 }
4419 }
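/* As a sketch, the non-PIC profile_block_flag == 2 path above emits
   roughly:

	pushl $BLOCK_OR_LABEL
	pushl $LPBX0
	call __bb_init_trace_func
	addl $8, %esp
*/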
4420
4421 /* Output assembler code to FILE to increment a counter associated
4422 with basic block number BLOCKNO.
4423
4424 If profile_block_flag == 2
4425
4426 Output code to initialize the global structure `__bb' and
4427 call the function `__bb_trace_func' which will increment the
4428 counter.
4429
4430 `__bb' consists of two words. In the first word the number
4431 of the basic block has to be stored. In the second word
4432 the address of a block allocated in the object module
4433 has to be stored.
4434
4435 The basic block number is given by BLOCKNO.
4436
4437 The address of the block is given by the label created with
4438
4439 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4440
4441 by FUNCTION_BLOCK_PROFILER.
4442
4443 Of course, since you are writing the definition of
4444 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4445 can take a short cut in the definition of this macro and use the
4446 name that you know will result.
4447
4448 If described in a virtual assembler language the code to be
4449 output looks like:
4450
4451 move BLOCKNO -> (__bb)
4452 move LPBX0 -> (__bb+4)
4453 call __bb_trace_func
4454
4455 Note that function `__bb_trace_func' must not change the
4456 machine state, especially the flag register. To grant
4457 this, you must output code to save and restore registers
4458 either in this macro or in the macros MACHINE_STATE_SAVE
4459 and MACHINE_STATE_RESTORE. The last two macros will be
4460 used in the function `__bb_trace_func', so you must make
4461 sure that the function prologue does not change any
4462 register prior to saving it with MACHINE_STATE_SAVE.
4463
4464 else if profile_block_flag != 0
4465
4466 Output code to increment the counter directly.
4467 Basic blocks are numbered separately from zero within each
4468 compiled object module. The count associated with block number
4469 BLOCKNO is at index BLOCKNO in an array of words; the name of
4470 this array is a local symbol made with this statement:
4471
4472 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4473
4474 Of course, since you are writing the definition of
4475 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4476 can take a short cut in the definition of this macro and use the
4477 name that you know will result.
4478
4479 If described in a virtual assembler language the code to be
4480 output looks like:
4481
4482 inc (LPBX2+4*BLOCKNO)
4483 */
4484
4485 void
4486 ix86_output_block_profiler (file, blockno)
4487 FILE *file ATTRIBUTE_UNUSED;
4488 int blockno;
4489 {
4490 rtx xops[8], cnt_rtx;
4491 char counts[80];
4492 char *block_table = counts;
4493
4494 switch (profile_block_flag)
4495 {
4496 case 2:
4497 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4498
4499 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4500 xops[2] = GEN_INT (blockno);
4501 xops[3] = gen_rtx_MEM (Pmode,
4502 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4503 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4504 xops[5] = plus_constant (xops[4], 4);
4505 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4506 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4507
4508 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4509
4510 output_asm_insn ("pushf", xops);
4511 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4512 if (flag_pic)
4513 {
4514 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4515 output_asm_insn ("push{l}\t%7", xops);
4516 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4517 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4518 output_asm_insn ("pop{l}\t%7", xops);
4519 }
4520 else
4521 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4522 output_asm_insn ("call\t%P3", xops);
4523 output_asm_insn ("popf", xops);
4524
4525 break;
4526
4527 default:
4528 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4529 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4530 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4531
4532 if (blockno)
4533 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4534
4535 if (flag_pic)
4536 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4537
4538 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4539 output_asm_insn ("inc{l}\t%0", xops);
4540
4541 break;
4542 }
4543 }
4544 \f
4545 void
4546 ix86_expand_move (mode, operands)
4547 enum machine_mode mode;
4548 rtx operands[];
4549 {
4550 int strict = (reload_in_progress || reload_completed);
4551 rtx insn;
4552
4553 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4554 {
4555 /* Emit insns to move operands[1] into operands[0]. */
4556
4557 if (GET_CODE (operands[0]) == MEM)
4558 operands[1] = force_reg (Pmode, operands[1]);
4559 else
4560 {
4561 rtx temp = operands[0];
4562 if (GET_CODE (temp) != REG)
4563 temp = gen_reg_rtx (Pmode);
4564 temp = legitimize_pic_address (operands[1], temp);
4565 if (temp == operands[0])
4566 return;
4567 operands[1] = temp;
4568 }
4569 }
4570 else
4571 {
4572 if (GET_CODE (operands[0]) == MEM
4573 && (GET_MODE (operands[0]) == QImode
4574 || !push_operand (operands[0], mode))
4575 && GET_CODE (operands[1]) == MEM)
4576 operands[1] = force_reg (mode, operands[1]);
4577
4578 if (push_operand (operands[0], mode)
4579 && ! general_no_elim_operand (operands[1], mode))
4580 operands[1] = copy_to_mode_reg (mode, operands[1]);
4581
4582 if (FLOAT_MODE_P (mode))
4583 {
4584 /* If we are loading a floating point constant to a register,
4585 force the value to memory now, since we'll get better code
4586 out the back end. */
4587
4588 if (strict)
4589 ;
4590 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4591 && register_operand (operands[0], mode))
4592 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4593 }
4594 }
4595
4596 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4597
4598 emit_insn (insn);
4599 }
4600
4601 /* Attempt to expand a binary operator. Make the expansion closer to the
4602 actual machine than just general_operand, which will allow 3 separate
4603 memory references (one output, two input) in a single insn. */
4604
4605 void
4606 ix86_expand_binary_operator (code, mode, operands)
4607 enum rtx_code code;
4608 enum machine_mode mode;
4609 rtx operands[];
4610 {
4611 int matching_memory;
4612 rtx src1, src2, dst, op, clob;
4613
4614 dst = operands[0];
4615 src1 = operands[1];
4616 src2 = operands[2];
4617
4618 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4619 if (GET_RTX_CLASS (code) == 'c'
4620 && (rtx_equal_p (dst, src2)
4621 || immediate_operand (src1, mode)))
4622 {
4623 rtx temp = src1;
4624 src1 = src2;
4625 src2 = temp;
4626 }
4627
4628 /* If the destination is memory, and we do not have matching source
4629 operands, do things in registers. */
4630 matching_memory = 0;
4631 if (GET_CODE (dst) == MEM)
4632 {
4633 if (rtx_equal_p (dst, src1))
4634 matching_memory = 1;
4635 else if (GET_RTX_CLASS (code) == 'c'
4636 && rtx_equal_p (dst, src2))
4637 matching_memory = 2;
4638 else
4639 dst = gen_reg_rtx (mode);
4640 }
4641
4642 /* Both source operands cannot be in memory. */
4643 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4644 {
4645 if (matching_memory != 2)
4646 src2 = force_reg (mode, src2);
4647 else
4648 src1 = force_reg (mode, src1);
4649 }
4650
4651 /* If the operation is not commutative, source 1 cannot be a constant
4652 or non-matching memory. */
4653 if ((CONSTANT_P (src1)
4654 || (!matching_memory && GET_CODE (src1) == MEM))
4655 && GET_RTX_CLASS (code) != 'c')
4656 src1 = force_reg (mode, src1);
4657
4658 /* If optimizing, copy to regs to improve CSE */
4659 if (optimize && ! no_new_pseudos)
4660 {
4661 if (GET_CODE (dst) == MEM)
4662 dst = gen_reg_rtx (mode);
4663 if (GET_CODE (src1) == MEM)
4664 src1 = force_reg (mode, src1);
4665 if (GET_CODE (src2) == MEM)
4666 src2 = force_reg (mode, src2);
4667 }
4668
4669 /* Emit the instruction. */
4670
4671 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4672 if (reload_in_progress)
4673 {
4674 /* Reload doesn't know about the flags register, and doesn't know that
4675 it doesn't want to clobber it. We can only do this with PLUS. */
4676 if (code != PLUS)
4677 abort ();
4678 emit_insn (op);
4679 }
4680 else
4681 {
4682 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4683 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4684 }
4685
4686 /* Fix up the destination if needed. */
4687 if (dst != operands[0])
4688 emit_move_insn (operands[0], dst);
4689 }
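/* For instance, expanding a PLUS whose destination matches the first
   memory source leaves the MEM in place and, outside of reload, wraps
   the set in a PARALLEL with the flags clobber:

	(parallel [(set (mem:SI ...) (plus:SI (mem:SI ...) (reg:SI ...)))
	           (clobber (reg:CC 17))])
*/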
4690
4691 /* Return TRUE or FALSE depending on whether the binary operator meets the
4692 appropriate constraints. */
4693
4694 int
4695 ix86_binary_operator_ok (code, mode, operands)
4696 enum rtx_code code;
4697 enum machine_mode mode ATTRIBUTE_UNUSED;
4698 rtx operands[3];
4699 {
4700 /* Both source operands cannot be in memory. */
4701 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4702 return 0;
4703 /* If the operation is not commutative, source 1 cannot be a constant. */
4704 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4705 return 0;
4706 /* If the destination is memory, we must have a matching source operand. */
4707 if (GET_CODE (operands[0]) == MEM
4708 && ! (rtx_equal_p (operands[0], operands[1])
4709 || (GET_RTX_CLASS (code) == 'c'
4710 && rtx_equal_p (operands[0], operands[2]))))
4711 return 0;
4712 /* If the operation is not commutative and source 1 is memory, we must
4713 have a matching destination. */
4714 if (GET_CODE (operands[1]) == MEM
4715 && GET_RTX_CLASS (code) != 'c'
4716 && ! rtx_equal_p (operands[0], operands[1]))
4717 return 0;
4718 return 1;
4719 }
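/* Examples of the constraints above: "reg1 = reg1 - mem" and
   "mem1 = mem1 + reg" are accepted, while "mem1 = mem2 + reg" fails
   the matching-destination test and "reg = mem - reg" fails the
   non-commutative memory-source test.  */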
4720
4721 /* Attempt to expand a unary operator. Make the expansion closer to the
4722 actual machine than just general_operand, which will allow 2 separate
4723 memory references (one output, one input) in a single insn. */
4724
4725 void
4726 ix86_expand_unary_operator (code, mode, operands)
4727 enum rtx_code code;
4728 enum machine_mode mode;
4729 rtx operands[];
4730 {
4731 int matching_memory;
4732 rtx src, dst, op, clob;
4733
4734 dst = operands[0];
4735 src = operands[1];
4736
4737 /* If the destination is memory, and we do not have matching source
4738 operands, do things in registers. */
4739 matching_memory = 0;
4740 if (GET_CODE (dst) == MEM)
4741 {
4742 if (rtx_equal_p (dst, src))
4743 matching_memory = 1;
4744 else
4745 dst = gen_reg_rtx (mode);
4746 }
4747
4748 /* When source operand is memory, destination must match. */
4749 if (!matching_memory && GET_CODE (src) == MEM)
4750 src = force_reg (mode, src);
4751
4752 /* If optimizing, copy to regs to improve CSE */
4753 if (optimize && ! no_new_pseudos)
4754 {
4755 if (GET_CODE (dst) == MEM)
4756 dst = gen_reg_rtx (mode);
4757 if (GET_CODE (src) == MEM)
4758 src = force_reg (mode, src);
4759 }
4760
4761 /* Emit the instruction. */
4762
4763 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4764 if (reload_in_progress || code == NOT)
4765 {
4766 /* Reload doesn't know about the flags register, and doesn't know that
4767 it doesn't want to clobber it. */
4768 if (code != NOT)
4769 abort ();
4770 emit_insn (op);
4771 }
4772 else
4773 {
4774 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4775 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4776 }
4777
4778 /* Fix up the destination if needed. */
4779 if (dst != operands[0])
4780 emit_move_insn (operands[0], dst);
4781 }
4782
4783 /* Return TRUE or FALSE depending on whether the unary operator meets the
4784 appropriate constraints. */
4785
4786 int
4787 ix86_unary_operator_ok (code, mode, operands)
4788 enum rtx_code code ATTRIBUTE_UNUSED;
4789 enum machine_mode mode ATTRIBUTE_UNUSED;
4790 rtx operands[2] ATTRIBUTE_UNUSED;
4791 {
4792 /* If one of operands is memory, source and destination must match. */
4793 if ((GET_CODE (operands[0]) == MEM
4794 || GET_CODE (operands[1]) == MEM)
4795 && ! rtx_equal_p (operands[0], operands[1]))
4796 return FALSE;
4797 return TRUE;
4798 }
4799
4800 /* Return TRUE or FALSE depending on whether the first SET in INSN
4801 has source and destination with matching CC modes, and that the
4802 CC mode is at least as constrained as REQ_MODE. */
4803
4804 int
4805 ix86_match_ccmode (insn, req_mode)
4806 rtx insn;
4807 enum machine_mode req_mode;
4808 {
4809 rtx set;
4810 enum machine_mode set_mode;
4811
4812 set = PATTERN (insn);
4813 if (GET_CODE (set) == PARALLEL)
4814 set = XVECEXP (set, 0, 0);
4815 if (GET_CODE (set) != SET)
4816 abort ();
4817 if (GET_CODE (SET_SRC (set)) != COMPARE)
4818 abort ();
4819
4820 set_mode = GET_MODE (SET_DEST (set));
4821 switch (set_mode)
4822 {
4823 case CCNOmode:
4824 if (req_mode != CCNOmode
4825 && (req_mode != CCmode
4826 || XEXP (SET_SRC (set), 1) != const0_rtx))
4827 return 0;
4828 break;
4829 case CCmode:
4830 if (req_mode == CCGCmode)
4831 return 0;
4832 /* FALLTHRU */
4833 case CCGCmode:
4834 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4835 return 0;
4836 /* FALLTHRU */
4837 case CCGOCmode:
4838 if (req_mode == CCZmode)
4839 return 0;
4840 /* FALLTHRU */
4841 case CCZmode:
4842 break;
4843
4844 default:
4845 abort ();
4846 }
4847
4848 return (GET_MODE (SET_SRC (set)) == set_mode);
4849 }
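/* As an illustration of the checks above: an insn that sets the flags
   in CCZmode satisfies any REQ_MODE, one that sets them in CCGCmode
   satisfies CCGCmode and CCmode requests, and one that sets them in
   CCmode satisfies only a CCmode request.  */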
4850
4851 /* Generate insn patterns to do an integer compare of OPERANDS. */
4852
4853 static rtx
4854 ix86_expand_int_compare (code, op0, op1)
4855 enum rtx_code code;
4856 rtx op0, op1;
4857 {
4858 enum machine_mode cmpmode;
4859 rtx tmp, flags;
4860
4861 cmpmode = SELECT_CC_MODE (code, op0, op1);
4862 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4863
4864 /* This is very simple, but making the interface the same as in the
4865 FP case makes the rest of the code easier. */
4866 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4867 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4868
4869 /* Return the test that should be put into the flags user, i.e.
4870 the bcc, scc, or cmov instruction. */
4871 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4872 }
4873
4874 /* Figure out whether to use ordered or unordered fp comparisons.
4875 Return the appropriate mode to use. */
4876
4877 enum machine_mode
4878 ix86_fp_compare_mode (code)
4879 enum rtx_code code ATTRIBUTE_UNUSED;
4880 {
4881 /* ??? In order to make all comparisons reversible, we do all comparisons
4882 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4883 between trapping and nontrapping comparisons in all forms, we can make inequality
4884 comparisons trapping again, since it results in better code when using
4885 FCOM based compares. */
4886 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
4887 }
4888
4889 enum machine_mode
4890 ix86_cc_mode (code, op0, op1)
4891 enum rtx_code code;
4892 rtx op0, op1;
4893 {
4894 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4895 return ix86_fp_compare_mode (code);
4896 switch (code)
4897 {
4898 /* Only zero flag is needed. */
4899 case EQ: /* ZF=0 */
4900 case NE: /* ZF!=0 */
4901 return CCZmode;
4902 /* Codes needing carry flag. */
4903 case GEU: /* CF=0 */
4904 case GTU: /* CF=0 & ZF=0 */
4905 case LTU: /* CF=1 */
4906 case LEU: /* CF=1 | ZF=1 */
4907 return CCmode;
4908 /* Codes possibly doable only with sign flag when
4909 comparing against zero. */
4910 case GE: /* SF=OF or SF=0 */
4911 case LT: /* SF<>OF or SF=1 */
4912 if (op1 == const0_rtx)
4913 return CCGOCmode;
4914 else
4915 /* For other cases Carry flag is not required. */
4916 return CCGCmode;
4917 /* Codes doable only with the sign flag when comparing
4918 against zero, but for which we lack a jump instruction,
4919 so we need to use relational tests against overflow,
4920 which thus needs to be zero. */
4921 case GT: /* ZF=0 & SF=OF */
4922 case LE: /* ZF=1 | SF<>OF */
4923 if (op1 == const0_rtx)
4924 return CCNOmode;
4925 else
4926 return CCGCmode;
4927 default:
4928 abort ();
4929 }
4930 }
4931
4932 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4933
4934 int
4935 ix86_use_fcomi_compare (code)
4936 enum rtx_code code ATTRIBUTE_UNUSED;
4937 {
4938 enum rtx_code swapped_code = swap_condition (code);
4939 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4940 || (ix86_fp_comparison_cost (swapped_code)
4941 == ix86_fp_comparison_fcomi_cost (swapped_code)));
4942 }
4943
4944 /* Swap, force into registers, or otherwise massage the two operands
4945 to a fp comparison. The operands are updated in place; the new
4946 comparison code is returned. */
4947
4948 static enum rtx_code
4949 ix86_prepare_fp_compare_args (code, pop0, pop1)
4950 enum rtx_code code;
4951 rtx *pop0, *pop1;
4952 {
4953 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4954 rtx op0 = *pop0, op1 = *pop1;
4955 enum machine_mode op_mode = GET_MODE (op0);
4956 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
4957
4958 /* All of the unordered compare instructions only work on registers.
4959 The same is true of the XFmode compare instructions. The same is
4960 true of the fcomi compare instructions. */
4961
4962 if (!is_sse
4963 && (fpcmp_mode == CCFPUmode
4964 || op_mode == XFmode
4965 || op_mode == TFmode
4966 || ix86_use_fcomi_compare (code)))
4967 {
4968 op0 = force_reg (op_mode, op0);
4969 op1 = force_reg (op_mode, op1);
4970 }
4971 else
4972 {
4973 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4974 things around if they appear profitable, otherwise force op0
4975 into a register. */
4976
4977 if (standard_80387_constant_p (op0) == 0
4978 || (GET_CODE (op0) == MEM
4979 && ! (standard_80387_constant_p (op1) == 0
4980 || GET_CODE (op1) == MEM)))
4981 {
4982 rtx tmp;
4983 tmp = op0, op0 = op1, op1 = tmp;
4984 code = swap_condition (code);
4985 }
4986
4987 if (GET_CODE (op0) != REG)
4988 op0 = force_reg (op_mode, op0);
4989
4990 if (CONSTANT_P (op1))
4991 {
4992 if (standard_80387_constant_p (op1))
4993 op1 = force_reg (op_mode, op1);
4994 else
4995 op1 = validize_mem (force_const_mem (op_mode, op1));
4996 }
4997 }
4998
4999 /* Try to rearrange the comparison to make it cheaper. */
5000 if (ix86_fp_comparison_cost (code)
5001 > ix86_fp_comparison_cost (swap_condition (code))
5002 && (GET_CODE (op0) == REG || !reload_completed))
5003 {
5004 rtx tmp;
5005 tmp = op0, op0 = op1, op1 = tmp;
5006 code = swap_condition (code);
5007 if (GET_CODE (op0) != REG)
5008 op0 = force_reg (op_mode, op0);
5009 }
5010
5011 *pop0 = op0;
5012 *pop1 = op1;
5013 return code;
5014 }
5015
5016 /* Convert the comparison codes we use to represent FP comparisons to the
5017 integer code that will result in a proper branch. Return UNKNOWN if no
5018 such code is available. */
5019 static enum rtx_code
5020 ix86_fp_compare_code_to_integer (code)
5021 enum rtx_code code;
5022 {
5023 switch (code)
5024 {
5025 case GT:
5026 return GTU;
5027 case GE:
5028 return GEU;
5029 case ORDERED:
5030 case UNORDERED:
5031 return code;
5032 break;
5033 case UNEQ:
5034 return EQ;
5035 break;
5036 case UNLT:
5037 return LTU;
5038 break;
5039 case UNLE:
5040 return LEU;
5041 break;
5042 case LTGT:
5043 return NE;
5044 break;
5045 default:
5046 return UNKNOWN;
5047 }
5048 }
5049
5050 /* Split comparison code CODE into comparisons we can do using branch
5051 instructions. BYPASS_CODE is the comparison code for a branch that will
5052 branch around FIRST_CODE and SECOND_CODE. If one of the branches
5053 is not required, its code is set to NIL.
5054 We never require more than two branches. */
5055 static void
5056 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5057 enum rtx_code code, *bypass_code, *first_code, *second_code;
5058 {
5059 *first_code = code;
5060 *bypass_code = NIL;
5061 *second_code = NIL;
5062
5063 /* The fcomi comparison sets flags as follows:
5064
5065 cmp ZF PF CF
5066 > 0 0 0
5067 < 0 0 1
5068 = 1 0 0
5069 un 1 1 1 */
5070
5071 switch (code)
5072 {
5073 case GT: /* GTU - CF=0 & ZF=0 */
5074 case GE: /* GEU - CF=0 */
5075 case ORDERED: /* PF=0 */
5076 case UNORDERED: /* PF=1 */
5077 case UNEQ: /* EQ - ZF=1 */
5078 case UNLT: /* LTU - CF=1 */
5079 case UNLE: /* LEU - CF=1 | ZF=1 */
5080 case LTGT: /* EQ - ZF=0 */
5081 break;
5082 case LT: /* LTU - CF=1 - fails on unordered */
5083 *first_code = UNLT;
5084 *bypass_code = UNORDERED;
5085 break;
5086 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5087 *first_code = UNLE;
5088 *bypass_code = UNORDERED;
5089 break;
5090 case EQ: /* EQ - ZF=1 - fails on unordered */
5091 *first_code = UNEQ;
5092 *bypass_code = UNORDERED;
5093 break;
5094 case NE: /* NE - ZF=0 - fails on unordered */
5095 *first_code = LTGT;
5096 *second_code = UNORDERED;
5097 break;
5098 case UNGE: /* GEU - CF=0 - fails on unordered */
5099 *first_code = GE;
5100 *second_code = UNORDERED;
5101 break;
5102 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5103 *first_code = GT;
5104 *second_code = UNORDERED;
5105 break;
5106 default:
5107 abort ();
5108 }
5109 if (!TARGET_IEEE_FP)
5110 {
5111 *second_code = NIL;
5112 *bypass_code = NIL;
5113 }
5114 }
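/* For example, an IEEE EQ comparison splits into first_code = UNEQ
   with bypass_code = UNORDERED: the caller first branches around the
   test when the operands are unordered (PF set), and only then
   branches on ZF for the UNEQ test itself.  */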
5115
5116 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
5117 All the following functions use the number of instructions as a cost metric.
5118 In the future this should be tweaked to compute bytes for optimize_size and
5119 to take into account the performance of various instructions on various CPUs. */
5120 static int
5121 ix86_fp_comparison_arithmetics_cost (code)
5122 enum rtx_code code;
5123 {
5124 if (!TARGET_IEEE_FP)
5125 return 4;
5126 /* The cost of code output by ix86_expand_fp_compare. */
5127 switch (code)
5128 {
5129 case UNLE:
5130 case UNLT:
5131 case LTGT:
5132 case GT:
5133 case GE:
5134 case UNORDERED:
5135 case ORDERED:
5136 case UNEQ:
5137 return 4;
5138 break;
5139 case LT:
5140 case NE:
5141 case EQ:
5142 case UNGE:
5143 return 5;
5144 break;
5145 case LE:
5146 case UNGT:
5147 return 6;
5148 break;
5149 default:
5150 abort ();
5151 }
5152 }
5153
5154 /* Return cost of comparison done using fcomi operation.
5155 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5156 static int
5157 ix86_fp_comparison_fcomi_cost (code)
5158 enum rtx_code code;
5159 {
5160 enum rtx_code bypass_code, first_code, second_code;
5161 /* Return an arbitrarily high cost when the instruction is not supported - this
5162 prevents gcc from using it. */
5163 if (!TARGET_CMOVE)
5164 return 1024;
5165 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5166 return (bypass_code != NIL || second_code != NIL) + 2;
5167 }
5168
5169 /* Return cost of comparison done using sahf operation.
5170 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5171 static int
5172 ix86_fp_comparison_sahf_cost (code)
5173 enum rtx_code code;
5174 {
5175 enum rtx_code bypass_code, first_code, second_code;
5176 /* Return an arbitrarily high cost when the instruction is not preferred - this
5177 prevents gcc from using it. */
5178 if (!TARGET_USE_SAHF && !optimize_size)
5179 return 1024;
5180 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5181 return (bypass_code != NIL || second_code != NIL) + 3;
5182 }
5183
5184 /* Compute cost of the comparison done using any method.
5185 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5186 static int
5187 ix86_fp_comparison_cost (code)
5188 enum rtx_code code;
5189 {
5190 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5191 int min;
5192
5193 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5194 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5195
5196 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5197 if (min > sahf_cost)
5198 min = sahf_cost;
5199 if (min > fcomi_cost)
5200 min = fcomi_cost;
5201 return min;
5202 }
5203
5204 /* Generate insn patterns to do a floating point compare of OPERANDS. */
5205
5206 static rtx
5207 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5208 enum rtx_code code;
5209 rtx op0, op1, scratch;
5210 rtx *second_test;
5211 rtx *bypass_test;
5212 {
5213 enum machine_mode fpcmp_mode, intcmp_mode;
5214 rtx tmp, tmp2;
5215 int cost = ix86_fp_comparison_cost (code);
5216 enum rtx_code bypass_code, first_code, second_code;
5217
5218 fpcmp_mode = ix86_fp_compare_mode (code);
5219 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5220
5221 if (second_test)
5222 *second_test = NULL_RTX;
5223 if (bypass_test)
5224 *bypass_test = NULL_RTX;
5225
5226 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5227
5228 /* Do fcomi/sahf based test when profitable. */
5229 if ((bypass_code == NIL || bypass_test)
5230 && (second_code == NIL || second_test)
5231 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5232 {
5233 if (TARGET_CMOVE)
5234 {
5235 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5236 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5237 tmp);
5238 emit_insn (tmp);
5239 }
5240 else
5241 {
5242 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5243 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5244 if (!scratch)
5245 scratch = gen_reg_rtx (HImode);
5246 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5247 emit_insn (gen_x86_sahf_1 (scratch));
5248 }
5249
5250 /* The FP codes work out to act like unsigned. */
5251 intcmp_mode = fpcmp_mode;
5252 code = first_code;
5253 if (bypass_code != NIL)
5254 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5255 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5256 const0_rtx);
5257 if (second_code != NIL)
5258 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5259 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5260 const0_rtx);
5261 }
5262 else
5263 {
5264 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5265 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5266 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5267 if (!scratch)
5268 scratch = gen_reg_rtx (HImode);
5269 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5270
5271 /* In the unordered case, we have to check C2 for NaN's, which
5272 doesn't happen to work out to anything nice combination-wise.
5273 So do some bit twiddling on the value we've got in AH to come
5274 up with an appropriate set of condition codes. */
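/* The condition bits land in AH as C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so e.g. the 0x45 masks used below test C3|C2|C0
   together.  */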
5275
5276 intcmp_mode = CCNOmode;
5277 switch (code)
5278 {
5279 case GT:
5280 case UNGT:
5281 if (code == GT || !TARGET_IEEE_FP)
5282 {
5283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5284 code = EQ;
5285 }
5286 else
5287 {
5288 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5289 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5290 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5291 intcmp_mode = CCmode;
5292 code = GEU;
5293 }
5294 break;
5295 case LT:
5296 case UNLT:
5297 if (code == LT && TARGET_IEEE_FP)
5298 {
5299 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5300 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5301 intcmp_mode = CCmode;
5302 code = EQ;
5303 }
5304 else
5305 {
5306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5307 code = NE;
5308 }
5309 break;
5310 case GE:
5311 case UNGE:
5312 if (code == GE || !TARGET_IEEE_FP)
5313 {
5314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5315 code = EQ;
5316 }
5317 else
5318 {
5319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5321 GEN_INT (0x01)));
5322 code = NE;
5323 }
5324 break;
5325 case LE:
5326 case UNLE:
5327 if (code == LE && TARGET_IEEE_FP)
5328 {
5329 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5330 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5331 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5332 intcmp_mode = CCmode;
5333 code = LTU;
5334 }
5335 else
5336 {
5337 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5338 code = NE;
5339 }
5340 break;
5341 case EQ:
5342 case UNEQ:
5343 if (code == EQ && TARGET_IEEE_FP)
5344 {
5345 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5346 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5347 intcmp_mode = CCmode;
5348 code = EQ;
5349 }
5350 else
5351 {
5352 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5353 code = NE;
5354 break;
5355 }
5356 break;
5357 case NE:
5358 case LTGT:
5359 if (code == NE && TARGET_IEEE_FP)
5360 {
5361 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5362 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5363 GEN_INT (0x40)));
5364 code = NE;
5365 }
5366 else
5367 {
5368 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5369 code = EQ;
5370 }
5371 break;
5372
5373 case UNORDERED:
5374 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5375 code = NE;
5376 break;
5377 case ORDERED:
5378 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5379 code = EQ;
5380 break;
5381
5382 default:
5383 abort ();
5384 }
5385 }
5386
5387 /* Return the test that should be put into the flags user, i.e.
5388 the bcc, scc, or cmov instruction. */
5389 return gen_rtx_fmt_ee (code, VOIDmode,
5390 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5391 const0_rtx);
5392 }
5393
5394 rtx
5395 ix86_expand_compare (code, second_test, bypass_test)
5396 enum rtx_code code;
5397 rtx *second_test, *bypass_test;
5398 {
5399 rtx op0, op1, ret;
5400 op0 = ix86_compare_op0;
5401 op1 = ix86_compare_op1;
5402
5403 if (second_test)
5404 *second_test = NULL_RTX;
5405 if (bypass_test)
5406 *bypass_test = NULL_RTX;
5407
5408 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5409 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
5410 second_test, bypass_test);
5411 else
5412 ret = ix86_expand_int_compare (code, op0, op1);
5413
5414 return ret;
5415 }
5416
5417 void
5418 ix86_expand_branch (code, label)
5419 enum rtx_code code;
5420 rtx label;
5421 {
5422 rtx tmp;
5423
5424 switch (GET_MODE (ix86_compare_op0))
5425 {
5426 case QImode:
5427 case HImode:
5428 case SImode:
5429 tmp = ix86_expand_compare (code, NULL, NULL);
5430 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5431 gen_rtx_LABEL_REF (VOIDmode, label),
5432 pc_rtx);
5433 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5434 return;
5435
5436 case SFmode:
5437 case DFmode:
5438 case XFmode:
5439 case TFmode:
5440 /* Don't expand the comparison early, so that we get better code
5441 when the jump pass or whoever else decides to reverse the comparison. */
5442 {
5443 rtvec vec;
5444 int use_fcomi;
5445
5446 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5447 &ix86_compare_op1);
5448
5449 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5450 ix86_compare_op0, ix86_compare_op1);
5451 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5452 gen_rtx_LABEL_REF (VOIDmode, label),
5453 pc_rtx);
5454 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5455
5456 use_fcomi = ix86_use_fcomi_compare (code);
5457 vec = rtvec_alloc (3 + !use_fcomi);
5458 RTVEC_ELT (vec, 0) = tmp;
5459 RTVEC_ELT (vec, 1)
5460 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5461 RTVEC_ELT (vec, 2)
5462 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5463 if (! use_fcomi)
5464 RTVEC_ELT (vec, 3)
5465 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5466
5467 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5468 return;
5469 }
5470
5471 case DImode:
5472 /* Expand DImode branch into multiple compare+branch. */
5473 {
5474 rtx lo[2], hi[2], label2;
5475 enum rtx_code code1, code2, code3;
5476
5477 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5478 {
5479 tmp = ix86_compare_op0;
5480 ix86_compare_op0 = ix86_compare_op1;
5481 ix86_compare_op1 = tmp;
5482 code = swap_condition (code);
5483 }
5484 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5485 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5486
5487 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5488 avoid two branches. This costs one extra insn, so disable when
5489 optimizing for size. */
5490
5491 if ((code == EQ || code == NE)
5492 && (!optimize_size
5493 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5494 {
5495 rtx xor0, xor1;
5496
5497 xor1 = hi[0];
5498 if (hi[1] != const0_rtx)
5499 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5500 NULL_RTX, 0, OPTAB_WIDEN);
5501
5502 xor0 = lo[0];
5503 if (lo[1] != const0_rtx)
5504 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5505 NULL_RTX, 0, OPTAB_WIDEN);
5506
5507 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5508 NULL_RTX, 0, OPTAB_WIDEN);
5509
5510 ix86_compare_op0 = tmp;
5511 ix86_compare_op1 = const0_rtx;
5512 ix86_expand_branch (code, label);
5513 return;
5514 }
5515
5516 /* Otherwise, if we are doing a less-than or greater-or-equal-than
5517 comparison, op1 is a constant, and the low word is zero, then we
5518 can just examine the high word. */
5519
5520 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5521 switch (code)
5522 {
5523 case LT: case LTU: case GE: case GEU:
5524 ix86_compare_op0 = hi[0];
5525 ix86_compare_op1 = hi[1];
5526 ix86_expand_branch (code, label);
5527 return;
5528 default:
5529 break;
5530 }
5531
5532 /* Otherwise, we need two or three jumps. */
5533
5534 label2 = gen_label_rtx ();
5535
5536 code1 = code;
5537 code2 = swap_condition (code);
5538 code3 = unsigned_condition (code);
5539
5540 switch (code)
5541 {
5542 case LT: case GT: case LTU: case GTU:
5543 break;
5544
5545 case LE: code1 = LT; code2 = GT; break;
5546 case GE: code1 = GT; code2 = LT; break;
5547 case LEU: code1 = LTU; code2 = GTU; break;
5548 case GEU: code1 = GTU; code2 = LTU; break;
5549
5550 case EQ: code1 = NIL; code2 = NE; break;
5551 case NE: code2 = NIL; break;
5552
5553 default:
5554 abort ();
5555 }
5556
5557 /*
5558 * a < b =>
5559 * if (hi(a) < hi(b)) goto true;
5560 * if (hi(a) > hi(b)) goto false;
5561 * if (lo(a) < lo(b)) goto true;
5562 * false:
5563 */
5564
5565 ix86_compare_op0 = hi[0];
5566 ix86_compare_op1 = hi[1];
5567
5568 if (code1 != NIL)
5569 ix86_expand_branch (code1, label);
5570 if (code2 != NIL)
5571 ix86_expand_branch (code2, label2);
5572
5573 ix86_compare_op0 = lo[0];
5574 ix86_compare_op1 = lo[1];
5575 ix86_expand_branch (code3, label);
5576
5577 if (code2 != NIL)
5578 emit_label (label2);
5579 return;
5580 }
5581
5582 default:
5583 abort ();
5584 }
5585 }
5586
5587 /* Split branch based on floating point condition. */
5588 void
5589 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5590 rtx condition, op1, op2, target1, target2, tmp;
5591 {
5592 rtx second, bypass;
5593 rtx label = NULL_RTX;
5594 enum rtx_code code = GET_CODE (condition);
5595
5596 if (target2 != pc_rtx)
5597 {
5598 rtx tmp = target2;
5599 code = reverse_condition_maybe_unordered (code);
5600 target2 = target1;
5601 target1 = tmp;
5602 }
5603
5604 condition = ix86_expand_fp_compare (code, op1, op2,
5605 tmp, &second, &bypass);
5606 if (bypass != NULL_RTX)
5607 {
5608 label = gen_label_rtx ();
5609 emit_jump_insn (gen_rtx_SET
5610 (VOIDmode, pc_rtx,
5611 gen_rtx_IF_THEN_ELSE (VOIDmode,
5612 bypass,
5613 gen_rtx_LABEL_REF (VOIDmode,
5614 label),
5615 pc_rtx)));
5616 }
5617 /* The AMD Athlon, and probably other CPUs too, has a fast bypass path between
5618 the comparison and the first branch. The second branch takes longer to
5619 execute, so place the branch that is harder to predict first, if possible. */
5620 if (second != NULL_RTX
5621 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5622 {
5623 rtx tmp = condition;
5624 condition = second;
5625 second = tmp;
5626 }
5627 emit_jump_insn (gen_rtx_SET
5628 (VOIDmode, pc_rtx,
5629 gen_rtx_IF_THEN_ELSE (VOIDmode,
5630 condition, target1, target2)));
5631 if (second != NULL_RTX)
5632 emit_jump_insn (gen_rtx_SET
5633 (VOIDmode, pc_rtx,
5634 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5635 if (label != NULL_RTX)
5636 emit_label (label);
5637 }
5638
5639 int
5640 ix86_expand_setcc (code, dest)
5641 enum rtx_code code;
5642 rtx dest;
5643 {
5644 rtx ret, tmp, tmpreg;
5645 rtx second_test, bypass_test;
5646 int type;
5647
5648 if (GET_MODE (ix86_compare_op0) == DImode)
5649 return 0; /* FAIL */
5650
5651 /* Three modes of generation:
5652 0 -- destination does not overlap compare sources:
5653 clear dest first, emit strict_low_part setcc.
5654 1 -- destination does overlap compare sources:
5655 emit subreg setcc, zero extend.
5656 2 -- destination is in QImode:
5657 emit setcc only.
5658 */
5659
5660 type = 0;
5661
5662 if (GET_MODE (dest) == QImode)
5663 type = 2;
5664 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5665 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5666 type = 1;
5667
5668 if (type == 0)
5669 emit_move_insn (dest, const0_rtx);
5670
5671 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5672 PUT_MODE (ret, QImode);
5673
5674 tmp = dest;
5675 tmpreg = dest;
5676 if (type == 0)
5677 {
5678 tmp = gen_lowpart (QImode, dest);
5679 tmpreg = tmp;
5680 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5681 }
5682 else if (type == 1)
5683 {
5684 if (!cse_not_expected)
5685 tmp = gen_reg_rtx (QImode);
5686 else
5687 tmp = gen_lowpart (QImode, dest);
5688 tmpreg = tmp;
5689 }
5690
5691 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5692 if (bypass_test || second_test)
5693 {
5694 rtx test = second_test;
5695 int bypass = 0;
5696 rtx tmp2 = gen_reg_rtx (QImode);
5697 if (bypass_test)
5698 {
5699 if (second_test)
5700 abort ();
5701 test = bypass_test;
5702 bypass = 1;
5703 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5704 }
5705 PUT_MODE (test, QImode);
5706 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5707
5708 if (bypass)
5709 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5710 else
5711 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
5712 }
5713
5714 if (type == 1)
5715 {
5716 rtx clob;
5717
5718 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5719 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5720 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5721 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5722 emit_insn (tmp);
5723 }
5724
5725 return 1; /* DONE */
5726 }
5727
5728 int
5729 ix86_expand_int_movcc (operands)
5730 rtx operands[];
5731 {
5732 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5733 rtx compare_seq, compare_op;
5734 rtx second_test, bypass_test;
5735
5736 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
5737 When the comparison is done with an immediate, we can convert it to LTU
5738 or GEU by altering the integer constant. */
5739
5740 if ((code == LEU || code == GTU)
5741 && GET_CODE (ix86_compare_op1) == CONST_INT
5742 && GET_MODE (operands[0]) != HImode
5743 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5744 && GET_CODE (operands[2]) == CONST_INT
5745 && GET_CODE (operands[3]) == CONST_INT)
5746 {
5747 if (code == LEU)
5748 code = LTU;
5749 else
5750 code = GEU;
5751 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5752 }
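/* E.g. a "x <=u 5 ? ct : cf" select reaches the code below as
   "x <u 6 ? ct : cf", which the sbbl-based sequences can handle
   directly.  */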
5753
5754 start_sequence ();
5755 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5756 compare_seq = gen_sequence ();
5757 end_sequence ();
5758
5759 compare_code = GET_CODE (compare_op);
5760
5761 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5762 HImode insns, we'd be swallowed in word prefix ops. */
5763
5764 if (GET_MODE (operands[0]) != HImode
5765 && GET_CODE (operands[2]) == CONST_INT
5766 && GET_CODE (operands[3]) == CONST_INT)
5767 {
5768 rtx out = operands[0];
5769 HOST_WIDE_INT ct = INTVAL (operands[2]);
5770 HOST_WIDE_INT cf = INTVAL (operands[3]);
5771 HOST_WIDE_INT diff;
5772
5773 if ((compare_code == LTU || compare_code == GEU)
5774 && !second_test && !bypass_test)
5775 {
5776
5777 /* Detect overlap between destination and compare sources. */
5778 rtx tmp = out;
5779
5780 /* To simplify the rest of the code, restrict to the GEU case. */
5781 if (compare_code == LTU)
5782 {
5783 HOST_WIDE_INT tmp = ct;
5784 ct = cf;
5785 cf = tmp;
5786 compare_code = reverse_condition (compare_code);
5787 code = reverse_condition (code);
5788 }
5789 diff = ct - cf;
5790
5791 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5792 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5793 tmp = gen_reg_rtx (SImode);
5794
5795 emit_insn (compare_seq);
5796 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5797
5798 if (diff == 1)
5799 {
5800 /*
5801 * cmpl op0,op1
5802 * sbbl dest,dest
5803 * [addl dest, ct]
5804 *
5805 * Size 5 - 8.
5806 */
5807 if (ct)
5808 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
5809 }
5810 else if (cf == -1)
5811 {
5812 /*
5813 * cmpl op0,op1
5814 * sbbl dest,dest
5815 * orl $ct, dest
5816 *
5817 * Size 8.
5818 */
5819 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
5820 }
5821 else if (diff == -1 && ct)
5822 {
5823 /*
5824 * cmpl op0,op1
5825 * sbbl dest,dest
5826 * xorl $-1, dest
5827 * [addl dest, cf]
5828 *
5829 * Size 8 - 11.
5830 */
5831 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5832 if (cf)
5833 		emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
5834 }
5835 else
5836 {
5837 /*
5838 * cmpl op0,op1
5839 * sbbl dest,dest
5840 * andl cf - ct, dest
5841 * [addl dest, ct]
5842 *
5843 * Size 8 - 11.
5844 */
5845 	      emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
5846 	      if (ct)
5847 		emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
5848 }
5849
5850 if (tmp != out)
5851 emit_move_insn (out, tmp);
5852
5853 return 1; /* DONE */
5854 }
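	  /* To make the sbb sequences above concrete: for
	     "dest = (op0 >= op1) ? ct : cf" (unsigned), sbbl leaves dest
	     equal to -1 when op0 < op1 and 0 otherwise, so

		cmpl	op1, op0
		sbbl	%eax, %eax	# eax = LTU ? -1 : 0
		andl	$(cf-ct), %eax	# eax = LTU ? cf-ct : 0
		addl	$ct, %eax	# eax = LTU ? cf : ct

	     selects ct exactly when the GEU condition holds, with no
	     branches.  */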
5855
5856 diff = ct - cf;
5857 if (diff < 0)
5858 {
5859 HOST_WIDE_INT tmp;
5860 tmp = ct, ct = cf, cf = tmp;
5861 diff = -diff;
5862 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5863 {
5864 	      /* We may be reversing an unordered compare to a normal compare,
5865 		 which is not valid in general (we may convert a non-trapping
5866 		 condition to a trapping one); however, on i386 we currently
5867 		 emit all comparisons unordered.  */
5868 compare_code = reverse_condition_maybe_unordered (compare_code);
5869 code = reverse_condition_maybe_unordered (code);
5870 }
5871 else
5872 {
5873 compare_code = reverse_condition (compare_code);
5874 code = reverse_condition (code);
5875 }
5876 }
5877 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5878 || diff == 3 || diff == 5 || diff == 9)
5879 {
5880 /*
5881 * xorl dest,dest
5882 * cmpl op1,op2
5883 * setcc dest
5884 * lea cf(dest*(ct-cf)),dest
5885 *
5886 * Size 14.
5887 *
5888 * This also catches the degenerate setcc-only case.
5889 */
5890
5891 rtx tmp;
5892 int nops;
5893
5894 out = emit_store_flag (out, code, ix86_compare_op0,
5895 ix86_compare_op1, VOIDmode, 0, 1);
5896
5897 nops = 0;
5898 if (diff == 1)
5899 tmp = out;
5900 else
5901 {
5902 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5903 nops++;
5904 if (diff & 1)
5905 {
5906 tmp = gen_rtx_PLUS (SImode, tmp, out);
5907 nops++;
5908 }
5909 }
5910 if (cf != 0)
5911 {
5912 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5913 nops++;
5914 }
5915 if (tmp != out)
5916 {
5917 if (nops == 0)
5918 emit_move_insn (out, tmp);
5919 else if (nops == 1)
5920 {
5921 rtx clob;
5922
5923 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5924 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5925
5926 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5927 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5928 emit_insn (tmp);
5929 }
5930 else
5931 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5932 }
5933 if (out != operands[0])
5934 emit_move_insn (operands[0], out);
5935
5936 return 1; /* DONE */
5937 }
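	  /* The lea above relies on dest being 0 or 1 after the setcc:
	     an x86 address can scale an index by 1, 2, 4 or 8 and can add
	     the same register once more as a base, so diffs of 1, 2, 3, 4,
	     5, 8 and 9 are all reachable, e.g. for diff == 5:

		leal	cf(%eax,%eax,4), %eax

	     which is why exactly those seven differences are accepted.  */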
5938
5939 /*
5940 * General case: Jumpful:
5941 * xorl dest,dest cmpl op1, op2
5942 * cmpl op1, op2 movl ct, dest
5943 * setcc dest jcc 1f
5944 * decl dest movl cf, dest
5945 * andl (cf-ct),dest 1:
5946 * addl ct,dest
5947 *
5948 * Size 20. Size 14.
5949 *
5950 * This is reasonably steep, but branch mispredict costs are
5951 * high on modern cpus, so consider failing only if optimizing
5952 * for space.
5953 *
5954 * %%% Parameterize branch_cost on the tuning architecture, then
5955 * use that. The 80386 couldn't care less about mispredicts.
5956 */
5957
5958 if (!optimize_size && !TARGET_CMOVE)
5959 {
5960 if (ct == 0)
5961 {
5962 ct = cf;
5963 cf = 0;
5964 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5965 {
5966 	      /* We may be reversing an unordered compare to a normal compare,
5967 		 which is not valid in general (we may convert a non-trapping
5968 		 condition to a trapping one); however, on i386 we currently
5969 		 emit all comparisons unordered.  */
5970 compare_code = reverse_condition_maybe_unordered (compare_code);
5971 code = reverse_condition_maybe_unordered (code);
5972 }
5973 else
5974 {
5975 compare_code = reverse_condition (compare_code);
5976 code = reverse_condition (code);
5977 }
5978 }
5979
5980 out = emit_store_flag (out, code, ix86_compare_op0,
5981 ix86_compare_op1, VOIDmode, 0, 1);
5982
5983 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5984 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5985 if (ct != 0)
5986 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5987 if (out != operands[0])
5988 emit_move_insn (operands[0], out);
5989
5990 return 1; /* DONE */
5991 }
5992 }
5993
5994 if (!TARGET_CMOVE)
5995 {
5996 /* Try a few things more with specific constants and a variable. */
5997
5998 optab op;
5999 rtx var, orig_out, out, tmp;
6000
6001 if (optimize_size)
6002 return 0; /* FAIL */
6003
6004       /* If one of the two operands is an interesting constant, first load
6005 	 a 0/-1 constant and then mask in the variable with a logical operation.  */
6006
6007 if (GET_CODE (operands[2]) == CONST_INT)
6008 {
6009 var = operands[3];
6010 if (INTVAL (operands[2]) == 0)
6011 operands[3] = constm1_rtx, op = and_optab;
6012 else if (INTVAL (operands[2]) == -1)
6013 operands[3] = const0_rtx, op = ior_optab;
6014 else
6015 return 0; /* FAIL */
6016 }
6017 else if (GET_CODE (operands[3]) == CONST_INT)
6018 {
6019 var = operands[2];
6020 if (INTVAL (operands[3]) == 0)
6021 operands[2] = constm1_rtx, op = and_optab;
6022 else if (INTVAL (operands[3]) == -1)
6023 operands[2] = const0_rtx, op = ior_optab;
6024 else
6025 return 0; /* FAIL */
6026 }
6027 else
6028 return 0; /* FAIL */
6029
6030 orig_out = operands[0];
6031 tmp = gen_reg_rtx (GET_MODE (orig_out));
6032 operands[0] = tmp;
6033
6034 /* Recurse to get the constant loaded. */
6035 if (ix86_expand_int_movcc (operands) == 0)
6036 return 0; /* FAIL */
6037
6038 /* Mask in the interesting variable. */
6039 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6040 OPTAB_WIDEN);
6041 if (out != orig_out)
6042 emit_move_insn (orig_out, out);
6043
6044 return 1; /* DONE */
6045 }
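      /* Sketch of the masking performed above: when operands[2] is 0, the
	 recursive call loads tmp = cond ? 0 : -1, and a single AND folds
	 the variable in:

	    tmp = cond ? 0 : -1;
	    out = var & tmp;		-- i.e. cond ? 0 : var

	 and the -1 case works the same way with IOR.  */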
6046
6047 /*
6048 * For comparison with above,
6049 *
6050 * movl cf,dest
6051 * movl ct,tmp
6052 * cmpl op1,op2
6053 * cmovcc tmp,dest
6054 *
6055 * Size 15.
6056 */
6057
6058 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6059 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6060 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6061 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6062
6063 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6064 {
6065 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6066 emit_move_insn (tmp, operands[3]);
6067 operands[3] = tmp;
6068 }
6069 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6070 {
6071 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6072 emit_move_insn (tmp, operands[2]);
6073 operands[2] = tmp;
6074 }
6075
6076 emit_insn (compare_seq);
6077 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6078 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6079 compare_op, operands[2],
6080 operands[3])));
6081 if (bypass_test)
6082 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6083 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6084 bypass_test,
6085 operands[3],
6086 operands[0])));
6087 if (second_test)
6088 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6089 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6090 second_test,
6091 operands[2],
6092 operands[0])));
6093
6094 return 1; /* DONE */
6095 }
6096
6097 int
6098 ix86_expand_fp_movcc (operands)
6099 rtx operands[];
6100 {
6101 enum rtx_code code;
6102 rtx tmp;
6103 rtx compare_op, second_test, bypass_test;
6104
6105 /* For SF/DFmode conditional moves based on comparisons
6106 in same mode, we may want to use SSE min/max instructions. */
6107 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6108 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6109 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6110 /* We may be called from the post-reload splitter. */
6111 && (!REG_P (operands[0])
6112 || SSE_REG_P (operands[0])
6113 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6114 {
6115 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6116 code = GET_CODE (operands[1]);
6117
6118       /* See if we have a (cross) match between the comparison operands
6119 	 and the conditional move operands.  */
6120 if (rtx_equal_p (operands[2], op1))
6121 {
6122 rtx tmp = op0;
6123 op0 = op1;
6124 op1 = tmp;
6125 code = reverse_condition_maybe_unordered (code);
6126 }
6127 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6128 {
6129 /* Check for min operation. */
6130 if (code == LT)
6131 {
6132 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6133 if (memory_operand (op0, VOIDmode))
6134 op0 = force_reg (GET_MODE (operands[0]), op0);
6135 if (GET_MODE (operands[0]) == SFmode)
6136 emit_insn (gen_minsf3 (operands[0], op0, op1));
6137 else
6138 emit_insn (gen_mindf3 (operands[0], op0, op1));
6139 return 1;
6140 }
6141 /* Check for max operation. */
6142 if (code == GT)
6143 {
6144 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6145 if (memory_operand (op0, VOIDmode))
6146 op0 = force_reg (GET_MODE (operands[0]), op0);
6147 if (GET_MODE (operands[0]) == SFmode)
6148 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6149 else
6150 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6151 return 1;
6152 }
6153 }
6154       /* Make the condition an sse_comparison_operator.  When we are
6155 	 in non-IEEE mode, try to canonicalize the destination operand
6156 	 to be first in the comparison; this helps reload avoid extra
6157 	 moves.  */
6158 if (!sse_comparison_operator (operands[1], VOIDmode)
6159 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6160 {
6161 rtx tmp = ix86_compare_op0;
6162 ix86_compare_op0 = ix86_compare_op1;
6163 ix86_compare_op1 = tmp;
6164 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6165 VOIDmode, ix86_compare_op0,
6166 ix86_compare_op1);
6167 }
6168       /* Similarly, try to make the result the first operand of the
6169 	 conditional move.  */
6170 if (rtx_equal_p (operands[0], operands[3]))
6171 {
6172 rtx tmp = operands[2];
6173 operands[2] = operands[3];
6174 	  operands[3] = tmp;
6175 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6176 (GET_CODE (operands[1])),
6177 VOIDmode, ix86_compare_op0,
6178 ix86_compare_op1);
6179 }
6180 if (GET_MODE (operands[0]) == SFmode)
6181 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6182 operands[2], operands[3],
6183 ix86_compare_op0, ix86_compare_op1));
6184 else
6185 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6186 operands[2], operands[3],
6187 ix86_compare_op0, ix86_compare_op1));
6188 return 1;
6189 }
6190
6194 code = GET_CODE (operands[1]);
6195 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6196
6197 /* The floating point conditional move instructions don't directly
6198 support signed integer comparisons. */
6199
6200 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6201 {
6202 if (second_test != NULL || bypass_test != NULL)
6203 	abort ();
6204 tmp = gen_reg_rtx (QImode);
6205 ix86_expand_setcc (code, tmp);
6206 code = NE;
6207 ix86_compare_op0 = tmp;
6208 ix86_compare_op1 = const0_rtx;
6209 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6210 }
6211 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6212 {
6213 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6214 emit_move_insn (tmp, operands[3]);
6215 operands[3] = tmp;
6216 }
6217 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6218 {
6219 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6220 emit_move_insn (tmp, operands[2]);
6221 operands[2] = tmp;
6222 }
6223
6224 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6225 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6226 compare_op,
6227 operands[2],
6228 operands[3])));
6229 if (bypass_test)
6230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6231 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6232 bypass_test,
6233 operands[3],
6234 operands[0])));
6235 if (second_test)
6236 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6237 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6238 second_test,
6239 operands[2],
6240 operands[0])));
6241
6242 return 1;
6243 }
6244
6245 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
6246    works for floating point parameters and non-offsettable memories.
6247    For pushes, it returns just stack offsets; the values will be saved
6248    in the right order.  Maximally three parts are generated.  */
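
/* For example, on this little-endian target a DFmode constant like 1.0
   (IEEE double 0x3ff0000000000000) is returned as parts[0] = 0x00000000
   and parts[1] = 0x3ff00000, matching the REAL_VALUE_TO_TARGET_DOUBLE
   case below.  */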
6249
6250 static int
6251 ix86_split_to_parts (operand, parts, mode)
6252 rtx operand;
6253 rtx *parts;
6254 enum machine_mode mode;
6255 {
6256 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
6257
6258 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6259 abort ();
6260 if (size < 2 || size > 3)
6261 abort ();
6262
6263   /* Optimize constant pool references to immediates.  This is used by
6264      fp moves that force all constants to memory to allow combining.  */
6265
6266 if (GET_CODE (operand) == MEM
6267 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6268 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6269 operand = get_pool_constant (XEXP (operand, 0));
6270
6271 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6272 {
6273       /* The only non-offsettable memories we handle are pushes.  */
6274 if (! push_operand (operand, VOIDmode))
6275 abort ();
6276
6277 PUT_MODE (operand, SImode);
6278 parts[0] = parts[1] = parts[2] = operand;
6279 }
6280 else
6281 {
6282 if (mode == DImode)
6283 split_di (&operand, 1, &parts[0], &parts[1]);
6284 else
6285 {
6286 if (REG_P (operand))
6287 {
6288 if (!reload_completed)
6289 abort ();
6290 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6291 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6292 if (size == 3)
6293 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6294 }
6295 else if (offsettable_memref_p (operand))
6296 {
6297 PUT_MODE (operand, SImode);
6298 parts[0] = operand;
6299 parts[1] = adj_offsettable_operand (operand, 4);
6300 if (size == 3)
6301 parts[2] = adj_offsettable_operand (operand, 8);
6302 }
6303 else if (GET_CODE (operand) == CONST_DOUBLE)
6304 {
6305 REAL_VALUE_TYPE r;
6306 long l[4];
6307
6308 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6309 switch (mode)
6310 {
6311 case XFmode:
6312 case TFmode:
6313 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6314 parts[2] = GEN_INT (l[2]);
6315 break;
6316 case DFmode:
6317 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6318 break;
6319 default:
6320 abort ();
6321 }
6322 parts[1] = GEN_INT (l[1]);
6323 parts[0] = GEN_INT (l[0]);
6324 }
6325 else
6326 abort ();
6327 }
6328 }
6329
6330 return size;
6331 }
6332
6333 /* Emit insns to perform a move or push of DI, DF, and XF values.
6334    Return false when normal moves are needed; true when all required
6335    insns have been emitted.  Operands 2-4 contain the input values
6336    in the correct order; operands 5-7 contain the output values.  */
6337
6338 int
6339 ix86_split_long_move (operands1)
6340 rtx operands1[];
6341 {
6342 rtx part[2][3];
6343 rtx operands[2];
6344 int size;
6345 int push = 0;
6346 int collisions = 0;
6347
6348 /* Make our own copy to avoid clobbering the operands. */
6349 operands[0] = copy_rtx (operands1[0]);
6350 operands[1] = copy_rtx (operands1[1]);
6351
6352   /* The only non-offsettable memory we handle is a push.  */
6353 if (push_operand (operands[0], VOIDmode))
6354 push = 1;
6355 else if (GET_CODE (operands[0]) == MEM
6356 && ! offsettable_memref_p (operands[0]))
6357 abort ();
6358
6359 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6360 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6361
6362 /* When emitting push, take care for source operands on the stack. */
6363 if (push && GET_CODE (operands[1]) == MEM
6364 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6365 {
6366 if (size == 3)
6367 part[1][1] = part[1][2];
6368 part[1][0] = part[1][1];
6369 }
6370
6371   /* We need to do the copy in the right order in case an address register
6372      of the source overlaps the destination.  */
6373 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6374 {
6375 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6376 collisions++;
6377 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6378 collisions++;
6379 if (size == 3
6380 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6381 collisions++;
6382
6383 /* Collision in the middle part can be handled by reordering. */
6384 if (collisions == 1 && size == 3
6385 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6386 {
6387 rtx tmp;
6388 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6389 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6390 }
6391
6392 /* If there are more collisions, we can't handle it by reordering.
6393 Do an lea to the last part and use only one colliding move. */
6394 else if (collisions > 1)
6395 {
6396 collisions = 1;
6397 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6398 XEXP (part[1][0], 0)));
6399 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6400 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6401 if (size == 3)
6402 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6403 }
6404 }
6405
6406 if (push)
6407 {
6408 if (size == 3)
6409 {
6410 	  /* We use only the first 12 bytes of the TFmode value, but for
6411 	     pushing we are required to adjust the stack as if we were
6412 	     pushing a real 16-byte value.  */
6413 if (GET_MODE (operands1[0]) == TFmode)
6414 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6415 GEN_INT (-4)));
6416 emit_insn (gen_push (part[1][2]));
6417 }
6418 emit_insn (gen_push (part[1][1]));
6419 emit_insn (gen_push (part[1][0]));
6420 return 1;
6421 }
6422
6423   /* Choose the correct order so as not to overwrite the source before it is copied.  */
6424 if ((REG_P (part[0][0])
6425 && REG_P (part[1][1])
6426 && (REGNO (part[0][0]) == REGNO (part[1][1])
6427 || (size == 3
6428 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6429 || (collisions > 0
6430 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
6431 {
6432 if (size == 3)
6433 {
6434 operands1[2] = part[0][2];
6435 operands1[3] = part[0][1];
6436 operands1[4] = part[0][0];
6437 operands1[5] = part[1][2];
6438 operands1[6] = part[1][1];
6439 operands1[7] = part[1][0];
6440 }
6441 else
6442 {
6443 operands1[2] = part[0][1];
6444 operands1[3] = part[0][0];
6445 operands1[5] = part[1][1];
6446 operands1[6] = part[1][0];
6447 }
6448 }
6449 else
6450 {
6451 if (size == 3)
6452 {
6453 operands1[2] = part[0][0];
6454 operands1[3] = part[0][1];
6455 operands1[4] = part[0][2];
6456 operands1[5] = part[1][0];
6457 operands1[6] = part[1][1];
6458 operands1[7] = part[1][2];
6459 }
6460 else
6461 {
6462 operands1[2] = part[0][0];
6463 operands1[3] = part[0][1];
6464 operands1[5] = part[1][0];
6465 operands1[6] = part[1][1];
6466 }
6467 }
6468
6469 return 0;
6470 }
6471
6472 void
6473 ix86_split_ashldi (operands, scratch)
6474 rtx *operands, scratch;
6475 {
6476 rtx low[2], high[2];
6477 int count;
6478
6479 if (GET_CODE (operands[2]) == CONST_INT)
6480 {
6481 split_di (operands, 2, low, high);
6482 count = INTVAL (operands[2]) & 63;
6483
6484 if (count >= 32)
6485 {
6486 emit_move_insn (high[0], low[1]);
6487 emit_move_insn (low[0], const0_rtx);
6488
6489 if (count > 32)
6490 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6491 }
6492 else
6493 {
6494 if (!rtx_equal_p (operands[0], operands[1]))
6495 emit_move_insn (operands[0], operands[1]);
6496 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6497 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6498 }
6499 }
6500 else
6501 {
6502 if (!rtx_equal_p (operands[0], operands[1]))
6503 emit_move_insn (operands[0], operands[1]);
6504
6505 split_di (operands, 1, low, high);
6506
6507 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6508 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
6509
6510 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6511 {
6512 if (! no_new_pseudos)
6513 scratch = force_reg (SImode, const0_rtx);
6514 else
6515 emit_move_insn (scratch, const0_rtx);
6516
6517 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6518 scratch));
6519 }
6520 else
6521 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
6522 }
6523 }
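
/* To illustrate the two constant paths above: a DImode shift left by 40
   has count >= 32, so it becomes high = low << 8 with low = 0, while a
   shift by 12 emits the shld/sall pair

	shldl	$12, %eax, %edx
	sall	$12, %eax

   assuming the usual eax/edx register pair for the DImode value.  */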
6524
6525 void
6526 ix86_split_ashrdi (operands, scratch)
6527 rtx *operands, scratch;
6528 {
6529 rtx low[2], high[2];
6530 int count;
6531
6532 if (GET_CODE (operands[2]) == CONST_INT)
6533 {
6534 split_di (operands, 2, low, high);
6535 count = INTVAL (operands[2]) & 63;
6536
6537 if (count >= 32)
6538 {
6539 emit_move_insn (low[0], high[1]);
6540
6541 if (! reload_completed)
6542 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6543 else
6544 {
6545 emit_move_insn (high[0], low[0]);
6546 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6547 }
6548
6549 if (count > 32)
6550 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6551 }
6552 else
6553 {
6554 if (!rtx_equal_p (operands[0], operands[1]))
6555 emit_move_insn (operands[0], operands[1]);
6556 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6557 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6558 }
6559 }
6560 else
6561 {
6562 if (!rtx_equal_p (operands[0], operands[1]))
6563 emit_move_insn (operands[0], operands[1]);
6564
6565 split_di (operands, 1, low, high);
6566
6567 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6568 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6569
6570 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6571 {
6572 if (! no_new_pseudos)
6573 scratch = gen_reg_rtx (SImode);
6574 emit_move_insn (scratch, high[0]);
6575 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6576 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6577 scratch));
6578 }
6579 else
6580 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
6581 }
6582 }
6583
6584 void
6585 ix86_split_lshrdi (operands, scratch)
6586 rtx *operands, scratch;
6587 {
6588 rtx low[2], high[2];
6589 int count;
6590
6591 if (GET_CODE (operands[2]) == CONST_INT)
6592 {
6593 split_di (operands, 2, low, high);
6594 count = INTVAL (operands[2]) & 63;
6595
6596 if (count >= 32)
6597 {
6598 emit_move_insn (low[0], high[1]);
6599 emit_move_insn (high[0], const0_rtx);
6600
6601 if (count > 32)
6602 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6603 }
6604 else
6605 {
6606 if (!rtx_equal_p (operands[0], operands[1]))
6607 emit_move_insn (operands[0], operands[1]);
6608 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6609 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6610 }
6611 }
6612 else
6613 {
6614 if (!rtx_equal_p (operands[0], operands[1]))
6615 emit_move_insn (operands[0], operands[1]);
6616
6617 split_di (operands, 1, low, high);
6618
6619 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6620 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6621
6622 /* Heh. By reversing the arguments, we can reuse this pattern. */
6623 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6624 {
6625 if (! no_new_pseudos)
6626 scratch = force_reg (SImode, const0_rtx);
6627 else
6628 emit_move_insn (scratch, const0_rtx);
6629
6630 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6631 scratch));
6632 }
6633 else
6634 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6635 }
6636 }
6637
6638 /* Expand the appropriate insns for doing strlen if not just doing
6639 repnz; scasb
6640
6641 out = result, initialized with the start address
6642 align_rtx = alignment of the address.
6643    scratch = scratch register, initialized with the start address when
6644    not aligned, otherwise undefined
6645
6646    This is just the body.  It needs the initializations mentioned above and
6647    some address computation at the end.  These things are done in i386.md.  */
6648
6649 void
6650 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6651 rtx out, align_rtx, scratch;
6652 {
6653 int align;
6654 rtx tmp;
6655 rtx align_2_label = NULL_RTX;
6656 rtx align_3_label = NULL_RTX;
6657 rtx align_4_label = gen_label_rtx ();
6658 rtx end_0_label = gen_label_rtx ();
6659 rtx mem;
6660 rtx tmpreg = gen_reg_rtx (SImode);
6661
6662 align = 0;
6663 if (GET_CODE (align_rtx) == CONST_INT)
6664 align = INTVAL (align_rtx);
6665
6666 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6667
6668 /* Is there a known alignment and is it less than 4? */
6669 if (align < 4)
6670 {
6671 /* Is there a known alignment and is it not 2? */
6672 if (align != 2)
6673 {
6674 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6675 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6676
6677 /* Leave just the 3 lower bits. */
6678 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6679 NULL_RTX, 0, OPTAB_WIDEN);
6680
6681 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6682 SImode, 1, 0, align_4_label);
6683 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6684 SImode, 1, 0, align_2_label);
6685 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6686 SImode, 1, 0, align_3_label);
6687 }
6688 else
6689 {
6690 	  /* Since the alignment is 2, we have to check 0 or 2 bytes;
6691 	     check whether the pointer is aligned to a 4-byte boundary.  */
6692
6693 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6694 NULL_RTX, 0, OPTAB_WIDEN);
6695
6696 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6697 SImode, 1, 0, align_4_label);
6698 }
6699
6700 mem = gen_rtx_MEM (QImode, out);
6701
6702 /* Now compare the bytes. */
6703
6704       /* Compare the first 1 to 3 unaligned bytes on a byte-by-byte basis.  */
6705 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6706 QImode, 1, 0, end_0_label);
6707
6708 /* Increment the address. */
6709 emit_insn (gen_addsi3 (out, out, const1_rtx));
6710
6711 /* Not needed with an alignment of 2 */
6712 if (align != 2)
6713 {
6714 emit_label (align_2_label);
6715
6716 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6717 QImode, 1, 0, end_0_label);
6718
6719 emit_insn (gen_addsi3 (out, out, const1_rtx));
6720
6721 emit_label (align_3_label);
6722 }
6723
6724 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6725 QImode, 1, 0, end_0_label);
6726
6727 emit_insn (gen_addsi3 (out, out, const1_rtx));
6728 }
6729
6730   /* Generate a loop to check 4 bytes at a time.  It is not a good idea
6731      to align this loop; that only enlarges the program and does not
6732      help speed it up.  */
6733 emit_label (align_4_label);
6734
6735 mem = gen_rtx_MEM (SImode, out);
6736 emit_move_insn (scratch, mem);
6737 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6738
6739   /* This formula yields a nonzero result iff one of the bytes is zero.
6740      This saves three branches inside the loop and many cycles.  */
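
   /* A worked example of (x - 0x01010101) & ~x & 0x80808080: for
      x = 0x41410041 (a zero in byte 1),

	 x - 0x01010101 = 0x403fff40
	 ~x             = 0xbebeffbe
	 anded together and masked:  0x00008000, i.e. nonzero,

      while x = 0x41414141 gives 0.  The borrow out of a zero byte sets
      that byte's 0x80 bit, and the ~x term rejects bytes that already
      had their top bit set.  */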
6741
6742 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6743 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6744 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6745 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6746 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6747 SImode, 1, 0, align_4_label);
6748
6749 if (TARGET_CMOVE)
6750 {
6751 rtx reg = gen_reg_rtx (SImode);
6752 emit_move_insn (reg, tmpreg);
6753 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6754
6755 /* If zero is not in the first two bytes, move two bytes forward. */
6756 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6757 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6758 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6759 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6760 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6761 reg,
6762 tmpreg)));
6763 /* Emit lea manually to avoid clobbering of flags. */
6764 emit_insn (gen_rtx_SET (SImode, reg,
6765 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6766
6767 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6768 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6769 emit_insn (gen_rtx_SET (VOIDmode, out,
6770 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6771 reg,
6772 out)));
6773
6774 }
6775 else
6776 {
6777 rtx end_2_label = gen_label_rtx ();
6778 /* Is zero in the first two bytes? */
6779
6780 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6781 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6782 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6783 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6784 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6785 pc_rtx);
6786 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6787 JUMP_LABEL (tmp) = end_2_label;
6788
6789 /* Not in the first two. Move two bytes forward. */
6790 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6791 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6792
6793 emit_label (end_2_label);
6794
6795 }
6796
6797   /* Avoid a branch in fixing the byte: the low byte of tmpreg has 0x80 set iff the zero byte is the first of the remaining pair, so adding the QImode register to itself moves that bit into the carry flag for the subtract-with-borrow below.  */
6798 tmpreg = gen_lowpart (QImode, tmpreg);
6799 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6800 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6801
6802 emit_label (end_0_label);
6803 }
6804 \f
6805 /* Clear stack slot assignments remembered from previous functions.
6806 This is called from INIT_EXPANDERS once before RTL is emitted for each
6807 function. */
6808
6809 static void
6810 ix86_init_machine_status (p)
6811 struct function *p;
6812 {
6813 p->machine = (struct machine_function *)
6814 xcalloc (1, sizeof (struct machine_function));
6815 }
6816
6817 /* Mark machine specific bits of P for GC. */
6818 static void
6819 ix86_mark_machine_status (p)
6820 struct function *p;
6821 {
6822 struct machine_function *machine = p->machine;
6823 enum machine_mode mode;
6824 int n;
6825
6826 if (! machine)
6827 return;
6828
6829 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6830 mode = (enum machine_mode) ((int) mode + 1))
6831 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6832 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
6833 }
6834
6835 static void
6836 ix86_free_machine_status (p)
6837 struct function *p;
6838 {
6839 free (p->machine);
6840 p->machine = NULL;
6841 }
6842
6843 /* Return a MEM corresponding to a stack slot with mode MODE.
6844 Allocate a new slot if necessary.
6845
6846 The RTL for a function can have several slots available: N is
6847 which slot to use. */
6848
6849 rtx
6850 assign_386_stack_local (mode, n)
6851 enum machine_mode mode;
6852 int n;
6853 {
6854 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6855 abort ();
6856
6857 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6858 ix86_stack_locals[(int) mode][n]
6859 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6860
6861 return ix86_stack_locals[(int) mode][n];
6862 }
6863 \f
6864 /* Calculate the length of the memory address in the instruction
6865 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6866
6867 static int
6868 memory_address_length (addr)
6869 rtx addr;
6870 {
6871 struct ix86_address parts;
6872 rtx base, index, disp;
6873 int len;
6874
6875 if (GET_CODE (addr) == PRE_DEC
6876 || GET_CODE (addr) == POST_INC)
6877 return 0;
6878
6879 if (! ix86_decompose_address (addr, &parts))
6880 abort ();
6881
6882 base = parts.base;
6883 index = parts.index;
6884 disp = parts.disp;
6885 len = 0;
6886
6887 /* Register Indirect. */
6888 if (base && !index && !disp)
6889 {
6890 /* Special cases: ebp and esp need the two-byte modrm form. */
6891 if (addr == stack_pointer_rtx
6892 || addr == arg_pointer_rtx
6893 || addr == frame_pointer_rtx
6894 || addr == hard_frame_pointer_rtx)
6895 len = 1;
6896 }
6897
6898 /* Direct Addressing. */
6899 else if (disp && !base && !index)
6900 len = 4;
6901
6902 else
6903 {
6904 /* Find the length of the displacement constant. */
6905 if (disp)
6906 {
6907 if (GET_CODE (disp) == CONST_INT
6908 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6909 len = 1;
6910 else
6911 len = 4;
6912 }
6913
6914 /* An index requires the two-byte modrm form. */
6915 if (index)
6916 len += 1;
6917 }
6918
6919 return len;
6920 }
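
/* Informal examples of the rules above: "(%eax)" adds no bytes, "8(%ebp)"
   adds one displacement byte, "(%eax,%ebx,4)" adds one SIB byte, and a
   symbolic "foo(,%ebx,4)" adds SIB plus a 4-byte displacement, i.e. 5.  */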
6921
6922 /* Compute the default value for the "length_immediate" attribute.  When
6923    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
6924 int
6925 ix86_attr_length_immediate_default (insn, shortform)
6926 rtx insn;
6927 int shortform;
6928 {
6929 int len = 0;
6930 int i;
6931 extract_insn_cached (insn);
6932 for (i = recog_data.n_operands - 1; i >= 0; --i)
6933 if (CONSTANT_P (recog_data.operand[i]))
6934 {
6935 if (len)
6936 abort ();
6937 if (shortform
6938 && GET_CODE (recog_data.operand[i]) == CONST_INT
6939 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6940 len = 1;
6941 else
6942 {
6943 switch (get_attr_mode (insn))
6944 {
6945 case MODE_QI:
6946 	    case MODE_QI:
6947 	      len += 1;
6948 	      break;
6949 	    case MODE_HI:
6950 	      len += 2;
6951 	      break;
6952 	    case MODE_SI:
6953 	      len += 4;
6954 default:
6955 fatal_insn ("Unknown insn mode", insn);
6956 }
6957 }
6958 }
6959 return len;
6960 }
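
/* For example, "addl $12, %eax" has an imm8 alternative, so with SHORTFORM
   the immediate contributes 1 byte; "addl $100000, %eax" contributes 4.  */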
6961 /* Compute the default value for the "length_address" attribute.  */
6962 int
6963 ix86_attr_length_address_default (insn)
6964 rtx insn;
6965 {
6966 int i;
6967 extract_insn_cached (insn);
6968 for (i = recog_data.n_operands - 1; i >= 0; --i)
6969     if (GET_CODE (recog_data.operand[i]) == MEM)
6970       return memory_address_length (XEXP (recog_data.operand[i], 0));
6974 return 0;
6975 }
6976 \f
6977 /* Return the maximum number of instructions a cpu can issue. */
6978
6979 int
6980 ix86_issue_rate ()
6981 {
6982 switch (ix86_cpu)
6983 {
6984 case PROCESSOR_PENTIUM:
6985 case PROCESSOR_K6:
6986 return 2;
6987
6988 case PROCESSOR_PENTIUMPRO:
6989 case PROCESSOR_PENTIUM4:
6990 case PROCESSOR_ATHLON:
6991 return 3;
6992
6993 default:
6994 return 1;
6995 }
6996 }
6997
6998 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6999    by DEP_INSN and nothing else set by DEP_INSN.  */
7000
7001 static int
7002 ix86_flags_dependant (insn, dep_insn, insn_type)
7003 rtx insn, dep_insn;
7004 enum attr_type insn_type;
7005 {
7006 rtx set, set2;
7007
7008 /* Simplify the test for uninteresting insns. */
7009 if (insn_type != TYPE_SETCC
7010 && insn_type != TYPE_ICMOV
7011 && insn_type != TYPE_FCMOV
7012 && insn_type != TYPE_IBR)
7013 return 0;
7014
7015 if ((set = single_set (dep_insn)) != 0)
7016 {
7017 set = SET_DEST (set);
7018 set2 = NULL_RTX;
7019 }
7020 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
7021 && XVECLEN (PATTERN (dep_insn), 0) == 2
7022 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
7023 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
7024 {
7025 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7026       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
7027 }
7028 else
7029 return 0;
7030
7031 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
7032 return 0;
7033
7034   /* This test is true if the dependent insn reads the flags but
7035      not any other potentially set register.  */
7036 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
7037 return 0;
7038
7039 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
7040 return 0;
7041
7042 return 1;
7043 }
7044
7045 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
7046 address with operands set by DEP_INSN. */
7047
7048 static int
7049 ix86_agi_dependant (insn, dep_insn, insn_type)
7050 rtx insn, dep_insn;
7051 enum attr_type insn_type;
7052 {
7053 rtx addr;
7054
7055 if (insn_type == TYPE_LEA)
7056 {
7057 addr = PATTERN (insn);
7058 if (GET_CODE (addr) == SET)
7059 ;
7060 else if (GET_CODE (addr) == PARALLEL
7061 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
7062 addr = XVECEXP (addr, 0, 0);
7063 else
7064 abort ();
7065 addr = SET_SRC (addr);
7066 }
7067 else
7068 {
7069 int i;
7070 extract_insn_cached (insn);
7071 for (i = recog_data.n_operands - 1; i >= 0; --i)
7072 if (GET_CODE (recog_data.operand[i]) == MEM)
7073 {
7074 addr = XEXP (recog_data.operand[i], 0);
7075 goto found;
7076 }
7077 return 0;
7078 found:;
7079 }
7080
7081 return modified_in_p (addr, dep_insn);
7082 }
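
/* A typical address generation interlock that the function above detects:

	movl	%ecx, %ebx
	movl	(%ebx), %eax	# address uses %ebx set by the previous insn

   on Pentium the address calculation has to wait for %ebx, which the
   PROCESSOR_PENTIUM case below charges as one extra cycle.  */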
7083
7084 int
7085 ix86_adjust_cost (insn, link, dep_insn, cost)
7086 rtx insn, link, dep_insn;
7087 int cost;
7088 {
7089 enum attr_type insn_type, dep_insn_type;
7090 enum attr_memory memory;
7091 rtx set, set2;
7092 int dep_insn_code_number;
7093
7094   /* Anti and output dependencies have zero cost on all CPUs.  */
7095 if (REG_NOTE_KIND (link) != 0)
7096 return 0;
7097
7098 dep_insn_code_number = recog_memoized (dep_insn);
7099
7100 /* If we can't recognize the insns, we can't really do anything. */
7101 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
7102 return cost;
7103
7104 insn_type = get_attr_type (insn);
7105 dep_insn_type = get_attr_type (dep_insn);
7106
7107   /* Prologue and epilogue allocators can have a false dependency on ebp.
7108      This results in one extra cycle of stall on Pentium prologue scheduling,
7109      so handle this important case manually.  */
7110 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7111 && dep_insn_type == TYPE_ALU
7112 && !reg_mentioned_p (stack_pointer_rtx, insn))
7113 return 0;
7114
7115 switch (ix86_cpu)
7116 {
7117 case PROCESSOR_PENTIUM:
7118 /* Address Generation Interlock adds a cycle of latency. */
7119 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7120 cost += 1;
7121
7122 /* ??? Compares pair with jump/setcc. */
7123 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7124 cost = 0;
7125
7126       /* Floating point stores require the value to be ready one cycle earlier.  */
7127 if (insn_type == TYPE_FMOV
7128 && get_attr_memory (insn) == MEMORY_STORE
7129 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7130 cost += 1;
7131 break;
7132
7133 case PROCESSOR_PENTIUMPRO:
7134 /* Since we can't represent delayed latencies of load+operation,
7135 increase the cost here for non-imov insns. */
7136 if (dep_insn_type != TYPE_IMOV
7137 && dep_insn_type != TYPE_FMOV
7138 	  && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7139 	      || memory == MEMORY_BOTH))
7140 cost += 1;
7141
7142 /* INT->FP conversion is expensive. */
7143 if (get_attr_fp_int_src (dep_insn))
7144 cost += 5;
7145
7146 /* There is one cycle extra latency between an FP op and a store. */
7147 if (insn_type == TYPE_FMOV
7148 && (set = single_set (dep_insn)) != NULL_RTX
7149 && (set2 = single_set (insn)) != NULL_RTX
7150 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7151 && GET_CODE (SET_DEST (set2)) == MEM)
7152 cost += 1;
7153 break;
7154
7155 case PROCESSOR_K6:
7156 /* The esp dependency is resolved before the instruction is really
7157 finished. */
7158 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7159 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7160 return 1;
7161
7162 /* Since we can't represent delayed latencies of load+operation,
7163 increase the cost here for non-imov insns. */
7164       if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7165 	  || memory == MEMORY_BOTH)
7166 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7167
7168 /* INT->FP conversion is expensive. */
7169 if (get_attr_fp_int_src (dep_insn))
7170 cost += 5;
7171 break;
7172
7173 case PROCESSOR_ATHLON:
7174 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7175 || memory == MEMORY_BOTH)
7176 {
7177 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
7178 cost += 2;
7179 else
7180 cost += 3;
7181 	}
      break;
7182
7183 default:
7184 break;
7185 }
7186
7187 return cost;
7188 }
7189
7190 static union
7191 {
7192 struct ppro_sched_data
7193 {
7194 rtx decode[3];
7195 int issued_this_cycle;
7196 } ppro;
7197 } ix86_sched_data;
7198
7199 static int
7200 ix86_safe_length (insn)
7201 rtx insn;
7202 {
7203 if (recog_memoized (insn) >= 0)
7204     return get_attr_length (insn);
7205 else
7206 return 128;
7207 }
7208
7209 static int
7210 ix86_safe_length_prefix (insn)
7211 rtx insn;
7212 {
7213 if (recog_memoized (insn) >= 0)
7214     return get_attr_length (insn);
7215 else
7216 return 0;
7217 }
7218
7219 static enum attr_memory
7220 ix86_safe_memory (insn)
7221 rtx insn;
7222 {
7223 if (recog_memoized (insn) >= 0)
7224     return get_attr_memory (insn);
7225 else
7226 return MEMORY_UNKNOWN;
7227 }
7228
7229 static enum attr_pent_pair
7230 ix86_safe_pent_pair (insn)
7231 rtx insn;
7232 {
7233 if (recog_memoized (insn) >= 0)
7234     return get_attr_pent_pair (insn);
7235 else
7236 return PENT_PAIR_NP;
7237 }
7238
7239 static enum attr_ppro_uops
7240 ix86_safe_ppro_uops (insn)
7241 rtx insn;
7242 {
7243 if (recog_memoized (insn) >= 0)
7244 return get_attr_ppro_uops (insn);
7245 else
7246 return PPRO_UOPS_MANY;
7247 }
7248
7249 static void
7250 ix86_dump_ppro_packet (dump)
7251 FILE *dump;
7252 {
7253 if (ix86_sched_data.ppro.decode[0])
7254 {
7255 fprintf (dump, "PPRO packet: %d",
7256 INSN_UID (ix86_sched_data.ppro.decode[0]));
7257 if (ix86_sched_data.ppro.decode[1])
7258 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7259 if (ix86_sched_data.ppro.decode[2])
7260 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7261 fputc ('\n', dump);
7262 }
7263 }
7264
7265 /* We're beginning a new block. Initialize data structures as necessary. */
7266
7267 void
7268 ix86_sched_init (dump, sched_verbose)
7269 FILE *dump ATTRIBUTE_UNUSED;
7270 int sched_verbose ATTRIBUTE_UNUSED;
7271 {
7272 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7273 }
7274
7275 /* Shift INSN to SLOT, and shift everything else down. */
7276
7277 static void
7278 ix86_reorder_insn (insnp, slot)
7279 rtx *insnp, *slot;
7280 {
7281 if (insnp != slot)
7282 {
7283 rtx insn = *insnp;
7284 do
7285 insnp[0] = insnp[1];
7286 while (++insnp != slot);
7287 *insnp = insn;
7288 }
7289 }
7290
7291 /* Find an instruction with the given pairability and the minimal number of
7292    cycles lost to the fact that the CPU waits for both pipelines to finish
7293    before reading the next instructions.  Also take care that the two
7294    instructions together do not exceed 7 bytes.  */
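
/* Informally, the pairability classes tested below: PENT_PAIR_UV insns can
   issue in either pipe, PENT_PAIR_PU only in the U pipe, PENT_PAIR_PV only
   in the V pipe, and PENT_PAIR_NP cannot pair at all; the search tries to
   keep both pipes busy while respecting the 7-byte limit above.  */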
7295
7296 static rtx *
7297 ix86_pent_find_pair (e_ready, ready, type, first)
7298 rtx *e_ready;
7299 rtx *ready;
7300 enum attr_pent_pair type;
7301 rtx first;
7302 {
7303 int mincycles, cycles;
7304 enum attr_pent_pair tmp;
7305 enum attr_memory memory;
7306 rtx *insnp, *bestinsnp = NULL;
7307
7308 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
7309 return NULL;
7310
7311 memory = ix86_safe_memory (first);
7312 cycles = result_ready_cost (first);
7313 mincycles = INT_MAX;
7314
7315 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
7316 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
7317 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
7318 {
7319 enum attr_memory second_memory;
7320 int secondcycles, currentcycles;
7321
7322 second_memory = ix86_safe_memory (*insnp);
7323 secondcycles = result_ready_cost (*insnp);
7324 currentcycles = abs (cycles - secondcycles);
7325
7326 if (secondcycles >= 1 && cycles >= 1)
7327 {
7328 		/* Two read/modify/write instructions together take two
7329 		   cycles longer.  */
7330 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7331 currentcycles += 2;
7332
7333 		/* A read/modify/write instruction followed by a read/modify
7334 		   instruction takes one cycle longer.  */
7335 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7336 && tmp != PENT_PAIR_UV
7337 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
7338 currentcycles += 1;
7339 }
7340 if (currentcycles < mincycles)
7341 bestinsnp = insnp, mincycles = currentcycles;
7342 }
7343
7344 return bestinsnp;
7345 }
7346
7347 /* Subroutines of ix86_sched_reorder. */
7348
7349 static void
7350 ix86_sched_reorder_pentium (ready, e_ready)
7351 rtx *ready;
7352 rtx *e_ready;
7353 {
7354 enum attr_pent_pair pair1, pair2;
7355 rtx *insnp;
7356
7357   /* This wouldn't be necessary if Haifa knew that static insn ordering
7358      matters for which pipe an insn is issued to.  So we have to make
7359      some minor rearrangements.  */
7360
7361 pair1 = ix86_safe_pent_pair (*e_ready);
7362
7363 /* If the first insn is non-pairable, let it be. */
7364 if (pair1 == PENT_PAIR_NP)
7365 return;
7366
7367 pair2 = PENT_PAIR_NP;
7368 insnp = 0;
7369
7370 /* If the first insn is UV or PV pairable, search for a PU
7371 insn to go with. */
7372 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
7373 {
7374 insnp = ix86_pent_find_pair (e_ready-1, ready,
7375 PENT_PAIR_PU, *e_ready);
7376 if (insnp)
7377 pair2 = PENT_PAIR_PU;
7378 }
7379
7380 /* If the first insn is PU or UV pairable, search for a PV
7381 insn to go with. */
7382 if (pair2 == PENT_PAIR_NP
7383 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7384 {
7385 insnp = ix86_pent_find_pair (e_ready-1, ready,
7386 PENT_PAIR_PV, *e_ready);
7387 if (insnp)
7388 pair2 = PENT_PAIR_PV;
7389 }
7390
7391 /* If the first insn is pairable, search for a UV
7392 insn to go with. */
7393 if (pair2 == PENT_PAIR_NP)
7394 {
7395 insnp = ix86_pent_find_pair (e_ready-1, ready,
7396 PENT_PAIR_UV, *e_ready);
7397 if (insnp)
7398 pair2 = PENT_PAIR_UV;
7399 }
7400
7401 if (pair2 == PENT_PAIR_NP)
7402 return;
7403
7404 /* Found something! Decide if we need to swap the order. */
7405 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7406 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7407 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7408 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7409 ix86_reorder_insn (insnp, e_ready);
7410 else
7411 ix86_reorder_insn (insnp, e_ready - 1);
7412 }
7413
7414 static void
7415 ix86_sched_reorder_ppro (ready, e_ready)
7416 rtx *ready;
7417 rtx *e_ready;
7418 {
7419 rtx decode[3];
7420 enum attr_ppro_uops cur_uops;
7421 int issued_this_cycle;
7422 rtx *insnp;
7423 int i;
7424
7425 /* At this point .ppro.decode contains the state of the three
7426 decoders from last "cycle". That is, those insns that were
7427 actually independent. But here we're scheduling for the
7428 decoder, and we may find things that are decodable in the
7429 same cycle. */
7430
7431 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
7432 issued_this_cycle = 0;
7433
7434 insnp = e_ready;
7435 cur_uops = ix86_safe_ppro_uops (*insnp);
7436
7437   /* If the decoders are empty, and we have a complex insn at the
7438      head of the priority queue, let it issue without complaint.  */
7439 if (decode[0] == NULL)
7440 {
7441 if (cur_uops == PPRO_UOPS_MANY)
7442 {
7443 decode[0] = *insnp;
7444 goto ppro_done;
7445 }
7446
7447       /* Otherwise, search for a 2-4 uop insn to issue.  */
7448 while (cur_uops != PPRO_UOPS_FEW)
7449 {
7450 if (insnp == ready)
7451 break;
7452 cur_uops = ix86_safe_ppro_uops (*--insnp);
7453 }
7454
7455 /* If so, move it to the head of the line. */
7456 if (cur_uops == PPRO_UOPS_FEW)
7457 ix86_reorder_insn (insnp, e_ready);
7458
7459 /* Issue the head of the queue. */
7460 issued_this_cycle = 1;
7461 decode[0] = *e_ready--;
7462 }
7463
7464 /* Look for simple insns to fill in the other two slots. */
7465 for (i = 1; i < 3; ++i)
7466 if (decode[i] == NULL)
7467 {
7468 if (ready >= e_ready)
7469 goto ppro_done;
7470
7471 insnp = e_ready;
7472 cur_uops = ix86_safe_ppro_uops (*insnp);
7473 while (cur_uops != PPRO_UOPS_ONE)
7474 {
7475 if (insnp == ready)
7476 break;
7477 cur_uops = ix86_safe_ppro_uops (*--insnp);
7478 }
7479
7480 /* Found one. Move it to the head of the queue and issue it. */
7481 if (cur_uops == PPRO_UOPS_ONE)
7482 {
7483 ix86_reorder_insn (insnp, e_ready);
7484 decode[i] = *e_ready--;
7485 issued_this_cycle++;
7486 continue;
7487 }
7488
7489 /* ??? Didn't find one. Ideally, here we would do a lazy split
7490 of 2-uop insns, issue one and queue the other. */
7491 }
7492
7493 ppro_done:
7494 if (issued_this_cycle == 0)
7495 issued_this_cycle = 1;
7496 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
7497 }
7498
7499 /* We are about to begin issuing insns for this clock cycle.
7500    Override the default sort algorithm to better slot instructions.  */
7501 int
7502 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7503 FILE *dump ATTRIBUTE_UNUSED;
7504 int sched_verbose ATTRIBUTE_UNUSED;
7505 rtx *ready;
7506 int n_ready;
7507 int clock_var ATTRIBUTE_UNUSED;
7508 {
7509 rtx *e_ready = ready + n_ready - 1;
7510
7511 if (n_ready < 2)
7512 goto out;
7513
7514 switch (ix86_cpu)
7515 {
7516 default:
7517 break;
7518
7519 case PROCESSOR_PENTIUM:
7520 ix86_sched_reorder_pentium (ready, e_ready);
7521 break;
7522
7523 case PROCESSOR_PENTIUMPRO:
7524 ix86_sched_reorder_ppro (ready, e_ready);
7525 break;
7526 }
7527
7528 out:
7529 return ix86_issue_rate ();
7530 }
7531
7532 /* We are about to issue INSN. Return the number of insns left on the
7533 ready queue that can be issued this cycle. */
7534
7535 int
7536 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
7537 FILE *dump;
7538 int sched_verbose;
7539 rtx insn;
7540 int can_issue_more;
7541 {
7542 int i;
7543 switch (ix86_cpu)
7544 {
7545 default:
7546 return can_issue_more - 1;
7547
7548 case PROCESSOR_PENTIUMPRO:
7549 {
7550 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
7551
7552 if (uops == PPRO_UOPS_MANY)
7553 {
7554 if (sched_verbose)
7555 ix86_dump_ppro_packet (dump);
7556 ix86_sched_data.ppro.decode[0] = insn;
7557 ix86_sched_data.ppro.decode[1] = NULL;
7558 ix86_sched_data.ppro.decode[2] = NULL;
7559 if (sched_verbose)
7560 ix86_dump_ppro_packet (dump);
7561 ix86_sched_data.ppro.decode[0] = NULL;
7562 }
7563 else if (uops == PPRO_UOPS_FEW)
7564 {
7565 if (sched_verbose)
7566 ix86_dump_ppro_packet (dump);
7567 ix86_sched_data.ppro.decode[0] = insn;
7568 ix86_sched_data.ppro.decode[1] = NULL;
7569 ix86_sched_data.ppro.decode[2] = NULL;
7570 }
7571 else
7572 {
7573 for (i = 0; i < 3; ++i)
7574 if (ix86_sched_data.ppro.decode[i] == NULL)
7575 {
7576 ix86_sched_data.ppro.decode[i] = insn;
7577 break;
7578 }
7579 if (i == 3)
7580 abort ();
7581 if (i == 2)
7582 {
7583 if (sched_verbose)
7584 ix86_dump_ppro_packet (dump);
7585 ix86_sched_data.ppro.decode[0] = NULL;
7586 ix86_sched_data.ppro.decode[1] = NULL;
7587 ix86_sched_data.ppro.decode[2] = NULL;
7588 }
7589 }
7590 }
7591 return --ix86_sched_data.ppro.issued_this_cycle;
7592 }
7593 }
7594 \f
7595 /* Walk through INSNS and look for MEM references whose address is DSTREG or
7596 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7597 appropriate. */
7598
7599 void
7600 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7601 rtx insns;
7602 rtx dstref, srcref, dstreg, srcreg;
7603 {
7604 rtx insn;
7605
7606 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7607 if (INSN_P (insn))
7608 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7609 dstreg, srcreg);
7610 }
7611
7612 /* Subroutine of above to actually do the updating by recursively walking
7613 the rtx. */
7614
7615 static void
7616 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
7617 rtx x;
7618 rtx dstref, srcref, dstreg, srcreg;
7619 {
7620 enum rtx_code code = GET_CODE (x);
7621 const char *format_ptr = GET_RTX_FORMAT (code);
7622 int i, j;
7623
7624 if (code == MEM && XEXP (x, 0) == dstreg)
7625 MEM_COPY_ATTRIBUTES (x, dstref);
7626 else if (code == MEM && XEXP (x, 0) == srcreg)
7627 MEM_COPY_ATTRIBUTES (x, srcref);
7628
7629 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
7630 {
7631 if (*format_ptr == 'e')
7632 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
7633 dstreg, srcreg);
7634 else if (*format_ptr == 'E')
7635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7636 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
7637 dstreg, srcreg);
7638 }
7639 }
7640 \f
7641 /* Compute the alignment given to a constant that is being placed in memory.
7642 EXP is the constant and ALIGN is the alignment that the object would
7643 ordinarily have.
7644 The value of this function is used instead of that alignment to align
7645 the object. */
7646
7647 int
7648 ix86_constant_alignment (exp, align)
7649 tree exp;
7650 int align;
7651 {
7652 if (TREE_CODE (exp) == REAL_CST)
7653 {
7654 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7655 return 64;
7656 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7657 return 128;
7658 }
7659 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7660 && align < 256)
7661 return 256;
7662
7663 return align;
7664 }
7665
7666 /* Compute the alignment for a static variable.
7667 TYPE is the data type, and ALIGN is the alignment that
7668 the object would ordinarily have. The value of this function is used
7669 instead of that alignment to align the object. */
7670
7671 int
7672 ix86_data_alignment (type, align)
7673 tree type;
7674 int align;
7675 {
7676 if (AGGREGATE_TYPE_P (type)
7677 && TYPE_SIZE (type)
7678 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7679 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7680 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7681 return 256;
7682
7683 if (TREE_CODE (type) == ARRAY_TYPE)
7684 {
7685 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7686 return 64;
7687 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7688 return 128;
7689 }
7690 else if (TREE_CODE (type) == COMPLEX_TYPE)
7691 {
7693 if (TYPE_MODE (type) == DCmode && align < 64)
7694 return 64;
7695 if (TYPE_MODE (type) == XCmode && align < 128)
7696 return 128;
7697 }
7698 else if ((TREE_CODE (type) == RECORD_TYPE
7699 || TREE_CODE (type) == UNION_TYPE
7700 || TREE_CODE (type) == QUAL_UNION_TYPE)
7701 && TYPE_FIELDS (type))
7702 {
7703 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7704 return 64;
7705 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7706 return 128;
7707 }
7708 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7709 || TREE_CODE (type) == INTEGER_TYPE)
7710 {
7711 if (TYPE_MODE (type) == DFmode && align < 64)
7712 return 64;
7713 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7714 return 128;
7715 }
7716
7717 return align;
7718 }
7719
7720 /* Compute the alignment for a local variable.
7721 TYPE is the data type, and ALIGN is the alignment that
7722 the object would ordinarily have. The value of this macro is used
7723 instead of that alignment to align the object. */
7724
7725 int
7726 ix86_local_alignment (type, align)
7727 tree type;
7728 int align;
7729 {
7730 if (TREE_CODE (type) == ARRAY_TYPE)
7731 {
7732 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7733 return 64;
7734 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7735 return 128;
7736 }
7737 else if (TREE_CODE (type) == COMPLEX_TYPE)
7738 {
7739 if (TYPE_MODE (type) == DCmode && align < 64)
7740 return 64;
7741 if (TYPE_MODE (type) == XCmode && align < 128)
7742 return 128;
7743 }
7744 else if ((TREE_CODE (type) == RECORD_TYPE
7745 || TREE_CODE (type) == UNION_TYPE
7746 || TREE_CODE (type) == QUAL_UNION_TYPE)
7747 && TYPE_FIELDS (type))
7748 {
7749 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7750 return 64;
7751 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7752 return 128;
7753 }
7754 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7755 || TREE_CODE (type) == INTEGER_TYPE)
7756 {
7758 if (TYPE_MODE (type) == DFmode && align < 64)
7759 return 64;
7760 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7761 return 128;
7762 }
7763 return align;
7764 }
7765
7766 #define def_builtin(NAME, TYPE, CODE) \
7767 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7768 struct builtin_description
7769 {
7770 enum insn_code icode;
7771 const char * name;
7772 enum ix86_builtins code;
7773 enum rtx_code comparison;
7774 unsigned int flag;
7775 };
7776
7777 static struct builtin_description bdesc_comi[] =
7778 {
7779 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7780 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7781 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7782 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7783 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7784 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7785 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7786 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7787 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7788 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7789 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7790 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
7791 };
7792
7793 static struct builtin_description bdesc_2arg[] =
7794 {
7795 /* SSE */
7796 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7797 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7798 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7799 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7800 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7801 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7802 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7803 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7804
7805 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7806 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7807 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7808 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7809 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7810 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7811 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7812 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7813 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7814 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7815 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7816 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7817 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7818 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7819 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7820 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7821 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7822 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7823 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7824 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7825 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7826 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7827 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7828 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7829
7830 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7831 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7832 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7833 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7834
7835 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7836 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7837 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7838 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7839
7840 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7841 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7842 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7843 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7844 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7845
7846 /* MMX */
7847 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7848 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7849 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7850 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7851 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7852 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7853
7854 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7855 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7856 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7857 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7858 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7859 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7860 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7861 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7862
7863 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7864 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7865 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7866
7867 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7868 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7869 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7870 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7871
7872 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7873 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7874
7875 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7876 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7877 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7878 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7879 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7880 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7881
7882 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7883 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7884 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7885 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7886
7887 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7888 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7889 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7890 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7891 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7892 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7893
7894 /* Special. */
7895 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7896 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7897 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7898
7899 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7900 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7901
7902 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7903 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7904 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7905 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7906 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7907 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7908
7909 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7910 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7911 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7912 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7913 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7914 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7915
7916 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7917 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7918 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7919 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7920
7921 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7922 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
7924 };
7925
7926 static struct builtin_description bdesc_1arg[] =
7927 {
7928 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7929 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7930
7931 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7932 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7933 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7934
7935 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7936 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7937 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7938 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7940 };
7941
7942 /* Set up all the target-specific builtins.  This is not called if
7943    TARGET_MMX is zero.  Otherwise, if TARGET_SSE is not set, only the
7944    MMX builtins are defined.  */
7945 void
7946 ix86_init_builtins ()
7947 {
7948 struct builtin_description * d;
7949 size_t i;
7950 tree endlink = void_list_node;
7951
7952 tree pchar_type_node = build_pointer_type (char_type_node);
7953 tree pfloat_type_node = build_pointer_type (float_type_node);
7954 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7955 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7956
7957 /* Comparisons. */
7958 tree int_ftype_v4sf_v4sf
7959 = build_function_type (integer_type_node,
7960 tree_cons (NULL_TREE, V4SF_type_node,
7961 tree_cons (NULL_TREE,
7962 V4SF_type_node,
7963 endlink)));
7964 tree v4si_ftype_v4sf_v4sf
7965 = build_function_type (V4SI_type_node,
7966 tree_cons (NULL_TREE, V4SF_type_node,
7967 tree_cons (NULL_TREE,
7968 V4SF_type_node,
7969 endlink)));
7970 /* MMX/SSE/integer conversions. */
7971 tree int_ftype_v4sf_int
7972 = build_function_type (integer_type_node,
7973 tree_cons (NULL_TREE, V4SF_type_node,
7974 tree_cons (NULL_TREE,
7975 integer_type_node,
7976 endlink)));
7977 tree int_ftype_v4sf
7978 = build_function_type (integer_type_node,
7979 tree_cons (NULL_TREE, V4SF_type_node,
7980 endlink));
7981 tree int_ftype_v8qi
7982 = build_function_type (integer_type_node,
7983 tree_cons (NULL_TREE, V8QI_type_node,
7984 endlink));
7985 tree int_ftype_v2si
7986 = build_function_type (integer_type_node,
7987 tree_cons (NULL_TREE, V2SI_type_node,
7988 endlink));
7989 tree v2si_ftype_int
7990 = build_function_type (V2SI_type_node,
7991 tree_cons (NULL_TREE, integer_type_node,
7992 endlink));
7993 tree v4sf_ftype_v4sf_int
7994 = build_function_type (V4SF_type_node,
7995 tree_cons (NULL_TREE, V4SF_type_node,
7996 tree_cons (NULL_TREE, integer_type_node,
7997 endlink)));
7998 tree v4sf_ftype_v4sf_v2si
7999 = build_function_type (V4SF_type_node,
8000 tree_cons (NULL_TREE, V4SF_type_node,
8001 tree_cons (NULL_TREE, V2SI_type_node,
8002 endlink)));
8003 tree int_ftype_v4hi_int
8004 = build_function_type (integer_type_node,
8005 tree_cons (NULL_TREE, V4HI_type_node,
8006 tree_cons (NULL_TREE, integer_type_node,
8007 endlink)));
8008 tree v4hi_ftype_v4hi_int_int
8009 = build_function_type (V4HI_type_node,
8010 tree_cons (NULL_TREE, V4HI_type_node,
8011 tree_cons (NULL_TREE, integer_type_node,
8012 tree_cons (NULL_TREE,
8013 integer_type_node,
8014 endlink))));
8015 /* Miscellaneous. */
8016 tree v8qi_ftype_v4hi_v4hi
8017 = build_function_type (V8QI_type_node,
8018 tree_cons (NULL_TREE, V4HI_type_node,
8019 tree_cons (NULL_TREE, V4HI_type_node,
8020 endlink)));
8021 tree v4hi_ftype_v2si_v2si
8022 = build_function_type (V4HI_type_node,
8023 tree_cons (NULL_TREE, V2SI_type_node,
8024 tree_cons (NULL_TREE, V2SI_type_node,
8025 endlink)));
8026 tree v4sf_ftype_v4sf_v4sf_int
8027 = build_function_type (V4SF_type_node,
8028 tree_cons (NULL_TREE, V4SF_type_node,
8029 tree_cons (NULL_TREE, V4SF_type_node,
8030 tree_cons (NULL_TREE,
8031 integer_type_node,
8032 endlink))));
8033 tree v4hi_ftype_v8qi_v8qi
8034 = build_function_type (V4HI_type_node,
8035 tree_cons (NULL_TREE, V8QI_type_node,
8036 tree_cons (NULL_TREE, V8QI_type_node,
8037 endlink)));
8038 tree v2si_ftype_v4hi_v4hi
8039 = build_function_type (V2SI_type_node,
8040 tree_cons (NULL_TREE, V4HI_type_node,
8041 tree_cons (NULL_TREE, V4HI_type_node,
8042 endlink)));
8043 tree v4hi_ftype_v4hi_int
8044 = build_function_type (V4HI_type_node,
8045 tree_cons (NULL_TREE, V4HI_type_node,
8046 tree_cons (NULL_TREE, integer_type_node,
8047 endlink)));
8048 tree di_ftype_di_int
8049 = build_function_type (long_long_unsigned_type_node,
8050 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8051 tree_cons (NULL_TREE, integer_type_node,
8052 endlink)));
8053 tree v8qi_ftype_v8qi_di
8054 = build_function_type (V8QI_type_node,
8055 tree_cons (NULL_TREE, V8QI_type_node,
8056 tree_cons (NULL_TREE,
8057 long_long_integer_type_node,
8058 endlink)));
8059 tree v4hi_ftype_v4hi_di
8060 = build_function_type (V4HI_type_node,
8061 tree_cons (NULL_TREE, V4HI_type_node,
8062 tree_cons (NULL_TREE,
8063 long_long_integer_type_node,
8064 endlink)));
8065 tree v2si_ftype_v2si_di
8066 = build_function_type (V2SI_type_node,
8067 tree_cons (NULL_TREE, V2SI_type_node,
8068 tree_cons (NULL_TREE,
8069 long_long_integer_type_node,
8070 endlink)));
8071 tree void_ftype_void
8072 = build_function_type (void_type_node, endlink);
8073 tree void_ftype_pchar_int
8074 = build_function_type (void_type_node,
8075 tree_cons (NULL_TREE, pchar_type_node,
8076 tree_cons (NULL_TREE, integer_type_node,
8077 endlink)));
8078 tree void_ftype_unsigned
8079 = build_function_type (void_type_node,
8080 tree_cons (NULL_TREE, unsigned_type_node,
8081 endlink));
8082 tree unsigned_ftype_void
8083 = build_function_type (unsigned_type_node, endlink);
8084 tree di_ftype_void
8085 = build_function_type (long_long_unsigned_type_node, endlink);
8086 tree ti_ftype_void
8087 = build_function_type (intTI_type_node, endlink);
8088 tree v2si_ftype_v4sf
8089 = build_function_type (V2SI_type_node,
8090 tree_cons (NULL_TREE, V4SF_type_node,
8091 endlink));
8092 /* Loads/stores. */
8093 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8094 tree_cons (NULL_TREE, V8QI_type_node,
8095 tree_cons (NULL_TREE,
8096 pchar_type_node,
8097 endlink)));
8098 tree void_ftype_v8qi_v8qi_pchar
8099 = build_function_type (void_type_node, maskmovq_args);
8100 tree v4sf_ftype_pfloat
8101 = build_function_type (V4SF_type_node,
8102 tree_cons (NULL_TREE, pfloat_type_node,
8103 endlink));
8104 tree v4sf_ftype_float
8105 = build_function_type (V4SF_type_node,
8106 tree_cons (NULL_TREE, float_type_node,
8107 endlink));
8108 tree v4sf_ftype_float_float_float_float
8109 = build_function_type (V4SF_type_node,
8110 tree_cons (NULL_TREE, float_type_node,
8111 tree_cons (NULL_TREE, float_type_node,
8112 tree_cons (NULL_TREE,
8113 float_type_node,
8114 tree_cons (NULL_TREE,
8115 float_type_node,
8116 endlink)))));
8117 /* @@@ the type is bogus */
8118 tree v4sf_ftype_v4sf_pv2si
8119 = build_function_type (V4SF_type_node,
8120 tree_cons (NULL_TREE, V4SF_type_node,
8121 tree_cons (NULL_TREE, pv2si_type_node,
8122 endlink)));
8123 tree v4sf_ftype_pv2si_v4sf
8124 = build_function_type (V4SF_type_node,
8125 tree_cons (NULL_TREE, pv2si_type_node,
8126 tree_cons (NULL_TREE, V4SF_type_node,
8127 endlink)));
8128 tree void_ftype_pfloat_v4sf
8129 = build_function_type (void_type_node,
8130 tree_cons (NULL_TREE, pfloat_type_node,
8131 tree_cons (NULL_TREE, V4SF_type_node,
8132 endlink)));
8133 tree void_ftype_pdi_di
8134 = build_function_type (void_type_node,
8135 tree_cons (NULL_TREE, pdi_type_node,
8136 tree_cons (NULL_TREE,
8137 long_long_unsigned_type_node,
8138 endlink)));
8139 /* Normal vector unops. */
8140 tree v4sf_ftype_v4sf
8141 = build_function_type (V4SF_type_node,
8142 tree_cons (NULL_TREE, V4SF_type_node,
8143 endlink));
8144
8145 /* Normal vector binops. */
8146 tree v4sf_ftype_v4sf_v4sf
8147 = build_function_type (V4SF_type_node,
8148 tree_cons (NULL_TREE, V4SF_type_node,
8149 tree_cons (NULL_TREE, V4SF_type_node,
8150 endlink)));
8151 tree v8qi_ftype_v8qi_v8qi
8152 = build_function_type (V8QI_type_node,
8153 tree_cons (NULL_TREE, V8QI_type_node,
8154 tree_cons (NULL_TREE, V8QI_type_node,
8155 endlink)));
8156 tree v4hi_ftype_v4hi_v4hi
8157 = build_function_type (V4HI_type_node,
8158 tree_cons (NULL_TREE, V4HI_type_node,
8159 tree_cons (NULL_TREE, V4HI_type_node,
8160 endlink)));
8161 tree v2si_ftype_v2si_v2si
8162 = build_function_type (V2SI_type_node,
8163 tree_cons (NULL_TREE, V2SI_type_node,
8164 tree_cons (NULL_TREE, V2SI_type_node,
8165 endlink)));
8166 tree ti_ftype_ti_ti
8167 = build_function_type (intTI_type_node,
8168 tree_cons (NULL_TREE, intTI_type_node,
8169 tree_cons (NULL_TREE, intTI_type_node,
8170 endlink)));
8171 tree di_ftype_di_di
8172 = build_function_type (long_long_unsigned_type_node,
8173 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8174 tree_cons (NULL_TREE,
8175 long_long_unsigned_type_node,
8176 endlink)));
8177
8178 /* Add all builtins that are more or less simple operations on two
8179 operands. */
8180 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8181 {
8182 /* Use one of the operands; the target can have a different mode for
8183 mask-generating compares. */
8184 enum machine_mode mode;
8185 tree type;
8186
8187 if (d->name == 0)
8188 continue;
8189 mode = insn_data[d->icode].operand[1].mode;
8190
8191 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8192 continue;
8193
8194 switch (mode)
8195 {
8196 case V4SFmode:
8197 type = v4sf_ftype_v4sf_v4sf;
8198 break;
8199 case V8QImode:
8200 type = v8qi_ftype_v8qi_v8qi;
8201 break;
8202 case V4HImode:
8203 type = v4hi_ftype_v4hi_v4hi;
8204 break;
8205 case V2SImode:
8206 type = v2si_ftype_v2si_v2si;
8207 break;
8208 case TImode:
8209 type = ti_ftype_ti_ti;
8210 break;
8211 case DImode:
8212 type = di_ftype_di_di;
8213 break;
8214
8215 default:
8216 abort ();
8217 }
8218
8219 /* Override for comparisons. */
8220 if (d->icode == CODE_FOR_maskcmpv4sf3
8221 || d->icode == CODE_FOR_maskncmpv4sf3
8222 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8223 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8224 type = v4si_ftype_v4sf_v4sf;
8225
8226 def_builtin (d->name, type, d->code);
8227 }
8228
8229 /* Add the remaining MMX insns with somewhat more complicated types. */
8230 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8231 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8232 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8233 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8234 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8235 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8236 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8237 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8238 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8239
8240 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8241 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8242 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8243
8244 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8245 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8246
8247 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8248 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8249
8250 /* Everything beyond this point is SSE only. */
8251 if (! TARGET_SSE)
8252 return;
8253
8254 /* comi/ucomi insns. */
8255 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8256 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8257
8258 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8259 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8260 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8261
8262 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8263 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8264 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8265 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8266 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8267 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8268
8269 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8270 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8271
8272 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8273
8274 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8275 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8276 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8277 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8278 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8279 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8280
8281 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8282 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8283 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8284 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8285
8286 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8287 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8288 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8289 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8290
8291 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8292 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8293
8294 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8295
8296 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8297 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8298 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8299 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8300 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8301 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8302
8303 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8304
8305 /* Composite intrinsics. */
8306 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8307 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8308 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8309 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8310 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8311 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8312 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8313 }
8314
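/* A minimal user-level sketch (illustrative, assumes -msse) of the
   builtins defined above:

     typedef float v4sf __attribute__ ((mode (V4SF)));

     v4sf
     add4 (v4sf a, v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }
*/
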
8315 /* Errors in the source file can cause expand_expr to return const0_rtx
8316 where we expect a vector. To avoid crashing, use one of the vector
8317 clear instructions. */
8318 static rtx
8319 safe_vector_operand (x, mode)
8320 rtx x;
8321 enum machine_mode mode;
8322 {
8323 if (x != const0_rtx)
8324 return x;
8325 x = gen_reg_rtx (mode);
8326
8327 if (VALID_MMX_REG_MODE (mode))
8328 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8329 : gen_rtx_SUBREG (DImode, x, 0)));
8330 else
8331 emit_insn (gen_sse_clrti (mode == TImode ? x
8332 : gen_rtx_SUBREG (TImode, x, 0)));
8333 return x;
8334 }
8335
8336 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
8337
8338 static rtx
8339 ix86_expand_binop_builtin (icode, arglist, target)
8340 enum insn_code icode;
8341 tree arglist;
8342 rtx target;
8343 {
8344 rtx pat;
8345 tree arg0 = TREE_VALUE (arglist);
8346 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8347 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8348 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8349 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8350 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8351 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8352
8353 if (VECTOR_MODE_P (mode0))
8354 op0 = safe_vector_operand (op0, mode0);
8355 if (VECTOR_MODE_P (mode1))
8356 op1 = safe_vector_operand (op1, mode1);
8357
8358 if (! target
8359 || GET_MODE (target) != tmode
8360 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8361 target = gen_reg_rtx (tmode);
8362
8363 /* In case the insn wants input operands in modes different from
8364 the result, abort. */
8365 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8366 abort ();
8367
8368 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8369 op0 = copy_to_mode_reg (mode0, op0);
8370 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8371 op1 = copy_to_mode_reg (mode1, op1);
8372
8373 pat = GEN_FCN (icode) (target, op0, op1);
8374 if (! pat)
8375 return 0;
8376 emit_insn (pat);
8377 return target;
8378 }
8379
8380 /* Subroutine of ix86_expand_builtin to take care of stores. */
8381
8382 static rtx
8383 ix86_expand_store_builtin (icode, arglist, shuffle)
8384 enum insn_code icode;
8385 tree arglist;
8386 int shuffle;
8387 {
8388 rtx pat;
8389 tree arg0 = TREE_VALUE (arglist);
8390 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8391 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8392 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8393 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8394 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8395
8396 if (VECTOR_MODE_P (mode1))
8397 op1 = safe_vector_operand (op1, mode1);
8398
8399 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8400 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8401 op1 = copy_to_mode_reg (mode1, op1);
8402 if (shuffle >= 0)
8403 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8404 pat = GEN_FCN (icode) (op0, op1);
8405 if (pat)
8406 emit_insn (pat);
8407 return 0;
8408 }
8409
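/* Note (added for clarity): SHUFFLE == -1 requests a plain store; a
   non-negative value is a shufps immediate applied to the source before
   the store, as used by IX86_BUILTIN_STOREPS1 (0) and
   IX86_BUILTIN_STORERPS (0x1B) below.  */
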
8410 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8411
8412 static rtx
8413 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8414 enum insn_code icode;
8415 tree arglist;
8416 rtx target;
8417 int do_load;
8418 {
8419 rtx pat;
8420 tree arg0 = TREE_VALUE (arglist);
8421 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8422 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8423 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8424
8425 if (! target
8426 || GET_MODE (target) != tmode
8427 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8428 target = gen_reg_rtx (tmode);
8429 if (do_load)
8430 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8431 else
8432 {
8433 if (VECTOR_MODE_P (mode0))
8434 op0 = safe_vector_operand (op0, mode0);
8435
8436 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8437 op0 = copy_to_mode_reg (mode0, op0);
8438 }
8439
8440 pat = GEN_FCN (icode) (target, op0);
8441 if (! pat)
8442 return 0;
8443 emit_insn (pat);
8444 return target;
8445 }
8446
8447 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8448 sqrtss, rsqrtss, rcpss. */
8449
8450 static rtx
8451 ix86_expand_unop1_builtin (icode, arglist, target)
8452 enum insn_code icode;
8453 tree arglist;
8454 rtx target;
8455 {
8456 rtx pat;
8457 tree arg0 = TREE_VALUE (arglist);
8458 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8459 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8460 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8461
8462 if (! target
8463 || GET_MODE (target) != tmode
8464 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8465 target = gen_reg_rtx (tmode);
8466
8467 if (VECTOR_MODE_P (mode0))
8468 op0 = safe_vector_operand (op0, mode0);
8469
8470 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8471 op0 = copy_to_mode_reg (mode0, op0);
8472
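  /* The vm (scalar) patterns consume the operand twice: one copy is the
     input to the operation, the other supplies the untouched upper
     elements of the result.  */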
8473 pat = GEN_FCN (icode) (target, op0, op0);
8474 if (! pat)
8475 return 0;
8476 emit_insn (pat);
8477 return target;
8478 }
8479
8480 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8481
8482 static rtx
8483 ix86_expand_sse_compare (d, arglist, target)
8484 struct builtin_description *d;
8485 tree arglist;
8486 rtx target;
8487 {
8488 rtx pat;
8489 tree arg0 = TREE_VALUE (arglist);
8490 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8491 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8492 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8493 rtx op2;
8494 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8495 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8496 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8497 enum rtx_code comparison = d->comparison;
8498
8499 if (VECTOR_MODE_P (mode0))
8500 op0 = safe_vector_operand (op0, mode0);
8501 if (VECTOR_MODE_P (mode1))
8502 op1 = safe_vector_operand (op1, mode1);
8503
8504 /* Swap operands if we have a comparison that isn't available in
8505 hardware. */
8506 if (d->flag)
8507 {
8508 target = gen_reg_rtx (tmode);
8509 emit_move_insn (target, op1);
8510 op1 = op0;
8511 op0 = target;
8512 comparison = swap_condition (comparison);
8513 }
8514 else if (! target
8515 || GET_MODE (target) != tmode
8516 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8517 target = gen_reg_rtx (tmode);
8518
8519 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8520 op0 = copy_to_mode_reg (mode0, op0);
8521 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8522 op1 = copy_to_mode_reg (mode1, op1);
8523
8524 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8525 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8526 if (! pat)
8527 return 0;
8528 emit_insn (pat);
8529 return target;
8530 }
8531
8532 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8533
8534 static rtx
8535 ix86_expand_sse_comi (d, arglist, target)
8536 struct builtin_description *d;
8537 tree arglist;
8538 rtx target;
8539 {
8540 rtx pat;
8541 tree arg0 = TREE_VALUE (arglist);
8542 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8543 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8544 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8545 rtx op2;
8546 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8547 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8548 enum rtx_code comparison = d->comparison;
8549
8550 if (VECTOR_MODE_P (mode0))
8551 op0 = safe_vector_operand (op0, mode0);
8552 if (VECTOR_MODE_P (mode1))
8553 op1 = safe_vector_operand (op1, mode1);
8554
8555 /* Swap operands if we have a comparison that isn't available in
8556 hardware. */
8557 if (d->flag)
8558 {
8559 rtx tmp = op1;
8560 op1 = op0;
8561 op0 = tmp;
8562 comparison = swap_condition (comparison);
8563 }
8564
8565 target = gen_reg_rtx (SImode);
8566 emit_move_insn (target, const0_rtx);
8567 target = gen_rtx_SUBREG (QImode, target, 0);
8568
8569 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8570 op0 = copy_to_mode_reg (mode0, op0);
8571 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8572 op1 = copy_to_mode_reg (mode1, op1);
8573
8574 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8575 pat = GEN_FCN (d->icode) (op0, op1, op2);
8576 if (! pat)
8577 return 0;
8578 emit_insn (pat);
8579 emit_insn (gen_setcc_2 (target, op2));
8580
8581 return target;
8582 }
8583
8584 /* Expand an expression EXP that calls a built-in function,
8585 with result going to TARGET if that's convenient
8586 (and in mode MODE if that's convenient).
8587 SUBTARGET may be used as the target for computing one of EXP's operands.
8588 IGNORE is nonzero if the value is to be ignored. */
8589
8590 rtx
8591 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8592 tree exp;
8593 rtx target;
8594 rtx subtarget ATTRIBUTE_UNUSED;
8595 enum machine_mode mode ATTRIBUTE_UNUSED;
8596 int ignore ATTRIBUTE_UNUSED;
8597 {
8598 struct builtin_description *d;
8599 size_t i;
8600 enum insn_code icode;
8601 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8602 tree arglist = TREE_OPERAND (exp, 1);
8603 tree arg0, arg1, arg2, arg3;
8604 rtx op0, op1, op2, pat;
8605 enum machine_mode tmode, mode0, mode1, mode2;
8606 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8607
8608 switch (fcode)
8609 {
8610 case IX86_BUILTIN_EMMS:
8611 emit_insn (gen_emms ());
8612 return 0;
8613
8614 case IX86_BUILTIN_SFENCE:
8615 emit_insn (gen_sfence ());
8616 return 0;
8617
8618 case IX86_BUILTIN_M_FROM_INT:
8619 target = gen_reg_rtx (DImode);
8620 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8621 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8622 return target;
8623
8624 case IX86_BUILTIN_M_TO_INT:
8625 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8626 op0 = copy_to_mode_reg (DImode, op0);
8627 target = gen_reg_rtx (SImode);
8628 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8629 return target;
8630
8631 case IX86_BUILTIN_PEXTRW:
8632 icode = CODE_FOR_mmx_pextrw;
8633 arg0 = TREE_VALUE (arglist);
8634 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8635 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8636 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8637 tmode = insn_data[icode].operand[0].mode;
8638 mode0 = insn_data[icode].operand[1].mode;
8639 mode1 = insn_data[icode].operand[2].mode;
8640
8641 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8642 op0 = copy_to_mode_reg (mode0, op0);
8643 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8644 {
8645 /* @@@ better error message */
8646 error ("selector must be an immediate");
8647 return const0_rtx;
8648 }
8649 if (target == 0
8650 || GET_MODE (target) != tmode
8651 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8652 target = gen_reg_rtx (tmode);
8653 pat = GEN_FCN (icode) (target, op0, op1);
8654 if (! pat)
8655 return 0;
8656 emit_insn (pat);
8657 return target;
8658
8659 case IX86_BUILTIN_PINSRW:
8660 icode = CODE_FOR_mmx_pinsrw;
8661 arg0 = TREE_VALUE (arglist);
8662 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8663 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8664 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8665 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8666 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8667 tmode = insn_data[icode].operand[0].mode;
8668 mode0 = insn_data[icode].operand[1].mode;
8669 mode1 = insn_data[icode].operand[2].mode;
8670 mode2 = insn_data[icode].operand[3].mode;
8671
8672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8673 op0 = copy_to_mode_reg (mode0, op0);
8674 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8675 op1 = copy_to_mode_reg (mode1, op1);
8676 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8677 {
8678 /* @@@ better error message */
8679 error ("selector must be an immediate");
8680 return const0_rtx;
8681 }
8682 if (target == 0
8683 || GET_MODE (target) != tmode
8684 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8685 target = gen_reg_rtx (tmode);
8686 pat = GEN_FCN (icode) (target, op0, op1, op2);
8687 if (! pat)
8688 return 0;
8689 emit_insn (pat);
8690 return target;
8691
8692 case IX86_BUILTIN_MASKMOVQ:
8693 icode = CODE_FOR_mmx_maskmovq;
8694 /* Note the arg order is different from the operand order. */
8695 arg1 = TREE_VALUE (arglist);
8696 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8697 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8698 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8699 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8700 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8701 mode0 = insn_data[icode].operand[0].mode;
8702 mode1 = insn_data[icode].operand[1].mode;
8703 mode2 = insn_data[icode].operand[2].mode;
8704
8705 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8706 op0 = copy_to_mode_reg (mode0, op0);
8707 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8708 op1 = copy_to_mode_reg (mode1, op1);
8709 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8710 op2 = copy_to_mode_reg (mode2, op2);
8711 pat = GEN_FCN (icode) (op0, op1, op2);
8712 if (! pat)
8713 return 0;
8714 emit_insn (pat);
8715 return 0;
8716
8717 case IX86_BUILTIN_SQRTSS:
8718 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8719 case IX86_BUILTIN_RSQRTSS:
8720 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8721 case IX86_BUILTIN_RCPSS:
8722 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8723
8724 case IX86_BUILTIN_LOADAPS:
8725 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8726
8727 case IX86_BUILTIN_LOADUPS:
8728 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8729
8730 case IX86_BUILTIN_STOREAPS:
8731 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8732 case IX86_BUILTIN_STOREUPS:
8733 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8734
8735 case IX86_BUILTIN_LOADSS:
8736 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8737
8738 case IX86_BUILTIN_STORESS:
8739 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8740
8741 case IX86_BUILTIN_LOADHPS:
8742 case IX86_BUILTIN_LOADLPS:
8743 icode = (fcode == IX86_BUILTIN_LOADHPS
8744 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8745 arg0 = TREE_VALUE (arglist);
8746 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8747 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8748 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8749 tmode = insn_data[icode].operand[0].mode;
8750 mode0 = insn_data[icode].operand[1].mode;
8751 mode1 = insn_data[icode].operand[2].mode;
8752
8753 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8754 op0 = copy_to_mode_reg (mode0, op0);
8755 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8756 if (target == 0
8757 || GET_MODE (target) != tmode
8758 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8759 target = gen_reg_rtx (tmode);
8760 pat = GEN_FCN (icode) (target, op0, op1);
8761 if (! pat)
8762 return 0;
8763 emit_insn (pat);
8764 return target;
8765
8766 case IX86_BUILTIN_STOREHPS:
8767 case IX86_BUILTIN_STORELPS:
8768 icode = (fcode == IX86_BUILTIN_STOREHPS
8769 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8770 arg0 = TREE_VALUE (arglist);
8771 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8772 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8773 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8774 mode0 = insn_data[icode].operand[1].mode;
8775 mode1 = insn_data[icode].operand[2].mode;
8776
8777 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8778 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8779 op1 = copy_to_mode_reg (mode1, op1);
8780
8781 pat = GEN_FCN (icode) (op0, op0, op1);
8782 if (! pat)
8783 return 0;
8784 emit_insn (pat);
8785 return 0;
8786
8787 case IX86_BUILTIN_MOVNTPS:
8788 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8789 case IX86_BUILTIN_MOVNTQ:
8790 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8791
8792 case IX86_BUILTIN_LDMXCSR:
8793 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8794 target = assign_386_stack_local (SImode, 0);
8795 emit_move_insn (target, op0);
8796 emit_insn (gen_ldmxcsr (target));
8797 return 0;
8798
8799 case IX86_BUILTIN_STMXCSR:
8800 target = assign_386_stack_local (SImode, 0);
8801 emit_insn (gen_stmxcsr (target));
8802 return copy_to_mode_reg (SImode, target);
8803
8804 case IX86_BUILTIN_PREFETCH:
8805 icode = CODE_FOR_prefetch;
8806 arg0 = TREE_VALUE (arglist);
8807 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8808 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8809 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8810 mode0 = insn_data[icode].operand[0].mode;
8811 mode1 = insn_data[icode].operand[1].mode;
8812
8813 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8814 {
8815 /* @@@ better error message */
8816 error ("selector must be an immediate");
8817 return const0_rtx;
8818 }
8819
8820 op0 = copy_to_mode_reg (Pmode, op0);
8821 pat = GEN_FCN (icode) (op0, op1);
8822 if (! pat)
8823 return 0;
8824 emit_insn (pat);
8825 return target;
8826
8827 case IX86_BUILTIN_SHUFPS:
8828 icode = CODE_FOR_sse_shufps;
8829 arg0 = TREE_VALUE (arglist);
8830 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8831 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8832 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8833 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8834 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8835 tmode = insn_data[icode].operand[0].mode;
8836 mode0 = insn_data[icode].operand[1].mode;
8837 mode1 = insn_data[icode].operand[2].mode;
8838 mode2 = insn_data[icode].operand[3].mode;
8839
8840 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8841 op0 = copy_to_mode_reg (mode0, op0);
8842 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8843 op1 = copy_to_mode_reg (mode1, op1);
8844 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8845 {
8846 /* @@@ better error message */
8847 error ("mask must be an immediate");
8848 return const0_rtx;
8849 }
8850 if (target == 0
8851 || GET_MODE (target) != tmode
8852 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8853 target = gen_reg_rtx (tmode);
8854 pat = GEN_FCN (icode) (target, op0, op1, op2);
8855 if (! pat)
8856 return 0;
8857 emit_insn (pat);
8858 return target;
8859
8860 case IX86_BUILTIN_PSHUFW:
8861 icode = CODE_FOR_mmx_pshufw;
8862 arg0 = TREE_VALUE (arglist);
8863 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8864 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8865 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8866 tmode = insn_data[icode].operand[0].mode;
8867 mode0 = insn_data[icode].operand[2].mode;
8868 mode1 = insn_data[icode].operand[3].mode;
8869
8870 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8871 op0 = copy_to_mode_reg (mode0, op0);
8872 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8873 {
8874 /* @@@ better error message */
8875 error ("mask must be an immediate");
8876 return const0_rtx;
8877 }
8878 if (target == 0
8879 || GET_MODE (target) != tmode
8880 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8881 target = gen_reg_rtx (tmode);
8882 pat = GEN_FCN (icode) (target, target, op0, op1);
8883 if (! pat)
8884 return 0;
8885 emit_insn (pat);
8886 return target;
8887
8888 /* Composite intrinsics. */
8889 case IX86_BUILTIN_SETPS1:
8890 target = assign_386_stack_local (SFmode, 0);
8891 arg0 = TREE_VALUE (arglist);
8892 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8893 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8894 op0 = gen_reg_rtx (V4SFmode);
8895 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8896 XEXP (target, 0))));
8897 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8898 return op0;
8899
8900 case IX86_BUILTIN_SETPS:
8901 target = assign_386_stack_local (V4SFmode, 0);
8902 op0 = change_address (target, SFmode, XEXP (target, 0));
8903 arg0 = TREE_VALUE (arglist);
8904 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8905 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8906 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8907 emit_move_insn (op0,
8908 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8909 emit_move_insn (adj_offsettable_operand (op0, 4),
8910 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8911 emit_move_insn (adj_offsettable_operand (op0, 8),
8912 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8913 emit_move_insn (adj_offsettable_operand (op0, 12),
8914 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8915 op0 = gen_reg_rtx (V4SFmode);
8916 emit_insn (gen_sse_movaps (op0, target));
8917 return op0;
8918
8919 case IX86_BUILTIN_CLRPS:
8920 target = gen_reg_rtx (TImode);
8921 emit_insn (gen_sse_clrti (target));
8922 return target;
8923
8924 case IX86_BUILTIN_LOADRPS:
8925 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8926 gen_reg_rtx (V4SFmode), 1);
8927 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8928 return target;
8929
8930 case IX86_BUILTIN_LOADPS1:
8931 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8932 gen_reg_rtx (V4SFmode), 1);
8933 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8934 return target;
8935
8936 case IX86_BUILTIN_STOREPS1:
8937 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8938 case IX86_BUILTIN_STORERPS:
8939 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8940
8941 case IX86_BUILTIN_MMX_ZERO:
8942 target = gen_reg_rtx (DImode);
8943 emit_insn (gen_mmx_clrdi (target));
8944 return target;
8945
8946 default:
8947 break;
8948 }
8949
8950 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8951 if (d->code == fcode)
8952 {
8953 /* Compares are treated specially. */
8954 if (d->icode == CODE_FOR_maskcmpv4sf3
8955 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8956 || d->icode == CODE_FOR_maskncmpv4sf3
8957 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8958 return ix86_expand_sse_compare (d, arglist, target);
8959
8960 return ix86_expand_binop_builtin (d->icode, arglist, target);
8961 }
8962
8963 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8964 if (d->code == fcode)
8965 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8966
8967 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8968 if (d->code == fcode)
8969 return ix86_expand_sse_comi (d, arglist, target);
8970
8971 /* @@@ Should really do something sensible here. */
8972 return 0;
8973 }
8974
8975 /* Store OPERAND to memory after reload is completed.  This means
8976    that we can't easily use assign_stack_local.  */
8977 rtx
8978 ix86_force_to_memory (mode, operand)
8979 enum machine_mode mode;
8980 rtx operand;
8981 {
8982 if (!reload_completed)
8983 abort ();
8984 switch (mode)
8985 {
8986 case DImode:
8987 {
8988 rtx operands[2];
8989 split_di (&operand, 1, operands, operands+1);
8990 emit_insn (
8991 gen_rtx_SET (VOIDmode,
8992 gen_rtx_MEM (SImode,
8993 gen_rtx_PRE_DEC (Pmode,
8994 stack_pointer_rtx)),
8995 operands[1]));
8996 emit_insn (
8997 gen_rtx_SET (VOIDmode,
8998 gen_rtx_MEM (SImode,
8999 gen_rtx_PRE_DEC (Pmode,
9000 stack_pointer_rtx)),
9001 operands[0]));
9002 }
9003 break;
9004 case HImode:
9005 /* It is better to store HImode values as SImode.  */
9006 if (!TARGET_PARTIAL_REG_STALL)
9007 operand = gen_lowpart (SImode, operand);
9008 /* FALLTHRU */
9009 case SImode:
9010 emit_insn (
9011 gen_rtx_SET (VOIDmode,
9012 gen_rtx_MEM (GET_MODE (operand),
9013 gen_rtx_PRE_DEC (SImode,
9014 stack_pointer_rtx)),
9015 operand));
9016 break;
9017 default:
9018 abort ();
9019 }
9020 return gen_rtx_MEM (mode, stack_pointer_rtx);
9021 }
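
/* For example (illustrative): for an SImode operand the code above emits
   a push of the operand and returns (mem:SI (reg:SI esp)), so the caller
   addresses the value at the new top of stack.  */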
9022
9023 /* Release the stack space used by ix86_force_to_memory.  */
9024 void
9025 ix86_free_from_memory (mode)
9026 enum machine_mode mode;
9027 {
9028 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
9029    to a pop or add instruction if registers are available.  */
9030 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9031 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9032 GEN_INT (mode == DImode
9033 ? 8
9034 : mode == HImode && TARGET_PARTIAL_REG_STALL
9035 ? 2
9036 : 4))));
9037 }
9038
9039 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
9040 QImode must go into class Q_REGS.
9041    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf
9042    to do mem-to-mem moves through integer regs.  */
9043 enum reg_class
9044 ix86_preferred_reload_class (x, class)
9045 rtx x;
9046 enum reg_class class;
9047 {
9048 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
9049 {
9050 /* SSE can't load any constant directly yet. */
9051 if (SSE_CLASS_P (class))
9052 return NO_REGS;
9053 /* The 387 can load the constants 0 and 1 directly.  */
9054 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
9055 {
9056 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
9057 if (MAYBE_SSE_CLASS_P (class))
9058 return (reg_class_subset_p (class, GENERAL_REGS)
9059 ? GENERAL_REGS : FLOAT_REGS);
9060 else
9061 return class;
9062 }
9063 /* General regs can load everything. */
9064 if (reg_class_subset_p (class, GENERAL_REGS))
9065 return GENERAL_REGS;
9066 /* In case we haven't resolved FLOAT or SSE yet, give up. */
9067 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
9068 return NO_REGS;
9069 }
9070 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
9071 return NO_REGS;
9072 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
9073 return Q_REGS;
9074 return class;
9075 }
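
/* For example (illustrative): reloading the CONST_DOUBLE 0.0 into
   FLOAT_REGS keeps FLOAT_REGS (the 387 has fldz), while any CONST_DOUBLE
   headed for an SSE class yields NO_REGS and must come from memory.  */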
9076
9077 /* If we are copying between general and FP registers, we need a memory
9078 location. The same is true for SSE and MMX registers.
9079
9080    The macro can't work reliably when one of the CLASSES is a class that
9081    contains registers from multiple units (SSE, MMX, integer).  We avoid
9082    this by never combining those units in a single alternative in the
9083    machine description.  Ensure that this constraint holds to avoid surprises.
9084
9085 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
9086 enforce these sanity checks. */
9087 int
9088 ix86_secondary_memory_needed (class1, class2, mode, strict)
9089 enum reg_class class1, class2;
9090 enum machine_mode mode;
9091 int strict;
9092 {
9093 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
9094 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
9095 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
9096 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
9097 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
9098 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
9099 {
9100 if (strict)
9101 abort ();
9102 else
9103 return 1;
9104 }
9105 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
9106 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
9107 && (mode) != SImode)
9108 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9109 && (mode) != SImode));
9110 }
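
/* For example (illustrative): a DFmode move between an SSE register and
   a general register needs a memory intermediary, while an SImode move
   between the same classes does not.  */
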
9111 /* Return the cost of moving data from a register in class CLASS1 to
9112 one in class CLASS2.
9113
9114    It is not required that the cost always equal 2 when the two classes
9115    are the same; on some machines it is expensive to move between registers
9116    if they are not general registers.  */
9117 int
9118 ix86_register_move_cost (mode, class1, class2)
9119 enum machine_mode mode;
9120 enum reg_class class1, class2;
9121 {
9122 /* If we require secondary memory, compute the cost of the store followed
9123    by the load.  When copying from a general-purpose register we may emit
9124    multiple stores followed by a single load, causing a memory-size-mismatch
9125    stall.  Count this as an arbitrarily high cost of 20.  */
9126 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
9127 {
9128 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
9129 return 10;
9130 return (MEMORY_MOVE_COST (mode, class1, 0)
9131 + MEMORY_MOVE_COST (mode, class2, 1));
9132 }
9133 /* Moves between the SSE/MMX and integer units are expensive.
9134    ??? We should make this cost CPU specific.  */
9135 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9136 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
9137 return ix86_cost->mmxsse_to_integer;
9138 if (MAYBE_FLOAT_CLASS_P (class1))
9139 return ix86_cost->fp_move;
9140 if (MAYBE_SSE_CLASS_P (class1))
9141 return ix86_cost->sse_move;
9142 if (MAYBE_MMX_CLASS_P (class1))
9143 return ix86_cost->mmx_move;
9144 return 2;
9145 }
9146
9147 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
9148 int
9149 ix86_hard_regno_mode_ok (regno, mode)
9150 int regno;
9151 enum machine_mode mode;
9152 {
9153 /* The flags register, and only the flags register, can hold CCmode values.  */
9154 if (CC_REGNO_P (regno))
9155 return GET_MODE_CLASS (mode) == MODE_CC;
9156 if (GET_MODE_CLASS (mode) == MODE_CC
9157 || GET_MODE_CLASS (mode) == MODE_RANDOM
9158 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
9159 return 0;
9160 if (FP_REGNO_P (regno))
9161 return VALID_FP_MODE_P (mode);
9162 if (SSE_REGNO_P (regno))
9163 return VALID_SSE_REG_MODE (mode);
9164 if (MMX_REGNO_P (regno))
9165 return VALID_MMX_REG_MODE (mode);
9166 /* We handle both integer and float values in the general purpose registers.
9167    In the future we should be able to handle vector modes as well.  */
9168 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
9169 return 0;
9170 /* Take care with QImode values: they can live in non-QI regs, but they
9171    then cause partial register stalls.  */
9172 if (regno < 4 || mode != QImode)
9173 return 1;
9174 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
9175 }
9176
9177 /* Return the cost of moving data of mode M between a
9178 register and memory. A value of 2 is the default; this cost is
9179 relative to those in `REGISTER_MOVE_COST'.
9180
9181 If moving between registers and memory is more expensive than
9182 between two registers, you should define this macro to express the
9183 relative cost.
9184
9185    Also model the increased cost of moving QImode values in
9186    non-Q_REGS classes.  */
9188 int
9189 ix86_memory_move_cost (mode, class, in)
9190 enum machine_mode mode;
9191 enum reg_class class;
9192 int in;
9193 {
9194 if (FLOAT_CLASS_P (class))
9195 {
9196 int index;
9197 switch (mode)
9198 {
9199 case SFmode:
9200 index = 0;
9201 break;
9202 case DFmode:
9203 index = 1;
9204 break;
9205 case XFmode:
9206 case TFmode:
9207 index = 2;
9208 break;
9209 default:
9210 return 100;
9211 }
9212 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
9213 }
9214 if (SSE_CLASS_P (class))
9215 {
9216 int index;
9217 switch (GET_MODE_SIZE (mode))
9218 {
9219 case 4:
9220 index = 0;
9221 break;
9222 case 8:
9223 index = 1;
9224 break;
9225 case 16:
9226 index = 2;
9227 break;
9228 default:
9229 return 100;
9230 }
9231 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
9232 }
9233 if (MMX_CLASS_P (class))
9234 {
9235 int index;
9236 switch (GET_MODE_SIZE (mode))
9237 {
9238 case 4:
9239 index = 0;
9240 break;
9241 case 8:
9242 index = 1;
9243 break;
9244 default:
9245 return 100;
9246 }
9247 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
9248 }
9249 switch (GET_MODE_SIZE (mode))
9250 {
9251 case 1:
9252 if (in)
9253 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
9254 : ix86_cost->movzbl_load);
9255 else
9256 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
9257 : ix86_cost->int_store[0] + 4);
9258 break;
9259 case 2:
9260 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
9261 default:
9262 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
9263 if (mode == TFmode)
9264 mode = XFmode;
9265 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
9266 * (int) GET_MODE_SIZE (mode) / 4);
9267 }
9268 }
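
/* For example (illustrative): an SFmode load into FLOAT_REGS is charged
   fp_load[0] from the active cost table, while a QImode load into a
   non-Q_REGS class is charged the movzbl_load cost.  */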