/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <setjmp.h>
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"

#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {    /* 386 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  6,            /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  23,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8}     /* cost of storing fp registers */
};

struct processor_costs i486_cost = {    /* 486 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  12,           /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  40,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8}     /* cost of storing fp registers */
};

struct processor_costs pentium_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  4,            /* variable shift costs */
  1,            /* constant shift costs */
  11,           /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  25,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  6,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6}     /* cost of storing fp registers */
};

struct processor_costs pentiumpro_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  4,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  17,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 4, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 2, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6}     /* cost of storing fp registers */
};

struct processor_costs k6_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  3,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  18,           /* cost of a divide/mod */
  8,            /* "large" insn */
  4,            /* MOVE_RATIO */
  3,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4}     /* cost of storing fp registers */
};

struct processor_costs athlon_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  5,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  19,           /* cost of a divide/mod */
  8,            /* "large" insn */
  9,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4}     /* cost of storing fp registers */
};

struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
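
/* Editorial sketch (not from the original sources): each x86_* constant
   above is a bitmap indexed by processor type.  A tuning macro in i386.h
   can then test the bit for the processor currently being scheduled for,
   along these lines:

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))

   The exact macro names in i386.h may differ; this shows the idiom, not
   a quote of the header.  */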

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS
};

/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,          /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,  /* fp regs */
  -1, -1, -1, -1,                  /* arg, flags, fpsr, dir */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,          /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,  /* fp regs */
  -1, 9, -1, -1,                   /* arg, flags, fpsr, dir */
};


/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;    /* for -mcpu=<xxx> */
const char *ix86_arch_string;   /* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
\f
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, int));
static rtx ix86_expand_compare PARAMS ((enum rtx_code, int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS ((HOST_WIDE_INT,
						      int *, int *, int *));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;  /* Processor costs */
      int target_enable;             /* Target flags to enable.  */
      int target_disable;            /* Target flags to disable.  */
      int align_loop;                /* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  static struct pta
    {
      const char *name;              /* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }
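
  /* Editorial example (not from the original sources): `-march=i686'
     alone selects PROCESSOR_PENTIUMPRO for both the instruction set
     (ix86_arch) and the scheduling model (ix86_cpu), while
     `-march=i686 -mcpu=k6' keeps the i686 instruction set but tunes
     the schedule for the K6.  */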

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a': regno = 0; break;
	    case 'd': regno = 1; break;
	    case 'c': regno = 2; break;
	    case 'b': regno = 3; break;
	    case 'S': regno = 4; break;
	    case 'D': regno = 5; break;
	    case 'B': regno = 6; break;

	    default: fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
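
  /* Editorial worked example: `-mpreferred-stack-boundary=4' yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, the 16-byte alignment
     matching the 128-bit default above; `-mpreferred-stack-boundary=2'
     yields the minimal 32-bit (4-byte) alignment.  */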

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* If we're planning on using `loop', use it.  */
  if (TARGET_USE_LOOP && optimize)
    flag_branch_on_count_reg = 1;
}
\f
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a': regno = 0; break;
	    case 'd': regno = 1; break;
	    case 'c': regno = 2; break;
	    case 'b': regno = 3; break;
	    case 'S': regno = 4; break;
	    case 'D': regno = 5; break;
	    case 'B': regno = 6; break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural
     order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
\f
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine
   specific attribute for DECL.  The attributes in ATTRIBUTES have
   previously been assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine
   specific attribute for TYPE.  The attributes in ATTRIBUTES have
   previously been assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
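
/* Editorial usage sketch (not from the original sources): the attributes
   validated above appear in user code roughly as

     int f (int a, int b) __attribute__ ((stdcall));
     int g (int a, int b) __attribute__ ((regparm (2)));

   where the regparm argument must be an INTEGER_CST no larger than
   REGPARM_MAX, exactly as checked above.  */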

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
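
/* Editorial worked example: for a stdcall function declared
   `void f (int, int)', TYPE_ARG_TYPES ends in void_type_node and SIZE
   is 8, so 8 is returned and the callee pops its arguments with a
   `ret $8'.  For a stdcall varargs function the argument list does not
   end in void_type_node, so 0 is returned and the caller pops.  */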
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;  /* Argument info to initialize */
     tree fntype;           /* tree ptr for function decl */
     rtx libname;           /* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;    /* current arg information */
     enum machine_mode mode;  /* current arg mode */
     tree type;               /* type of the argument or 0 if lib support */
     int named;               /* whether or not the argument was named */
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;    /* current arg information */
     enum machine_mode mode;  /* current arg mode */
     tree type;               /* type of the argument or 0 if lib support */
     int named;               /* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[REGNO (ret)]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
\f
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic.  */

int
expander_call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == SYMBOL_REF)
    return 1;

  return call_insn_operand (op, mode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == MEM
	  && CONSTANT_ADDRESS_P (XEXP (op, 0))
	  && GET_CODE (XEXP (op, 0)) != CONST_INT);
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
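
/* Editorial note: 2, 4 and 8 are the scale factors expressible in the
   sib byte, so such a constant lets e.g. `leal (%eax,%ebx,4), %ecx'
   compute %eax + %ebx*4 in a single instruction.  */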

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
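
/* Editorial note: 0xff, 0xffff and 0xffffffff are -1 truncated to
   QImode, HImode and SImode respectively, so adding such a constant is
   the same as subtracting 1 and can be emitted as a dec.  */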

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) != LE
	  && GET_CODE (op) != GT);
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
}

/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by
   the modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 4)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
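
/* Editorial note: the two nonzero results distinguish the constants the
   80387 can materialize directly; a result of 1 (+0.0) is expected to
   be emitted as `fldz' and a result of 2 (1.0) as `fld1' by the move
   patterns in i386.md.  */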

/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
\f
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?), so
	     this was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}

/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
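
/* Editorial note: the pattern built above is simply

     (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)

   which the insn patterns recognize and emit as a single `pushl'.  */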

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	nregs++;
      }
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]   \
		   |			<- FRAME_POINTER
     [frame]	   > tsize
		   |
     [padding2]   /
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1,
			       (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1,
						     (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}

/* Compute the size of local storage, taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns the padding before the stack frame and PADDING2
   the padding after it.  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1 = 0;
  int padding2 = 0;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using
     these features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
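
/* Editorial worked example: with frame_pointer_needed, two saved
   registers, SIZE = 20, stack_alignment_needed = 16 and
   preferred_alignment = 16: offset = 8 + 2*4 = 16, padding1 = 0,
   total_size = 36, padding2 = 48 - 36 = 12, so the function returns
   20 + 0 + 12 = 32 bytes.  */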
1825
1826/* Emit code to save registers in the prologue. */
1827
1828static void
1829ix86_emit_save_regs ()
1830{
1831 register int regno;
1832 int limit;
1833 rtx insn;
1834 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1835 || current_function_uses_const_pool);
1836 limit = (frame_pointer_needed
1837 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1838
1839 for (regno = limit - 1; regno >= 0; regno--)
1840 if ((regs_ever_live[regno] && !call_used_regs[regno])
1841 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1842 {
1843 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1844 RTX_FRAME_RELATED_P (insn) = 1;
1845 }
1846}
1847
1848/* Expand the prologue into a bunch of separate insns. */
1849
1850void
1851ix86_expand_prologue ()
1852{
1853 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
1854 (int *)0);
1855 rtx insn;
1856 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1857 || current_function_uses_const_pool);
1858
1859 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1860 slower on all targets. Also sdb doesn't like it. */
1861
1862 if (frame_pointer_needed)
1863 {
1864 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1865 RTX_FRAME_RELATED_P (insn) = 1;
1866
1867 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1868 RTX_FRAME_RELATED_P (insn) = 1;
1869 }
1870
1871 ix86_emit_save_regs ();
1872
1873 if (tsize == 0)
1874 ;
1875 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1876 {
1877 if (frame_pointer_needed)
1878 insn = emit_insn (gen_pro_epilogue_adjust_stack
1879 (stack_pointer_rtx, stack_pointer_rtx,
1880 GEN_INT (-tsize), hard_frame_pointer_rtx));
1881 else
1882 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1883 GEN_INT (-tsize)));
1884 RTX_FRAME_RELATED_P (insn) = 1;
1885 }
1886 else
1887 {
1888 /* ??? Is this only valid for Win32? */
1889
1890 rtx arg0, sym;
1891
1892 arg0 = gen_rtx_REG (SImode, 0);
1893 emit_move_insn (arg0, GEN_INT (tsize));
1894
1895 sym = gen_rtx_MEM (FUNCTION_MODE,
1896 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1897 insn = emit_call_insn (gen_call (sym, const0_rtx));
1898
1899 CALL_INSN_FUNCTION_USAGE (insn)
1900 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1901 CALL_INSN_FUNCTION_USAGE (insn));
1902 }
1903
1904#ifdef SUBTARGET_PROLOGUE
1905 SUBTARGET_PROLOGUE;
1906#endif
1907
1908 if (pic_reg_used)
1909 load_pic_register ();
1910
1911 /* If we are profiling, make sure no instructions are scheduled before
1912 the call to mcount. However, if -fpic, the above call will have
1913 done that. */
1914 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
1915 emit_insn (gen_blockage ());
1916}
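
/* For the 48 byte frame computed in the example above, the expansion
   is roughly (illustrative; the exact insns depend on target flags):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	pushl	%ebx
	subl	$32, %esp

   followed by the PIC register load when pic_reg_used is set.  */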
1917
/* Emit code to add TSIZE to the esp value.  Use POP instructions
   when profitable.  */
1920
1921static void
1922ix86_emit_epilogue_esp_adjustment (tsize)
1923 int tsize;
1924{
1925 /* Intel's docs say that for 4 or 8 bytes of stack frame one should
1926 use `pop' and not `add'. */
1927 int use_pop = tsize == 4;
1928 rtx edx = 0, ecx;
1929
1930 /* Use two pops only for the Pentium processors. */
1931 if (tsize == 8 && !TARGET_386 && !TARGET_486)
1932 {
1933 rtx retval = current_function_return_rtx;
1934
1935 edx = gen_rtx_REG (SImode, 1);
1936
      /* This case is a bit more complex.  Since we cannot pop into
	 %ecx twice, we need a second register.  But %edx is only
	 available if the return value is not of DImode, since a
	 DImode return value occupies %edx.  */
1941 use_pop = (retval == NULL
1942 || !reg_overlap_mentioned_p (edx, retval));
1943 }
1944
1945 if (use_pop)
1946 {
1947 ecx = gen_rtx_REG (SImode, 2);
1948
1949 /* We have to prevent the two pops here from being scheduled.
1950 GCC otherwise would try in some situation to put other
1951 instructions in between them which has a bad effect. */
1952 emit_insn (gen_blockage ());
1953 emit_insn (gen_popsi1 (ecx));
1954 if (tsize == 8)
1955 emit_insn (gen_popsi1 (edx));
1956 }
1957 else
1958 {
1959 /* If a frame pointer is present, we must be sure to tie the sp
1960 to the fp so that we don't mis-schedule. */
1961 if (frame_pointer_needed)
1962 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
1963 stack_pointer_rtx,
1964 GEN_INT (tsize),
1965 hard_frame_pointer_rtx));
1966 else
1967 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1968 GEN_INT (tsize)));
1969 }
1970}
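
/* Illustrative expansions of the above: tsize == 4 always becomes
   "popl %ecx", and tsize == 8 becomes "popl %ecx; popl %edx" when
   not tuning for the 386/486 and %edx does not hold part of the
   return value.  Popping into %ecx and %edx is safe here because
   both are call-clobbered.  Everything else falls back to
   "addl $tsize, %esp" (or the lea form when a frame pointer is
   present).  */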
1971
1972/* Emit code to restore saved registers using MOV insns. First register
1973 is restored from POINTER + OFFSET. */
1974static void
1975ix86_emit_restore_regs_using_mov (pointer, offset)
1976 rtx pointer;
1977 int offset;
1978{
1979 int regno;
1980 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1981 || current_function_uses_const_pool);
1982 int limit = (frame_pointer_needed
1983 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1984
1985 for (regno = 0; regno < limit; regno++)
1986 if ((regs_ever_live[regno] && !call_used_regs[regno])
1987 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1988 {
1989 emit_move_insn (gen_rtx_REG (SImode, regno),
1990 adj_offsettable_operand (gen_rtx_MEM (SImode,
1991 pointer),
1992 offset));
1993 offset += 4;
1994 }
1995}
1996
1997/* Restore function stack, frame, and registers. */
1998
1999void
2000ix86_expand_epilogue ()
2001{
2002 int nregs;
2003 int regno;
2004
2005 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2006 || current_function_uses_const_pool);
2007 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2008 HOST_WIDE_INT offset;
2009 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2010 (int *)0, (int *)0);
2011
2013 /* Calculate start of saved registers relative to ebp. */
2014 offset = -nregs * UNITS_PER_WORD;
2015
2016#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2017 if (profile_block_flag == 2)
2018 {
2019 FUNCTION_BLOCK_PROFILER_EXIT;
2020 }
2021#endif
2022
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's less
     work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well, especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
2033 if ((!sp_valid && nregs <= 1)
2034 || (frame_pointer_needed && !nregs && tsize)
2035 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2036 && nregs == 1))
2037 {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is when esp points
	 directly to the end of the block of saved registers, where we
	 may simplify the addressing mode.  */
2043
2044 if (!frame_pointer_needed || (sp_valid && !tsize))
2045 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2046 else
2047 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2048
2049 if (!frame_pointer_needed)
2050 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2051 /* If not an i386, mov & pop is faster than "leave". */
2052 else if (TARGET_USE_LEAVE || optimize_size)
2053 emit_insn (gen_leave ());
2054 else
2055 {
2056 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2057 hard_frame_pointer_rtx,
2058 const0_rtx,
2059 hard_frame_pointer_rtx));
2060 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2061 }
2062 }
2063 else
2064 {
2065 /* First step is to deallocate the stack frame so that we can
2066 pop the registers. */
2067 if (!sp_valid)
2068 {
2069 if (!frame_pointer_needed)
2070 abort ();
2071 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2072 hard_frame_pointer_rtx,
2073 GEN_INT (offset),
2074 hard_frame_pointer_rtx));
2075 }
2076 else if (tsize)
2077 ix86_emit_epilogue_esp_adjustment (tsize);
2078
2079 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2080 if ((regs_ever_live[regno] && !call_used_regs[regno])
2081 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2082 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2083 }
2084
2085 if (current_function_pops_args && current_function_args_size)
2086 {
2087 rtx popc = GEN_INT (current_function_pops_args);
2088
      /* The i386 `ret' instruction can pop at most 64K bytes (its
	 immediate is an unsigned 16-bit value), but we conservatively
	 limit ourselves to 32K.  If asked to pop more, pop the return
	 address, do an explicit add, and jump indirectly to the
	 caller.  */
2092
2093 if (current_function_pops_args >= 32768)
2094 {
2095 rtx ecx = gen_rtx_REG (SImode, 2);
2096
2097 emit_insn (gen_popsi1 (ecx));
2098 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2099 emit_indirect_jump (ecx);
2100 }
2101 else
2102 emit_jump_insn (gen_return_pop_internal (popc));
2103 }
2104 else
2105 emit_jump_insn (gen_return_internal ());
2106}
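
/* Two illustrative epilogues: a frame-pointer function with locals
   but no saved registers reduces to

	leave			# or: movl %ebp, %esp; popl %ebp
	ret

   while a frameless function with %ebx and %esi saved becomes

	addl	$tsize, %esp
	popl	%ebx
	popl	%esi
	ret
   */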
2107\f
2108/* Extract the parts of an RTL expression that is a valid memory address
2109 for an instruction. Return false if the structure of the address is
2110 grossly off. */
2111
2112static int
2113ix86_decompose_address (addr, out)
2114 register rtx addr;
2115 struct ix86_address *out;
2116{
2117 rtx base = NULL_RTX;
2118 rtx index = NULL_RTX;
2119 rtx disp = NULL_RTX;
2120 HOST_WIDE_INT scale = 1;
2121 rtx scale_rtx = NULL_RTX;
2122
2123 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2124 base = addr;
2125 else if (GET_CODE (addr) == PLUS)
2126 {
2127 rtx op0 = XEXP (addr, 0);
2128 rtx op1 = XEXP (addr, 1);
2129 enum rtx_code code0 = GET_CODE (op0);
2130 enum rtx_code code1 = GET_CODE (op1);
2131
2132 if (code0 == REG || code0 == SUBREG)
2133 {
2134 if (code1 == REG || code1 == SUBREG)
2135 index = op0, base = op1; /* index + base */
2136 else
2137 base = op0, disp = op1; /* base + displacement */
2138 }
2139 else if (code0 == MULT)
2140 {
2141 index = XEXP (op0, 0);
2142 scale_rtx = XEXP (op0, 1);
2143 if (code1 == REG || code1 == SUBREG)
2144 base = op1; /* index*scale + base */
2145 else
2146 disp = op1; /* index*scale + disp */
2147 }
2148 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2149 {
2150 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2151 scale_rtx = XEXP (XEXP (op0, 0), 1);
2152 base = XEXP (op0, 1);
2153 disp = op1;
2154 }
2155 else if (code0 == PLUS)
2156 {
2157 index = XEXP (op0, 0); /* index + base + disp */
2158 base = XEXP (op0, 1);
2159 disp = op1;
2160 }
2161 else
2162 return FALSE;
2163 }
2164 else if (GET_CODE (addr) == MULT)
2165 {
2166 index = XEXP (addr, 0); /* index*scale */
2167 scale_rtx = XEXP (addr, 1);
2168 }
2169 else if (GET_CODE (addr) == ASHIFT)
2170 {
2171 rtx tmp;
2172
2173 /* We're called for lea too, which implements ashift on occasion. */
2174 index = XEXP (addr, 0);
2175 tmp = XEXP (addr, 1);
2176 if (GET_CODE (tmp) != CONST_INT)
2177 return FALSE;
2178 scale = INTVAL (tmp);
2179 if ((unsigned HOST_WIDE_INT) scale > 3)
2180 return FALSE;
2181 scale = 1 << scale;
2182 }
2183 else
2184 disp = addr; /* displacement */
2185
2186 /* Extract the integral value of scale. */
2187 if (scale_rtx)
2188 {
2189 if (GET_CODE (scale_rtx) != CONST_INT)
2190 return FALSE;
2191 scale = INTVAL (scale_rtx);
2192 }
2193
  /* Allow the arg pointer and stack pointer as the index if there is no scaling.  */
2195 if (base && index && scale == 1
2196 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2197 || index == stack_pointer_rtx))
2198 {
2199 rtx tmp = base;
2200 base = index;
2201 index = tmp;
2202 }
2203
2204 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2205 if ((base == hard_frame_pointer_rtx
2206 || base == frame_pointer_rtx
2207 || base == arg_pointer_rtx) && !disp)
2208 disp = const0_rtx;
2209
  /* Special case: on K6, [%esi] causes the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].  */
2212 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2213 && base && !index && !disp
2214 && REG_P (base)
2215 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2216 disp = const0_rtx;
2217
2218 /* Special case: encode reg+reg instead of reg*2. */
2219 if (!base && index && scale && scale == 2)
2220 base = index, scale = 1;
2221
2222 /* Special case: scaling cannot be encoded without base or displacement. */
2223 if (!base && !disp && index && scale != 1)
2224 disp = const0_rtx;
2225
2226 out->base = base;
2227 out->index = index;
2228 out->disp = disp;
2229 out->scale = scale;
2230
2231 return TRUE;
2232}
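
/* For example (illustrative), the lea-style address

	(plus (plus (mult (reg %ebx) (const_int 4)) (reg %esi))
	      (const_int 12))

   decomposes into base = %esi, index = %ebx, scale = 4, disp = 12,
   which prints as "12(%esi,%ebx,4)" in AT&T syntax.  */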
2233
2234/* Determine if a given CONST RTX is a valid memory displacement
2235 in PIC mode. */
2236
2237int
2238legitimate_pic_address_disp_p (disp)
2239 register rtx disp;
2240{
2241 if (GET_CODE (disp) != CONST)
2242 return 0;
2243 disp = XEXP (disp, 0);
2244
2245 if (GET_CODE (disp) == PLUS)
2246 {
2247 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2248 return 0;
2249 disp = XEXP (disp, 0);
2250 }
2251
2252 if (GET_CODE (disp) != UNSPEC
2253 || XVECLEN (disp, 0) != 1)
2254 return 0;
2255
2256 /* Must be @GOT or @GOTOFF. */
2257 if (XINT (disp, 1) != 6
2258 && XINT (disp, 1) != 7)
2259 return 0;
2260
2261 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2262 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2263 return 0;
2264
2265 return 1;
2266}
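
/* Illustrative examples (the symbol name is arbitrary): both

	(const (unspec [(symbol_ref "foo")] 7))
	(const (plus (unspec [(symbol_ref "foo")] 7) (const_int 4)))

   are accepted (foo@GOTOFF and foo@GOTOFF+4), while a bare
   (const (symbol_ref "foo")) is rejected.  */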
2267
2268/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2269 memory address for an instruction. The MODE argument is the machine mode
2270 for the MEM expression that wants to use this address.
2271
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
2273 convert common non-canonical forms to canonical form so that they will
2274 be recognized. */
2275
2276int
2277legitimate_address_p (mode, addr, strict)
2278 enum machine_mode mode;
2279 register rtx addr;
2280 int strict;
2281{
2282 struct ix86_address parts;
2283 rtx base, index, disp;
2284 HOST_WIDE_INT scale;
2285 const char *reason = NULL;
2286 rtx reason_rtx = NULL_RTX;
2287
2288 if (TARGET_DEBUG_ADDR)
2289 {
2290 fprintf (stderr,
2291 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2292 GET_MODE_NAME (mode), strict);
2293 debug_rtx (addr);
2294 }
2295
2296 if (! ix86_decompose_address (addr, &parts))
2297 {
2298 reason = "decomposition failed";
2299 goto error;
2300 }
2301
2302 base = parts.base;
2303 index = parts.index;
2304 disp = parts.disp;
2305 scale = parts.scale;
2306
2307 /* Validate base register.
2308
     Don't allow SUBREGs here; they can lead to spill failures when the
     base is one word out of a two-word structure, which is represented
     internally as a DImode int.  */
2312
2313 if (base)
2314 {
2315 reason_rtx = base;
2316
2317 if (GET_CODE (base) != REG)
2318 {
2319 reason = "base is not a register";
2320 goto error;
2321 }
2322
2323 if (GET_MODE (base) != Pmode)
2324 {
2325 reason = "base is not in Pmode";
2326 goto error;
2327 }
2328
2329 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2330 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2331 {
2332 reason = "base is not valid";
2333 goto error;
2334 }
2335 }
2336
2337 /* Validate index register.
2338
     Don't allow SUBREGs here; they can lead to spill failures when the
     index is one word out of a two-word structure, which is represented
     internally as a DImode int.  */
2342
2343 if (index)
2344 {
2345 reason_rtx = index;
2346
2347 if (GET_CODE (index) != REG)
2348 {
2349 reason = "index is not a register";
2350 goto error;
2351 }
2352
2353 if (GET_MODE (index) != Pmode)
2354 {
2355 reason = "index is not in Pmode";
2356 goto error;
2357 }
2358
2359 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2360 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2361 {
2362 reason = "index is not valid";
2363 goto error;
2364 }
2365 }
2366
2367 /* Validate scale factor. */
2368 if (scale != 1)
2369 {
2370 reason_rtx = GEN_INT (scale);
2371 if (!index)
2372 {
2373 reason = "scale without index";
2374 goto error;
2375 }
2376
2377 if (scale != 2 && scale != 4 && scale != 8)
2378 {
2379 reason = "scale is not a valid multiplier";
2380 goto error;
2381 }
2382 }
2383
2384 /* Validate displacement. */
2385 if (disp)
2386 {
2387 reason_rtx = disp;
2388
2389 if (!CONSTANT_ADDRESS_P (disp))
2390 {
2391 reason = "displacement is not constant";
2392 goto error;
2393 }
2394
2395 if (GET_CODE (disp) == CONST_DOUBLE)
2396 {
2397 reason = "displacement is a const_double";
2398 goto error;
2399 }
2400
2401 if (flag_pic && SYMBOLIC_CONST (disp))
2402 {
2403 if (! legitimate_pic_address_disp_p (disp))
2404 {
2405 reason = "displacement is an invalid pic construct";
2406 goto error;
2407 }
2408
2409 /* Verify that a symbolic pic displacement includes
2410 the pic_offset_table_rtx register. */
2411 if (base != pic_offset_table_rtx
2412 && (index != pic_offset_table_rtx || scale != 1))
2413 {
2414 reason = "pic displacement against invalid base";
2415 goto error;
2416 }
2417 }
2418 else if (HALF_PIC_P ())
2419 {
2420 if (! HALF_PIC_ADDRESS_P (disp)
2421 || (base != NULL_RTX || index != NULL_RTX))
2422 {
2423 reason = "displacement is an invalid half-pic reference";
2424 goto error;
2425 }
2426 }
2427 }
2428
2429 /* Everything looks valid. */
2430 if (TARGET_DEBUG_ADDR)
2431 fprintf (stderr, "Success.\n");
2432 return TRUE;
2433
2434error:
2435 if (TARGET_DEBUG_ADDR)
2436 {
2437 fprintf (stderr, "Error: %s\n", reason);
2438 debug_rtx (reason_rtx);
2439 }
2440 return FALSE;
2441}
2442\f
2443/* Return a legitimate reference for ORIG (an address) using the
2444 register REG. If REG is 0, a new pseudo is generated.
2445
2446 There are two types of references that must be handled:
2447
2448 1. Global data references must load the address from the GOT, via
2449 the PIC reg. An insn is emitted to do this load, and the reg is
2450 returned.
2451
2452 2. Static data references, constant pool addresses, and code labels
2453 compute the address as an offset from the GOT, whose base is in
2454 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2455 differentiate them from global data objects. The returned
2456 address is the PIC reg + an unspec constant.
2457
2458 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2459 reg also appears in the address. */
2460
2461rtx
2462legitimize_pic_address (orig, reg)
2463 rtx orig;
2464 rtx reg;
2465{
2466 rtx addr = orig;
2467 rtx new = orig;
2468 rtx base;
2469
2470 if (GET_CODE (addr) == LABEL_REF
2471 || (GET_CODE (addr) == SYMBOL_REF
2472 && (CONSTANT_POOL_ADDRESS_P (addr)
2473 || SYMBOL_REF_FLAG (addr))))
2474 {
2475 /* This symbol may be referenced via a displacement from the PIC
2476 base address (@GOTOFF). */
2477
2478 current_function_uses_pic_offset_table = 1;
2479 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2480 new = gen_rtx_CONST (VOIDmode, new);
2481 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2482
2483 if (reg != 0)
2484 {
2485 emit_move_insn (reg, new);
2486 new = reg;
2487 }
2488 }
2489 else if (GET_CODE (addr) == SYMBOL_REF)
2490 {
2491 /* This symbol must be referenced via a load from the
2492 Global Offset Table (@GOT). */
2493
2494 current_function_uses_pic_offset_table = 1;
2495 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2496 new = gen_rtx_CONST (VOIDmode, new);
2497 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2498 new = gen_rtx_MEM (Pmode, new);
2499 RTX_UNCHANGING_P (new) = 1;
2500
2501 if (reg == 0)
2502 reg = gen_reg_rtx (Pmode);
2503 emit_move_insn (reg, new);
2504 new = reg;
2505 }
2506 else
2507 {
2508 if (GET_CODE (addr) == CONST)
2509 {
2510 addr = XEXP (addr, 0);
2511 if (GET_CODE (addr) == UNSPEC)
2512 {
2513 /* Check that the unspec is one of the ones we generate? */
2514 }
2515 else if (GET_CODE (addr) != PLUS)
2516 abort ();
2517 }
2518 if (GET_CODE (addr) == PLUS)
2519 {
2520 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2521
2522 /* Check first to see if this is a constant offset from a @GOTOFF
2523 symbol reference. */
2524 if ((GET_CODE (op0) == LABEL_REF
2525 || (GET_CODE (op0) == SYMBOL_REF
2526 && (CONSTANT_POOL_ADDRESS_P (op0)
2527 || SYMBOL_REF_FLAG (op0))))
2528 && GET_CODE (op1) == CONST_INT)
2529 {
2530 current_function_uses_pic_offset_table = 1;
2531 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2532 new = gen_rtx_PLUS (VOIDmode, new, op1);
2533 new = gen_rtx_CONST (VOIDmode, new);
2534 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2535
2536 if (reg != 0)
2537 {
2538 emit_move_insn (reg, new);
2539 new = reg;
2540 }
2541 }
2542 else
2543 {
2544 base = legitimize_pic_address (XEXP (addr, 0), reg);
2545 new = legitimize_pic_address (XEXP (addr, 1),
2546 base == reg ? NULL_RTX : reg);
2547
2548 if (GET_CODE (new) == CONST_INT)
2549 new = plus_constant (base, INTVAL (new));
2550 else
2551 {
2552 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2553 {
2554 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2555 new = XEXP (new, 1);
2556 }
2557 new = gen_rtx_PLUS (Pmode, base, new);
2558 }
2559 }
2560 }
2561 }
2562 return new;
2563}
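
/* Illustrative results, assuming a local symbol "l" and a global
   symbol "g" (the names are hypothetical): the local case yields

	(plus pic_offset_table_rtx (const (unspec [l] 7)))

   i.e. %ebx + l@GOTOFF, computed without a memory reference, while
   the global case yields the address loaded from the GOT slot

	(mem (plus pic_offset_table_rtx (const (unspec [g] 6))))

   copied into REG (or a fresh pseudo).  */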
2564\f
2565/* Try machine-dependent ways of modifying an illegitimate address
2566 to be legitimate. If we find one, return the new, valid address.
2567 This macro is used in only one place: `memory_address' in explow.c.
2568
2569 OLDX is the address as it was before break_out_memory_refs was called.
2570 In some cases it is useful to look at this to decide what needs to be done.
2571
2572 MODE and WIN are passed so that this macro can use
2573 GO_IF_LEGITIMATE_ADDRESS.
2574
2575 It is always safe for this macro to do nothing. It exists to recognize
2576 opportunities to optimize the output.
2577
2578 For the 80386, we handle X+REG by loading X into a register R and
2579 using R+REG. R will go in a general reg and indexing will be used.
2580 However, if REG is a broken-out memory address or multiplication,
2581 nothing needs to be done because REG can certainly go in a general reg.
2582
2583 When -fpic is used, special handling is needed for symbolic references.
2584 See comments by legitimize_pic_address in i386.c for details. */
2585
2586rtx
2587legitimize_address (x, oldx, mode)
2588 register rtx x;
2589 register rtx oldx ATTRIBUTE_UNUSED;
2590 enum machine_mode mode;
2591{
2592 int changed = 0;
2593 unsigned log;
2594
2595 if (TARGET_DEBUG_ADDR)
2596 {
2597 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2598 GET_MODE_NAME (mode));
2599 debug_rtx (x);
2600 }
2601
2602 if (flag_pic && SYMBOLIC_CONST (x))
2603 return legitimize_pic_address (x, 0);
2604
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
2606 if (GET_CODE (x) == ASHIFT
2607 && GET_CODE (XEXP (x, 1)) == CONST_INT
2608 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2609 {
2610 changed = 1;
2611 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2612 GEN_INT (1 << log));
2613 }
2614
2615 if (GET_CODE (x) == PLUS)
2616 {
2617 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2618
2619 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2620 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2621 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2622 {
2623 changed = 1;
2624 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2625 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2626 GEN_INT (1 << log));
2627 }
2628
2629 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2630 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2631 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2632 {
2633 changed = 1;
2634 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2635 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2636 GEN_INT (1 << log));
2637 }
2638
2639 /* Put multiply first if it isn't already. */
2640 if (GET_CODE (XEXP (x, 1)) == MULT)
2641 {
2642 rtx tmp = XEXP (x, 0);
2643 XEXP (x, 0) = XEXP (x, 1);
2644 XEXP (x, 1) = tmp;
2645 changed = 1;
2646 }
2647
2648 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2649 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2650 created by virtual register instantiation, register elimination, and
2651 similar optimizations. */
2652 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2653 {
2654 changed = 1;
2655 x = gen_rtx_PLUS (Pmode,
2656 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2657 XEXP (XEXP (x, 1), 0)),
2658 XEXP (XEXP (x, 1), 1));
2659 }
2660
2661 /* Canonicalize
2662 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2663 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2664 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2665 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2666 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2667 && CONSTANT_P (XEXP (x, 1)))
2668 {
2669 rtx constant;
2670 rtx other = NULL_RTX;
2671
2672 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2673 {
2674 constant = XEXP (x, 1);
2675 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2676 }
2677 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2678 {
2679 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2680 other = XEXP (x, 1);
2681 }
2682 else
2683 constant = 0;
2684
2685 if (constant)
2686 {
2687 changed = 1;
2688 x = gen_rtx_PLUS (Pmode,
2689 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2690 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2691 plus_constant (other, INTVAL (constant)));
2692 }
2693 }
2694
2695 if (changed && legitimate_address_p (mode, x, FALSE))
2696 return x;
2697
2698 if (GET_CODE (XEXP (x, 0)) == MULT)
2699 {
2700 changed = 1;
2701 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2702 }
2703
2704 if (GET_CODE (XEXP (x, 1)) == MULT)
2705 {
2706 changed = 1;
2707 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2708 }
2709
2710 if (changed
2711 && GET_CODE (XEXP (x, 1)) == REG
2712 && GET_CODE (XEXP (x, 0)) == REG)
2713 return x;
2714
2715 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2716 {
2717 changed = 1;
2718 x = legitimize_pic_address (x, 0);
2719 }
2720
2721 if (changed && legitimate_address_p (mode, x, FALSE))
2722 return x;
2723
2724 if (GET_CODE (XEXP (x, 0)) == REG)
2725 {
2726 register rtx temp = gen_reg_rtx (Pmode);
2727 register rtx val = force_operand (XEXP (x, 1), temp);
2728 if (val != temp)
2729 emit_move_insn (temp, val);
2730
2731 XEXP (x, 1) = temp;
2732 return x;
2733 }
2734
2735 else if (GET_CODE (XEXP (x, 1)) == REG)
2736 {
2737 register rtx temp = gen_reg_rtx (Pmode);
2738 register rtx val = force_operand (XEXP (x, 0), temp);
2739 if (val != temp)
2740 emit_move_insn (temp, val);
2741
2742 XEXP (x, 0) = temp;
2743 return x;
2744 }
2745 }
2746
2747 return x;
2748}
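
/* As an illustrative example, (plus (ashift (reg) (const_int 2)) (reg))
   is first rewritten into the canonical
   (plus (mult (reg) (const_int 4)) (reg)), which matches the
   base + index*scale form the hardware can encode directly.  */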
2749\f
2750/* Print an integer constant expression in assembler syntax. Addition
2751 and subtraction are the only arithmetic that may appear in these
2752 expressions. FILE is the stdio stream to write to, X is the rtx, and
2753 CODE is the operand print code from the output string. */
2754
2755static void
2756output_pic_addr_const (file, x, code)
2757 FILE *file;
2758 rtx x;
2759 int code;
2760{
2761 char buf[256];
2762
2763 switch (GET_CODE (x))
2764 {
2765 case PC:
2766 if (flag_pic)
2767 putc ('.', file);
2768 else
2769 abort ();
2770 break;
2771
2772 case SYMBOL_REF:
2773 assemble_name (file, XSTR (x, 0));
2774 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2775 fputs ("@PLT", file);
2776 break;
2777
2778 case LABEL_REF:
2779 x = XEXP (x, 0);
2780 /* FALLTHRU */
2781 case CODE_LABEL:
2782 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2783 assemble_name (asm_out_file, buf);
2784 break;
2785
2786 case CONST_INT:
2787 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2788 break;
2789
2790 case CONST:
2791 /* This used to output parentheses around the expression,
2792 but that does not work on the 386 (either ATT or BSD assembler). */
2793 output_pic_addr_const (file, XEXP (x, 0), code);
2794 break;
2795
2796 case CONST_DOUBLE:
2797 if (GET_MODE (x) == VOIDmode)
2798 {
2799 /* We can use %d if the number is <32 bits and positive. */
2800 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2801 fprintf (file, "0x%lx%08lx",
2802 (unsigned long) CONST_DOUBLE_HIGH (x),
2803 (unsigned long) CONST_DOUBLE_LOW (x));
2804 else
2805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2806 }
2807 else
2808 /* We can't handle floating point constants;
2809 PRINT_OPERAND must handle them. */
2810 output_operand_lossage ("floating constant misused");
2811 break;
2812
2813 case PLUS:
2814 /* Some assemblers need integer constants to appear first. */
2815 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2816 {
2817 output_pic_addr_const (file, XEXP (x, 0), code);
2818 putc ('+', file);
2819 output_pic_addr_const (file, XEXP (x, 1), code);
2820 }
2821 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2822 {
2823 output_pic_addr_const (file, XEXP (x, 1), code);
2824 putc ('+', file);
2825 output_pic_addr_const (file, XEXP (x, 0), code);
2826 }
2827 else
2828 abort ();
2829 break;
2830
2831 case MINUS:
2832 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2833 output_pic_addr_const (file, XEXP (x, 0), code);
2834 putc ('-', file);
2835 output_pic_addr_const (file, XEXP (x, 1), code);
2836 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2837 break;
2838
2839 case UNSPEC:
2840 if (XVECLEN (x, 0) != 1)
2841 abort ();
2842 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2843 switch (XINT (x, 1))
2844 {
2845 case 6:
2846 fputs ("@GOT", file);
2847 break;
2848 case 7:
2849 fputs ("@GOTOFF", file);
2850 break;
2851 case 8:
2852 fputs ("@PLT", file);
2853 break;
2854 default:
2855 output_operand_lossage ("invalid UNSPEC as operand");
2856 break;
2857 }
2858 break;
2859
2860 default:
2861 output_operand_lossage ("invalid expression as operand");
2862 }
2863}
2864
2865/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2866 We need to handle our special PIC relocations. */
2867
2868void
2869i386_dwarf_output_addr_const (file, x)
2870 FILE *file;
2871 rtx x;
2872{
2873 fprintf (file, "\t%s\t", INT_ASM_OP);
2874 if (flag_pic)
2875 output_pic_addr_const (file, x, '\0');
2876 else
2877 output_addr_const (file, x);
2878 fputc ('\n', file);
2879}
2880
2881/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
2883 into a direct symbol reference. */
2884
2885rtx
2886i386_simplify_dwarf_addr (orig_x)
2887 rtx orig_x;
2888{
2889 rtx x = orig_x;
2890
2891 if (GET_CODE (x) != PLUS
2892 || GET_CODE (XEXP (x, 0)) != REG
2893 || GET_CODE (XEXP (x, 1)) != CONST)
2894 return orig_x;
2895
2896 x = XEXP (XEXP (x, 1), 0);
2897 if (GET_CODE (x) == UNSPEC
2898 && XINT (x, 1) == 7)
2899 return XVECEXP (x, 0, 0);
2900
2901 if (GET_CODE (x) == PLUS
2902 && GET_CODE (XEXP (x, 0)) == UNSPEC
2903 && GET_CODE (XEXP (x, 1)) == CONST_INT
2904 && XINT (XEXP (x, 0), 1) == 7)
2905 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
2906
2907 return orig_x;
2908}
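
/* Illustratively (with a hypothetical symbol "x"),
   (plus (reg) (const (unspec [(symbol_ref "x")] 7))) simplifies to
   (symbol_ref "x"), and the variant with an added (const_int 8)
   simplifies to (plus (symbol_ref "x") (const_int 8)).  */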
2909\f
2910static void
2911put_condition_code (code, mode, reverse, fp, file)
2912 enum rtx_code code;
2913 enum machine_mode mode;
2914 int reverse, fp;
2915 FILE *file;
2916{
2917 const char *suffix;
2918
2919 if (reverse)
2920 code = reverse_condition (code);
2921
2922 switch (code)
2923 {
2924 case EQ:
2925 suffix = "e";
2926 break;
2927 case NE:
2928 suffix = "ne";
2929 break;
2930 case GT:
2931 if (mode == CCNOmode)
2932 abort ();
2933 suffix = "g";
2934 break;
2935 case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
2938 suffix = fp ? "nbe" : "a";
2939 break;
2940 case LT:
2941 if (mode == CCNOmode)
2942 suffix = "s";
2943 else
2944 suffix = "l";
2945 break;
2946 case LTU:
2947 suffix = "b";
2948 break;
2949 case GE:
2950 if (mode == CCNOmode)
2951 suffix = "ns";
2952 else
2953 suffix = "ge";
2954 break;
2955 case GEU:
2956 /* ??? As above. */
2957 suffix = fp ? "nb" : "ae";
2958 break;
2959 case LE:
2960 if (mode == CCNOmode)
2961 abort ();
2962 suffix = "le";
2963 break;
2964 case LEU:
2965 suffix = "be";
2966 break;
2967 default:
2968 abort ();
2969 }
2970 fputs (suffix, file);
2971}
2972
2973void
2974print_reg (x, code, file)
2975 rtx x;
2976 int code;
2977 FILE *file;
2978{
2979 if (REGNO (x) == ARG_POINTER_REGNUM
2980 || REGNO (x) == FRAME_POINTER_REGNUM
2981 || REGNO (x) == FLAGS_REG
2982 || REGNO (x) == FPSR_REG)
2983 abort ();
2984
2985 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
2986 putc ('%', file);
2987
2988 if (code == 'w')
2989 code = 2;
2990 else if (code == 'b')
2991 code = 1;
2992 else if (code == 'k')
2993 code = 4;
2994 else if (code == 'y')
2995 code = 3;
2996 else if (code == 'h')
2997 code = 0;
2998 else
2999 code = GET_MODE_SIZE (GET_MODE (x));
3000
3001 switch (code)
3002 {
3003 case 3:
3004 if (STACK_TOP_P (x))
3005 {
3006 fputs ("st(0)", file);
3007 break;
3008 }
3009 /* FALLTHRU */
3010 case 4:
3011 case 8:
3012 case 12:
3013 if (! FP_REG_P (x))
3014 putc ('e', file);
3015 /* FALLTHRU */
3016 case 2:
3017 fputs (hi_reg_name[REGNO (x)], file);
3018 break;
3019 case 1:
3020 fputs (qi_reg_name[REGNO (x)], file);
3021 break;
3022 case 0:
3023 fputs (qi_high_reg_name[REGNO (x)], file);
3024 break;
3025 default:
3026 abort ();
3027 }
3028}
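
/* For example (illustrative), for operand (reg 0) the codes print:
   %b0 -> "%al", %h0 -> "%ah", %w0 -> "%ax" and %k0 -> "%eax"
   (the '%' prefix is emitted for the AT&T dialect, or whenever
   USER_LABEL_PREFIX is empty).  */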
3029
3030/* Meaning of CODE:
3031 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3032 C -- print opcode suffix for set/cmov insn.
3033 c -- like C, but print reversed condition
3034 R -- print the prefix for register names.
3035 z -- print the opcode suffix for the size of the current operand.
3036 * -- print a star (in certain assembler syntax)
3037 w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
3039 delimiter.
3040 b -- print the QImode name of the register for the indicated operand.
3041 %b0 would print %al if operands[0] is reg 0.
3042 w -- likewise, print the HImode name of the register.
3043 k -- likewise, print the SImode name of the register.
3044 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3045 y -- print "st(0)" instead of "st" as a register. */
3046
3047void
3048print_operand (file, x, code)
3049 FILE *file;
3050 rtx x;
3051 int code;
3052{
3053 if (code)
3054 {
3055 switch (code)
3056 {
3057 case '*':
3058 if (ASSEMBLER_DIALECT == 0)
3059 putc ('*', file);
3060 return;
3061
3062 case 'L':
3063 if (ASSEMBLER_DIALECT == 0)
3064 putc ('l', file);
3065 return;
3066
3067 case 'W':
3068 if (ASSEMBLER_DIALECT == 0)
3069 putc ('w', file);
3070 return;
3071
3072 case 'B':
3073 if (ASSEMBLER_DIALECT == 0)
3074 putc ('b', file);
3075 return;
3076
3077 case 'Q':
3078 if (ASSEMBLER_DIALECT == 0)
3079 putc ('l', file);
3080 return;
3081
3082 case 'S':
3083 if (ASSEMBLER_DIALECT == 0)
3084 putc ('s', file);
3085 return;
3086
3087 case 'T':
3088 if (ASSEMBLER_DIALECT == 0)
3089 putc ('t', file);
3090 return;
3091
3092 case 'z':
3093 /* 387 opcodes don't get size suffixes if the operands are
3094 registers. */
3095
3096 if (STACK_REG_P (x))
3097 return;
3098
3099 /* Intel syntax has no truck with instruction suffixes. */
3100 if (ASSEMBLER_DIALECT != 0)
3101 return;
3102
	  /* Derive the opcode suffix from the size of the operand.  */
3104 switch (GET_MODE_SIZE (GET_MODE (x)))
3105 {
3106 case 1:
3107 putc ('b', file);
3108 return;
3109
3110 case 2:
3111 putc ('w', file);
3112 return;
3113
3114 case 4:
3115 if (GET_MODE (x) == SFmode)
3116 {
3117 putc ('s', file);
3118 return;
3119 }
3120 else
3121 putc ('l', file);
3122 return;
3123
3124 case 12:
3125 putc ('t', file);
3126 return;
3127
3128 case 8:
3129 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3130 {
3131#ifdef GAS_MNEMONICS
3132 putc ('q', file);
3133#else
3134 putc ('l', file);
3135 putc ('l', file);
3136#endif
3137 }
3138 else
3139 putc ('l', file);
3140 return;
3141 }
3142
3143 case 'b':
3144 case 'w':
3145 case 'k':
3146 case 'h':
3147 case 'y':
3148 case 'X':
3149 case 'P':
3150 break;
3151
3152 case 's':
3153 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3154 {
3155 PRINT_OPERAND (file, x, 0);
3156 putc (',', file);
3157 }
3158 return;
3159
3160 case 'C':
3161 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3162 return;
3163 case 'F':
3164 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3165 return;
3166
3167 /* Like above, but reverse condition */
3168 case 'c':
3169 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3170 return;
3171 case 'f':
3172 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3173 return;
3174
3175 default:
3176 {
3177 char str[50];
3178 sprintf (str, "invalid operand code `%c'", code);
3179 output_operand_lossage (str);
3180 }
3181 }
3182 }
3183
3184 if (GET_CODE (x) == REG)
3185 {
3186 PRINT_REG (x, code, file);
3187 }
3188
3189 else if (GET_CODE (x) == MEM)
3190 {
3191 /* No `byte ptr' prefix for call instructions. */
3192 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3193 {
3194 const char * size;
3195 switch (GET_MODE_SIZE (GET_MODE (x)))
3196 {
3197 case 1: size = "BYTE"; break;
3198 case 2: size = "WORD"; break;
3199 case 4: size = "DWORD"; break;
3200 case 8: size = "QWORD"; break;
3201 case 12: size = "XWORD"; break;
3202 default:
3203 abort ();
3204 }
3205 fputs (size, file);
3206 fputs (" PTR ", file);
3207 }
3208
3209 x = XEXP (x, 0);
3210 if (flag_pic && CONSTANT_ADDRESS_P (x))
3211 output_pic_addr_const (file, x, code);
3212 else
3213 output_address (x);
3214 }
3215
3216 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3217 {
3218 REAL_VALUE_TYPE r;
3219 long l;
3220
3221 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3222 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3223
3224 if (ASSEMBLER_DIALECT == 0)
3225 putc ('$', file);
3226 fprintf (file, "0x%lx", l);
3227 }
3228
3229 /* These float cases don't actually occur as immediate operands. */
3230 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3231 {
3232 REAL_VALUE_TYPE r;
3233 char dstr[30];
3234
3235 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3236 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3237 fprintf (file, "%s", dstr);
3238 }
3239
3240 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3241 {
3242 REAL_VALUE_TYPE r;
3243 char dstr[30];
3244
3245 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3246 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3247 fprintf (file, "%s", dstr);
3248 }
3249 else
3250 {
3251 if (code != 'P')
3252 {
3253 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3254 {
3255 if (ASSEMBLER_DIALECT == 0)
3256 putc ('$', file);
3257 }
3258 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3259 || GET_CODE (x) == LABEL_REF)
3260 {
3261 if (ASSEMBLER_DIALECT == 0)
3262 putc ('$', file);
3263 else
3264 fputs ("OFFSET FLAT:", file);
3265 }
3266 }
3267 if (GET_CODE (x) == CONST_INT)
3268 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3269 else if (flag_pic)
3270 output_pic_addr_const (file, x, code);
3271 else
3272 output_addr_const (file, x);
3273 }
3274}
3275\f
3276/* Print a memory operand whose address is ADDR. */
3277
3278void
3279print_operand_address (file, addr)
3280 FILE *file;
3281 register rtx addr;
3282{
3283 struct ix86_address parts;
3284 rtx base, index, disp;
3285 int scale;
3286
3287 if (! ix86_decompose_address (addr, &parts))
3288 abort ();
3289
3290 base = parts.base;
3291 index = parts.index;
3292 disp = parts.disp;
3293 scale = parts.scale;
3294
3295 if (!base && !index)
3296 {
      /* A displacement-only address requires special attention.  */
3298
3299 if (GET_CODE (disp) == CONST_INT)
3300 {
3301 if (ASSEMBLER_DIALECT != 0)
3302 fputs ("ds:", file);
3303 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3304 }
3305 else if (flag_pic)
3306 output_pic_addr_const (file, addr, 0);
3307 else
3308 output_addr_const (file, addr);
3309 }
3310 else
3311 {
3312 if (ASSEMBLER_DIALECT == 0)
3313 {
3314 if (disp)
3315 {
3316 if (flag_pic)
3317 output_pic_addr_const (file, disp, 0);
3318 else if (GET_CODE (disp) == LABEL_REF)
3319 output_asm_label (disp);
3320 else
3321 output_addr_const (file, disp);
3322 }
3323
3324 putc ('(', file);
3325 if (base)
3326 PRINT_REG (base, 0, file);
3327 if (index)
3328 {
3329 putc (',', file);
3330 PRINT_REG (index, 0, file);
3331 if (scale != 1)
3332 fprintf (file, ",%d", scale);
3333 }
3334 putc (')', file);
3335 }
3336 else
3337 {
3338 rtx offset = NULL_RTX;
3339
3340 if (disp)
3341 {
3342 /* Pull out the offset of a symbol; print any symbol itself. */
3343 if (GET_CODE (disp) == CONST
3344 && GET_CODE (XEXP (disp, 0)) == PLUS
3345 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3346 {
3347 offset = XEXP (XEXP (disp, 0), 1);
3348 disp = gen_rtx_CONST (VOIDmode,
3349 XEXP (XEXP (disp, 0), 0));
3350 }
3351
3352 if (flag_pic)
3353 output_pic_addr_const (file, disp, 0);
3354 else if (GET_CODE (disp) == LABEL_REF)
3355 output_asm_label (disp);
3356 else if (GET_CODE (disp) == CONST_INT)
3357 offset = disp;
3358 else
3359 output_addr_const (file, disp);
3360 }
3361
3362 putc ('[', file);
3363 if (base)
3364 {
3365 PRINT_REG (base, 0, file);
3366 if (offset)
3367 {
3368 if (INTVAL (offset) >= 0)
3369 putc ('+', file);
3370 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3371 }
3372 }
3373 else if (offset)
3374 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3375 else
3376 putc ('0', file);
3377
3378 if (index)
3379 {
3380 putc ('+', file);
3381 PRINT_REG (index, 0, file);
3382 if (scale != 1)
3383 fprintf (file, "*%d", scale);
3384 }
3385 putc (']', file);
3386 }
3387 }
3388}
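
/* An illustrative example: base = %ebp, index = %eax, scale = 4,
   disp = 8 prints as "8(%ebp,%eax,4)" in AT&T syntax and as
   "[ebp+8+eax*4]" in Intel syntax.  */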
3389\f
3390/* Split one or more DImode RTL references into pairs of SImode
3391 references. The RTL can be REG, offsettable MEM, integer constant, or
3392 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3393 split and "num" is its length. lo_half and hi_half are output arrays
3394 that parallel "operands". */
3395
3396void
3397split_di (operands, num, lo_half, hi_half)
3398 rtx operands[];
3399 int num;
3400 rtx lo_half[], hi_half[];
3401{
3402 while (num--)
3403 {
3404 rtx op = operands[num];
3405 if (CONSTANT_P (op))
3406 split_double (op, &lo_half[num], &hi_half[num]);
3407 else if (! reload_completed)
3408 {
3409 lo_half[num] = gen_lowpart (SImode, op);
3410 hi_half[num] = gen_highpart (SImode, op);
3411 }
3412 else if (GET_CODE (op) == REG)
3413 {
3414 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3415 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3416 }
3417 else if (offsettable_memref_p (op))
3418 {
3419 rtx lo_addr = XEXP (op, 0);
3420 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3421 lo_half[num] = change_address (op, SImode, lo_addr);
3422 hi_half[num] = change_address (op, SImode, hi_addr);
3423 }
3424 else
3425 abort ();
3426 }
3427}
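
/* For example (illustrative): after reload, a hard register pair
   (reg:DI 0) splits into (reg:SI 0) and (reg:SI 1), i.e. %eax low
   and %edx high, while an offsettable (mem:DI addr) splits into
   (mem:SI addr) and (mem:SI addr+4).  */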
3428\f
3429/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3430 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3431 is the expression of the binary operation. The output may either be
3432 emitted here, or returned to the caller, like all output_* functions.
3433
3434 There is no guarantee that the operands are the same mode, as they
3435 might be within FLOAT or FLOAT_EXTEND expressions. */
3436
3437const char *
3438output_387_binary_op (insn, operands)
3439 rtx insn;
3440 rtx *operands;
3441{
3442 static char buf[100];
3443 rtx temp;
3444 const char *p;
3445
3446 switch (GET_CODE (operands[3]))
3447 {
3448 case PLUS:
3449 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3450 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3451 p = "fiadd";
3452 else
3453 p = "fadd";
3454 break;
3455
3456 case MINUS:
3457 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3458 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3459 p = "fisub";
3460 else
3461 p = "fsub";
3462 break;
3463
3464 case MULT:
3465 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3466 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3467 p = "fimul";
3468 else
3469 p = "fmul";
3470 break;
3471
3472 case DIV:
3473 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3474 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3475 p = "fidiv";
3476 else
3477 p = "fdiv";
3478 break;
3479
3480 default:
3481 abort ();
3482 }
3483
3484 strcpy (buf, p);
3485
3486 switch (GET_CODE (operands[3]))
3487 {
3488 case MULT:
3489 case PLUS:
3490 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3491 {
3492 temp = operands[2];
3493 operands[2] = operands[1];
3494 operands[1] = temp;
3495 }
3496
3497 if (GET_CODE (operands[2]) == MEM)
3498 {
3499 p = "%z2\t%2";
3500 break;
3501 }
3502
3503 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3504 {
3505 if (STACK_TOP_P (operands[0]))
3506 p = "p\t{%0,%2|%2, %0}";
3507 else
3508 p = "p\t{%2,%0|%0, %2}";
3509 break;
3510 }
3511
3512 if (STACK_TOP_P (operands[0]))
3513 p = "\t{%y2,%0|%0, %y2}";
3514 else
3515 p = "\t{%2,%0|%0, %2}";
3516 break;
3517
3518 case MINUS:
3519 case DIV:
3520 if (GET_CODE (operands[1]) == MEM)
3521 {
3522 p = "r%z1\t%1";
3523 break;
3524 }
3525
3526 if (GET_CODE (operands[2]) == MEM)
3527 {
3528 p = "%z2\t%2";
3529 break;
3530 }
3531
3532 if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
3533 abort ();
3534
3535 /* Note that the Unixware assembler, and the AT&T assembler before
3536 that, are confusingly not reversed from Intel syntax in this
3537 area. */
3538 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3539 {
3540 if (STACK_TOP_P (operands[0]))
3541 p = "p\t%0,%2";
3542 else
3543 p = "rp\t%2,%0";
3544 break;
3545 }
3546
3547 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3548 {
3549 if (STACK_TOP_P (operands[0]))
3550 p = "rp\t%0,%1";
3551 else
3552 p = "p\t%1,%0";
3553 break;
3554 }
3555
3556 if (STACK_TOP_P (operands[0]))
3557 {
3558 if (STACK_TOP_P (operands[1]))
3559 p = "\t%y2,%0";
3560 else
3561 p = "r\t%y1,%0";
3562 break;
3563 }
3564 else if (STACK_TOP_P (operands[1]))
3565 p = "\t%1,%0";
3566 else
3567 p = "r\t%2,%0";
3568 break;
3569
3570 default:
3571 abort ();
3572 }
3573
3574 strcat (buf, p);
3575 return buf;
3576}
3577
3578/* Output code for INSN to convert a float to a signed int. OPERANDS
3579 are the insn operands. The output may be [SD]Imode and the input
3580 operand may be [SDX]Fmode. */
3581
3582const char *
3583output_fix_trunc (insn, operands)
3584 rtx insn;
3585 rtx *operands;
3586{
3587 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3588 int dimode_p = GET_MODE (operands[0]) == DImode;
3589 rtx xops[4];
3590
3591 /* Jump through a hoop or two for DImode, since the hardware has no
3592 non-popping instruction. We used to do this a different way, but
3593 that was somewhat fragile and broke with post-reload splitters. */
3594 if (dimode_p && !stack_top_dies)
3595 output_asm_insn ("fld\t%y1", operands);
3596
3597 if (! STACK_TOP_P (operands[1]))
3598 abort ();
3599
3600 xops[0] = GEN_INT (12);
3601 xops[1] = adj_offsettable_operand (operands[2], 1);
3602 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3603
3604 xops[2] = operands[0];
3605 if (GET_CODE (operands[0]) != MEM)
3606 xops[2] = operands[3];
3607
3608 output_asm_insn ("fnstcw\t%2", operands);
3609 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3610 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3611 output_asm_insn ("fldcw\t%2", operands);
3612 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3613
3614 if (stack_top_dies || dimode_p)
3615 output_asm_insn ("fistp%z2\t%2", xops);
3616 else
3617 output_asm_insn ("fist%z2\t%2", xops);
3618
3619 output_asm_insn ("fldcw\t%2", operands);
3620
3621 if (GET_CODE (operands[0]) != MEM)
3622 {
3623 if (dimode_p)
3624 {
3625 split_di (operands+0, 1, xops+0, xops+1);
3626 split_di (operands+3, 1, xops+2, xops+3);
3627 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3628 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3629 }
3630 else
3631 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands);
3632 }
3633
3634 return "";
3635}
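
/* An illustrative note on the control word dance above: storing 12
   (0x0c) into the high byte of the saved control word sets the
   rounding-control bits 10-11 to 11, i.e. round toward zero, which is
   what C's float-to-integer truncation requires.  (It also clears the
   precision-control bits, which does not affect integer stores.)  The
   original word is reloaded once the fist/fistp has been issued.  */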
3636
3637/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3638 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3639 when fucom should be used. */
3640
3641const char *
3642output_fp_compare (insn, operands, eflags_p, unordered_p)
3643 rtx insn;
3644 rtx *operands;
3645 int eflags_p, unordered_p;
3646{
3647 int stack_top_dies;
3648 rtx cmp_op0 = operands[0];
3649 rtx cmp_op1 = operands[1];
3650
3651 if (eflags_p == 2)
3652 {
3653 cmp_op0 = cmp_op1;
3654 cmp_op1 = operands[2];
3655 }
3656
3657 if (! STACK_TOP_P (cmp_op0))
3658 abort ();
3659
3660 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3661
3662 if (STACK_REG_P (cmp_op1)
3663 && stack_top_dies
3664 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3665 && REGNO (cmp_op1) != FIRST_STACK_REG)
3666 {
      /* If the top of the 387 stack dies, and the other operand is also
	 a stack register that dies, then this must be an `fcompp' float
	 compare.  */
3670
3671 if (eflags_p == 1)
3672 {
3673 /* There is no double popping fcomi variant. Fortunately,
3674 eflags is immune from the fstp's cc clobbering. */
3675 if (unordered_p)
3676 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3677 else
3678 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3679 return "fstp\t%y0";
3680 }
3681 else
3682 {
3683 if (eflags_p == 2)
3684 {
3685 if (unordered_p)
3686 return "fucompp\n\tfnstsw\t%0";
3687 else
3688 return "fcompp\n\tfnstsw\t%0";
3689 }
3690 else
3691 {
3692 if (unordered_p)
3693 return "fucompp";
3694 else
3695 return "fcompp";
3696 }
3697 }
3698 }
3699 else
3700 {
3701 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3702
3703 static const char * const alt[24] =
3704 {
3705 "fcom%z1\t%y1",
3706 "fcomp%z1\t%y1",
3707 "fucom%z1\t%y1",
3708 "fucomp%z1\t%y1",
3709
3710 "ficom%z1\t%y1",
3711 "ficomp%z1\t%y1",
3712 NULL,
3713 NULL,
3714
3715 "fcomi\t{%y1, %0|%0, %y1}",
3716 "fcomip\t{%y1, %0|%0, %y1}",
3717 "fucomi\t{%y1, %0|%0, %y1}",
3718 "fucomip\t{%y1, %0|%0, %y1}",
3719
3720 NULL,
3721 NULL,
3722 NULL,
3723 NULL,
3724
3725 "fcom%z2\t%y2\n\tfnstsw\t%0",
3726 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3727 "fucom%z2\t%y2\n\tfnstsw\t%0",
3728 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3729
3730 "ficom%z2\t%y2\n\tfnstsw\t%0",
3731 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3732 NULL,
3733 NULL
3734 };
3735
3736 int mask;
3737 const char *ret;
3738
3739 mask = eflags_p << 3;
3740 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3741 mask |= unordered_p << 1;
3742 mask |= stack_top_dies;
3743
3744 if (mask >= 24)
3745 abort ();
3746 ret = alt[mask];
3747 if (ret == NULL)
3748 abort ();
3749
3750 return ret;
3751 }
3752}
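
/* An illustrative decoding of the mask: eflags_p == 1, a register
   operand (not MODE_INT), an ordered compare, and a dying stack top
   give mask == (1 << 3) + 1 == 9, selecting
   "fcomip\t{%y1, %0|%0, %y1}".  */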
3753
3754/* Output assembler code to FILE to initialize basic-block profiling.
3755
3756 If profile_block_flag == 2
3757
3758 Output code to call the subroutine `__bb_init_trace_func'
3759 and pass two parameters to it. The first parameter is
3760 the address of a block allocated in the object module.
3761 The second parameter is the number of the first basic block
3762 of the function.
3763
3764 The name of the block is a local symbol made with this statement:
3765
3766 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3767
3768 Of course, since you are writing the definition of
3769 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3770 can take a short cut in the definition of this macro and use the
3771 name that you know will result.
3772
3773 The number of the first basic block of the function is
3774 passed to the macro in BLOCK_OR_LABEL.
3775
3776 If described in a virtual assembler language the code to be
3777 output looks like:
3778
3779 parameter1 <- LPBX0
3780 parameter2 <- BLOCK_OR_LABEL
3781 call __bb_init_trace_func
3782
3783 else if profile_block_flag != 0
3784
3785 Output code to call the subroutine `__bb_init_func'
3786 and pass one single parameter to it, which is the same
3787 as the first parameter to `__bb_init_trace_func'.
3788
3789 The first word of this parameter is a flag which will be nonzero if
3790 the object module has already been initialized. So test this word
3791 first, and do not call `__bb_init_func' if the flag is nonzero.
3792 Note: When profile_block_flag == 2 the test need not be done
3793 but `__bb_init_trace_func' *must* be called.
3794
3795 BLOCK_OR_LABEL may be used to generate a label number as a
3796 branch destination in case `__bb_init_func' will not be called.
3797
3798 If described in a virtual assembler language the code to be
3799 output looks like:
3800
3801 cmp (LPBX0),0
3802 jne local_label
3803 parameter1 <- LPBX0
3804 call __bb_init_func
3805 local_label:
3806*/
3807
3808void
3809ix86_output_function_block_profiler (file, block_or_label)
3810 FILE *file;
3811 int block_or_label;
3812{
3813 static int num_func = 0;
3814 rtx xops[8];
3815 char block_table[80], false_label[80];
3816
3817 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3818
3819 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3820 xops[5] = stack_pointer_rtx;
3821 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3822
3823 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3824
3825 switch (profile_block_flag)
3826 {
3827 case 2:
3828 xops[2] = GEN_INT (block_or_label);
3829 xops[3] = gen_rtx_MEM (Pmode,
3830 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3831 xops[6] = GEN_INT (8);
3832
3833 output_asm_insn ("push{l}\t%2", xops);
3834 if (!flag_pic)
3835 output_asm_insn ("push{l}\t%1", xops);
3836 else
3837 {
3838 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3839 output_asm_insn ("push{l}\t%7", xops);
3840 }
3841 output_asm_insn ("call\t%P3", xops);
3842 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3843 break;
3844
3845 default:
3846 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
3847
3848 xops[0] = const0_rtx;
3849 xops[2] = gen_rtx_MEM (Pmode,
3850 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3851 xops[3] = gen_rtx_MEM (Pmode,
3852 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3853 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3854 xops[6] = GEN_INT (4);
3855
3856 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
3857
3858 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3859 output_asm_insn ("jne\t%2", xops);
3860
3861 if (!flag_pic)
3862 output_asm_insn ("push{l}\t%1", xops);
3863 else
3864 {
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3866 output_asm_insn ("push{l}\t%7", xops);
3867 }
3868 output_asm_insn ("call\t%P3", xops);
3869 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3870 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3871 num_func++;
3872 break;
3873 }
3874}
3875
3876/* Output assembler code to FILE to increment a counter associated
3877 with basic block number BLOCKNO.
3878
3879 If profile_block_flag == 2
3880
3881 Output code to initialize the global structure `__bb' and
3882 call the function `__bb_trace_func' which will increment the
3883 counter.
3884
3885 `__bb' consists of two words. In the first word the number
3886 of the basic block has to be stored. In the second word
3887 the address of a block allocated in the object module
3888 has to be stored.
3889
3890 The basic block number is given by BLOCKNO.
3891
3892 The address of the block is given by the label created with
3893
3894 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3895
3896 by FUNCTION_BLOCK_PROFILER.
3897
3898 Of course, since you are writing the definition of
3899 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3900 can take a short cut in the definition of this macro and use the
3901 name that you know will result.
3902
3903 If described in a virtual assembler language the code to be
3904 output looks like:
3905
3906 move BLOCKNO -> (__bb)
3907 move LPBX0 -> (__bb+4)
3908 call __bb_trace_func
3909
3910 Note that function `__bb_trace_func' must not change the
3911 machine state, especially the flag register. To grant
3912 this, you must output code to save and restore registers
3913 either in this macro or in the macros MACHINE_STATE_SAVE
3914 and MACHINE_STATE_RESTORE. The last two macros will be
3915 used in the function `__bb_trace_func', so you must make
3916 sure that the function prologue does not change any
3917 register prior to saving it with MACHINE_STATE_SAVE.
3918
3919 else if profile_block_flag != 0
3920
3921 Output code to increment the counter directly.
3922 Basic blocks are numbered separately from zero within each
3923 compiled object module. The count associated with block number
3924 BLOCKNO is at index BLOCKNO in an array of words; the name of
3925 this array is a local symbol made with this statement:
3926
3927 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
3928
3929 Of course, since you are writing the definition of
3930 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3931 can take a short cut in the definition of this macro and use the
3932 name that you know will result.
3933
3934 If described in a virtual assembler language the code to be
3935 output looks like:
3936
3937 inc (LPBX2+4*BLOCKNO)
3938*/
3939
3940void
3941ix86_output_block_profiler (file, blockno)
3942 FILE *file ATTRIBUTE_UNUSED;
3943 int blockno;
3944{
3945 rtx xops[8], cnt_rtx;
3946 char counts[80];
3947 char *block_table = counts;
3948
3949 switch (profile_block_flag)
3950 {
3951 case 2:
3952 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3953
3954 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3955 xops[2] = GEN_INT (blockno);
3956 xops[3] = gen_rtx_MEM (Pmode,
3957 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
3958 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
3959 xops[5] = plus_constant (xops[4], 4);
3960 xops[0] = gen_rtx_MEM (SImode, xops[4]);
3961 xops[6] = gen_rtx_MEM (SImode, xops[5]);
3962
3963 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3964
3965 output_asm_insn ("pushf", xops);
3966 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3967 if (flag_pic)
3968 {
3969 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3970 output_asm_insn ("push{l}\t%7", xops);
3971 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3972 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
3973 output_asm_insn ("pop{l}\t%7", xops);
3974 }
3975 else
3976 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
3977 output_asm_insn ("call\t%P3", xops);
3978 output_asm_insn ("popf", xops);
3979
3980 break;
3981
3982 default:
3983 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
3984 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
3985 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
3986
3987 if (blockno)
3988 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
3989
3990 if (flag_pic)
3991 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
3992
3993 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
3994 output_asm_insn ("inc{l}\t%0", xops);
3995
3996 break;
3997 }
3998}
3999\f
4000void
4001ix86_expand_move (mode, operands)
4002 enum machine_mode mode;
4003 rtx operands[];
4004{
4005 int strict = (reload_in_progress || reload_completed);
4006 rtx insn;
4007
4008 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4009 {
4010 /* Emit insns to move operands[1] into operands[0]. */
4011
4012 if (GET_CODE (operands[0]) == MEM)
4013 operands[1] = force_reg (Pmode, operands[1]);
4014 else
4015 {
4016 rtx temp = operands[0];
4017 if (GET_CODE (temp) != REG)
4018 temp = gen_reg_rtx (Pmode);
4019 temp = legitimize_pic_address (operands[1], temp);
4020 if (temp == operands[0])
4021 return;
4022 operands[1] = temp;
4023 }
4024 }
4025 else
4026 {
4027 if (GET_CODE (operands[0]) == MEM
4028 && (GET_MODE (operands[0]) == QImode
4029 || !push_operand (operands[0], mode))
4030 && GET_CODE (operands[1]) == MEM)
4031 operands[1] = force_reg (mode, operands[1]);
4032
4033 if (push_operand (operands[0], mode)
4034 && ! general_no_elim_operand (operands[1], mode))
4035 operands[1] = copy_to_mode_reg (mode, operands[1]);
4036
4037 if (FLOAT_MODE_P (mode))
4038 {
4039 /* If we are loading a floating point constant to a register,
4040 force the value to memory now, since we'll get better code
4041 out of the back end. */
4042
4043 if (strict)
4044 ;
4045 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4046 && register_operand (operands[0], mode))
4047 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4048 }
4049 }
4050
4051 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4052
4053 emit_insn (insn);
4054}
4055
4056/* Attempt to expand a binary operator. Make the expansion closer to the
4057 actual machine than just general_operand, which would allow 3 separate
4058 memory references (one output, two input) in a single insn. */
4059
4060void
4061ix86_expand_binary_operator (code, mode, operands)
4062 enum rtx_code code;
4063 enum machine_mode mode;
4064 rtx operands[];
4065{
4066 int matching_memory;
4067 rtx src1, src2, dst, op, clob;
4068
4069 dst = operands[0];
4070 src1 = operands[1];
4071 src2 = operands[2];
4072
4073 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4074 if (GET_RTX_CLASS (code) == 'c'
4075 && (rtx_equal_p (dst, src2)
4076 || immediate_operand (src1, mode)))
4077 {
4078 rtx temp = src1;
4079 src1 = src2;
4080 src2 = temp;
4081 }
4082
4083 /* If the destination is memory, and we do not have matching source
4084 operands, do things in registers. */
4085 matching_memory = 0;
4086 if (GET_CODE (dst) == MEM)
4087 {
4088 if (rtx_equal_p (dst, src1))
4089 matching_memory = 1;
4090 else if (GET_RTX_CLASS (code) == 'c'
4091 && rtx_equal_p (dst, src2))
4092 matching_memory = 2;
4093 else
4094 dst = gen_reg_rtx (mode);
4095 }
4096
4097 /* Both source operands cannot be in memory. */
4098 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4099 {
4100 if (matching_memory != 2)
4101 src2 = force_reg (mode, src2);
4102 else
4103 src1 = force_reg (mode, src1);
4104 }
4105
4106 /* If the operation is not commutative, source 1 cannot be a constant
4107 or non-matching memory. */
4108 if ((CONSTANT_P (src1)
4109 || (!matching_memory && GET_CODE (src1) == MEM))
4110 && GET_RTX_CLASS (code) != 'c')
4111 src1 = force_reg (mode, src1);
4112
4113 /* If optimizing, copy to regs to improve CSE */
4114 if (optimize && !reload_in_progress && !reload_completed)
4115 {
4116 if (GET_CODE (dst) == MEM)
4117 dst = gen_reg_rtx (mode);
4118 if (GET_CODE (src1) == MEM)
4119 src1 = force_reg (mode, src1);
4120 if (GET_CODE (src2) == MEM)
4121 src2 = force_reg (mode, src2);
4122 }
4123
4124 /* Emit the instruction. */
4125
4126 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4127 if (reload_in_progress)
4128 {
4129 /* Reload doesn't know about the flags register, and doesn't know that
4130 it doesn't want to clobber it. We can only do this with PLUS. */
4131 if (code != PLUS)
4132 abort ();
4133 emit_insn (op);
4134 }
4135 else
4136 {
4137 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4138 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4139 }
4140
4141 /* Fix up the destination if needed. */
4142 if (dst != operands[0])
4143 emit_move_insn (operands[0], dst);
4144}
4145
4146/* Return TRUE or FALSE depending on whether the binary operator meets the
4147 appropriate constraints. */
4148
4149int
4150ix86_binary_operator_ok (code, mode, operands)
4151 enum rtx_code code;
4152 enum machine_mode mode ATTRIBUTE_UNUSED;
4153 rtx operands[3];
4154{
4155 /* Both source operands cannot be in memory. */
4156 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4157 return 0;
4158 /* If the operation is not commutative, source 1 cannot be a constant. */
4159 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4160 return 0;
4161 /* If the destination is memory, we must have a matching source operand. */
4162 if (GET_CODE (operands[0]) == MEM
4163 && ! (rtx_equal_p (operands[0], operands[1])
4164 || (GET_RTX_CLASS (code) == 'c'
4165 && rtx_equal_p (operands[0], operands[2]))))
4166 return 0;
4167 /* If the operation is not commutative and source 1 is memory, we must
4168 have a matching destination. */
4169 if (GET_CODE (operands[1]) == MEM
4170 && GET_RTX_CLASS (code) != 'c'
4171 && ! rtx_equal_p (operands[0], operands[1]))
4172 return 0;
4173 return 1;
4174}
4175
4176/* Attempt to expand a unary operator. Make the expansion closer to the
4177 actual machine than just general_operand, which would allow 2 separate
4178 memory references (one output, one input) in a single insn. */
4179
4180void
4181ix86_expand_unary_operator (code, mode, operands)
4182 enum rtx_code code;
4183 enum machine_mode mode;
4184 rtx operands[];
4185{
4186 int matching_memory;
4187 rtx src, dst, op, clob;
4188
4189 dst = operands[0];
4190 src = operands[1];
4191
4192 /* If the destination is memory, and we do not have matching source
4193 operands, do things in registers. */
4194 matching_memory = 0;
4195 if (GET_CODE (dst) == MEM)
4196 {
4197 if (rtx_equal_p (dst, src))
4198 matching_memory = 1;
4199 else
4200 dst = gen_reg_rtx (mode);
4201 }
4202
4203 /* When source operand is memory, destination must match. */
4204 if (!matching_memory && GET_CODE (src) == MEM)
4205 src = force_reg (mode, src);
4206
4207 /* If optimizing, copy to regs to improve CSE */
4208 if (optimize && !reload_in_progress && !reload_completed)
4209 {
4210 if (GET_CODE (dst) == MEM)
4211 dst = gen_reg_rtx (mode);
4212 if (GET_CODE (src) == MEM)
4213 src = force_reg (mode, src);
4214 }
4215
4216 /* Emit the instruction. */
4217
4218 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4219 if (reload_in_progress || code == NOT)
4220 {
4221 /* Reload doesn't know about the flags register, and doesn't know that
4222 it doesn't want to clobber it. */
4223 if (code != NOT)
4224 abort ();
4225 emit_insn (op);
4226 }
4227 else
4228 {
4229 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4230 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4231 }
4232
4233 /* Fix up the destination if needed. */
4234 if (dst != operands[0])
4235 emit_move_insn (operands[0], dst);
4236}
4237
4238/* Return TRUE or FALSE depending on whether the unary operator meets the
4239 appropriate constraints. */
4240
4241int
4242ix86_unary_operator_ok (code, mode, operands)
4243 enum rtx_code code ATTRIBUTE_UNUSED;
4244 enum machine_mode mode ATTRIBUTE_UNUSED;
4245 rtx operands[2] ATTRIBUTE_UNUSED;
4246{
4247 /* If one of operands is memory, source and destination must match. */
4248 if ((GET_CODE (operands[0]) == MEM
4249 || GET_CODE (operands[1]) == MEM)
4250 && ! rtx_equal_p (operands[0], operands[1]))
4251 return FALSE;
4252 return TRUE;
4253}
4254
4255/* Produce an unsigned comparison for a given signed comparison. */
4256
4257static enum rtx_code
4258unsigned_comparison (code)
4259 enum rtx_code code;
4260{
4261 switch (code)
4262 {
4263 case GT:
4264 code = GTU;
4265 break;
4266 case LT:
4267 code = LTU;
4268 break;
4269 case GE:
4270 code = GEU;
4271 break;
4272 case LE:
4273 code = LEU;
4274 break;
4275 case EQ:
4276 case NE:
4277 case LEU:
4278 case LTU:
4279 case GEU:
4280 case GTU:
4281 break;
4282 default:
4283 abort ();
4284 }
4285 return code;
4286}
4287
4288/* Generate insn patterns to do an integer compare of OPERANDS. */
4289
4290static rtx
4291ix86_expand_int_compare (code, op0, op1)
4292 enum rtx_code code;
4293 rtx op0, op1;
4294{
4295 enum machine_mode cmpmode;
4296 rtx tmp, flags;
4297
4298 cmpmode = SELECT_CC_MODE (code, op0, op1);
4299 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4300
4301 /* This is very simple, but making the interface the same as in the
4302 FP case makes the rest of the code easier. */
4303 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4304 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4305
4306 /* Return the test that should be put into the flags user, i.e.
4307 the bcc, scc, or cmov instruction. */
4308 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4309}
4310
4311/* Generate insn patterns to do a floating point compare of OPERANDS.
4312 If UNORDERED, allow for unordered compares. */
4313
4314static rtx
4315ix86_expand_fp_compare (code, op0, op1, unordered)
4316 enum rtx_code code;
4317 rtx op0, op1;
4318 int unordered;
4319{
4320 enum machine_mode fpcmp_mode;
4321 enum machine_mode intcmp_mode;
4322 rtx tmp;
4323
4324 /* When not doing IEEE compliant compares, disable unordered. */
4325 if (! TARGET_IEEE_FP)
4326 unordered = 0;
4327 fpcmp_mode = unordered ? CCFPUmode : CCFPmode;
4328
4329 /* ??? If we knew whether invalid-operand exceptions were masked,
4330 we could rely on fcom to raise an exception and take care of
4331 NaNs. But we don't. We could know this from c9x math bits. */
4332 if (TARGET_IEEE_FP)
4333 unordered = 1;
4334
4335 /* All of the unordered compare instructions only work on registers.
4336 The same is true of the XFmode compare instructions. */
4337 if (unordered || GET_MODE (op0) == XFmode)
4338 {
4339 op0 = force_reg (GET_MODE (op0), op0);
4340 op1 = force_reg (GET_MODE (op1), op1);
4341 }
4342 else
4343 {
4344 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4345 things around if they appear profitable, otherwise force op0
4346 into a register. */
4347
4348 if (standard_80387_constant_p (op0) == 0
4349 || (GET_CODE (op0) == MEM
4350 && ! (standard_80387_constant_p (op1) == 0
4351 || GET_CODE (op1) == MEM)))
4352 {
4353 rtx tmp;
4354 tmp = op0, op0 = op1, op1 = tmp;
4355 code = swap_condition (code);
4356 }
4357
4358 if (GET_CODE (op0) != REG)
4359 op0 = force_reg (GET_MODE (op0), op0);
4360
4361 if (CONSTANT_P (op1))
4362 {
4363 if (standard_80387_constant_p (op1))
4364 op1 = force_reg (GET_MODE (op1), op1);
4365 else
4366 op1 = validize_mem (force_const_mem (GET_MODE (op1), op1));
4367 }
4368 }
4369
4370 /* %%% fcomi is probably always faster, even when dealing with memory,
4371 since compare-and-branch would be three insns instead of four. */
4372 if (TARGET_CMOVE && !unordered)
4373 {
4374 if (GET_CODE (op0) != REG)
4375 op0 = force_reg (GET_MODE (op0), op0);
4376 if (GET_CODE (op1) != REG)
4377 op1 = force_reg (GET_MODE (op1), op1);
4378
4379 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4380 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4381 emit_insn (tmp);
4382
4383 /* The FP codes work out to act like unsigned. */
4384 code = unsigned_comparison (code);
4385 intcmp_mode = fpcmp_mode;
4386 }
4387 else
4388 {
4389 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4390
4391 rtx tmp2;
4392 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4393 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4394 tmp = gen_reg_rtx (HImode);
4395 emit_insn (gen_rtx_SET (VOIDmode, tmp, tmp2));
4396
4397 if (! unordered)
4398 {
4399 /* We have two options here -- use sahf, or testing bits of ah
4400 directly. On PPRO, they are equivalent, sahf being one byte
4401 smaller. On Pentium, sahf is non-pairable while test is UV
4402 pairable. */
4403
4404 if (TARGET_USE_SAHF || optimize_size)
4405 {
4406 do_sahf:
4407
4408 /* The FP codes work out to act like unsigned. */
4409 code = unsigned_comparison (code);
4410 emit_insn (gen_x86_sahf_1 (tmp));
4411 intcmp_mode = CCmode;
4412 }
4413 else
4414 {
4415 /*
4416 * The numbers below correspond to the bits of the FPSW in AH.
4417 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4418 *
4419 * cmp C3 C2 C0
4420 * > 0 0 0
4421 * < 0 0 1
4422 * = 1 0 0
4423 * un 1 1 1
4424 */
4425
4426 int mask;
4427
4428 switch (code)
4429 {
4430 case GT:
4431 mask = 0x41;
4432 code = EQ;
4433 break;
4434 case LT:
4435 mask = 0x01;
4436 code = NE;
4437 break;
4438 case GE:
4439 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4440 faster in all cases to just fall back on sahf. */
4441 goto do_sahf;
4442 case LE:
4443 mask = 0x41;
4444 code = NE;
4445 break;
4446 case EQ:
4447 mask = 0x40;
4448 code = NE;
4449 break;
4450 case NE:
4451 mask = 0x40;
4452 code = EQ;
4453 break;
4454 default:
4455 abort ();
4456 }
4457
4458 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (mask)));
4459 intcmp_mode = CCNOmode;
4460 }
4461 }
4462 else
4463 {
4464 /* In the unordered case, we have to check C2 for NaN's, which
4465 doesn't happen to work out to anything nice combination-wise.
4466 So do some bit twiddling on the value we've got in AH to come
4467 up with an appropriate set of condition codes. */
4468
4469 intcmp_mode = CCNOmode;
4470 switch (code)
4471 {
4472 case GT:
4473 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x45)));
4474 code = EQ;
4475 break;
4476 case LT:
4477 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4478 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x01)));
4479 intcmp_mode = CCmode;
4480 code = EQ;
4481 break;
4482 case GE:
4483 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x05)));
4484 code = EQ;
4485 break;
4486 case LE:
4487 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4488 emit_insn (gen_addqi_ext_1 (tmp, tmp, constm1_rtx));
4489 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4490 intcmp_mode = CCmode;
4491 code = LTU;
4492 break;
4493 case EQ:
4494 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4495 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4496 intcmp_mode = CCmode;
4497 code = EQ;
4498 break;
4499 case NE:
4500 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4501 emit_insn (gen_xorqi_cc_ext_1 (tmp, tmp, GEN_INT (0x40)));
4502 code = NE;
4503 break;
4504 default:
4505 abort ();
4506 }
4507 }
4508 }
4509
4510 /* Return the test that should be put into the flags user, i.e.
4511 the bcc, scc, or cmov instruction. */
4512 return gen_rtx_fmt_ee (code, VOIDmode,
4513 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4514 const0_rtx);
4515}
4516
4517static rtx
4518ix86_expand_compare (code, unordered)
4519 enum rtx_code code;
4520 int unordered;
4521{
4522 rtx op0, op1, ret;
4523 op0 = ix86_compare_op0;
4524 op1 = ix86_compare_op1;
4525
4526 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4527 ret = ix86_expand_fp_compare (code, op0, op1, unordered);
4528 else
4529 ret = ix86_expand_int_compare (code, op0, op1);
4530
4531 return ret;
4532}
4533
4534void
4535ix86_expand_branch (code, unordered, label)
4536 enum rtx_code code;
4537 int unordered;
4538 rtx label;
4539{
4540 rtx tmp, lo[2], hi[2], label2;
4541 enum rtx_code code1, code2, code3;
4542
4543 if (GET_MODE (ix86_compare_op0) != DImode)
4544 {
4545 tmp = ix86_expand_compare (code, unordered);
4546 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4547 gen_rtx_LABEL_REF (VOIDmode, label),
4548 pc_rtx);
4549 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4550 return;
4551 }
4552
4553 /* Expand DImode branch into multiple compare+branch. */
4554
4555 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4556 {
4557 tmp = ix86_compare_op0;
4558 ix86_compare_op0 = ix86_compare_op1;
4559 ix86_compare_op1 = tmp;
4560 code = swap_condition (code);
4561 }
4562 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4563 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
4564
4565 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
4566 two branches. This costs one extra insn, so disable when optimizing
4567 for size. */
4568
4569 if ((code == EQ || code == NE)
4570 && (!optimize_size
4571 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4572 {
4573 rtx xor0, xor1;
4574
4575 xor1 = hi[0];
4576 if (hi[1] != const0_rtx)
4577 {
4578 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4579 NULL_RTX, 0, OPTAB_WIDEN);
4580 }
4581
4582 xor0 = lo[0];
4583 if (lo[1] != const0_rtx)
4584 {
4585 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4586 NULL_RTX, 0, OPTAB_WIDEN);
4587 }
4588
4589 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4590 NULL_RTX, 0, OPTAB_WIDEN);
4591
4592 ix86_compare_op0 = tmp;
4593 ix86_compare_op1 = const0_rtx;
4594 ix86_expand_branch (code, unordered, label);
4595 return;
4596 }
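  /* As an illustration: for the common test x == 0 on a DImode x,
     both constant halves are zero, so the xors above are skipped
     entirely and the test collapses to a single "or" of the two
     halves followed by one branch on the zero flag.  */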
4597
4598 /* Otherwise, if we are doing less-than and op1 is a constant whose
4599 low word is zero, then we can just examine the high word. */
4600
4601 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4602 && (code == LT || code == LTU))
4603 {
4604 ix86_compare_op0 = hi[0];
4605 ix86_compare_op1 = hi[1];
4606 ix86_expand_branch (code, unordered, label);
4607 return;
4608 }
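  /* For example (illustrative values): the unsigned test
     x < 0x500000000 has a zero low word in the constant, and it holds
     exactly when hi(x) < 5 -- the low word of x cannot change the
     outcome, so one compare and branch on the high word suffices.  */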
4609
4610 /* Otherwise, we need two or three jumps. */
4611
4612 label2 = gen_label_rtx ();
4613
4614 code1 = code;
4615 code2 = swap_condition (code);
4616 code3 = unsigned_condition (code);
4617
4618 switch (code)
4619 {
4620 case LT: case GT: case LTU: case GTU:
4621 break;
4622
4623 case LE: code1 = LT; code2 = GT; break;
4624 case GE: code1 = GT; code2 = LT; break;
4625 case LEU: code1 = LTU; code2 = GTU; break;
4626 case GEU: code1 = GTU; code2 = LTU; break;
4627
4628 case EQ: code1 = NIL; code2 = NE; break;
4629 case NE: code2 = NIL; break;
4630
4631 default:
4632 abort ();
4633 }
4634
4635 /*
4636 * a < b =>
4637 * if (hi(a) < hi(b)) goto true;
4638 * if (hi(a) > hi(b)) goto false;
4639 * if (lo(a) < lo(b)) goto true;
4640 * false:
4641 */
4642
4643 ix86_compare_op0 = hi[0];
4644 ix86_compare_op1 = hi[1];
4645
4646 if (code1 != NIL)
4647 ix86_expand_branch (code1, unordered, label);
4648 if (code2 != NIL)
4649 ix86_expand_branch (code2, unordered, label2);
4650
4651 ix86_compare_op0 = lo[0];
4652 ix86_compare_op1 = lo[1];
4653 ix86_expand_branch (code3, unordered, label);
4654
4655 if (code2 != NIL)
4656 emit_label (label2);
4657}
4658
4659int
4660ix86_expand_setcc (code, unordered, dest)
4661 enum rtx_code code;
4662 int unordered;
4663 rtx dest;
4664{
4665 rtx ret, tmp;
4666 int type;
4667
4668 if (GET_MODE (ix86_compare_op0) == DImode)
4669 return 0; /* FAIL */
4670
4671 /* Three modes of generation:
4672 0 -- destination does not overlap compare sources:
4673 clear dest first, emit strict_low_part setcc.
4674 1 -- destination does overlap compare sources:
4675 emit subreg setcc, zero extend.
4676 2 -- destination is in QImode:
4677 emit setcc only.
4678 */
4679
4680 type = 0;
4681
4682 if (GET_MODE (dest) == QImode)
4683 type = 2;
4684 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
4685 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
4686 type = 1;
4687
4688 if (type == 0)
4689 emit_move_insn (dest, const0_rtx);
4690
4691 ret = ix86_expand_compare (code, unordered);
4692 PUT_MODE (ret, QImode);
4693
4694 tmp = dest;
4695 if (type == 0)
4696 {
4697 tmp = gen_lowpart (QImode, dest);
4698 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4699 }
4700 else if (type == 1)
4701 {
4702 if (!cse_not_expected)
4703 tmp = gen_reg_rtx (QImode);
4704 else
4705 tmp = gen_lowpart (QImode, dest);
4706 }
4707
4708 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4709
4710 if (type == 1)
4711 {
4712 rtx clob;
4713
4714 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4715 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4716 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4717 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4718 emit_insn (tmp);
4719 }
4720
4721 return 1; /* DONE */
4722}
4723
4724int
4725ix86_expand_int_movcc (operands)
4726 rtx operands[];
4727{
4728 enum rtx_code code = GET_CODE (operands[1]), compare_code;
4729 rtx compare_seq, compare_op;
4730
4731 /* When the compare code is not LTU or GEU, we cannot use the sbbl idiom.
4732 When the comparison is done against an immediate, we can convert LEU or
4733 GTU to LTU or GEU by adjusting the constant. */
4734
4735 if ((code == LEU || code == GTU)
4736 && GET_CODE (ix86_compare_op1) == CONST_INT
4737 && GET_MODE (operands[0]) != HImode
4738 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
4739 && GET_CODE (operands[2]) == CONST_INT
4740 && GET_CODE (operands[3]) == CONST_INT)
4741 {
4742 if (code == LEU)
4743 code = LTU;
4744 else
4745 code = GEU;
4746 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
4747 }
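  /* Worked example (illustrative): the unsigned test x <= 5 (LEU)
     becomes x < 6 (LTU) by bumping the immediate, and x > 5 (GTU)
     becomes x >= 6 (GEU).  The 0xffffffff check above keeps the
     increment from wrapping around.  */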
4748 start_sequence ();
4749 compare_op = ix86_expand_compare (code, code == EQ || code == NE);
4750 compare_seq = gen_sequence ();
4751 end_sequence ();
4752
4753 compare_code = GET_CODE (compare_op);
4754
4755 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4756 HImode insns, we'd be swallowed in word prefix ops. */
4757
4758 if (GET_MODE (operands[0]) != HImode
4759 && GET_CODE (operands[2]) == CONST_INT
4760 && GET_CODE (operands[3]) == CONST_INT)
4761 {
4762 rtx out = operands[0];
4763 HOST_WIDE_INT ct = INTVAL (operands[2]);
4764 HOST_WIDE_INT cf = INTVAL (operands[3]);
4765 HOST_WIDE_INT diff;
4766
4767 if (compare_code == LTU || compare_code == GEU)
4768 {
4769
4770 /* Detect overlap between destination and compare sources. */
4771 rtx tmp = out;
4772
4773 /* To simplify the rest of the code, restrict to the GEU case. */
4774 if (compare_code == LTU)
4775 {
4776 HOST_WIDE_INT tmp2 = ct;
4777 ct = cf;
4778 cf = tmp2;
4779 compare_code = reverse_condition (compare_code);
4780 code = reverse_condition (code);
4781 }
4782 diff = ct - cf;
4783
4784 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
4785 || reg_overlap_mentioned_p (out, ix86_compare_op1))
4786 tmp = gen_reg_rtx (SImode);
4787
4788 emit_insn (compare_seq);
4789 emit_insn (gen_x86_movsicc_0_m1 (tmp));
4790
4791 if (diff == 1)
4792 {
4793 /*
4794 * cmpl op0,op1
4795 * sbbl dest,dest
4796 * [addl dest, ct]
4797 *
4798 * Size 5 - 8.
4799 */
4800 if (ct)
4801 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
4802 }
4803 else if (cf == -1)
4804 {
4805 /*
4806 * cmpl op0,op1
4807 * sbbl dest,dest
4808 * orl $ct, dest
4809 *
4810 * Size 8.
4811 */
4812 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
4813 }
4814 else if (diff == -1 && ct)
4815 {
4816 /*
4817 * cmpl op0,op1
4818 * sbbl dest,dest
4819 * xorl $-1, dest
4820 * [addl dest, cf]
4821 *
4822 * Size 8 - 11.
4823 */
4824 emit_insn (gen_one_cmplsi2 (tmp, tmp));
4825 if (cf)
4826 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
4827 }
4828 else
4829 {
4830 /*
4831 * cmpl op0,op1
4832 * sbbl dest,dest
4833 * andl cf - ct, dest
4834 * [addl dest, ct]
4835 *
4836 * Size 8 - 11.
4837 */
4838 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
4839 if (ct)
4840 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
4841 }
4842
4843 if (tmp != out)
4844 emit_move_insn (out, tmp);
4845
4846 return 1; /* DONE */
4847 }
4848
4849 diff = ct - cf;
4850 if (diff < 0)
4851 {
4852 HOST_WIDE_INT tmp;
4853 tmp = ct, ct = cf, cf = tmp;
4854 diff = -diff;
4855 compare_code = reverse_condition (compare_code);
4856 code = reverse_condition (code);
4857 }
4858 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
4859 || diff == 3 || diff == 5 || diff == 9)
4860 {
4861 /*
4862 * xorl dest,dest
4863 * cmpl op1,op2
4864 * setcc dest
4865 * lea cf(dest*(ct-cf)),dest
4866 *
4867 * Size 14.
4868 *
4869 * This also catches the degenerate setcc-only case.
4870 */
4871
4872 rtx tmp;
4873 int nops;
4874
4875 out = emit_store_flag (out, code, ix86_compare_op0,
4876 ix86_compare_op1, VOIDmode, 0, 1);
4877
4878 nops = 0;
4879 if (diff == 1)
4880 tmp = out;
4881 else
4882 {
4883 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
4884 nops++;
4885 if (diff & 1)
4886 {
4887 tmp = gen_rtx_PLUS (SImode, tmp, out);
4888 nops++;
4889 }
4890 }
4891 if (cf != 0)
4892 {
4893 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
4894 nops++;
4895 }
4896 if (tmp != out)
4897 {
4898 if (nops == 0)
4899 emit_move_insn (out, tmp);
4900 else if (nops == 1)
4901 {
4902 rtx clob;
4903
4904 clob = gen_rtx_REG (CCmode, FLAGS_REG);
4905 clob = gen_rtx_CLOBBER (VOIDmode, clob);
4906
4907 tmp = gen_rtx_SET (VOIDmode, out, tmp);
4908 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4909 emit_insn (tmp);
4910 }
4911 else
4912 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
4913 }
4914 if (out != operands[0])
4915 emit_move_insn (operands[0], out);
4916
4917 return 1; /* DONE */
4918 }
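      /* A concrete instance of the lea form above (illustrative
	 values only): with ct = 12 and cf = 7 we get diff = 5, so
	 "out" holds 0 or 1 after the setcc and the final value is
	 out*4 + out + 7, a single "lea 7(%reg,%reg,4)" choosing
	 between 7 and 12.  */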
4919
4920 /*
4921 * General case: Jumpful:
4922 * xorl dest,dest cmpl op1, op2
4923 * cmpl op1, op2 movl ct, dest
4924 * setcc dest jcc 1f
4925 * decl dest movl cf, dest
4926 * andl (cf-ct),dest 1:
4927 * addl ct,dest
4928 *
4929 * Size 20. Size 14.
4930 *
4931 * This is reasonably steep, but branch mispredict costs are
4932 * high on modern cpus, so consider failing only if optimizing
4933 * for space.
4934 *
4935 * %%% Parameterize branch_cost on the tuning architecture, then
4936 * use that. The 80386 couldn't care less about mispredicts.
4937 */
4938
4939 if (!optimize_size && !TARGET_CMOVE)
4940 {
4941 if (ct == 0)
4942 {
4943 ct = cf;
4944 cf = 0;
4945 compare_code = reverse_condition (compare_code);
4946 code = reverse_condition (code);
4947 }
4948
4949 out = emit_store_flag (out, code, ix86_compare_op0,
4950 ix86_compare_op1, VOIDmode, 0, 1);
4951
4952 emit_insn (gen_addsi3 (out, out, constm1_rtx));
4953 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
4954 if (ct != 0)
4955 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4956 if (out != operands[0])
4957 emit_move_insn (operands[0], out);
4958
4959 return 1; /* DONE */
4960 }
4961 }
4962
4963 if (!TARGET_CMOVE)
4964 {
4965 /* Try a few things more with specific constants and a variable. */
4966
4967 optab op;
4968 rtx var, orig_out, out, tmp;
4969
4970 if (optimize_size)
4971 return 0; /* FAIL */
4972
4973 /* If one of the two operands is an interesting constant, recurse to
4974 load a 0/-1 mask and then combine the variable into it with AND or IOR. */
4975
4976 if (GET_CODE (operands[2]) == CONST_INT)
4977 {
4978 var = operands[3];
4979 if (INTVAL (operands[2]) == 0)
4980 operands[3] = constm1_rtx, op = and_optab;
4981 else if (INTVAL (operands[2]) == -1)
4982 operands[3] = const0_rtx, op = ior_optab;
4983 else
4984 return 0; /* FAIL */
4985 }
4986 else if (GET_CODE (operands[3]) == CONST_INT)
4987 {
4988 var = operands[2];
4989 if (INTVAL (operands[3]) == 0)
4990 operands[2] = constm1_rtx, op = and_optab;
4991 else if (INTVAL (operands[3]) == -1)
4992 operands[2] = const0_rtx, op = ior_optab;
4993 else
4994 return 0; /* FAIL */
4995 }
4996 else
4997 return 0; /* FAIL */
4998
4999 orig_out = operands[0];
5000 tmp = gen_reg_rtx (GET_MODE (orig_out));
5001 operands[0] = tmp;
5002
5003 /* Recurse to get the constant loaded. */
5004 if (ix86_expand_int_movcc (operands) == 0)
5005 return 0; /* FAIL */
5006
5007 /* Mask in the interesting variable. */
5008 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5009 OPTAB_WIDEN);
5010 if (out != orig_out)
5011 emit_move_insn (orig_out, out);
5012
5013 return 1; /* DONE */
5014 }
5015
5016 /*
5017 * For comparison with above,
5018 *
5019 * movl cf,dest
5020 * movl ct,tmp
5021 * cmpl op1,op2
5022 * cmovcc tmp,dest
5023 *
5024 * Size 15.
5025 */
5026
5027 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5028 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5029 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5030 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5031
5032 emit_insn (compare_seq);
5033 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5034 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5035 compare_op, operands[2],
5036 operands[3])));
5037
5038 return 1; /* DONE */
5039}
5040
5041int
5042ix86_expand_fp_movcc (operands)
5043 rtx operands[];
5044{
5045 enum rtx_code code;
5046 enum machine_mode mode;
5047 rtx tmp;
5048
5049 /* The floating point conditional move instructions don't directly
5050 support conditions resulting from a signed integer comparison. */
5051
5052 code = GET_CODE (operands[1]);
5053 switch (code)
5054 {
5055 case LT:
5056 case LE:
5057 case GE:
5058 case GT:
5059 tmp = gen_reg_rtx (QImode);
5060 ix86_expand_setcc (code, 0, tmp);
5061 code = NE;
5062 ix86_compare_op0 = tmp;
5063 ix86_compare_op1 = const0_rtx;
5064 break;
5065
5066 default:
5067 break;
5068 }
5069
5070 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5071 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5072 gen_rtx_COMPARE (mode,
5073 ix86_compare_op0,
5074 ix86_compare_op1)));
5075 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5076 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5077 gen_rtx_fmt_ee (code, VOIDmode,
5078 gen_rtx_REG (mode, FLAGS_REG),
5079 const0_rtx),
5080 operands[2],
5081 operands[3])));
5082
5083 return 1;
5084}
5085
5086/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5087 works for floating point parameters and non-offsettable memories.
5088 For pushes, it returns just stack offsets; the values will be saved
5089 in the right order. At most three parts are generated. */
5090
5091static void
5092ix86_split_to_parts (operand, parts, mode)
5093 rtx operand;
5094 rtx *parts;
5095 enum machine_mode mode;
5096{
5097 int size = GET_MODE_SIZE (mode) / 4;
5098
5099 if (size < 2 || size > 3)
5100 abort ();
5101
5102 /* Optimize constant pool references into immediates. This is used by fp moves,
5103 which force all constants to memory to allow combining. */
5104
5105 if (GET_CODE (operand) == MEM
5106 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5107 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5108 operand = get_pool_constant (XEXP (operand, 0));
5109
5110 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5111 {
5112 /* The only non-offsettable memories we handle are pushes. */
5113 if (! push_operand (operand, VOIDmode))
5114 abort ();
5115
5116 PUT_MODE (operand, SImode);
5117 parts[0] = parts[1] = parts[2] = operand;
5118 }
5119 else
5120 {
5121 if (mode == DImode)
5122 split_di (&operand, 1, &parts[0], &parts[1]);
5123 else
5124 {
5125 if (REG_P (operand))
5126 {
5127 if (!reload_completed)
5128 abort ();
5129 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5130 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5131 if (size == 3)
5132 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5133 }
5134 else if (offsettable_memref_p (operand))
5135 {
5136 PUT_MODE (operand, SImode);
5137 parts[0] = operand;
5138 parts[1] = adj_offsettable_operand (operand, 4);
5139 if (size == 3)
5140 parts[2] = adj_offsettable_operand (operand, 8);
5141 }
5142 else if (GET_CODE (operand) == CONST_DOUBLE)
5143 {
5144 REAL_VALUE_TYPE r;
5145 long l[3];
5146
5147 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5148 switch (mode)
5149 {
5150 case XFmode:
5151 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5152 parts[2] = GEN_INT (l[2]);
5153 break;
5154 case DFmode:
5155 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5156 break;
5157 default:
5158 abort ();
5159 }
5160 parts[1] = GEN_INT (l[1]);
5161 parts[0] = GEN_INT (l[0]);
5162 }
5163 else
5164 abort ();
5165 }
5166 }
5167
5168 return;
5169}
5170
5171/* Emit insns to perform a move or push of DI, DF, and XF values.
5172 Return false when normal moves are needed; true when all required
5173 insns have been emitted. Operands 2-4 receive the destination parts
5174 in the correct order; operands 5-7 receive the source parts. */
5175
5176int
5177ix86_split_long_move (operands1)
5178 rtx operands1[];
5179{
5180 rtx part[2][3];
5181 rtx operands[2];
5182 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5183 int push = 0;
5184 int collisions = 0;
5185
5186 /* Make our own copy to avoid clobbering the operands. */
5187 operands[0] = copy_rtx (operands1[0]);
5188 operands[1] = copy_rtx (operands1[1]);
5189
5190 if (size < 2 || size > 3)
5191 abort ();
5192
5193 /* The only non-offsettable memory we handle is push. */
5194 if (push_operand (operands[0], VOIDmode))
5195 push = 1;
5196 else if (GET_CODE (operands[0]) == MEM
5197 && ! offsettable_memref_p (operands[0]))
5198 abort ();
5199
5200 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5201 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5202
5203 /* When emitting a push, watch out for source operands on the stack. */
5204 if (push && GET_CODE (operands[1]) == MEM
5205 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5206 {
5207 if (size == 3)
5208 part[1][1] = part[1][2];
5209 part[1][0] = part[1][1];
5210 }
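  /* Why shifting the parts works (a sketch of the reasoning): the
     parts are pushed from the highest word down and each push lowers
     esp by 4, so reusing the highest part's address makes every later
     push read the word that sat at its intended offset before esp
     moved.  */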
5211
5212 /* We need to do the copy in the right order in case an address register
5213 of the source overlaps the destination. */
5214 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5215 {
5216 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5217 collisions++;
5218 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5219 collisions++;
5220 if (size == 3
5221 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5222 collisions++;
5223
5224 /* Collision in the middle part can be handled by reordering. */
5225 if (collisions == 1 && size == 3
5226 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5227 {
5228 rtx tmp;
5229 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5230 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5231 }
5232
5233 /* If there are more collisions, we can't handle it by reordering.
5234 Do an lea to the last part and use only one colliding move. */
5235 else if (collisions > 1)
5236 {
5237 collisions = 1;
5238 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5239 XEXP (part[1][0], 0)));
5240 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5241 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5242 if (size == 3)
5243 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5244 }
5245 }
5246
5247 if (push)
5248 {
5249 if (size == 3)
5250 emit_insn (gen_push (part[1][2]));
5251 emit_insn (gen_push (part[1][1]));
5252 emit_insn (gen_push (part[1][0]));
5253 return 1;
5254 }
5255
5256 /* Choose the correct order so we do not overwrite the source before it is copied. */
5257 if ((REG_P (part[0][0])
5258 && REG_P (part[1][1])
5259 && (REGNO (part[0][0]) == REGNO (part[1][1])
5260 || (size == 3
5261 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5262 || (collisions > 0
5263 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5264 {
5265 if (size == 3)
5266 {
5267 operands1[2] = part[0][2];
5268 operands1[3] = part[0][1];
5269 operands1[4] = part[0][0];
5270 operands1[5] = part[1][2];
5271 operands1[6] = part[1][1];
5272 operands1[7] = part[1][0];
5273 }
5274 else
5275 {
5276 operands1[2] = part[0][1];
5277 operands1[3] = part[0][0];
5278 operands1[5] = part[1][1];
5279 operands1[6] = part[1][0];
5280 }
5281 }
5282 else
5283 {
5284 if (size == 3)
5285 {
5286 operands1[2] = part[0][0];
5287 operands1[3] = part[0][1];
5288 operands1[4] = part[0][2];
5289 operands1[5] = part[1][0];
5290 operands1[6] = part[1][1];
5291 operands1[7] = part[1][2];
5292 }
5293 else
5294 {
5295 operands1[2] = part[0][0];
5296 operands1[3] = part[0][1];
5297 operands1[5] = part[1][0];
5298 operands1[6] = part[1][1];
5299 }
5300 }
5301
5302 return 0;
5303}
5304
5305void
5306ix86_split_ashldi (operands, scratch)
5307 rtx *operands, scratch;
5308{
5309 rtx low[2], high[2];
5310 int count;
5311
5312 if (GET_CODE (operands[2]) == CONST_INT)
5313 {
5314 split_di (operands, 2, low, high);
5315 count = INTVAL (operands[2]) & 63;
5316
5317 if (count >= 32)
5318 {
5319 emit_move_insn (high[0], low[1]);
5320 emit_move_insn (low[0], const0_rtx);
5321
5322 if (count > 32)
5323 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5324 }
5325 else
5326 {
5327 if (!rtx_equal_p (operands[0], operands[1]))
5328 emit_move_insn (operands[0], operands[1]);
5329 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5330 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5331 }
5332 }
5333 else
5334 {
5335 if (!rtx_equal_p (operands[0], operands[1]))
5336 emit_move_insn (operands[0], operands[1]);
5337
5338 split_di (operands, 1, low, high);
5339
5340 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5341 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5342
5343 if (TARGET_CMOVE && (! reload_completed || scratch))
5344 {
5345 if (! reload_completed)
5346 scratch = force_reg (SImode, const0_rtx);
5347 else
5348 emit_move_insn (scratch, const0_rtx);
5349
5350 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5351 scratch));
5352 }
5353 else
5354 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5355 }
5356}
5357
5358void
5359ix86_split_ashrdi (operands, scratch)
5360 rtx *operands, scratch;
5361{
5362 rtx low[2], high[2];
5363 int count;
5364
5365 if (GET_CODE (operands[2]) == CONST_INT)
5366 {
5367 split_di (operands, 2, low, high);
5368 count = INTVAL (operands[2]) & 63;
5369
5370 if (count >= 32)
5371 {
5372 emit_move_insn (low[0], high[1]);
5373
5374 if (! reload_completed)
5375 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5376 else
5377 {
5378 emit_move_insn (high[0], low[0]);
5379 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5380 }
5381
5382 if (count > 32)
5383 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5384 }
5385 else
5386 {
5387 if (!rtx_equal_p (operands[0], operands[1]))
5388 emit_move_insn (operands[0], operands[1]);
5389 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5390 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5391 }
5392 }
5393 else
5394 {
5395 if (!rtx_equal_p (operands[0], operands[1]))
5396 emit_move_insn (operands[0], operands[1]);
5397
5398 split_di (operands, 1, low, high);
5399
5400 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5401 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5402
5403 if (TARGET_CMOVE && (!reload_completed || scratch))
5404 {
5405 if (! reload_completed)
5406 scratch = gen_reg_rtx (SImode);
5407 emit_move_insn (scratch, high[0]);
5408 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5409 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5410 scratch));
5411 }
5412 else
5413 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
5414 }
5415}
5416
5417void
5418ix86_split_lshrdi (operands, scratch)
5419 rtx *operands, scratch;
5420{
5421 rtx low[2], high[2];
5422 int count;
5423
5424 if (GET_CODE (operands[2]) == CONST_INT)
5425 {
5426 split_di (operands, 2, low, high);
5427 count = INTVAL (operands[2]) & 63;
5428
5429 if (count >= 32)
5430 {
5431 emit_move_insn (low[0], high[1]);
5432 emit_move_insn (high[0], const0_rtx);
5433
5434 if (count > 32)
5435 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5436 }
5437 else
5438 {
5439 if (!rtx_equal_p (operands[0], operands[1]))
5440 emit_move_insn (operands[0], operands[1]);
5441 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5442 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5443 }
5444 }
5445 else
5446 {
5447 if (!rtx_equal_p (operands[0], operands[1]))
5448 emit_move_insn (operands[0], operands[1]);
5449
5450 split_di (operands, 1, low, high);
5451
5452 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5453 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5454
5455 /* Heh. By reversing the arguments, we can reuse this pattern. */
5456 if (TARGET_CMOVE && (! reload_completed || scratch))
5457 {
5458 if (! reload_completed)
5459 scratch = force_reg (SImode, const0_rtx);
5460 else
5461 emit_move_insn (scratch, const0_rtx);
5462
5463 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5464 scratch));
5465 }
5466 else
5467 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5468 }
5469}
5470
5471/* Expand the appropriate insns for doing strlen if not just doing
5472 repnz; scasb
5473
5474 out = result, initialized with the start address
5475 align_rtx = alignment of the address.
5476 scratch = scratch register, initialized with the start address when
5477 not aligned, otherwise undefined
5478
5479 This is just the body. It needs the initializations mentioned above and
5480 some address computing at the end. These things are done in i386.md. */
5481
5482void
5483ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5484 rtx out, align_rtx, scratch;
5485{
5486 int align;
5487 rtx tmp;
5488 rtx align_2_label = NULL_RTX;
5489 rtx align_3_label = NULL_RTX;
5490 rtx align_4_label = gen_label_rtx ();
5491 rtx end_0_label = gen_label_rtx ();
5492 rtx mem;
5493 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5494 rtx tmpreg = gen_reg_rtx (SImode);
5495
5496 align = 0;
5497 if (GET_CODE (align_rtx) == CONST_INT)
5498 align = INTVAL (align_rtx);
5499
5500 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
5501
5502 /* Is there a known alignment and is it less than 4? */
5503 if (align < 4)
5504 {
5505 /* Is there a known alignment and is it not 2? */
5506 if (align != 2)
5507 {
5508 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
5509 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
5510
5511 /* Leave just the two lower bits. */
5512 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5513 NULL_RTX, 0, OPTAB_WIDEN);
5514
5515 emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
5516
5517 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5518 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5519 gen_rtx_LABEL_REF (VOIDmode,
5520 align_4_label),
5521 pc_rtx);
5522 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5523
5524 emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));
5525
5526 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5527 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5528 gen_rtx_LABEL_REF (VOIDmode,
5529 align_2_label),
5530 pc_rtx);
5531 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5532
5533 tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
5534 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5535 gen_rtx_LABEL_REF (VOIDmode,
5536 align_3_label),
5537 pc_rtx);
5538 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5539 }
5540 else
5541 {
5542 /* Since the alignment is 2, we have to check either 0 or 2 bytes;
5543 test whether the pointer is already 4-byte aligned. */
5544
5545 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5546 NULL_RTX, 0, OPTAB_WIDEN);
5547
5548 emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
5549
5550 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5551 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5552 gen_rtx_LABEL_REF (VOIDmode,
5553 align_4_label),
5554 pc_rtx);
5555 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5556 }
5557
5558 mem = gen_rtx_MEM (QImode, out);
5559
5560 /* Now compare the bytes. */
5561
5562 /* Compare the leading unaligned bytes one byte at a time. */
5563 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
5564
5565 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5566 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5567 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5568 pc_rtx);
5569 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5570
5571 /* Increment the address. */
5572 emit_insn (gen_addsi3 (out, out, const1_rtx));
5573
5574 /* Not needed with an alignment of 2. */
5575 if (align != 2)
5576 {
5577 emit_label (align_2_label);
5578
5579 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
5580
5581 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5582 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5583 gen_rtx_LABEL_REF (VOIDmode,
5584 end_0_label),
5585 pc_rtx);
5586 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5587
5588 emit_insn (gen_addsi3 (out, out, const1_rtx));
5589
5590 emit_label (align_3_label);
5591 }
5592
5593 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
5594
5595 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5596 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5597 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5598 pc_rtx);
5599 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5600
5601 emit_insn (gen_addsi3 (out, out, const1_rtx));
5602 }
5603
5604 /* Generate the loop that checks 4 bytes at a time. It is not a good
5605 idea to align this loop: that only makes the program larger and does
5606 not speed it up. */
5607 emit_label (align_4_label);
5608
5609 mem = gen_rtx_MEM (SImode, out);
5610 emit_move_insn (scratch, mem);
5611 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
5612
5613 /* This formula yields a nonzero result iff one of the bytes is zero.
5614 This saves three branches inside the loop and many cycles. */
5615
5616 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
5617 emit_insn (gen_one_cmplsi2 (scratch, scratch));
5618 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
5619 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
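  /* Worked example of the formula (illustrative value): for
     scratch = 0x61006261 the sequence computes
       (0x61006261 - 0x01010101) & ~0x61006261 & 0x80808080
       = 0x5fff6160 & 0x9eff9d9e & 0x80808080 = 0x00800000,
     i.e. exactly the sign bit of the one byte that is zero.  */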
5620 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
5621
5622 if (TARGET_CMOVE)
5623 {
5624 rtx reg = gen_reg_rtx (SImode);
5625 emit_move_insn (reg, tmpreg);
5626 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
5627
5628 /* If zero is not in the first two bytes, move two bytes forward. */
5629 emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
5630 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5631 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5632 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
5633 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5634 reg,
5635 tmpreg)));
5636 /* Emit the lea manually to avoid clobbering the flags. */
5637 emit_insn (gen_rtx_SET (SImode, reg,
5638 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
5639
5640 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5641 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5642 emit_insn (gen_rtx_SET (VOIDmode, out,
5643 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5644 reg,
5645 out)));
5646
5647 }
5648 else
5649 {
5650 rtx end_2_label = gen_label_rtx ();
5651 /* Is zero in the first two bytes? */
5652
5653 emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
5654 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5655 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
5656 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5657 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
5658 pc_rtx);
5659 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5660 JUMP_LABEL (tmp) = end_2_label;
5661
5662 /* Not in the first two. Move two bytes forward. */
5663 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
5664 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
5665
5666 emit_label (end_2_label);
5667
5668 }
5669
5670 /* Avoid a branch when fixing up the final byte position. */
5671 tmpreg = gen_lowpart (QImode, tmpreg);
5672 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
5673 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
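  /* How the two insns above work (a sketch): the low byte of TMPREG
     has bit 7 set iff the zero byte is the first of the two-byte group
     selected above.  Doubling that byte moves the bit into the carry
     flag, so the subtract-with-borrow lowers OUT by 3 or 4 and leaves
     it pointing at the terminating zero byte, with no branch.  */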
5674
5675 emit_label (end_0_label);
5676}
5677\f
5678/* Clear stack slot assignments remembered from previous functions.
5679 This is called from INIT_EXPANDERS once before RTL is emitted for each
5680 function. */
5681
5682static void
5683ix86_init_machine_status (p)
5684 struct function *p;
5685{
5686 enum machine_mode mode;
5687 int n;
5688 p->machine
5689 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
5690
5691 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5692 mode = (enum machine_mode) ((int) mode + 1))
5693 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5694 ix86_stack_locals[(int) mode][n] = NULL_RTX;
5695}
5696
5697/* Mark machine specific bits of P for GC. */
5698static void
5699ix86_mark_machine_status (p)
5700 struct function *p;
5701{
5702 enum machine_mode mode;
5703 int n;
5704
5705 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5706 mode = (enum machine_mode) ((int) mode + 1))
5707 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5708 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5709}
5710
5711/* Return a MEM corresponding to a stack slot with mode MODE.
5712 Allocate a new slot if necessary.
5713
5714 The RTL for a function can have several slots available: N is
5715 which slot to use. */
5716
5717rtx
5718assign_386_stack_local (mode, n)
5719 enum machine_mode mode;
5720 int n;
5721{
5722 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5723 abort ();
5724
5725 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
5726 ix86_stack_locals[(int) mode][n]
5727 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
5728
5729 return ix86_stack_locals[(int) mode][n];
5730}
5731\f
5732/* Calculate the length of the memory address in the instruction
5733 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5734
5735static int
5736memory_address_length (addr)
5737 rtx addr;
5738{
5739 struct ix86_address parts;
5740 rtx base, index, disp;
5741 int len;
5742
5743 if (GET_CODE (addr) == PRE_DEC
5744 || GET_CODE (addr) == POST_INC)
5745 return 0;
5746
5747 if (! ix86_decompose_address (addr, &parts))
5748 abort ();
5749
5750 base = parts.base;
5751 index = parts.index;
5752 disp = parts.disp;
5753 len = 0;
5754
5755 /* Register Indirect. */
5756 if (base && !index && !disp)
5757 {
5758 /* Special cases: ebp and esp need an extra byte of encoding (disp8 and SIB byte, respectively). */
5759 if (addr == stack_pointer_rtx
5760 || addr == arg_pointer_rtx
5761 || addr == frame_pointer_rtx
5762 || addr == hard_frame_pointer_rtx)
5763 len = 1;
5764 }
5765
5766 /* Direct Addressing. */
5767 else if (disp && !base && !index)
5768 len = 4;
5769
5770 else
5771 {
5772 /* Find the length of the displacement constant. */
5773 if (disp)
5774 {
5775 if (GET_CODE (disp) == CONST_INT
5776 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
5777 len = 1;
5778 else
5779 len = 4;
5780 }
5781
5782 /* An index register requires the extra SIB byte. */
5783 if (index)
5784 len += 1;
5785 }
5786
5787 return len;
5788}
5789
5790int
5791ix86_attr_length_default (insn)
5792 rtx insn;
5793{
5794 enum attr_type type;
5795 int len = 0, i;
5796
5797 type = get_attr_type (insn);
5798 extract_insn (insn);
5799 switch (type)
5800 {
5801 case TYPE_INCDEC:
5802 case TYPE_SETCC:
5803 case TYPE_ICMOV:
5804 case TYPE_FMOV:
5805 case TYPE_FOP:
5806 case TYPE_FCMP:
5807 case TYPE_FOP1:
5808 case TYPE_FMUL:
5809 case TYPE_FDIV:
5810 case TYPE_FSGN:
5811 case TYPE_FPSPC:
5812 case TYPE_FCMOV:
5813 case TYPE_IBR:
5814 break;
5815 case TYPE_STR:
5816 case TYPE_CLD:
5817 len = 0;
5818 /* FALLTHRU */
5819 case TYPE_ALU1:
5820 case TYPE_NEGNOT:
5821 case TYPE_ALU:
5822 case TYPE_ICMP:
5823 case TYPE_IMOVX:
5824 case TYPE_ISHIFT:
5825 case TYPE_IMUL:
5826 case TYPE_IDIV:
5827 case TYPE_PUSH:
5828 case TYPE_POP:
5829 for (i = recog_data.n_operands - 1; i >= 0; --i)
5830 if (CONSTANT_P (recog_data.operand[i]))
5831 {
5832 if (GET_CODE (recog_data.operand[i]) == CONST_INT
5833 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
5834 len += 1;
5835 else
5836 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
5837 }
5838 break;
5839
5840 case TYPE_IMOV:
5841 if (CONSTANT_P (recog_data.operand[1]))
5842 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
5843 break;
5844
5845 case TYPE_CALL:
5846 if (constant_call_address_operand (recog_data.operand[0],
5847 GET_MODE (recog_data.operand[0])))
5848 return 5;
5849 break;
5850
5851 case TYPE_CALLV:
5852 if (constant_call_address_operand (recog_data.operand[1],
5853 GET_MODE (recog_data.operand[1])))
5854 return 5;
5855 break;
5856
5857 case TYPE_LEA:
5858 {
5859 /* Irritatingly, single_set doesn't work with REG_UNUSED present,
5860 as we'll get from running life_analysis during reg-stack when
5861 not optimizing. Not that it matters anyway, now that
5862 pro_epilogue_adjust_stack uses lea, and is by design not
5863 single_set. */
5864 rtx set = PATTERN (insn);
5865 if (GET_CODE (set) == SET)
5866 ;
5867 else if (GET_CODE (set) == PARALLEL
5868 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
5869 set = XVECEXP (set, 0, 0);
5870 else
5871 abort ();
5872
5873 len += memory_address_length (SET_SRC (set));
5874 goto just_opcode;
5875 }
5876
5877 case TYPE_OTHER:
5878 case TYPE_MULTI:
5879 return 15;
5880
5881 case TYPE_FXCH:
5882 if (STACK_TOP_P (recog_data.operand[0]))
5883 return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
5884 else
5885 return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);
5886
5887 default:
5888 abort ();
5889 }
5890
5891 for (i = recog_data.n_operands - 1; i >= 0; --i)
5892 if (GET_CODE (recog_data.operand[i]) == MEM)
5893 {
5894 len += memory_address_length (XEXP (recog_data.operand[i], 0));
5895 break;
5896 }
5897
5898just_opcode:
5899 len += get_attr_length_opcode (insn);
5900 len += get_attr_length_prefix (insn);
5901
5902 return len;
5903}
5904\f
5905/* Return the maximum number of instructions a cpu can issue. */
5906
5907int
5908ix86_issue_rate ()
5909{
5910 switch (ix86_cpu)
5911 {
5912 case PROCESSOR_PENTIUM:
5913 case PROCESSOR_K6:
5914 return 2;
5915
5916 case PROCESSOR_PENTIUMPRO:
5917 return 3;
5918
5919 default:
5920 return 1;
5921 }
5922}
5923
5924/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5925 by DEP_INSN and nothing else set by DEP_INSN. */
5926
5927static int
5928ix86_flags_dependant (insn, dep_insn, insn_type)
5929 rtx insn, dep_insn;
5930 enum attr_type insn_type;
5931{
5932 rtx set, set2;
5933
5934 /* Simplify the test for uninteresting insns. */
5935 if (insn_type != TYPE_SETCC
5936 && insn_type != TYPE_ICMOV
5937 && insn_type != TYPE_FCMOV
5938 && insn_type != TYPE_IBR)
5939 return 0;
5940
5941 if ((set = single_set (dep_insn)) != 0)
5942 {
5943 set = SET_DEST (set);
5944 set2 = NULL_RTX;
5945 }
5946 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
5947 && XVECLEN (PATTERN (dep_insn), 0) == 2
5948 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
5949 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
5950 {
5951 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5952 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
5953 }
5954 else
5955 return 0;
5956
5957 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
5958 return 0;
5959
5960 /* This test is true if the dependent insn reads the flags but
5961 not any other potentially set register. */
5962 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
5963 return 0;
5964
5965 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
5966 return 0;
5967
5968 return 1;
5969}
5970
5971/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5972 address with operands set by DEP_INSN. */
5973
5974static int
5975ix86_agi_dependant (insn, dep_insn, insn_type)
5976 rtx insn, dep_insn;
5977 enum attr_type insn_type;
5978{
5979 rtx addr;
5980
5981 if (insn_type == TYPE_LEA)
5982 {
5983 addr = PATTERN (insn);
5984 if (GET_CODE (addr) == SET)
5985 ;
5986 else if (GET_CODE (addr) == PARALLEL
5987 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
5988 addr = XVECEXP (addr, 0, 0);
5989 else
5990 abort ();
5991 addr = SET_SRC (addr);
5992 }
5993 else
5994 {
5995 int i;
5996 extract_insn (insn);
5997 for (i = recog_data.n_operands - 1; i >= 0; --i)
5998 if (GET_CODE (recog_data.operand[i]) == MEM)
5999 {
6000 addr = XEXP (recog_data.operand[i], 0);
6001 goto found;
6002 }
6003 return 0;
6004 found:;
6005 }
6006
6007 return modified_in_p (addr, dep_insn);
6008}
6009
6010int
6011ix86_adjust_cost (insn, link, dep_insn, cost)
6012 rtx insn, link, dep_insn;
6013 int cost;
6014{
6015 enum attr_type insn_type, dep_insn_type;
6016 rtx set, set2;
6017 int dep_insn_code_number;
6018
6019 /* Anti and output dependencies have zero cost on all CPUs. */
6020 if (REG_NOTE_KIND (link) != 0)
6021 return 0;
6022
6023 dep_insn_code_number = recog_memoized (dep_insn);
6024
6025 /* If we can't recognize the insns, we can't really do anything. */
6026 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6027 return cost;
6028
6029 insn_type = get_attr_type (insn);
6030 dep_insn_type = get_attr_type (dep_insn);
6031
6032   /* Prologue and epilogue allocators can have a false dependency on ebp.
6033      This results in one extra cycle of stall on Pentium prologue
6034      scheduling, so handle this important case manually.  */
6035 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6036 && dep_insn_type == TYPE_ALU
6037 && !reg_mentioned_p (stack_pointer_rtx, insn))
6038 return 0;
6039
6040 switch (ix86_cpu)
6041 {
6042 case PROCESSOR_PENTIUM:
6043 /* Address Generation Interlock adds a cycle of latency. */
6044 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6045 cost += 1;
6046
6047 /* ??? Compares pair with jump/setcc. */
6048 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6049 cost = 0;
6050
6051       /* Floating point stores require the value to be ready one cycle earlier.  */
6052 if (insn_type == TYPE_FMOV
6053 && get_attr_memory (insn) == MEMORY_STORE
6054 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6055 cost += 1;
6056 break;
6057
6058 case PROCESSOR_PENTIUMPRO:
6059 /* Since we can't represent delayed latencies of load+operation,
6060 increase the cost here for non-imov insns. */
6061 if (dep_insn_type != TYPE_IMOV
6062 && dep_insn_type != TYPE_FMOV
6063 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6064 cost += 1;
6065
6066 /* INT->FP conversion is expensive. */
6067 if (get_attr_fp_int_src (dep_insn))
6068 cost += 5;
6069
6070       /* There is one extra cycle of latency between an FP op and a store.  */
6071 if (insn_type == TYPE_FMOV
6072 && (set = single_set (dep_insn)) != NULL_RTX
6073 && (set2 = single_set (insn)) != NULL_RTX
6074 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6075 && GET_CODE (SET_DEST (set2)) == MEM)
6076 cost += 1;
6077 break;
6078
6079 case PROCESSOR_K6:
6080 /* The esp dependency is resolved before the instruction is really
6081 finished. */
6082 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6083 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6084 return 1;
6085
6086 /* Since we can't represent delayed latencies of load+operation,
6087 increase the cost here for non-imov insns. */
6088 if (get_attr_memory (dep_insn) == MEMORY_LOAD)
6089 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6090
6091 /* INT->FP conversion is expensive. */
6092 if (get_attr_fp_int_src (dep_insn))
6093 cost += 5;
6094 break;
6095
6096 case PROCESSOR_ATHLON:
6097       /* Address Generation Interlocks cause problems on the Athlon CPU
6098	  because its loads and stores are done in order: once one load or
6099	  store has to wait, the others must too.  Penalize AGIs slightly by
6100	  one cycle; we might experiment with this value later.  */
6101 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6102 cost += 1;
6103
6104 /* Since we can't represent delayed latencies of load+operation,
6105 increase the cost here for non-imov insns. */
6106 if (dep_insn_type != TYPE_IMOV
6107 && dep_insn_type != TYPE_FMOV
6108 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6109	cost += 2;
      break;
6110    default:
6111 break;
6112 }
6113
6114 return cost;
6115}
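
/* A worked example of the K6 adjustment above (a sketch; assume the
   dependence edge arrives with COST == 2): a plain integer load feeding
   INSN is TYPE_IMOV with MEMORY_LOAD and yields 2 + 1 = 3, while a
   load-and-operate insn such as an add from memory is not TYPE_IMOV
   and yields 2 + 2 = 4.  */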
6116
6117static union
6118{
6119 struct ppro_sched_data
6120 {
6121 rtx decode[3];
6122 int issued_this_cycle;
6123 } ppro;
6124} ix86_sched_data;
6125
6126static int
6127ix86_safe_length (insn)
6128 rtx insn;
6129{
6130 if (recog_memoized (insn) >= 0)
6131     return get_attr_length (insn);
6132 else
6133 return 128;
6134}
6135
6136static int
6137ix86_safe_length_prefix (insn)
6138 rtx insn;
6139{
6140 if (recog_memoized (insn) >= 0)
6141     return get_attr_length_prefix (insn);
6142 else
6143 return 0;
6144}
6145
6146static enum attr_memory
6147ix86_safe_memory (insn)
6148 rtx insn;
6149{
6150 if (recog_memoized (insn) >= 0)
6151     return get_attr_memory (insn);
6152 else
6153 return MEMORY_UNKNOWN;
6154}
6155
6156static enum attr_pent_pair
6157ix86_safe_pent_pair (insn)
6158 rtx insn;
6159{
6160 if (recog_memoized (insn) >= 0)
6161     return get_attr_pent_pair (insn);
6162 else
6163 return PENT_PAIR_NP;
6164}
6165
6166static enum attr_ppro_uops
6167ix86_safe_ppro_uops (insn)
6168 rtx insn;
6169{
6170 if (recog_memoized (insn) >= 0)
6171 return get_attr_ppro_uops (insn);
6172 else
6173 return PPRO_UOPS_MANY;
6174}
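
/* The ix86_safe_* wrappers above exist because the scheduler may hand us
   insns that recog cannot identify (asm statements, for instance).  Each
   wrapper falls back to the most conservative answer: a huge length, no
   prefix bytes, unknown memory behavior, not pairable, and many uops.  */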
6175
6176static void
6177ix86_dump_ppro_packet (dump)
6178 FILE *dump;
6179{
6180 if (ix86_sched_data.ppro.decode[0])
6181 {
6182 fprintf (dump, "PPRO packet: %d",
6183 INSN_UID (ix86_sched_data.ppro.decode[0]));
6184 if (ix86_sched_data.ppro.decode[1])
6185 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6186 if (ix86_sched_data.ppro.decode[2])
6187 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6188 fputc ('\n', dump);
6189 }
6190}
6191
6192/* We're beginning a new block. Initialize data structures as necessary. */
6193
6194void
6195ix86_sched_init (dump, sched_verbose)
6196 FILE *dump ATTRIBUTE_UNUSED;
6197 int sched_verbose ATTRIBUTE_UNUSED;
6198{
6199 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6200}
6201
6202/* Shift INSN to SLOT, and shift everything else down. */
6203
6204static void
6205ix86_reorder_insn (insnp, slot)
6206 rtx *insnp, *slot;
6207{
6208 if (insnp != slot)
6209 {
6210 rtx insn = *insnp;
6211 do
6212 insnp[0] = insnp[1];
6213 while (++insnp != slot);
6214 *insnp = insn;
6215 }
6216}
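
/* For instance (a sketch): with ready[] = {d, c, b, a}, where the insn
   issued first sits at the end, ix86_reorder_insn (&ready[1], &ready[3])
   rotates c into the last slot, giving {d, b, a, c}, and preserves the
   relative order of everything in between.  */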
6217
6218 /* Find an instruction with the given pairability that loses the fewest
6219    cycles to the fact that the CPU waits for both pipelines to finish
6220    before reading the next instructions.  Also take care that the two
6221    instructions together do not exceed 7 bytes.  */
6222
6223static rtx *
6224ix86_pent_find_pair (e_ready, ready, type, first)
6225 rtx *e_ready;
6226 rtx *ready;
6227 enum attr_pent_pair type;
6228 rtx first;
6229{
6230 int mincycles, cycles;
6231 enum attr_pent_pair tmp;
6232 enum attr_memory memory;
6233 rtx *insnp, *bestinsnp = NULL;
6234
6235 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6236 return NULL;
6237
6238 memory = ix86_safe_memory (first);
6239 cycles = result_ready_cost (first);
6240 mincycles = INT_MAX;
6241
6242 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6243 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6244 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6245 {
6246 enum attr_memory second_memory;
6247 int secondcycles, currentcycles;
6248
6249 second_memory = ix86_safe_memory (*insnp);
6250 secondcycles = result_ready_cost (*insnp);
6251 currentcycles = abs (cycles - secondcycles);
6252
6253 if (secondcycles >= 1 && cycles >= 1)
6254 {
6255	    /* Two read/modify/write instructions together take two
6256	       cycles longer.  */
6257 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6258 currentcycles += 2;
6259
6260	    /* A read/modify/write instruction followed by a read/modify
6261	       instruction takes one cycle longer.  */
6262 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6263 && tmp != PENT_PAIR_UV
6264 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6265 currentcycles += 1;
6266 }
6267 if (currentcycles < mincycles)
6268 bestinsnp = insnp, mincycles = currentcycles;
6269 }
6270
6271 return bestinsnp;
6272}
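
/* A sketch of the cost computation above: if FIRST is ready in 1 cycle
   and a candidate in 2, the base penalty is abs (1 - 2) = 1; if both
   are read/modify/write (MEMORY_BOTH), two more cycles are added for a
   total of 3.  The candidate with the smallest penalty wins.  */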
6273
6274/* Subroutines of ix86_sched_reorder. */
6275
6276static void
6277ix86_sched_reorder_pentium (ready, e_ready)
6278 rtx *ready;
6279 rtx *e_ready;
6280{
6281 enum attr_pent_pair pair1, pair2;
6282 rtx *insnp;
6283
6284   /* This wouldn't be necessary if Haifa knew that static insn ordering
6285      determines which pipe an insn is issued to.  As it is, we have to
6286      make some minor rearrangements.  */
6287
6288 pair1 = ix86_safe_pent_pair (*e_ready);
6289
6290 /* If the first insn is non-pairable, let it be. */
6291 if (pair1 == PENT_PAIR_NP)
6292 return;
6293
6294 pair2 = PENT_PAIR_NP;
6295   insnp = NULL;
6296
6297 /* If the first insn is UV or PV pairable, search for a PU
6298 insn to go with. */
6299 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6300 {
6301 insnp = ix86_pent_find_pair (e_ready-1, ready,
6302 PENT_PAIR_PU, *e_ready);
6303 if (insnp)
6304 pair2 = PENT_PAIR_PU;
6305 }
6306
6307 /* If the first insn is PU or UV pairable, search for a PV
6308 insn to go with. */
6309 if (pair2 == PENT_PAIR_NP
6310 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6311 {
6312 insnp = ix86_pent_find_pair (e_ready-1, ready,
6313 PENT_PAIR_PV, *e_ready);
6314 if (insnp)
6315 pair2 = PENT_PAIR_PV;
6316 }
6317
6318 /* If the first insn is pairable, search for a UV
6319 insn to go with. */
6320 if (pair2 == PENT_PAIR_NP)
6321 {
6322 insnp = ix86_pent_find_pair (e_ready-1, ready,
6323 PENT_PAIR_UV, *e_ready);
6324 if (insnp)
6325 pair2 = PENT_PAIR_UV;
6326 }
6327
6328 if (pair2 == PENT_PAIR_NP)
6329 return;
6330
6331 /* Found something! Decide if we need to swap the order. */
6332 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6333 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6334 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6335 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6336 ix86_reorder_insn (insnp, e_ready);
6337 else
6338 ix86_reorder_insn (insnp, e_ready - 1);
6339}
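
/* The swap above decides which insn of the pair issues first, and hence
   which goes to the U pipe: a PV leader can execute only in V, and a PU
   partner only in U, so in both cases the partner must lead.  For a
   UV/UV pair of a read/modify/write insn with a load, the load is placed
   first, which the backend assumes is the cheaper ordering.  */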
6340
6341static void
6342ix86_sched_reorder_ppro (ready, e_ready)
6343 rtx *ready;
6344 rtx *e_ready;
6345{
6346 rtx decode[3];
6347 enum attr_ppro_uops cur_uops;
6348 int issued_this_cycle;
6349 rtx *insnp;
6350 int i;
6351
6352 /* At this point .ppro.decode contains the state of the three
6353 decoders from last "cycle". That is, those insns that were
6354 actually independent. But here we're scheduling for the
6355 decoder, and we may find things that are decodable in the
6356 same cycle. */
6357
6358   memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
6359 issued_this_cycle = 0;
6360
6361 insnp = e_ready;
6362 cur_uops = ix86_safe_ppro_uops (*insnp);
6363
6364   /* If the decoders are empty, and we have a complex insn at the
6365      head of the priority queue, let it issue without complaint.  */
6366 if (decode[0] == NULL)
6367 {
6368 if (cur_uops == PPRO_UOPS_MANY)
6369 {
6370 decode[0] = *insnp;
6371 goto ppro_done;
6372 }
6373
6374       /* Otherwise, search for a 2-4 uop insn to issue.  */
6375 while (cur_uops != PPRO_UOPS_FEW)
6376 {
6377 if (insnp == ready)
6378 break;
6379 cur_uops = ix86_safe_ppro_uops (*--insnp);
6380 }
6381
6382 /* If so, move it to the head of the line. */
6383 if (cur_uops == PPRO_UOPS_FEW)
6384 ix86_reorder_insn (insnp, e_ready);
6385
6386 /* Issue the head of the queue. */
6387 issued_this_cycle = 1;
6388 decode[0] = *e_ready--;
6389 }
6390
6391 /* Look for simple insns to fill in the other two slots. */
6392 for (i = 1; i < 3; ++i)
6393 if (decode[i] == NULL)
6394 {
6395 if (ready >= e_ready)
6396 goto ppro_done;
6397
6398 insnp = e_ready;
6399 cur_uops = ix86_safe_ppro_uops (*insnp);
6400 while (cur_uops != PPRO_UOPS_ONE)
6401 {
6402 if (insnp == ready)
6403 break;
6404 cur_uops = ix86_safe_ppro_uops (*--insnp);
6405 }
6406
6407 /* Found one. Move it to the head of the queue and issue it. */
6408 if (cur_uops == PPRO_UOPS_ONE)
6409 {
6410 ix86_reorder_insn (insnp, e_ready);
6411 decode[i] = *e_ready--;
6412 issued_this_cycle++;
6413 continue;
6414 }
6415
6416 /* ??? Didn't find one. Ideally, here we would do a lazy split
6417 of 2-uop insns, issue one and queue the other. */
6418 }
6419
6420 ppro_done:
6421 if (issued_this_cycle == 0)
6422 issued_this_cycle = 1;
6423 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6424}
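
/* The slot-filling above mirrors the PPro/PII "4-1-1" decode template:
   decoder 0 accepts an insn of up to four uops (PPRO_UOPS_FEW) or owns
   a microcoded one outright (PPRO_UOPS_MANY), while decoders 1 and 2
   accept only single-uop insns (PPRO_UOPS_ONE).  */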
6425
6426
6427/* We are about to begin issuing insns for this clock cycle.
6428   Override the default sort algorithm to better slot instructions.  */
6429int
6430ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6431 FILE *dump ATTRIBUTE_UNUSED;
6432 int sched_verbose ATTRIBUTE_UNUSED;
6433 rtx *ready;
6434 int n_ready;
6435 int clock_var ATTRIBUTE_UNUSED;
6436{
6437 rtx *e_ready = ready + n_ready - 1;
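  /* The ready list is sorted so that the insn issued first sits at the
     end of the array; E_READY is therefore the head of the priority
     queue.  */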
6438
6439 if (n_ready < 2)
6440 goto out;
6441
6442 switch (ix86_cpu)
6443 {
6444 default:
6445 break;
6446
6447 case PROCESSOR_PENTIUM:
6448 ix86_sched_reorder_pentium (ready, e_ready);
6449 break;
6450
6451 case PROCESSOR_PENTIUMPRO:
6452 ix86_sched_reorder_ppro (ready, e_ready);
6453 break;
6454 }
6455
6456out:
6457 return ix86_issue_rate ();
6458}
6459
6460/* We are about to issue INSN. Return the number of insns left on the
6461 ready queue that can be issued this cycle. */
6462
6463int
6464ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6465 FILE *dump;
6466 int sched_verbose;
6467 rtx insn;
6468 int can_issue_more;
6469{
6470 int i;
6471 switch (ix86_cpu)
6472 {
6473 default:
6474 return can_issue_more - 1;
6475
6476 case PROCESSOR_PENTIUMPRO:
6477 {
6478 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6479
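	/* A microcoded (MANY-uop) insn owns all three decoders: flush any
	   partially filled packet, dump the insn as a packet of its own,
	   and leave the decoders empty for the next cycle.  */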
6480 if (uops == PPRO_UOPS_MANY)
6481 {
6482 if (sched_verbose)
6483 ix86_dump_ppro_packet (dump);
6484 ix86_sched_data.ppro.decode[0] = insn;
6485 ix86_sched_data.ppro.decode[1] = NULL;
6486 ix86_sched_data.ppro.decode[2] = NULL;
6487 if (sched_verbose)
6488 ix86_dump_ppro_packet (dump);
6489 ix86_sched_data.ppro.decode[0] = NULL;
6490 }
6491 else if (uops == PPRO_UOPS_FEW)
6492 {
6493 if (sched_verbose)
6494 ix86_dump_ppro_packet (dump);
6495 ix86_sched_data.ppro.decode[0] = insn;
6496 ix86_sched_data.ppro.decode[1] = NULL;
6497 ix86_sched_data.ppro.decode[2] = NULL;
6498 }
6499 else
6500 {
6501 for (i = 0; i < 3; ++i)
6502 if (ix86_sched_data.ppro.decode[i] == NULL)
6503 {
6504 ix86_sched_data.ppro.decode[i] = insn;
6505 break;
6506 }
6507 if (i == 3)
6508 abort ();
6509 if (i == 2)
6510 {
6511 if (sched_verbose)
6512 ix86_dump_ppro_packet (dump);
6513 ix86_sched_data.ppro.decode[0] = NULL;
6514 ix86_sched_data.ppro.decode[1] = NULL;
6515 ix86_sched_data.ppro.decode[2] = NULL;
6516 }
6517 }
6518 }
6519 return --ix86_sched_data.ppro.issued_this_cycle;
6520 }
6521}