]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
Change some regsets to regset_heads
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
3aeae608 2 Copyright (C) 1988, 92, 94-98, 1999 Free Software Foundation, Inc.
2a2ab3f9
JVA
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING. If not, write to
97aadbb9 18the Free Software Foundation, 59 Temple Place - Suite 330,
32b5b1aa 19Boston, MA 02111-1307, USA. */
2a2ab3f9 20
0b6b2900 21#include <setjmp.h>
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
32#include "insn-flags.h"
33#include "output.h"
34#include "insn-attr.h"
2a2ab3f9 35#include "flags.h"
a8ffcc81 36#include "except.h"
ecbc4695 37#include "function.h"
00c79232 38#include "recog.h"
ced8dd8c 39#include "expr.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
2a2ab3f9 43
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
	error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

/* Default stack-probe limit when the target does not provide one.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
32b5b1aa
SC
57/* Processor costs (relative to an add) */
58struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 59 1, /* cost of an add instruction */
32b5b1aa
SC
60 1, /* cost of a lea instruction */
61 3, /* variable shift costs */
62 2, /* constant shift costs */
63 6, /* cost of starting a multiply */
64 1, /* cost of multiply per each bit set */
e075ae69 65 23, /* cost of a divide/mod */
96e7ae40 66 15, /* "large" insn */
7c6b971d 67 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
68 {2, 4, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 4, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {8, 8, 8}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
76};
77
78struct processor_costs i486_cost = { /* 486 specific costs */
79 1, /* cost of an add instruction */
80 1, /* cost of a lea instruction */
81 3, /* variable shift costs */
82 2, /* constant shift costs */
83 12, /* cost of starting a multiply */
84 1, /* cost of multiply per each bit set */
e075ae69 85 40, /* cost of a divide/mod */
96e7ae40 86 15, /* "large" insn */
7c6b971d 87 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
88 {2, 4, 2}, /* cost of loading integer registers
89 in QImode, HImode and SImode.
90 Relative to reg-reg move (2). */
91 {2, 4, 2}, /* cost of storing integer registers */
92 2, /* cost of reg,reg fld/fst */
93 {8, 8, 8}, /* cost of loading fp registers
94 in SFmode, DFmode and XFmode */
95 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
96};
97
e5cb57e8 98struct processor_costs pentium_cost = {
32b5b1aa
SC
99 1, /* cost of an add instruction */
100 1, /* cost of a lea instruction */
856b07a1 101 4, /* variable shift costs */
e5cb57e8 102 1, /* constant shift costs */
856b07a1
SC
103 11, /* cost of starting a multiply */
104 0, /* cost of multiply per each bit set */
e075ae69 105 25, /* cost of a divide/mod */
96e7ae40 106 8, /* "large" insn */
7c6b971d 107 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
108 {2, 4, 2}, /* cost of loading integer registers
109 in QImode, HImode and SImode.
110 Relative to reg-reg move (2). */
111 {2, 4, 2}, /* cost of storing integer registers */
112 2, /* cost of reg,reg fld/fst */
113 {2, 2, 6}, /* cost of loading fp registers
114 in SFmode, DFmode and XFmode */
115 {4, 4, 6} /* cost of loading integer registers */
32b5b1aa
SC
116};
117
856b07a1
SC
118struct processor_costs pentiumpro_cost = {
119 1, /* cost of an add instruction */
120 1, /* cost of a lea instruction */
e075ae69 121 1, /* variable shift costs */
856b07a1 122 1, /* constant shift costs */
e075ae69 123 1, /* cost of starting a multiply */
856b07a1 124 0, /* cost of multiply per each bit set */
e075ae69 125 17, /* cost of a divide/mod */
96e7ae40 126 8, /* "large" insn */
7c6b971d 127 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
128 {4, 4, 4}, /* cost of loading integer registers
129 in QImode, HImode and SImode.
130 Relative to reg-reg move (2). */
131 {2, 2, 2}, /* cost of storing integer registers */
132 2, /* cost of reg,reg fld/fst */
133 {2, 2, 6}, /* cost of loading fp registers
134 in SFmode, DFmode and XFmode */
135 {4, 4, 6} /* cost of loading integer registers */
856b07a1
SC
136};
137
a269a03c
JC
138struct processor_costs k6_cost = {
139 1, /* cost of an add instruction */
e075ae69 140 2, /* cost of a lea instruction */
a269a03c
JC
141 1, /* variable shift costs */
142 1, /* constant shift costs */
73fe76e4 143 3, /* cost of starting a multiply */
a269a03c 144 0, /* cost of multiply per each bit set */
e075ae69 145 18, /* cost of a divide/mod */
96e7ae40 146 8, /* "large" insn */
7c6b971d 147 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
148 {4, 5, 4}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 3, 2}, /* cost of storing integer registers */
152 4, /* cost of reg,reg fld/fst */
153 {6, 6, 6}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {4, 4, 4} /* cost of loading integer registers */
a269a03c
JC
156};
157
309ada50
JH
158struct processor_costs athlon_cost = {
159 1, /* cost of an add instruction */
160 1, /* cost of a lea instruction */
161 1, /* variable shift costs */
162 1, /* constant shift costs */
163 5, /* cost of starting a multiply */
164 0, /* cost of multiply per each bit set */
165 19, /* cost of a divide/mod */
166 8, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {4, 5, 4}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 3, 2}, /* cost of storing integer registers */
172 4, /* cost of reg,reg fld/fst */
173 {6, 6, 6}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {4, 4, 4} /* cost of loading integer registers */
176};
177
32b5b1aa
SC
178struct processor_costs *ix86_cost = &pentium_cost;
179
a269a03c
JC
180/* Processor feature/optimization bitmasks. */
181#define m_386 (1<<PROCESSOR_I386)
182#define m_486 (1<<PROCESSOR_I486)
183#define m_PENT (1<<PROCESSOR_PENTIUM)
184#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
185#define m_K6 (1<<PROCESSOR_K6)
309ada50 186#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 187
309ada50
JH
188const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
189const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 190const int x86_zero_extend_with_and = m_486 | m_PENT;
309ada50 191const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
e075ae69 192const int x86_double_with_add = ~m_386;
a269a03c 193const int x86_use_bit_test = m_386;
e075ae69 194const int x86_unroll_strlen = m_486 | m_PENT;
a269a03c
JC
195const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
196const int x86_use_any_reg = m_486;
309ada50
JH
197const int x86_cmove = m_PPRO | m_ATHLON;
198const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
199const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
e075ae69
RH
200const int x86_partial_reg_stall = m_PPRO;
201const int x86_use_loop = m_K6;
309ada50 202const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
203const int x86_use_mov0 = m_K6;
204const int x86_use_cltd = ~(m_PENT | m_K6);
205const int x86_read_modify_write = ~m_PENT;
206const int x86_read_modify = ~(m_PENT | m_PPRO);
207const int x86_split_long_moves = m_PPRO;
e9e80858 208const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
a269a03c 209
f64cecad 210#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
2a2ab3f9 211
e075ae69
RH
212const char * const hi_reg_name[] = HI_REGISTER_NAMES;
213const char * const qi_reg_name[] = QI_REGISTER_NAMES;
214const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
215
216/* Array of the smallest class containing reg number REGNO, indexed by
217 REGNO. Used by REGNO_REG_CLASS in i386.h. */
218
e075ae69 219enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
220{
221 /* ax, dx, cx, bx */
ab408a86 222 AREG, DREG, CREG, BREG,
4c0d89b5 223 /* si, di, bp, sp */
e075ae69 224 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
225 /* FP registers */
226 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 227 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 228 /* arg pointer */
e075ae69
RH
229 INDEX_REGS,
230 /* flags, fpsr */
231 NO_REGS, NO_REGS
4c0d89b5 232};
c572e5ba
JVA
233
234/* Test and compare insns in i386.md store the information needed to
235 generate branch and scc insns here. */
236
e075ae69
RH
237struct rtx_def *ix86_compare_op0 = NULL_RTX;
238struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 239
36edd3cc
BS
240#define MAX_386_STACK_LOCALS 2
241
242/* Define the structure for the machine field in struct function. */
243struct machine_function
244{
245 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
246};
247
36edd3cc
BS
248#define ix86_stack_locals (current_function->machine->stack_locals)
249
c8c5cb99 250/* which cpu are we scheduling for */
e42ea7f9 251enum processor_type ix86_cpu;
c8c5cb99
SC
252
253/* which instruction set architecture to use. */
c942177e 254int ix86_arch;
c8c5cb99
SC
255
256/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
257const char *ix86_cpu_string; /* for -mcpu=<xxx> */
258const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 259
f5316dfe 260/* Register allocation order */
e075ae69 261const char *ix86_reg_alloc_order;
f5316dfe
MM
262static char regs_allocated[FIRST_PSEUDO_REGISTER];
263
b08de47e 264/* # of registers to use to pass arguments. */
e075ae69 265const char *ix86_regparm_string;
e9a25f70 266
e075ae69
RH
267/* ix86_regparm_string as a number */
268int ix86_regparm;
e9a25f70
JL
269
270/* Alignment to use for loops and jumps: */
271
272/* Power of two alignment for loops. */
e075ae69 273const char *ix86_align_loops_string;
e9a25f70
JL
274
275/* Power of two alignment for non-loop jumps. */
e075ae69 276const char *ix86_align_jumps_string;
e9a25f70 277
3af4bd89 278/* Power of two alignment for stack boundary in bytes. */
e075ae69 279const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
280
281/* Preferred alignment for stack boundary in bits. */
e075ae69 282int ix86_preferred_stack_boundary;
3af4bd89 283
e9a25f70 284/* Values 1-5: see jump.c */
e075ae69
RH
285int ix86_branch_cost;
286const char *ix86_branch_cost_string;
e9a25f70
JL
287
288/* Power of two alignment for functions. */
e075ae69
RH
289int ix86_align_funcs;
290const char *ix86_align_funcs_string;
b08de47e 291
e9a25f70 292/* Power of two alignment for loops. */
e075ae69 293int ix86_align_loops;
b08de47e 294
e9a25f70 295/* Power of two alignment for non-loop jumps. */
e075ae69
RH
296int ix86_align_jumps;
297\f
298static void output_pic_addr_const PROTO ((FILE *, rtx, int));
299static void put_condition_code PROTO ((enum rtx_code, enum machine_mode,
300 int, int, FILE *));
301static enum rtx_code unsigned_comparison PROTO ((enum rtx_code code));
302static rtx ix86_expand_int_compare PROTO ((enum rtx_code, rtx, rtx));
303static rtx ix86_expand_fp_compare PROTO ((enum rtx_code, rtx, rtx, int));
304static rtx ix86_expand_compare PROTO ((enum rtx_code, int));
305static rtx gen_push PROTO ((rtx));
306static int memory_address_length PROTO ((rtx addr));
307static int ix86_flags_dependant PROTO ((rtx, rtx, enum attr_type));
308static int ix86_agi_dependant PROTO ((rtx, rtx, enum attr_type));
309static int ix86_safe_length PROTO ((rtx));
310static enum attr_memory ix86_safe_memory PROTO ((rtx));
311static enum attr_pent_pair ix86_safe_pent_pair PROTO ((rtx));
312static enum attr_ppro_uops ix86_safe_ppro_uops PROTO ((rtx));
313static void ix86_dump_ppro_packet PROTO ((FILE *));
314static void ix86_reorder_insn PROTO ((rtx *, rtx *));
315static rtx * ix86_pent_find_pair PROTO ((rtx *, rtx *, enum attr_pent_pair,
316 rtx));
36edd3cc 317static void ix86_init_machine_status PROTO ((struct function *));
1526a060 318static void ix86_mark_machine_status PROTO ((struct function *));
69ddee61
KG
319static void ix86_split_to_parts PROTO ((rtx, rtx *, enum machine_mode));
320static int ix86_safe_length_prefix PROTO ((rtx));
e075ae69
RH
321
322struct ix86_address
323{
324 rtx base, index, disp;
325 HOST_WIDE_INT scale;
326};
b08de47e 327
e075ae69
RH
328static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
329\f
f5316dfe
MM
330/* Sometimes certain combinations of command options do not make
331 sense on a particular target machine. You can define a macro
332 `OVERRIDE_OPTIONS' to take account of this. This macro, if
333 defined, is executed once just after all the command options have
334 been parsed.
335
336 Don't use this macro to turn on various extra optimizations for
337 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
338
339void
340override_options ()
341{
e075ae69
RH
342 /* Comes from final.c -- no real reason to change it. */
343#define MAX_CODE_ALIGN 16
f5316dfe 344
c8c5cb99
SC
345 static struct ptt
346 {
e075ae69
RH
347 struct processor_costs *cost; /* Processor costs */
348 int target_enable; /* Target flags to enable. */
349 int target_disable; /* Target flags to disable. */
350 int align_loop; /* Default alignments. */
351 int align_jump;
352 int align_func;
353 int branch_cost;
354 }
355 const processor_target_table[PROCESSOR_max] =
356 {
357 {&i386_cost, 0, 0, 2, 2, 2, 1},
358 {&i486_cost, 0, 0, 4, 4, 4, 1},
359 {&pentium_cost, 0, 0, -4, -4, -4, 1},
360 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
361 {&k6_cost, 0, 0, -5, -5, 4, 1},
362 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
363 };
364
365 static struct pta
366 {
69ddee61 367 const char *name; /* processor name or nickname. */
e075ae69
RH
368 enum processor_type processor;
369 }
370 const processor_alias_table[] =
371 {
372 {"i386", PROCESSOR_I386},
373 {"i486", PROCESSOR_I486},
374 {"i586", PROCESSOR_PENTIUM},
375 {"pentium", PROCESSOR_PENTIUM},
376 {"i686", PROCESSOR_PENTIUMPRO},
377 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 378 {"k6", PROCESSOR_K6},
309ada50 379 {"athlon", PROCESSOR_ATHLON},
3af4bd89 380 };
c8c5cb99 381
e075ae69 382 int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
c8c5cb99 383
f5316dfe
MM
384#ifdef SUBTARGET_OVERRIDE_OPTIONS
385 SUBTARGET_OVERRIDE_OPTIONS;
386#endif
387
5a6ee819 388 ix86_arch = PROCESSOR_I386;
e075ae69
RH
389 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
390
391 if (ix86_arch_string != 0)
392 {
393 int i;
394 for (i = 0; i < pta_size; i++)
395 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
396 {
397 ix86_arch = processor_alias_table[i].processor;
398 /* Default cpu tuning to the architecture. */
399 ix86_cpu = ix86_arch;
400 break;
401 }
402 if (i == pta_size)
403 error ("bad value (%s) for -march= switch", ix86_arch_string);
404 }
405
406 if (ix86_cpu_string != 0)
407 {
408 int i;
409 for (i = 0; i < pta_size; i++)
410 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
411 {
412 ix86_cpu = processor_alias_table[i].processor;
413 break;
414 }
415 if (i == pta_size)
416 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
417 }
418
419 ix86_cost = processor_target_table[ix86_cpu].cost;
420 target_flags |= processor_target_table[ix86_cpu].target_enable;
421 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
422
36edd3cc
BS
423 /* Arrange to set up i386_stack_locals for all functions. */
424 init_machine_status = ix86_init_machine_status;
1526a060 425 mark_machine_status = ix86_mark_machine_status;
36edd3cc 426
e9a25f70 427 /* Validate registers in register allocation order. */
e075ae69 428 if (ix86_reg_alloc_order)
f5316dfe 429 {
e075ae69
RH
430 int i, ch;
431 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 432 {
00c79232 433 int regno = 0;
79325812 434
f5316dfe
MM
435 switch (ch)
436 {
437 case 'a': regno = 0; break;
438 case 'd': regno = 1; break;
439 case 'c': regno = 2; break;
440 case 'b': regno = 3; break;
441 case 'S': regno = 4; break;
442 case 'D': regno = 5; break;
443 case 'B': regno = 6; break;
444
445 default: fatal ("Register '%c' is unknown", ch);
446 }
447
448 if (regs_allocated[regno])
e9a25f70 449 fatal ("Register '%c' already specified in allocation order", ch);
f5316dfe
MM
450
451 regs_allocated[regno] = 1;
452 }
453 }
b08de47e 454
e9a25f70 455 /* Validate -mregparm= value. */
e075ae69 456 if (ix86_regparm_string)
b08de47e 457 {
e075ae69
RH
458 ix86_regparm = atoi (ix86_regparm_string);
459 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
e9a25f70 460 fatal ("-mregparm=%d is not between 0 and %d",
e075ae69 461 ix86_regparm, REGPARM_MAX);
b08de47e
MM
462 }
463
e9a25f70 464 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
465 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
466 if (ix86_align_loops_string)
b08de47e 467 {
e075ae69
RH
468 ix86_align_loops = atoi (ix86_align_loops_string);
469 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
b08de47e 470 fatal ("-malign-loops=%d is not between 0 and %d",
e075ae69 471 ix86_align_loops, MAX_CODE_ALIGN);
b08de47e 472 }
3af4bd89
JH
473
474 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
475 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
476 if (ix86_align_jumps_string)
b08de47e 477 {
e075ae69
RH
478 ix86_align_jumps = atoi (ix86_align_jumps_string);
479 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
b08de47e 480 fatal ("-malign-jumps=%d is not between 0 and %d",
e075ae69 481 ix86_align_jumps, MAX_CODE_ALIGN);
b08de47e 482 }
b08de47e 483
e9a25f70 484 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
485 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
486 if (ix86_align_funcs_string)
b08de47e 487 {
e075ae69
RH
488 ix86_align_funcs = atoi (ix86_align_funcs_string);
489 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
b08de47e 490 fatal ("-malign-functions=%d is not between 0 and %d",
e075ae69 491 ix86_align_funcs, MAX_CODE_ALIGN);
b08de47e 492 }
3af4bd89
JH
493
494 /* Validate -mpreferred_stack_boundary= value, or provide default.
495 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
496 ix86_preferred_stack_boundary = 128;
497 if (ix86_preferred_stack_boundary_string)
3af4bd89 498 {
e075ae69 499 int i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89
JH
500 if (i < 2 || i > 31)
501 fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i);
e075ae69 502 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 503 }
77a989d1 504
e9a25f70 505 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
506 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
507 if (ix86_branch_cost_string)
804a8ee0 508 {
e075ae69
RH
509 ix86_branch_cost = atoi (ix86_branch_cost_string);
510 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
511 fatal ("-mbranch-cost=%d is not between 0 and 5",
512 ix86_branch_cost);
804a8ee0 513 }
804a8ee0 514
e9a25f70
JL
515 /* Keep nonleaf frame pointers. */
516 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 517 flag_omit_frame_pointer = 1;
e075ae69
RH
518
519 /* If we're doing fast math, we don't care about comparison order
520 wrt NaNs. This lets us use a shorter comparison sequence. */
521 if (flag_fast_math)
522 target_flags &= ~MASK_IEEE_FP;
523
524 /* If we're planning on using `loop', use it. */
525 if (TARGET_USE_LOOP && optimize)
526 flag_branch_on_count_reg = 1;
f5316dfe
MM
527}
528\f
529/* A C statement (sans semicolon) to choose the order in which to
530 allocate hard registers for pseudo-registers local to a basic
531 block.
532
533 Store the desired register order in the array `reg_alloc_order'.
534 Element 0 should be the register to allocate first; element 1, the
535 next register; and so on.
536
537 The macro body should not assume anything about the contents of
538 `reg_alloc_order' before execution of the macro.
539
540 On most machines, it is not necessary to define this macro. */
541
542void
543order_regs_for_local_alloc ()
544{
00c79232 545 int i, ch, order;
f5316dfe 546
e9a25f70
JL
547 /* User specified the register allocation order. */
548
e075ae69 549 if (ix86_reg_alloc_order)
f5316dfe 550 {
e075ae69 551 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 552 {
00c79232 553 int regno = 0;
79325812 554
f5316dfe
MM
555 switch (ch)
556 {
557 case 'a': regno = 0; break;
558 case 'd': regno = 1; break;
559 case 'c': regno = 2; break;
560 case 'b': regno = 3; break;
561 case 'S': regno = 4; break;
562 case 'D': regno = 5; break;
563 case 'B': regno = 6; break;
564 }
565
566 reg_alloc_order[order++] = regno;
567 }
568
569 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
570 {
e9a25f70 571 if (! regs_allocated[i])
f5316dfe
MM
572 reg_alloc_order[order++] = i;
573 }
574 }
575
e9a25f70 576 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
577 else
578 {
579 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
580 reg_alloc_order[i] = i;
f5316dfe
MM
581 }
582}
32b5b1aa
SC
583\f
584void
c6aded7c 585optimization_options (level, size)
32b5b1aa 586 int level;
bb5177ac 587 int size ATTRIBUTE_UNUSED;
32b5b1aa 588{
e9a25f70
JL
589 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
590 make the problem with not enough registers even worse. */
32b5b1aa
SC
591#ifdef INSN_SCHEDULING
592 if (level > 1)
593 flag_schedule_insns = 0;
594#endif
595}
b08de47e 596\f
e075ae69
RH
597/* Return nonzero if the rtx is known aligned. */
598/* ??? Unused. */
5bc7cd8e
SC
599
600int
e075ae69 601ix86_aligned_p (op)
5bc7cd8e
SC
602 rtx op;
603{
e075ae69
RH
604 struct ix86_address parts;
605
e9a25f70 606 /* Registers and immediate operands are always "aligned". */
5bc7cd8e
SC
607 if (GET_CODE (op) != MEM)
608 return 1;
609
e9a25f70 610 /* Don't even try to do any aligned optimizations with volatiles. */
5bc7cd8e
SC
611 if (MEM_VOLATILE_P (op))
612 return 0;
613
5bc7cd8e
SC
614 op = XEXP (op, 0);
615
e075ae69
RH
616 /* Pushes and pops are only valid on the stack pointer. */
617 if (GET_CODE (op) == PRE_DEC
618 || GET_CODE (op) == POST_INC)
619 return 1;
e9a25f70 620
e075ae69
RH
621 /* Decode the address. */
622 if (! ix86_decompose_address (op, &parts))
623 abort ();
79325812 624
e075ae69
RH
625 /* Look for some component that isn't known to be aligned. */
626 if (parts.index)
627 {
628 if (parts.scale < 4
629 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
630 return 0;
631 }
632 if (parts.base)
633 {
634 if (REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
635 return 0;
636 }
637 if (parts.disp)
638 {
639 if (GET_CODE (parts.disp) != CONST_INT
640 || (INTVAL (parts.disp) & 3) != 0)
641 return 0;
5bc7cd8e 642 }
e9a25f70 643
e075ae69
RH
644 /* Didn't find one -- this must be an aligned address. */
645 return 1;
5bc7cd8e
SC
646}
647\f
b08de47e
MM
648/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
649 attribute for DECL. The attributes in ATTRIBUTES have previously been
650 assigned to DECL. */
651
652int
e075ae69 653ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
654 tree decl ATTRIBUTE_UNUSED;
655 tree attributes ATTRIBUTE_UNUSED;
656 tree identifier ATTRIBUTE_UNUSED;
657 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
658{
659 return 0;
660}
661
662/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
663 attribute for TYPE. The attributes in ATTRIBUTES have previously been
664 assigned to TYPE. */
665
666int
e075ae69 667ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 668 tree type;
bb5177ac 669 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
670 tree identifier;
671 tree args;
672{
673 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 674 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
675 && TREE_CODE (type) != FIELD_DECL
676 && TREE_CODE (type) != TYPE_DECL)
677 return 0;
678
679 /* Stdcall attribute says callee is responsible for popping arguments
680 if they are not variable. */
681 if (is_attribute_p ("stdcall", identifier))
682 return (args == NULL_TREE);
683
e9a25f70 684 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
685 if (is_attribute_p ("cdecl", identifier))
686 return (args == NULL_TREE);
687
688 /* Regparm attribute specifies how many integer arguments are to be
e9a25f70 689 passed in registers. */
b08de47e
MM
690 if (is_attribute_p ("regparm", identifier))
691 {
692 tree cst;
693
e9a25f70 694 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
695 || TREE_CHAIN (args) != NULL_TREE
696 || TREE_VALUE (args) == NULL_TREE)
697 return 0;
698
699 cst = TREE_VALUE (args);
700 if (TREE_CODE (cst) != INTEGER_CST)
701 return 0;
702
703 if (TREE_INT_CST_HIGH (cst) != 0
704 || TREE_INT_CST_LOW (cst) < 0
705 || TREE_INT_CST_LOW (cst) > REGPARM_MAX)
706 return 0;
707
708 return 1;
709 }
710
711 return 0;
712}
713
714/* Return 0 if the attributes for two types are incompatible, 1 if they
715 are compatible, and 2 if they are nearly compatible (which causes a
716 warning to be generated). */
717
718int
e075ae69 719ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
720 tree type1;
721 tree type2;
b08de47e 722{
afcfe58c 723 /* Check for mismatch of non-default calling convention. */
69ddee61 724 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
725
726 if (TREE_CODE (type1) != FUNCTION_TYPE)
727 return 1;
728
729 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
730 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
731 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 732 return 0;
b08de47e
MM
733 return 1;
734}
b08de47e
MM
735\f
736/* Value is the number of bytes of arguments automatically
737 popped when returning from a subroutine call.
738 FUNDECL is the declaration node of the function (as a tree),
739 FUNTYPE is the data type of the function (as a tree),
740 or for a library call it is an identifier node for the subroutine name.
741 SIZE is the number of bytes of arguments passed on the stack.
742
743 On the 80386, the RTD insn may be used to pop them if the number
744 of args is fixed, but if the number is variable then the caller
745 must pop them all. RTD can't be used for library calls now
746 because the library is compiled with the Unix compiler.
747 Use of RTD is a selectable option, since it is incompatible with
748 standard Unix calling sequences. If the option is not selected,
749 the caller must always pop the args.
750
751 The attribute stdcall is equivalent to RTD on a per module basis. */
752
753int
e075ae69 754ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
755 tree fundecl;
756 tree funtype;
757 int size;
79325812 758{
3345ee7d 759 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 760
e9a25f70
JL
761 /* Cdecl functions override -mrtd, and never pop the stack. */
762 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 763
e9a25f70 764 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
765 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
766 rtd = 1;
79325812 767
698cdd84
SC
768 if (rtd
769 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
770 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
771 == void_type_node)))
698cdd84
SC
772 return size;
773 }
79325812 774
e9a25f70 775 /* Lose any fake structure return argument. */
698cdd84
SC
776 if (aggregate_value_p (TREE_TYPE (funtype)))
777 return GET_MODE_SIZE (Pmode);
79325812 778
2614aac6 779 return 0;
b08de47e 780}
b08de47e
MM
781\f
782/* Argument support functions. */
783
784/* Initialize a variable CUM of type CUMULATIVE_ARGS
785 for a call to a function whose data type is FNTYPE.
786 For a library call, FNTYPE is 0. */
787
788void
789init_cumulative_args (cum, fntype, libname)
e9a25f70 790 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
791 tree fntype; /* tree ptr for function decl */
792 rtx libname; /* SYMBOL_REF of library name or 0 */
793{
794 static CUMULATIVE_ARGS zero_cum;
795 tree param, next_param;
796
797 if (TARGET_DEBUG_ARG)
798 {
799 fprintf (stderr, "\ninit_cumulative_args (");
800 if (fntype)
e9a25f70
JL
801 fprintf (stderr, "fntype code = %s, ret code = %s",
802 tree_code_name[(int) TREE_CODE (fntype)],
803 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
804 else
805 fprintf (stderr, "no fntype");
806
807 if (libname)
808 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
809 }
810
811 *cum = zero_cum;
812
813 /* Set up the number of registers to use for passing arguments. */
e075ae69 814 cum->nregs = ix86_regparm;
b08de47e
MM
815 if (fntype)
816 {
817 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 818
b08de47e
MM
819 if (attr)
820 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
821 }
822
823 /* Determine if this function has variable arguments. This is
824 indicated by the last argument being 'void_type_mode' if there
825 are no variable arguments. If there are variable arguments, then
826 we won't pass anything in registers */
827
828 if (cum->nregs)
829 {
830 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 831 param != 0; param = next_param)
b08de47e
MM
832 {
833 next_param = TREE_CHAIN (param);
e9a25f70 834 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
835 cum->nregs = 0;
836 }
837 }
838
839 if (TARGET_DEBUG_ARG)
840 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
841
842 return;
843}
844
845/* Update the data in CUM to advance over an argument
846 of mode MODE and data type TYPE.
847 (TYPE is null for libcalls where that information may not be available.) */
848
849void
850function_arg_advance (cum, mode, type, named)
851 CUMULATIVE_ARGS *cum; /* current arg information */
852 enum machine_mode mode; /* current arg mode */
853 tree type; /* type of the argument or 0 if lib support */
854 int named; /* whether or not the argument was named */
855{
e9a25f70
JL
856 int bytes
857 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
858 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
859
860 if (TARGET_DEBUG_ARG)
861 fprintf (stderr,
e9a25f70 862 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e
MM
863 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
864
865 cum->words += words;
866 cum->nregs -= words;
867 cum->regno += words;
868
869 if (cum->nregs <= 0)
870 {
871 cum->nregs = 0;
872 cum->regno = 0;
873 }
874
875 return;
876}
877
878/* Define where to put the arguments to a function.
879 Value is zero to push the argument on the stack,
880 or a hard register in which to store the argument.
881
882 MODE is the argument's machine mode.
883 TYPE is the data type of the argument (as a tree).
884 This is null for libcalls where that information may
885 not be available.
886 CUM is a variable of type CUMULATIVE_ARGS which gives info about
887 the preceding args and about the function being called.
888 NAMED is nonzero if this argument is a named parameter
889 (otherwise it is an extra parameter matching an ellipsis). */
890
891struct rtx_def *
892function_arg (cum, mode, type, named)
893 CUMULATIVE_ARGS *cum; /* current arg information */
894 enum machine_mode mode; /* current arg mode */
895 tree type; /* type of the argument or 0 if lib support */
896 int named; /* != 0 for normal args, == 0 for ... args */
897{
898 rtx ret = NULL_RTX;
e9a25f70
JL
899 int bytes
900 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
901 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
902
903 switch (mode)
904 {
e9a25f70
JL
905 /* For now, pass fp/complex values on the stack. */
906 default:
b08de47e
MM
907 break;
908
909 case BLKmode:
910 case DImode:
911 case SImode:
912 case HImode:
913 case QImode:
914 if (words <= cum->nregs)
f64cecad 915 ret = gen_rtx_REG (mode, cum->regno);
b08de47e
MM
916 break;
917 }
918
919 if (TARGET_DEBUG_ARG)
920 {
921 fprintf (stderr,
e9a25f70 922 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
923 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
924
925 if (ret)
926 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
927 else
928 fprintf (stderr, ", stack");
929
930 fprintf (stderr, " )\n");
931 }
932
933 return ret;
934}
e075ae69
RH
935\f
936/* Returns 1 if OP is either a symbol reference or a sum of a symbol
937 reference and a constant. */
b08de47e
MM
938
939int
e075ae69
RH
940symbolic_operand (op, mode)
941 register rtx op;
942 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 943{
e075ae69 944 switch (GET_CODE (op))
2a2ab3f9 945 {
e075ae69
RH
946 case SYMBOL_REF:
947 case LABEL_REF:
948 return 1;
949
950 case CONST:
951 op = XEXP (op, 0);
952 if (GET_CODE (op) == SYMBOL_REF
953 || GET_CODE (op) == LABEL_REF
954 || (GET_CODE (op) == UNSPEC
955 && XINT (op, 1) >= 6
956 && XINT (op, 1) <= 7))
957 return 1;
958 if (GET_CODE (op) != PLUS
959 || GET_CODE (XEXP (op, 1)) != CONST_INT)
960 return 0;
961
962 op = XEXP (op, 0);
963 if (GET_CODE (op) == SYMBOL_REF
964 || GET_CODE (op) == LABEL_REF)
965 return 1;
966 /* Only @GOTOFF gets offsets. */
967 if (GET_CODE (op) != UNSPEC
968 || XINT (op, 1) != 7)
969 return 0;
970
971 op = XVECEXP (op, 0, 0);
972 if (GET_CODE (op) == SYMBOL_REF
973 || GET_CODE (op) == LABEL_REF)
974 return 1;
975 return 0;
976
977 default:
978 return 0;
2a2ab3f9
JVA
979 }
980}
2a2ab3f9 981
e075ae69 982/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 983
e075ae69
RH
984int
985pic_symbolic_operand (op, mode)
986 register rtx op;
987 enum machine_mode mode ATTRIBUTE_UNUSED;
988{
989 if (GET_CODE (op) == CONST)
2a2ab3f9 990 {
e075ae69
RH
991 op = XEXP (op, 0);
992 if (GET_CODE (op) == UNSPEC)
993 return 1;
994 if (GET_CODE (op) != PLUS
995 || GET_CODE (XEXP (op, 1)) != CONST_INT)
996 return 0;
997 op = XEXP (op, 0);
998 if (GET_CODE (op) == UNSPEC)
999 return 1;
2a2ab3f9 1000 }
e075ae69 1001 return 0;
2a2ab3f9 1002}
2a2ab3f9 1003
28d52ffb
RH
1004/* Test for a valid operand for a call instruction. Don't allow the
1005 arg pointer register or virtual regs since they may decay into
1006 reg + const, which the patterns can't handle. */
2a2ab3f9 1007
e075ae69
RH
1008int
1009call_insn_operand (op, mode)
1010 rtx op;
1011 enum machine_mode mode ATTRIBUTE_UNUSED;
1012{
1013 if (GET_CODE (op) != MEM)
1014 return 0;
1015 op = XEXP (op, 0);
2a2ab3f9 1016
e075ae69
RH
1017 /* Disallow indirect through a virtual register. This leads to
1018 compiler aborts when trying to eliminate them. */
1019 if (GET_CODE (op) == REG
1020 && (op == arg_pointer_rtx
1021 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1022 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1023 return 0;
2a2ab3f9 1024
28d52ffb
RH
1025 /* Disallow `call 1234'. Due to varying assembler lameness this
1026 gets either rejected or translated to `call .+1234'. */
1027 if (GET_CODE (op) == CONST_INT)
1028 return 0;
1029
e075ae69
RH
1030 /* Otherwise we can allow any general_operand in the address. */
1031 return general_operand (op, Pmode);
1032}
2a2ab3f9 1033
28d52ffb 1034/* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic. */
5f1ec3e6 1035
e075ae69
RH
1036int
1037expander_call_insn_operand (op, mode)
1038 rtx op;
28d52ffb 1039 enum machine_mode mode;
e075ae69 1040{
28d52ffb
RH
1041 if (GET_CODE (op) == MEM
1042 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF)
e075ae69 1043 return 1;
2a2ab3f9 1044
28d52ffb 1045 return call_insn_operand (op, mode);
e075ae69 1046}
79325812 1047
e075ae69
RH
1048int
1049constant_call_address_operand (op, mode)
1050 rtx op;
1051 enum machine_mode mode ATTRIBUTE_UNUSED;
1052{
1053 return GET_CODE (op) == MEM && CONSTANT_ADDRESS_P (XEXP (op, 0));
1054}
2a2ab3f9 1055
e075ae69 1056/* Match exactly zero and one. */
e9a25f70 1057
e075ae69
RH
1058int
1059const0_operand (op, mode)
1060 register rtx op;
1061 enum machine_mode mode;
1062{
1063 return op == CONST0_RTX (mode);
1064}
e9a25f70 1065
e075ae69
RH
1066int
1067const1_operand (op, mode)
1068 register rtx op;
1069 enum machine_mode mode ATTRIBUTE_UNUSED;
1070{
1071 return op == const1_rtx;
1072}
2a2ab3f9 1073
e075ae69 1074/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1075
e075ae69
RH
1076int
1077const248_operand (op, mode)
1078 register rtx op;
1079 enum machine_mode mode ATTRIBUTE_UNUSED;
1080{
1081 return (GET_CODE (op) == CONST_INT
1082 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1083}
e9a25f70 1084
e075ae69 1085/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1086
e075ae69
RH
1087int
1088incdec_operand (op, mode)
1089 register rtx op;
1090 enum machine_mode mode;
1091{
1092 if (op == const1_rtx || op == constm1_rtx)
1093 return 1;
1094 if (GET_CODE (op) != CONST_INT)
1095 return 0;
1096 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1097 return 1;
1098 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1099 return 1;
1100 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1101 return 1;
1102 return 0;
1103}
2a2ab3f9 1104
e075ae69
RH
1105/* Return false if this is the stack pointer, or any other fake
1106 register eliminable to the stack pointer. Otherwise, this is
1107 a register operand.
2a2ab3f9 1108
e075ae69
RH
1109 This is used to prevent esp from being used as an index reg.
1110 Which would only happen in pathological cases. */
5f1ec3e6 1111
e075ae69
RH
1112int
1113reg_no_sp_operand (op, mode)
1114 register rtx op;
1115 enum machine_mode mode;
1116{
1117 rtx t = op;
1118 if (GET_CODE (t) == SUBREG)
1119 t = SUBREG_REG (t);
1120 if (t == stack_pointer_rtx || t == arg_pointer_rtx)
1121 return 0;
2a2ab3f9 1122
e075ae69 1123 return register_operand (op, mode);
2a2ab3f9 1124}
b840bfb0 1125
e075ae69 1126/* Return true if op is a Q_REGS class register. */
b840bfb0 1127
e075ae69
RH
1128int
1129q_regs_operand (op, mode)
1130 register rtx op;
1131 enum machine_mode mode;
b840bfb0 1132{
e075ae69
RH
1133 if (mode != VOIDmode && GET_MODE (op) != mode)
1134 return 0;
1135 if (GET_CODE (op) == SUBREG)
1136 op = SUBREG_REG (op);
1137 return QI_REG_P (op);
1138}
b840bfb0 1139
e075ae69 1140/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1141
e075ae69
RH
1142int
1143non_q_regs_operand (op, mode)
1144 register rtx op;
1145 enum machine_mode mode;
1146{
1147 if (mode != VOIDmode && GET_MODE (op) != mode)
1148 return 0;
1149 if (GET_CODE (op) == SUBREG)
1150 op = SUBREG_REG (op);
1151 return NON_QI_REG_P (op);
1152}
b840bfb0 1153
e075ae69
RH
1154/* Return 1 if OP is a comparison operator that can use the condition code
1155 generated by a logical operation, which characteristicly does not set
1156 overflow or carry. To be used with CCNOmode. */
b840bfb0 1157
e075ae69
RH
1158int
1159no_comparison_operator (op, mode)
1160 register rtx op;
1161 enum machine_mode mode;
1162{
1163 return ((mode == VOIDmode || GET_MODE (op) == mode)
1164 && GET_RTX_CLASS (GET_CODE (op)) == '<'
1165 && GET_CODE (op) != LE
1166 && GET_CODE (op) != GT);
1167}
b840bfb0 1168
e075ae69 1169/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
b840bfb0 1170
e075ae69
RH
1171int
1172fcmov_comparison_operator (op, mode)
1173 register rtx op;
1174 enum machine_mode mode;
1175{
1176 return ((mode == VOIDmode || GET_MODE (op) == mode)
1177 && GET_RTX_CLASS (GET_CODE (op)) == '<'
1178 && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
1179}
b840bfb0 1180
e9e80858
JH
1181/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1182
1183int
1184promotable_binary_operator (op, mode)
1185 register rtx op;
1186 enum machine_mode mode ATTRIBUTE_UNUSED;
1187{
1188 switch (GET_CODE (op))
1189 {
1190 case MULT:
1191 /* Modern CPUs have same latency for HImode and SImode multiply,
1192 but 386 and 486 do HImode multiply faster. */
1193 return ix86_cpu > PROCESSOR_I486;
1194 case PLUS:
1195 case AND:
1196 case IOR:
1197 case XOR:
1198 case ASHIFT:
1199 return 1;
1200 default:
1201 return 0;
1202 }
1203}
1204
e075ae69
RH
1205/* Nearly general operand, but accept any const_double, since we wish
1206 to be able to drop them into memory rather than have them get pulled
1207 into registers. */
b840bfb0 1208
2a2ab3f9 1209int
e075ae69
RH
1210cmp_fp_expander_operand (op, mode)
1211 register rtx op;
1212 enum machine_mode mode;
2a2ab3f9 1213{
e075ae69 1214 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1215 return 0;
e075ae69 1216 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1217 return 1;
e075ae69 1218 return general_operand (op, mode);
2a2ab3f9
JVA
1219}
1220
e075ae69 1221/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1222
1223int
e075ae69 1224ext_register_operand (op, mode)
2a2ab3f9 1225 register rtx op;
bb5177ac 1226 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1227{
e075ae69
RH
1228 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1229 return 0;
1230 return register_operand (op, VOIDmode);
1231}
1232
1233/* Return 1 if this is a valid binary floating-point operation.
1234 OP is the expression matched, and MODE is its mode. */
1235
1236int
1237binary_fp_operator (op, mode)
1238 register rtx op;
1239 enum machine_mode mode;
1240{
1241 if (mode != VOIDmode && mode != GET_MODE (op))
1242 return 0;
1243
2a2ab3f9
JVA
1244 switch (GET_CODE (op))
1245 {
e075ae69
RH
1246 case PLUS:
1247 case MINUS:
1248 case MULT:
1249 case DIV:
1250 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1251
2a2ab3f9
JVA
1252 default:
1253 return 0;
1254 }
1255}
fee2770d 1256
e075ae69
RH
1257int
1258mult_operator(op, mode)
1259 register rtx op;
1260 enum machine_mode mode ATTRIBUTE_UNUSED;
1261{
1262 return GET_CODE (op) == MULT;
1263}
1264
1265int
1266div_operator(op, mode)
1267 register rtx op;
1268 enum machine_mode mode ATTRIBUTE_UNUSED;
1269{
1270 return GET_CODE (op) == DIV;
1271}
0a726ef1
JL
1272
1273int
e075ae69
RH
1274arith_or_logical_operator (op, mode)
1275 rtx op;
1276 enum machine_mode mode;
0a726ef1 1277{
e075ae69
RH
1278 return ((mode == VOIDmode || GET_MODE (op) == mode)
1279 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1280 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1281}
1282
e075ae69 1283/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1284
1285int
e075ae69
RH
1286memory_displacement_operand (op, mode)
1287 register rtx op;
1288 enum machine_mode mode;
4f2c8ebb 1289{
e075ae69 1290 struct ix86_address parts;
e9a25f70 1291
e075ae69
RH
1292 if (! memory_operand (op, mode))
1293 return 0;
1294
1295 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1296 abort ();
1297
1298 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1299}
1300
e075ae69
RH
1301/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
1302 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1303
1304 ??? It seems likely that this will only work because cmpsi is an
1305 expander, and no actual insns use this. */
4f2c8ebb
RS
1306
1307int
e075ae69
RH
1308cmpsi_operand (op, mode)
1309 rtx op;
1310 enum machine_mode mode;
fee2770d 1311{
e075ae69
RH
1312 if (general_operand (op, mode))
1313 return 1;
1314
1315 if (GET_CODE (op) == AND
1316 && GET_MODE (op) == SImode
1317 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1318 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1319 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1320 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1321 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1322 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1323 return 1;
e9a25f70 1324
fee2770d
RS
1325 return 0;
1326}
d784886d 1327
e075ae69
RH
1328/* Returns 1 if OP is memory operand that can not be represented by the
1329 modRM array. */
d784886d
RK
1330
1331int
e075ae69 1332long_memory_operand (op, mode)
d784886d
RK
1333 register rtx op;
1334 enum machine_mode mode;
1335{
e075ae69 1336 if (! memory_operand (op, mode))
d784886d
RK
1337 return 0;
1338
e075ae69 1339 return memory_address_length (op) != 0;
d784886d 1340}
e075ae69
RH
1341\f
1342/* Return true if the constant is something that can be loaded with
1343 a special instruction. Only handle 0.0 and 1.0; others are less
1344 worthwhile. */
57dbca5e
BS
1345
1346int
e075ae69
RH
1347standard_80387_constant_p (x)
1348 rtx x;
57dbca5e 1349{
e075ae69
RH
1350 if (GET_CODE (x) != CONST_DOUBLE)
1351 return -1;
1352
1353#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1354 {
1355 REAL_VALUE_TYPE d;
1356 jmp_buf handler;
1357 int is0, is1;
1358
1359 if (setjmp (handler))
1360 return 0;
1361
1362 set_float_handler (handler);
1363 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1364 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1365 is1 = REAL_VALUES_EQUAL (d, dconst1);
1366 set_float_handler (NULL_PTR);
1367
1368 if (is0)
1369 return 1;
1370
1371 if (is1)
1372 return 2;
1373
1374 /* Note that on the 80387, other constants, such as pi,
1375 are much slower to load as standard constants
1376 than to load from doubles in memory! */
1377 /* ??? Not true on K6: all constants are equal cost. */
1378 }
1379#endif
1380
1381 return 0;
57dbca5e
BS
1382}
1383
2a2ab3f9
JVA
1384/* Returns 1 if OP contains a symbol reference */
1385
1386int
1387symbolic_reference_mentioned_p (op)
1388 rtx op;
1389{
6f7d635c 1390 register const char *fmt;
2a2ab3f9
JVA
1391 register int i;
1392
1393 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1394 return 1;
1395
1396 fmt = GET_RTX_FORMAT (GET_CODE (op));
1397 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1398 {
1399 if (fmt[i] == 'E')
1400 {
1401 register int j;
1402
1403 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1404 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1405 return 1;
1406 }
e9a25f70 1407
2a2ab3f9
JVA
1408 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1409 return 1;
1410 }
1411
1412 return 0;
1413}
e075ae69
RH
1414
1415/* Return 1 if it is appropriate to emit `ret' instructions in the
1416 body of a function. Do this only if the epilogue is simple, needing a
1417 couple of insns. Prior to reloading, we can't tell how many registers
1418 must be saved, so return 0 then. Return 0 if there is no frame
1419 marker to de-allocate.
1420
1421 If NON_SAVING_SETJMP is defined and true, then it is not possible
1422 for the epilogue to be simple, so return 0. This is a special case
1423 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1424 until final, but jump_optimize may need to know sooner if a
1425 `return' is OK. */
32b5b1aa
SC
1426
1427int
e075ae69 1428ix86_can_use_return_insn_p ()
32b5b1aa 1429{
e075ae69
RH
1430 int regno;
1431 int nregs = 0;
1432 int reglimit = (frame_pointer_needed
1433 ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1434 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1435 || current_function_uses_const_pool);
32b5b1aa 1436
e075ae69
RH
1437#ifdef NON_SAVING_SETJMP
1438 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1439 return 0;
1440#endif
32b5b1aa 1441
e075ae69
RH
1442 if (! reload_completed)
1443 return 0;
32b5b1aa 1444
e075ae69
RH
1445 for (regno = reglimit - 1; regno >= 0; regno--)
1446 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1447 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1448 nregs++;
0afeb08a 1449
e075ae69
RH
1450 return nregs == 0 || ! frame_pointer_needed;
1451}
1452\f
21a427cc 1453static char *pic_label_name;
e075ae69 1454static int pic_label_output;
21a427cc 1455static char *global_offset_table_name;
e9a25f70 1456
e075ae69
RH
1457/* This function generates code for -fpic that loads %ebx with
1458 the return address of the caller and then returns. */
1459
1460void
1461asm_output_function_prefix (file, name)
1462 FILE *file;
1463 char *name ATTRIBUTE_UNUSED;
1464{
1465 rtx xops[2];
1466 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1467 || current_function_uses_const_pool);
1468 xops[0] = pic_offset_table_rtx;
1469 xops[1] = stack_pointer_rtx;
32b5b1aa 1470
e075ae69
RH
1471 /* Deep branch prediction favors having a return for every call. */
1472 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1473 {
e075ae69
RH
1474 if (!pic_label_output)
1475 {
1476 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1477 internal (non-global) label that's being emitted, it didn't make
1478 sense to have .type information for local labels. This caused
1479 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1480 me debug info for a label that you're declaring non-global?) this
1481 was changed to call ASM_OUTPUT_LABEL() instead. */
32b5b1aa 1482
e075ae69 1483 ASM_OUTPUT_LABEL (file, pic_label_name);
e9a25f70 1484
e075ae69
RH
1485 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1486 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1487 output_asm_insn ("ret", xops);
0afeb08a 1488
e075ae69 1489 pic_label_output = 1;
32b5b1aa 1490 }
32b5b1aa 1491 }
32b5b1aa 1492}
32b5b1aa 1493
e075ae69
RH
1494void
1495load_pic_register ()
32b5b1aa 1496{
e075ae69 1497 rtx gotsym, pclab;
32b5b1aa 1498
21a427cc
AS
1499 if (global_offset_table_name == NULL)
1500 {
1501 global_offset_table_name =
1502 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1503 ggc_add_string_root (&global_offset_table_name, 1);
1504 }
1505 gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);
32b5b1aa 1506
e075ae69 1507 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1508 {
21a427cc
AS
1509 if (pic_label_name == NULL)
1510 {
1511 pic_label_name = ggc_alloc_string (NULL, 32);
1512 ggc_add_string_root (&pic_label_name, 1);
1513 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1514 }
e075ae69 1515 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 1516 }
e075ae69 1517 else
e5cb57e8 1518 {
e075ae69 1519 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 1520 }
e5cb57e8 1521
e075ae69 1522 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 1523
e075ae69
RH
1524 if (! TARGET_DEEP_BRANCH_PREDICTION)
1525 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 1526
e075ae69 1527 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 1528}
8dfe5673 1529
e075ae69 1530/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1531
e075ae69
RH
1532static rtx
1533gen_push (arg)
1534 rtx arg;
e9a25f70 1535{
c5c76735
JL
1536 return gen_rtx_SET (VOIDmode,
1537 gen_rtx_MEM (SImode,
1538 gen_rtx_PRE_DEC (SImode,
1539 stack_pointer_rtx)),
1540 arg);
e9a25f70
JL
1541}
1542
65954bd8
JL
1543/* Compute the size of local storage taking into consideration the
1544 desired stack alignment which is to be maintained. Also determine
1545 the number of registers saved below the local storage. */
1546
1547HOST_WIDE_INT
1548ix86_compute_frame_size (size, nregs_on_stack)
1549 HOST_WIDE_INT size;
1550 int *nregs_on_stack;
1551{
1552 int limit;
1553 int nregs;
1554 int regno;
1555 int padding;
1556 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1557 || current_function_uses_const_pool);
1558 HOST_WIDE_INT total_size;
1559
1560 limit = frame_pointer_needed
1561 ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;
1562
1563 nregs = 0;
1564
1565 for (regno = limit - 1; regno >= 0; regno--)
1566 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1567 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1568 nregs++;
1569
1570 padding = 0;
1571 total_size = size + (nregs * UNITS_PER_WORD);
1572
1573#ifdef PREFERRED_STACK_BOUNDARY
1574 {
1575 int offset;
1576 int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
1577
1578 offset = 4;
1579 if (frame_pointer_needed)
1580 offset += UNITS_PER_WORD;
1581
1582 total_size += offset;
1583
1584 padding = ((total_size + preferred_alignment - 1)
1585 & -preferred_alignment) - total_size;
1586
1587 if (padding < (((offset + preferred_alignment - 1)
1588 & -preferred_alignment) - offset))
1589 padding += preferred_alignment;
54ff41b7
JW
1590
1591 /* Don't bother aligning the stack of a leaf function
1592 which doesn't allocate any stack slots. */
1593 if (size == 0 && current_function_is_leaf)
1594 padding = 0;
65954bd8
JL
1595 }
1596#endif
1597
1598 if (nregs_on_stack)
1599 *nregs_on_stack = nregs;
1600
1601 return size + padding;
1602}
1603
e075ae69
RH
1604/* Expand the prologue into a bunch of separate insns. */
1605
1606void
1607ix86_expand_prologue ()
2a2ab3f9
JVA
1608{
1609 register int regno;
1610 int limit;
aae75261
JVA
1611 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1612 || current_function_uses_const_pool);
65954bd8 1613 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
469ac993 1614 rtx insn;
79325812 1615
e075ae69
RH
1616 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1617 slower on all targets. Also sdb doesn't like it. */
e9a25f70 1618
2a2ab3f9
JVA
1619 if (frame_pointer_needed)
1620 {
e075ae69
RH
1621 insn = emit_insn (gen_push (frame_pointer_rtx));
1622 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 1623
e075ae69
RH
1624 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
1625 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
1626 }
1627
8dfe5673
RK
1628 if (tsize == 0)
1629 ;
1630 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
469ac993 1631 {
e075ae69
RH
1632 if (frame_pointer_needed)
1633 insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
1634 stack_pointer_rtx,
1635 GEN_INT (-tsize),
1636 frame_pointer_rtx));
79325812 1637 else
e075ae69
RH
1638 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1639 GEN_INT (-tsize)));
1640 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 1641 }
79325812 1642 else
8dfe5673 1643 {
e075ae69 1644 /* ??? Is this only valid for Win32? */
e9a25f70 1645
e075ae69 1646 rtx arg0, sym;
e9a25f70 1647
e075ae69
RH
1648 arg0 = gen_rtx_REG (SImode, 0);
1649 emit_move_insn (arg0, GEN_INT (tsize));
77a989d1 1650
e075ae69
RH
1651 sym = gen_rtx_MEM (FUNCTION_MODE,
1652 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1653 insn = emit_call_insn (gen_call (sym, const0_rtx));
1654
1655 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
1656 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1657 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 1658 }
e9a25f70 1659
2a2ab3f9
JVA
1660 limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1661 for (regno = limit - 1; regno >= 0; regno--)
1662 if ((regs_ever_live[regno] && ! call_used_regs[regno])
aae75261 1663 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2a2ab3f9 1664 {
e075ae69
RH
1665 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1666 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 1667 }
2a2ab3f9 1668
84530511
SC
1669#ifdef SUBTARGET_PROLOGUE
1670 SUBTARGET_PROLOGUE;
1671#endif
1672
e9a25f70 1673 if (pic_reg_used)
e075ae69 1674 load_pic_register ();
77a989d1 1675
e9a25f70
JL
1676 /* If we are profiling, make sure no instructions are scheduled before
1677 the call to mcount. However, if -fpic, the above call will have
1678 done that. */
e075ae69 1679 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
e9a25f70 1680 emit_insn (gen_blockage ());
77a989d1
SC
1681}
1682
79325812 1683/* Restore function stack, frame, and registers. */
e9a25f70 1684
2a2ab3f9 1685void
77a989d1 1686ix86_expand_epilogue ()
2a2ab3f9
JVA
1687{
1688 register int regno;
65954bd8
JL
1689 register int limit;
1690 int nregs;
aae75261
JVA
1691 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1692 || current_function_uses_const_pool);
fdb8a883 1693 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
65954bd8
JL
1694 HOST_WIDE_INT offset;
1695 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);
2a2ab3f9 1696
e075ae69 1697 /* SP is often unreliable so we may have to go off the frame pointer. */
2a2ab3f9 1698
65954bd8 1699 offset = -(tsize + nregs * UNITS_PER_WORD);
2a2ab3f9 1700
fdb8a883
JW
1701 /* If we're only restoring one register and sp is not valid then
1702 using a move instruction to restore the register since it's
1703 less work than reloading sp and popping the register. Otherwise,
1704 restore sp (if necessary) and pop the registers. */
1705
e075ae69
RH
1706 limit = (frame_pointer_needed
1707 ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
65954bd8 1708
fdb8a883 1709 if (nregs > 1 || sp_valid)
2a2ab3f9 1710 {
fdb8a883 1711 if ( !sp_valid )
2a2ab3f9 1712 {
e075ae69
RH
1713 rtx addr_offset;
1714 addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
1715 addr_offset = XEXP (addr_offset, 0);
1716
1717 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset));
2a2ab3f9
JVA
1718 }
1719
1720 for (regno = 0; regno < limit; regno++)
1721 if ((regs_ever_live[regno] && ! call_used_regs[regno])
aae75261 1722 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2a2ab3f9 1723 {
e075ae69 1724 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2a2ab3f9
JVA
1725 }
1726 }
1727 else
e075ae69
RH
1728 {
1729 for (regno = 0; regno < limit; regno++)
1730 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1731 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1732 {
1733 emit_move_insn (gen_rtx_REG (SImode, regno),
1734 adj_offsettable_operand (AT_BP (Pmode), offset));
1735 offset += 4;
1736 }
1737 }
2a2ab3f9
JVA
1738
1739 if (frame_pointer_needed)
1740 {
c8c5cb99 1741 /* If not an i386, mov & pop is faster than "leave". */
3f803cd9 1742 if (TARGET_USE_LEAVE)
e075ae69 1743 emit_insn (gen_leave());
c8c5cb99 1744 else
2a2ab3f9 1745 {
e075ae69
RH
1746 emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
1747 frame_pointer_rtx));
1748 emit_insn (gen_popsi1 (frame_pointer_rtx));
e9a25f70
JL
1749 }
1750 }
77a989d1 1751 else if (tsize)
2a2ab3f9 1752 {
3403c6ca
UD
1753 /* Intel's docs say that for 4 or 8 bytes of stack frame one should
1754 use `pop' and not `add'. */
1755 int use_pop = tsize == 4;
e075ae69 1756 rtx edx = 0, ecx;
e9a25f70 1757
3403c6ca
UD
1758 /* Use two pops only for the Pentium processors. */
1759 if (tsize == 8 && !TARGET_386 && !TARGET_486)
1760 {
1761 rtx retval = current_function_return_rtx;
1762
e075ae69 1763 edx = gen_rtx_REG (SImode, 1);
3403c6ca
UD
1764
1765 /* This case is a bit more complex. Since we cannot pop into
1766 %ecx twice we need a second register. But this is only
1767 available if the return value is not of DImode in which
1768 case the %edx register is not available. */
1769 use_pop = (retval == NULL
e075ae69 1770 || ! reg_overlap_mentioned_p (edx, retval));
3403c6ca
UD
1771 }
1772
1773 if (use_pop)
1774 {
e075ae69
RH
1775 ecx = gen_rtx_REG (SImode, 2);
1776
1777 /* We have to prevent the two pops here from being scheduled.
1778 GCC otherwise would try in some situation to put other
1779 instructions in between them which has a bad effect. */
1780 emit_insn (gen_blockage ());
1781 emit_insn (gen_popsi1 (ecx));
1782 if (tsize == 8)
1783 emit_insn (gen_popsi1 (edx));
3403c6ca 1784 }
e9a25f70 1785 else
3403c6ca
UD
1786 {
1787 /* If there is no frame pointer, we must still release the frame. */
e075ae69
RH
1788 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1789 GEN_INT (tsize)));
3403c6ca 1790 }
2a2ab3f9
JVA
1791 }
1792
68f654ec
RK
1793#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1794 if (profile_block_flag == 2)
1795 {
e075ae69 1796 FUNCTION_BLOCK_PROFILER_EXIT;
68f654ec
RK
1797 }
1798#endif
1799
2a2ab3f9
JVA
1800 if (current_function_pops_args && current_function_args_size)
1801 {
e075ae69 1802 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9
JVA
1803
1804 /* i386 can only pop 32K bytes (maybe 64K? Is it signed?). If
1805 asked to pop more, pop return address, do explicit add, and jump
1806 indirectly to the caller. */
1807
1808 if (current_function_pops_args >= 32768)
1809 {
e075ae69 1810 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 1811
e075ae69
RH
1812 emit_insn (gen_popsi1 (ecx));
1813 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
1814 emit_indirect_jump (ecx);
e9a25f70 1815 }
79325812 1816 else
e075ae69
RH
1817 emit_jump_insn (gen_return_pop_internal (popc));
1818 }
1819 else
1820 emit_jump_insn (gen_return_internal ());
1821}
1822\f
1823/* Extract the parts of an RTL expression that is a valid memory address
1824 for an instruction. Return false if the structure of the address is
1825 grossly off. */
1826
1827static int
1828ix86_decompose_address (addr, out)
1829 register rtx addr;
1830 struct ix86_address *out;
1831{
1832 rtx base = NULL_RTX;
1833 rtx index = NULL_RTX;
1834 rtx disp = NULL_RTX;
1835 HOST_WIDE_INT scale = 1;
1836 rtx scale_rtx = NULL_RTX;
1837
1838 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
1839 base = addr;
1840 else if (GET_CODE (addr) == PLUS)
1841 {
1842 rtx op0 = XEXP (addr, 0);
1843 rtx op1 = XEXP (addr, 1);
1844 enum rtx_code code0 = GET_CODE (op0);
1845 enum rtx_code code1 = GET_CODE (op1);
1846
1847 if (code0 == REG || code0 == SUBREG)
1848 {
1849 if (code1 == REG || code1 == SUBREG)
1850 index = op0, base = op1; /* index + base */
1851 else
1852 base = op0, disp = op1; /* base + displacement */
1853 }
1854 else if (code0 == MULT)
e9a25f70 1855 {
e075ae69
RH
1856 index = XEXP (op0, 0);
1857 scale_rtx = XEXP (op0, 1);
1858 if (code1 == REG || code1 == SUBREG)
1859 base = op1; /* index*scale + base */
e9a25f70 1860 else
e075ae69
RH
1861 disp = op1; /* index*scale + disp */
1862 }
1863 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
1864 {
1865 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
1866 scale_rtx = XEXP (XEXP (op0, 0), 1);
1867 base = XEXP (op0, 1);
1868 disp = op1;
2a2ab3f9 1869 }
e075ae69
RH
1870 else if (code0 == PLUS)
1871 {
1872 index = XEXP (op0, 0); /* index + base + disp */
1873 base = XEXP (op0, 1);
1874 disp = op1;
1875 }
1876 else
1877 return FALSE;
1878 }
1879 else if (GET_CODE (addr) == MULT)
1880 {
1881 index = XEXP (addr, 0); /* index*scale */
1882 scale_rtx = XEXP (addr, 1);
1883 }
1884 else if (GET_CODE (addr) == ASHIFT)
1885 {
1886 rtx tmp;
1887
1888 /* We're called for lea too, which implements ashift on occasion. */
1889 index = XEXP (addr, 0);
1890 tmp = XEXP (addr, 1);
1891 if (GET_CODE (tmp) != CONST_INT)
1892 return FALSE;
1893 scale = INTVAL (tmp);
1894 if ((unsigned HOST_WIDE_INT) scale > 3)
1895 return FALSE;
1896 scale = 1 << scale;
2a2ab3f9 1897 }
2a2ab3f9 1898 else
e075ae69
RH
1899 disp = addr; /* displacement */
1900
1901 /* Extract the integral value of scale. */
1902 if (scale_rtx)
e9a25f70 1903 {
e075ae69
RH
1904 if (GET_CODE (scale_rtx) != CONST_INT)
1905 return FALSE;
1906 scale = INTVAL (scale_rtx);
e9a25f70 1907 }
3b3c6a3f 1908
e075ae69
RH
1909 /* Allow arg pointer and stack pointer as index if there is not scaling */
1910 if (base && index && scale == 1
1911 && (index == arg_pointer_rtx || index == stack_pointer_rtx))
1912 {
1913 rtx tmp = base;
1914 base = index;
1915 index = tmp;
1916 }
1917
1918 /* Special case: %ebp cannot be encoded as a base without a displacement. */
1919 if (base == frame_pointer_rtx && !disp)
1920 disp = const0_rtx;
1921
1922 /* Special case: on K6, [%esi] makes the instruction vector decoded.
1923 Avoid this by transforming to [%esi+0]. */
1924 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
1925 && base && !index && !disp
329e1d01 1926 && REG_P (base)
e075ae69
RH
1927 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
1928 disp = const0_rtx;
1929
1930 /* Special case: encode reg+reg instead of reg*2. */
1931 if (!base && index && scale && scale == 2)
1932 base = index, scale = 1;
1933
1934 /* Special case: scaling cannot be encoded without base or displacement. */
1935 if (!base && !disp && index && scale != 1)
1936 disp = const0_rtx;
1937
1938 out->base = base;
1939 out->index = index;
1940 out->disp = disp;
1941 out->scale = scale;
3b3c6a3f 1942
e075ae69
RH
1943 return TRUE;
1944}
3b3c6a3f 1945
e075ae69
RH
1946/* Determine if a given CONST RTX is a valid memory displacement
1947 in PIC mode. */
1948
59be65f6 1949int
91bb873f
RH
1950legitimate_pic_address_disp_p (disp)
1951 register rtx disp;
1952{
1953 if (GET_CODE (disp) != CONST)
1954 return 0;
1955 disp = XEXP (disp, 0);
1956
1957 if (GET_CODE (disp) == PLUS)
1958 {
1959 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
1960 return 0;
1961 disp = XEXP (disp, 0);
1962 }
1963
1964 if (GET_CODE (disp) != UNSPEC
1965 || XVECLEN (disp, 0) != 1)
1966 return 0;
1967
1968 /* Must be @GOT or @GOTOFF. */
1969 if (XINT (disp, 1) != 6
1970 && XINT (disp, 1) != 7)
1971 return 0;
1972
1973 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
1974 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
1975 return 0;
1976
1977 return 1;
1978}
1979
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE when ADDR is acceptable, FALSE otherwise.  STRICT selects
   the strict register predicates (REG_OK_FOR_*_STRICT_P) instead of the
   nonstrict ones.  On failure, `reason'/`reason_rtx' record what was
   rejected for the TARGET_DEBUG_ADDR dump.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* Split ADDR into base + index*scale + disp; anything that does not
     fit that shape is not an i386 address at all.  */
  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto error;
	}
    }

  /* Validate scale factor: only 1, 2, 4, 8 are encodable, and a scale
     is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  /* Symbolic displacements under -fpic must be the @GOT/@GOTOFF
	     form built by legitimize_pic_address.  */
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto error;
	    }

	  /* Verify that a symbolic pic displacement includes
	     the pic_offset_table_rtx register.  */
	  if (base != pic_offset_table_rtx
	      && (index != pic_offset_table_rtx || scale != 1))
	    {
	      reason = "pic displacement against invalid base";
	      goto error;
	    }
	}
      else if (HALF_PIC_P ())
	{
	  /* Half-pic references must stand alone: no base or index.  */
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f
MM
2154\f
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.

   The unspec numbers used below are the ones output_pic_addr_const
   prints as relocation suffixes: 6 -> @GOT, 7 -> @GOTOFF.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  Build pic_reg + const(unspec 7).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT): a MEM of pic_reg + const(unspec 6).
	 The GOT slot never changes, hence RTX_UNCHANGING_P.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;

      /* The load always needs a register destination.  */
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      /* pic_reg + const(unspec(sym, 7) + offset).  */
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (VOIDmode, new, op1);
	      new = gen_rtx_CONST (VOIDmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize each operand recursively; REG is only
		 offered to the second if the first did not take it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Re-associate so any constant term ends up
		     outermost: (base + reg) + const.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
2276\f
3b3c6a3f
MM
2277/* Try machine-dependent ways of modifying an illegitimate address
2278 to be legitimate. If we find one, return the new, valid address.
2279 This macro is used in only one place: `memory_address' in explow.c.
2280
2281 OLDX is the address as it was before break_out_memory_refs was called.
2282 In some cases it is useful to look at this to decide what needs to be done.
2283
2284 MODE and WIN are passed so that this macro can use
2285 GO_IF_LEGITIMATE_ADDRESS.
2286
2287 It is always safe for this macro to do nothing. It exists to recognize
2288 opportunities to optimize the output.
2289
2290 For the 80386, we handle X+REG by loading X into a register R and
2291 using R+REG. R will go in a general reg and indexing will be used.
2292 However, if REG is a broken-out memory address or multiplication,
2293 nothing needs to be done because REG can certainly go in a general reg.
2294
2295 When -fpic is used, special handling is needed for symbolic references.
2296 See comments by legitimize_pic_address in i386.c for details. */
2297
2298rtx
2299legitimize_address (x, oldx, mode)
2300 register rtx x;
bb5177ac 2301 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
2302 enum machine_mode mode;
2303{
2304 int changed = 0;
2305 unsigned log;
2306
2307 if (TARGET_DEBUG_ADDR)
2308 {
e9a25f70
JL
2309 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2310 GET_MODE_NAME (mode));
3b3c6a3f
MM
2311 debug_rtx (x);
2312 }
2313
2314 if (flag_pic && SYMBOLIC_CONST (x))
2315 return legitimize_pic_address (x, 0);
2316
2317 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2318 if (GET_CODE (x) == ASHIFT
2319 && GET_CODE (XEXP (x, 1)) == CONST_INT
2320 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2321 {
2322 changed = 1;
a269a03c
JC
2323 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2324 GEN_INT (1 << log));
3b3c6a3f
MM
2325 }
2326
2327 if (GET_CODE (x) == PLUS)
2328 {
e9a25f70
JL
2329 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2330
3b3c6a3f
MM
2331 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2332 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2333 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2334 {
2335 changed = 1;
c5c76735
JL
2336 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2337 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2338 GEN_INT (1 << log));
3b3c6a3f
MM
2339 }
2340
2341 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2342 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2343 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2344 {
2345 changed = 1;
c5c76735
JL
2346 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2347 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2348 GEN_INT (1 << log));
3b3c6a3f
MM
2349 }
2350
e9a25f70 2351 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
2352 if (GET_CODE (XEXP (x, 1)) == MULT)
2353 {
2354 rtx tmp = XEXP (x, 0);
2355 XEXP (x, 0) = XEXP (x, 1);
2356 XEXP (x, 1) = tmp;
2357 changed = 1;
2358 }
2359
2360 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2361 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2362 created by virtual register instantiation, register elimination, and
2363 similar optimizations. */
2364 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2365 {
2366 changed = 1;
c5c76735
JL
2367 x = gen_rtx_PLUS (Pmode,
2368 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2369 XEXP (XEXP (x, 1), 0)),
2370 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
2371 }
2372
e9a25f70
JL
2373 /* Canonicalize
2374 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
2375 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2376 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2377 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2378 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2379 && CONSTANT_P (XEXP (x, 1)))
2380 {
00c79232
ML
2381 rtx constant;
2382 rtx other = NULL_RTX;
3b3c6a3f
MM
2383
2384 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2385 {
2386 constant = XEXP (x, 1);
2387 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2388 }
2389 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2390 {
2391 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2392 other = XEXP (x, 1);
2393 }
2394 else
2395 constant = 0;
2396
2397 if (constant)
2398 {
2399 changed = 1;
c5c76735
JL
2400 x = gen_rtx_PLUS (Pmode,
2401 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2402 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2403 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
2404 }
2405 }
2406
2407 if (changed && legitimate_address_p (mode, x, FALSE))
2408 return x;
2409
2410 if (GET_CODE (XEXP (x, 0)) == MULT)
2411 {
2412 changed = 1;
2413 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2414 }
2415
2416 if (GET_CODE (XEXP (x, 1)) == MULT)
2417 {
2418 changed = 1;
2419 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2420 }
2421
2422 if (changed
2423 && GET_CODE (XEXP (x, 1)) == REG
2424 && GET_CODE (XEXP (x, 0)) == REG)
2425 return x;
2426
2427 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2428 {
2429 changed = 1;
2430 x = legitimize_pic_address (x, 0);
2431 }
2432
2433 if (changed && legitimate_address_p (mode, x, FALSE))
2434 return x;
2435
2436 if (GET_CODE (XEXP (x, 0)) == REG)
2437 {
2438 register rtx temp = gen_reg_rtx (Pmode);
2439 register rtx val = force_operand (XEXP (x, 1), temp);
2440 if (val != temp)
2441 emit_move_insn (temp, val);
2442
2443 XEXP (x, 1) = temp;
2444 return x;
2445 }
2446
2447 else if (GET_CODE (XEXP (x, 1)) == REG)
2448 {
2449 register rtx temp = gen_reg_rtx (Pmode);
2450 register rtx val = force_operand (XEXP (x, 0), temp);
2451 if (val != temp)
2452 emit_move_insn (temp, val);
2453
2454 XEXP (x, 0) = temp;
2455 return x;
2456 }
2457 }
2458
2459 return x;
2460}
2a2ab3f9
JVA
2461\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "."  is only meaningful in PIC output.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* Code 'P': emit a PLT reference unless SYMBOL_REF_FLAG marks
	 the symbol (presumably a local/static symbol -- see
	 legitimize_pic_address, which sets up the same distinction).  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Intel dialect brackets the difference with parens, AT&T with
	 square brackets.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Unspec numbers 6/7/8 are the PIC relocation kinds built by
	 legitimize_pic_address.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
2576\f
a269a03c 2577static void
e075ae69 2578put_condition_code (code, mode, reverse, fp, file)
a269a03c 2579 enum rtx_code code;
e075ae69
RH
2580 enum machine_mode mode;
2581 int reverse, fp;
a269a03c
JC
2582 FILE *file;
2583{
a269a03c
JC
2584 const char *suffix;
2585
a269a03c
JC
2586 if (reverse)
2587 code = reverse_condition (code);
e075ae69 2588
a269a03c
JC
2589 switch (code)
2590 {
2591 case EQ:
2592 suffix = "e";
2593 break;
a269a03c
JC
2594 case NE:
2595 suffix = "ne";
2596 break;
a269a03c 2597 case GT:
e075ae69
RH
2598 if (mode == CCNOmode)
2599 abort ();
2600 suffix = "g";
a269a03c 2601 break;
a269a03c 2602 case GTU:
e075ae69
RH
2603 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2604 Those same assemblers have the same but opposite losage on cmov. */
2605 suffix = fp ? "nbe" : "a";
a269a03c 2606 break;
a269a03c 2607 case LT:
e075ae69 2608 if (mode == CCNOmode)
a269a03c
JC
2609 suffix = "s";
2610 else
e075ae69 2611 suffix = "l";
a269a03c 2612 break;
a269a03c
JC
2613 case LTU:
2614 suffix = "b";
2615 break;
a269a03c 2616 case GE:
e075ae69 2617 if (mode == CCNOmode)
a269a03c
JC
2618 suffix = "ns";
2619 else
e075ae69 2620 suffix = "ge";
a269a03c 2621 break;
a269a03c 2622 case GEU:
e075ae69
RH
2623 /* ??? As above. */
2624 suffix = fp ? "nb" : "ae";
a269a03c 2625 break;
a269a03c 2626 case LE:
e075ae69
RH
2627 if (mode == CCNOmode)
2628 abort ();
2629 suffix = "le";
a269a03c 2630 break;
a269a03c
JC
2631 case LEU:
2632 suffix = "be";
2633 break;
a269a03c
JC
2634 default:
2635 abort ();
2636 }
2637 fputs (suffix, file);
2638}
2639
e075ae69
RH
2640void
2641print_reg (x, code, file)
2642 rtx x;
2643 int code;
2644 FILE *file;
e5cb57e8 2645{
e075ae69
RH
2646 if (REGNO (x) == ARG_POINTER_REGNUM
2647 || REGNO (x) == FLAGS_REG
2648 || REGNO (x) == FPSR_REG)
2649 abort ();
e9a25f70 2650
e075ae69
RH
2651 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
2652 putc ('%', file);
2653
2654 if (code == 'w')
2655 code = 2;
2656 else if (code == 'b')
2657 code = 1;
2658 else if (code == 'k')
2659 code = 4;
2660 else if (code == 'y')
2661 code = 3;
2662 else if (code == 'h')
2663 code = 0;
2664 else
2665 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 2666
e075ae69
RH
2667 switch (code)
2668 {
2669 case 3:
2670 if (STACK_TOP_P (x))
2671 {
2672 fputs ("st(0)", file);
2673 break;
2674 }
2675 /* FALLTHRU */
2676 case 4:
2677 case 8:
2678 case 12:
2679 if (! FP_REG_P (x))
2680 putc ('e', file);
2681 /* FALLTHRU */
2682 case 2:
2683 fputs (hi_reg_name[REGNO (x)], file);
2684 break;
2685 case 1:
2686 fputs (qi_reg_name[REGNO (x)], file);
2687 break;
2688 case 0:
2689 fputs (qi_high_reg_name[REGNO (x)], file);
2690 break;
2691 default:
2692 abort ();
fe25fea3 2693 }
e5cb57e8
SC
2694}
2695
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F -- like C, but use the fcmov-safe suffix spellings (fp == 1).
   f -- like F, but print reversed condition.
   R -- print the prefix for register names.
      (NOTE(review): 'R' is documented here but has no case below; it
       would fall into the "invalid operand code" default -- confirm.)
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   X -- no size prefix in Intel dialect (used for memory operands below).
   P -- PIC/PLT handling: suppresses the immediate prefix and is passed
	through to output_pic_addr_const (which prints @PLT for it).  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */

	  if (STACK_REG_P (x))
	    return;

	  /* Intel syntax has no truck with instruction suffixes.  */
	  if (ASSEMBLER_DIALECT != 0)
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1:
	      putc ('b', file);
	      return;

	    case 2:
	      putc ('w', file);
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;
	    }
	  /* NOTE(review): an unhandled size falls out of the inner
	     switch here and reaches the `break' shared with the cases
	     below, silently printing no suffix -- confirm intended.  */

	case 'b':
	case 'w':
	case 'k':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  /* Handled by the REG/MEM/constant code below.  */
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	default:
	  {
	    char str[50];

	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    default:
	      abort();
	    }
	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* Print a single-precision FP immediate as its hex bit pattern.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      /* Immediate constants: AT&T wants a '$' prefix; Intel wants
	 "OFFSET FLAT:" on symbolic addresses.  'P' suppresses both.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
2941\f
/* Print a memory operand whose address is ADDR.  Emits AT&T syntax
   (disp(base,index,scale)) when ASSEMBLER_DIALECT is 0, Intel syntax
   ([base+disp+index*scale]) otherwise.  Aborts if ADDR is not a
   decomposable i386 address.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* Intel dialect needs an explicit segment for a bare
	     numeric address.  */
	  if (ASSEMBLER_DIALECT != 0)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: sym[base+offset+index*scale].  A numeric
	     displacement goes inside the brackets as `offset'.  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets bring their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
3055\f
3056/* Split one or more DImode RTL references into pairs of SImode
3057 references. The RTL can be REG, offsettable MEM, integer constant, or
3058 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3059 split and "num" is its length. lo_half and hi_half are output arrays
3060 that parallel "operands". */
3061
3062void
3063split_di (operands, num, lo_half, hi_half)
3064 rtx operands[];
3065 int num;
3066 rtx lo_half[], hi_half[];
3067{
3068 while (num--)
3069 {
57dbca5e 3070 rtx op = operands[num];
e075ae69
RH
3071 if (CONSTANT_P (op))
3072 split_double (op, &lo_half[num], &hi_half[num]);
3073 else if (! reload_completed)
a269a03c
JC
3074 {
3075 lo_half[num] = gen_lowpart (SImode, op);
3076 hi_half[num] = gen_highpart (SImode, op);
3077 }
3078 else if (GET_CODE (op) == REG)
2a2ab3f9 3079 {
57dbca5e
BS
3080 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3081 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3082 }
57dbca5e 3083 else if (offsettable_memref_p (op))
2a2ab3f9 3084 {
57dbca5e
BS
3085 rtx lo_addr = XEXP (op, 0);
3086 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3087 lo_half[num] = change_address (op, SImode, lo_addr);
3088 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3089 }
3090 else
3091 abort();
3092 }
3093}
3094\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* Assembled in two stages: first the mnemonic stem (fadd/fisub/...)
     is copied into BUF, then a suffix+operand template is appended.  */
  static char buf[100];
  rtx temp;
  const char *p;

  /* Stage 1: pick the mnemonic stem.  An integer-mode source selects
     the fi* (integer-operand) form of the instruction.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      break;

    default:
      abort ();
    }

  strcpy (buf, p);

  /* Stage 2: pick the suffix and operand template.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative ops: canonicalize so that a source matching the
	 destination register is operand 1.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      /* If the other stack operand dies, use the popping form.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0,%2|%2, %0}";
	  else
	    p = "p\t{%2,%0|%0, %2}";
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2,%0|%0, %y2}";
      else
	p = "\t{%2,%0|%0, %2}";
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops: a memory operand in the first position
	 needs the reversed (r) form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
	abort ();

      /* Note that the Unixware assembler, and the AT&T assembler before
	 that, are confusingly not reversed from Intel syntax in this
	 area.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t%0,%2";
	  else
	    p = "rp\t%2,%0";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (STACK_TOP_P (operands[0]))
	    p = "rp\t%0,%1";
	  else
	    p = "p\t%1,%0";
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t%y2,%0";
	  else
	    p = "r\t%y1,%0";
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	p = "\t%1,%0";
      else
	p = "r\t%2,%0";
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 3243
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [SD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* Nonzero when the value on top of the 387 stack dies in this insn.  */
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  /* xops[1] addresses the high byte of the saved control word
     (operands[2]); storing 12 (0x0c) there sets the rounding-control
     bits, selecting round-toward-zero (truncation) per the x87 spec --
     which is exactly the conversion semantics C requires.  */
  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  /* Store directly into a memory destination; otherwise go through the
     scratch memory slot operands[3].  */
  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  /* Save the control word, tweak its rounding bits, install it.  */
  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  /* DImode conversion only exists in the popping form.  */
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  /* Restore the original control word.  */
  output_asm_insn ("fldcw\t%2", operands);

  /* If the destination is a register, copy the result back out of the
     scratch memory slot.  */
  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
	{
	  split_di (operands+0, 1, xops+0, xops+1);
	  split_di (operands+3, 1, xops+2, xops+3);
	  output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
	  output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands);
    }

  return "";
}
cda749b1 3302
e075ae69
RH
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  /* For the fnstsw form, operand 0 is the status-word destination and
     the values being compared shift up by one position.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.
	 The four bits index the template table below; NULL entries are
	 combinations that cannot occur (e.g. integer fcomi).  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 3419
e075ae69 3420/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 3421
e075ae69 3422 If profile_block_flag == 2
2a2ab3f9 3423
e075ae69
RH
3424 Output code to call the subroutine `__bb_init_trace_func'
3425 and pass two parameters to it. The first parameter is
3426 the address of a block allocated in the object module.
3427 The second parameter is the number of the first basic block
3428 of the function.
2a2ab3f9 3429
e075ae69
RH
3430 The name of the block is a local symbol made with this statement:
3431
3432 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 3433
e075ae69
RH
3434 Of course, since you are writing the definition of
3435 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3436 can take a short cut in the definition of this macro and use the
3437 name that you know will result.
2a2ab3f9 3438
e075ae69
RH
3439 The number of the first basic block of the function is
3440 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 3441
e075ae69
RH
3442 If described in a virtual assembler language the code to be
3443 output looks like:
2a2ab3f9 3444
e075ae69
RH
3445 parameter1 <- LPBX0
3446 parameter2 <- BLOCK_OR_LABEL
3447 call __bb_init_trace_func
2a2ab3f9 3448
e075ae69 3449 else if profile_block_flag != 0
e74389ff 3450
e075ae69
RH
3451 Output code to call the subroutine `__bb_init_func'
3452 and pass one single parameter to it, which is the same
3453 as the first parameter to `__bb_init_trace_func'.
e74389ff 3454
e075ae69
RH
3455 The first word of this parameter is a flag which will be nonzero if
3456 the object module has already been initialized. So test this word
3457 first, and do not call `__bb_init_func' if the flag is nonzero.
3458 Note: When profile_block_flag == 2 the test need not be done
3459 but `__bb_init_trace_func' *must* be called.
e74389ff 3460
e075ae69
RH
3461 BLOCK_OR_LABEL may be used to generate a label number as a
3462 branch destination in case `__bb_init_func' will not be called.
e74389ff 3463
e075ae69
RH
3464 If described in a virtual assembler language the code to be
3465 output looks like:
2a2ab3f9 3466
e075ae69
RH
3467 cmp (LPBX0),0
3468 jne local_label
3469 parameter1 <- LPBX0
3470 call __bb_init_func
3471 local_label:
3472*/
c572e5ba 3473
e075ae69
RH
3474void
3475ix86_output_function_block_profiler (file, block_or_label)
3476 FILE *file;
3477 int block_or_label;
c572e5ba 3478{
e075ae69
RH
3479 static int num_func = 0;
3480 rtx xops[8];
3481 char block_table[80], false_label[80];
c572e5ba 3482
e075ae69 3483 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 3484
e075ae69
RH
3485 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3486 xops[5] = stack_pointer_rtx;
3487 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 3488
e075ae69 3489 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 3490
e075ae69 3491 switch (profile_block_flag)
c572e5ba 3492 {
e075ae69
RH
3493 case 2:
3494 xops[2] = GEN_INT (block_or_label);
3495 xops[3] = gen_rtx_MEM (Pmode,
3496 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3497 xops[6] = GEN_INT (8);
e9a25f70 3498
e075ae69
RH
3499 output_asm_insn ("push{l}\t%2", xops);
3500 if (!flag_pic)
3501 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 3502 else
870a0c2c 3503 {
e075ae69
RH
3504 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3505 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3506 }
e075ae69
RH
3507 output_asm_insn ("call\t%P3", xops);
3508 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3509 break;
c572e5ba 3510
e075ae69
RH
3511 default:
3512 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 3513
e075ae69
RH
3514 xops[0] = const0_rtx;
3515 xops[2] = gen_rtx_MEM (Pmode,
3516 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3517 xops[3] = gen_rtx_MEM (Pmode,
3518 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3519 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3520 xops[6] = GEN_INT (4);
a14003ee 3521
e075ae69 3522 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 3523
e075ae69
RH
3524 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3525 output_asm_insn ("jne\t%2", xops);
870a0c2c 3526
e075ae69
RH
3527 if (!flag_pic)
3528 output_asm_insn ("push{l}\t%1", xops);
3529 else
3530 {
3531 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
3532 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3533 }
e075ae69
RH
3534 output_asm_insn ("call\t%P3", xops);
3535 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3536 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3537 num_func++;
3538 break;
c572e5ba 3539 }
2a2ab3f9 3540}
305f097e 3541
e075ae69
RH
3542/* Output assembler code to FILE to increment a counter associated
3543 with basic block number BLOCKNO.
305f097e 3544
e075ae69 3545 If profile_block_flag == 2
ecbc4695 3546
e075ae69
RH
3547 Output code to initialize the global structure `__bb' and
3548 call the function `__bb_trace_func' which will increment the
3549 counter.
ecbc4695 3550
e075ae69
RH
3551 `__bb' consists of two words. In the first word the number
3552 of the basic block has to be stored. In the second word
3553 the address of a block allocated in the object module
3554 has to be stored.
ecbc4695 3555
e075ae69 3556 The basic block number is given by BLOCKNO.
ecbc4695 3557
e075ae69 3558 The address of the block is given by the label created with
305f097e 3559
e075ae69 3560 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 3561
e075ae69 3562 by FUNCTION_BLOCK_PROFILER.
ecbc4695 3563
e075ae69
RH
3564 Of course, since you are writing the definition of
3565 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3566 can take a short cut in the definition of this macro and use the
3567 name that you know will result.
305f097e 3568
e075ae69
RH
3569 If described in a virtual assembler language the code to be
3570 output looks like:
305f097e 3571
e075ae69
RH
3572 move BLOCKNO -> (__bb)
3573 move LPBX0 -> (__bb+4)
3574 call __bb_trace_func
305f097e 3575
e075ae69
RH
3576 Note that function `__bb_trace_func' must not change the
3577 machine state, especially the flag register. To grant
3578 this, you must output code to save and restore registers
3579 either in this macro or in the macros MACHINE_STATE_SAVE
3580 and MACHINE_STATE_RESTORE. The last two macros will be
3581 used in the function `__bb_trace_func', so you must make
3582 sure that the function prologue does not change any
3583 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 3584
e075ae69 3585 else if profile_block_flag != 0
305f097e 3586
e075ae69
RH
3587 Output code to increment the counter directly.
3588 Basic blocks are numbered separately from zero within each
3589 compiled object module. The count associated with block number
3590 BLOCKNO is at index BLOCKNO in an array of words; the name of
3591 this array is a local symbol made with this statement:
32b5b1aa 3592
e075ae69 3593 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 3594
e075ae69
RH
3595 Of course, since you are writing the definition of
3596 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3597 can take a short cut in the definition of this macro and use the
3598 name that you know will result.
32b5b1aa 3599
e075ae69
RH
3600 If described in a virtual assembler language the code to be
3601 output looks like:
32b5b1aa 3602
e075ae69
RH
3603 inc (LPBX2+4*BLOCKNO)
3604*/
32b5b1aa 3605
e075ae69
RH
/* Emit assembly that bumps the profile counter for basic block BLOCKNO;
   see the comment above for the two forms selected by profile_block_flag.  */

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;	/* Same buffer, two roles by case.  */

  switch (profile_block_flag)
    {
    case 2:
      /* Store BLOCKNO and LPBX0 into the global __bb structure, then
	 call __bb_trace_func.  Flags are preserved across the call via
	 pushf/popf since the counter bump must not perturb them.  */
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Under PIC the address of LPBX0 must be computed via lea;
	     borrow eax and restore it around the computation.  */
	  xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Increment word BLOCKNO of the LPBX2 counter array in place;
	 incl touches no registers, so no state needs saving.  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 3665\f
/* Expand a move of operands[1] into operands[0] in mode MODE, massaging
   the operands into forms the movMODE patterns accept, then emit the SET.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* During/after reload we may not create new pseudos or force
     constants to memory.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize the symbolic address, reusing the destination
	     register as scratch when possible.  If legitimization
	     already produced the final move, we are done.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem-to-mem moves must go through a register.  */
      if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant that isn't 0 or 1
	     into a register, force the value to memory now, since we'll
	     get better code out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (mode, operands[1]);
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && ! standard_80387_constant_p (operands[1]))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 3716
e075ae69
RH
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  Note:
   despite older comments this function returns nothing -- it either
   emits the insn(s) or aborts.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source already matches the memory destination.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && !reload_in_progress && !reload_completed)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Most x86 ALU insns clobber the flags; say so explicitly.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
3807
3808/* Return TRUE or FALSE depending on whether the binary operator meets the
3809 appropriate constraints. */
3810
3811int
3812ix86_binary_operator_ok (code, mode, operands)
3813 enum rtx_code code;
3814 enum machine_mode mode ATTRIBUTE_UNUSED;
3815 rtx operands[3];
3816{
3817 /* Both source operands cannot be in memory. */
3818 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
3819 return 0;
3820 /* If the operation is not commutable, source 1 cannot be a constant. */
3821 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
3822 return 0;
3823 /* If the destination is memory, we must have a matching source operand. */
3824 if (GET_CODE (operands[0]) == MEM
3825 && ! (rtx_equal_p (operands[0], operands[1])
3826 || (GET_RTX_CLASS (code) == 'c'
3827 && rtx_equal_p (operands[0], operands[2]))))
3828 return 0;
06a964de
JH
3829 /* If the operation is not commutable and the source 1 is memory, we must
3830 have a matching destionation. */
3831 if (GET_CODE (operands[1]) == MEM
3832 && GET_RTX_CLASS (code) != 'c'
3833 && ! rtx_equal_p (operands[0], operands[1]))
3834 return 0;
e075ae69
RH
3835 return 1;
3836}
3837
3838/* Attempt to expand a unary operator. Make the expansion closer to the
3839 actual machine, then just general_operand, which will allow 2 separate
3840 memory references (one output, one input) in a single insn. Return
3841 whether the insn fails, or succeeds. */
3842
3843int
3844ix86_expand_unary_operator (code, mode, operands)
3845 enum rtx_code code;
3846 enum machine_mode mode;
3847 rtx operands[];
3848{
06a964de
JH
3849 int matching_memory;
3850 rtx src, dst, op, clob;
3851
3852 dst = operands[0];
3853 src = operands[1];
e075ae69 3854
06a964de
JH
3855 /* If the destination is memory, and we do not have matching source
3856 operands, do things in registers. */
3857 matching_memory = 0;
3858 if (GET_CODE (dst) == MEM)
32b5b1aa 3859 {
06a964de
JH
3860 if (rtx_equal_p (dst, src))
3861 matching_memory = 1;
e075ae69 3862 else
06a964de 3863 dst = gen_reg_rtx (mode);
32b5b1aa 3864 }
e9a25f70 3865
06a964de
JH
3866 /* When source operand is memory, destination must match. */
3867 if (!matching_memory && GET_CODE (src) == MEM)
3868 src = force_reg (mode, src);
3869
3870 /* If optimizing, copy to regs to improve CSE */
3871 if (optimize && !reload_in_progress && !reload_completed)
3872 {
3873 if (GET_CODE (dst) == MEM)
3874 dst = gen_reg_rtx (mode);
3875 if (GET_CODE (src) == MEM)
3876 src = force_reg (mode, src);
3877 }
3878
3879 /* Emit the instruction. */
3880
3881 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
3882 if (reload_in_progress || code == NOT)
3883 {
3884 /* Reload doesn't know about the flags register, and doesn't know that
3885 it doesn't want to clobber it. */
3886 if (code != NOT)
3887 abort ();
3888 emit_insn (op);
3889 }
3890 else
3891 {
3892 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3893 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
3894 }
3895
3896 /* Fix up the destination if needed. */
3897 if (dst != operands[0])
3898 emit_move_insn (operands[0], dst);
e075ae69
RH
3899}
3900
3901/* Return TRUE or FALSE depending on whether the unary operator meets the
3902 appropriate constraints. */
3903
3904int
3905ix86_unary_operator_ok (code, mode, operands)
3906 enum rtx_code code ATTRIBUTE_UNUSED;
3907 enum machine_mode mode ATTRIBUTE_UNUSED;
3908 rtx operands[2] ATTRIBUTE_UNUSED;
3909{
06a964de
JH
3910 /* If one of operands is memory, source and destination must match. */
3911 if ((GET_CODE (operands[0]) == MEM
3912 || GET_CODE (operands[1]) == MEM)
3913 && ! rtx_equal_p (operands[0], operands[1]))
3914 return FALSE;
e075ae69
RH
3915 return TRUE;
3916}
3917
3918/* Produce an unsigned comparison for a given signed comparison. */
3919
3920static enum rtx_code
3921unsigned_comparison (code)
3922 enum rtx_code code;
3923{
3924 switch (code)
32b5b1aa 3925 {
e075ae69
RH
3926 case GT:
3927 code = GTU;
3928 break;
3929 case LT:
3930 code = LTU;
3931 break;
3932 case GE:
3933 code = GEU;
3934 break;
3935 case LE:
3936 code = LEU;
3937 break;
3938 case EQ:
3939 case NE:
3940 case LEU:
3941 case LTU:
3942 case GEU:
3943 case GTU:
3944 break;
3945 default:
3946 abort ();
3947 }
3948 return code;
3949}
3950
3951/* Generate insn patterns to do an integer compare of OPERANDS. */
3952
3953static rtx
3954ix86_expand_int_compare (code, op0, op1)
3955 enum rtx_code code;
3956 rtx op0, op1;
3957{
3958 enum machine_mode cmpmode;
3959 rtx tmp, flags;
3960
3961 cmpmode = SELECT_CC_MODE (code, op0, op1);
3962 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
3963
3964 /* This is very simple, but making the interface the same as in the
3965 FP case makes the rest of the code easier. */
3966 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
3967 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
3968
3969 /* Return the test that should be put into the flags user, i.e.
3970 the bcc, scc, or cmov instruction. */
3971 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
3972}
3973
3974/* Generate insn patterns to do a floating point compare of OPERANDS.
3975 If UNORDERED, allow for unordered compares. */
3976
3977static rtx
3978ix86_expand_fp_compare (code, op0, op1, unordered)
3979 enum rtx_code code;
3980 rtx op0, op1;
3981 int unordered;
3982{
3983 enum machine_mode fpcmp_mode;
3984 enum machine_mode intcmp_mode;
3985 rtx tmp;
3986
3987 /* When not doing IEEE compliant compares, disable unordered. */
3988 if (! TARGET_IEEE_FP)
3989 unordered = 0;
3990 fpcmp_mode = unordered ? CCFPUmode : CCFPmode;
3991
3992 /* ??? If we knew whether invalid-operand exceptions were masked,
3993 we could rely on fcom to raise an exception and take care of
3994 NaNs. But we don't. We could know this from c9x math bits. */
3995 if (TARGET_IEEE_FP)
3996 unordered = 1;
3997
3998 /* All of the unordered compare instructions only work on registers.
3999 The same is true of the XFmode compare instructions. */
4000 if (unordered || GET_MODE (op0) == XFmode)
4001 {
4002 op0 = force_reg (GET_MODE (op0), op0);
4003 op1 = force_reg (GET_MODE (op1), op1);
4004 }
4005 else
4006 {
4007 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4008 things around if they appear profitable, otherwise force op0
4009 into a register. */
4010
4011 if (standard_80387_constant_p (op0) == 0
4012 || (GET_CODE (op0) == MEM
4013 && ! (standard_80387_constant_p (op1) == 0
4014 || GET_CODE (op1) == MEM)))
32b5b1aa 4015 {
e075ae69
RH
4016 rtx tmp;
4017 tmp = op0, op0 = op1, op1 = tmp;
4018 code = swap_condition (code);
4019 }
4020
4021 if (GET_CODE (op0) != REG)
4022 op0 = force_reg (GET_MODE (op0), op0);
4023
4024 if (CONSTANT_P (op1))
4025 {
4026 if (standard_80387_constant_p (op1))
4027 op1 = force_reg (GET_MODE (op1), op1);
4028 else
4029 op1 = validize_mem (force_const_mem (GET_MODE (op1), op1));
32b5b1aa
SC
4030 }
4031 }
e9a25f70 4032
e075ae69
RH
4033 /* %%% fcomi is probably always faster, even when dealing with memory,
4034 since compare-and-branch would be three insns instead of four. */
4035 if (TARGET_CMOVE && !unordered)
32b5b1aa 4036 {
e075ae69
RH
4037 if (GET_CODE (op0) != REG)
4038 op0 = force_reg (GET_MODE (op0), op0);
4039 if (GET_CODE (op1) != REG)
4040 op1 = force_reg (GET_MODE (op1), op1);
4041
4042 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4043 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4044 emit_insn (tmp);
4045
4046 /* The FP codes work out to act like unsigned. */
4047 code = unsigned_comparison (code);
4048 intcmp_mode = fpcmp_mode;
4049 }
4050 else
4051 {
4052 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e9a25f70 4053
e075ae69
RH
4054 rtx tmp2;
4055 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4056 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4057 tmp = gen_reg_rtx (HImode);
4058 emit_insn (gen_rtx_SET (VOIDmode, tmp, tmp2));
4059
4060 if (! unordered)
32b5b1aa 4061 {
e075ae69
RH
4062 /* We have two options here -- use sahf, or testing bits of ah
4063 directly. On PPRO, they are equivalent, sahf being one byte
4064 smaller. On Pentium, sahf is non-pairable while test is UV
4065 pairable. */
4066
4067 if (TARGET_USE_SAHF || optimize_size)
32b5b1aa 4068 {
e075ae69 4069 do_sahf:
e9a25f70 4070
e075ae69
RH
4071 /* The FP codes work out to act like unsigned. */
4072 code = unsigned_comparison (code);
4073 emit_insn (gen_x86_sahf_1 (tmp));
4074 intcmp_mode = CCmode;
32b5b1aa
SC
4075 }
4076 else
4077 {
e075ae69
RH
4078 /*
4079 * The numbers below correspond to the bits of the FPSW in AH.
4080 * C3, C2, and C0 are in bits 0x40, 0x40, and 0x01 respectively.
4081 *
4082 * cmp C3 C2 C0
4083 * > 0 0 0
4084 * < 0 0 1
4085 * = 1 0 0
4086 * un 1 1 1
4087 */
4088
4089 int mask;
4090
4091 switch (code)
32b5b1aa 4092 {
e075ae69
RH
4093 case GT:
4094 mask = 0x01;
4095 code = EQ;
4096 break;
4097 case LT:
4098 mask = 0x01;
4099 code = NE;
4100 break;
4101 case GE:
4102 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4103 faster in all cases to just fall back on sahf. */
4104 goto do_sahf;
4105 case LE:
4106 mask = 0x41;
4107 code = NE;
4108 break;
4109 case EQ:
4110 mask = 0x40;
4111 code = NE;
4112 break;
4113 case NE:
4114 mask = 0x40;
4115 code = EQ;
4116 break;
4117 default:
4118 abort ();
32b5b1aa 4119 }
e075ae69
RH
4120
4121 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (mask)));
4122 intcmp_mode = CCNOmode;
32b5b1aa
SC
4123 }
4124 }
4125 else
4126 {
e075ae69
RH
4127 /* In the unordered case, we have to check C2 for NaN's, which
4128 doesn't happen to work out to anything nice combination-wise.
4129 So do some bit twiddling on the value we've got in AH to come
4130 up with an appropriate set of condition codes. */
4131
4132 intcmp_mode = CCNOmode;
4133 switch (code)
32b5b1aa 4134 {
e075ae69
RH
4135 case GT:
4136 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x45)));
4137 code = EQ;
4138 break;
4139 case LT:
4140 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4141 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x01)));
4142 intcmp_mode = CCmode;
4143 code = EQ;
4144 break;
4145 case GE:
4146 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x05)));
4147 code = EQ;
4148 break;
4149 case LE:
4150 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4151 emit_insn (gen_addqi_ext_1 (tmp, tmp, constm1_rtx));
4152 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4153 intcmp_mode = CCmode;
4154 code = LTU;
4155 break;
4156 case EQ:
4157 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4158 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4159 intcmp_mode = CCmode;
4160 code = EQ;
4161 break;
4162 case NE:
4163 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
7abd4e00 4164 emit_insn (gen_xorqi_cc_ext_1 (tmp, tmp, GEN_INT (0x40)));
e075ae69
RH
4165 code = NE;
4166 break;
4167 default:
4168 abort ();
32b5b1aa
SC
4169 }
4170 }
32b5b1aa 4171 }
e075ae69
RH
4172
4173 /* Return the test that should be put into the flags user, i.e.
4174 the bcc, scc, or cmov instruction. */
4175 return gen_rtx_fmt_ee (code, VOIDmode,
4176 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4177 const0_rtx);
4178}
4179
4180static rtx
4181ix86_expand_compare (code, unordered)
4182 enum rtx_code code;
4183 int unordered;
4184{
4185 rtx op0, op1, ret;
4186 op0 = ix86_compare_op0;
4187 op1 = ix86_compare_op1;
4188
4189 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4190 ret = ix86_expand_fp_compare (code, op0, op1, unordered);
32b5b1aa 4191 else
e075ae69
RH
4192 ret = ix86_expand_int_compare (code, op0, op1);
4193
4194 return ret;
4195}
4196
4197void
4198ix86_expand_branch (code, unordered, label)
4199 enum rtx_code code;
4200 int unordered;
4201 rtx label;
4202{
4203 rtx tmp, lo[2], hi[2], label2;
4204 enum rtx_code code1, code2, code3;
4205
4206 if (GET_MODE (ix86_compare_op0) != DImode)
32b5b1aa 4207 {
e075ae69
RH
4208 tmp = ix86_expand_compare (code, unordered);
4209 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4210 gen_rtx_LABEL_REF (VOIDmode, label),
4211 pc_rtx);
4212 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa
SC
4213 return;
4214 }
32b5b1aa 4215
e075ae69
RH
4216 /* Expand DImode branch into multiple compare+branch. */
4217
4218 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
32b5b1aa 4219 {
e075ae69
RH
4220 tmp = ix86_compare_op0;
4221 ix86_compare_op0 = ix86_compare_op1;
4222 ix86_compare_op1 = tmp;
4223 code = swap_condition (code);
4224 }
4225 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4226 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 4227
e075ae69
RH
4228 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
4229 two branches. This costs one extra insn, so disable when optimizing
4230 for size. */
32b5b1aa 4231
e075ae69
RH
4232 if ((code == EQ || code == NE)
4233 && (!optimize_size
4234 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4235 {
4236 rtx xor0, xor1;
32b5b1aa 4237
e075ae69
RH
4238 xor1 = hi[0];
4239 if (hi[1] != const0_rtx)
4240 {
4241 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4242 NULL_RTX, 0, OPTAB_WIDEN);
4243 }
32b5b1aa 4244
e075ae69
RH
4245 xor0 = lo[0];
4246 if (lo[1] != const0_rtx)
4247 {
4248 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4249 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa
SC
4250 }
4251
e075ae69
RH
4252 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4253 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 4254
e075ae69
RH
4255 ix86_compare_op0 = tmp;
4256 ix86_compare_op1 = const0_rtx;
4257 ix86_expand_branch (code, unordered, label);
4258 return;
32b5b1aa
SC
4259 }
4260
e075ae69
RH
4261 /* Otherwise, if we are doing less-than, op1 is a constant and the
4262 low word is zero, then we can just examine the high word. */
4263
4264 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4265 && (code == LT || code == LTU))
32b5b1aa 4266 {
e075ae69
RH
4267 ix86_compare_op0 = hi[0];
4268 ix86_compare_op1 = hi[1];
4269 ix86_expand_branch (code, unordered, label);
4270 return;
4271 }
32b5b1aa 4272
e075ae69
RH
4273 /* Otherwise, we need two or three jumps. */
4274
4275 label2 = gen_label_rtx ();
32b5b1aa 4276
e075ae69
RH
4277 code1 = code;
4278 code2 = swap_condition (code);
4279 code3 = unsigned_condition (code);
4280
4281 switch (code)
4282 {
4283 case LT: case GT: case LTU: case GTU:
4284 break;
4285
4286 case LE: code1 = LT; code2 = GT; break;
4287 case GE: code1 = GT; code2 = LT; break;
4288 case LEU: code1 = LTU; code2 = GTU; break;
4289 case GEU: code1 = GTU; code2 = LTU; break;
4290
4291 case EQ: code1 = NIL; code2 = NE; break;
4292 case NE: code2 = NIL; break;
4293
4294 default:
4295 abort ();
32b5b1aa 4296 }
e075ae69
RH
4297
4298 /*
4299 * a < b =>
4300 * if (hi(a) < hi(b)) goto true;
4301 * if (hi(a) > hi(b)) goto false;
4302 * if (lo(a) < lo(b)) goto true;
4303 * false:
4304 */
4305
4306 ix86_compare_op0 = hi[0];
4307 ix86_compare_op1 = hi[1];
4308
4309 if (code1 != NIL)
4310 ix86_expand_branch (code1, unordered, label);
4311 if (code2 != NIL)
4312 ix86_expand_branch (code2, unordered, label2);
4313
4314 ix86_compare_op0 = lo[0];
4315 ix86_compare_op1 = lo[1];
4316 ix86_expand_branch (code3, unordered, label);
4317
4318 if (code2 != NIL)
4319 emit_label (label2);
32b5b1aa 4320}
e075ae69 4321
32b5b1aa 4322int
e075ae69
RH
4323ix86_expand_setcc (code, unordered, dest)
4324 enum rtx_code code;
4325 int unordered;
4326 rtx dest;
32b5b1aa 4327{
e075ae69
RH
4328 rtx ret, tmp;
4329 int type;
4330
4331 if (GET_MODE (ix86_compare_op0) == DImode)
4332 return 0; /* FAIL */
4333
4334 /* Three modes of generation:
4335 0 -- destination does not overlap compare sources:
4336 clear dest first, emit strict_low_part setcc.
4337 1 -- destination does overlap compare sources:
4338 emit subreg setcc, zero extend.
4339 2 -- destination is in QImode:
4340 emit setcc only.
4341 */
4342
4343 type = 0;
4344 /* %%% reload problems with in-out. Revisit. */
4345 type = 1;
4346
4347 if (GET_MODE (dest) == QImode)
4348 type = 2;
4349 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
4350 || reg_overlap_mentioned_p (dest, ix86_compare_op0))
4351 type = 1;
4352
4353 if (type == 0)
4354 emit_move_insn (dest, const0_rtx);
4355
4356 ret = ix86_expand_compare (code, unordered);
4357 PUT_MODE (ret, QImode);
4358
4359 tmp = dest;
4360 if (type == 0)
32b5b1aa 4361 {
e075ae69
RH
4362 tmp = gen_lowpart (QImode, dest);
4363 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4364 }
4365 else if (type == 1)
4366 {
4367 if (!cse_not_expected)
4368 tmp = gen_reg_rtx (QImode);
4369 else
4370 tmp = gen_lowpart (QImode, dest);
4371 }
32b5b1aa 4372
e075ae69
RH
4373 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4374
4375 if (type == 1)
4376 {
4377 rtx clob;
4378
4379 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4380 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4381 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4382 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4383 emit_insn (tmp);
32b5b1aa 4384 }
e075ae69
RH
4385
4386 return 1; /* DONE */
32b5b1aa 4387}
e075ae69 4388
32b5b1aa 4389int
e075ae69
RH
4390ix86_expand_int_movcc (operands)
4391 rtx operands[];
32b5b1aa 4392{
e075ae69
RH
4393 enum rtx_code code = GET_CODE (operands[1]), compare_code;
4394 rtx compare_seq, compare_op;
32b5b1aa 4395
36583fea
JH
4396 /* When the compare code is not LTU or GEU, we can not use sbbl case.
4397 In case comparsion is done with immediate, we can convert it to LTU or
4398 GEU by altering the integer. */
4399
4400 if ((code == LEU || code == GTU)
4401 && GET_CODE (ix86_compare_op1) == CONST_INT
4402 && GET_MODE (operands[0]) != HImode
4403 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
4404 && GET_CODE (operands[2]) == CONST_INT
4405 && GET_CODE (operands[3]) == CONST_INT)
4406 {
4407 if (code == LEU)
4408 code = LTU;
4409 else
4410 code = GEU;
4411 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
4412 }
e075ae69
RH
4413 start_sequence ();
4414 compare_op = ix86_expand_compare (code, code == EQ || code == NE);
4415 compare_seq = gen_sequence ();
4416 end_sequence ();
4417
4418 compare_code = GET_CODE (compare_op);
4419
4420 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4421 HImode insns, we'd be swallowed in word prefix ops. */
4422
4423 if (GET_MODE (operands[0]) != HImode
4424 && GET_CODE (operands[2]) == CONST_INT
4425 && GET_CODE (operands[3]) == CONST_INT)
4426 {
4427 rtx out = operands[0];
4428 HOST_WIDE_INT ct = INTVAL (operands[2]);
4429 HOST_WIDE_INT cf = INTVAL (operands[3]);
4430 HOST_WIDE_INT diff;
4431
36583fea 4432 if (compare_code == LTU || compare_code == GEU)
e075ae69 4433 {
e075ae69
RH
4434
4435 /* Detect overlap between destination and compare sources. */
4436 rtx tmp = out;
4437
36583fea
JH
4438 /* To simplify rest of code, restrict to the GEU case. */
4439 if (compare_code == LTU)
4440 {
4441 int tmp = ct;
4442 ct = cf;
4443 cf = tmp;
4444 compare_code = reverse_condition (compare_code);
4445 code = reverse_condition (code);
4446 }
4447 diff = ct - cf;
4448
e075ae69
RH
4449 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
4450 || reg_overlap_mentioned_p (out, ix86_compare_op0))
4451 tmp = gen_reg_rtx (SImode);
4452
4453 emit_insn (compare_seq);
4454 emit_insn (gen_x86_movsicc_0_m1 (tmp));
4455
36583fea
JH
4456 if (diff == 1)
4457 {
4458 /*
4459 * cmpl op0,op1
4460 * sbbl dest,dest
4461 * [addl dest, ct]
4462 *
4463 * Size 5 - 8.
4464 */
4465 if (ct)
4466 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4467 }
4468 else if (cf == -1)
4469 {
4470 /*
4471 * cmpl op0,op1
4472 * sbbl dest,dest
4473 * orl $ct, dest
4474 *
4475 * Size 8.
4476 */
4477 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
4478 }
4479 else if (diff == -1 && ct)
4480 {
4481 /*
4482 * cmpl op0,op1
4483 * sbbl dest,dest
4484 * xorl $-1, dest
4485 * [addl dest, cf]
4486 *
4487 * Size 8 - 11.
4488 */
4489 emit_insn (gen_one_cmplsi2 (tmp, tmp));
4490 if (cf)
4491 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
4492 }
4493 else
4494 {
4495 /*
4496 * cmpl op0,op1
4497 * sbbl dest,dest
4498 * andl cf - ct, dest
4499 * [addl dest, ct]
4500 *
4501 * Size 8 - 11.
4502 */
4503 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
4504 if (ct)
4505 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4506 }
e075ae69
RH
4507
4508 if (tmp != out)
4509 emit_move_insn (out, tmp);
4510
4511 return 1; /* DONE */
4512 }
4513
4514 diff = ct - cf;
4515 if (diff < 0)
4516 {
4517 HOST_WIDE_INT tmp;
4518 tmp = ct, ct = cf, cf = tmp;
4519 diff = -diff;
4520 compare_code = reverse_condition (compare_code);
4521 code = reverse_condition (code);
4522 }
4523 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
4524 || diff == 3 || diff == 5 || diff == 9)
4525 {
4526 /*
4527 * xorl dest,dest
4528 * cmpl op1,op2
4529 * setcc dest
4530 * lea cf(dest*(ct-cf)),dest
4531 *
4532 * Size 14.
4533 *
4534 * This also catches the degenerate setcc-only case.
4535 */
4536
4537 rtx tmp;
4538 int nops;
4539
4540 out = emit_store_flag (out, code, ix86_compare_op0,
4541 ix86_compare_op1, VOIDmode, 0, 1);
4542
4543 nops = 0;
4544 if (diff == 1)
4545 tmp = out;
4546 else
4547 {
4548 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
4549 nops++;
4550 if (diff & 1)
4551 {
4552 tmp = gen_rtx_PLUS (SImode, tmp, out);
4553 nops++;
4554 }
4555 }
4556 if (cf != 0)
4557 {
4558 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
4559 nops++;
4560 }
4561 if (tmp != out)
4562 {
4563 if (nops == 0)
4564 emit_move_insn (out, tmp);
4565 else if (nops == 1)
4566 {
4567 rtx clob;
4568
4569 clob = gen_rtx_REG (CCmode, FLAGS_REG);
4570 clob = gen_rtx_CLOBBER (VOIDmode, clob);
4571
4572 tmp = gen_rtx_SET (VOIDmode, out, tmp);
4573 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4574 emit_insn (tmp);
4575 }
4576 else
4577 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
4578 }
4579 if (out != operands[0])
4580 emit_move_insn (operands[0], out);
4581
4582 return 1; /* DONE */
4583 }
4584
4585 /*
4586 * General case: Jumpful:
4587 * xorl dest,dest cmpl op1, op2
4588 * cmpl op1, op2 movl ct, dest
4589 * setcc dest jcc 1f
4590 * decl dest movl cf, dest
4591 * andl (cf-ct),dest 1:
4592 * addl ct,dest
4593 *
4594 * Size 20. Size 14.
4595 *
4596 * This is reasonably steep, but branch mispredict costs are
4597 * high on modern cpus, so consider failing only if optimizing
4598 * for space.
4599 *
4600 * %%% Parameterize branch_cost on the tuning architecture, then
4601 * use that. The 80386 couldn't care less about mispredicts.
4602 */
4603
4604 if (!optimize_size && !TARGET_CMOVE)
4605 {
4606 if (ct == 0)
4607 {
4608 ct = cf;
4609 cf = 0;
4610 compare_code = reverse_condition (compare_code);
4611 code = reverse_condition (code);
4612 }
4613
4614 out = emit_store_flag (out, code, ix86_compare_op0,
4615 ix86_compare_op1, VOIDmode, 0, 1);
4616
4617 emit_insn (gen_addsi3 (out, out, constm1_rtx));
4618 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
4619 if (ct != 0)
4620 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4621 if (out != operands[0])
4622 emit_move_insn (operands[0], out);
4623
4624 return 1; /* DONE */
4625 }
4626 }
4627
4628 if (!TARGET_CMOVE)
4629 {
4630 /* Try a few things more with specific constants and a variable. */
4631
4632 optab op = NULL;
4633 rtx var, orig_out, out, tmp;
4634
4635 if (optimize_size)
4636 return 0; /* FAIL */
4637
4638 /* If one of the two operands is an interesting constant, load a
4639 constant with the above and mask it in with a logical operation. */
4640
4641 if (GET_CODE (operands[2]) == CONST_INT)
4642 {
4643 var = operands[3];
4644 if (INTVAL (operands[2]) == 0)
4645 operands[3] = constm1_rtx, op = and_optab;
4646 else if (INTVAL (operands[2]) == -1)
4647 operands[3] = const0_rtx, op = ior_optab;
4648 }
4649 else if (GET_CODE (operands[3]) == CONST_INT)
4650 {
4651 var = operands[2];
4652 if (INTVAL (operands[3]) == 0)
4653 operands[2] = constm1_rtx, op = and_optab;
4654 else if (INTVAL (operands[3]) == -1)
4655 operands[2] = const0_rtx, op = ior_optab;
4656 }
4657
4658 if (op == NULL)
4659 return 0; /* FAIL */
4660
4661 orig_out = operands[0];
4662 tmp = gen_reg_rtx (GET_MODE (orig_out));
4663 operands[0] = tmp;
4664
4665 /* Recurse to get the constant loaded. */
4666 if (ix86_expand_int_movcc (operands) == 0)
4667 return 0; /* FAIL */
4668
4669 /* Mask in the interesting variable. */
4670 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
4671 OPTAB_WIDEN);
4672 if (out != orig_out)
4673 emit_move_insn (orig_out, out);
4674
4675 return 1; /* DONE */
4676 }
4677
4678 /*
4679 * For comparison with above,
4680 *
4681 * movl cf,dest
4682 * movl ct,tmp
4683 * cmpl op1,op2
4684 * cmovcc tmp,dest
4685 *
4686 * Size 15.
4687 */
4688
4689 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
4690 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
4691 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
4692 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
4693
4694 emit_insn (compare_seq);
4695 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4696 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4697 compare_op, operands[2],
4698 operands[3])));
4699
4700 return 1; /* DONE */
e9a25f70 4701}
e075ae69 4702
32b5b1aa 4703int
e075ae69
RH
4704ix86_expand_fp_movcc (operands)
4705 rtx operands[];
32b5b1aa 4706{
e075ae69
RH
4707 enum rtx_code code;
4708 enum machine_mode mode;
4709 rtx tmp;
32b5b1aa 4710
e075ae69
RH
4711 /* The floating point conditional move instructions don't directly
4712 support conditions resulting from a signed integer comparison. */
32b5b1aa 4713
e075ae69
RH
4714 code = GET_CODE (operands[1]);
4715 switch (code)
4716 {
4717 case LT:
4718 case LE:
4719 case GE:
4720 case GT:
4721 tmp = gen_reg_rtx (QImode);
4722 ix86_expand_setcc (code, 0, tmp);
4723 code = NE;
4724 ix86_compare_op0 = tmp;
4725 ix86_compare_op1 = const0_rtx;
4726 break;
4727
4728 default:
4729 break;
4730 }
e9a25f70 4731
e075ae69
RH
4732 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
4733 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
4734 gen_rtx_COMPARE (mode,
4735 ix86_compare_op0,
4736 ix86_compare_op1)));
4737 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4738 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4739 gen_rtx_fmt_ee (code, VOIDmode,
4740 gen_rtx_REG (mode, FLAGS_REG),
4741 const0_rtx),
4742 operands[2],
4743 operands[3])));
32b5b1aa 4744
e075ae69 4745 return 1;
32b5b1aa
SC
4746}
4747
2450a057
JH
4748/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
4749 works for floating pointer parameters and nonoffsetable memories.
4750 For pushes, it returns just stack offsets; the values will be saved
4751 in the right order. Maximally three parts are generated. */
4752
4753static void
4754ix86_split_to_parts (operand, parts, mode)
4755 rtx operand;
4756 rtx *parts;
4757 enum machine_mode mode;
32b5b1aa 4758{
2450a057
JH
4759 int size = GET_MODE_SIZE (mode) / 4;
4760
4761 if (size < 2 || size > 3)
4762 abort ();
4763
4764 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 4765 {
2450a057
JH
4766 /* The only non-offsetable memories we handle are pushes. */
4767 if (! push_operand (operand, VOIDmode))
4768 abort ();
4769
4770 PUT_MODE (operand, SImode);
4771 parts[0] = parts[1] = parts[2] = operand;
4772 }
4773 else
4774 {
4775 if (mode == DImode)
4776 split_di (&operand, 1, &parts[0], &parts[1]);
4777 else
e075ae69 4778 {
2450a057
JH
4779 if (REG_P (operand))
4780 {
4781 if (!reload_completed)
4782 abort ();
4783 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
4784 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
4785 if (size == 3)
4786 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
4787 }
4788 else if (offsettable_memref_p (operand))
4789 {
4790 PUT_MODE (operand, SImode);
4791 parts[0] = operand;
4792 parts[1] = adj_offsettable_operand (operand, 4);
4793 if (size == 3)
4794 parts[2] = adj_offsettable_operand (operand, 8);
4795 }
4796 else if (GET_CODE (operand) == CONST_DOUBLE)
4797 {
4798 REAL_VALUE_TYPE r;
4799 long l[3];
4800
4801 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
4802 switch (mode)
4803 {
4804 case XFmode:
4805 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
4806 parts[2] = GEN_INT (l[2]);
4807 break;
4808 case DFmode:
4809 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
4810 break;
4811 default:
4812 abort ();
4813 }
4814 parts[1] = GEN_INT (l[1]);
4815 parts[0] = GEN_INT (l[0]);
4816 }
4817 else
4818 abort ();
e075ae69 4819 }
2450a057
JH
4820 }
4821
4822 return;
4823}
4824
4825/* Emit insns to perform a move or push of DI, DF, and XF values.
4826 Return false when normal moves are needed; true when all required
4827 insns have been emitted. Operands 2-4 contain the input values
4828 int the correct order; operands 5-7 contain the output values. */
4829
4830int
4831ix86_split_long_move (operands1)
4832 rtx operands1[];
4833{
4834 rtx part[2][3];
4835 rtx operands[2];
4836 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
4837 int push = 0;
4838 int collisions = 0;
4839
4840 /* Make our own copy to avoid clobbering the operands. */
4841 operands[0] = copy_rtx (operands1[0]);
4842 operands[1] = copy_rtx (operands1[1]);
4843
4844 if (size < 2 || size > 3)
4845 abort ();
4846
4847 /* The only non-offsettable memory we handle is push. */
4848 if (push_operand (operands[0], VOIDmode))
4849 push = 1;
4850 else if (GET_CODE (operands[0]) == MEM
4851 && ! offsettable_memref_p (operands[0]))
4852 abort ();
4853
4854 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
4855 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
4856
4857 /* When emitting push, take care for source operands on the stack. */
4858 if (push && GET_CODE (operands[1]) == MEM
4859 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
4860 {
4861 if (size == 3)
4862 part[1][1] = part[1][2];
4863 part[1][0] = part[1][1];
4864 }
4865
4866 /* We need to do copy in the right order in case an address register
4867 of the source overlaps the destination. */
4868 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
4869 {
4870 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
4871 collisions++;
4872 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
4873 collisions++;
4874 if (size == 3
4875 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
4876 collisions++;
4877
4878 /* Collision in the middle part can be handled by reordering. */
4879 if (collisions == 1 && size == 3
4880 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 4881 {
2450a057
JH
4882 rtx tmp;
4883 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
4884 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
4885 }
e075ae69 4886
2450a057
JH
4887 /* If there are more collisions, we can't handle it by reordering.
4888 Do an lea to the last part and use only one colliding move. */
4889 else if (collisions > 1)
4890 {
4891 collisions = 1;
4892 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
4893 XEXP (part[1][0], 0)));
4894 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
4895 part[1][1] = adj_offsettable_operand (part[1][0], 4);
4896 if (size == 3)
4897 part[1][2] = adj_offsettable_operand (part[1][0], 8);
4898 }
4899 }
4900
4901 if (push)
4902 {
4903 if (size == 3)
4904 emit_insn (gen_push (part[1][2]));
4905 emit_insn (gen_push (part[1][1]));
4906 emit_insn (gen_push (part[1][0]));
4907 return 1;
4908 }
4909
4910 /* Choose correct order to not overwrite the source before it is copied. */
4911 if ((REG_P (part[0][0])
4912 && REG_P (part[1][1])
4913 && (REGNO (part[0][0]) == REGNO (part[1][1])
4914 || (size == 3
4915 && REGNO (part[0][0]) == REGNO (part[1][2]))))
4916 || (collisions > 0
4917 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
4918 {
4919 if (size == 3)
4920 {
4921 operands1[2] = part[0][2];
4922 operands1[3] = part[0][1];
4923 operands1[4] = part[0][0];
4924 operands1[5] = part[1][2];
4925 operands1[6] = part[1][1];
4926 operands1[7] = part[1][0];
4927 }
4928 else
4929 {
4930 operands1[2] = part[0][1];
4931 operands1[3] = part[0][0];
4932 operands1[5] = part[1][1];
4933 operands1[6] = part[1][0];
4934 }
4935 }
4936 else
4937 {
4938 if (size == 3)
4939 {
4940 operands1[2] = part[0][0];
4941 operands1[3] = part[0][1];
4942 operands1[4] = part[0][2];
4943 operands1[5] = part[1][0];
4944 operands1[6] = part[1][1];
4945 operands1[7] = part[1][2];
4946 }
4947 else
4948 {
4949 operands1[2] = part[0][0];
4950 operands1[3] = part[0][1];
4951 operands1[5] = part[1][0];
4952 operands1[6] = part[1][1];
e075ae69
RH
4953 }
4954 }
32b5b1aa 4955
e9a25f70 4956 return 0;
32b5b1aa 4957}
32b5b1aa 4958
e075ae69
RH
4959void
4960ix86_split_ashldi (operands, scratch)
4961 rtx *operands, scratch;
32b5b1aa 4962{
e075ae69
RH
4963 rtx low[2], high[2];
4964 int count;
b985a30f 4965
e075ae69
RH
4966 if (GET_CODE (operands[2]) == CONST_INT)
4967 {
4968 split_di (operands, 2, low, high);
4969 count = INTVAL (operands[2]) & 63;
32b5b1aa 4970
e075ae69
RH
4971 if (count >= 32)
4972 {
4973 emit_move_insn (high[0], low[1]);
4974 emit_move_insn (low[0], const0_rtx);
b985a30f 4975
e075ae69
RH
4976 if (count > 32)
4977 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
4978 }
4979 else
4980 {
4981 if (!rtx_equal_p (operands[0], operands[1]))
4982 emit_move_insn (operands[0], operands[1]);
4983 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
4984 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
4985 }
4986 }
4987 else
4988 {
4989 if (!rtx_equal_p (operands[0], operands[1]))
4990 emit_move_insn (operands[0], operands[1]);
b985a30f 4991
e075ae69 4992 split_di (operands, 1, low, high);
b985a30f 4993
e075ae69
RH
4994 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
4995 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 4996
e075ae69
RH
4997 if (TARGET_CMOVE && (! reload_completed || scratch))
4998 {
4999 if (! reload_completed)
5000 scratch = force_reg (SImode, const0_rtx);
5001 else
5002 emit_move_insn (scratch, const0_rtx);
5003
5004 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5005 scratch));
5006 }
5007 else
5008 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5009 }
e9a25f70 5010}
32b5b1aa 5011
e075ae69
RH
5012void
5013ix86_split_ashrdi (operands, scratch)
5014 rtx *operands, scratch;
32b5b1aa 5015{
e075ae69
RH
5016 rtx low[2], high[2];
5017 int count;
32b5b1aa 5018
e075ae69
RH
5019 if (GET_CODE (operands[2]) == CONST_INT)
5020 {
5021 split_di (operands, 2, low, high);
5022 count = INTVAL (operands[2]) & 63;
32b5b1aa 5023
e075ae69
RH
5024 if (count >= 32)
5025 {
5026 emit_move_insn (low[0], high[1]);
32b5b1aa 5027
e075ae69
RH
5028 if (! reload_completed)
5029 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5030 else
5031 {
5032 emit_move_insn (high[0], low[0]);
5033 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5034 }
5035
5036 if (count > 32)
5037 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5038 }
5039 else
5040 {
5041 if (!rtx_equal_p (operands[0], operands[1]))
5042 emit_move_insn (operands[0], operands[1]);
5043 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5044 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5045 }
5046 }
5047 else
32b5b1aa 5048 {
e075ae69
RH
5049 if (!rtx_equal_p (operands[0], operands[1]))
5050 emit_move_insn (operands[0], operands[1]);
5051
5052 split_di (operands, 1, low, high);
5053
5054 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5055 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5056
5057 if (TARGET_CMOVE && (!reload_completed || scratch))
5058 {
5059 if (! reload_completed)
5060 scratch = gen_reg_rtx (SImode);
5061 emit_move_insn (scratch, high[0]);
5062 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5063 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5064 scratch));
5065 }
5066 else
5067 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 5068 }
e075ae69 5069}
32b5b1aa 5070
e075ae69
RH
5071void
5072ix86_split_lshrdi (operands, scratch)
5073 rtx *operands, scratch;
5074{
5075 rtx low[2], high[2];
5076 int count;
32b5b1aa 5077
e075ae69 5078 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 5079 {
e075ae69
RH
5080 split_di (operands, 2, low, high);
5081 count = INTVAL (operands[2]) & 63;
5082
5083 if (count >= 32)
c7271385 5084 {
e075ae69
RH
5085 emit_move_insn (low[0], high[1]);
5086 emit_move_insn (high[0], const0_rtx);
32b5b1aa 5087
e075ae69
RH
5088 if (count > 32)
5089 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5090 }
5091 else
5092 {
5093 if (!rtx_equal_p (operands[0], operands[1]))
5094 emit_move_insn (operands[0], operands[1]);
5095 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5096 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5097 }
32b5b1aa 5098 }
e075ae69
RH
5099 else
5100 {
5101 if (!rtx_equal_p (operands[0], operands[1]))
5102 emit_move_insn (operands[0], operands[1]);
32b5b1aa 5103
e075ae69
RH
5104 split_di (operands, 1, low, high);
5105
5106 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5107 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5108
5109 /* Heh. By reversing the arguments, we can reuse this pattern. */
5110 if (TARGET_CMOVE && (! reload_completed || scratch))
5111 {
5112 if (! reload_completed)
5113 scratch = force_reg (SImode, const0_rtx);
5114 else
5115 emit_move_insn (scratch, const0_rtx);
5116
5117 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5118 scratch));
5119 }
5120 else
5121 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5122 }
32b5b1aa 5123}
3f803cd9 5124
e075ae69
RH
5125/* Expand the appropriate insns for doing strlen if not just doing
5126 repnz; scasb
5127
5128 out = result, initialized with the start address
5129 align_rtx = alignment of the address.
5130 scratch = scratch register, initialized with the startaddress when
5131 not aligned, otherwise undefined
3f803cd9
SC
5132
5133 This is just the body. It needs the initialisations mentioned above and
5134 some address computing at the end. These things are done in i386.md. */
5135
e075ae69
RH
void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;		/* taken when OUT == 2 (mod 4) */
  rtx align_3_label = NULL_RTX;		/* taken when OUT == 3 (mod 4) */
  rtx align_4_label = gen_label_rtx ();	/* top of the aligned word loop */
  rtx end_0_label = gen_label_rtx ();	/* zero byte found, OUT is exact */
  rtx end_2_label = gen_label_rtx ();	/* zero found in a word; fixups below */
  rtx end_3_label = gen_label_rtx ();	/* zero found in a word; fixups below */
  rtx mem;
  /* All conditional jumps below test the flags register in CCNOmode.  */
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.
	     NB: SCRATCH is assumed to hold the start address here (see the
	     function comment above).  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Already 4-byte aligned?  Go straight to the word loop.  */
	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  /* Compare the low bits against 2: equal -> 2 mod 4,
	     above -> 3 mod 4, below (i.e. 1) falls through.  */
	  emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_2_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_3_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}

      /* A single byte load/compare through the pointer in OUT; the MEM
	 is reused by the emitted insns as OUT is incremented.  */
      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_insn (gen_cmpqi_0 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_insn (gen_cmpqi_0 (mem, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 end_0_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      /* Third (or second, when align == 2) unaligned byte.  */
      emit_insn (gen_cmpqi_0 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);

  /* Check first byte.  */
  emit_insn (gen_cmpqi_0 (gen_lowpart (QImode, scratch), const0_rtx));
  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, end_0_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Check second byte.  */
  emit_insn (gen_cmpqi_ext_3 (scratch, const0_rtx));
  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, end_3_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Check third byte.  */
  emit_insn (gen_testsi_1 (scratch, GEN_INT (0x00ff0000)));
  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, end_2_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Check fourth byte and increment address.  */
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
  emit_insn (gen_testsi_1 (scratch, GEN_INT (0xff000000)));
  tmp = gen_rtx_NE (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, align_4_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Now generate fixups when the compare stops within a 4-byte word.
     OUT was already advanced past the word (or points into it for the
     early-exit jumps); the adds below, together with the address
     computation in i386.md noted above, produce the final pointer.  */
  emit_insn (gen_subsi3 (out, out, GEN_INT (3)));

  emit_label (end_2_label);
  emit_insn (gen_addsi3 (out, out, const1_rtx));

  emit_label (end_3_label);
  emit_insn (gen_addsi3 (out, out, const1_rtx));

  emit_label (end_0_label);
}
5311\f
e075ae69
RH
5312/* Clear stack slot assignments remembered from previous functions.
5313 This is called from INIT_EXPANDERS once before RTL is emitted for each
5314 function. */
5315
36edd3cc
BS
5316static void
5317ix86_init_machine_status (p)
1526a060 5318 struct function *p;
e075ae69
RH
5319{
5320 enum machine_mode mode;
5321 int n;
36edd3cc
BS
5322 p->machine
5323 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
5324
5325 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5326 mode = (enum machine_mode) ((int) mode + 1))
5327 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5328 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
5329}
5330
1526a060
BS
5331/* Mark machine specific bits of P for GC. */
5332static void
5333ix86_mark_machine_status (p)
5334 struct function *p;
5335{
5336 enum machine_mode mode;
5337 int n;
5338
5339 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5340 mode = (enum machine_mode) ((int) mode + 1))
5341 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5342 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5343}
5344
e075ae69
RH
5345/* Return a MEM corresponding to a stack slot with mode MODE.
5346 Allocate a new slot if necessary.
5347
5348 The RTL for a function can have several slots available: N is
5349 which slot to use. */
5350
5351rtx
5352assign_386_stack_local (mode, n)
5353 enum machine_mode mode;
5354 int n;
5355{
5356 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5357 abort ();
5358
5359 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
5360 ix86_stack_locals[(int) mode][n]
5361 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
5362
5363 return ix86_stack_locals[(int) mode][n];
5364}
5365\f
5366/* Calculate the length of the memory address in the instruction
5367 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5368
5369static int
5370memory_address_length (addr)
5371 rtx addr;
5372{
5373 struct ix86_address parts;
5374 rtx base, index, disp;
5375 int len;
5376
5377 if (GET_CODE (addr) == PRE_DEC
5378 || GET_CODE (addr) == POST_INC)
5379 return 0;
3f803cd9 5380
e075ae69
RH
5381 if (! ix86_decompose_address (addr, &parts))
5382 abort ();
3f803cd9 5383
e075ae69
RH
5384 base = parts.base;
5385 index = parts.index;
5386 disp = parts.disp;
5387 len = 0;
3f803cd9 5388
e075ae69
RH
5389 /* Register Indirect. */
5390 if (base && !index && !disp)
5391 {
5392 /* Special cases: ebp and esp need the two-byte modrm form. */
5393 if (addr == stack_pointer_rtx
5394 || addr == arg_pointer_rtx
5395 || addr == frame_pointer_rtx)
5396 len = 1;
3f803cd9 5397 }
e9a25f70 5398
e075ae69
RH
5399 /* Direct Addressing. */
5400 else if (disp && !base && !index)
5401 len = 4;
5402
3f803cd9
SC
5403 else
5404 {
e075ae69
RH
5405 /* Find the length of the displacement constant. */
5406 if (disp)
5407 {
5408 if (GET_CODE (disp) == CONST_INT
5409 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
5410 len = 1;
5411 else
5412 len = 4;
5413 }
3f803cd9 5414
e075ae69
RH
5415 /* An index requires the two-byte modrm form. */
5416 if (index)
5417 len += 1;
3f803cd9
SC
5418 }
5419
e075ae69
RH
5420 return len;
5421}
79325812 5422
e075ae69
RH
/* Compute the default value of the "length" insn attribute: an
   estimate of the encoded size of INSN in bytes.  Immediates and
   memory addresses contribute per-operand; the opcode and prefix
   sizes come from the length_opcode and length_prefix attributes.  */

int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);
  switch (type)
    {
    /* These types never carry an immediate; only a possible memory
       operand (handled after the switch) plus opcode/prefix count.  */
    case TYPE_INCDEC:
    case TYPE_SETCC:
    case TYPE_ICMOV:
    case TYPE_FMOV:
    case TYPE_FOP:
    case TYPE_FCMP:
    case TYPE_FOP1:
    case TYPE_FMUL:
    case TYPE_FDIV:
    case TYPE_FSGN:
    case TYPE_FPSPC:
    case TYPE_FCMOV:
    case TYPE_IBR:
      break;

    /* Integer ops: add the size of any constant operand -- one byte
       for a sign-extendable immediate ('K'), otherwise the full
       operand-mode width.  */
    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ALU:
    case TYPE_ICMP:
    case TYPE_IMOVX:
    case TYPE_ISHIFT:
    case TYPE_IMUL:
    case TYPE_IDIV:
    case TYPE_PUSH:
    case TYPE_POP:
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (CONSTANT_P (recog_data.operand[i]))
	  {
	    if (GET_CODE (recog_data.operand[i]) == CONST_INT
		&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	      len += 1;
	    else
	      len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
	  }
      break;

    /* mov with an immediate has no short form: full-width constant.  */
    case TYPE_IMOV:
      if (CONSTANT_P (recog_data.operand[1]))
	len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    /* Direct call: fixed 5-byte encoding (opcode + rel32).  */
    case TYPE_CALL:
      if (constant_call_address_operand (recog_data.operand[0],
					 GET_MODE (recog_data.operand[0])))
	return 5;
      break;

    case TYPE_CALLV:
      if (constant_call_address_operand (recog_data.operand[1],
					 GET_MODE (recog_data.operand[1])))
	return 5;
      break;

    case TYPE_LEA:
      {
	/* Irritatingly, single_set doesn't work with REG_UNUSED present,
	   as we'll get from running life_analysis during reg-stack when
	   not optimizing.  */
	rtx set = PATTERN (insn);
	if (GET_CODE (set) == SET)
	  ;
	else if (GET_CODE (set) == PARALLEL
		 && XVECLEN (set, 0) == 2
		 && GET_CODE (XVECEXP (set, 0, 0)) == SET
		 && GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
	  set = XVECEXP (set, 0, 0);
	else
	  abort ();

	/* The lea "source" is really an address; skip the generic MEM
	   scan below, which would not find it.  */
	len += memory_address_length (SET_SRC (set));
	goto just_opcode;
      }

    /* Unknown or multi-insn patterns: assume the worst case
       (15 bytes is the maximum x86 instruction length).  */
    case TYPE_OTHER:
    case TYPE_MULTI:
      return 15;

    /* fxch: 2 bytes, plus 2 more unless the other operand is st(1).  */
    case TYPE_FXCH:
      if (STACK_TOP_P (recog_data.operand[0]))
	return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
	return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      abort ();
    }

  /* Add the length of the one memory operand, if any (an x86 insn can
     have at most one).  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
	len += memory_address_length (XEXP (recog_data.operand[i], 0));
	break;
      }

just_opcode:
  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
e075ae69
RH
5534\f
5535/* Return the maximum number of instructions a cpu can issue. */
b657fc39 5536
e075ae69
RH
5537int
5538ix86_issue_rate ()
b657fc39 5539{
e075ae69 5540 switch (ix86_cpu)
b657fc39 5541 {
e075ae69
RH
5542 case PROCESSOR_PENTIUM:
5543 case PROCESSOR_K6:
5544 return 2;
79325812 5545
e075ae69
RH
5546 case PROCESSOR_PENTIUMPRO:
5547 return 3;
b657fc39 5548
b657fc39 5549 default:
e075ae69 5550 return 1;
b657fc39 5551 }
b657fc39
L
5552}
5553
e075ae69
RH
5554/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5555 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 5556
e075ae69
RH
5557static int
5558ix86_flags_dependant (insn, dep_insn, insn_type)
5559 rtx insn, dep_insn;
5560 enum attr_type insn_type;
5561{
5562 rtx set, set2;
b657fc39 5563
e075ae69
RH
5564 /* Simplify the test for uninteresting insns. */
5565 if (insn_type != TYPE_SETCC
5566 && insn_type != TYPE_ICMOV
5567 && insn_type != TYPE_FCMOV
5568 && insn_type != TYPE_IBR)
5569 return 0;
b657fc39 5570
e075ae69
RH
5571 if ((set = single_set (dep_insn)) != 0)
5572 {
5573 set = SET_DEST (set);
5574 set2 = NULL_RTX;
5575 }
5576 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
5577 && XVECLEN (PATTERN (dep_insn), 0) == 2
5578 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
5579 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
5580 {
5581 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5582 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5583 }
b657fc39 5584
e075ae69 5585 if (set && GET_CODE (set) == REG && REGNO (set) == FLAGS_REG)
b657fc39 5586 {
e075ae69
RH
5587 /* This test is true if the dependant insn reads the flags but
5588 not any other potentially set register. */
5589 if (reg_overlap_mentioned_p (set, PATTERN (insn))
5590 && (!set2 || !reg_overlap_mentioned_p (set2, PATTERN (insn))))
5591 return 1;
5592 }
b657fc39 5593
e075ae69
RH
5594 return 0;
5595}
b657fc39 5596
e075ae69
RH
5597/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5598 address with operands set by DEP_INSN. */
5599
5600static int
5601ix86_agi_dependant (insn, dep_insn, insn_type)
5602 rtx insn, dep_insn;
5603 enum attr_type insn_type;
5604{
5605 rtx addr;
5606
5607 if (insn_type == TYPE_LEA)
5608 addr = SET_SRC (single_set (insn));
5609 else
5610 {
5611 int i;
5612 extract_insn (insn);
1ccbefce
RH
5613 for (i = recog_data.n_operands - 1; i >= 0; --i)
5614 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 5615 {
1ccbefce 5616 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
5617 goto found;
5618 }
5619 return 0;
5620 found:;
b657fc39
L
5621 }
5622
e075ae69 5623 return modified_in_p (addr, dep_insn);
b657fc39 5624}
a269a03c
JC
5625
5626int
e075ae69 5627ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
5628 rtx insn, link, dep_insn;
5629 int cost;
5630{
e075ae69
RH
5631 enum attr_type insn_type, dep_insn_type;
5632 rtx set, set2;
9b00189f 5633 int dep_insn_code_number;
a269a03c 5634
309ada50 5635 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 5636 if (REG_NOTE_KIND (link) != 0)
309ada50 5637 return 0;
a269a03c 5638
9b00189f
JH
5639 dep_insn_code_number = recog_memoized (dep_insn);
5640
e075ae69 5641 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 5642 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 5643 return cost;
a269a03c 5644
9b00189f
JH
5645 /* Prologue and epilogue allocators have false dependency on ebp.
5646 This results in one cycle extra stall on Pentium prologue scheduling, so
5647 handle this important case manually. */
5648
5649 if ((dep_insn_code_number == CODE_FOR_prologue_allocate_stack
5650 || dep_insn_code_number == CODE_FOR_epilogue_deallocate_stack)
5651 && !reg_mentioned_p (stack_pointer_rtx, insn))
5652 return 0;
5653
e075ae69
RH
5654 insn_type = get_attr_type (insn);
5655 dep_insn_type = get_attr_type (dep_insn);
a269a03c
JC
5656
5657 switch (ix86_cpu)
5658 {
5659 case PROCESSOR_PENTIUM:
e075ae69
RH
5660 /* Address Generation Interlock adds a cycle of latency. */
5661 if (ix86_agi_dependant (insn, dep_insn, insn_type))
5662 cost += 1;
5663
5664 /* ??? Compares pair with jump/setcc. */
5665 if (ix86_flags_dependant (insn, dep_insn, insn_type))
5666 cost = 0;
5667
5668 /* Floating point stores require value to be ready one cycle ealier. */
5669 if (insn_type == TYPE_FMOV
5670 && get_attr_memory (insn) == MEMORY_STORE
5671 && !ix86_agi_dependant (insn, dep_insn, insn_type))
5672 cost += 1;
5673 break;
a269a03c 5674
e075ae69
RH
5675 case PROCESSOR_PENTIUMPRO:
5676 /* Since we can't represent delayed latencies of load+operation,
5677 increase the cost here for non-imov insns. */
5678 if (dep_insn_type != TYPE_IMOV
5679 && dep_insn_type != TYPE_FMOV
5680 && get_attr_memory (dep_insn) == MEMORY_LOAD)
5681 cost += 1;
5682
5683 /* INT->FP conversion is expensive. */
5684 if (get_attr_fp_int_src (dep_insn))
5685 cost += 5;
5686
5687 /* There is one cycle extra latency between an FP op and a store. */
5688 if (insn_type == TYPE_FMOV
5689 && (set = single_set (dep_insn)) != NULL_RTX
5690 && (set2 = single_set (insn)) != NULL_RTX
5691 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
5692 && GET_CODE (SET_DEST (set2)) == MEM)
5693 cost += 1;
5694 break;
a269a03c 5695
e075ae69
RH
5696 case PROCESSOR_K6:
5697 /* The esp dependency is resolved before the instruction is really
5698 finished. */
5699 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
5700 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
5701 return 1;
a269a03c 5702
e075ae69
RH
5703 /* Since we can't represent delayed latencies of load+operation,
5704 increase the cost here for non-imov insns. */
5705 if (get_attr_memory (dep_insn) == MEMORY_LOAD)
5706 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
5707
5708 /* INT->FP conversion is expensive. */
5709 if (get_attr_fp_int_src (dep_insn))
5710 cost += 5;
a14003ee 5711 break;
e075ae69 5712
309ada50
JH
5713 case PROCESSOR_ATHLON:
5714 /* Address Generation Interlock cause problems on the Athlon CPU because
5715 the loads and stores are done in order so once one load or store has
5716 to wait, others must too, so penalize the AGIs slightly by one cycle.
5717 We might experiment with this value later. */
5718 if (ix86_agi_dependant (insn, dep_insn, insn_type))
5719 cost += 1;
5720
5721 /* Since we can't represent delayed latencies of load+operation,
5722 increase the cost here for non-imov insns. */
5723 if (dep_insn_type != TYPE_IMOV
5724 && dep_insn_type != TYPE_FMOV
5725 && get_attr_memory (dep_insn) == MEMORY_LOAD)
5726 cost += 2;
a269a03c 5727 default:
a269a03c
JC
5728 break;
5729 }
5730
5731 return cost;
5732}
0a726ef1 5733
e075ae69
RH
/* Per-block scheduling state, cleared by ix86_sched_init.  Only the
   PPro variant exists so far; a union keeps room for other CPUs.  */
static union
{
  struct ppro_sched_data
  {
    /* The insns assigned to the three PPro decoders in the current
       packet; slot 0 is the one fed complex insns (see
       ix86_variable_issue).  */
    rtx decode[3];
    /* Number of insns issued so far in the current clock cycle.  */
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 5742
e075ae69
RH
5743static int
5744ix86_safe_length (insn)
5745 rtx insn;
5746{
5747 if (recog_memoized (insn) >= 0)
5748 return get_attr_length(insn);
5749 else
5750 return 128;
5751}
0a726ef1 5752
e075ae69
RH
5753static int
5754ix86_safe_length_prefix (insn)
5755 rtx insn;
5756{
5757 if (recog_memoized (insn) >= 0)
5758 return get_attr_length(insn);
5759 else
5760 return 0;
5761}
5762
5763static enum attr_memory
5764ix86_safe_memory (insn)
5765 rtx insn;
5766{
5767 if (recog_memoized (insn) >= 0)
5768 return get_attr_memory(insn);
5769 else
5770 return MEMORY_UNKNOWN;
5771}
0a726ef1 5772
e075ae69
RH
5773static enum attr_pent_pair
5774ix86_safe_pent_pair (insn)
5775 rtx insn;
5776{
5777 if (recog_memoized (insn) >= 0)
5778 return get_attr_pent_pair(insn);
5779 else
5780 return PENT_PAIR_NP;
5781}
0a726ef1 5782
e075ae69
RH
5783static enum attr_ppro_uops
5784ix86_safe_ppro_uops (insn)
5785 rtx insn;
5786{
5787 if (recog_memoized (insn) >= 0)
5788 return get_attr_ppro_uops (insn);
5789 else
5790 return PPRO_UOPS_MANY;
5791}
0a726ef1 5792
e075ae69
RH
5793static void
5794ix86_dump_ppro_packet (dump)
5795 FILE *dump;
0a726ef1 5796{
e075ae69 5797 if (ix86_sched_data.ppro.decode[0])
0a726ef1 5798 {
e075ae69
RH
5799 fprintf (dump, "PPRO packet: %d",
5800 INSN_UID (ix86_sched_data.ppro.decode[0]));
5801 if (ix86_sched_data.ppro.decode[1])
5802 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
5803 if (ix86_sched_data.ppro.decode[2])
5804 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
5805 fputc ('\n', dump);
5806 }
5807}
0a726ef1 5808
e075ae69 5809/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 5810
e075ae69
RH
5811void
5812ix86_sched_init (dump, sched_verbose)
5813 FILE *dump ATTRIBUTE_UNUSED;
5814 int sched_verbose ATTRIBUTE_UNUSED;
5815{
5816 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
5817}
5818
5819/* Shift INSN to SLOT, and shift everything else down. */
5820
5821static void
5822ix86_reorder_insn (insnp, slot)
5823 rtx *insnp, *slot;
5824{
5825 if (insnp != slot)
5826 {
5827 rtx insn = *insnp;
5828 do
5829 insnp[0] = insnp[1];
5830 while (++insnp != slot);
5831 *insnp = insn;
0a726ef1 5832 }
e075ae69
RH
5833}
5834
5835/* Find an instruction with given pairability and minimal amount of cycles
5836 lost by the fact that the CPU waits for both pipelines to finish before
5837 reading next instructions. Also take care that both instructions together
5838 can not exceed 7 bytes. */
5839
static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* The pair of insns together may not exceed 7 bytes (excluding
     prefixes); if FIRST alone is too long there is nothing to find.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan the ready queue from highest priority (E_READY) down.  Note
     MINCYCLES doubles as an early-exit flag: once a zero-penalty
     candidate is found the loop condition fails and we stop.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Penalty starts as the latency mismatch: the CPU waits for
	   both pipes before reading the next instructions.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
5890
 5891/* We are about to begin issuing insns for this clock cycle.
 5892 Override the default sort algorithm to better slot instructions. */
5893
int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready, clock_var ATTRIBUTE_UNUSED;
{
  /* READY[0 .. n_ready-1] is the ready queue; E_READY points at the
     highest-priority entry (the next insn to issue).  */
  rtx *e_ready = ready + n_ready - 1;
  rtx *insnp;
  int i;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      goto out;

    case PROCESSOR_PENTIUM:
      /* This wouldn't be necessary if Haifa knew that static insn ordering
	 is important to which pipe an insn is issued to.  So we have to make
	 some minor rearrangements.  */
      {
	enum attr_pent_pair pair1, pair2;

	pair1 = ix86_safe_pent_pair (*e_ready);

	/* If the first insn is non-pairable, let it be.  */
	if (pair1 == PENT_PAIR_NP)
	  goto out;
	pair2 = PENT_PAIR_NP;

	/* If the first insn is UV or PV pairable, search for a PU
	   insn to go with.  */
	if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PU, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PU;
	  }

	/* If the first insn is PU or UV pairable, search for a PV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP
	    && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PV;
	  }

	/* If the first insn is pairable, search for a UV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_UV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_UV;
	  }

	if (pair2 == PENT_PAIR_NP)
	  goto out;

	/* Found something!  Decide if we need to swap the order.  */
	if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
	    || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
		&& ix86_safe_memory (*e_ready) == MEMORY_BOTH
		&& ix86_safe_memory (*insnp) == MEMORY_LOAD))
	  ix86_reorder_insn (insnp, e_ready);
	else
	  ix86_reorder_insn (insnp, e_ready - 1);
      }
      break;

    case PROCESSOR_PENTIUMPRO:
      {
	rtx decode[3];
	enum attr_ppro_uops cur_uops;
	int issued_this_cycle;

	/* At this point .ppro.decode contains the state of the three
	   decoders from last "cycle".  That is, those insns that were
	   actually independent.  But here we're scheduling for the
	   decoder, and we may find things that are decodable in the
	   same cycle.  */

	memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
	issued_this_cycle = 0;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);

	/* If the decoders are empty, and we've a complex insn at the
	   head of the priority queue, let it issue without complaint.  */
	if (decode[0] == NULL)
	  {
	    if (cur_uops == PPRO_UOPS_MANY)
	      {
		decode[0] = *insnp;
		goto ppro_done;
	      }

	    /* Otherwise, search for a 2-4 uop insn to issue.  */
	    while (cur_uops != PPRO_UOPS_FEW)
	      {
		if (insnp == ready)
		  break;
		cur_uops = ix86_safe_ppro_uops (*--insnp);
	      }

	    /* If so, move it to the head of the line.  */
	    if (cur_uops == PPRO_UOPS_FEW)
	      ix86_reorder_insn (insnp, e_ready);

	    /* Issue the head of the queue.  */
	    issued_this_cycle = 1;
	    decode[0] = *e_ready--;
	  }

	/* Look for simple insns to fill in the other two slots.  */
	for (i = 1; i < 3; ++i)
	  if (decode[i] == NULL)
	    {
	      if (ready >= e_ready)
		goto ppro_done;

	      insnp = e_ready;
	      cur_uops = ix86_safe_ppro_uops (*insnp);
	      while (cur_uops != PPRO_UOPS_ONE)
		{
		  if (insnp == ready)
		    break;
		  cur_uops = ix86_safe_ppro_uops (*--insnp);
		}

	      /* Found one.  Move it to the head of the queue and issue it.  */
	      if (cur_uops == PPRO_UOPS_ONE)
		{
		  ix86_reorder_insn (insnp, e_ready);
		  decode[i] = *e_ready--;
		  issued_this_cycle++;
		  continue;
		}

	      /* ??? Didn't find one.  Ideally, here we would do a lazy split
		 of 2-uop insns, issue one and queue the other.  */
	    }

      ppro_done:
	/* Always claim at least one issue so the main scheduler makes
	   progress; ix86_variable_issue decrements this counter.  */
	if (issued_this_cycle == 0)
	  issued_this_cycle = 1;
	ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
      }
      break;
    }

out:
  return ix86_issue_rate ();
}
fb693d44 6057
e075ae69
RH
6058/* We are about to issue INSN. Return the number of insns left on the
6059 ready queue that can be issued this cycle. */
b222082e 6060
e075ae69
RH
int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    /* Most CPUs: simply count the insn against the issue budget.  */
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A complex insn monopolizes the decoders: dump the packet
	       pending from earlier issues, show this insn as a packet
	       of its own, and leave all decoder slots empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must go to decoder 0: flush the pending
	       packet and start a new one headed by this insn.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn takes the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* All three decoders busy: the packet is complete;
		   dump it and start fresh.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      /* Spend one unit of the budget computed by ix86_sched_reorder.  */
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
This page took 1.57418 seconds and 5 git commands to generate.