]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
alpha-protos.h: PROTO -> PARAMS.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
0ef2e39a 2 Copyright (C) 1988, 92, 94-99, 2000 Free Software Foundation, Inc.
2a2ab3f9
JVA
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING. If not, write to
97aadbb9 18the Free Software Foundation, 59 Temple Place - Suite 330,
32b5b1aa 19Boston, MA 02111-1307, USA. */
2a2ab3f9 20
0b6b2900 21#include <setjmp.h>
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
32#include "insn-flags.h"
33#include "output.h"
34#include "insn-attr.h"
2a2ab3f9 35#include "flags.h"
a8ffcc81 36#include "except.h"
ecbc4695 37#include "function.h"
00c79232 38#include "recog.h"
ced8dd8c 39#include "expr.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
2a2ab3f9 43
997de79c
JVA
44#ifdef EXTRA_CONSTRAINT
45/* If EXTRA_CONSTRAINT is defined, then the 'S'
46 constraint in REG_CLASS_FROM_LETTER will no longer work, and various
47 asm statements that need 'S' for class SIREG will break. */
ad5a6adc
RS
48 error EXTRA_CONSTRAINT conflicts with S constraint letter
49/* The previous line used to be #error, but some compilers barf
50 even if the conditional was untrue. */
997de79c
JVA
51#endif
52
8dfe5673
RK
53#ifndef CHECK_STACK_LIMIT
54#define CHECK_STACK_LIMIT -1
55#endif
56
32b5b1aa
SC
/* Processor costs (relative to an add).  One table per -mcpu= target;
   override_options points ix86_cost at the selected one.  Field order
   must match struct processor_costs (declared in i386.h).  */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  6,				/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  23,				/* cost of a divide/mod */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8}			/* cost of storing fp registers
				   (mirrors the int load/store pair above) */
};

struct processor_costs i486_cost = {	/* 486 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  12,				/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  40,				/* cost of a divide/mod */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8}			/* cost of storing fp registers */
};

struct processor_costs pentium_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  4,				/* variable shift costs */
  1,				/* constant shift costs */
  11,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  25,				/* cost of a divide/mod */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6}			/* cost of storing fp registers */
};

struct processor_costs pentiumpro_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  1,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  17,				/* cost of a divide/mod */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6}			/* cost of storing fp registers */
};

struct processor_costs k6_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  3,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  18,				/* cost of a divide/mod */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4}			/* cost of storing fp registers */
};

struct processor_costs athlon_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  5,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  19,				/* cost of a divide/mod */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4}			/* cost of storing fp registers */
};

/* Cost table currently in effect; set by override_options, defaults
   to Pentium tuning.  */
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each x86_* tuning flag
   below is a bitmask of the processors for which the transformation
   is profitable; tested via ix86_cpu at code-generation time.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;

/* A MEM in MODE based at the frame pointer.  */
#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))

/* Register name tables, indexed by hard register number.  */
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  INDEX_REGS,
  /* flags, fpsr */
  NO_REGS, NO_REGS
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

/* Maximum number of cached stack-slot temporaries per mode.  */
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  /* Per-function scratch stack slots, one per (mode, slot) pair.  */
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

/* Accessor for the current function's cached stack slots.  */
#define ix86_stack_locals (cfun->machine->stack_locals)
/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order (-mreg-alloc=).  */
const char *ix86_reg_alloc_order;
/* Registers already claimed by -mreg-alloc; validated in
   override_options, consumed by order_regs_for_local_alloc.  */
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
/* Forward declarations for the file-local helpers below.  */
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, int));
static rtx ix86_expand_compare PARAMS ((enum rtx_code, int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));

/* Decomposed form of an x86 effective address:
   base + index*scale + disp, any component possibly absent.  */
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  /* Per-processor tuning defaults, indexed by enum processor_type.  */
  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  /* Mapping from -march=/-mcpu= names to processor enum values.  */
  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  /* -march= selects the architecture and, unless overridden below,
     also the scheduling/tuning cpu.  */
  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  /* -mcpu= overrides the tuning cpu without changing the ISA.  */
  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  Unknown or
     duplicated letters are fatal; the order itself is consumed later
     by order_regs_for_local_alloc.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* If we're planning on using `loop', use it.  */
  if (TARGET_USE_LOOP && optimize)
    flag_branch_on_count_reg = 1;
}
535\f
536/* A C statement (sans semicolon) to choose the order in which to
537 allocate hard registers for pseudo-registers local to a basic
538 block.
539
540 Store the desired register order in the array `reg_alloc_order'.
541 Element 0 should be the register to allocate first; element 1, the
542 next register; and so on.
543
544 The macro body should not assume anything about the contents of
545 `reg_alloc_order' before execution of the macro.
546
547 On most machines, it is not necessary to define this macro. */
548
549void
550order_regs_for_local_alloc ()
551{
00c79232 552 int i, ch, order;
f5316dfe 553
e9a25f70
JL
554 /* User specified the register allocation order. */
555
e075ae69 556 if (ix86_reg_alloc_order)
f5316dfe 557 {
e075ae69 558 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 559 {
00c79232 560 int regno = 0;
79325812 561
f5316dfe
MM
562 switch (ch)
563 {
564 case 'a': regno = 0; break;
565 case 'd': regno = 1; break;
566 case 'c': regno = 2; break;
567 case 'b': regno = 3; break;
568 case 'S': regno = 4; break;
569 case 'D': regno = 5; break;
570 case 'B': regno = 6; break;
571 }
572
573 reg_alloc_order[order++] = regno;
574 }
575
576 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
577 {
e9a25f70 578 if (! regs_allocated[i])
f5316dfe
MM
579 reg_alloc_order[order++] = i;
580 }
581 }
582
e9a25f70 583 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
584 else
585 {
586 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
587 reg_alloc_order[i] = i;
f5316dfe
MM
588 }
589}
32b5b1aa
SC
590\f
591void
c6aded7c 592optimization_options (level, size)
32b5b1aa 593 int level;
bb5177ac 594 int size ATTRIBUTE_UNUSED;
32b5b1aa 595{
e9a25f70
JL
596 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
597 make the problem with not enough registers even worse. */
32b5b1aa
SC
598#ifdef INSN_SCHEDULING
599 if (level > 1)
600 flag_schedule_insns = 0;
601#endif
602}
b08de47e 603\f
e075ae69
RH
604/* Return nonzero if the rtx is known aligned. */
605/* ??? Unused. */
5bc7cd8e
SC
606
607int
e075ae69 608ix86_aligned_p (op)
5bc7cd8e
SC
609 rtx op;
610{
e075ae69
RH
611 struct ix86_address parts;
612
e9a25f70 613 /* Registers and immediate operands are always "aligned". */
5bc7cd8e
SC
614 if (GET_CODE (op) != MEM)
615 return 1;
616
e9a25f70 617 /* Don't even try to do any aligned optimizations with volatiles. */
5bc7cd8e
SC
618 if (MEM_VOLATILE_P (op))
619 return 0;
620
5bc7cd8e
SC
621 op = XEXP (op, 0);
622
e075ae69
RH
623 /* Pushes and pops are only valid on the stack pointer. */
624 if (GET_CODE (op) == PRE_DEC
625 || GET_CODE (op) == POST_INC)
626 return 1;
e9a25f70 627
e075ae69
RH
628 /* Decode the address. */
629 if (! ix86_decompose_address (op, &parts))
630 abort ();
79325812 631
e075ae69
RH
632 /* Look for some component that isn't known to be aligned. */
633 if (parts.index)
634 {
635 if (parts.scale < 4
636 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
637 return 0;
638 }
639 if (parts.base)
640 {
641 if (REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
642 return 0;
643 }
644 if (parts.disp)
645 {
646 if (GET_CODE (parts.disp) != CONST_INT
647 || (INTVAL (parts.disp) & 3) != 0)
648 return 0;
5bc7cd8e 649 }
e9a25f70 650
e075ae69
RH
651 /* Didn't find one -- this must be an aligned address. */
652 return 1;
5bc7cd8e
SC
653}
654\f
b08de47e
MM
655/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
656 attribute for DECL. The attributes in ATTRIBUTES have previously been
657 assigned to DECL. */
658
659int
e075ae69 660ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
661 tree decl ATTRIBUTE_UNUSED;
662 tree attributes ATTRIBUTE_UNUSED;
663 tree identifier ATTRIBUTE_UNUSED;
664 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
665{
666 return 0;
667}
668
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  Recognizes "stdcall", "cdecl" (no arguments) and
   "regparm" (single integer argument in 0..REGPARM_MAX).  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      /* Exactly one argument must be supplied.  */
      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      /* NOTE(review): TREE_INT_CST_LOW is an unsigned quantity, so the
	 "< 0" test below looks like dead code; negative constants are
	 presumably caught by the TREE_INT_CST_HIGH check.  Confirm
	 before changing.  */
      if (TREE_INT_CST_HIGH (cst) != 0
	  || TREE_INT_CST_LOW (cst) < 0
	  || TREE_INT_CST_LOW (cst) > REGPARM_MAX)
	return 0;

      return 1;
    }

  return 0;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  With -mrtd
     the default is stdcall, so "cdecl" is the marker to compare;
     otherwise "stdcall" is.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  The two
     "!" normalize lookup results to 0/1 so "!=" acts as XOR: fail
     only when exactly one type carries the attribute.  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* -mrtd applies, except to library calls (identifier nodes).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops only when the argument list is fixed, i.e. either
       unprototyped or explicitly terminated by void.  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.
     The global -mregparm value may be overridden per-function by a
     regparm attribute.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
851
852/* Update the data in CUM to advance over an argument
853 of mode MODE and data type TYPE.
854 (TYPE is null for libcalls where that information may not be available.) */
855
856void
857function_arg_advance (cum, mode, type, named)
858 CUMULATIVE_ARGS *cum; /* current arg information */
859 enum machine_mode mode; /* current arg mode */
860 tree type; /* type of the argument or 0 if lib support */
861 int named; /* whether or not the argument was named */
862{
e9a25f70
JL
863 int bytes
864 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
865 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
866
867 if (TARGET_DEBUG_ARG)
868 fprintf (stderr,
e9a25f70 869 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e
MM
870 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
871
872 cum->words += words;
873 cum->nregs -= words;
874 cum->regno += words;
875
876 if (cum->nregs <= 0)
877 {
878 cum->nregs = 0;
879 cum->regno = 0;
880 }
881
882 return;
883}
884
885/* Define where to put the arguments to a function.
886 Value is zero to push the argument on the stack,
887 or a hard register in which to store the argument.
888
889 MODE is the argument's machine mode.
890 TYPE is the data type of the argument (as a tree).
891 This is null for libcalls where that information may
892 not be available.
893 CUM is a variable of type CUMULATIVE_ARGS which gives info about
894 the preceding args and about the function being called.
895 NAMED is nonzero if this argument is a named parameter
896 (otherwise it is an extra parameter matching an ellipsis). */
897
898struct rtx_def *
899function_arg (cum, mode, type, named)
900 CUMULATIVE_ARGS *cum; /* current arg information */
901 enum machine_mode mode; /* current arg mode */
902 tree type; /* type of the argument or 0 if lib support */
903 int named; /* != 0 for normal args, == 0 for ... args */
904{
905 rtx ret = NULL_RTX;
e9a25f70
JL
906 int bytes
907 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
908 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
909
910 switch (mode)
911 {
e9a25f70
JL
912 /* For now, pass fp/complex values on the stack. */
913 default:
b08de47e
MM
914 break;
915
916 case BLKmode:
917 case DImode:
918 case SImode:
919 case HImode:
920 case QImode:
921 if (words <= cum->nregs)
f64cecad 922 ret = gen_rtx_REG (mode, cum->regno);
b08de47e
MM
923 break;
924 }
925
926 if (TARGET_DEBUG_ARG)
927 {
928 fprintf (stderr,
e9a25f70 929 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
930 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
931
932 if (ret)
933 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
934 else
935 fprintf (stderr, ", stack");
936
937 fprintf (stderr, " )\n");
938 }
939
940 return ret;
941}
e075ae69
RH
942\f
943/* Returns 1 if OP is either a symbol reference or a sum of a symbol
944 reference and a constant. */
b08de47e
MM
945
/* Predicate: OP is a symbol reference, a label reference, or a CONST
   wrapping one of those (possibly with an added integer offset, and
   possibly expressed through a PIC UNSPEC).  MODE is ignored.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* Accept a bare symbol/label, or a PIC unspec.  NOTE(review):
	 unspec numbers 6 and 7 appear to be the @GOT/@GOTOFF markers;
	 7 is confirmed as @GOTOFF by the comment below.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      /* Anything else must be symbol-or-unspec plus integer offset.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      /* Look inside the unspec for the underlying symbol/label.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 988
e075ae69 989/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 990
e075ae69
RH
991int
992pic_symbolic_operand (op, mode)
993 register rtx op;
994 enum machine_mode mode ATTRIBUTE_UNUSED;
995{
996 if (GET_CODE (op) == CONST)
2a2ab3f9 997 {
e075ae69
RH
998 op = XEXP (op, 0);
999 if (GET_CODE (op) == UNSPEC)
1000 return 1;
1001 if (GET_CODE (op) != PLUS
1002 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1003 return 0;
1004 op = XEXP (op, 0);
1005 if (GET_CODE (op) == UNSPEC)
1006 return 1;
2a2ab3f9 1007 }
e075ae69 1008 return 0;
2a2ab3f9 1009}
2a2ab3f9 1010
28d52ffb
RH
1011/* Test for a valid operand for a call instruction. Don't allow the
1012 arg pointer register or virtual regs since they may decay into
1013 reg + const, which the patterns can't handle. */
2a2ab3f9 1014
e075ae69
RH
/* Predicate: OP is a valid operand for a call instruction — a MEM whose
   address is a general operand, excluding virtual registers and bare
   integer constants.  MODE is ignored.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  Virtual regs may
     decay into reg + const, which the call patterns can't handle.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
2a2ab3f9 1040
28d52ffb 1041/* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic. */
5f1ec3e6 1042
e075ae69
RH
1043int
1044expander_call_insn_operand (op, mode)
1045 rtx op;
28d52ffb 1046 enum machine_mode mode;
e075ae69 1047{
28d52ffb
RH
1048 if (GET_CODE (op) == MEM
1049 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF)
e075ae69 1050 return 1;
2a2ab3f9 1051
28d52ffb 1052 return call_insn_operand (op, mode);
e075ae69 1053}
79325812 1054
e075ae69
RH
1055int
1056constant_call_address_operand (op, mode)
1057 rtx op;
1058 enum machine_mode mode ATTRIBUTE_UNUSED;
1059{
1060 return GET_CODE (op) == MEM && CONSTANT_ADDRESS_P (XEXP (op, 0));
1061}
2a2ab3f9 1062
e075ae69 1063/* Match exactly zero and one. */
e9a25f70 1064
e075ae69
RH
1065int
1066const0_operand (op, mode)
1067 register rtx op;
1068 enum machine_mode mode;
1069{
1070 return op == CONST0_RTX (mode);
1071}
e9a25f70 1072
e075ae69
RH
1073int
1074const1_operand (op, mode)
1075 register rtx op;
1076 enum machine_mode mode ATTRIBUTE_UNUSED;
1077{
1078 return op == const1_rtx;
1079}
2a2ab3f9 1080
e075ae69 1081/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1082
e075ae69
RH
1083int
1084const248_operand (op, mode)
1085 register rtx op;
1086 enum machine_mode mode ATTRIBUTE_UNUSED;
1087{
1088 return (GET_CODE (op) == CONST_INT
1089 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1090}
e9a25f70 1091
e075ae69 1092/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1093
e075ae69
RH
1094int
1095incdec_operand (op, mode)
1096 register rtx op;
1097 enum machine_mode mode;
1098{
1099 if (op == const1_rtx || op == constm1_rtx)
1100 return 1;
1101 if (GET_CODE (op) != CONST_INT)
1102 return 0;
1103 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1104 return 1;
1105 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1106 return 1;
1107 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1108 return 1;
1109 return 0;
1110}
2a2ab3f9 1111
e075ae69
RH
1112/* Return false if this is the stack pointer, or any other fake
1113 register eliminable to the stack pointer. Otherwise, this is
1114 a register operand.
2a2ab3f9 1115
e075ae69
RH
1116 This is used to prevent esp from being used as an index reg.
1117 Which would only happen in pathological cases. */
5f1ec3e6 1118
e075ae69
RH
1119int
1120reg_no_sp_operand (op, mode)
1121 register rtx op;
1122 enum machine_mode mode;
1123{
1124 rtx t = op;
1125 if (GET_CODE (t) == SUBREG)
1126 t = SUBREG_REG (t);
1127 if (t == stack_pointer_rtx || t == arg_pointer_rtx)
1128 return 0;
2a2ab3f9 1129
e075ae69 1130 return register_operand (op, mode);
2a2ab3f9 1131}
b840bfb0 1132
e075ae69 1133/* Return true if op is a Q_REGS class register. */
b840bfb0 1134
e075ae69
RH
1135int
1136q_regs_operand (op, mode)
1137 register rtx op;
1138 enum machine_mode mode;
b840bfb0 1139{
e075ae69
RH
1140 if (mode != VOIDmode && GET_MODE (op) != mode)
1141 return 0;
1142 if (GET_CODE (op) == SUBREG)
1143 op = SUBREG_REG (op);
1144 return QI_REG_P (op);
1145}
b840bfb0 1146
e075ae69 1147/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1148
e075ae69
RH
1149int
1150non_q_regs_operand (op, mode)
1151 register rtx op;
1152 enum machine_mode mode;
1153{
1154 if (mode != VOIDmode && GET_MODE (op) != mode)
1155 return 0;
1156 if (GET_CODE (op) == SUBREG)
1157 op = SUBREG_REG (op);
1158 return NON_QI_REG_P (op);
1159}
b840bfb0 1160
e075ae69
RH
1161/* Return 1 if OP is a comparison operator that can use the condition code
1162 generated by a logical operation, which characteristicly does not set
1163 overflow or carry. To be used with CCNOmode. */
b840bfb0 1164
e075ae69
RH
1165int
1166no_comparison_operator (op, mode)
1167 register rtx op;
1168 enum machine_mode mode;
1169{
1170 return ((mode == VOIDmode || GET_MODE (op) == mode)
1171 && GET_RTX_CLASS (GET_CODE (op)) == '<'
1172 && GET_CODE (op) != LE
1173 && GET_CODE (op) != GT);
1174}
b840bfb0 1175
e075ae69 1176/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
b840bfb0 1177
e075ae69
RH
1178int
1179fcmov_comparison_operator (op, mode)
1180 register rtx op;
1181 enum machine_mode mode;
1182{
1183 return ((mode == VOIDmode || GET_MODE (op) == mode)
1184 && GET_RTX_CLASS (GET_CODE (op)) == '<'
1185 && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
1186}
b840bfb0 1187
e9e80858
JH
1188/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1189
1190int
1191promotable_binary_operator (op, mode)
1192 register rtx op;
1193 enum machine_mode mode ATTRIBUTE_UNUSED;
1194{
1195 switch (GET_CODE (op))
1196 {
1197 case MULT:
1198 /* Modern CPUs have same latency for HImode and SImode multiply,
1199 but 386 and 486 do HImode multiply faster. */
1200 return ix86_cpu > PROCESSOR_I486;
1201 case PLUS:
1202 case AND:
1203 case IOR:
1204 case XOR:
1205 case ASHIFT:
1206 return 1;
1207 default:
1208 return 0;
1209 }
1210}
1211
e075ae69
RH
1212/* Nearly general operand, but accept any const_double, since we wish
1213 to be able to drop them into memory rather than have them get pulled
1214 into registers. */
b840bfb0 1215
2a2ab3f9 1216int
e075ae69
RH
1217cmp_fp_expander_operand (op, mode)
1218 register rtx op;
1219 enum machine_mode mode;
2a2ab3f9 1220{
e075ae69 1221 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1222 return 0;
e075ae69 1223 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1224 return 1;
e075ae69 1225 return general_operand (op, mode);
2a2ab3f9
JVA
1226}
1227
e075ae69 1228/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1229
1230int
e075ae69 1231ext_register_operand (op, mode)
2a2ab3f9 1232 register rtx op;
bb5177ac 1233 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1234{
e075ae69
RH
1235 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1236 return 0;
1237 return register_operand (op, VOIDmode);
1238}
1239
1240/* Return 1 if this is a valid binary floating-point operation.
1241 OP is the expression matched, and MODE is its mode. */
1242
1243int
1244binary_fp_operator (op, mode)
1245 register rtx op;
1246 enum machine_mode mode;
1247{
1248 if (mode != VOIDmode && mode != GET_MODE (op))
1249 return 0;
1250
2a2ab3f9
JVA
1251 switch (GET_CODE (op))
1252 {
e075ae69
RH
1253 case PLUS:
1254 case MINUS:
1255 case MULT:
1256 case DIV:
1257 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1258
2a2ab3f9
JVA
1259 default:
1260 return 0;
1261 }
1262}
fee2770d 1263
e075ae69
RH
1264int
1265mult_operator(op, mode)
1266 register rtx op;
1267 enum machine_mode mode ATTRIBUTE_UNUSED;
1268{
1269 return GET_CODE (op) == MULT;
1270}
1271
1272int
1273div_operator(op, mode)
1274 register rtx op;
1275 enum machine_mode mode ATTRIBUTE_UNUSED;
1276{
1277 return GET_CODE (op) == DIV;
1278}
0a726ef1
JL
1279
1280int
e075ae69
RH
1281arith_or_logical_operator (op, mode)
1282 rtx op;
1283 enum machine_mode mode;
0a726ef1 1284{
e075ae69
RH
1285 return ((mode == VOIDmode || GET_MODE (op) == mode)
1286 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1287 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1288}
1289
e075ae69 1290/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1291
/* Returns 1 if OP is a memory operand whose address contains a
   displacement.  Aborts if the address of a valid memory_operand
   cannot be decomposed (should not happen).  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* memory_operand guarantees a valid address, so decomposition
     failing here indicates an internal inconsistency.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
1307
e075ae69
RH
1308/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
1309 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1310
1311 ??? It seems likely that this will only work because cmpsi is an
1312 expander, and no actual insns use this. */
4f2c8ebb
RS
1313
/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   Accepts any general operand, or the specific SImode pattern
   (and (zero_extract X 8 8) const_int) produced by testqi_ext_0.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  /* Match (and:SI (zero_extract:SI reg (const_int 8) (const_int 8))
	    (const_int N)) — a test of the high byte of a Q register.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 1334
e075ae69
RH
1335/* Returns 1 if OP is memory operand that can not be represented by the
1336 modRM array. */
d784886d
RK
1337
1338int
e075ae69 1339long_memory_operand (op, mode)
d784886d
RK
1340 register rtx op;
1341 enum machine_mode mode;
1342{
e075ae69 1343 if (! memory_operand (op, mode))
d784886d
RK
1344 return 0;
1345
e075ae69 1346 return memory_address_length (op) != 0;
d784886d 1347}
e075ae69
RH
1348\f
1349/* Return true if the constant is something that can be loaded with
1350 a special instruction. Only handle 0.0 and 1.0; others are less
1351 worthwhile. */
57dbca5e
BS
1352
/* Return which special 80387 load instruction matches constant X:
   1 for +0.0 (fldz), 2 for 1.0 (fld1), 0 for no match, and -1 if X is
   not a CONST_DOUBLE at all.  Only 0.0 and 1.0 are handled; others are
   less worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    /* If the float conversion traps, treat X as not special.  */
    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    /* fldz produces +0.0, so -0.0 must not match.  */
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    /* Restore the handler before any early return below.  */
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
1390
2a2ab3f9
JVA
1391/* Returns 1 if OP contains a symbol reference */
1392
/* Returns 1 if OP contains a symbol reference anywhere in its rtx tree
   (a SYMBOL_REF or LABEL_REF at any depth), by recursive walk over the
   rtx format string.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  /* Walk every sub-rtx ('e') and rtx vector ('E') of OP.  */
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
e075ae69
RH
1421
1422/* Return 1 if it is appropriate to emit `ret' instructions in the
1423 body of a function. Do this only if the epilogue is simple, needing a
1424 couple of insns. Prior to reloading, we can't tell how many registers
1425 must be saved, so return 0 then. Return 0 if there is no frame
1426 marker to de-allocate.
1427
1428 If NON_SAVING_SETJMP is defined and true, then it is not possible
1429 for the epilogue to be simple, so return 0. This is a special case
1430 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1431 until final, but jump_optimize may need to know sooner if a
1432 `return' is OK. */
32b5b1aa
SC
1433
/* Return 1 if it is appropriate to emit `ret' instructions in the body
   of the current function: only after reload, and only when either no
   call-saved registers need restoring or a frame pointer is not
   needed.  See the comment block above for the NON_SAVING_SETJMP
   special case.  */

int
ix86_can_use_return_insn_p ()
{
  int regno;
  int nregs = 0;
  int reglimit = (frame_pointer_needed
		  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  /* Before reload we can't tell how many registers must be saved.  */
  if (! reload_completed)
    return 0;

  /* Count call-saved registers (plus the PIC register if used).  */
  for (regno = reglimit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  return nregs == 0 || ! frame_pointer_needed;
}
1459\f
21a427cc 1460static char *pic_label_name;
e075ae69 1461static int pic_label_output;
21a427cc 1462static char *global_offset_table_name;
e9a25f70 1463
e075ae69
RH
1464/* This function generates code for -fpic that loads %ebx with
1465 the return address of the caller and then returns. */
1466
/* Emit, once per file, the helper thunk used by -fpic with deep branch
   prediction: a local label whose body loads the return address
   (i.e. the caller's PC) from the stack into the PIC register and
   returns.  NAME is unused; FILE is the assembly output stream.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call. */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Only emit the thunk once; pic_label_output latches this.  */
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead. */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  /* movl (%esp), %PIC_REG ; ret  */
	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
32b5b1aa 1500
e075ae69
RH
/* Emit the RTL that initializes the PIC register with the address of
   _GLOBAL_OFFSET_TABLE_.  With deep branch prediction the current PC
   is fetched via the helper thunk (see asm_output_function_prefix);
   otherwise it is obtained with the classic call/pop sequence.  */

void
load_pic_register ()
{
  rtx gotsym, pclab;

  /* Lazily create the GOT symbol name and register it as a GC root so
     it survives garbage collection.  */
  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Lazily create the thunk's label name, also GC-rooted.  */
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  /* Without the thunk, prologue_get_pc pushed the PC; pop it into the
     PIC register.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
8dfe5673 1536
e075ae69 1537/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1538
e075ae69
RH
1539static rtx
1540gen_push (arg)
1541 rtx arg;
e9a25f70 1542{
c5c76735
JL
1543 return gen_rtx_SET (VOIDmode,
1544 gen_rtx_MEM (SImode,
1545 gen_rtx_PRE_DEC (SImode,
1546 stack_pointer_rtx)),
1547 arg);
e9a25f70
JL
1548}
1549
65954bd8
JL
1550/* Compute the size of local storage taking into consideration the
1551 desired stack alignment which is to be maintained. Also determine
1552 the number of registers saved below the local storage. */
1553
/* Compute the size of local storage (SIZE plus alignment padding),
   taking into consideration the desired stack alignment which is to be
   maintained.  Also determine the number of registers saved below the
   local storage, returned through NREGS_ON_STACK when non-null.  */

HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
{
  int limit;
  int nregs;
  int regno;
  int padding;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  HOST_WIDE_INT total_size;

  limit = frame_pointer_needed
	  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;

  nregs = 0;

  /* Count call-saved registers (plus the PIC register if used) that
     the prologue will push.  */
  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  padding = 0;
  total_size = size + (nregs * UNITS_PER_WORD);

#ifdef PREFERRED_STACK_BOUNDARY
  {
    int offset;
    int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;

    /* Account for the return address (4 bytes) and, if present, the
       saved frame pointer, which sit above the locals.  */
    offset = 4;
    if (frame_pointer_needed)
      offset += UNITS_PER_WORD;

    total_size += offset;

    /* Round total_size up to the preferred alignment.  */
    padding = ((total_size + preferred_alignment - 1)
	       & -preferred_alignment) - total_size;

    /* Ensure at least as much padding as the return-address/frame
       pointer overhead itself would require to stay aligned.  */
    if (padding < (((offset + preferred_alignment - 1)
		    & -preferred_alignment) - offset))
      padding += preferred_alignment;

    /* Don't bother aligning the stack of a leaf function
       which doesn't allocate any stack slots. */
    if (size == 0 && current_function_is_leaf)
      padding = 0;
  }
#endif

  if (nregs_on_stack)
    *nregs_on_stack = nregs;

  return size + padding;
}
1610
e075ae69
RH
1611/* Expand the prologue into a bunch of separate insns. */
1612
/* Expand the function prologue as RTL: save the frame pointer (if
   needed), allocate the frame (via sub, or an _alloca call when stack
   probing is required for large frames), push the call-saved
   registers, and set up the PIC register.  Frame-related insns are
   marked for unwind info via RTX_FRAME_RELATED_P.  */

void
ix86_expand_prologue ()
{
  register int regno;
  int limit;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
  rtx insn;

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* push %ebp ; mov %esp, %ebp  */
      insn = emit_insn (gen_push (frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      /* Small frame (or no probing required): just adjust %esp.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
						       stack_pointer_rtx,
						       GEN_INT (-tsize),
						       frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large frame with stack probing: call _alloca with the size in
	 %eax so each page is touched in order.  */
      /* ??? Is this only valid for Win32? */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call uses %eax so it isn't deleted.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

  /* Push every call-saved register that is live (plus the PIC register
     if used).  */
  limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
1689
79325812 1690/* Restore function stack, frame, and registers. */
e9a25f70 1691
/* Restore function stack, frame, and registers, then emit the return.
   Chooses between popping saved registers (after restoring %esp from
   %ebp when %esp is unreliable) and restoring them with frame-pointer
   relative moves, then deallocates the frame and handles callee-pops
   calling conventions.  */

void
ix86_expand_epilogue ()
{
  register int regno;
  register int limit;
  int nregs;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);

  /* SP is often unreliable so we may have to go off the frame pointer. */

  /* Frame-pointer-relative offset of the lowest saved register.  */
  offset = -(tsize + nregs * UNITS_PER_WORD);

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since that's
     less work than reloading sp and popping the register.  Otherwise,
     restore sp (if necessary) and pop the registers. */

  limit = (frame_pointer_needed
	   ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  if (nregs > 1 || sp_valid)
    {
      /* Point %esp at the saved registers if it isn't already valid.  */
      if ( !sp_valid )
	{
	  rtx addr_offset;
	  addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
	  addr_offset = XEXP (addr_offset, 0);

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset));
	}

      /* Pop registers in the reverse order the prologue pushed them.  */
      for (regno = 0; regno < limit; regno++)
	if ((regs_ever_live[regno] && ! call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  {
	    emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
	  }
    }
  else
    {
      /* At most one register to restore: load it %ebp-relative.  */
      for (regno = 0; regno < limit; regno++)
	if ((regs_ever_live[regno] && ! call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  {
	    emit_move_insn (gen_rtx_REG (SImode, regno),
			    adj_offsettable_operand (AT_BP (Pmode), offset));
	    offset += 4;
	  }
    }

  if (frame_pointer_needed)
    {
      /* If not an i386, mov & pop is faster than "leave". */
      if (TARGET_USE_LEAVE)
	emit_insn (gen_leave());
      else
	{
	  emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
						    frame_pointer_rtx));
	  emit_insn (gen_popsi1 (frame_pointer_rtx));
	}
    }
  else if (tsize)
    {
      /* Intel's docs say that for 4 or 8 bytes of stack frame one should
	 use `pop' and not `add'. */
      int use_pop = tsize == 4;
      rtx edx = 0, ecx;

      /* Use two pops only for the Pentium processors. */
      if (tsize == 8 && !TARGET_386 && !TARGET_486)
	{
	  rtx retval = current_function_return_rtx;

	  edx = gen_rtx_REG (SImode, 1);

	  /* This case is a bit more complex.  Since we cannot pop into
	     %ecx twice we need a second register.  But this is only
	     available if the return value is not of DImode in which
	     case the %edx register is not available. */
	  use_pop = (retval == NULL
		     || ! reg_overlap_mentioned_p (edx, retval));
	}

      if (use_pop)
	{
	  ecx = gen_rtx_REG (SImode, 2);

	  /* We have to prevent the two pops here from being scheduled.
	     GCC otherwise would try in some situation to put other
	     instructions in between them which has a bad effect. */
	  emit_insn (gen_blockage ());
	  emit_insn (gen_popsi1 (ecx));
	  if (tsize == 8)
	    emit_insn (gen_popsi1 (edx));
	}
      else
	{
	  /* If there is no frame pointer, we must still release the frame. */
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (tsize)));
	}
    }

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 32K bytes (maybe 64K?  Is it signed?).  If
	 asked to pop more, pop return address, do explicit add, and jump
	 indirectly to the caller. */

      if (current_function_pops_args >= 32768)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_indirect_jump (ecx);
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
1829\f
1830/* Extract the parts of an RTL expression that is a valid memory address
1831 for an instruction. Return false if the structure of the address is
1832 grossly off. */
1833
/* Extract the parts of an RTL expression ADDR that is a valid memory
   address for an instruction into *OUT (base, index, displacement, and
   scale factor), applying i386 encoding special cases.  Return FALSE
   if the structure of the address is grossly off, TRUE otherwise.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion. */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 map to the encodable scales 1/2/4/8.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale. */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling;
     swap them into the base slot since they can't be encoded as index.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement. */
  if (base == frame_pointer_rtx && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0]. */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2. */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement. */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
3b3c6a3f 1952
e075ae69
RH
1953/* Determine if a given CONST RTX is a valid memory displacement
1954 in PIC mode. */
1955
59be65f6 1956int
91bb873f
RH
1957legitimate_pic_address_disp_p (disp)
1958 register rtx disp;
1959{
1960 if (GET_CODE (disp) != CONST)
1961 return 0;
1962 disp = XEXP (disp, 0);
1963
1964 if (GET_CODE (disp) == PLUS)
1965 {
1966 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
1967 return 0;
1968 disp = XEXP (disp, 0);
1969 }
1970
1971 if (GET_CODE (disp) != UNSPEC
1972 || XVECLEN (disp, 0) != 1)
1973 return 0;
1974
1975 /* Must be @GOT or @GOTOFF. */
1976 if (XINT (disp, 1) != 6
1977 && XINT (disp, 1) != 7)
1978 return 0;
1979
1980 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
1981 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
1982 return 0;
1983
1984 return 1;
1985}
1986
e075ae69
RH
1987/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
1988 memory address for an instruction. The MODE argument is the machine mode
1989 for the MEM expression that wants to use this address.
1990
1991 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1992 convert common non-canonical forms to canonical form so that they will
1993 be recognized. */
1994
3b3c6a3f
MM
1995int
1996legitimate_address_p (mode, addr, strict)
1997 enum machine_mode mode;
1998 register rtx addr;
1999 int strict;
2000{
e075ae69
RH
2001 struct ix86_address parts;
2002 rtx base, index, disp;
2003 HOST_WIDE_INT scale;
2004 const char *reason = NULL;
2005 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
2006
2007 if (TARGET_DEBUG_ADDR)
2008 {
2009 fprintf (stderr,
e9a25f70 2010 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 2011 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
2012 debug_rtx (addr);
2013 }
2014
e075ae69 2015 if (! ix86_decompose_address (addr, &parts))
3b3c6a3f 2016 {
e075ae69
RH
2017 reason = "decomposition failed";
2018 goto error;
3b3c6a3f
MM
2019 }
2020
e075ae69
RH
2021 base = parts.base;
2022 index = parts.index;
2023 disp = parts.disp;
2024 scale = parts.scale;
91f0226f 2025
e075ae69 2026 /* Validate base register.
e9a25f70
JL
2027
2028 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
2029 is one word out of a two word structure, which is represented internally
2030 as a DImode int. */
e9a25f70 2031
3b3c6a3f
MM
2032 if (base)
2033 {
e075ae69
RH
2034 reason_rtx = base;
2035
3d771dfd 2036 if (GET_CODE (base) != REG)
3b3c6a3f 2037 {
e075ae69
RH
2038 reason = "base is not a register";
2039 goto error;
3b3c6a3f
MM
2040 }
2041
c954bd01
RH
2042 if (GET_MODE (base) != Pmode)
2043 {
e075ae69
RH
2044 reason = "base is not in Pmode";
2045 goto error;
c954bd01
RH
2046 }
2047
e9a25f70
JL
2048 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2049 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 2050 {
e075ae69
RH
2051 reason = "base is not valid";
2052 goto error;
3b3c6a3f
MM
2053 }
2054 }
2055
e075ae69 2056 /* Validate index register.
e9a25f70
JL
2057
2058 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
2059 is one word out of a two word structure, which is represented internally
2060 as a DImode int. */
e075ae69
RH
2061
2062 if (index)
3b3c6a3f 2063 {
e075ae69
RH
2064 reason_rtx = index;
2065
2066 if (GET_CODE (index) != REG)
3b3c6a3f 2067 {
e075ae69
RH
2068 reason = "index is not a register";
2069 goto error;
3b3c6a3f
MM
2070 }
2071
e075ae69 2072 if (GET_MODE (index) != Pmode)
c954bd01 2073 {
e075ae69
RH
2074 reason = "index is not in Pmode";
2075 goto error;
c954bd01
RH
2076 }
2077
e075ae69
RH
2078 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2079 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 2080 {
e075ae69
RH
2081 reason = "index is not valid";
2082 goto error;
3b3c6a3f
MM
2083 }
2084 }
3b3c6a3f 2085
e075ae69
RH
2086 /* Validate scale factor. */
2087 if (scale != 1)
3b3c6a3f 2088 {
e075ae69
RH
2089 reason_rtx = GEN_INT (scale);
2090 if (!index)
3b3c6a3f 2091 {
e075ae69
RH
2092 reason = "scale without index";
2093 goto error;
3b3c6a3f
MM
2094 }
2095
e075ae69 2096 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 2097 {
e075ae69
RH
2098 reason = "scale is not a valid multiplier";
2099 goto error;
3b3c6a3f
MM
2100 }
2101 }
2102
91bb873f 2103 /* Validate displacement. */
3b3c6a3f
MM
2104 if (disp)
2105 {
e075ae69
RH
2106 reason_rtx = disp;
2107
91bb873f 2108 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 2109 {
e075ae69
RH
2110 reason = "displacement is not constant";
2111 goto error;
3b3c6a3f
MM
2112 }
2113
e075ae69 2114 if (GET_CODE (disp) == CONST_DOUBLE)
3b3c6a3f 2115 {
e075ae69
RH
2116 reason = "displacement is a const_double";
2117 goto error;
3b3c6a3f
MM
2118 }
2119
91bb873f 2120 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 2121 {
91bb873f
RH
2122 if (! legitimate_pic_address_disp_p (disp))
2123 {
e075ae69
RH
2124 reason = "displacement is an invalid pic construct";
2125 goto error;
91bb873f
RH
2126 }
2127
e075ae69
RH
2128 /* Verify that a symbolic pic displacement includes
2129 the pic_offset_table_rtx register. */
91bb873f 2130 if (base != pic_offset_table_rtx
e075ae69 2131 && (index != pic_offset_table_rtx || scale != 1))
91bb873f 2132 {
e075ae69
RH
2133 reason = "pic displacement against invalid base";
2134 goto error;
91bb873f 2135 }
3b3c6a3f 2136 }
91bb873f 2137 else if (HALF_PIC_P ())
3b3c6a3f 2138 {
91bb873f 2139 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 2140 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 2141 {
e075ae69
RH
2142 reason = "displacement is an invalid half-pic reference";
2143 goto error;
91bb873f 2144 }
3b3c6a3f
MM
2145 }
2146 }
2147
e075ae69 2148 /* Everything looks valid. */
3b3c6a3f 2149 if (TARGET_DEBUG_ADDR)
e075ae69 2150 fprintf (stderr, "Success.\n");
3b3c6a3f 2151 return TRUE;
e075ae69
RH
2152
2153error:
2154 if (TARGET_DEBUG_ADDR)
2155 {
2156 fprintf (stderr, "Error: %s\n", reason);
2157 debug_rtx (reason_rtx);
2158 }
2159 return FALSE;
3b3c6a3f 2160}
3b3c6a3f
MM
2161\f
2162/* Return a legitimate reference for ORIG (an address) using the
2163 register REG. If REG is 0, a new pseudo is generated.
2164
91bb873f 2165 There are two types of references that must be handled:
3b3c6a3f
MM
2166
2167 1. Global data references must load the address from the GOT, via
2168 the PIC reg. An insn is emitted to do this load, and the reg is
2169 returned.
2170
91bb873f
RH
2171 2. Static data references, constant pool addresses, and code labels
2172 compute the address as an offset from the GOT, whose base is in
2173 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2174 differentiate them from global data objects. The returned
2175 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
2176
2177 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 2178 reg also appears in the address. */
3b3c6a3f
MM
2179
2180rtx
2181legitimize_pic_address (orig, reg)
2182 rtx orig;
2183 rtx reg;
2184{
2185 rtx addr = orig;
2186 rtx new = orig;
91bb873f 2187 rtx base;
3b3c6a3f 2188
91bb873f
RH
2189 if (GET_CODE (addr) == LABEL_REF
2190 || (GET_CODE (addr) == SYMBOL_REF
2191 && (CONSTANT_POOL_ADDRESS_P (addr)
2192 || SYMBOL_REF_FLAG (addr))))
3b3c6a3f 2193 {
91bb873f
RH
2194 /* This symbol may be referenced via a displacement from the PIC
2195 base address (@GOTOFF). */
3b3c6a3f 2196
91bb873f
RH
2197 current_function_uses_pic_offset_table = 1;
2198 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2199 new = gen_rtx_CONST (VOIDmode, new);
2200 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 2201
91bb873f
RH
2202 if (reg != 0)
2203 {
3b3c6a3f 2204 emit_move_insn (reg, new);
91bb873f 2205 new = reg;
3b3c6a3f 2206 }
3b3c6a3f 2207 }
91bb873f 2208 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 2209 {
91bb873f
RH
2210 /* This symbol must be referenced via a load from the
2211 Global Offset Table (@GOT). */
3b3c6a3f 2212
91bb873f
RH
2213 current_function_uses_pic_offset_table = 1;
2214 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2215 new = gen_rtx_CONST (VOIDmode, new);
2216 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2217 new = gen_rtx_MEM (Pmode, new);
2218 RTX_UNCHANGING_P (new) = 1;
3b3c6a3f
MM
2219
2220 if (reg == 0)
2221 reg = gen_reg_rtx (Pmode);
91bb873f
RH
2222 emit_move_insn (reg, new);
2223 new = reg;
2224 }
2225 else
2226 {
2227 if (GET_CODE (addr) == CONST)
3b3c6a3f 2228 {
91bb873f
RH
2229 addr = XEXP (addr, 0);
2230 if (GET_CODE (addr) == UNSPEC)
2231 {
2232 /* Check that the unspec is one of the ones we generate? */
2233 }
2234 else if (GET_CODE (addr) != PLUS)
2235 abort();
3b3c6a3f 2236 }
91bb873f
RH
2237 if (GET_CODE (addr) == PLUS)
2238 {
2239 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 2240
91bb873f
RH
2241 /* Check first to see if this is a constant offset from a @GOTOFF
2242 symbol reference. */
2243 if ((GET_CODE (op0) == LABEL_REF
2244 || (GET_CODE (op0) == SYMBOL_REF
2245 && (CONSTANT_POOL_ADDRESS_P (op0)
2246 || SYMBOL_REF_FLAG (op0))))
2247 && GET_CODE (op1) == CONST_INT)
2248 {
2249 current_function_uses_pic_offset_table = 1;
2250 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2251 new = gen_rtx_PLUS (VOIDmode, new, op1);
2252 new = gen_rtx_CONST (VOIDmode, new);
2253 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2254
2255 if (reg != 0)
2256 {
2257 emit_move_insn (reg, new);
2258 new = reg;
2259 }
2260 }
2261 else
2262 {
2263 base = legitimize_pic_address (XEXP (addr, 0), reg);
2264 new = legitimize_pic_address (XEXP (addr, 1),
2265 base == reg ? NULL_RTX : reg);
2266
2267 if (GET_CODE (new) == CONST_INT)
2268 new = plus_constant (base, INTVAL (new));
2269 else
2270 {
2271 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2272 {
2273 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2274 new = XEXP (new, 1);
2275 }
2276 new = gen_rtx_PLUS (Pmode, base, new);
2277 }
2278 }
2279 }
3b3c6a3f
MM
2280 }
2281 return new;
2282}
2283\f
3b3c6a3f
MM
2284/* Try machine-dependent ways of modifying an illegitimate address
2285 to be legitimate. If we find one, return the new, valid address.
2286 This macro is used in only one place: `memory_address' in explow.c.
2287
2288 OLDX is the address as it was before break_out_memory_refs was called.
2289 In some cases it is useful to look at this to decide what needs to be done.
2290
2291 MODE and WIN are passed so that this macro can use
2292 GO_IF_LEGITIMATE_ADDRESS.
2293
2294 It is always safe for this macro to do nothing. It exists to recognize
2295 opportunities to optimize the output.
2296
2297 For the 80386, we handle X+REG by loading X into a register R and
2298 using R+REG. R will go in a general reg and indexing will be used.
2299 However, if REG is a broken-out memory address or multiplication,
2300 nothing needs to be done because REG can certainly go in a general reg.
2301
2302 When -fpic is used, special handling is needed for symbolic references.
2303 See comments by legitimize_pic_address in i386.c for details. */
2304
2305rtx
2306legitimize_address (x, oldx, mode)
2307 register rtx x;
bb5177ac 2308 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
2309 enum machine_mode mode;
2310{
2311 int changed = 0;
2312 unsigned log;
2313
2314 if (TARGET_DEBUG_ADDR)
2315 {
e9a25f70
JL
2316 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2317 GET_MODE_NAME (mode));
3b3c6a3f
MM
2318 debug_rtx (x);
2319 }
2320
2321 if (flag_pic && SYMBOLIC_CONST (x))
2322 return legitimize_pic_address (x, 0);
2323
2324 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2325 if (GET_CODE (x) == ASHIFT
2326 && GET_CODE (XEXP (x, 1)) == CONST_INT
2327 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2328 {
2329 changed = 1;
a269a03c
JC
2330 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2331 GEN_INT (1 << log));
3b3c6a3f
MM
2332 }
2333
2334 if (GET_CODE (x) == PLUS)
2335 {
e9a25f70
JL
2336 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2337
3b3c6a3f
MM
2338 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2339 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2340 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2341 {
2342 changed = 1;
c5c76735
JL
2343 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2344 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2345 GEN_INT (1 << log));
3b3c6a3f
MM
2346 }
2347
2348 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2349 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2350 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2351 {
2352 changed = 1;
c5c76735
JL
2353 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2354 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2355 GEN_INT (1 << log));
3b3c6a3f
MM
2356 }
2357
e9a25f70 2358 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
2359 if (GET_CODE (XEXP (x, 1)) == MULT)
2360 {
2361 rtx tmp = XEXP (x, 0);
2362 XEXP (x, 0) = XEXP (x, 1);
2363 XEXP (x, 1) = tmp;
2364 changed = 1;
2365 }
2366
2367 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2368 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2369 created by virtual register instantiation, register elimination, and
2370 similar optimizations. */
2371 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2372 {
2373 changed = 1;
c5c76735
JL
2374 x = gen_rtx_PLUS (Pmode,
2375 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2376 XEXP (XEXP (x, 1), 0)),
2377 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
2378 }
2379
e9a25f70
JL
2380 /* Canonicalize
2381 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
2382 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2383 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2384 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2385 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2386 && CONSTANT_P (XEXP (x, 1)))
2387 {
00c79232
ML
2388 rtx constant;
2389 rtx other = NULL_RTX;
3b3c6a3f
MM
2390
2391 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2392 {
2393 constant = XEXP (x, 1);
2394 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2395 }
2396 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2397 {
2398 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2399 other = XEXP (x, 1);
2400 }
2401 else
2402 constant = 0;
2403
2404 if (constant)
2405 {
2406 changed = 1;
c5c76735
JL
2407 x = gen_rtx_PLUS (Pmode,
2408 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2409 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2410 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
2411 }
2412 }
2413
2414 if (changed && legitimate_address_p (mode, x, FALSE))
2415 return x;
2416
2417 if (GET_CODE (XEXP (x, 0)) == MULT)
2418 {
2419 changed = 1;
2420 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2421 }
2422
2423 if (GET_CODE (XEXP (x, 1)) == MULT)
2424 {
2425 changed = 1;
2426 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2427 }
2428
2429 if (changed
2430 && GET_CODE (XEXP (x, 1)) == REG
2431 && GET_CODE (XEXP (x, 0)) == REG)
2432 return x;
2433
2434 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2435 {
2436 changed = 1;
2437 x = legitimize_pic_address (x, 0);
2438 }
2439
2440 if (changed && legitimate_address_p (mode, x, FALSE))
2441 return x;
2442
2443 if (GET_CODE (XEXP (x, 0)) == REG)
2444 {
2445 register rtx temp = gen_reg_rtx (Pmode);
2446 register rtx val = force_operand (XEXP (x, 1), temp);
2447 if (val != temp)
2448 emit_move_insn (temp, val);
2449
2450 XEXP (x, 1) = temp;
2451 return x;
2452 }
2453
2454 else if (GET_CODE (XEXP (x, 1)) == REG)
2455 {
2456 register rtx temp = gen_reg_rtx (Pmode);
2457 register rtx val = force_operand (XEXP (x, 0), temp);
2458 if (val != temp)
2459 emit_move_insn (temp, val);
2460
2461 XEXP (x, 0) = temp;
2462 return x;
2463 }
2464 }
2465
2466 return x;
2467}
2a2ab3f9
JVA
2468\f
2469/* Print an integer constant expression in assembler syntax. Addition
2470 and subtraction are the only arithmetic that may appear in these
2471 expressions. FILE is the stdio stream to write to, X is the rtx, and
2472 CODE is the operand print code from the output string. */
2473
2474static void
2475output_pic_addr_const (file, x, code)
2476 FILE *file;
2477 rtx x;
2478 int code;
2479{
2480 char buf[256];
2481
2482 switch (GET_CODE (x))
2483 {
2484 case PC:
2485 if (flag_pic)
2486 putc ('.', file);
2487 else
2488 abort ();
2489 break;
2490
2491 case SYMBOL_REF:
91bb873f
RH
2492 assemble_name (file, XSTR (x, 0));
2493 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2494 fputs ("@PLT", file);
2a2ab3f9
JVA
2495 break;
2496
91bb873f
RH
2497 case LABEL_REF:
2498 x = XEXP (x, 0);
2499 /* FALLTHRU */
2a2ab3f9
JVA
2500 case CODE_LABEL:
2501 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2502 assemble_name (asm_out_file, buf);
2503 break;
2504
2505 case CONST_INT:
f64cecad 2506 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
2507 break;
2508
2509 case CONST:
2510 /* This used to output parentheses around the expression,
2511 but that does not work on the 386 (either ATT or BSD assembler). */
2512 output_pic_addr_const (file, XEXP (x, 0), code);
2513 break;
2514
2515 case CONST_DOUBLE:
2516 if (GET_MODE (x) == VOIDmode)
2517 {
2518 /* We can use %d if the number is <32 bits and positive. */
2519 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
2520 fprintf (file, "0x%lx%08lx",
2521 (unsigned long) CONST_DOUBLE_HIGH (x),
2522 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 2523 else
f64cecad 2524 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
2525 }
2526 else
2527 /* We can't handle floating point constants;
2528 PRINT_OPERAND must handle them. */
2529 output_operand_lossage ("floating constant misused");
2530 break;
2531
2532 case PLUS:
e9a25f70 2533 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
2534 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2535 {
2a2ab3f9 2536 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 2537 putc ('+', file);
e9a25f70 2538 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 2539 }
91bb873f 2540 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 2541 {
2a2ab3f9 2542 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 2543 putc ('+', file);
e9a25f70 2544 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 2545 }
91bb873f
RH
2546 else
2547 abort ();
2a2ab3f9
JVA
2548 break;
2549
2550 case MINUS:
e075ae69 2551 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2a2ab3f9 2552 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 2553 putc ('-', file);
2a2ab3f9 2554 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 2555 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2a2ab3f9
JVA
2556 break;
2557
91bb873f
RH
2558 case UNSPEC:
2559 if (XVECLEN (x, 0) != 1)
2560 abort ();
2561 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2562 switch (XINT (x, 1))
2563 {
2564 case 6:
2565 fputs ("@GOT", file);
2566 break;
2567 case 7:
2568 fputs ("@GOTOFF", file);
2569 break;
2570 case 8:
2571 fputs ("@PLT", file);
2572 break;
2573 default:
2574 output_operand_lossage ("invalid UNSPEC as operand");
2575 break;
2576 }
2577 break;
2578
2a2ab3f9
JVA
2579 default:
2580 output_operand_lossage ("invalid expression as operand");
2581 }
2582}
2583\f
a269a03c 2584static void
e075ae69 2585put_condition_code (code, mode, reverse, fp, file)
a269a03c 2586 enum rtx_code code;
e075ae69
RH
2587 enum machine_mode mode;
2588 int reverse, fp;
a269a03c
JC
2589 FILE *file;
2590{
a269a03c
JC
2591 const char *suffix;
2592
a269a03c
JC
2593 if (reverse)
2594 code = reverse_condition (code);
e075ae69 2595
a269a03c
JC
2596 switch (code)
2597 {
2598 case EQ:
2599 suffix = "e";
2600 break;
a269a03c
JC
2601 case NE:
2602 suffix = "ne";
2603 break;
a269a03c 2604 case GT:
e075ae69
RH
2605 if (mode == CCNOmode)
2606 abort ();
2607 suffix = "g";
a269a03c 2608 break;
a269a03c 2609 case GTU:
e075ae69
RH
2610 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2611 Those same assemblers have the same but opposite losage on cmov. */
2612 suffix = fp ? "nbe" : "a";
a269a03c 2613 break;
a269a03c 2614 case LT:
e075ae69 2615 if (mode == CCNOmode)
a269a03c
JC
2616 suffix = "s";
2617 else
e075ae69 2618 suffix = "l";
a269a03c 2619 break;
a269a03c
JC
2620 case LTU:
2621 suffix = "b";
2622 break;
a269a03c 2623 case GE:
e075ae69 2624 if (mode == CCNOmode)
a269a03c
JC
2625 suffix = "ns";
2626 else
e075ae69 2627 suffix = "ge";
a269a03c 2628 break;
a269a03c 2629 case GEU:
e075ae69
RH
2630 /* ??? As above. */
2631 suffix = fp ? "nb" : "ae";
a269a03c 2632 break;
a269a03c 2633 case LE:
e075ae69
RH
2634 if (mode == CCNOmode)
2635 abort ();
2636 suffix = "le";
a269a03c 2637 break;
a269a03c
JC
2638 case LEU:
2639 suffix = "be";
2640 break;
a269a03c
JC
2641 default:
2642 abort ();
2643 }
2644 fputs (suffix, file);
2645}
2646
e075ae69
RH
2647void
2648print_reg (x, code, file)
2649 rtx x;
2650 int code;
2651 FILE *file;
e5cb57e8 2652{
e075ae69
RH
2653 if (REGNO (x) == ARG_POINTER_REGNUM
2654 || REGNO (x) == FLAGS_REG
2655 || REGNO (x) == FPSR_REG)
2656 abort ();
e9a25f70 2657
e075ae69
RH
2658 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
2659 putc ('%', file);
2660
2661 if (code == 'w')
2662 code = 2;
2663 else if (code == 'b')
2664 code = 1;
2665 else if (code == 'k')
2666 code = 4;
2667 else if (code == 'y')
2668 code = 3;
2669 else if (code == 'h')
2670 code = 0;
2671 else
2672 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 2673
e075ae69
RH
2674 switch (code)
2675 {
2676 case 3:
2677 if (STACK_TOP_P (x))
2678 {
2679 fputs ("st(0)", file);
2680 break;
2681 }
2682 /* FALLTHRU */
2683 case 4:
2684 case 8:
2685 case 12:
2686 if (! FP_REG_P (x))
2687 putc ('e', file);
2688 /* FALLTHRU */
2689 case 2:
2690 fputs (hi_reg_name[REGNO (x)], file);
2691 break;
2692 case 1:
2693 fputs (qi_reg_name[REGNO (x)], file);
2694 break;
2695 case 0:
2696 fputs (qi_high_reg_name[REGNO (x)], file);
2697 break;
2698 default:
2699 abort ();
fe25fea3 2700 }
e5cb57e8
SC
2701}
2702
2a2ab3f9 2703/* Meaning of CODE:
fe25fea3 2704 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 2705 C -- print opcode suffix for set/cmov insn.
fe25fea3 2706 c -- like C, but print reversed condition
2a2ab3f9
JVA
2707 R -- print the prefix for register names.
2708 z -- print the opcode suffix for the size of the current operand.
2709 * -- print a star (in certain assembler syntax)
2710 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
2711 s -- print a shift double count, followed by the assemblers argument
2712 delimiter.
fe25fea3
SC
2713 b -- print the QImode name of the register for the indicated operand.
2714 %b0 would print %al if operands[0] is reg 0.
2715 w -- likewise, print the HImode name of the register.
2716 k -- likewise, print the SImode name of the register.
2717 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
e075ae69 2718 y -- print "st(0)" instead of "st" as a register. */
2a2ab3f9
JVA
2719
2720void
2721print_operand (file, x, code)
2722 FILE *file;
2723 rtx x;
2724 int code;
2725{
2726 if (code)
2727 {
2728 switch (code)
2729 {
2730 case '*':
e075ae69 2731 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
2732 putc ('*', file);
2733 return;
2734
2a2ab3f9 2735 case 'L':
e075ae69
RH
2736 if (ASSEMBLER_DIALECT == 0)
2737 putc ('l', file);
2a2ab3f9
JVA
2738 return;
2739
2740 case 'W':
e075ae69
RH
2741 if (ASSEMBLER_DIALECT == 0)
2742 putc ('w', file);
2a2ab3f9
JVA
2743 return;
2744
2745 case 'B':
e075ae69
RH
2746 if (ASSEMBLER_DIALECT == 0)
2747 putc ('b', file);
2a2ab3f9
JVA
2748 return;
2749
2750 case 'Q':
e075ae69
RH
2751 if (ASSEMBLER_DIALECT == 0)
2752 putc ('l', file);
2a2ab3f9
JVA
2753 return;
2754
2755 case 'S':
e075ae69
RH
2756 if (ASSEMBLER_DIALECT == 0)
2757 putc ('s', file);
2a2ab3f9
JVA
2758 return;
2759
5f1ec3e6 2760 case 'T':
e075ae69
RH
2761 if (ASSEMBLER_DIALECT == 0)
2762 putc ('t', file);
5f1ec3e6
JVA
2763 return;
2764
2a2ab3f9
JVA
2765 case 'z':
2766 /* 387 opcodes don't get size suffixes if the operands are
2767 registers. */
2768
2769 if (STACK_REG_P (x))
2770 return;
2771
e075ae69
RH
2772 /* Intel syntax has no truck with instruction suffixes. */
2773 if (ASSEMBLER_DIALECT != 0)
2774 return;
2775
2a2ab3f9
JVA
2776 /* this is the size of op from size of operand */
2777 switch (GET_MODE_SIZE (GET_MODE (x)))
2778 {
e075ae69
RH
2779 case 1:
2780 putc ('b', file);
2781 return;
2782
2a2ab3f9 2783 case 2:
e075ae69 2784 putc ('w', file);
2a2ab3f9
JVA
2785 return;
2786
2787 case 4:
2788 if (GET_MODE (x) == SFmode)
2789 {
e075ae69 2790 putc ('s', file);
2a2ab3f9
JVA
2791 return;
2792 }
2793 else
e075ae69 2794 putc ('l', file);
2a2ab3f9
JVA
2795 return;
2796
5f1ec3e6 2797 case 12:
e075ae69
RH
2798 putc ('t', file);
2799 return;
5f1ec3e6 2800
2a2ab3f9
JVA
2801 case 8:
2802 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
2803 {
2804#ifdef GAS_MNEMONICS
e075ae69 2805 putc ('q', file);
56c0e8fa 2806#else
e075ae69
RH
2807 putc ('l', file);
2808 putc ('l', file);
56c0e8fa
JVA
2809#endif
2810 }
e075ae69
RH
2811 else
2812 putc ('l', file);
2a2ab3f9
JVA
2813 return;
2814 }
4af3895e
JVA
2815
2816 case 'b':
2817 case 'w':
2818 case 'k':
2819 case 'h':
2820 case 'y':
5cb6195d 2821 case 'X':
e075ae69 2822 case 'P':
4af3895e
JVA
2823 break;
2824
2d49677f
SC
2825 case 's':
2826 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
2827 {
2828 PRINT_OPERAND (file, x, 0);
e075ae69 2829 putc (',', file);
2d49677f 2830 }
a269a03c
JC
2831 return;
2832
1853aadd 2833 case 'C':
e075ae69 2834 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 2835 return;
fe25fea3 2836 case 'F':
e075ae69 2837 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
2838 return;
2839
e9a25f70 2840 /* Like above, but reverse condition */
e075ae69
RH
2841 case 'c':
2842 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
2843 return;
fe25fea3 2844 case 'f':
e075ae69 2845 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 2846 return;
e5cb57e8 2847
4af3895e 2848 default:
68daafd4
JVA
2849 {
2850 char str[50];
68daafd4
JVA
2851 sprintf (str, "invalid operand code `%c'", code);
2852 output_operand_lossage (str);
2853 }
2a2ab3f9
JVA
2854 }
2855 }
e9a25f70 2856
2a2ab3f9
JVA
2857 if (GET_CODE (x) == REG)
2858 {
2859 PRINT_REG (x, code, file);
2860 }
e9a25f70 2861
2a2ab3f9
JVA
2862 else if (GET_CODE (x) == MEM)
2863 {
e075ae69
RH
2864 /* No `byte ptr' prefix for call instructions. */
2865 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 2866 {
69ddee61 2867 const char * size;
e075ae69
RH
2868 switch (GET_MODE_SIZE (GET_MODE (x)))
2869 {
2870 case 1: size = "BYTE"; break;
2871 case 2: size = "WORD"; break;
2872 case 4: size = "DWORD"; break;
2873 case 8: size = "QWORD"; break;
2874 case 12: size = "XWORD"; break;
2875 default:
2876 abort();
2877 }
2878 fputs (size, file);
2879 fputs (" PTR ", file);
2a2ab3f9 2880 }
e075ae69
RH
2881
2882 x = XEXP (x, 0);
2883 if (flag_pic && CONSTANT_ADDRESS_P (x))
2884 output_pic_addr_const (file, x, code);
2a2ab3f9 2885 else
e075ae69 2886 output_address (x);
2a2ab3f9 2887 }
e9a25f70 2888
2a2ab3f9
JVA
2889 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
2890 {
e9a25f70
JL
2891 REAL_VALUE_TYPE r;
2892 long l;
2893
5f1ec3e6
JVA
2894 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2895 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
2896
2897 if (ASSEMBLER_DIALECT == 0)
2898 putc ('$', file);
52267fcb 2899 fprintf (file, "0x%lx", l);
5f1ec3e6 2900 }
e9a25f70 2901
5f1ec3e6
JVA
2902 /* These float cases don't actually occur as immediate operands. */
2903 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
2904 {
e9a25f70
JL
2905 REAL_VALUE_TYPE r;
2906 char dstr[30];
2907
5f1ec3e6
JVA
2908 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2909 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
2910 fprintf (file, "%s", dstr);
2a2ab3f9 2911 }
e9a25f70 2912
5f1ec3e6 2913 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
2a2ab3f9 2914 {
e9a25f70
JL
2915 REAL_VALUE_TYPE r;
2916 char dstr[30];
2917
5f1ec3e6
JVA
2918 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2919 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
2920 fprintf (file, "%s", dstr);
2a2ab3f9 2921 }
79325812 2922 else
2a2ab3f9 2923 {
4af3895e 2924 if (code != 'P')
2a2ab3f9 2925 {
695dac07 2926 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
2927 {
2928 if (ASSEMBLER_DIALECT == 0)
2929 putc ('$', file);
2930 }
2a2ab3f9
JVA
2931 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
2932 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
2933 {
2934 if (ASSEMBLER_DIALECT == 0)
2935 putc ('$', file);
2936 else
2937 fputs ("OFFSET FLAT:", file);
2938 }
2a2ab3f9 2939 }
e075ae69
RH
2940 if (GET_CODE (x) == CONST_INT)
2941 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2942 else if (flag_pic)
2a2ab3f9
JVA
2943 output_pic_addr_const (file, x, code);
2944 else
2945 output_addr_const (file, x);
2946 }
2947}
2948\f
2949/* Print a memory operand whose address is ADDR. */
2950
2951void
2952print_operand_address (file, addr)
2953 FILE *file;
2954 register rtx addr;
2955{
e075ae69
RH
2956 struct ix86_address parts;
2957 rtx base, index, disp;
2958 int scale;
e9a25f70 2959
e075ae69
RH
2960 if (! ix86_decompose_address (addr, &parts))
2961 abort ();
e9a25f70 2962
e075ae69
RH
2963 base = parts.base;
2964 index = parts.index;
2965 disp = parts.disp;
2966 scale = parts.scale;
e9a25f70 2967
e075ae69
RH
2968 if (!base && !index)
2969 {
2970 /* Displacement only requires special attention. */
e9a25f70 2971
e075ae69 2972 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 2973 {
e075ae69
RH
2974 if (ASSEMBLER_DIALECT != 0)
2975 fputs ("ds:", file);
2976 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 2977 }
e075ae69
RH
2978 else if (flag_pic)
2979 output_pic_addr_const (file, addr, 0);
2980 else
2981 output_addr_const (file, addr);
2982 }
2983 else
2984 {
2985 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 2986 {
e075ae69 2987 if (disp)
2a2ab3f9 2988 {
c399861d 2989 if (flag_pic)
e075ae69
RH
2990 output_pic_addr_const (file, disp, 0);
2991 else if (GET_CODE (disp) == LABEL_REF)
2992 output_asm_label (disp);
2a2ab3f9 2993 else
e075ae69 2994 output_addr_const (file, disp);
2a2ab3f9
JVA
2995 }
2996
e075ae69
RH
2997 putc ('(', file);
2998 if (base)
2999 PRINT_REG (base, 0, file);
3000 if (index)
2a2ab3f9 3001 {
e075ae69
RH
3002 putc (',', file);
3003 PRINT_REG (index, 0, file);
3004 if (scale != 1)
3005 fprintf (file, ",%d", scale);
2a2ab3f9 3006 }
e075ae69 3007 putc (')', file);
2a2ab3f9 3008 }
2a2ab3f9
JVA
3009 else
3010 {
e075ae69 3011 rtx offset = NULL_RTX;
e9a25f70 3012
e075ae69
RH
3013 if (disp)
3014 {
3015 /* Pull out the offset of a symbol; print any symbol itself. */
3016 if (GET_CODE (disp) == CONST
3017 && GET_CODE (XEXP (disp, 0)) == PLUS
3018 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3019 {
3020 offset = XEXP (XEXP (disp, 0), 1);
3021 disp = gen_rtx_CONST (VOIDmode,
3022 XEXP (XEXP (disp, 0), 0));
3023 }
ce193852 3024
e075ae69
RH
3025 if (flag_pic)
3026 output_pic_addr_const (file, disp, 0);
3027 else if (GET_CODE (disp) == LABEL_REF)
3028 output_asm_label (disp);
3029 else if (GET_CODE (disp) == CONST_INT)
3030 offset = disp;
3031 else
3032 output_addr_const (file, disp);
3033 }
e9a25f70 3034
e075ae69
RH
3035 putc ('[', file);
3036 if (base)
a8620236 3037 {
e075ae69
RH
3038 PRINT_REG (base, 0, file);
3039 if (offset)
3040 {
3041 if (INTVAL (offset) >= 0)
3042 putc ('+', file);
3043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3044 }
a8620236 3045 }
e075ae69
RH
3046 else if (offset)
3047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 3048 else
e075ae69 3049 putc ('0', file);
e9a25f70 3050
e075ae69
RH
3051 if (index)
3052 {
3053 putc ('+', file);
3054 PRINT_REG (index, 0, file);
3055 if (scale != 1)
3056 fprintf (file, "*%d", scale);
3057 }
3058 putc (']', file);
3059 }
2a2ab3f9
JVA
3060 }
3061}
3062\f
3063/* Split one or more DImode RTL references into pairs of SImode
3064 references. The RTL can be REG, offsettable MEM, integer constant, or
3065 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3066 split and "num" is its length. lo_half and hi_half are output arrays
3067 that parallel "operands". */
3068
3069void
3070split_di (operands, num, lo_half, hi_half)
3071 rtx operands[];
3072 int num;
3073 rtx lo_half[], hi_half[];
3074{
3075 while (num--)
3076 {
57dbca5e 3077 rtx op = operands[num];
e075ae69
RH
3078 if (CONSTANT_P (op))
3079 split_double (op, &lo_half[num], &hi_half[num]);
3080 else if (! reload_completed)
a269a03c
JC
3081 {
3082 lo_half[num] = gen_lowpart (SImode, op);
3083 hi_half[num] = gen_highpart (SImode, op);
3084 }
3085 else if (GET_CODE (op) == REG)
2a2ab3f9 3086 {
57dbca5e
BS
3087 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3088 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3089 }
57dbca5e 3090 else if (offsettable_memref_p (op))
2a2ab3f9 3091 {
57dbca5e
BS
3092 rtx lo_addr = XEXP (op, 0);
3093 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3094 lo_half[num] = change_address (op, SImode, lo_addr);
3095 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3096 }
3097 else
3098 abort();
3099 }
3100}
3101\f
2a2ab3f9
JVA
3102/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3103 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3104 is the expression of the binary operation. The output may either be
3105 emitted here, or returned to the caller, like all output_* functions.
3106
3107 There is no guarantee that the operands are the same mode, as they
3108 might be within FLOAT or FLOAT_EXTEND expressions. */
3109
69ddee61 3110const char *
2a2ab3f9
JVA
3111output_387_binary_op (insn, operands)
3112 rtx insn;
3113 rtx *operands;
3114{
2a2ab3f9 3115 static char buf[100];
e075ae69 3116 rtx temp;
69ddee61 3117 const char *p;
2a2ab3f9
JVA
3118
3119 switch (GET_CODE (operands[3]))
3120 {
3121 case PLUS:
e075ae69
RH
3122 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3123 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3124 p = "fiadd";
3125 else
3126 p = "fadd";
2a2ab3f9
JVA
3127 break;
3128
3129 case MINUS:
e075ae69
RH
3130 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3131 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3132 p = "fisub";
3133 else
3134 p = "fsub";
2a2ab3f9
JVA
3135 break;
3136
3137 case MULT:
e075ae69
RH
3138 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3139 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3140 p = "fimul";
3141 else
3142 p = "fmul";
2a2ab3f9
JVA
3143 break;
3144
3145 case DIV:
e075ae69
RH
3146 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3147 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3148 p = "fidiv";
3149 else
3150 p = "fdiv";
2a2ab3f9
JVA
3151 break;
3152
3153 default:
3154 abort ();
3155 }
3156
e075ae69 3157 strcpy (buf, p);
2a2ab3f9
JVA
3158
3159 switch (GET_CODE (operands[3]))
3160 {
3161 case MULT:
3162 case PLUS:
3163 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3164 {
3165 temp = operands[2];
3166 operands[2] = operands[1];
3167 operands[1] = temp;
3168 }
3169
3170 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3171 {
3172 p = "%z2\t%2";
3173 break;
3174 }
2a2ab3f9
JVA
3175
3176 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3177 {
3178 if (STACK_TOP_P (operands[0]))
e075ae69 3179 p = "p\t{%0,%2|%2, %0}";
6b28fd63 3180 else
e075ae69
RH
3181 p = "p\t{%2,%0|%0, %2}";
3182 break;
6b28fd63 3183 }
2a2ab3f9
JVA
3184
3185 if (STACK_TOP_P (operands[0]))
e075ae69 3186 p = "\t{%y2,%0|%0, %y2}";
2a2ab3f9 3187 else
e075ae69
RH
3188 p = "\t{%2,%0|%0, %2}";
3189 break;
2a2ab3f9
JVA
3190
3191 case MINUS:
3192 case DIV:
3193 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
3194 {
3195 p = "r%z1\t%1";
3196 break;
3197 }
2a2ab3f9
JVA
3198
3199 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3200 {
3201 p = "%z2\t%2";
3202 break;
3203 }
2a2ab3f9 3204
2a2ab3f9
JVA
3205 if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
3206 abort ();
3207
e075ae69
RH
3208 /* Note that the Unixware assembler, and the AT&T assembler before
3209 that, are confusingly not reversed from Intel syntax in this
3210 area. */
2a2ab3f9 3211 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3212 {
3213 if (STACK_TOP_P (operands[0]))
e075ae69 3214 p = "p\t%0,%2";
6b28fd63 3215 else
e075ae69
RH
3216 p = "rp\t%2,%0";
3217 break;
6b28fd63 3218 }
2a2ab3f9
JVA
3219
3220 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63
JL
3221 {
3222 if (STACK_TOP_P (operands[0]))
e075ae69 3223 p = "rp\t%0,%1";
6b28fd63 3224 else
e075ae69
RH
3225 p = "p\t%1,%0";
3226 break;
6b28fd63 3227 }
2a2ab3f9
JVA
3228
3229 if (STACK_TOP_P (operands[0]))
3230 {
3231 if (STACK_TOP_P (operands[1]))
e075ae69 3232 p = "\t%y2,%0";
2a2ab3f9 3233 else
e075ae69
RH
3234 p = "r\t%y1,%0";
3235 break;
2a2ab3f9
JVA
3236 }
3237 else if (STACK_TOP_P (operands[1]))
e075ae69 3238 p = "\t%1,%0";
2a2ab3f9 3239 else
e075ae69
RH
3240 p = "r\t%2,%0";
3241 break;
2a2ab3f9
JVA
3242
3243 default:
3244 abort ();
3245 }
e075ae69
RH
3246
3247 strcat (buf, p);
3248 return buf;
2a2ab3f9 3249}
e075ae69 3250
2a2ab3f9 3251/* Output code for INSN to convert a float to a signed int. OPERANDS
e075ae69
RH
3252 are the insn operands. The output may be [SD]Imode and the input
3253 operand may be [SDX]Fmode. */
2a2ab3f9 3254
69ddee61 3255const char *
2a2ab3f9
JVA
3256output_fix_trunc (insn, operands)
3257 rtx insn;
3258 rtx *operands;
3259{
3260 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
3261 int dimode_p = GET_MODE (operands[0]) == DImode;
3262 rtx xops[4];
2a2ab3f9 3263
e075ae69
RH
3264 /* Jump through a hoop or two for DImode, since the hardware has no
3265 non-popping instruction. We used to do this a different way, but
3266 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
3267 if (dimode_p && !stack_top_dies)
3268 output_asm_insn ("fld\t%y1", operands);
e075ae69
RH
3269
3270 if (! STACK_TOP_P (operands[1]))
10195bd8
JW
3271 abort ();
3272
e075ae69
RH
3273 xops[0] = GEN_INT (12);
3274 xops[1] = adj_offsettable_operand (operands[2], 1);
3275 xops[1] = change_address (xops[1], QImode, NULL_RTX);
305f097e 3276
e075ae69
RH
3277 xops[2] = operands[0];
3278 if (GET_CODE (operands[0]) != MEM)
3279 xops[2] = operands[3];
2a2ab3f9 3280
e075ae69
RH
3281 output_asm_insn ("fnstcw\t%2", operands);
3282 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3283 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3284 output_asm_insn ("fldcw\t%2", operands);
3285 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
e9a25f70 3286
e075ae69
RH
3287 if (stack_top_dies || dimode_p)
3288 output_asm_insn ("fistp%z2\t%2", xops);
10195bd8 3289 else
e075ae69
RH
3290 output_asm_insn ("fist%z2\t%2", xops);
3291
3292 output_asm_insn ("fldcw\t%2", operands);
10195bd8 3293
e075ae69 3294 if (GET_CODE (operands[0]) != MEM)
2a2ab3f9 3295 {
e075ae69 3296 if (dimode_p)
2e14a41b 3297 {
e075ae69
RH
3298 split_di (operands+0, 1, xops+0, xops+1);
3299 split_di (operands+3, 1, xops+2, xops+3);
3300 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3301 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
2e14a41b 3302 }
e075ae69
RH
3303 else
3304 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands);
2a2ab3f9 3305 }
2a2ab3f9 3306
e075ae69 3307 return "";
2a2ab3f9 3308}
cda749b1 3309
e075ae69
RH
3310/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3311 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3312 when fucom should be used. */
3313
69ddee61 3314const char *
e075ae69 3315output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
3316 rtx insn;
3317 rtx *operands;
e075ae69 3318 int eflags_p, unordered_p;
cda749b1 3319{
e075ae69
RH
3320 int stack_top_dies;
3321 rtx cmp_op0 = operands[0];
3322 rtx cmp_op1 = operands[1];
3323
3324 if (eflags_p == 2)
3325 {
3326 cmp_op0 = cmp_op1;
3327 cmp_op1 = operands[2];
3328 }
cda749b1 3329
e075ae69 3330 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
3331 abort ();
3332
e075ae69 3333 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 3334
e075ae69
RH
3335 if (STACK_REG_P (cmp_op1)
3336 && stack_top_dies
3337 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3338 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 3339 {
e075ae69
RH
3340 /* If both the top of the 387 stack dies, and the other operand
3341 is also a stack register that dies, then this must be a
3342 `fcompp' float compare */
3343
3344 if (eflags_p == 1)
3345 {
3346 /* There is no double popping fcomi variant. Fortunately,
3347 eflags is immune from the fstp's cc clobbering. */
3348 if (unordered_p)
3349 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3350 else
3351 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3352 return "fstp\t%y0";
3353 }
3354 else
cda749b1 3355 {
e075ae69
RH
3356 if (eflags_p == 2)
3357 {
3358 if (unordered_p)
3359 return "fucompp\n\tfnstsw\t%0";
3360 else
3361 return "fcompp\n\tfnstsw\t%0";
3362 }
cda749b1
JW
3363 else
3364 {
e075ae69
RH
3365 if (unordered_p)
3366 return "fucompp";
3367 else
3368 return "fcompp";
cda749b1
JW
3369 }
3370 }
cda749b1
JW
3371 }
3372 else
3373 {
e075ae69 3374 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 3375
69ddee61 3376 static const char * const alt[24] =
e075ae69
RH
3377 {
3378 "fcom%z1\t%y1",
3379 "fcomp%z1\t%y1",
3380 "fucom%z1\t%y1",
3381 "fucomp%z1\t%y1",
3382
3383 "ficom%z1\t%y1",
3384 "ficomp%z1\t%y1",
3385 NULL,
3386 NULL,
3387
3388 "fcomi\t{%y1, %0|%0, %y1}",
3389 "fcomip\t{%y1, %0|%0, %y1}",
3390 "fucomi\t{%y1, %0|%0, %y1}",
3391 "fucomip\t{%y1, %0|%0, %y1}",
3392
3393 NULL,
3394 NULL,
3395 NULL,
3396 NULL,
3397
3398 "fcom%z2\t%y2\n\tfnstsw\t%0",
3399 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3400 "fucom%z2\t%y2\n\tfnstsw\t%0",
3401 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3402
3403 "ficom%z2\t%y2\n\tfnstsw\t%0",
3404 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3405 NULL,
3406 NULL
3407 };
3408
3409 int mask;
69ddee61 3410 const char *ret;
e075ae69
RH
3411
3412 mask = eflags_p << 3;
3413 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3414 mask |= unordered_p << 1;
3415 mask |= stack_top_dies;
3416
3417 if (mask >= 24)
3418 abort ();
3419 ret = alt[mask];
3420 if (ret == NULL)
3421 abort ();
cda749b1 3422
e075ae69 3423 return ret;
cda749b1
JW
3424 }
3425}
2a2ab3f9 3426
e075ae69 3427/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 3428
e075ae69 3429 If profile_block_flag == 2
2a2ab3f9 3430
e075ae69
RH
3431 Output code to call the subroutine `__bb_init_trace_func'
3432 and pass two parameters to it. The first parameter is
3433 the address of a block allocated in the object module.
3434 The second parameter is the number of the first basic block
3435 of the function.
2a2ab3f9 3436
e075ae69
RH
3437 The name of the block is a local symbol made with this statement:
3438
3439 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 3440
e075ae69
RH
3441 Of course, since you are writing the definition of
3442 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3443 can take a short cut in the definition of this macro and use the
3444 name that you know will result.
2a2ab3f9 3445
e075ae69
RH
3446 The number of the first basic block of the function is
3447 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 3448
e075ae69
RH
3449 If described in a virtual assembler language the code to be
3450 output looks like:
2a2ab3f9 3451
e075ae69
RH
3452 parameter1 <- LPBX0
3453 parameter2 <- BLOCK_OR_LABEL
3454 call __bb_init_trace_func
2a2ab3f9 3455
e075ae69 3456 else if profile_block_flag != 0
e74389ff 3457
e075ae69
RH
3458 Output code to call the subroutine `__bb_init_func'
3459 and pass one single parameter to it, which is the same
3460 as the first parameter to `__bb_init_trace_func'.
e74389ff 3461
e075ae69
RH
3462 The first word of this parameter is a flag which will be nonzero if
3463 the object module has already been initialized. So test this word
3464 first, and do not call `__bb_init_func' if the flag is nonzero.
3465 Note: When profile_block_flag == 2 the test need not be done
3466 but `__bb_init_trace_func' *must* be called.
e74389ff 3467
e075ae69
RH
3468 BLOCK_OR_LABEL may be used to generate a label number as a
3469 branch destination in case `__bb_init_func' will not be called.
e74389ff 3470
e075ae69
RH
3471 If described in a virtual assembler language the code to be
3472 output looks like:
2a2ab3f9 3473
e075ae69
RH
3474 cmp (LPBX0),0
3475 jne local_label
3476 parameter1 <- LPBX0
3477 call __bb_init_func
3478 local_label:
3479*/
c572e5ba 3480
e075ae69
RH
3481void
3482ix86_output_function_block_profiler (file, block_or_label)
3483 FILE *file;
3484 int block_or_label;
c572e5ba 3485{
e075ae69
RH
3486 static int num_func = 0;
3487 rtx xops[8];
3488 char block_table[80], false_label[80];
c572e5ba 3489
e075ae69 3490 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 3491
e075ae69
RH
3492 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3493 xops[5] = stack_pointer_rtx;
3494 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 3495
e075ae69 3496 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 3497
e075ae69 3498 switch (profile_block_flag)
c572e5ba 3499 {
e075ae69
RH
3500 case 2:
3501 xops[2] = GEN_INT (block_or_label);
3502 xops[3] = gen_rtx_MEM (Pmode,
3503 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3504 xops[6] = GEN_INT (8);
e9a25f70 3505
e075ae69
RH
3506 output_asm_insn ("push{l}\t%2", xops);
3507 if (!flag_pic)
3508 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 3509 else
870a0c2c 3510 {
e075ae69
RH
3511 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3512 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3513 }
e075ae69
RH
3514 output_asm_insn ("call\t%P3", xops);
3515 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3516 break;
c572e5ba 3517
e075ae69
RH
3518 default:
3519 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 3520
e075ae69
RH
3521 xops[0] = const0_rtx;
3522 xops[2] = gen_rtx_MEM (Pmode,
3523 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3524 xops[3] = gen_rtx_MEM (Pmode,
3525 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3526 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3527 xops[6] = GEN_INT (4);
a14003ee 3528
e075ae69 3529 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 3530
e075ae69
RH
3531 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3532 output_asm_insn ("jne\t%2", xops);
870a0c2c 3533
e075ae69
RH
3534 if (!flag_pic)
3535 output_asm_insn ("push{l}\t%1", xops);
3536 else
3537 {
3538 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
3539 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3540 }
e075ae69
RH
3541 output_asm_insn ("call\t%P3", xops);
3542 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3543 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3544 num_func++;
3545 break;
c572e5ba 3546 }
2a2ab3f9 3547}
305f097e 3548
e075ae69
RH
3549/* Output assembler code to FILE to increment a counter associated
3550 with basic block number BLOCKNO.
305f097e 3551
e075ae69 3552 If profile_block_flag == 2
ecbc4695 3553
e075ae69
RH
3554 Output code to initialize the global structure `__bb' and
3555 call the function `__bb_trace_func' which will increment the
3556 counter.
ecbc4695 3557
e075ae69
RH
3558 `__bb' consists of two words. In the first word the number
3559 of the basic block has to be stored. In the second word
3560 the address of a block allocated in the object module
3561 has to be stored.
ecbc4695 3562
e075ae69 3563 The basic block number is given by BLOCKNO.
ecbc4695 3564
e075ae69 3565 The address of the block is given by the label created with
305f097e 3566
e075ae69 3567 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 3568
e075ae69 3569 by FUNCTION_BLOCK_PROFILER.
ecbc4695 3570
e075ae69
RH
3571 Of course, since you are writing the definition of
3572 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3573 can take a short cut in the definition of this macro and use the
3574 name that you know will result.
305f097e 3575
e075ae69
RH
3576 If described in a virtual assembler language the code to be
3577 output looks like:
305f097e 3578
e075ae69
RH
3579 move BLOCKNO -> (__bb)
3580 move LPBX0 -> (__bb+4)
3581 call __bb_trace_func
305f097e 3582
e075ae69
RH
3583 Note that function `__bb_trace_func' must not change the
3584 machine state, especially the flag register. To grant
3585 this, you must output code to save and restore registers
3586 either in this macro or in the macros MACHINE_STATE_SAVE
3587 and MACHINE_STATE_RESTORE. The last two macros will be
3588 used in the function `__bb_trace_func', so you must make
3589 sure that the function prologue does not change any
3590 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 3591
e075ae69 3592 else if profile_block_flag != 0
305f097e 3593
e075ae69
RH
3594 Output code to increment the counter directly.
3595 Basic blocks are numbered separately from zero within each
3596 compiled object module. The count associated with block number
3597 BLOCKNO is at index BLOCKNO in an array of words; the name of
3598 this array is a local symbol made with this statement:
32b5b1aa 3599
e075ae69 3600 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 3601
e075ae69
RH
3602 Of course, since you are writing the definition of
3603 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3604 can take a short cut in the definition of this macro and use the
3605 name that you know will result.
32b5b1aa 3606
e075ae69
RH
3607 If described in a virtual assembler language the code to be
3608 output looks like:
32b5b1aa 3609
e075ae69
RH
3610 inc (LPBX2+4*BLOCKNO)
3611*/
32b5b1aa 3612
e075ae69
RH
3613void
3614ix86_output_block_profiler (file, blockno)
3615 FILE *file ATTRIBUTE_UNUSED;
3616 int blockno;
3617{
3618 rtx xops[8], cnt_rtx;
3619 char counts[80];
3620 char *block_table = counts;
3621
3622 switch (profile_block_flag)
3623 {
3624 case 2:
3625 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 3626
e075ae69
RH
3627 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3628 xops[2] = GEN_INT (blockno);
3629 xops[3] = gen_rtx_MEM (Pmode,
3630 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
3631 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
3632 xops[5] = plus_constant (xops[4], 4);
3633 xops[0] = gen_rtx_MEM (SImode, xops[4]);
3634 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 3635
e075ae69 3636 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 3637
e075ae69
RH
3638 output_asm_insn ("pushf", xops);
3639 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3640 if (flag_pic)
32b5b1aa 3641 {
e075ae69
RH
3642 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3643 output_asm_insn ("push{l}\t%7", xops);
3644 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3645 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
3646 output_asm_insn ("pop{l}\t%7", xops);
3647 }
3648 else
3649 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
3650 output_asm_insn ("call\t%P3", xops);
3651 output_asm_insn ("popf", xops);
32b5b1aa 3652
e075ae69 3653 break;
32b5b1aa 3654
e075ae69
RH
3655 default:
3656 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
3657 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
3658 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 3659
e075ae69
RH
3660 if (blockno)
3661 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 3662
e075ae69
RH
3663 if (flag_pic)
3664 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 3665
e075ae69
RH
3666 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
3667 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 3668
e075ae69 3669 break;
32b5b1aa 3670 }
32b5b1aa 3671}
32b5b1aa 3672\f
79325812 3673void
e075ae69
RH
3674ix86_expand_move (mode, operands)
3675 enum machine_mode mode;
3676 rtx operands[];
32b5b1aa 3677{
e075ae69 3678 int strict = (reload_in_progress || reload_completed);
e075ae69 3679 rtx insn;
e9a25f70 3680
e075ae69 3681 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 3682 {
e075ae69 3683 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 3684
e075ae69
RH
3685 if (GET_CODE (operands[0]) == MEM)
3686 operands[1] = force_reg (Pmode, operands[1]);
3687 else
32b5b1aa 3688 {
e075ae69
RH
3689 rtx temp = operands[0];
3690 if (GET_CODE (temp) != REG)
3691 temp = gen_reg_rtx (Pmode);
3692 temp = legitimize_pic_address (operands[1], temp);
3693 if (temp == operands[0])
3694 return;
3695 operands[1] = temp;
32b5b1aa 3696 }
e075ae69
RH
3697 }
3698 else
3699 {
d7a29404
JH
3700 if (GET_CODE (operands[0]) == MEM
3701 && (GET_MODE (operands[0]) == QImode
3702 || !push_operand (operands[0], mode))
3703 && GET_CODE (operands[1]) == MEM)
e075ae69 3704 operands[1] = force_reg (mode, operands[1]);
e9a25f70 3705
e075ae69 3706 if (FLOAT_MODE_P (mode))
32b5b1aa 3707 {
d7a29404
JH
3708 /* If we are loading a floating point constant to a register,
3709 force the value to memory now, since we'll get better code
3710 out the back end. */
e075ae69
RH
3711
3712 if (strict)
3713 ;
e075ae69 3714 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 3715 && register_operand (operands[0], mode))
e075ae69 3716 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 3717 }
32b5b1aa 3718 }
e9a25f70 3719
e075ae69 3720 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 3721
e075ae69
RH
3722 emit_insn (insn);
3723}
e9a25f70 3724
e075ae69
RH
3725/* Attempt to expand a binary operator. Make the expansion closer to the
3726 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 3727 memory references (one output, two input) in a single insn. */
e9a25f70 3728
e075ae69
RH
3729void
3730ix86_expand_binary_operator (code, mode, operands)
3731 enum rtx_code code;
3732 enum machine_mode mode;
3733 rtx operands[];
3734{
3735 int matching_memory;
3736 rtx src1, src2, dst, op, clob;
3737
3738 dst = operands[0];
3739 src1 = operands[1];
3740 src2 = operands[2];
3741
3742 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
3743 if (GET_RTX_CLASS (code) == 'c'
3744 && (rtx_equal_p (dst, src2)
3745 || immediate_operand (src1, mode)))
3746 {
3747 rtx temp = src1;
3748 src1 = src2;
3749 src2 = temp;
32b5b1aa 3750 }
e9a25f70 3751
e075ae69
RH
3752 /* If the destination is memory, and we do not have matching source
3753 operands, do things in registers. */
3754 matching_memory = 0;
3755 if (GET_CODE (dst) == MEM)
32b5b1aa 3756 {
e075ae69
RH
3757 if (rtx_equal_p (dst, src1))
3758 matching_memory = 1;
3759 else if (GET_RTX_CLASS (code) == 'c'
3760 && rtx_equal_p (dst, src2))
3761 matching_memory = 2;
3762 else
3763 dst = gen_reg_rtx (mode);
3764 }
3765
3766 /* Both source operands cannot be in memory. */
3767 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
3768 {
3769 if (matching_memory != 2)
3770 src2 = force_reg (mode, src2);
3771 else
3772 src1 = force_reg (mode, src1);
32b5b1aa 3773 }
e9a25f70 3774
06a964de
JH
3775 /* If the operation is not commutable, source 1 cannot be a constant
3776 or non-matching memory. */
3777 if ((CONSTANT_P (src1)
3778 || (!matching_memory && GET_CODE (src1) == MEM))
3779 && GET_RTX_CLASS (code) != 'c')
e075ae69
RH
3780 src1 = force_reg (mode, src1);
3781
3782 /* If optimizing, copy to regs to improve CSE */
3783 if (optimize && !reload_in_progress && !reload_completed)
32b5b1aa 3784 {
e075ae69
RH
3785 if (GET_CODE (dst) == MEM)
3786 dst = gen_reg_rtx (mode);
3787 if (GET_CODE (src1) == MEM)
3788 src1 = force_reg (mode, src1);
3789 if (GET_CODE (src2) == MEM)
3790 src2 = force_reg (mode, src2);
32b5b1aa 3791 }
e9a25f70 3792
e075ae69
RH
3793 /* Emit the instruction. */
3794
3795 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
3796 if (reload_in_progress)
3797 {
3798 /* Reload doesn't know about the flags register, and doesn't know that
3799 it doesn't want to clobber it. We can only do this with PLUS. */
3800 if (code != PLUS)
3801 abort ();
3802 emit_insn (op);
3803 }
3804 else
32b5b1aa 3805 {
e075ae69
RH
3806 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3807 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 3808 }
e9a25f70 3809
e075ae69
RH
3810 /* Fix up the destination if needed. */
3811 if (dst != operands[0])
3812 emit_move_insn (operands[0], dst);
3813}
3814
3815/* Return TRUE or FALSE depending on whether the binary operator meets the
3816 appropriate constraints. */
3817
3818int
3819ix86_binary_operator_ok (code, mode, operands)
3820 enum rtx_code code;
3821 enum machine_mode mode ATTRIBUTE_UNUSED;
3822 rtx operands[3];
3823{
3824 /* Both source operands cannot be in memory. */
3825 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
3826 return 0;
3827 /* If the operation is not commutable, source 1 cannot be a constant. */
3828 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
3829 return 0;
3830 /* If the destination is memory, we must have a matching source operand. */
3831 if (GET_CODE (operands[0]) == MEM
3832 && ! (rtx_equal_p (operands[0], operands[1])
3833 || (GET_RTX_CLASS (code) == 'c'
3834 && rtx_equal_p (operands[0], operands[2]))))
3835 return 0;
06a964de
JH
3836 /* If the operation is not commutable and the source 1 is memory, we must
3837 have a matching destionation. */
3838 if (GET_CODE (operands[1]) == MEM
3839 && GET_RTX_CLASS (code) != 'c'
3840 && ! rtx_equal_p (operands[0], operands[1]))
3841 return 0;
e075ae69
RH
3842 return 1;
3843}
3844
3845/* Attempt to expand a unary operator. Make the expansion closer to the
3846 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 3847 memory references (one output, one input) in a single insn. */
e075ae69 3848
9d81fc27 3849void
e075ae69
RH
3850ix86_expand_unary_operator (code, mode, operands)
3851 enum rtx_code code;
3852 enum machine_mode mode;
3853 rtx operands[];
3854{
06a964de
JH
3855 int matching_memory;
3856 rtx src, dst, op, clob;
3857
3858 dst = operands[0];
3859 src = operands[1];
e075ae69 3860
06a964de
JH
3861 /* If the destination is memory, and we do not have matching source
3862 operands, do things in registers. */
3863 matching_memory = 0;
3864 if (GET_CODE (dst) == MEM)
32b5b1aa 3865 {
06a964de
JH
3866 if (rtx_equal_p (dst, src))
3867 matching_memory = 1;
e075ae69 3868 else
06a964de 3869 dst = gen_reg_rtx (mode);
32b5b1aa 3870 }
e9a25f70 3871
06a964de
JH
3872 /* When source operand is memory, destination must match. */
3873 if (!matching_memory && GET_CODE (src) == MEM)
3874 src = force_reg (mode, src);
3875
3876 /* If optimizing, copy to regs to improve CSE */
3877 if (optimize && !reload_in_progress && !reload_completed)
3878 {
3879 if (GET_CODE (dst) == MEM)
3880 dst = gen_reg_rtx (mode);
3881 if (GET_CODE (src) == MEM)
3882 src = force_reg (mode, src);
3883 }
3884
3885 /* Emit the instruction. */
3886
3887 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
3888 if (reload_in_progress || code == NOT)
3889 {
3890 /* Reload doesn't know about the flags register, and doesn't know that
3891 it doesn't want to clobber it. */
3892 if (code != NOT)
3893 abort ();
3894 emit_insn (op);
3895 }
3896 else
3897 {
3898 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3899 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
3900 }
3901
3902 /* Fix up the destination if needed. */
3903 if (dst != operands[0])
3904 emit_move_insn (operands[0], dst);
e075ae69
RH
3905}
3906
3907/* Return TRUE or FALSE depending on whether the unary operator meets the
3908 appropriate constraints. */
3909
3910int
3911ix86_unary_operator_ok (code, mode, operands)
3912 enum rtx_code code ATTRIBUTE_UNUSED;
3913 enum machine_mode mode ATTRIBUTE_UNUSED;
3914 rtx operands[2] ATTRIBUTE_UNUSED;
3915{
06a964de
JH
3916 /* If one of operands is memory, source and destination must match. */
3917 if ((GET_CODE (operands[0]) == MEM
3918 || GET_CODE (operands[1]) == MEM)
3919 && ! rtx_equal_p (operands[0], operands[1]))
3920 return FALSE;
e075ae69
RH
3921 return TRUE;
3922}
3923
3924/* Produce an unsigned comparison for a given signed comparison. */
3925
3926static enum rtx_code
3927unsigned_comparison (code)
3928 enum rtx_code code;
3929{
3930 switch (code)
32b5b1aa 3931 {
e075ae69
RH
3932 case GT:
3933 code = GTU;
3934 break;
3935 case LT:
3936 code = LTU;
3937 break;
3938 case GE:
3939 code = GEU;
3940 break;
3941 case LE:
3942 code = LEU;
3943 break;
3944 case EQ:
3945 case NE:
3946 case LEU:
3947 case LTU:
3948 case GEU:
3949 case GTU:
3950 break;
3951 default:
3952 abort ();
3953 }
3954 return code;
3955}
3956
3957/* Generate insn patterns to do an integer compare of OPERANDS. */
3958
3959static rtx
3960ix86_expand_int_compare (code, op0, op1)
3961 enum rtx_code code;
3962 rtx op0, op1;
3963{
3964 enum machine_mode cmpmode;
3965 rtx tmp, flags;
3966
3967 cmpmode = SELECT_CC_MODE (code, op0, op1);
3968 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
3969
3970 /* This is very simple, but making the interface the same as in the
3971 FP case makes the rest of the code easier. */
3972 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
3973 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
3974
3975 /* Return the test that should be put into the flags user, i.e.
3976 the bcc, scc, or cmov instruction. */
3977 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
3978}
3979
/* Generate insn patterns to do a floating point compare of OPERANDS.
   If UNORDERED, allow for unordered compares.

   Returns the condition rtx (over the flags register) that the flags
   consumer should test; also updates CODE as a side effect of the
   chosen emission strategy.  Three strategies are used:
     - fcomi (TARGET_CMOVE, ordered): compare directly into EFLAGS;
     - fnstsw + sahf: copy the FPU status word into AH, then into EFLAGS;
     - fnstsw + test/and/cmp on AH: examine C3/C2/C0 bits directly.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, unordered)
     enum rtx_code code;
     rtx op0, op1;
     int unordered;
{
  enum machine_mode fpcmp_mode;
  enum machine_mode intcmp_mode;
  rtx tmp;

  /* When not doing IEEE compliant compares, disable unordered.  */
  if (! TARGET_IEEE_FP)
    unordered = 0;
  fpcmp_mode = unordered ? CCFPUmode : CCFPmode;

  /* ??? If we knew whether invalid-operand exceptions were masked,
     we could rely on fcom to raise an exception and take care of
     NaNs.  But we don't.  We could know this from c9x math bits.  */
  if (TARGET_IEEE_FP)
    unordered = 1;

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  */
  if (unordered || GET_MODE (op0) == XFmode)
    {
      op0 = force_reg (GET_MODE (op0), op0);
      op1 = force_reg (GET_MODE (op1), op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (GET_MODE (op0), op0);

      if (CONSTANT_P (op1))
	{
	  /* Loadable 387 constants (0.0, 1.0, ...) go to a register;
	     anything else is spilled to the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (GET_MODE (op1), op1);
	  else
	    op1 = validize_mem (force_const_mem (GET_MODE (op1), op1));
	}
    }

  /* %%% fcomi is probably always faster, even when dealing with memory,
     since compare-and-branch would be three insns instead of four.  */
  if (TARGET_CMOVE && !unordered)
    {
      if (GET_CODE (op0) != REG)
	op0 = force_reg (GET_MODE (op0), op0);
      if (GET_CODE (op1) != REG)
	op1 = force_reg (GET_MODE (op1), op1);

      /* Compare directly into the flags register (fcomi style).  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      emit_insn (tmp);

      /* The FP codes work out to act like unsigned.  */
      code = unsigned_comparison (code);
      intcmp_mode = fpcmp_mode;
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */

      /* Fetch the FPU status word into a HImode pseudo via an unspec
	 (fnstsw); TMP then holds the status word for examination.  */
      rtx tmp2;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      tmp = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, tmp2));

      if (! unordered)
	{
	  /* We have two options here -- use sahf, or testing bits of ah
	     directly.  On PPRO, they are equivalent, sahf being one byte
	     smaller.  On Pentium, sahf is non-pairable while test is UV
	     pairable.  */

	  if (TARGET_USE_SAHF || optimize_size)
	    {
	    do_sahf:

	      /* The FP codes work out to act like unsigned.  */
	      code = unsigned_comparison (code);
	      emit_insn (gen_x86_sahf_1 (tmp));
	      intcmp_mode = CCmode;
	    }
	  else
	    {
	      /*
	       * The numbers below correspond to the bits of the FPSW in AH.
	       * C3, C2, and C0 are in bits 0x40, 0x04, and 0x01 respectively.
	       *
	       *    cmp    C3 C2 C0
	       *    >      0  0  0
	       *    <      0  0  1
	       *    =      1  0  0
	       *    un     1  1  1
	       */

	      int mask;

	      switch (code)
		{
		case GT:
		  mask = 0x41;
		  code = EQ;
		  break;
		case LT:
		  mask = 0x01;
		  code = NE;
		  break;
		case GE:
		  /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
		     faster in all cases to just fall back on sahf.  */
		  goto do_sahf;
		case LE:
		  mask = 0x41;
		  code = NE;
		  break;
		case EQ:
		  mask = 0x40;
		  code = NE;
		  break;
		case NE:
		  mask = 0x40;
		  code = EQ;
		  break;
		default:
		  abort ();
		}

	      emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (mask)));
	      intcmp_mode = CCNOmode;
	    }
	}
      else
	{
	  /* In the unordered case, we have to check C2 for NaN's, which
	     doesn't happen to work out to anything nice combination-wise.
	     So do some bit twiddling on the value we've got in AH to come
	     up with an appropriate set of condition codes.  */

	  intcmp_mode = CCNOmode;
	  switch (code)
	    {
	    case GT:
	      emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x45)));
	      code = EQ;
	      break;
	    case LT:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case GE:
	      emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x05)));
	      code = EQ;
	      break;
	    case LE:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (tmp, tmp, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	      break;
	    case EQ:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case NE:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (tmp, tmp, GEN_INT (0x40)));
	      code = NE;
	      break;
	    default:
	      abort ();
	    }
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
4185
4186static rtx
4187ix86_expand_compare (code, unordered)
4188 enum rtx_code code;
4189 int unordered;
4190{
4191 rtx op0, op1, ret;
4192 op0 = ix86_compare_op0;
4193 op1 = ix86_compare_op1;
4194
4195 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4196 ret = ix86_expand_fp_compare (code, op0, op1, unordered);
32b5b1aa 4197 else
e075ae69
RH
4198 ret = ix86_expand_int_compare (code, op0, op1);
4199
4200 return ret;
4201}
4202
/* Emit a conditional branch to LABEL testing ix86_compare_op0 against
   ix86_compare_op1 with condition CODE.  UNORDERED is forwarded to the
   compare expander.  DImode comparisons are split into SImode
   compare+branch sequences; note that this function recurses for the
   split cases and mutates the ix86_compare_op[01] globals to do so.  */

void
ix86_expand_branch (code, unordered, label)
     enum rtx_code code;
     int unordered;
     rtx label;
{
  rtx tmp, lo[2], hi[2], label2;
  enum rtx_code code1, code2, code3;

  /* Simple case: anything but DImode is handled by one compare + jump.  */
  if (GET_MODE (ix86_compare_op0) != DImode)
    {
      tmp = ix86_expand_compare (code, unordered);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;
    }

  /* Expand DImode branch into multiple compare+branch.  */

  /* Canonicalize so that any constant ends up as the second operand.  */
  if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
    {
      tmp = ix86_compare_op0;
      ix86_compare_op0 = ix86_compare_op1;
      ix86_compare_op1 = tmp;
      code = swap_condition (code);
    }
  split_di (&ix86_compare_op0, 1, lo+0, hi+0);
  split_di (&ix86_compare_op1, 1, lo+1, hi+1);

  /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
     two branches.  This costs one extra insn, so disable when optimizing
     for size.  */

  if ((code == EQ || code == NE)
      && (!optimize_size
	  || hi[1] == const0_rtx || lo[1] == const0_rtx))
    {
      rtx xor0, xor1;

      /* XOR of each half against the corresponding half of op1; a zero
	 half of op1 lets us skip the XOR entirely.  */
      xor1 = hi[0];
      if (hi[1] != const0_rtx)
	{
	  xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
			       NULL_RTX, 0, OPTAB_WIDEN);
	}

      xor0 = lo[0];
      if (lo[1] != const0_rtx)
	{
	  xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
			       NULL_RTX, 0, OPTAB_WIDEN);
	}

      tmp = expand_binop (SImode, ior_optab, xor1, xor0,
			  NULL_RTX, 0, OPTAB_WIDEN);

      /* Recurse: the OR result is zero iff the DImode values are equal.  */
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      ix86_expand_branch (code, unordered, label);
      return;
    }

  /* Otherwise, if we are doing less-than, op1 is a constant and the
     low word is zero, then we can just examine the high word.  */

  if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
      && (code == LT || code == LTU))
    {
      ix86_compare_op0 = hi[0];
      ix86_compare_op1 = hi[1];
      ix86_expand_branch (code, unordered, label);
      return;
    }

  /* Otherwise, we need two or three jumps.  */

  label2 = gen_label_rtx ();

  /* code1: high-word test that branches to LABEL (taken);
     code2: high-word test that branches to LABEL2 (not taken);
     code3: unsigned low-word test for the tie-break.
     NIL marks a test that can be omitted.  */
  code1 = code;
  code2 = swap_condition (code);
  code3 = unsigned_condition (code);

  switch (code)
    {
    case LT: case GT: case LTU: case GTU:
      break;

    case LE:   code1 = LT;  code2 = GT;  break;
    case GE:   code1 = GT;  code2 = LT;  break;
    case LEU:  code1 = LTU; code2 = GTU; break;
    case GEU:  code1 = GTU; code2 = LTU; break;

    case EQ:   code1 = NIL; code2 = NE;  break;
    case NE:   code2 = NIL; break;

    default:
      abort ();
    }

  /*
   * a < b =>
   *    if (hi(a) < hi(b)) goto true;
   *    if (hi(a) > hi(b)) goto false;
   *    if (lo(a) < lo(b)) goto true;
   *  false:
   */

  ix86_compare_op0 = hi[0];
  ix86_compare_op1 = hi[1];

  if (code1 != NIL)
    ix86_expand_branch (code1, unordered, label);
  if (code2 != NIL)
    ix86_expand_branch (code2, unordered, label2);

  ix86_compare_op0 = lo[0];
  ix86_compare_op1 = lo[1];
  ix86_expand_branch (code3, unordered, label);

  if (code2 != NIL)
    emit_label (label2);
}
e075ae69 4327
32b5b1aa 4328int
e075ae69
RH
4329ix86_expand_setcc (code, unordered, dest)
4330 enum rtx_code code;
4331 int unordered;
4332 rtx dest;
32b5b1aa 4333{
e075ae69
RH
4334 rtx ret, tmp;
4335 int type;
4336
4337 if (GET_MODE (ix86_compare_op0) == DImode)
4338 return 0; /* FAIL */
4339
4340 /* Three modes of generation:
4341 0 -- destination does not overlap compare sources:
4342 clear dest first, emit strict_low_part setcc.
4343 1 -- destination does overlap compare sources:
4344 emit subreg setcc, zero extend.
4345 2 -- destination is in QImode:
4346 emit setcc only.
4347 */
4348
4349 type = 0;
e075ae69
RH
4350
4351 if (GET_MODE (dest) == QImode)
4352 type = 2;
4353 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 4354 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
4355 type = 1;
4356
4357 if (type == 0)
4358 emit_move_insn (dest, const0_rtx);
4359
4360 ret = ix86_expand_compare (code, unordered);
4361 PUT_MODE (ret, QImode);
4362
4363 tmp = dest;
4364 if (type == 0)
32b5b1aa 4365 {
e075ae69
RH
4366 tmp = gen_lowpart (QImode, dest);
4367 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4368 }
4369 else if (type == 1)
4370 {
4371 if (!cse_not_expected)
4372 tmp = gen_reg_rtx (QImode);
4373 else
4374 tmp = gen_lowpart (QImode, dest);
4375 }
32b5b1aa 4376
e075ae69
RH
4377 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4378
4379 if (type == 1)
4380 {
4381 rtx clob;
4382
4383 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4384 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4385 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4386 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4387 emit_insn (tmp);
32b5b1aa 4388 }
e075ae69
RH
4389
4390 return 1; /* DONE */
32b5b1aa 4391}
e075ae69 4392
32b5b1aa 4393int
e075ae69
RH
4394ix86_expand_int_movcc (operands)
4395 rtx operands[];
32b5b1aa 4396{
e075ae69
RH
4397 enum rtx_code code = GET_CODE (operands[1]), compare_code;
4398 rtx compare_seq, compare_op;
32b5b1aa 4399
36583fea
JH
4400 /* When the compare code is not LTU or GEU, we can not use sbbl case.
4401 In case comparsion is done with immediate, we can convert it to LTU or
4402 GEU by altering the integer. */
4403
4404 if ((code == LEU || code == GTU)
4405 && GET_CODE (ix86_compare_op1) == CONST_INT
4406 && GET_MODE (operands[0]) != HImode
4407 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
4408 && GET_CODE (operands[2]) == CONST_INT
4409 && GET_CODE (operands[3]) == CONST_INT)
4410 {
4411 if (code == LEU)
4412 code = LTU;
4413 else
4414 code = GEU;
4415 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
4416 }
e075ae69
RH
4417 start_sequence ();
4418 compare_op = ix86_expand_compare (code, code == EQ || code == NE);
4419 compare_seq = gen_sequence ();
4420 end_sequence ();
4421
4422 compare_code = GET_CODE (compare_op);
4423
4424 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4425 HImode insns, we'd be swallowed in word prefix ops. */
4426
4427 if (GET_MODE (operands[0]) != HImode
4428 && GET_CODE (operands[2]) == CONST_INT
4429 && GET_CODE (operands[3]) == CONST_INT)
4430 {
4431 rtx out = operands[0];
4432 HOST_WIDE_INT ct = INTVAL (operands[2]);
4433 HOST_WIDE_INT cf = INTVAL (operands[3]);
4434 HOST_WIDE_INT diff;
4435
36583fea 4436 if (compare_code == LTU || compare_code == GEU)
e075ae69 4437 {
e075ae69
RH
4438
4439 /* Detect overlap between destination and compare sources. */
4440 rtx tmp = out;
4441
36583fea
JH
4442 /* To simplify rest of code, restrict to the GEU case. */
4443 if (compare_code == LTU)
4444 {
4445 int tmp = ct;
4446 ct = cf;
4447 cf = tmp;
4448 compare_code = reverse_condition (compare_code);
4449 code = reverse_condition (code);
4450 }
4451 diff = ct - cf;
4452
e075ae69 4453 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 4454 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
4455 tmp = gen_reg_rtx (SImode);
4456
4457 emit_insn (compare_seq);
4458 emit_insn (gen_x86_movsicc_0_m1 (tmp));
4459
36583fea
JH
4460 if (diff == 1)
4461 {
4462 /*
4463 * cmpl op0,op1
4464 * sbbl dest,dest
4465 * [addl dest, ct]
4466 *
4467 * Size 5 - 8.
4468 */
4469 if (ct)
4470 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4471 }
4472 else if (cf == -1)
4473 {
4474 /*
4475 * cmpl op0,op1
4476 * sbbl dest,dest
4477 * orl $ct, dest
4478 *
4479 * Size 8.
4480 */
4481 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
4482 }
4483 else if (diff == -1 && ct)
4484 {
4485 /*
4486 * cmpl op0,op1
4487 * sbbl dest,dest
4488 * xorl $-1, dest
4489 * [addl dest, cf]
4490 *
4491 * Size 8 - 11.
4492 */
4493 emit_insn (gen_one_cmplsi2 (tmp, tmp));
4494 if (cf)
4495 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
4496 }
4497 else
4498 {
4499 /*
4500 * cmpl op0,op1
4501 * sbbl dest,dest
4502 * andl cf - ct, dest
4503 * [addl dest, ct]
4504 *
4505 * Size 8 - 11.
4506 */
4507 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
4508 if (ct)
4509 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4510 }
e075ae69
RH
4511
4512 if (tmp != out)
4513 emit_move_insn (out, tmp);
4514
4515 return 1; /* DONE */
4516 }
4517
4518 diff = ct - cf;
4519 if (diff < 0)
4520 {
4521 HOST_WIDE_INT tmp;
4522 tmp = ct, ct = cf, cf = tmp;
4523 diff = -diff;
4524 compare_code = reverse_condition (compare_code);
4525 code = reverse_condition (code);
4526 }
4527 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
4528 || diff == 3 || diff == 5 || diff == 9)
4529 {
4530 /*
4531 * xorl dest,dest
4532 * cmpl op1,op2
4533 * setcc dest
4534 * lea cf(dest*(ct-cf)),dest
4535 *
4536 * Size 14.
4537 *
4538 * This also catches the degenerate setcc-only case.
4539 */
4540
4541 rtx tmp;
4542 int nops;
4543
4544 out = emit_store_flag (out, code, ix86_compare_op0,
4545 ix86_compare_op1, VOIDmode, 0, 1);
4546
4547 nops = 0;
4548 if (diff == 1)
4549 tmp = out;
4550 else
4551 {
4552 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
4553 nops++;
4554 if (diff & 1)
4555 {
4556 tmp = gen_rtx_PLUS (SImode, tmp, out);
4557 nops++;
4558 }
4559 }
4560 if (cf != 0)
4561 {
4562 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
4563 nops++;
4564 }
4565 if (tmp != out)
4566 {
4567 if (nops == 0)
4568 emit_move_insn (out, tmp);
4569 else if (nops == 1)
4570 {
4571 rtx clob;
4572
4573 clob = gen_rtx_REG (CCmode, FLAGS_REG);
4574 clob = gen_rtx_CLOBBER (VOIDmode, clob);
4575
4576 tmp = gen_rtx_SET (VOIDmode, out, tmp);
4577 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4578 emit_insn (tmp);
4579 }
4580 else
4581 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
4582 }
4583 if (out != operands[0])
4584 emit_move_insn (operands[0], out);
4585
4586 return 1; /* DONE */
4587 }
4588
4589 /*
4590 * General case: Jumpful:
4591 * xorl dest,dest cmpl op1, op2
4592 * cmpl op1, op2 movl ct, dest
4593 * setcc dest jcc 1f
4594 * decl dest movl cf, dest
4595 * andl (cf-ct),dest 1:
4596 * addl ct,dest
4597 *
4598 * Size 20. Size 14.
4599 *
4600 * This is reasonably steep, but branch mispredict costs are
4601 * high on modern cpus, so consider failing only if optimizing
4602 * for space.
4603 *
4604 * %%% Parameterize branch_cost on the tuning architecture, then
4605 * use that. The 80386 couldn't care less about mispredicts.
4606 */
4607
4608 if (!optimize_size && !TARGET_CMOVE)
4609 {
4610 if (ct == 0)
4611 {
4612 ct = cf;
4613 cf = 0;
4614 compare_code = reverse_condition (compare_code);
4615 code = reverse_condition (code);
4616 }
4617
4618 out = emit_store_flag (out, code, ix86_compare_op0,
4619 ix86_compare_op1, VOIDmode, 0, 1);
4620
4621 emit_insn (gen_addsi3 (out, out, constm1_rtx));
4622 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
4623 if (ct != 0)
4624 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4625 if (out != operands[0])
4626 emit_move_insn (operands[0], out);
4627
4628 return 1; /* DONE */
4629 }
4630 }
4631
4632 if (!TARGET_CMOVE)
4633 {
4634 /* Try a few things more with specific constants and a variable. */
4635
4636 optab op = NULL;
4637 rtx var, orig_out, out, tmp;
4638
4639 if (optimize_size)
4640 return 0; /* FAIL */
4641
4642 /* If one of the two operands is an interesting constant, load a
4643 constant with the above and mask it in with a logical operation. */
4644
4645 if (GET_CODE (operands[2]) == CONST_INT)
4646 {
4647 var = operands[3];
4648 if (INTVAL (operands[2]) == 0)
4649 operands[3] = constm1_rtx, op = and_optab;
4650 else if (INTVAL (operands[2]) == -1)
4651 operands[3] = const0_rtx, op = ior_optab;
4652 }
4653 else if (GET_CODE (operands[3]) == CONST_INT)
4654 {
4655 var = operands[2];
4656 if (INTVAL (operands[3]) == 0)
4657 operands[2] = constm1_rtx, op = and_optab;
4658 else if (INTVAL (operands[3]) == -1)
4659 operands[2] = const0_rtx, op = ior_optab;
4660 }
4661
4662 if (op == NULL)
4663 return 0; /* FAIL */
4664
4665 orig_out = operands[0];
4666 tmp = gen_reg_rtx (GET_MODE (orig_out));
4667 operands[0] = tmp;
4668
4669 /* Recurse to get the constant loaded. */
4670 if (ix86_expand_int_movcc (operands) == 0)
4671 return 0; /* FAIL */
4672
4673 /* Mask in the interesting variable. */
4674 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
4675 OPTAB_WIDEN);
4676 if (out != orig_out)
4677 emit_move_insn (orig_out, out);
4678
4679 return 1; /* DONE */
4680 }
4681
4682 /*
4683 * For comparison with above,
4684 *
4685 * movl cf,dest
4686 * movl ct,tmp
4687 * cmpl op1,op2
4688 * cmovcc tmp,dest
4689 *
4690 * Size 15.
4691 */
4692
4693 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
4694 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
4695 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
4696 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
4697
4698 emit_insn (compare_seq);
4699 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4700 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4701 compare_op, operands[2],
4702 operands[3])));
4703
4704 return 1; /* DONE */
e9a25f70 4705}
e075ae69 4706
32b5b1aa 4707int
e075ae69
RH
4708ix86_expand_fp_movcc (operands)
4709 rtx operands[];
32b5b1aa 4710{
e075ae69
RH
4711 enum rtx_code code;
4712 enum machine_mode mode;
4713 rtx tmp;
32b5b1aa 4714
e075ae69
RH
4715 /* The floating point conditional move instructions don't directly
4716 support conditions resulting from a signed integer comparison. */
32b5b1aa 4717
e075ae69
RH
4718 code = GET_CODE (operands[1]);
4719 switch (code)
4720 {
4721 case LT:
4722 case LE:
4723 case GE:
4724 case GT:
4725 tmp = gen_reg_rtx (QImode);
4726 ix86_expand_setcc (code, 0, tmp);
4727 code = NE;
4728 ix86_compare_op0 = tmp;
4729 ix86_compare_op1 = const0_rtx;
4730 break;
4731
4732 default:
4733 break;
4734 }
e9a25f70 4735
e075ae69
RH
4736 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
4737 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
4738 gen_rtx_COMPARE (mode,
4739 ix86_compare_op0,
4740 ix86_compare_op1)));
4741 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4742 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4743 gen_rtx_fmt_ee (code, VOIDmode,
4744 gen_rtx_REG (mode, FLAGS_REG),
4745 const0_rtx),
4746 operands[2],
4747 operands[3])));
32b5b1aa 4748
e075ae69 4749 return 1;
32b5b1aa
SC
4750}
4751
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static void
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* Number of SImode words: 2 for DImode/DFmode, 3 for XFmode.  */
  int size = GET_MODE_SIZE (mode) / 4;

  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts alias the same pre-dec stack slot; the push order
	 supplies the correct offsets.  */
      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers; only valid once register
		 allocation has assigned them.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Word-sized slices at offsets 0, 4 (and 8 for XFmode).  */
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return;
}
4836
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   int the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];		/* part[0] = dest parts, part[1] = src parts */
  rtx operands[2];
  int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  if (size < 2 || size > 3)
    abort ();

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each push moves SP, so compensate by shifting which part each
	 SP-relative reference picks up.  */
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      /* Push high word first so the parts land at increasing addresses.  */
      if (size == 3)
	emit_insn (gen_push (part[1][2]));
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* High-to-low order.  */
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      /* Low-to-high order.  */
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
32b5b1aa 4970
/* Split a DImode left shift into SImode insns: operands[0] <<=
   operands[2], with operands[1] as the source.  SCRATCH is an optional
   SImode scratch register used for the variable-count cmov sequence
   after reload.  */

void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shifts use the count mod 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Whole-word shift: low word moves to high, low becomes 0.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shld carries low-word bits into the high word.  */
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then fix up for counts >= 32
	 (the hardware only honors the low 5 bits).  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! reload_completed || scratch))
	{
	  if (! reload_completed)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  /* cmov-based adjustment needs a zero in SCRATCH.  */
	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
32b5b1aa 5023
/* Split a DImode arithmetic right shift into SImode insns:
   operands[0] = operands[1] >> operands[2] (sign-propagating).
   SCRATCH is an optional SImode scratch register used to replicate the
   sign bit for the variable-count cmov sequence.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shifts use the count mod 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word moves to low; high word becomes the sign mask.  */
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      /* After reload we may not have a spare register, so copy
		 and shift in place.  */
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd carries high-word bits into the low word.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then fix up for counts >= 32.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (!reload_completed || scratch))
	{
	  if (! reload_completed)
	    scratch = gen_reg_rtx (SImode);
	  /* SCRATCH holds the replicated sign bit for the cmov fixup.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 5082
/* Split a DImode logical right shift into SImode insns:
   operands[0] = operands[1] >> operands[2] (zero-filling).  SCRATCH is
   an optional SImode scratch register for the variable-count cmov
   sequence after reload.  */

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shifts use the count mod 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word moves to low, high becomes 0.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd carries high-word bits into the low word.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then fix up for counts >= 32.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! reload_completed || scratch))
	{
	  if (! reload_completed)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 5136
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Already 4-byte aligned?  Skip the unaligned prologue.  */
	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  /* Dispatch on the 2- and 3-byte misalignment cases.  */
	  emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_2_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_3_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_insn (gen_cmpqi_0 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_insn (gen_cmpqi_0 (mem, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 end_0_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      /* Check the last byte before the 4-byte boundary.  */
      emit_insn (gen_cmpqi_0 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);

  if (TARGET_CMOVE)
    {
      /* Branch-free epilogue: use conditional moves to locate the zero
	 byte within the word just loaded.  */
      rtx reg = gen_reg_rtx (SImode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add sets the carry flag from
     bit 7 of the low byte of tmpreg; subtract-with-borrow then adjusts
     OUT back by 3 or 4 accordingly.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
5343\f
e075ae69
RH
5344/* Clear stack slot assignments remembered from previous functions.
5345 This is called from INIT_EXPANDERS once before RTL is emitted for each
5346 function. */
5347
36edd3cc
BS
5348static void
5349ix86_init_machine_status (p)
1526a060 5350 struct function *p;
e075ae69
RH
5351{
5352 enum machine_mode mode;
5353 int n;
36edd3cc
BS
5354 p->machine
5355 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
5356
5357 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5358 mode = (enum machine_mode) ((int) mode + 1))
5359 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5360 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
5361}
5362
1526a060
BS
5363/* Mark machine specific bits of P for GC. */
5364static void
5365ix86_mark_machine_status (p)
5366 struct function *p;
5367{
5368 enum machine_mode mode;
5369 int n;
5370
5371 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5372 mode = (enum machine_mode) ((int) mode + 1))
5373 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5374 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5375}
5376
e075ae69
RH
5377/* Return a MEM corresponding to a stack slot with mode MODE.
5378 Allocate a new slot if necessary.
5379
5380 The RTL for a function can have several slots available: N is
5381 which slot to use. */
5382
5383rtx
5384assign_386_stack_local (mode, n)
5385 enum machine_mode mode;
5386 int n;
5387{
5388 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5389 abort ();
5390
5391 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
5392 ix86_stack_locals[(int) mode][n]
5393 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
5394
5395 return ix86_stack_locals[(int) mode][n];
5396}
5397\f
5398/* Calculate the length of the memory address in the instruction
5399 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5400
5401static int
5402memory_address_length (addr)
5403 rtx addr;
5404{
5405 struct ix86_address parts;
5406 rtx base, index, disp;
5407 int len;
5408
5409 if (GET_CODE (addr) == PRE_DEC
5410 || GET_CODE (addr) == POST_INC)
5411 return 0;
3f803cd9 5412
e075ae69
RH
5413 if (! ix86_decompose_address (addr, &parts))
5414 abort ();
3f803cd9 5415
e075ae69
RH
5416 base = parts.base;
5417 index = parts.index;
5418 disp = parts.disp;
5419 len = 0;
3f803cd9 5420
e075ae69
RH
5421 /* Register Indirect. */
5422 if (base && !index && !disp)
5423 {
5424 /* Special cases: ebp and esp need the two-byte modrm form. */
5425 if (addr == stack_pointer_rtx
5426 || addr == arg_pointer_rtx
5427 || addr == frame_pointer_rtx)
5428 len = 1;
3f803cd9 5429 }
e9a25f70 5430
e075ae69
RH
5431 /* Direct Addressing. */
5432 else if (disp && !base && !index)
5433 len = 4;
5434
3f803cd9
SC
5435 else
5436 {
e075ae69
RH
5437 /* Find the length of the displacement constant. */
5438 if (disp)
5439 {
5440 if (GET_CODE (disp) == CONST_INT
5441 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
5442 len = 1;
5443 else
5444 len = 4;
5445 }
3f803cd9 5446
e075ae69
RH
5447 /* An index requires the two-byte modrm form. */
5448 if (index)
5449 len += 1;
3f803cd9
SC
5450 }
5451
e075ae69
RH
5452 return len;
5453}
79325812 5454
e075ae69
RH
/* Compute the default encoded length of INSN, classified by its type
   attribute.  The total is the sum of immediate-operand bytes, memory
   address bytes, and the opcode/prefix lengths from the md file.  */

int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);
  switch (type)
    {
    /* These never carry immediates or addresses beyond the common
       scan below.  */
    case TYPE_INCDEC:
    case TYPE_SETCC:
    case TYPE_ICMOV:
    case TYPE_FMOV:
    case TYPE_FOP:
    case TYPE_FCMP:
    case TYPE_FOP1:
    case TYPE_FMUL:
    case TYPE_FDIV:
    case TYPE_FSGN:
    case TYPE_FPSPC:
    case TYPE_FCMOV:
    case TYPE_IBR:
      break;

    case TYPE_STR:
    case TYPE_CLD:
      len = 0;
      /* NOTE(review): no break here, so control falls through into the
	 immediate-operand scan below.  Presumably harmless because
	 string insns take no constant operands -- confirm this
	 fallthrough is intentional.  */

    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ALU:
    case TYPE_ICMP:
    case TYPE_IMOVX:
    case TYPE_ISHIFT:
    case TYPE_IMUL:
    case TYPE_IDIV:
    case TYPE_PUSH:
    case TYPE_POP:
      /* Count immediate bytes: 1 for a sign-extendable 8-bit constant,
	 otherwise a full-width immediate.  */
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (CONSTANT_P (recog_data.operand[i]))
	  {
	    if (GET_CODE (recog_data.operand[i]) == CONST_INT
		&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	      len += 1;
	    else
	      /* NOTE(review): sizes the immediate by operand 0's (the
		 destination's) mode, not operand I's -- looks deliberate,
		 but verify.  */
	      len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
	  }
      break;

    case TYPE_IMOV:
      /* mov with immediate source: immediate width follows the
	 destination mode.  */
      if (CONSTANT_P (recog_data.operand[1]))
	len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    case TYPE_CALL:
      /* Direct call: opcode plus 32-bit displacement.  */
      if (constant_call_address_operand (recog_data.operand[0],
					 GET_MODE (recog_data.operand[0])))
	return 5;
      break;

    case TYPE_CALLV:
      /* Direct call with a value: same 5-byte form, operand 1 is the
	 call target.  */
      if (constant_call_address_operand (recog_data.operand[1],
					 GET_MODE (recog_data.operand[1])))
	return 5;
      break;

    case TYPE_LEA:
      {
	/* Irritatingly, single_set doesn't work with REG_UNUSED present,
	   as we'll get from running life_analysis during reg-stack when
	   not optimizing.  */
	rtx set = PATTERN (insn);
	if (GET_CODE (set) == SET)
	  ;
	else if (GET_CODE (set) == PARALLEL
		 && XVECLEN (set, 0) == 2
		 && GET_CODE (XVECEXP (set, 0, 0)) == SET
		 && GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
	  set = XVECEXP (set, 0, 0);
	else
	  abort ();

	/* lea's "memory operand" is its source address expression;
	   skip the generic MEM scan below.  */
	len += memory_address_length (SET_SRC (set));
	goto just_opcode;
      }

    case TYPE_OTHER:
    case TYPE_MULTI:
      /* Unknown/multi-insn patterns: assume the worst case.  */
      return 15;

    case TYPE_FXCH:
      /* fxch st(1) is two bytes; other stack regs need an extra byte.  */
      if (STACK_TOP_P (recog_data.operand[0]))
	return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
	return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      abort ();
    }

  /* Add the address length of the first memory operand, if any.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
	len += memory_address_length (XEXP (recog_data.operand[i], 0));
	break;
      }

just_opcode:
  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
e075ae69
RH
5569\f
5570/* Return the maximum number of instructions a cpu can issue. */
b657fc39 5571
e075ae69
RH
5572int
5573ix86_issue_rate ()
b657fc39 5574{
e075ae69 5575 switch (ix86_cpu)
b657fc39 5576 {
e075ae69
RH
5577 case PROCESSOR_PENTIUM:
5578 case PROCESSOR_K6:
5579 return 2;
79325812 5580
e075ae69
RH
5581 case PROCESSOR_PENTIUMPRO:
5582 return 3;
b657fc39 5583
b657fc39 5584 default:
e075ae69 5585 return 1;
b657fc39 5586 }
b657fc39
L
5587}
5588
e075ae69
RH
5589/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5590 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 5591
e075ae69
RH
5592static int
5593ix86_flags_dependant (insn, dep_insn, insn_type)
5594 rtx insn, dep_insn;
5595 enum attr_type insn_type;
5596{
5597 rtx set, set2;
b657fc39 5598
e075ae69
RH
5599 /* Simplify the test for uninteresting insns. */
5600 if (insn_type != TYPE_SETCC
5601 && insn_type != TYPE_ICMOV
5602 && insn_type != TYPE_FCMOV
5603 && insn_type != TYPE_IBR)
5604 return 0;
b657fc39 5605
e075ae69
RH
5606 if ((set = single_set (dep_insn)) != 0)
5607 {
5608 set = SET_DEST (set);
5609 set2 = NULL_RTX;
5610 }
5611 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
5612 && XVECLEN (PATTERN (dep_insn), 0) == 2
5613 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
5614 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
5615 {
5616 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5617 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5618 }
b657fc39 5619
e075ae69 5620 if (set && GET_CODE (set) == REG && REGNO (set) == FLAGS_REG)
b657fc39 5621 {
e075ae69
RH
5622 /* This test is true if the dependant insn reads the flags but
5623 not any other potentially set register. */
5624 if (reg_overlap_mentioned_p (set, PATTERN (insn))
5625 && (!set2 || !reg_overlap_mentioned_p (set2, PATTERN (insn))))
5626 return 1;
5627 }
b657fc39 5628
e075ae69
RH
5629 return 0;
5630}
b657fc39 5631
e075ae69
RH
5632/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5633 address with operands set by DEP_INSN. */
5634
5635static int
5636ix86_agi_dependant (insn, dep_insn, insn_type)
5637 rtx insn, dep_insn;
5638 enum attr_type insn_type;
5639{
5640 rtx addr;
5641
5642 if (insn_type == TYPE_LEA)
5643 addr = SET_SRC (single_set (insn));
5644 else
5645 {
5646 int i;
5647 extract_insn (insn);
1ccbefce
RH
5648 for (i = recog_data.n_operands - 1; i >= 0; --i)
5649 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 5650 {
1ccbefce 5651 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
5652 goto found;
5653 }
5654 return 0;
5655 found:;
b657fc39
L
5656 }
5657
e075ae69 5658 return modified_in_p (addr, dep_insn);
b657fc39 5659}
a269a03c
JC
5660
/* Adjust COST, the scheduler's estimated latency of the dependency
   edge LINK from DEP_INSN to INSN, using CPU-specific pipeline
   knowledge.  Returns the adjusted cost.  */

int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  /* Prologue and epilogue allocators have false dependency on ebp.
     This results in one cycle extra stall on Pentium prologue scheduling,
     so handle this important case manually.  */
  if ((dep_insn_code_number == CODE_FOR_prologue_allocate_stack
       || dep_insn_code_number == CODE_FOR_epilogue_deallocate_stack)
      && !reg_mentioned_p (stack_pointer_rtx, insn))
    return 0;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
      break;

    case PROCESSOR_ATHLON:
      /* Address Generation Interlock cause problems on the Athlon CPU
	 because the loads and stores are done in order so once one load or
	 store has to wait, others must too, so penalize the AGIs slightly
	 by one cycle.  We might experiment with this value later.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += 2;
      /* NOTE(review): no break here -- falls through into the default
	 case, which only breaks, so behavior is unaffected.  */
    default:
      break;
    }

  return cost;
}
0a726ef1 5768
e075ae69
RH
/* Per-block scheduler state, cleared by ix86_sched_init.  Only the
   PentiumPro model keeps state: the insns assigned to each of the three
   decoders this cycle, and how many insns were issued.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];              /* Insn occupying each decoder slot.  */
    int issued_this_cycle;      /* Insns issued so far this cycle.  */
  } ppro;
} ix86_sched_data;
0a726ef1 5777
e075ae69
RH
5778static int
5779ix86_safe_length (insn)
5780 rtx insn;
5781{
5782 if (recog_memoized (insn) >= 0)
5783 return get_attr_length(insn);
5784 else
5785 return 128;
5786}
0a726ef1 5787
e075ae69
RH
5788static int
5789ix86_safe_length_prefix (insn)
5790 rtx insn;
5791{
5792 if (recog_memoized (insn) >= 0)
5793 return get_attr_length(insn);
5794 else
5795 return 0;
5796}
5797
5798static enum attr_memory
5799ix86_safe_memory (insn)
5800 rtx insn;
5801{
5802 if (recog_memoized (insn) >= 0)
5803 return get_attr_memory(insn);
5804 else
5805 return MEMORY_UNKNOWN;
5806}
0a726ef1 5807
e075ae69
RH
5808static enum attr_pent_pair
5809ix86_safe_pent_pair (insn)
5810 rtx insn;
5811{
5812 if (recog_memoized (insn) >= 0)
5813 return get_attr_pent_pair(insn);
5814 else
5815 return PENT_PAIR_NP;
5816}
0a726ef1 5817
e075ae69
RH
5818static enum attr_ppro_uops
5819ix86_safe_ppro_uops (insn)
5820 rtx insn;
5821{
5822 if (recog_memoized (insn) >= 0)
5823 return get_attr_ppro_uops (insn);
5824 else
5825 return PPRO_UOPS_MANY;
5826}
0a726ef1 5827
e075ae69
RH
5828static void
5829ix86_dump_ppro_packet (dump)
5830 FILE *dump;
0a726ef1 5831{
e075ae69 5832 if (ix86_sched_data.ppro.decode[0])
0a726ef1 5833 {
e075ae69
RH
5834 fprintf (dump, "PPRO packet: %d",
5835 INSN_UID (ix86_sched_data.ppro.decode[0]));
5836 if (ix86_sched_data.ppro.decode[1])
5837 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
5838 if (ix86_sched_data.ppro.decode[2])
5839 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
5840 fputc ('\n', dump);
5841 }
5842}
0a726ef1 5843
e075ae69 5844/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 5845
e075ae69
RH
5846void
5847ix86_sched_init (dump, sched_verbose)
5848 FILE *dump ATTRIBUTE_UNUSED;
5849 int sched_verbose ATTRIBUTE_UNUSED;
5850{
5851 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
5852}
5853
5854/* Shift INSN to SLOT, and shift everything else down. */
5855
5856static void
5857ix86_reorder_insn (insnp, slot)
5858 rtx *insnp, *slot;
5859{
5860 if (insnp != slot)
5861 {
5862 rtx insn = *insnp;
5863 do
5864 insnp[0] = insnp[1];
5865 while (++insnp != slot);
5866 *insnp = insn;
0a726ef1 5867 }
e075ae69
RH
5868}
5869
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   E_READY/READY delimit the ready list (scanned from the back); TYPE is
   the required pairability class; FIRST is the insn we are pairing with.
   Returns a pointer into the ready list, or NULL if nothing pairs.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit the 7-byte (plus prefixes) pairing limit.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan backwards for a TYPE-pairable insn minimizing the cycle
     mismatch between the two pipes; stop early if a perfect
     (zero-waste) partner is found.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
5925
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready, clock_var ATTRIBUTE_UNUSED;
{
  rtx *e_ready = ready + n_ready - 1;   /* Last (highest priority) entry.  */
  rtx *insnp;
  int i;

  /* Nothing to reorder with fewer than two ready insns.  */
  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      goto out;

    case PROCESSOR_PENTIUM:
      /* This wouldn't be necessary if Haifa knew that static insn ordering
	 is important to which pipe an insn is issued to.  So we have to make
	 some minor rearrangements.  */
      {
	enum attr_pent_pair pair1, pair2;

	pair1 = ix86_safe_pent_pair (*e_ready);

	/* If the first insn is non-pairable, let it be.  */
	if (pair1 == PENT_PAIR_NP)
	  goto out;
	pair2 = PENT_PAIR_NP;

	/* If the first insn is UV or PV pairable, search for a PU
	   insn to go with.  */
	if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PU, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PU;
	  }

	/* If the first insn is PU or UV pairable, search for a PV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP
	    && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PV;
	  }

	/* If the first insn is pairable, search for a UV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_UV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_UV;
	  }

	if (pair2 == PENT_PAIR_NP)
	  goto out;

	/* Found something!  Decide if we need to swap the order.  */
	if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
	    || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
		&& ix86_safe_memory (*e_ready) == MEMORY_BOTH
		&& ix86_safe_memory (*insnp) == MEMORY_LOAD))
	  ix86_reorder_insn (insnp, e_ready);
	else
	  ix86_reorder_insn (insnp, e_ready - 1);
      }
      break;

    case PROCESSOR_PENTIUMPRO:
      {
	rtx decode[3];
	enum attr_ppro_uops cur_uops;
	int issued_this_cycle;

	/* At this point .ppro.decode contains the state of the three
	   decoders from last "cycle".  That is, those insns that were
	   actually independent.  But here we're scheduling for the
	   decoder, and we may find things that are decodable in the
	   same cycle.  */

	memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
	issued_this_cycle = 0;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);

	/* If the decoders are empty, and we've a complex insn at the
	   head of the priority queue, let it issue without complaint.  */
	if (decode[0] == NULL)
	  {
	    if (cur_uops == PPRO_UOPS_MANY)
	      {
		decode[0] = *insnp;
		goto ppro_done;
	      }

	    /* Otherwise, search for a 2-4 uop insn to issue.  */
	    while (cur_uops != PPRO_UOPS_FEW)
	      {
		if (insnp == ready)
		  break;
		cur_uops = ix86_safe_ppro_uops (*--insnp);
	      }

	    /* If so, move it to the head of the line.  */
	    if (cur_uops == PPRO_UOPS_FEW)
	      ix86_reorder_insn (insnp, e_ready);

	    /* Issue the head of the queue.  */
	    issued_this_cycle = 1;
	    decode[0] = *e_ready--;
	  }

	/* Look for simple insns to fill in the other two slots.  */
	for (i = 1; i < 3; ++i)
	  if (decode[i] == NULL)
	    {
	      if (ready >= e_ready)
		goto ppro_done;

	      insnp = e_ready;
	      cur_uops = ix86_safe_ppro_uops (*insnp);
	      while (cur_uops != PPRO_UOPS_ONE)
		{
		  if (insnp == ready)
		    break;
		  cur_uops = ix86_safe_ppro_uops (*--insnp);
		}

	      /* Found one.  Move it to the head of the queue and issue it.  */
	      if (cur_uops == PPRO_UOPS_ONE)
		{
		  ix86_reorder_insn (insnp, e_ready);
		  decode[i] = *e_ready--;
		  issued_this_cycle++;
		  continue;
		}

	      /* ??? Didn't find one.  Ideally, here we would do a lazy split
		 of 2-uop insns, issue one and queue the other.  */
	    }

      ppro_done:
	if (issued_this_cycle == 0)
	  issued_this_cycle = 1;
	ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
      }
      break;
    }

out:
  return ix86_issue_rate ();
}
fb693d44 6092
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A complex insn occupies a packet by itself: flush the
	       pending packet, show INSN as its own packet, then clear.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must go to decoder 0; start a new packet.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn fills the next free decoder slot; when
	       all three are full the packet is complete.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
This page took 1.597841 seconds and 5 git commands to generate.